├── MANIFEST.in
├── setup.cfg
├── tests
    ├── __init__.py
    ├── context.py
    ├── test_encoder.py
    └── test_decoder.py
├── prison
    ├── __version__.py
    ├── __init__.py
    ├── utils.py
    ├── constants.py
    ├── encoder.py
    └── decoder.py
├── LICENSE
├── .gitignore
├── setup.py
└── README.md


/MANIFEST.in:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/prison/__version__.py:
--------------------------------------------------------------------------------
# Package version string.  setup.py exec()s this file and passes the value
# of ``__version__`` to setup(version=...).
__version__ = '0.2.1'
2 | 


--------------------------------------------------------------------------------
/prison/__init__.py:
--------------------------------------------------------------------------------
1 | from .decoder import loads
2 | from .encoder import dumps
3 | 
4 | __all__ = ['loads', 'dumps']
5 | 


--------------------------------------------------------------------------------
/tests/context.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.insert(0, os.path.abspath(
4 |     os.path.join(os.path.dirname(__file__), '..')))
5 | 
6 | import prison
7 | 


--------------------------------------------------------------------------------
/prison/utils.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import urllib
 3 | 
 4 | 
 5 | RE_QUOTE = re.compile('^[-A-Za-z0-9~!*()_.\',:@$/]*$')
 6 | 
 7 | 
 8 | def quote(x):
 9 |     if RE_QUOTE.match(x):
10 |         return x
11 | 
12 |     return urllib.quote(x.encode('utf-8'))\
13 |         .replace('%2C', ',', 'g')\
14 |         .replace('%3A', ':', 'g')\
15 |         .replace('%40', '@', 'g')\
16 |         .replace('%24', '$', 'g')\
17 |         .replace('%2F', '/', 'g')\
18 |         .replace('%20', '+', 'g')
19 | 


--------------------------------------------------------------------------------
/prison/constants.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | 
 4 | WHITESPACE = ''
 5 | 
 6 | IDCHAR_PUNCTUATION = '_-./~%+'
 7 | 
 8 | NOT_IDCHAR = ''.join([c for c in (chr(i) for i in range(127))
 9 |                       if not (c.isalnum()
10 |                               or c in IDCHAR_PUNCTUATION)])
11 | 
12 | # Additionally, we need to distinguish ids and numbers by first char.
13 | NOT_IDSTART = '-0123456789'
14 | 
15 | # Regexp string matching a valid id.
16 | IDRX = ('[^' + NOT_IDSTART + NOT_IDCHAR + '][^' + NOT_IDCHAR + ']*')
17 | 
18 | # Regexp to check for valid rison ids.
19 | ID_OK_RE = re.compile('^' + IDRX + '$', re.M)
20 | 
21 | # Regexp to find the end of an id when parsing.
22 | NEXT_ID_RE = re.compile(IDRX, re.M)
23 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2019 Beto Dealmeida
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 4 | this software and associated documentation files (the "Software"), to deal in
 5 | the Software without restriction, including without limitation the rights to
 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 7 | of the Software, and to permit persons to whom the Software is furnished to do
 8 | so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 


--------------------------------------------------------------------------------
/tests/test_encoder.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | try:
 4 |     import prison
 5 | except ImportError:
 6 |     from .context import prison
 7 | 
 8 | 
 9 | class TestEncoder(unittest.TestCase):
10 | 
11 |     def test_dict(self):
12 |         self.assertEqual('()', prison.dumps({}))
13 |         self.assertEqual('(a:0,b:1)', prison.dumps({
14 |             'a': 0,
15 |             'b': 1
16 |         }))
17 |         self.assertEqual("(a:0,b:foo,c:'23skidoo')", prison.dumps({
18 |             'a': 0,
19 |             'c': '23skidoo',
20 |             'b': 'foo'
21 |         }))
22 |         self.assertEqual('(id:!n,type:/common/document)', prison.dumps({
23 |             'type': '/common/document',
24 |             'id': None
25 |         }))
26 |         self.assertEqual("(a:0)", prison.dumps({
27 |             'a': 0
28 |         }))
29 |         self.assertEqual("(a:%)", prison.dumps({
30 |             'a': '%'
31 |         }))
32 |         self.assertEqual("(a:/w+/)", prison.dumps({
33 |             'a': '/w+/'
34 |         }))
35 | 
36 |     def test_bool(self):
37 |         self.assertEqual('!t', prison.dumps(True))
38 |         self.assertEqual('!f', prison.dumps(False))
39 | 
40 |     def test_none(self):
41 |         self.assertEqual('!n', prison.dumps(None))
42 | 
43 |     def test_list(self):
44 |         self.assertEqual('!(1,2,3)', prison.dumps([1, 2, 3]))
45 |         self.assertEqual('!()', prison.dumps([]))
46 |         self.assertEqual("!(!t,!f,!n,'')", prison.dumps([True, False, None, '']))
47 | 
48 |     def test_number(self):
49 |         self.assertEqual('0', prison.dumps(0))
50 |         self.assertEqual('1.5', prison.dumps(1.5))
51 |         self.assertEqual('-3', prison.dumps(-3))
52 |         self.assertEqual('1e30', prison.dumps(1e+30))
53 |         self.assertEqual('1e-30', prison.dumps(1.0000000000000001e-30))
54 | 
55 |     def test_string(self):
56 |         self.assertEqual("''", prison.dumps(''))
57 |         self.assertEqual('G.', prison.dumps('G.'))
58 |         self.assertEqual('a', prison.dumps('a'))
59 |         self.assertEqual("'0a'", prison.dumps('0a'))
60 |         self.assertEqual("'abc def'", prison.dumps('abc def'))
61 |         self.assertEqual("'-h'", prison.dumps('-h'))
62 |         self.assertEqual('a-z', prison.dumps('a-z'))
63 |         self.assertEqual("'wow!!'", prison.dumps('wow!'))
64 |         self.assertEqual('domain.com', prison.dumps('domain.com'))
65 |         self.assertEqual("'user@domain.com'", prison.dumps('user@domain.com'))
66 |         self.assertEqual("'US $10'", prison.dumps('US $10'))
67 |         self.assertEqual("'can!'t'", prison.dumps("can't"))
68 | 


--------------------------------------------------------------------------------
/tests/test_decoder.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | try:
 4 |     import prison
 5 | except ImportError:
 6 |     from .context import prison
 7 | 
 8 | 
 9 | class TestDecoder(unittest.TestCase):
10 | 
11 |     def test_dict(self):
12 |         self.assertEqual(prison.loads('()'), {})
13 |         self.assertEqual(prison.loads('(a:0,b:1)'), {
14 |             'a': 0,
15 |             'b': 1
16 |         })
17 |         self.assertEqual(prison.loads("(a:0,b:foo,c:'23skidoo')"), {
18 |             'a': 0,
19 |             'c': '23skidoo',
20 |             'b': 'foo'
21 |         })
22 |         self.assertEqual(prison.loads('(id:!n,type:/common/document)'), {
23 |             'type': '/common/document',
24 |             'id': None
25 |         })
26 |         self.assertEqual(prison.loads("(a:0)"), {
27 |             'a': 0
28 |         })
29 |         self.assertEqual(prison.loads("(a:%)"), {
30 |             'a': '%'
31 |         })
32 |         self.assertEqual(prison.loads("(a:/w+/)"), {
33 |             'a': '/w+/'
34 |         })
35 | 
36 |     def test_bool(self):
37 |         self.assertEqual(prison.loads('!t'), True)
38 |         self.assertEqual(prison.loads('!f'), False)
39 | 
40 |     def test_invalid(self):
41 |         with self.assertRaises(prison.decoder.ParserException):
42 |             prison.loads('(')
43 | 
44 |     def test_none(self):
45 |         self.assertEqual(prison.loads('!n'), None)
46 | 
47 |     def test_list(self):
48 |         self.assertEqual(prison.loads('!(1,2,3)'), [1, 2, 3])
49 |         self.assertEqual(prison.loads('!()'), [])
50 |         self.assertEqual(prison.loads("!(!t,!f,!n,'')"), [True, False, None, ''])
51 | 
52 |     def test_number(self):
53 |         self.assertEqual(prison.loads('0'), 0)
54 |         self.assertEqual(prison.loads('1.5'), 1.5)
55 |         self.assertEqual(prison.loads('-3'), -3)
56 |         self.assertEqual(prison.loads('1e30'), 1e+30)
57 |         self.assertEqual(prison.loads('1e-30'), 1.0000000000000001e-30)
58 | 
59 |     def test_string(self):
60 |         self.assertEqual(prison.loads("''"), '')
61 |         self.assertEqual(prison.loads('G.'), 'G.')
62 |         self.assertEqual(prison.loads('a'), 'a')
63 |         self.assertEqual(prison.loads("'0a'"), '0a')
64 |         self.assertEqual(prison.loads("'abc def'"), 'abc def')
65 |         self.assertEqual(prison.loads("'-h'"), '-h')
66 |         self.assertEqual(prison.loads('a-z'), 'a-z')
67 |         self.assertEqual(prison.loads("'wow!!'"), 'wow!')
68 |         self.assertEqual(prison.loads('domain.com'), 'domain.com')
69 |         self.assertEqual(prison.loads("'user@domain.com'"), 'user@domain.com')
70 |         self.assertEqual(prison.loads("'US $10'"), 'US $10')
71 |         self.assertEqual(prison.loads("'can!'t'"), "can't")
72 | 


--------------------------------------------------------------------------------
/prison/encoder.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | 
  3 | from six import string_types
  4 | 
  5 | from .utils import quote
  6 | from .constants import ID_OK_RE
  7 | 
  8 | 
  9 | class Encoder(object):
 10 | 
 11 |     def __init__(self):
 12 |         pass
 13 | 
 14 |     @staticmethod
 15 |     def encoder(v):
 16 |         if isinstance(v, list):
 17 |             return Encoder.list
 18 |         elif isinstance(v, string_types):
 19 |             return Encoder.string
 20 |         elif isinstance(v, bool):
 21 |             return Encoder.bool
 22 |         elif isinstance(v, (float, int)):
 23 |             return Encoder.number
 24 |         elif isinstance(v, type(None)):
 25 |             return Encoder.none
 26 |         elif isinstance(v, dict):
 27 |             return Encoder.dict
 28 |         else:
 29 |             raise AssertionError('Unable to encode type: {0}'.format(type(v)))
 30 | 
 31 |     @staticmethod
 32 |     def encode(v):
 33 |         encoder = Encoder.encoder(v)
 34 |         return encoder(v)
 35 | 
 36 |     @staticmethod
 37 |     def list(x):
 38 |         a = ['!(']
 39 |         b = None
 40 |         for i in range(len(x)):
 41 |             v = x[i]
 42 |             f = Encoder.encoder(v)
 43 |             if f:
 44 |                 v = f(v)
 45 |                 if isinstance(v, string_types):
 46 |                     if b:
 47 |                         a.append(',')
 48 |                     a.append(v)
 49 |                     b = True
 50 |         a.append(')')
 51 |         return ''.join(a)
 52 | 
 53 |     @staticmethod
 54 |     def number(v):
 55 |         return str(v).replace('+', '')
 56 | 
 57 |     @staticmethod
 58 |     def none(_):
 59 |         return '!n'
 60 | 
 61 |     @staticmethod
 62 |     def bool(v):
 63 |         return '!t' if v else '!f'
 64 | 
 65 |     @staticmethod
 66 |     def string(v):
 67 |         if v == '':
 68 |             return "''"
 69 | 
 70 |         if ID_OK_RE.match(v):
 71 |             return v
 72 | 
 73 |         def replace(match):
 74 |             if match.group(0) in ["'", '!']:
 75 |                 return '!' + match.group(0)
 76 |             return match.group(0)
 77 | 
 78 |         v = re.sub(r'([\'!])', replace, v)
 79 | 
 80 |         return "'" + v + "'"
 81 | 
 82 |     @staticmethod
 83 |     def dict(x):
 84 |         a = ['(']
 85 |         b = None
 86 |         ks = sorted(x.keys())
 87 |         for i in ks:
 88 |             v = x[i]
 89 |             f = Encoder.encoder(v)
 90 |             if f:
 91 |                 v = f(v)
 92 |                 if isinstance(v, string_types):
 93 |                     if b:
 94 |                         a.append(',')
 95 |                     a.append(Encoder.string(i))
 96 |                     a.append(':')
 97 |                     a.append(v)
 98 |                     b = True
 99 | 
100 |         a.append(')')
101 |         return ''.join(a)
102 | 
103 | 
def encode_array(v):
    """Encode a list as rison without the enclosing ``!(`` and ``)``.

    Args:
        v: the list to encode.

    Returns:
        The rison array text with its two-character ``!(`` prefix and its
        closing ``)`` removed.

    Raises:
        AssertionError: if ``v`` is not a list.
    """
    if not isinstance(v, list):
        raise AssertionError('encode_array expects a list argument')
    r = dumps(v)
    # Slice, not tuple-index: r[2, len(r)-1] raised TypeError on a str.
    return r[2:-1]
109 | 
110 | 
def encode_object(v):
    """Encode a dict as rison without the enclosing ``(`` and ``)``.

    Args:
        v: the dict to encode.

    Returns:
        The rison object text with its opening ``(`` and closing ``)``
        removed.

    Raises:
        AssertionError: if ``v`` is not a dict.
    """
    # ``None`` and lists are not dict instances, so one check covers them.
    if not isinstance(v, dict):
        raise AssertionError('encode_object expects an dict argument')
    r = dumps(v)
    # Slice, not tuple-index: r[1, len(r)-1] raised TypeError on a str.
    return r[1:-1]
116 | 
117 | 
def encode_uri(v):
    """Encode ``v`` as rison and pass the resulting text through ``quote``."""
    rison_text = dumps(v)
    return quote(rison_text)
120 | 
121 | 
def dumps(string):
    """Return the rison text for ``string``.

    The value is handed to ``Encoder.encode``, which selects the encoding
    by the value's type.
    """
    encoded = Encoder.encode(string)
    return encoded
124 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Note: To use the 'upload' functionality of this file, you must:
  5 | #   $ pip install twine
  6 | 
  7 | import os
  8 | import sys
  9 | from shutil import rmtree
 10 | 
 11 | from setuptools import find_packages, setup, Command
 12 | 
# Package metadata.
NAME = 'prison'
DESCRIPTION = 'Rison encoder/decoder'
URL = 'https://github.com/betodealmeida/python-rison'
EMAIL = 'beto@dealmeida.net'
AUTHOR = 'Beto Dealmeida'

# Runtime dependencies; passed to setup() as install_requires.
REQUIRED = [
    'six',
]

# Development-only tools; passed to setup() as the 'dev' extra.
development_extras = [
    'nose',
    'pipreqs',
    'twine',
]

# Absolute path of the directory containing this setup.py.
here = os.path.abspath(os.path.dirname(__file__))

# No long description is supplied.
long_description = ''

# Load the package's __version__.py module as a dictionary.
# exec() runs the file's source with `about` as its namespace, so
# about['__version__'] is available without importing the package.
about = {}
with open(os.path.join(here, NAME, '__version__.py')) as f:
    exec(f.read(), about)
 38 | 
 39 | 
 40 | class UploadCommand(Command):
 41 |     """Support setup.py upload."""
 42 | 
 43 |     description = 'Build and publish the package.'
 44 |     user_options = []
 45 | 
 46 |     @staticmethod
 47 |     def status(s):
 48 |         """Prints things in bold."""
 49 |         print('\033[1m{0}\033[0m'.format(s))
 50 | 
 51 |     def initialize_options(self):
 52 |         pass
 53 | 
 54 |     def finalize_options(self):
 55 |         pass
 56 | 
 57 |     def run(self):
 58 |         try:
 59 |             self.status('Removing previous builds…')
 60 |             rmtree(os.path.join(here, 'dist'))
 61 |         except OSError:
 62 |             pass
 63 | 
 64 |         self.status('Building Source and Wheel (universal) distribution…')
 65 |         os.system(
 66 |             '{0} setup.py sdist bdist_wheel --universal'.format(sys.executable))
 67 | 
 68 |         self.status('Uploading the package to PyPi via Twine…')
 69 |         os.system('twine upload dist/*')
 70 | 
 71 |         sys.exit()
 72 | 
 73 | 
# Where the magic happens:
setup(
    name=NAME,
    # Version string loaded from the package's __version__.py into `about`.
    version=about['__version__'],
    description=DESCRIPTION,
    long_description=long_description,
    author=AUTHOR,
    author_email=EMAIL,
    url=URL,
    # The tests package is excluded from the distribution.
    packages=find_packages(exclude=('tests',)),
    # If your package is a single module, use this instead of 'packages':
    # py_modules=['mypackage'],

    entry_points={},
    install_requires=REQUIRED,
    # Optional 'dev' extra carrying the development tools.
    extras_require={
        'dev': development_extras,
    },
    include_package_data=True,
    license='MIT',
    classifiers=[
        # Trove classifiers
        # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: Implementation :: CPython',
        'Programming Language :: Python :: Implementation :: PyPy'
    ],
    # $ setup.py upload support: registers UploadCommand under the
    # command name 'upload'.
    cmdclass={
        'upload': UploadCommand,
    },
)
114 | 


--------------------------------------------------------------------------------
/prison/decoder.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | import re
  4 | 
  5 | from .constants import NEXT_ID_RE, WHITESPACE
  6 | 
  7 | 
  8 | class ParserException(Exception):
  9 |     pass
 10 | 
 11 | 
 12 | class Parser(object):
 13 | 
 14 |     def __init__(self):
 15 |         self.string = None
 16 |         self.index = 0
 17 | 
 18 |     """
 19 |     This parser supports RISON, RISON-A and RISON-O.
 20 |     """
 21 |     def parse(self, string, format=str):
 22 |         if string == "(":
 23 |             raise ParserException("unmatched '('")
 24 |         if format in [list, 'A']:
 25 |             self.string = "!({0})".format(string)
 26 |         elif format in [dict, 'O']:
 27 |             self.string = "({0})".format(string)
 28 |         elif format is str:
 29 |             self.string = string
 30 |         else:
 31 |             raise ValueError("""Parse format should be one of str, list, dict,
 32 |                 'A' (alias for list), '0' (alias for dict).""")
 33 | 
 34 |         self.index = 0
 35 | 
 36 |         value = self.read_value()
 37 |         if self.next():
 38 |             raise ParserException("unable to parse rison string %r" % (string,))
 39 |         return value
 40 | 
 41 |     def read_value(self):
 42 |         c = self.next()
 43 | 
 44 |         if c == '!':
 45 |             return self.parse_bang()
 46 |         if c == '(':
 47 |             return self.parse_open_paren()
 48 |         if c == "'":
 49 |             return self.parse_single_quote()
 50 |         if c in '-0123456789':
 51 |             return self.parse_number()
 52 | 
 53 |         # fell through table, parse as an id
 54 |         s = self.string
 55 |         i = self.index-1
 56 | 
 57 |         m = NEXT_ID_RE.match(s, i)
 58 |         if m:
 59 |             _id = m.group(0)
 60 |             self.index = i + len(_id)
 61 |             return _id
 62 | 
 63 |         if c:
 64 |             raise ParserException("invalid character: '" + c + "'")
 65 |         raise ParserException("empty expression")
 66 | 
 67 |     def parse_array(self):
 68 |         ar = []
 69 |         while 1:
 70 |             c = self.next()
 71 |             if c == ')':
 72 |                 return ar
 73 | 
 74 |             if c is None:
 75 |                 raise ParserException("unmatched '!('")
 76 | 
 77 |             if len(ar):
 78 |                 if c != ',':
 79 |                     raise ParserException("missing ','")
 80 |             elif c == ',':
 81 |                 raise ParserException("extra ','")
 82 |             else:
 83 |                 self.index -= 1
 84 |             n = self.read_value()
 85 |             ar.append(n)
 86 | 
 87 |     def parse_bang(self):
 88 |         s = self.string
 89 |         c = s[self.index]
 90 |         self.index += 1
 91 |         if c is None:
 92 |             raise ParserException('"!" at end of input')
 93 |         if c not in self.bangs:
 94 |             raise ParserException('unknown literal: "!' + c + '"')
 95 |         x = self.bangs[c]
 96 |         if callable(x):
 97 |             return x(self)
 98 | 
 99 |         return x
100 | 
101 |     def parse_open_paren(self):
102 |         count = 0
103 |         o = {}
104 | 
105 |         while 1:
106 |             c = self.next()
107 |             if c == ')':
108 |                 return o
109 |             if count:
110 |                 if c != ',':
111 |                     raise ParserException("missing ','")
112 |             elif c == ',':
113 |                 raise ParserException("extra ','")
114 |             else:
115 |                 self.index -= 1
116 |             k = self.read_value()
117 | 
118 |             if self.next() != ':':
119 |                 raise ParserException("missing ':'")
120 |             v = self.read_value()
121 | 
122 |             o[k] = v
123 |             count += 1
124 | 
125 |     def parse_single_quote(self):
126 |         s = self.string
127 |         i = self.index
128 |         start = i
129 |         segments = []
130 | 
131 |         while 1:
132 |             if i >= len(s):
133 |                 raise ParserException('unmatched "\'"')
134 | 
135 |             c = s[i]
136 |             i += 1
137 |             if c == "'":
138 |                 break
139 | 
140 |             if c == '!':
141 |                 if start < i-1:
142 |                     segments.append(s[start:i-1])
143 |                 c = s[i]
144 |                 i += 1
145 |                 if c in "!'":
146 |                     segments.append(c)
147 |                 else:
148 |                     raise ParserException('invalid string escape: "!'+c+'"')
149 | 
150 |                 start = i
151 | 
152 |         if start < i-1:
153 |             segments.append(s[start:i-1])
154 |         self.index = i
155 |         return ''.join(segments)
156 | 
157 |     # Also any number start (digit or '-')
158 |     def parse_number(self):
159 |         s = self.string
160 |         i = self.index
161 |         start = i-1
162 |         state = 'int'
163 |         permitted_signs = '-'
164 |         transitions = {
165 |             'int+.': 'frac',
166 |             'int+e': 'exp',
167 |             'frac+e': 'exp'
168 |         }
169 | 
170 |         while 1:
171 |             if i >= len(s):
172 |                 i += 1
173 |                 break
174 | 
175 |             c = s[i]
176 |             i += 1
177 | 
178 |             if '0' <= c <= '9':
179 |                 continue
180 | 
181 |             if permitted_signs.find(c) >= 0:
182 |                 permitted_signs = ''
183 |                 continue
184 | 
185 |             state = transitions.get(state + '+' + c.lower(), None)
186 |             if state is None:
187 |                 break
188 |             if state == 'exp':
189 |                 permitted_signs = '-'
190 | 
191 |         self.index = i - 1
192 |         s = s[start:self.index]
193 |         if s == '-':
194 |             raise ParserException("invalid number")
195 |         if re.search('[.e]', s):
196 |             return float(s)
197 |         return int(s)
198 | 
199 |     # return the next non-whitespace character, or undefined
200 |     def next(self):
201 |         s = self.string
202 |         i = self.index
203 | 
204 |         while 1:
205 |             if i == len(s):
206 |                 return None
207 |             c = s[i]
208 |             i += 1
209 |             if c not in WHITESPACE:
210 |                 break
211 | 
212 |         self.index = i
213 |         return c
214 | 
215 |     bangs = {
216 |         't': True,
217 |         'f': False,
218 |         'n': None,
219 |         '(': parse_array
220 |     }
221 | 
222 | 
def loads(s, format=str):
    """Decode the rison text ``s`` using a fresh ``Parser``.

    ``format`` is forwarded unchanged to ``Parser.parse``.
    """
    parser = Parser()
    return parser.parse(s, format=format)
225 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Prison, a Python encoder/decoder for Rison
  2 | 
  3 | ## Quickstart
  4 | 
  5 | ```bash
  6 | $ pip install prison
  7 | ```
  8 | 
  9 | ```python
 10 | >>> import prison
 11 | >>> prison.dumps({'foo': 'bar'})
 12 | '(foo:bar)'
 13 | >>> prison.loads('(foo:bar)')
 14 | {'foo': 'bar'}
 15 | ```
 16 | 
 17 | ## Rison - Compact Data in URIs
 18 | 
 19 | This page describes *Rison*, a data serialization format optimized for compactness in URIs. Rison is a slight variation of JSON that looks vastly superior after URI encoding. Rison still expresses exactly the same set of data structures as JSON, so data can be translated back and forth without loss or guesswork. 
 20 | 
 21 | You can skip straight to some [examples](#examples), or read on for more background. 
 22 | 
 23 | ### Why another data serialization format?
 24 | 
 25 | Rison is intended to meet the following goals, in roughly this order:
 26 | 
 27 | - Comply with [URI specifications](http://gbiv.com/protocols/uri/rfc/rfc3986.html) and usage
 28 | - Express **nested** data structures
 29 | - Be **human-readable**
 30 | - Be **compact**
 31 | 
 32 | Rison is necessary because the obvious alternatives fail to meet these goals:
 33 | 
 34 | - URI-encoded XML and JSON are illegible and inefficient.
 35 | - [HTML Form encoding](http://www.w3.org/TR/html4/interact/forms.html#form-content-type) rules the web but can only represent a flat list of string pairs.
 36 | - Ian Bicking's [FormEncode](http://formencode.org/) package includes the [variabledecode](http://formencode.org/Validator.html#id16) parser, an interesting convention for form encoding that allows nested lists and dictionaries. However, it becomes inefficient with deeper nesting, and allows no terminal datatypes except strings.
 37 | 
 38 | Note that these goals are shaped almost entirely by the constraints of URIs, though Rison has turned out to be useful in command-line tools as well. In the *body* of an HTTP request or response, length is less critical and URI encoding can be avoided, so JSON would usually be preferred to Rison.
 39 | 
 40 | Given that a new syntax is needed, Rison tries to innovate as little as possible:
 41 | 
 42 | - It uses the same data model as, and a very similar syntax to [JSON](http://json.org/). The Rison grammar is only a slight alteration of the JSON grammar.
 43 | - It introduces very little additional quoting, since we assume that URI encoding will be applied on top of the Rison encoding.
 44 | 
 45 | ### Differences from JSON syntax
 46 | 
 47 |   * no whitespace is permitted except inside quoted strings. 
 48 |   * almost all character escaping is left to the uri encoder. 
 49 |   * single-quotes are used for quoting, but quotes can and should be left off strings when the strings are simple identifiers. 
 50 |   * the `e+` exponent format is forbidden, since `+` is not safe in form values and the plain `e` format is equivalent. 
 51 |   * the `E`, `E+`, and `E-` exponent formats are removed. 
 52 |   * object keys should be lexically sorted when encoding. the intent is to improve url cacheability. 
 53 |   * uri-safe tokens are used in place of the standard json tokens: 
 54 |     
 55 |     |rison token|json token|meaning      |
 56 |     |:----------|:---------|:------------|
 57 |     |`'`        |`"`       |string quote |
 58 |     |`!`        |`\`       |string escape|
 59 |     |`(...)`    |`{...}`   |object       |
 60 |     |`!(...)`   |`[...]`   |array        |
 61 |     
 62 |   * the JSON literals that look like identifiers (`true`, `false` and `null`) are represented as `!` sequences: 
 63 |     
 64 |     |rison token|json token|
 65 |     |:----------|:---------|
 66 |     |`!t`       |`true`    |
 67 |     |`!f`       |`false`   |
 68 |     |`!n`       |`null`    |
 69 | 
 70 | The `!` character plays two similar but different roles, as an escape character within strings, and as a marker for special values. This may be confusing.
 71 | 
 72 | Notice that services can distinguish Rison-encoded strings from JSON-encoded strings by checking the first character. Rison structures start with `(` or `!(`. JSON structures start with `[` or `{`. This means that a service which expects a JSON encoded object or array can accept Rison-encoded objects without loss of compatibility.
 73 | 
 74 | ### Interaction with URI %-encoding
 75 | 
 76 | Rison syntax is designed to produce strings that are legible after being [form-encoded](http://www.w3.org/TR/html4/interact/forms.html#form-content-type) for the [query](http://gbiv.com/protocols/uri/rfc/rfc3986.html#query) section of a URI. None of the characters in the Rison syntax need to be URI encoded in that context, though the data itself may require URI encoding. Rison tries to be orthogonal to the %-encoding process - it just defines a string format that should survive %-encoding with very little bloat. Rison quoting is only applied when necessary to quote characters that might otherwise be interpreted as special syntax.
 77 | 
 78 | Note that most URI encoding libraries are very conservative, percent-encoding many characters that are legal according to [RFC
 79 | 3986](http://gbiv.com/protocols/uri/rfc/rfc3986.html). For example, Javascript's builtin `encodeURIComponent()` function will still make Rison strings difficult to read. The rison.js library includes a more tolerant URI encoder.
 80 | 
 81 | Rison uses its own quoting for strings, using the single quote (**`'`**) as a string delimiter and the exclamation point (**`!`**) as the string escape character. Both of these characters are legal in URIs. Rison quoting is largely inspired by Unix shell command line parsing.
 82 | 
 83 | All Unicode characters other than **`'`** and **`!`** are legal inside quoted strings. This includes newlines and control characters. Quoting all such characters is left to the %-encoding process.
 84 | 
 85 | ### Interaction with IRIs
 86 | 
 87 | This still needs to be addressed. Advice from an IRI expert would be very welcome.
 88 | 
 89 | Particular attention should be paid to Unicode characters that may be interpreted as Rison syntax characters.
 90 | 
 91 | The *idchars* set is hard to define well. The goal is to include foreign language alphanumeric characters and some punctuation that is common in identifiers (`_`, `-`, `.`, `/`, and others). However, whitespace and most punctuation characters should require quoting. 
 92 | 
 93 | ### Emailing URIs
 94 | 
 95 | Most text emailers are conservative about what they turn into a hyperlink, and they will assume that characters like `(` mean the end of the URI. This results in broken, truncated links.
 96 | 
 97 | This is actually a problem with URI encoding rather than with Rison, but it comes up a lot in practice. You could use Rison with a more aggressive URI encoder to generate emailable URIs. You can also wrap your emailed URIs in angle brackets: `<http://...>` which some mail readers have better luck with.
 98 | 
 99 | ### Further Rationale
100 | 
101 | **Passing data in URIs** is necessary in many situations. Many web services rely on the HTTP GET method, which can take advantage of an extensive deployed caching infrastructure. Browsers also have different capabilities for GET, including the crucial ability to make cross-site requests. It is also very convenient to store the state of a small browser application in the URI.
102 | 
103 | **Human readability** makes everything go faster. Primarily this means avoiding URI encoding whenever possible. This requires careful choice of characters for the syntax, and a tolerant URI encoder that only encodes characters when absolutely necessary.
104 | 
105 | **Compactness** is important because of implementation limits on URI length. Internet Explorer is once again the weakest link at 2K. One could certainly invent a more compact representation by dropping the human-readable constraint and using a compression algorithm.
106 | 
107 | ### Variations
108 | 
109 | There are several variations on Rison which are useful or at least thought-provoking. 
110 | 
111 | #### O-Rison
112 | 
113 | When you know the parameter being encoded will always be an object, always wrapping it in a containing `()` is unnecessary. Until you've dealt with nested structures, the need for parentheses is also hard to explain. In this case you may wish to declare that the argument is encoded in *O-Rison*, which can be translated to Rison by wrapping it in parentheses.
114 | 
115 | Here's a URI with a single query argument which is a nested structure: `http://example.com/service?query=(q:'*',start:10,count:10)`
116 | 
117 | This is more legible if you specify that the argument is O-Rison instead of Rison, and leave the containing `()` as implied: `http://example.com/service?query=q:'*',start:10,count:10`
118 | 
119 | This seems to be useful in enough situations that it is worth defining the term *O-Rison*.
120 | 
121 | #### A-Rison
122 | 
123 | Similarly, sometimes you know the value will always be an array. Instead of specifying a Rison argument: `.../?items=!(item1,item2,item3)` you can specify the far more legible A-Rison argument: `.../?items=item1,item2,item3`
124 | 
125 | #### Accepting other delimiters
126 | 
127 | Notice that O-Rison looks almost like a drop-in replacement for [URL form encoding](http://www.w3.org/TR/html4/interact/forms.html#form-content-type), with two substitutions:
128 | 
129 | - `:` for `=`
130 | - `,` for `&`
131 | 
132 | We could expand the Rison parser to treat all of `,`, `&`, and `;` as valid item separators and both `:` and `=` as key-value separators. In this case the vast majority of URI queries would form a flat subset of O-Rison. The exceptions are services that depend on ordering of query parameters or allow duplicate parameter names.
133 | 
134 | This extension doesn't change the parsing of standard Rison strings because `&`, `=`, and `;` are already illegal in Rison identifiers. 
135 | 
136 | ### Examples
137 | 
138 | These examples compare Rison and JSON representations of identical values.
139 | 
140 | | Rison | JSON | URI-encoded Rison | URI-encoded JSON | Roundtrip test | Compression |
141 | | --- | --- | --- | --- | --- | --- |
142 | | `(a:0,b:1)` | `{"a": 0, "b": 1}` | `(a:0,b:1)` | `%7B%22a%22:+0,+%22b%22:+1%7D` | ok | 67.86% |
143 | | `(a:0,b:foo,c:'23skidoo')` | `{"a": 0, "b": "foo", "c": "23skidoo"}` | `(a:0,b:foo,c:'23skidoo')` | `%7B%22a%22:+0,+%22b%22:+%22foo%22,+%22c%22:+%2223skidoo%22%7D` | ok | 60.66% |
144 | | `!t` | `true` | `!t` | `true` | ok | 50.00% |
145 | | `1.5` | `1.5` | `1.5` | `1.5` | ok | 0.00% |
146 | | `-3` | `-3` | `-3` | `-3` | ok | 0.00% |
147 | | `1e30` | `1e+30` | `1e30` | `1e%2B30` | ok | 42.86% |
148 | | `1e-30` | `1e-30` | `1e-30` | `1e-30` | ok | 0.00% |
149 | | `a` | `"a"` | `a` | `%22a%22` | ok | 85.71% |
150 | | `'0a'` | `"0a"` | `'0a'` | `%220a%22` | ok | 50.00% |
151 | | `'abc def'` | `"abc def"` | `%27abc+def%27` | `%22abc+def%22` | ok | 0.00% |
152 | | `(a:0)` | `{"a": 0}` | `(a:0)` | `%7B%22a%22:+0%7D` | ok | 68.75% |
153 | | `(id:!n,type:/common/document)` | `{"id": null, "type": "/common/document"}` | `(id:!n,type:/common/document)` | `%7B%22id%22:+null,+%22type%22:+%22/common/document%22%7D` | ok | 48.21% |
154 | | `!(!t,!f,!n,'')` | `[true, false, null, ""]` | `!(!t,!f,!n,'')` | `%5Btrue,+false,+null,+%22%22%5D` | ok | 54.84% |
155 | | `'-h'` | `"-h"` | `'-h'` | `%22-h%22` | ok | 50.00% |
156 | | `a-z` | `"a-z"` | `a-z` | `%22a-z%22` | ok | 66.67% |
157 | | `'wow!!'` | `"wow!"` | `'wow!!'` | `%22wow%21%22` | ok | 41.67% |
158 | | `domain.com` | `"domain.com"` | `domain.com` | `%22domain.com%22` | ok | 37.50% |
159 | | `'user@domain.com'` | `"user@domain.com"` | `'user@domain.com'` | `%22user@domain.com%22` | ok | 19.05% |
160 | | `'US $10'` | `"US $10"` | `%27US+$10%27` | `%22US+$10%22` | ok | 0.00% |
161 | | `'can!'t'` | `"can't"` | `'can!'t'` | `%22can%27t%22` | ok | 38.46% |
162 | | `'Control-F: '` | `"Control-F: \u0006"` | `%27Control-F:+%06%27` | `%22Control-F:+%5Cu0006%22` | ok | 20.00% |
163 | | `'Unicode: ௫'` | `"Unicode: \u0beb"` | `%27Unicode:+%E0%AF%AB%27` | `%22Unicode:+%5Cu0beb%22` | ok | -4.35% |
164 | 
165 | The Compression column shows `1 - encoded_rison_size / encoded_json_size`, expressed as a percentage.
166 | 
167 | On a log of Freebase mqlread service URIs, the queries were from 35% to 45% smaller when encoded with Rison.
168 | 
169 | URI encoding is done with a custom URI encoder which is less aggressive than Javascript's built-in `encodeURIComponent()`. 
170 | 
171 | ### Grammar
172 | 
173 | Modified from the [json.org](https://web.archive.org/web/20130910064110/http://json.org/) grammar.
174 | 
175 | - _object_
176 |   - `()`
177 |   - `(` _members_ `)`
178 | - _members_
179 |   - _pair_
180 |   - _pair_ `,` _members_
181 | - _pair_
182 |   - _key_ `:` _value_
183 | - _array_
184 |   - `!()`
185 |   - `!(` _elements_ `)`
186 | - _elements_
187 |   - _value_
188 |   - _value_ `,` _elements_
189 | - _key_
190 |   - _id_
191 |   - _string_
192 | - _value_
193 |   - _id_
194 |   - _string_
195 |   - _number_
196 |   - _object_
197 |   - _array_
198 |   - `!t`
199 |   - `!f`
200 |   - `!n`
201 |     <br>
202 |     　　　　────────────
203 | - _id_
204 |   - _idstart_
205 |   - _idstart_ _idchars_
206 | - _idchars_
207 |   - _idchar_
208 |   - _idchar_ _idchars_
209 | - _idchar_
210 |   - any alphanumeric ASCII character
211 |   - any ASCII character from the set `-` `_` `.` `/` `~`
212 |   - any non-ASCII Unicode character
213 | - _idstart_
214 |   - any _idchar_ not in `-`, _digit_
215 |     <br>
216 |     　　　　────────────
217 | - _string_
218 |   - `''`
219 |   - `'` _strchars_ `'`
220 | - _strchars_
221 |   - _strchar_
222 |   - _strchar_ _strchars_
223 | - _strchar_
224 |   - any Unicode character except ASCII `'` and `!`
225 |   - `!!`
226 |   - `!'`
227 |     <br>
228 |     　　　　────────────
229 | - _number_
230 |   - _int_
231 |   - _int_ _frac_
232 |   - _int_ _exp_
233 |   - _int_ _frac_ _exp_
234 | - _int_
235 |   - _digit_
236 |   - _digit1-9_ _digits_
237 |   - `-` _digit_
238 |   - `-` _digit1-9_ _digits_
239 | - _frac_
240 |   - `.` _digits_
241 | - _exp_
242 |   - _e_ _digits_
243 | - _digits_
244 |   - _digit_
245 |   - _digit_ _digits_
246 | - _e_
247 |   - `e`
248 |   - `e-`
249 | 
250 | ## History
251 | 
252 | The original Rison website is now dead. You can find an archive [here](https://web.archive.org/web/20130910064110/http://www.mjtemplate.org/examples/rison.html).
253 | 
254 | Prison was forked from https://github.com/pifantastic/python-rison and updated for Python 3 compatibility. It was named "prison" because the original "rison" package entry still exists in PyPI, although without a downloadable link.
255 | 


--------------------------------------------------------------------------------