├── .gitignore ├── test.sh ├── test_dir.sh ├── json_keys ├── random_json ├── json_compare ├── 2json ├── json2 ├── README.md ├── dir2json └── json2dir /.gitignore: -------------------------------------------------------------------------------- 1 | random/ 2 | .*.type 3 | *.j2 4 | *.json 5 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | echo "Note: this test is endless" 6 | 7 | while true; do 8 | ./random_json 10 > orig.json 9 | python3 ./json2 < orig.json | python3 ./2json > roundtripped.json 10 | ./json_compare orig.json roundtripped.json 11 | printf '.' 12 | done 13 | -------------------------------------------------------------------------------- /test_dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | echo "Note: this test is endless" 6 | 7 | D="$(mktemp -d)" 8 | 9 | trap "rm -Rf \"$D\"" EXIT 10 | 11 | FS=$(df "$D" | tail -n -1 | cut -d' ' -f 1) 12 | 13 | if [ "$FS" != "tmpfs" ]; then 14 | echo "Note: Usage of tmpfs is recommended for this test (and for json2dir actuallly)" 15 | echo "You can set TMPDIR to some tmpfs location" 16 | fi 17 | 18 | function test2() { 19 | 20 | rm -Rf "$D" 21 | mkdir -p "$D" 22 | ./random_json 6 > orig.json 23 | ./json2dir orig.json "$D"/o 24 | ./dir2json "$D"/o roundtripped.json 25 | ./json_compare orig.json roundtripped.json 26 | printf '.' 27 | 28 | } 29 | 30 | while true; do 31 | export JSON2DIR_SAFEFILENAMES=false 32 | test2 33 | export JSON2DIR_SAFEFILENAMES=true 34 | test2 35 | done 36 | -------------------------------------------------------------------------------- /json_keys: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Created by _Vi in 2013; License: MIT or 2-clause BSD. 4 | 5 | 6 | # output the list of object keys used in the input file 7 | 8 | import json 9 | import sys 10 | 11 | f = sys.stdin 12 | 13 | if len(sys.argv)>1: f=open(sys.argv[1],"rt") 14 | 15 | 16 | dom = json.load(f) 17 | 18 | json_keys = set() 19 | 20 | def save_keys_from_hierarchy_to_json_keys(object_): 21 | if type(object_) == list: 22 | for i in range(0,len(object_)): 23 | subobject = object_[i] 24 | save_keys_from_hierarchy_to_json_keys(subobject) 25 | elif type(object_) == dict: 26 | for k,v in object_.items(): 27 | json_keys.add(k) 28 | save_keys_from_hierarchy_to_json_keys(v) 29 | 30 | save_keys_from_hierarchy_to_json_keys(dom) 31 | 32 | sys.stdout.write("\n".join(list(json_keys))) 33 | -------------------------------------------------------------------------------- /random_json: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import json 3 | import random 4 | import sys 5 | 6 | # Created by _Vi in 2013; License: MIT or 2-clause BSD. 7 | 8 | 9 | maxlev = 3 10 | if len(sys.argv)>1: maxlev = int(sys.argv[1]) 11 | 12 | def random_string(): 13 | l = random.choice(range(0,8)) 14 | s = "" 15 | for i in range(0,l): 16 | s += random.choice(["a", "1", "0", ".", "-", "+", "_", "false", "true", "/", "=", "\\", "e", "{", "}", "[", "]", ",", "\r", "\n", "\n", " ", "\t"]) 17 | return s 18 | 19 | def random_hierarchy(maxlevel): 20 | typeslist = [] 21 | typeslist += [list, dict]*maxlevel 22 | typeslist += [int, float, str, None] 23 | ty = random.choice(typeslist) 24 | if ty == int: 25 | return random.randint(-100,100) 26 | elif ty == float: 27 | return -200+100*random.random() 28 | elif ty == str: 29 | return random_string() 30 | elif ty == None: 31 | return None 32 | elif ty == list: 33 | l = random.choice([maxlevel-1,maxlevel-1,maxlevel]+list(range(0,4))) 34 | li = [] 35 | for i in range(0,l): 36 | li += [random_hierarchy(maxlevel-1)] 37 | return li 38 | elif ty == dict: 39 | l = random.choice([maxlevel-1,maxlevel-1,maxlevel]+list(range(0,3))) 40 | di = {} 41 | for i in range(0,l): 42 | k = random_string() 43 | v = random_hierarchy(maxlevel-1) 44 | di[k]=v 45 | return di 46 | 47 | json.dump(random_hierarchy(maxlev), sys.stdout) 48 | print("") 49 | -------------------------------------------------------------------------------- /json_compare: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | import json 4 | import sys 5 | 6 | # Created by _Vi in 2013; License: MIT or 2-clause BSD. 7 | 8 | 9 | if sys.version_info[0] < 3: 10 | str=unicode 11 | 12 | if len(sys.argv)!=3: 13 | print("Usage: json_compare file1.json file2.json\n", file=sys.stderr) 14 | sys.exit(2) 15 | 16 | f1=open(sys.argv[1],"rt") 17 | f2=open(sys.argv[2],"rt") 18 | 19 | o1 = json.load(f1) 20 | o2 = json.load(f2) 21 | 22 | def compare_floats(x1, x2): 23 | if x1 == x1: return True 24 | k = (x1-x2)/(abs(x1)+abs(x2)) 25 | return k<0.0000001 26 | 27 | def compare_recursive(o1, o2, current_path): 28 | def fail(x): 29 | print(x + " at "+current_path+"\n",file=sys.stderr) 30 | sys.exit(1) 31 | 32 | if type(o1) != type(o2): 33 | fail("Mismatched types "+str(type(o1)) + " and "+str(type(o2))) 34 | elif type(o1) in [int, str, type(None), bool]: 35 | if o1 != o2: 36 | fail(str(o1) +" != "+ str(o2)) 37 | elif type(o1) == float: 38 | if not compare_floats(o1, o2): 39 | fail(str(o1) +" != "+ str(o2)) 40 | elif type(o1) == list: 41 | if len(o1) != len(o2): 42 | fail("Mismatched list length: "+str(len(o1))+" and "+str(len(o2))) 43 | for i in range(0,len(o1)): 44 | compare_recursive(o1[i], o2[i], current_path+str(i)+"/") 45 | elif type(o1) == dict: 46 | if len(o1) != len(o2): 47 | fail("Mismatched obj length: "+str(len(o1))+" and "+str(len(o2))) 48 | for k,v in o1.items(): 49 | if k not in o2: 50 | fail(k + " not found in the second object") 51 | compare_recursive(o1[k], o2[k], current_path+k+"/") 52 | else: 53 | fail("unknown type") 54 | 55 | compare_recursive(o1,o2,"/") 56 | -------------------------------------------------------------------------------- /2json: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Created by _Vi in 2013; License: MIT or 2-clause BSD. 4 | 5 | from __future__ import print_function 6 | import json 7 | import os 8 | import re 9 | import sys 10 | 11 | if sys.version_info[0] < 3: 12 | str=unicode 13 | 14 | f = sys.stdin 15 | 16 | if len(sys.argv)>1: f=open(sys.argv[1],"rt") 17 | 18 | def try_parse_int(x): 19 | try: 20 | return int(x) 21 | except ValueError: 22 | return None 23 | 24 | def try_parse_float(x): 25 | try: 26 | return float(x) 27 | except ValueError: 28 | return None 29 | 30 | # corresponds to "mange_key" in json2 31 | def demangle_key(s): 32 | if len(s) >= 2: 33 | if s[0] == "\\": 34 | if try_parse_float(s[1:]) is not None: 35 | s = s[1:] 36 | s = s\ 37 | .replace("\\n", "\n")\ 38 | .replace("\\r", "\r")\ 39 | .replace("\\t", "\t")\ 40 | .replace("\\|", "/")\ 41 | .replace("\\'", "\"")\ 42 | .replace("\\_", "=")\ 43 | .replace("\\0", "0")\ 44 | .replace("\\!", "\\") 45 | return s 46 | 47 | # this function both assigns to the dereferenced arguments and returs them 48 | def descend_and_set(obj, pathcomps, val): 49 | if (len(pathcomps)==0): 50 | if type(obj) == str or type(obj) == __builtins__.str: 51 | return obj + "\n" + val 52 | return val 53 | (head, tail) = (pathcomps[0], pathcomps[1:]) 54 | listkey = try_parse_int(head) 55 | if listkey is not None and listkey>=0: 56 | if obj is None: 57 | return [None] * listkey + [descend_and_set(None,tail,val)] 58 | elif type(obj) == list: 59 | if(len(obj)<=listkey): 60 | obj += [None] * (listkey - len(obj) + 1) 61 | obj[listkey] = descend_and_set(obj[listkey],tail,val) 62 | return obj 63 | else: 64 | raise Exception("Conflicting types: expected list, got "+str(type(obj))) 65 | dictkey = demangle_key(head) 66 | if obj is None: 67 | return {dictkey: descend_and_set(None,tail,val)} 68 | elif type(obj) == dict: 69 | if dictkey not in obj: 70 | obj[dictkey] = None 71 | obj[dictkey] = descend_and_set(obj[dictkey],tail,val) 72 | return obj 73 | else: 74 | raise Exception("Conflicting types: expected dict, got "+str(type(obj))) 75 | 76 | def deserialize_righthand(st): 77 | i = try_parse_int(st) 78 | if i is not None: return i 79 | f = try_parse_float(st) 80 | if f is not None: return f 81 | 82 | if st == "false": return False 83 | if st == "true": return True 84 | 85 | if st == "null": return None 86 | if st == "[]": return [] 87 | if st == "{}": return {} 88 | 89 | # OK, it is a string 90 | if len(st) > 0: 91 | if st[0]=="\"": st=st[1:] 92 | return st 93 | 94 | 95 | def read_json2_file(f): 96 | answer = None 97 | 98 | for s in f.readlines(): 99 | if len(s): 100 | if s[-1]=='\n': s=s[:-1] 101 | 102 | if s.find("=") == -1: 103 | print("Invalid line "+s+"\n", file=sys.stderr) 104 | continue 105 | 106 | (path,val) = s.split("=",1) 107 | pathcomps = path.split("/") 108 | 109 | if len(pathcomps)>0 and pathcomps[0] == "": 110 | pathcomps = pathcomps[1:] 111 | 112 | answer = descend_and_set(answer, pathcomps, deserialize_righthand(val)) 113 | return answer 114 | 115 | 116 | 117 | obj = read_json2_file(f) 118 | json.dump(obj, sys.stdout) 119 | sys.stdout.write("\n") 120 | -------------------------------------------------------------------------------- /json2: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Created by _Vi in 2013; License: MIT or 2-clause BSD. 4 | 5 | from __future__ import print_function 6 | import json 7 | import os 8 | import re 9 | import sys 10 | 11 | if sys.version_info[0] < 3: 12 | str=unicode 13 | import codecs 14 | sys.stdout = codecs.getwriter('utf8')(sys.stdout) 15 | else: 16 | long=int 17 | 18 | f = sys.stdin 19 | 20 | if len(sys.argv)>1: f=open(sys.argv[1],"rt") 21 | 22 | 23 | dom = json.load(f) 24 | #print(dom) 25 | 26 | def str2bool(v): 27 | # http://stackoverflow.com/a/715468/266720 28 | return v.lower() in ("yes", "true", "t", "1") 29 | 30 | always_output_stubs = str2bool(os.environ.get("JSON2_ALWAYS_STUBS","False")) 31 | always_mark_strings = str2bool(os.environ.get("JSON2_ALWAYS_MARK_STRINGS","False")) 32 | 33 | tricky_strings = set(["" 34 | ,"null" 35 | ,"[]" 36 | ,"{}" 37 | ,"true" 38 | ,"false" 39 | ,"=" 40 | ,"NaN" 41 | ,"Infinity" 42 | ,"-Infinity" 43 | ]) 44 | 45 | def is_number(s): 46 | try: 47 | float(s) 48 | return True 49 | except ValueError: 50 | try: 51 | int(s) # example: "+ 1" on Python 2 52 | return True 53 | except ValueError: 54 | return False 55 | 56 | def can_be_misinterpreted(s): 57 | if always_mark_strings: return True 58 | if s.lower().strip() in tricky_strings: return True 59 | if s.strip()[0] == '"': return True 60 | if is_number(s): return True 61 | return False 62 | 63 | 64 | # ensure the key does not have "/", "=" or "\n" in it 65 | # make sure to run test.sh if you change this fucntion 66 | def mangle_key(s): 67 | if not s: return "" 68 | s = s\ 69 | .replace("\\", "\\!")\ 70 | .replace("\n", "\\n")\ 71 | .replace("\r", "\\r")\ 72 | .replace("\t", "\\t")\ 73 | .replace("/", "\\|")\ 74 | .replace("\"", "\\'")\ 75 | .replace("=", "\\_") 76 | if is_number(s): s="\\"+s; 77 | return s 78 | 79 | 80 | def recursive_outputter(file_, object_, prefix): 81 | if type(object_) == int: 82 | file_.write(prefix+"="+str(object_)+"\n") 83 | elif type(object_) == long: 84 | file_.write(prefix+"="+str(object_)+"\n") 85 | elif type(object_) == float: 86 | file_.write(prefix+"="+str(object_)+"\n") 87 | elif type(object_) == bool: 88 | file_.write(prefix+"="+ ("true" if object_ else "false") + "\n") 89 | elif type(object_) == str: 90 | splits = object_.split("\n") 91 | mark_strings_here = always_mark_strings 92 | if len(splits) > 1: 93 | mark_strings_here = True 94 | for s in object_.split("\n"): 95 | if mark_strings_here or can_be_misinterpreted(s): 96 | s='"'+s 97 | try: 98 | file_.write(prefix+"="+s+"\n") 99 | except: 100 | file_.write(prefix+"=error\n") 101 | elif object_ is None: 102 | file_.write(prefix+"=null\n") 103 | elif type(object_) == list: 104 | if always_output_stubs or len(object_)==0: 105 | file_.write(prefix+"=[]\n") 106 | for i in range(0,len(object_)): 107 | subobject = object_[i] 108 | recursive_outputter(file_, subobject, prefix+"/"+str(i)) 109 | elif type(object_) == dict: 110 | if always_output_stubs or len(object_)==0: 111 | file_.write(prefix+"={}\n") 112 | for k,v in object_.items(): 113 | #print((prefix,k,mangle_key(str(k))),file=sys.stderr) 114 | recursive_outputter(file_, v, prefix+"/"+mangle_key(str(k))) 115 | else: 116 | raise Exception("Unknown type "+str(type(object_))) 117 | 118 | 119 | 120 | recursive_outputter(sys.stdout, dom, "") 121 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | There is a [tool](http://www.ofb.net/~egnor/xml2/) to convert XML files to 2 | intermediate format that allows editing and data extraction to be performed 3 | with simple (not XML-aware) tools, such as regular expressions-based `grep` 4 | or `sed`. It does not solve the general task of transforming XML files, but 5 | still allows text handling tools to go farther than in case of 6 | [direct attempt to use them on XML](http://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags). 7 | 8 | But xml2 is for XML, and somebody may want the similar tool for JSON. 9 | 10 | Here there are two main tools plus several supplementrary ones: 11 | 12 | * json2 - converts JSON to intermediate text-editable format; 13 | * 2json - converts that intermediate format back to JSON; 14 | * json_compare - compares two JSON files to equality and reports the found difference, if any; 15 | * random_json - generates random "tricky" JSON (with confusing strings, empty objects, etc.); 16 | * json_keys - gathers keys used for in objects in the JSON 17 | * test.sh - endless "fuzz test" of `json2 | 2json` using random_json and json_compare. 18 | * json2dir and dir2json - "unpacks" JSON to files and directories and back; 19 | 20 | Tested with Python 2.6, 2.7 and 3.2. 21 | 22 | Example 23 | --- 24 | 25 | JSON file 26 | 27 | ``` 28 | { "mist": "qua\nlity\n", 29 | "fist": [], 30 | "gist": [5,6,"7"], 31 | "...": null, 32 | "test":[[[false]]], 33 | "var":{"lib":{"dpkg":"status"}} 34 | } 35 | ``` 36 | 37 | Output of json2: 38 | 39 | ``` 40 | /...=null 41 | /gist/0=5 42 | /gist/1=6 43 | /gist/2="7 44 | /var/lib/dpkg=status 45 | /fist=[] 46 | /test/0/0/0=false 47 | /mist="qua 48 | /mist="lity 49 | /mist=" 50 | ``` 51 | 52 | Rules of the format 53 | --- 54 | 55 | * Each line must contain "=". The first "=" on each line is always put by 56 | json2, 57 | subsequent "="s may happen in the data extracted from JSON; 58 | * The left part of the line before "=" is "address", the right part after the 59 | first 60 | "=" is "value". 61 | * Value can be string, number, null, float, boolean, empty list or empty object. 62 | * Any value that can't be interpreted as non-string is interpreted as string. 63 | Using `"` character just after `=` forces it to be a string. By default `json2` 64 | uses unescaped strings where possible: `if there_may_be_problems then 65 | prefix_with_" else use_the_string_as_is`. `JSON2_ALWAYS_MARK_STRINGS=true` 66 | overrides this and makes json2 put `"` before any string values. 67 | * Only empty lists and objects must be explicitly mentioned as values. Non-empty 68 | lists and objects still can have "stubs" like `=[]` or `={}` at the respective 69 | address. `JSON2_ALWAYS_STUBS=true` forces stubs for all lists and objects. 70 | * Address is a list of keys separated by "/". The first empty key (before the 71 | first `/`) is ignored, subsequent empty keys are assumed as empty keys of 72 | objects (for example, `{"":{"":""}}` -> `//="`). Each address entry "descends" 73 | from the top-level list of object into it's children (creating intermediate 74 | lists or objects if necessary). 75 | * Numeric keys are used as indexes (starting from 0) of the lists in JSON. 76 | Non-numeric keys are keys for object fileds. 77 | * All keys of object fileds are mangled to preserve assumptions about usage of 78 | `/`, `=`, `"` and `\n` characters and to avoid mistakingly interepreting them 79 | as indexes for lists instead of keys for objects. Mangling rules are not 80 | standard: apart from usual \n, \r and \t, `/ " = \` becomes `\| \' \_ \!`. 81 | Additionally the entire key may be prefixed with `\` if it looks like a number. 82 | * Multiline string values are handled as repeated lines (with the same address). 83 | * Apart from multi-line string values, lines in `2json` input file may be reordered arbitrarily. 84 | 85 | Limitations 86 | --- 87 | 88 | * Order of fields in objects is not preserved; 89 | * 2json is slow. It navigates into the hierarchy of objects and lists from the 90 | root for every line; 91 | * All tools load the entire input file in memory as a tree, 92 | not "streamed". 93 | * Is may be poor option if you need to handle recursive JSON files. 94 | * There may be corner case incompabilitis between json2 format generated when executing by Python 2 and Python 3. For example `+ 1` is not considered a valid number of Python 3, hence not prepended with `\`. 95 | * Round-trip test fails on Python 2 in tricky corner case (involving tricky characters in keys) 96 | 97 | See also 98 | --- 99 | 100 | * [gron](https://github.com/tomnomnom/gron) 101 | -------------------------------------------------------------------------------- /dir2json: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | import json 5 | import os 6 | import re 7 | import sys 8 | 9 | if sys.version_info[0] < 3: 10 | str=unicode 11 | chr=unichr 12 | 13 | 14 | if len(sys.argv)!=3: 15 | print("Usage: json2dir input_directory_or_file_to_be_scanned {file1.json|-}\n", file=sys.stderr) 16 | sys.exit(2) 17 | 18 | fout = sys.stdout 19 | if sys.argv[2] != "-": fout = open(sys.argv[2],"wt") 20 | 21 | di = sys.argv[1] 22 | 23 | j = os.path.join 24 | 25 | def readfile(di, filename, for_type_): 26 | n = None 27 | if for_type_: 28 | n = j(di,"."+filename+".type") 29 | else: 30 | n = j(di,filename) 31 | 32 | with open(n,"rt") as f: 33 | return f.read() 34 | 35 | # FIXME: code dup; these functions are the same as in "2json" 36 | def try_parse_int(x): 37 | try: 38 | return int(x) 39 | except ValueError: 40 | return None 41 | 42 | def try_parse_float(x): 43 | try: 44 | return float(x) 45 | except ValueError: 46 | return None 47 | 48 | def deserialize_righthand(st): 49 | i = try_parse_int(st) 50 | if i is not None: return i 51 | f = try_parse_float(st) 52 | if f is not None: return f 53 | 54 | if st == "false": return False 55 | if st == "true": return True 56 | 57 | if st == "null": return None 58 | if st == "[]": return [] 59 | if st == "{}": return {} 60 | 61 | # OK, it is a string 62 | return st 63 | 64 | def str2bool(v): 65 | # http://stackoverflow.com/a/715468/266720 66 | return v.strip().lower() in ("yes", "true", "t", "1") 67 | 68 | 69 | 70 | def demangle_key_character(m): 71 | c=m.group(0) 72 | if c == "__" : return "_" 73 | if c == "_w_": return " " 74 | if c == "_p_": return "+" 75 | if c == "_m_": return "-" 76 | if c == "_a_": return "*" 77 | if c == "_s_": return "/" 78 | if c == "_d_": return "." 79 | if c == "_n_": return "\n" 80 | if c == "_r_": return "\r" 81 | if c == "_t_": return "\t" 82 | 83 | # assuming it's "_hexdigits_" here 84 | try: 85 | c = c[1:-1] 86 | ordd = int(c, 16) 87 | return chr(ordd) 88 | except: 89 | return m.group(0)[0] 90 | 91 | # filenames (apart from ".type", and ".$key.type") will always be [a-zA-Z0-9_]+ 92 | def demangle_key_safe(s): 93 | if s == "_": return "" 94 | return re.sub("\_[a-zA-Z0-9]*\_", demangle_key_character, s) 95 | 96 | # Assuming any bytes in filenames apart from "/" and "\0" and empty string 97 | def demangle_key_preserving(s): 98 | if s == "_": return "" 99 | if s[0] == "_": 100 | if s[1] == "." or s[1] == "_": 101 | s = s[1:] 102 | s = s.replace("_slash_", "/"); 103 | s = s.replace("_zero_","\u0000"); 104 | s = re.sub("_lit((?:lit)*)slash_", "_\\1slash_", s); 105 | s = re.sub("_lit((?:lit)*)zero_", "_\\1zero_", s); 106 | return s 107 | 108 | 109 | 110 | 111 | 112 | 113 | def build_from_dir(d, fn): 114 | t = None 115 | if os.path.exists(j(d,"."+fn+".type")): 116 | t = readfile(d,fn,True).strip() 117 | 118 | if not os.path.isdir(j(d,fn)): 119 | forcestr = False 120 | if t == "string": 121 | forcestr = True 122 | # the rest of types are not checked 123 | 124 | content = readfile(d,fn,False) 125 | if len(content)>0 and content[-1] == '\n': content = content[:-1] 126 | 127 | if forcestr: return content 128 | return deserialize_righthand(content) 129 | 130 | # is a directory 131 | if os.path.exists(j(d,fn,".type")): 132 | t = readfile(j(d,fn),".type",False).strip() 133 | 134 | if t=="array": 135 | li = [] 136 | i = 0 137 | while os.path.exists(j(d,fn,str(i))): 138 | li.append(build_from_dir(j(d,fn),str(i))) 139 | i = i + 1 140 | return li 141 | else: 142 | # assuming "object" 143 | safefilenames = True 144 | if os.path.exists(j(d,"."+fn+".safefilenames")): 145 | safefilenames = str2bool(readfile(d, "."+fn+".safefilenames", False)) 146 | if os.path.exists(j(d,fn,".safefilenames")): 147 | safefilenames = str2bool(readfile(j(d,fn), ".safefilenames", False)) 148 | 149 | dic = {} 150 | for i in os.listdir(j(d,fn)): 151 | if i[0] == '.': continue 152 | key = demangle_key_safe(i) if safefilenames else demangle_key_preserving(i) 153 | dic[key] = build_from_dir(j(d,fn), i) 154 | return dic 155 | 156 | 157 | (did, dif) = os.path.split(di) 158 | obj = build_from_dir(did, dif) 159 | 160 | 161 | json.dump(obj, fout) 162 | fout.write("\n") 163 | -------------------------------------------------------------------------------- /json2dir: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Created by _Vi in 2013; License: MIT or 2-clause BSD. 4 | 5 | from __future__ import print_function 6 | import json 7 | import os 8 | import re 9 | import sys 10 | 11 | if sys.version_info[0] < 3: 12 | str=unicode 13 | import codecs 14 | open=lambda fn,mode: codecs.open(fn,mode,"UTF-8") 15 | else: 16 | long=int 17 | 18 | f = sys.stdin 19 | 20 | out="." 21 | 22 | if len(sys.argv)!=3: 23 | print("Usage: json2dir {file1.json|-} output_file_or_directory_to_be_created\n", file=sys.stderr) 24 | sys.exit(2) 25 | 26 | if sys.argv[1] != "-": f = open(sys.argv[1],"rt") 27 | out = sys.argv[2] 28 | 29 | 30 | dom = json.load(f) 31 | #print(dom) 32 | 33 | def str2bool(v): 34 | # http://stackoverflow.com/a/715468/266720 35 | return v.lower() in ("yes", "true", "t", "1") 36 | 37 | option_safefilenames = str2bool(os.environ.get("JSON2DIR_SAFEFILENAMES","True")) 38 | option_allowunicode = str2bool(os.environ.get("JSON2DIR_ALLOWUNICODE","True")) 39 | option_alwaystypefiles = str2bool(os.environ.get("JSON2DIR_ALWAYSTYPEFILES","False")) 40 | 41 | # FIXME: Code duplication with json2 42 | tricky_strings = set(["" 43 | ,"null" 44 | ,"[]" 45 | ,"{}" 46 | ,"true" 47 | ,"false" 48 | ,"=" 49 | ,"NaN" 50 | ,"Infinity" 51 | ,"-Infinity" 52 | ]) 53 | 54 | def is_number(s): 55 | try: 56 | float(s) 57 | return True 58 | except ValueError: 59 | return False 60 | 61 | def can_be_misinterpreted(s): 62 | if option_alwaystypefiles: return True 63 | if s.lower().strip() in tricky_strings: return True 64 | if s.strip()[0] == '"': return True 65 | if is_number(s): return True 66 | return False 67 | 68 | 69 | def mangle_key_character(m): 70 | c=m.group(0)[0] 71 | if c == "_": return "__" 72 | if c == " ": return "_w_" 73 | if c == "+": return "_p_" 74 | if c == "-": return "_m_" 75 | if c == "*": return "_a_" 76 | if c == "/": return "_s_" 77 | if c == ".": return "_d_" 78 | if c == "\n": return "_n_" 79 | if c == "\r": return "_r_" 80 | if c == "\t": return "_t_" 81 | if ord(c)>0x7F and option_allowunicode: 82 | return c 83 | return "_" + ("%x" % ord(c)) + "_" 84 | 85 | # filenames (apart from ".type", and ".$key.type") will always be [a-zA-Z0-9_]+ 86 | def mangle_key_safe(s): 87 | if s == "": return "_" 88 | return re.sub("[^a-zA-Z0-9]", mangle_key_character, s) 89 | 90 | # Assuming any bytes in filenames apart from "/" and "\0" and empty string 91 | def mangle_key_preserving(s): 92 | if s == "": return "_" 93 | s = re.sub("_((?:lit)*)slash_", "_lit\\1slash_", s); 94 | s = re.sub("_((?:lit)*)zero_", "_lit\\1zero_", s); 95 | s = s.replace("/","_slash_"); 96 | s = s.replace("\u0000","_zero_"); 97 | if s[0] in "_.": s = "_"+s 98 | return s 99 | 100 | mangle_key = mangle_key_safe if option_safefilenames else mangle_key_preserving 101 | 102 | def trymkdir(x): 103 | try: 104 | os.mkdir(x) 105 | except OSError: 106 | pass; # assuming it's "Already exists" error 107 | 108 | j = os.path.join 109 | 110 | def writefile(outdir, filename, for_type_, content): 111 | n = None 112 | if for_type_: 113 | n = j(outdir,"."+filename+".type") 114 | else: 115 | n = j(outdir,filename) 116 | 117 | with open(n,"wt") as f: 118 | f.write(content) 119 | 120 | def trywritefile(outdir, filename, for_type_, content): 121 | try: 122 | writefile(outdir, filename, for_type_, content) 123 | except: 124 | pass 125 | 126 | def recursive_outputter(outdir, object_, filename): 127 | if option_alwaystypefiles or os.path.exists(j(outdir,"."+filename+".type")): 128 | writefile(outdir, filename, True, { 129 | int:"number\n" 130 | ,str:"string\n" 131 | ,bool:"boolean\n" 132 | ,float:"number\n" 133 | ,type(None):"null\n" 134 | ,list:"array\n" 135 | ,dict:"object\n" 136 | }[type(object_)]); 137 | if type(object_) in [int, float,long]: 138 | writefile(outdir, filename, False, str(object_)+"\n"); 139 | elif type(object_) == bool: 140 | writefile(outdir, filename, False, 141 | "true\n" if object_ else "false\n") 142 | elif type(object_) == str: 143 | writefile(outdir, filename, False, object_+"\n") 144 | if not option_alwaystypefiles and can_be_misinterpreted(object_): 145 | writefile(outdir, filename, True, "string\n") 146 | elif object_ is None: 147 | writefile(outdir, filename, False, "null\n") 148 | elif type(object_) == list: 149 | trywritefile(outdir, filename, True, "array\n") 150 | trymkdir(j(outdir,filename)) 151 | 152 | # additional type declaration in case of part of JSON file copied somewhere 153 | writefile(j(outdir,filename),".type", False, "array\n") 154 | 155 | for i in range(0,len(object_)): 156 | subobject = object_[i] 157 | recursive_outputter(j(outdir,filename), subobject, str(i)) 158 | elif type(object_) == dict: 159 | trymkdir(j(outdir,filename)) 160 | if not option_safefilenames: 161 | writefile(j(outdir,filename),".safefilenames", False, "false\n") 162 | if option_alwaystypefiles and option_safefilenames: 163 | writefile(j(outdir,filename),".safefilenames", False, "true\n") 164 | if option_alwaystypefiles or os.path.exists(j(outdir,filename,".type")) : 165 | writefile(j(outdir,filename),".type", False, "object\n") 166 | 167 | 168 | for k,v in object_.items(): 169 | fn = mangle_key(k) 170 | recursive_outputter(j(outdir,filename), v, fn) 171 | else: 172 | raise Exception("Unknown type "+str(type(object_))) 173 | 174 | (outdir, outfile) = os.path.split(out) 175 | recursive_outputter(outdir, dom, outfile) 176 | --------------------------------------------------------------------------------