# Repository layout this file was extracted from:
#
#   |-- .editorconfig
#   |-- README.rst
#   `-- generate-scala-code.py
#
# /.editorconfig:
#
#   [*.scala]
#   indent_style = space
#   indent_size = 2
#
# /README.rst is reproduced in the module docstring below.
#
# /generate-scala-code.py:
"""Generate ``StructType(...)`` schema code from a JSON Schema document.

Here's how I generate the scala code::

    python generate-scala-code.py crash_report.json

Or from a remote url (requires ``requests``)::

    python generate-scala-code.py https://github.com/mozilla/socorro/raw/master/socorro/schemas/crash_report.json

To generate Python code (instead of Scala) add::

    --python

The script runs unchanged on Python 2.7 and Python 3: it avoids the ``print``
statement and writes diagnostics through ``sys.stderr.write``.
"""
import argparse
import sys
import json
from pprint import pprint

import re


START_AT = ''  # ' ' * 6

# JSON Schema scalar type name -> generated type name. Order matters: a
# property that allows several scalar types is rendered as the first match,
# so "string" wins over "integer", which wins over "boolean".
_SCALAR_TYPES = (
    ('string', 'StringType'),
    ('integer', 'IntegerType'),
    ('boolean', 'BooleanType'),
)


def camelize(s):
    """Return *s* with every ``_x`` (x in a-z) replaced by upper-case ``X``."""
    def replacer(match):
        return match.group().replace('_', '').upper()

    return re.sub(r'_([a-z])', replacer, s)


def _declared_types(meta):
    """Return the JSON Schema ``type`` of *meta* as a tuple of type names.

    ``type`` may be a single name or a list of names; a missing ``type``
    yields an empty tuple so callers can fall through to their "unknown"
    branch instead of raising ``KeyError``.
    """
    declared = meta.get('type', ())
    if isinstance(declared, (list, tuple)):
        return tuple(declared)
    return (declared,)


def _render_bool(name, value, python):
    """Render a named boolean argument in Python or Scala syntax."""
    if python:
        return '{}={}'.format(name, value)
    return '{} = {}'.format(name, 'true' if value else 'false')


def _render_type(name, python):
    """Render a type name; the Python flavour instantiates it with ``()``."""
    return name + '()' if python else name


def get_rows(schema, depth=1, python=False):
    """Yield one ``StructField(...)`` source line per property of *schema*.

    Properties are visited in sorted order. Nested objects and arrays of
    objects recurse with ``depth + 1`` so their fields are indented two
    extra spaces per level.

    :param schema: dict with a ``properties`` mapping (JSON Schema object).
    :param depth: nesting level of *schema*; controls child indentation.
    :param python: emit Python syntax when true, Scala syntax otherwise.
    :raises ValueError: if *schema* has no ``properties`` key.

    Properties whose type is not recognised are reported on stderr and
    skipped. Unlike equality checks against a bare string, list-valued
    types such as ``["array", "null"]`` are recognised here.
    """
    if 'properties' not in schema:
        raise ValueError(
            "schema has no 'properties': {}".format(str(schema)[:100])
        )

    child_indent = ' ' * (2 * (depth + 1))

    for prop in sorted(schema['properties']):
        meta = schema['properties'][prop]
        types = _declared_types(meta)
        scalar = next(
            (target for name, target in _SCALAR_TYPES if name in types),
            None
        )

        if scalar is not None:
            if 'string' in types and 'integer' in types:
                sys.stderr.write(
                    "NOTE!! {!r} allows the type to be String AND "
                    "Integer\n".format(prop)
                )
            yield 'StructField("{}", {}, {})'.format(
                prop,
                _render_type(scalar, python),
                _render_bool('nullable', 'null' in types, python)
            )
        elif 'array' in types and 'items' not in meta:
            # No item schema given: assume an array of strings.
            yield 'StructField("{}", ArrayType({}, {}), {})'.format(
                prop,
                _render_type('StringType', python),
                _render_bool('containsNull', False, python),
                _render_bool('nullable', True, python)
            )
        elif 'array' in types:
            rows = list(
                get_rows(meta['items'], depth=depth + 1, python=python)
            )
            yield 'StructField("{}", ArrayType({}), {})'.format(
                prop,
                write_rows(rows, child_indent, python=python),
                _render_bool('nullable', True, python)
            )
        elif 'object' in types:
            rows = list(get_rows(meta, depth=depth + 1, python=python))
            yield 'StructField("{}", {}, {})'.format(
                prop,
                write_rows(rows, child_indent, python=python),
                _render_bool('nullable', True, python)
            )
        else:
            sys.stderr.write(
                'TROUBLE {} {}\n'.format(prop, str(meta)[:100])
            )


def write_rows(rows, indentation=' ' * 2, python=False):
    """Wrap *rows* in a ``StructType`` literal and return the source text.

    Each row is placed on its own line prefixed with *indentation*; the
    closing brackets are indented two characters less than the rows.

    :param rows: iterable of already-rendered field strings.
    :param indentation: whitespace prefix for every row.
    :param python: use ``StructType([...])`` instead of
        ``StructType(List(...))``.
    """
    opener, closer = ('StructType([\n', '])') if python else (
        'StructType(List(\n', '))'
    )
    body = ',\n'.join('{}{}'.format(indentation, row) for row in rows)
    # The strips keep the historical output: with no rows the newline after
    # the opener is dropped, and a trailing comma on the last row is removed.
    code = (opener + body).rstrip().rstrip(',')
    return code + '\n{}{}'.format(indentation[:-2], closer)


def replace_definitions(schema, definitions):
    """Inline ``$ref`` targets found under ``items``, mutating *schema*.

    A reference is resolved by the last segment of its path, looked up in
    *definitions*. The walk descends into ``properties`` and ``items``.

    :param schema: schema fragment (dict) to rewrite in place.
    :param definitions: mapping of definition name to schema fragment.
    :raises ValueError: if a fragment with neither ``properties`` nor
        ``items`` still mentions ``$ref`` (a reference this function cannot
        resolve).
    :raises KeyError: if a referenced name is missing from *definitions*.
    """
    if 'properties' in schema:
        for meta in schema['properties'].values():
            replace_definitions(meta, definitions)
    elif 'items' in schema:
        if '$ref' in schema['items']:
            ref = schema['items']['$ref'].split('/')[-1]
            schema['items'] = definitions[ref]
        replace_definitions(schema['items'], definitions)
    elif '$ref' in str(schema):
        raise ValueError(
            'cannot resolve $ref in schema fragment: {}'.format(schema)
        )


def run(schema_uri, python=False):
    """Load a schema, resolve its references and print the generated code.

    :param schema_uri: path to a JSON file, or an ``http(s)://`` URL
        (fetching a URL requires the third-party ``requests`` package).
    :param python: emit Python syntax when true, Scala syntax otherwise.
    :returns: ``0``, suitable as a process exit status.
    :raises ValueError: if ``$ref`` entries remain after rewriting.
    """
    if '://' in schema_uri and schema_uri.startswith('http'):
        # Imported lazily so local files work without requests installed.
        import requests
        response = requests.get(schema_uri)
        # Fail on HTTP errors instead of trying to JSON-decode an error page.
        response.raise_for_status()
        schema = response.json()
    else:
        with open(schema_uri) as f:
            schema = json.load(f)

    # A schema without a "definitions" section is fine as long as it holds
    # no references.
    replace_definitions(schema, schema.get('definitions', {}))

    # Explicit raise rather than assert so the check survives "python -O".
    if '$ref' in str(schema):
        raise ValueError('re-write didnt work')

    rows = list(get_rows(schema, python=python))
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print('\n' + write_rows(rows, python=python))

    return 0


def main():
    """Parse command-line arguments and return the exit status of ``run``."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'schema',
        help="Location of the crash_report.json schema file (URL or file)",
    )
    parser.add_argument(
        '--python',
        help="Generate Python code instead of Scala code",
        action="store_true",
    )
    args = parser.parse_args()
    return run(args.schema, python=args.python)


if __name__ == '__main__':
    sys.exit(main())