├── data ├── to_label │ ├── .gitkeep │ └── scale.json ├── movies.csv ├── training │ ├── labeler.json │ ├── schema.json │ ├── manual.json │ └── saket2018.json ├── compassql_examples │ ├── input │ │ ├── cql_4.json │ │ ├── 1d-N.json │ │ ├── 1d-O.json │ │ ├── 1d-N-mark.json │ │ ├── 1d-Q-mark.json │ │ ├── 1d-T.json │ │ ├── scale-type.json │ │ ├── bin-maxbins.json │ │ ├── 1d-Q.json │ │ ├── scatter.json │ │ ├── cql_2.json │ │ ├── cql_3.json │ │ ├── 2d-NxN.json │ │ ├── 2d-NxQ.json │ │ ├── showme_automatic-mark.json │ │ ├── cql_1.json │ │ ├── voyager_exact-match.json │ │ ├── 2d-OxQ.json │ │ ├── 2d-QxT.json │ │ ├── rank-by-feature_histogram.json │ │ ├── 2d-QxQ.json │ │ ├── showme_add-to-sheet.json │ │ ├── 3d-NxOxQ.json │ │ ├── 3d-OxQxQ.json │ │ └── 3d-NxQxQ.json │ ├── output │ │ ├── 1d-T.json │ │ ├── cql_4.json │ │ ├── 1d-Q-mark.json │ │ ├── 1d-Q.json │ │ ├── scale-type.json │ │ ├── 1d-N.json │ │ ├── 2d-QxQ.json │ │ ├── 1d-N-mark.json │ │ ├── 1d-O.json │ │ ├── 2d-NxQ.json │ │ ├── 2d-OxQ.json │ │ ├── cql_1.json │ │ ├── cql_3.json │ │ ├── rank-by-feature_histogram.json │ │ ├── voyager_exact-match.json │ │ ├── showme_automatic-mark.json │ │ ├── 2d-QxT.json │ │ ├── cql_2.json │ │ ├── scatter.json │ │ ├── bin-maxbins.json │ │ ├── 3d-NxOxQ.json │ │ ├── 3d-OxQxQ.json │ │ ├── 3d-NxQxQ.json │ │ ├── 2d-NxN.json │ │ └── showme_add-to-sheet.json │ └── run_compassql.js ├── spec_pairs │ ├── data.json │ ├── draco_cql.json │ └── draco_cql_default_weights.json ├── driving.json ├── weights.json ├── random_data.json └── cars.csv ├── malfoy ├── generate │ ├── __init__.py │ ├── screenshots │ │ ├── databased_1.png │ │ ├── databased_2.png │ │ ├── interactionbased_1.png │ │ └── interactionbased_2.png │ ├── define │ │ ├── definitions.json │ │ ├── type_distribution.json │ │ ├── dummy_schema.json │ │ ├── distributions.json │ │ └── interactions.json │ ├── spec.py │ ├── prop_objects.py │ ├── generator.py │ ├── run.py │ ├── README.md │ └── model.py ├── learn │ ├── __init__.py │ ├── populate_data.py │ ├── 
mln.py │ ├── helper.py │ ├── generator.py │ ├── playground.py │ ├── linear.py │ └── data_util.py └── __init__.py ├── setup.cfg ├── .gitignore ├── requirements.txt ├── README.md ├── setup.py ├── .travis.yml └── tests └── learn ├── test_learn_helper.py └── test_data_util.py /data/to_label/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /malfoy/generate/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /malfoy/learn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test=pytest -------------------------------------------------------------------------------- /malfoy/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.1" 2 | -------------------------------------------------------------------------------- /data/movies.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwdata/draco-learn/master/data/movies.csv -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/* 2 | .eggs/* 3 | .mypy_cache 4 | .pytest_cache 5 | dist 6 | malfoy.egg-info -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | agate 2 | clyngor 3 | draco 4 | pandas 5 | pytest 6 | RandomWords 7 | scipy 8 | sklearn 9 | 
-------------------------------------------------------------------------------- /data/training/labeler.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "./schema.json", 3 | "source": "labeler", 4 | "data": [ 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /malfoy/generate/screenshots/databased_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwdata/draco-learn/master/malfoy/generate/screenshots/databased_1.png -------------------------------------------------------------------------------- /malfoy/generate/screenshots/databased_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwdata/draco-learn/master/malfoy/generate/screenshots/databased_2.png -------------------------------------------------------------------------------- /malfoy/generate/screenshots/interactionbased_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwdata/draco-learn/master/malfoy/generate/screenshots/interactionbased_1.png -------------------------------------------------------------------------------- /malfoy/generate/screenshots/interactionbased_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwdata/draco-learn/master/malfoy/generate/screenshots/interactionbased_2.png -------------------------------------------------------------------------------- /malfoy/generate/define/definitions.json: -------------------------------------------------------------------------------- 1 | { 2 | "topLevelProps": ["mark"], 3 | "encodingProps": ["interesting", "aggregate", "bin", "scale", "timeUnit", "stack"] 4 | } 5 | -------------------------------------------------------------------------------- 
/data/compassql_examples/input/cql_4.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "?", 6 | "encodings": [ 7 | { 8 | "channel": "?", 9 | "field": "Horsepower" 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /data/compassql_examples/input/1d-N.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/cars.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "?", 7 | "field": "Origin", 8 | "type": "nominal" 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /data/compassql_examples/input/1d-O.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/cars.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "?", 7 | "field": "Cylinders", 8 | "type": "ordinal" 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /data/compassql_examples/input/1d-N-mark.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/cars.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "x", 7 | "field": "Origin", 8 | "type": "nominal" 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /data/compassql_examples/input/1d-Q-mark.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/cars.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "x", 7 | "field": "Horsepower", 8 | "type": "quantitative" 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /data/compassql_examples/input/1d-T.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/movies.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "?", 7 | "timeUnit": "?", 8 | "field": "Release_Date", 9 | "type": "temporal" 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /data/compassql_examples/input/scale-type.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/cars.json"}, 3 | "mark": "tick", 4 | "encodings": [ 5 | { 6 | "channel": "x", 7 | "scale": {"type": "?"}, 8 | "field": "Miles_per_Gallon", 9 | "type": "quantitative" 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /data/compassql_examples/input/bin-maxbins.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/cars.json"}, 3 | "mark": "bar", 4 | "encodings": [ 5 | { 6 | "channel": "x", 7 | "bin": {"maxbins": "?"}, 8 | "field": "Miles_per_Gallon", 9 | "type": "quantitative" 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /data/compassql_examples/input/1d-Q.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/cars.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "?", 7 | "bin": "?", 8 | "aggregate": "?", 9 | "field": "Miles_per_Gallon", 10 | "type": "quantitative" 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /data/compassql_examples/input/scatter.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "?", 6 | "encodings": [ 7 | {"field": "Acceleration", "channel": "?", "type": "?"}, 8 | {"field": "Horsepower", "channel": "?", "type": "?"} 9 | ] 10 | 
} 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Draco-Learn 2 | 3 | [![Build Status](https://travis-ci.com/uwdata/draco-learn.svg?branch=master)](https://travis-ci.com/uwdata/draco-learn) 4 | [![PyPi](https://img.shields.io/pypi/v/malfoy.svg)](https://pypi.org/project/malfoy/) 5 | 6 | This repository houses the machine learning tools behind Draco-Learn. 7 | -------------------------------------------------------------------------------- /data/compassql_examples/input/cql_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "?", 6 | "encodings": [ 7 | { 8 | "channel": "?", 9 | "field": "Horsepower" 10 | }, 11 | { 12 | "channel": "?", 13 | "field": "Acceleration" 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /data/compassql_examples/input/cql_3.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "?", 6 | "encodings": [ 7 | { 8 | "channel": "?", 9 | "aggregate": "mean", 10 | "field": "Horsepower" 11 | }, 12 | { 13 | "channel": "?", 14 | "field": "Cylinders" 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /data/compassql_examples/input/2d-NxN.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/movies.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "?", 7 | "field": "Major_Genre", 8 | "type": "nominal" 9 | },{ 10 | "channel": "?", 11 | "field": "Creative_Type", 12 | "type": "nominal" 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /data/compassql_examples/input/2d-NxQ.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/cars.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "?", 7 | "field": "Origin", 8 | "type": "nominal" 9 | },{ 10 | "channel": "?", 11 | "bin": "?", 12 | "aggregate": "?", 13 | "field": "Horsepower", 14 | "type": "quantitative" 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /data/compassql_examples/input/showme_automatic-mark.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/cars.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "x", 7 | "aggregate": "mean", 8 | "field": "Horsepower", 9 | "type": "quantitative" 10 | },{ 11 | "channel": "y", 12 | "field": "Cylinders", 13 | "type": "ordinal" 14 | } 15 | ] 16 | } -------------------------------------------------------------------------------- /data/compassql_examples/input/cql_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "?", 6 | "encodings": [ 7 | { 8 | "channel": "x", 9 | "aggregate": "mean", 10 | "field": "Horsepower", 11 | "type": "quantitative" 12 | }, 13 | { 14 | "channel": "y", 15 | "field": "Cylinders", 16 | "type": "ordinal" 17 | } 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /data/compassql_examples/input/voyager_exact-match.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/cars.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "?", 7 | "field": "Cylinders", 8 | "type": "ordinal" 9 | },{ 10 | "channel": "?", 11 | "bin": "?", 12 | "aggregate": "?", 13 | "field": "Horsepower", 14 | "type": "quantitative" 15 | } 16 | ] 17 | } 
-------------------------------------------------------------------------------- /data/compassql_examples/output/1d-T.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/movies.json" 4 | }, 5 | "mark": "point", 6 | "encoding": { 7 | "x": { 8 | "field": "Release_Date", 9 | "type": "temporal", 10 | "scale": {} 11 | } 12 | }, 13 | "config": { 14 | "overlay": { 15 | "line": true 16 | }, 17 | "scale": { 18 | "useUnaggregatedDomain": true 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/cql_4.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "tick", 6 | "encoding": { 7 | "x": { 8 | "field": "Horsepower", 9 | "type": "quantitative", 10 | "scale": {} 11 | } 12 | }, 13 | "config": { 14 | "overlay": { 15 | "line": true 16 | }, 17 | "scale": { 18 | "useUnaggregatedDomain": true 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /data/compassql_examples/input/2d-OxQ.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/cars.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "?", 7 | "field": "Cylinders", 8 | "type": "ordinal" 9 | },{ 10 | "channel": "?", 11 | "bin": "?", 12 | "aggregate": "?", 13 | "field": "Horsepower", 14 | "type": "quantitative" 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/1d-Q-mark.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "tick", 6 | "encoding": { 7 | "x": { 8 | "field": "Horsepower", 9 | "type": "quantitative", 10 | "scale": {} 11 | } 12 | }, 13 | "config": { 14 | 
"overlay": { 15 | "line": true 16 | }, 17 | "scale": { 18 | "useUnaggregatedDomain": true 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/1d-Q.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "tick", 6 | "encoding": { 7 | "y": { 8 | "field": "Miles_per_Gallon", 9 | "type": "quantitative", 10 | "scale": {} 11 | } 12 | }, 13 | "config": { 14 | "overlay": { 15 | "line": true 16 | }, 17 | "scale": { 18 | "useUnaggregatedDomain": true 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /data/compassql_examples/input/2d-QxT.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/movies.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "?", 7 | "bin": "?", 8 | "aggregate": "?", 9 | "field": "IMDB_Rating", 10 | "type": "quantitative" 11 | },{ 12 | "channel": "?", 13 | "timeUnit": "?", 14 | "field": "Release_Date", 15 | "type": "temporal" 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /data/compassql_examples/input/rank-by-feature_histogram.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/cars.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "?", 7 | "bin": "?", 8 | "timeUnit": "?", 9 | "field": "?", 10 | "type": "?" 
11 | }, 12 | { 13 | "channel": "?", 14 | "field": "*", 15 | "aggregate": "count", 16 | "type": "quantitative" 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/scale-type.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "tick", 6 | "encoding": { 7 | "x": { 8 | "field": "Miles_per_Gallon", 9 | "type": "quantitative", 10 | "scale": {} 11 | } 12 | }, 13 | "config": { 14 | "overlay": { 15 | "line": true 16 | }, 17 | "scale": { 18 | "useUnaggregatedDomain": true 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /data/compassql_examples/input/2d-QxQ.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/cars.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "?", 7 | "bin": "?", 8 | "aggregate": "?", 9 | "field": "Miles_per_Gallon", 10 | "type": "quantitative" 11 | },{ 12 | "channel": "?", 13 | "bin": "?", 14 | "aggregate": "?", 15 | "field": "Horsepower", 16 | "type": "quantitative" 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /data/compassql_examples/input/showme_add-to-sheet.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/cars.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "x", 7 | "aggregate": "mean", 8 | "field": "Horsepower", 9 | "type": "quantitative" 10 | },{ 11 | "channel": "y", 12 | "field": "Cylinders", 13 | "type": "ordinal" 14 | },{ 15 | "channel": "?", 16 | "field": "Origin", 17 | "type": "nominal" 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /data/compassql_examples/input/3d-NxOxQ.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/cars.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "?", 7 | "field": "Cylinders", 8 | "type": "ordinal" 9 | },{ 10 | "channel": "?", 11 | "field": "Origin", 12 | "type": "nominal" 13 | },{ 14 | "channel": "?", 15 | "bin": "?", 16 | "aggregate": "?", 17 | "field": "Acceleration", 18 | "type": "quantitative" 19 | } 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /data/compassql_examples/output/1d-N.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "bar", 6 | "encoding": { 7 | "y": { 8 | "field": "Origin", 9 | "type": "nominal" 10 | }, 11 | "x": { 12 | "aggregate": "count", 13 | "field": "*", 14 | "type": "quantitative" 15 | } 16 | }, 17 | "config": { 18 | "overlay": { 19 | "line": true 20 | }, 21 | "scale": { 22 | "useUnaggregatedDomain": true 23 | } 24 | } 25 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/2d-QxQ.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "point", 6 | "encoding": { 7 | "x": { 8 | "field": "Miles_per_Gallon", 9 | "type": "quantitative" 10 | }, 11 | "y": { 12 | "field": "Horsepower", 13 | "type": "quantitative" 14 | } 15 | }, 16 | "config": { 17 | "overlay": { 18 | "line": true 19 | }, 20 | "scale": { 21 | "useUnaggregatedDomain": true 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /data/compassql_examples/output/1d-N-mark.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "bar", 6 | "encoding": { 7 | "x": { 8 | "field": "Origin", 9 
| "type": "nominal" 10 | }, 11 | "y": { 12 | "aggregate": "count", 13 | "field": "*", 14 | "type": "quantitative" 15 | } 16 | }, 17 | "config": { 18 | "overlay": { 19 | "line": true 20 | }, 21 | "scale": { 22 | "useUnaggregatedDomain": true 23 | } 24 | } 25 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/1d-O.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "bar", 6 | "encoding": { 7 | "y": { 8 | "field": "Cylinders", 9 | "type": "ordinal" 10 | }, 11 | "x": { 12 | "aggregate": "count", 13 | "field": "*", 14 | "type": "quantitative" 15 | } 16 | }, 17 | "config": { 18 | "overlay": { 19 | "line": true 20 | }, 21 | "scale": { 22 | "useUnaggregatedDomain": true 23 | } 24 | } 25 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/2d-NxQ.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "tick", 6 | "encoding": { 7 | "y": { 8 | "field": "Origin", 9 | "type": "nominal" 10 | }, 11 | "x": { 12 | "field": "Horsepower", 13 | "type": "quantitative", 14 | "scale": {} 15 | } 16 | }, 17 | "config": { 18 | "overlay": { 19 | "line": true 20 | }, 21 | "scale": { 22 | "useUnaggregatedDomain": true 23 | } 24 | } 25 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/2d-OxQ.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "tick", 6 | "encoding": { 7 | "y": { 8 | "field": "Cylinders", 9 | "type": "ordinal" 10 | }, 11 | "x": { 12 | "field": "Horsepower", 13 | "type": "quantitative", 14 | "scale": {} 15 | } 16 | }, 17 | "config": { 18 | "overlay": { 19 | "line": true 20 | }, 21 | 
"scale": { 22 | "useUnaggregatedDomain": true 23 | } 24 | } 25 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/cql_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "bar", 6 | "encoding": { 7 | "x": { 8 | "aggregate": "mean", 9 | "field": "Horsepower", 10 | "type": "quantitative" 11 | }, 12 | "y": { 13 | "field": "Cylinders", 14 | "type": "ordinal" 15 | } 16 | }, 17 | "config": { 18 | "overlay": { 19 | "line": true 20 | }, 21 | "scale": { 22 | "useUnaggregatedDomain": true 23 | } 24 | } 25 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/cql_3.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "bar", 6 | "encoding": { 7 | "x": { 8 | "aggregate": "mean", 9 | "field": "Horsepower", 10 | "type": "quantitative" 11 | }, 12 | "y": { 13 | "field": "Cylinders", 14 | "type": "nominal" 15 | } 16 | }, 17 | "config": { 18 | "overlay": { 19 | "line": true 20 | }, 21 | "scale": { 22 | "useUnaggregatedDomain": true 23 | } 24 | } 25 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/rank-by-feature_histogram.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "bar", 6 | "encoding": { 7 | "y": { 8 | "field": "Origin", 9 | "type": "nominal" 10 | }, 11 | "x": { 12 | "aggregate": "count", 13 | "field": "*", 14 | "type": "quantitative" 15 | } 16 | }, 17 | "config": { 18 | "overlay": { 19 | "line": true 20 | }, 21 | "scale": { 22 | "useUnaggregatedDomain": true 23 | } 24 | } 25 | } -------------------------------------------------------------------------------- 
/data/compassql_examples/output/voyager_exact-match.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "tick", 6 | "encoding": { 7 | "y": { 8 | "field": "Cylinders", 9 | "type": "ordinal" 10 | }, 11 | "x": { 12 | "field": "Horsepower", 13 | "type": "quantitative", 14 | "scale": {} 15 | } 16 | }, 17 | "config": { 18 | "overlay": { 19 | "line": true 20 | }, 21 | "scale": { 22 | "useUnaggregatedDomain": true 23 | } 24 | } 25 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/showme_automatic-mark.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "bar", 6 | "encoding": { 7 | "x": { 8 | "aggregate": "mean", 9 | "field": "Horsepower", 10 | "type": "quantitative" 11 | }, 12 | "y": { 13 | "field": "Cylinders", 14 | "type": "ordinal" 15 | } 16 | }, 17 | "config": { 18 | "overlay": { 19 | "line": true 20 | }, 21 | "scale": { 22 | "useUnaggregatedDomain": true 23 | } 24 | } 25 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/2d-QxT.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/movies.json" 4 | }, 5 | "mark": "point", 6 | "encoding": { 7 | "y": { 8 | "field": "IMDB_Rating", 9 | "type": "quantitative", 10 | "scale": {} 11 | }, 12 | "x": { 13 | "field": "Release_Date", 14 | "type": "temporal", 15 | "scale": {} 16 | } 17 | }, 18 | "config": { 19 | "overlay": { 20 | "line": true 21 | }, 22 | "scale": { 23 | "useUnaggregatedDomain": true 24 | } 25 | } 26 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/cql_2.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "point", 6 | "encoding": { 7 | "x": { 8 | "field": "Horsepower", 9 | "type": "quantitative", 10 | "scale": {} 11 | }, 12 | "y": { 13 | "field": "Acceleration", 14 | "type": "quantitative", 15 | "scale": {} 16 | } 17 | }, 18 | "config": { 19 | "overlay": { 20 | "line": true 21 | }, 22 | "scale": { 23 | "useUnaggregatedDomain": true 24 | } 25 | } 26 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/scatter.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "point", 6 | "encoding": { 7 | "x": { 8 | "field": "Acceleration", 9 | "type": "quantitative", 10 | "scale": {} 11 | }, 12 | "y": { 13 | "field": "Horsepower", 14 | "type": "quantitative", 15 | "scale": {} 16 | } 17 | }, 18 | "config": { 19 | "overlay": { 20 | "line": true 21 | }, 22 | "scale": { 23 | "useUnaggregatedDomain": true 24 | } 25 | } 26 | } -------------------------------------------------------------------------------- /data/compassql_examples/input/3d-OxQxQ.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/cars.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "?", 7 | "field": "Origin", 8 | "type": "nominal" 9 | },{ 10 | "channel": "?", 11 | "bin": "?", 12 | "aggregate": "?", 13 | "field": "Horsepower", 14 | "type": "quantitative" 15 | },{ 16 | "channel": "?", 17 | "bin": "?", 18 | "aggregate": "?", 19 | "field": "Acceleration", 20 | "type": "quantitative" 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /data/compassql_examples/input/3d-NxQxQ.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": {"url": "data/cars.json"}, 3 | "mark": "?", 4 | "encodings": [ 5 | { 6 | "channel": "?", 7 | 
"field": "Cylinders", 8 | "type": "ordinal" 9 | },{ 10 | "channel": "?", 11 | "bin": "?", 12 | "aggregate": "?", 13 | "field": "Horsepower", 14 | "type": "quantitative" 15 | },{ 16 | "channel": "?", 17 | "bin": "?", 18 | "aggregate": "?", 19 | "field": "Acceleration", 20 | "type": "quantitative" 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /data/compassql_examples/output/bin-maxbins.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "bar", 6 | "encoding": { 7 | "x": { 8 | "bin": { 9 | "maxbins": 5 10 | }, 11 | "field": "Miles_per_Gallon", 12 | "type": "quantitative" 13 | }, 14 | "y": { 15 | "aggregate": "count", 16 | "field": "*", 17 | "type": "quantitative" 18 | } 19 | }, 20 | "config": { 21 | "overlay": { 22 | "line": true 23 | }, 24 | "scale": { 25 | "useUnaggregatedDomain": true 26 | } 27 | } 28 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/3d-NxOxQ.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "point", 6 | "encoding": { 7 | "x": { 8 | "field": "Cylinders", 9 | "type": "ordinal" 10 | }, 11 | "y": { 12 | "field": "Origin", 13 | "type": "nominal" 14 | }, 15 | "size": { 16 | "aggregate": "mean", 17 | "field": "Acceleration", 18 | "type": "quantitative" 19 | } 20 | }, 21 | "config": { 22 | "overlay": { 23 | "line": true 24 | }, 25 | "scale": { 26 | "useUnaggregatedDomain": true 27 | } 28 | } 29 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/3d-OxQxQ.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "point", 6 | "encoding": { 7 | "y": { 8 | "field": 
"Origin", 9 | "type": "nominal" 10 | }, 11 | "size": { 12 | "aggregate": "mean", 13 | "field": "Horsepower", 14 | "type": "quantitative" 15 | }, 16 | "x": { 17 | "bin": {}, 18 | "field": "Acceleration", 19 | "type": "quantitative" 20 | } 21 | }, 22 | "config": { 23 | "overlay": { 24 | "line": true 25 | }, 26 | "scale": { 27 | "useUnaggregatedDomain": true 28 | } 29 | } 30 | } -------------------------------------------------------------------------------- /malfoy/generate/define/type_distribution.json: -------------------------------------------------------------------------------- 1 | { 2 | "number": { 3 | "quantitative": 0.95, 4 | "ordinal": 0.04, 5 | "nominal": 0.01, 6 | "temporal": 0 7 | }, 8 | "string": { 9 | "nominal": 0.95, 10 | "ordinal": 0.05, 11 | "quantitative": 0, 12 | "temporal": 0 13 | }, 14 | "datetime": { 15 | "temporal": 1, 16 | "nominal": 0, 17 | "ordinal": 0, 18 | "quantitative": 0 19 | }, 20 | "boolean": { 21 | "nominal": 1, 22 | "ordinal": 0, 23 | "quantitative": 0, 24 | "temporal": 0 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /data/compassql_examples/output/3d-NxQxQ.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": "data/cars.json" 4 | }, 5 | "mark": "point", 6 | "encoding": { 7 | "y": { 8 | "field": "Cylinders", 9 | "type": "ordinal" 10 | }, 11 | "size": { 12 | "aggregate": "mean", 13 | "field": "Horsepower", 14 | "type": "quantitative" 15 | }, 16 | "x": { 17 | "bin": {}, 18 | "field": "Acceleration", 19 | "type": "quantitative" 20 | } 21 | }, 22 | "config": { 23 | "overlay": { 24 | "line": true 25 | }, 26 | "scale": { 27 | "useUnaggregatedDomain": true 28 | } 29 | } 30 | } -------------------------------------------------------------------------------- /data/compassql_examples/output/2d-NxN.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "url": 
class Spec(SortedDict):
    """
    A spec stored as a dictionary whose top-level keys are kept sorted.

    Instances are hashable so that they can be stored in sets. The hash is
    derived from the current contents, so a spec must not be mutated while
    it is a member of a set or a key of a dict.
    """

    def __init__(self, *args, **kw):
        super().__init__(*args, **kw)

    def __hash__(self):
        """
        Returns a hash of the JSON serialization of this spec.
        """
        # Equality between dicts ignores insertion order, but a plain
        # json.dumps does not: nested (unsorted) dicts would serialize in
        # insertion order and equal specs could hash differently.
        # sort_keys makes the serialization canonical at every level.
        return hash(json.dumps(self, sort_keys=True))

    def get_enc_by_channel(self, channel) -> Optional[Dict]:
        """
        Returns the encoding associated with the given channel,
        None if it does not exist.
        """
        # A spec without an "encoding" entry has no encodings at all;
        # report that as None instead of raising KeyError.
        encodings = self.get("encoding")
        if encodings is None:
            return None

        if channel in encodings:
            return encodings[channel]
        return None
class PropObjects:
    """
    Factories for the properties whose value is an object rather than
    a plain scalar.
    """

    @staticmethod
    def get_bin(max_bins):
        """
        Builds the bin object that carries the given max_bins.
        """
        bin_object = SortedDict()
        bin_object["maxbins"] = max_bins
        return bin_object

    @staticmethod
    def get_scale(scale_enum):
        """
        Builds the scale object for the given kind of scale.

        scale_enum -- `zero` or `log`; anything else raises ValueError
        """
        scale_object = SortedDict()

        if scale_enum == "zero":
            scale_object["zero"] = True
            return scale_object

        if scale_enum == "log":
            scale_object["type"] = "log"
            return scale_object

        raise ValueError("scale should be zero or log")
def main(args):
    """
    Insert a data url into every spec of a JSON file, rewriting it in place.

    args.file is the path of the JSON file, args.data_url the url to insert.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(args.file, encoding="utf-8") as f:
        data = json.load(f)

    populate(data, args.data_url)

    with open(args.file, "w", encoding="utf-8") as outfile:
        json.dump(data, outfile, indent=4)


def populate(data, data_url):
    """
    Recursively set `data` to {"url": data_url} on every spec found in data.

    The structure is modified in place and nothing is returned. A dict that
    is itself a spec is updated (any existing "data" entry is replaced) and
    not searched any further; other dicts and lists are searched through
    their values / items. Any other value is left untouched.
    """
    if is_spec(data):
        data["data"] = {"url": data_url}
    elif isinstance(data, dict):
        for child in data.values():
            populate(child, data_url)
    elif isinstance(data, list):
        for child in data:
            populate(child, data_url)


def is_spec(data):
    """
    Return True if data is a dict holding both a "mark" and an "encoding".
    """
    # isinstance (rather than an exact type comparison) also accepts dict
    # subclasses such as OrderedDict.
    return isinstance(data, dict) and "mark" in data and "encoding" in data
outputDir = 'data/compassql_examples/output/'; 13 | 14 | files = fs.readdirSync(inputDir); 15 | 16 | for (var i = 0; i < files.length; i ++) { 17 | 18 | console.log('[OK] Processing ' + files[i]); 19 | 20 | input = path.join(inputDir, files[i]); 21 | output = path.join(outputDir, files[i]); 22 | 23 | // read spec 24 | var raw_spec = fs.readFileSync(input, 'utf8'); 25 | var spec = JSON.parse(raw_spec); 26 | 27 | // compile data schema for compassql 28 | var data = dl.json(spec.data.url); 29 | var schema = cql.schema.build(data); 30 | 31 | const query = { 32 | spec, 33 | chooseBy: 'effectiveness', 34 | config: { autoAddCount: true } 35 | }; 36 | 37 | const recommendation = cql.recommend(query, schema); 38 | 39 | const vlSpec = recommendation.result.items[0].toSpec(); 40 | 41 | fs.writeFileSync(output, JSON.stringify(vlSpec, null, 2), 'utf8'); 42 | } 43 | 44 | -------------------------------------------------------------------------------- /data/spec_pairs/data.json: -------------------------------------------------------------------------------- 1 | { 2 | "headers": { 3 | "first": { 4 | "title": "Some spec", 5 | "subtitle": "Some description" 6 | }, 7 | "second": { 8 | "title": "Some other spec", 9 | "subtitle": "Some description" 10 | } 11 | }, 12 | "specs": [{ 13 | "first": { 14 | "mark": "point", 15 | "encoding": { 16 | "y": { 17 | "scale": { 18 | "zero": true 19 | }, 20 | "field": "Miles_per_Gallon", 21 | "type": "quantitative", 22 | "aggregate": "sum" 23 | } 24 | }, 25 | "data": { 26 | "url": "data/cars.json" 27 | }, 28 | "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json" 29 | }, 30 | "second": { 31 | "mark": "tick", 32 | "encoding": { 33 | "y": { 34 | "scale": { 35 | "zero": false 36 | }, 37 | "field": "Miles_per_Gallon", 38 | "type": "quantitative" 39 | } 40 | }, 41 | "data": { 42 | "url": "data/cars.json" 43 | }, 44 | "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json" 45 | }, 46 | "properties": {} 47 | }] 48 | } 49 | 
def discriminative_learning(
    train_data, initial_weights, learning_rate=0.01, max_iter=100
):
    """ discriminative learning for mln from partial and full specs

    Args:
        train_data: mapping from a case name to a pair whose first item is
            the partial spec and whose second item is the full spec
        initial_weights: mapping from "<rule>_weight" to its starting value;
            every value is scaled by 50 before learning starts
        learning_rate: accepted for interface compatibility; the update
            below does not use it
        max_iter: number of iterations to run
    Returns:
        the dictionary of updated weights (initial_weights is not modified)
    """

    weights = {k: initial_weights[k] * 50 for k in initial_weights}

    # NOTE: this silences logging for the whole process and is not undone.
    logging.disable(logging.CRITICAL)

    t = 0
    while t < max_iter:
        print("[Iteration] {}".format(t))
        for case in train_data:
            partial_spec, full_spec = train_data[case][0], train_data[case][1]
            draco_rec = run(partial_spec, constants=weights, silence_warnings=True)

            map_state = count_violations(draco_rec)
            truth_state = count_violations(full_spec)

            # get the names of violated rules in two specs
            violated_rules = set(map_state) | set(truth_state)

            for r in violated_rules:
                # get the num violations of the rule r
                n1 = map_state.get(r, 0)
                n2 = truth_state.get(r, 0)

                # since our weights are costs and we want to minimize the loss
                weights[r + "_weight"] += n1 - n2

            # NOTE(review): only the first case is used in each iteration;
            # the nesting of this break is reconstructed from a flattened
            # source -- confirm it belongs to the loop over cases.
            break
        t += 1

    # Hand the learned weights back to the caller; without this the result
    # of the whole procedure is lost.
    return weights
def current_weights() -> Dict:
    """ Get the current weights as a dictionary.

    The weights are read from data/weights.json, located relative to this
    module; the file is read again on every call.
    """
    with open(os.path.join(os.path.dirname(__file__), "../../data/weights.json")) as f:
        return json.load(f)


def compute_cost(violations: Dict) -> int:
    """ Get the total cost of the given violations.
    Args:
        violations: a dictionary from violation name to its count
    Returns:
        the sum over all violations of count * weight, where the weight of
        violation `k` is looked up as `k_weight` in the current weights
    """
    weights = current_weights()
    return sum(v * weights[f"{k}_weight"] for k, v in violations.items())


def compute_violation_costs(violations: Dict) -> Dict:
    """Get a dictionary of violation -> (count, weight)"""
    weights = current_weights()
    return {k: (v, weights[f"{k}_weight"]) for k, v in violations.items()}


def count_violations(draco_query: List[str], debug=False) -> Optional[Dict[str, int]]:
    """ Get a dictionary of violations for a full spec.
    Args:
        draco_query: the query to run, given as a list of strings
        debug: forwarded to the solver run
    Returns:
        a dictionary storing violations of soft rules,
        or None if the run produced no result
    """
    result = run(
        draco_query,
        files=["define.lp", "soft.lp", "output.lp"],
        silence_warnings=True,
        debug=debug,
    )
    if result is None:
        return None
    return result.violations


def contingency_table(labels_1: np.ndarray, labels_2: np.ndarray) -> List[List[int]]:
    """
    Compute a contingency table for two arrays of booleans.

    Returns a nested list with one row per input:
    [number of true labels, number of remaining labels].
    """
    table = []
    for labels in (labels_1, labels_2):
        # sum once per input; the second cell is derived from it
        positives = np.sum(labels)
        table.append([positives, len(labels) - positives])
    return table
"properties": { 23 | "name": { 24 | "type": "string" 25 | }, 26 | "type": { 27 | "type": "string" 28 | }, 29 | "entropy": { 30 | "type": "number" 31 | }, 32 | "cardinality": { 33 | "type": "number" 34 | }, 35 | "interesting": { 36 | "type": "boolean" 37 | } 38 | }, 39 | "additionalProperties": false, 40 | "required": [ 41 | "name", 42 | "type" 43 | ] 44 | } 45 | }, 46 | "num_rows": { 47 | "type": "number" 48 | }, 49 | "pvalue": { 50 | "type": "number" 51 | }, 52 | "task": { 53 | "type": "string", 54 | "enum": [ 55 | "value", 56 | "summary" 57 | ] 58 | }, 59 | "negative": { 60 | "$ref": "https://vega.github.io/schema/vega-lite/v2.json" 61 | }, 62 | "positive": { 63 | "$ref": "https://vega.github.io/schema/vega-lite/v2.json" 64 | } 65 | }, 66 | "additionalProperties": false, 67 | "required": [ 68 | "fields", 69 | "negative", 70 | "positive" 71 | ] 72 | } 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /malfoy/generate/define/distributions.json: -------------------------------------------------------------------------------- 1 | { 2 | "mark": { 3 | "probability": 1, 4 | "values": [ 5 | { "name": "area", "probability": 0.15 }, 6 | { "name": "bar", "probability": 0.25 }, 7 | { "name": "line", "probability": 0.2 }, 8 | { "name": "point", "probability": 0.2 }, 9 | { "name": "rect", "probability": 0.05 }, 10 | { "name": "text", "probability": 0.05 }, 11 | { "name": "tick", "probability": 0.1 } 12 | ] 13 | }, 14 | "channel": { 15 | "probability": 1, 16 | "values": [ 17 | { "name": "x", "probability": 0.4 }, 18 | { "name": "y", "probability": 0.3 }, 19 | { "name": "color", "probability": 0.1 }, 20 | { "name": "size", "probability": 0.1 }, 21 | { "name": "shape", "probability": 0.04 }, 22 | { "name": "text", "probability": 0.01 }, 23 | { "name": "row", "probability": 0.02 }, 24 | { "name": "column", "probability": 0.02 }, 25 | { "name": "detail", "probability": 0.1 } 26 | ] 27 | }, 28 | "aggregate": { 29 | 
"probability": 0.1, 30 | "values": [ 31 | { "name": "sum", "probability": 0.3 }, 32 | { "name": "mean", "probability": 0.3 }, 33 | { "name": "stdev", "probability": 0.1 }, 34 | { "name": "median", "probability": 0.1 }, 35 | { "name": "min", "probability": 0.1 }, 36 | { "name": "max", "probability": 0.1 } 37 | ] 38 | }, 39 | "bin": { 40 | "probability": 0.1, 41 | "values": [ 42 | { "name": 3, "probability": 0.10 }, 43 | { "name": 5, "probability": 0.25 }, 44 | { "name": 10, "probability": 0.4 }, 45 | { "name": 20, "probability": 0.1 }, 46 | { "name": 100, "probability": 0.1 }, 47 | { "name": 200, "probability": 0.05 } 48 | ] 49 | }, 50 | "scale": { 51 | "probability": 0.1, 52 | "values": [ 53 | { "name": "zero", "probability": 0.5 }, 54 | { "name": "log", "probability": 0.5 } 55 | ] 56 | }, 57 | "timeUnit": { 58 | "probability": 0, 59 | "values": [ 60 | { "name": 3, "probability": 0.1 }, 61 | { "name": 5, "probability": 0.25 }, 62 | { "name": 10, "probability": 0.25 }, 63 | { "name": 20, "probability": 0.25 }, 64 | { "name": 100, "probability": 0.1 }, 65 | { "name": 200, "probability": 0.05 } 66 | ] 67 | }, 68 | "stack": { 69 | "probability": 0.1, 70 | "values": [ 71 | { "name": null, "probability": 0.1 }, 72 | { "name": "zero", "probability": 0.7 }, 73 | { "name": "normalize", "probability": 0.2 } 74 | ] 75 | }, 76 | "task": { 77 | "probability": 0, 78 | "values": [ 79 | { "name": "summary", "probability": 0.5 }, 80 | { "name": "value", "probability": 0.5 } 81 | ] 82 | }, 83 | "interesting": { 84 | "probability": 0, 85 | "values": [ 86 | { "name": true, "probability": 1 } 87 | ] 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /malfoy/generate/generator.py: -------------------------------------------------------------------------------- 1 | import random 2 | from copy import deepcopy 3 | from typing import Any, Dict, List, Set 4 | 5 | from draco.generation.helper import is_valid 6 | from draco.generation.model 
class Generator:
    """
    Produces groups of specs: a randomly generated base spec together
    with the specs obtained by mutating a list of its properties.
    """

    def __init__(
        self,
        distributions: Dict,
        type_distribution: Dict,
        definitions: Dict,
        data_schema: Dict,
        data_url: str,
    ) -> None:
        top_level_props = definitions["topLevelProps"]
        encoding_props = definitions["encodingProps"]

        # one Field per entry of the data schema
        data_fields = []
        for column in data_schema:
            data_fields.append(
                Field(
                    column["name"],
                    column["type"],
                    cardinality=column["cardinality"],
                )
            )

        self.model = Model(
            data_fields,
            distributions,
            type_distribution,
            top_level_props,
            encoding_props,
        )
        self.data = Data(data_fields)
        self.data_url = data_url

    def generate_interaction(
        self, props: List[str], dimensions: int, seen_base_specs: Set[Spec], cross: bool
    ) -> List[Spec]:
        """
        Draws a base spec that is not yet in seen_base_specs (and records it
        there), then returns the specs obtained by enumerating the enums of
        the given properties over it.
        """
        model = self.model

        # keep drawing until the base spec is one we have not used before
        while True:
            candidate = model.generate_spec(dimensions)
            model.pre_improve(candidate, props)
            if candidate not in seen_base_specs:
                break

        seen_base_specs.add(candidate)

        if not cross:
            raise NotImplementedError("noncross not implement")

        generated: List[Spec] = []
        self.__mutate_spec(candidate, props, 0, set(), generated)
        return generated

    def __mutate_spec(
        self,
        base_spec: Spec,
        props: List[str],
        prop_index: int,
        seen: Set[Spec],
        specs: List[Spec],
    ):
        # recursive case: branch on every enum of the current property
        if prop_index != len(props):
            target_prop = props[prop_index]
            for value in self.model.get_enums(target_prop):
                variant = deepcopy(base_spec)
                self.model.mutate_prop(variant, target_prop, value)
                self.__mutate_spec(variant, props, prop_index + 1, seen, specs)
            return

        # base case: every property has been assigned
        self.model.post_improve(base_spec, props)
        base_spec["data"] = {"url": self.data_url}

        # within a group, don't repeat the same specs
        # NOTE(review): the validity check is assumed to run only for specs
        # not seen before; the nesting is reconstructed from a flattened
        # source -- confirm.
        if base_spec in seen:
            return
        seen.add(base_spec)

        if is_valid(Task(self.data, Query.from_vegalite(base_spec))):
            specs.append(base_spec)
"point", 28 | "encoding": { 29 | "x": { 30 | "field": "q1", 31 | "type": "quantitative" 32 | }, 33 | "y": { 34 | "field": "q1", 35 | "type": "quantitative" 36 | } 37 | } 38 | }, 39 | "positive": { 40 | "mark": "point", 41 | "encoding": { 42 | "x": { 43 | "field": "q1", 44 | "type": "quantitative" 45 | }, 46 | "y": { 47 | "field": "q2", 48 | "type": "quantitative" 49 | } 50 | } 51 | } 52 | }, 53 | { 54 | "fields": [ 55 | { 56 | "name": "q1", 57 | "type": "number", 58 | "cardinality": 100, 59 | "entropy": 1 60 | }, 61 | { 62 | "name": "q2", 63 | "type": "number", 64 | "cardinality": 100, 65 | "entropy": 1 66 | }, 67 | { 68 | "name": "n", 69 | "type": "string", 70 | "cardinality": 5 71 | } 72 | ], 73 | "num_rows": 100, 74 | "negative": { 75 | "mark": "point", 76 | "encoding": { 77 | "x": { 78 | "field": "q1", 79 | "type": "quantitative" 80 | }, 81 | "color": { 82 | "field": "q2", 83 | "type": "quantitative" 84 | } 85 | } 86 | }, 87 | "positive": { 88 | "mark": "point", 89 | "encoding": { 90 | "x": { 91 | "field": "q1", 92 | "type": "quantitative" 93 | }, 94 | "y": { 95 | "field": "q2", 96 | "type": "quantitative" 97 | } 98 | } 99 | } 100 | } 101 | ] 102 | } 103 | -------------------------------------------------------------------------------- /data/driving.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"side": "left", "year": 1956, "miles": 3675, "gas": 2.38}, 3 | {"side": "right", "year": 1957, "miles": 3706, "gas": 2.40}, 4 | {"side": "bottom", "year": 1958, "miles": 3766, "gas": 2.26}, 5 | {"side": "top", "year": 1959, "miles": 3905, "gas": 2.31}, 6 | {"side": "right", "year": 1960, "miles": 3935, "gas": 2.27}, 7 | {"side": "bottom", "year": 1961, "miles": 3977, "gas": 2.25}, 8 | {"side": "right", "year": 1962, "miles": 4085, "gas": 2.22}, 9 | {"side": "bottom", "year": 1963, "miles": 4218, "gas": 2.12}, 10 | {"side": "bottom", "year": 1964, "miles": 4369, "gas": 2.11}, 11 | {"side": "bottom", "year": 1965, 
"miles": 4538, "gas": 2.14}, 12 | {"side": "top", "year": 1966, "miles": 4676, "gas": 2.14}, 13 | {"side": "bottom", "year": 1967, "miles": 4827, "gas": 2.14}, 14 | {"side": "right", "year": 1968, "miles": 5038, "gas": 2.13}, 15 | {"side": "right", "year": 1969, "miles": 5207, "gas": 2.07}, 16 | {"side": "right", "year": 1970, "miles": 5376, "gas": 2.01}, 17 | {"side": "bottom", "year": 1971, "miles": 5617, "gas": 1.93}, 18 | {"side": "bottom", "year": 1972, "miles": 5973, "gas": 1.87}, 19 | {"side": "right", "year": 1973, "miles": 6154, "gas": 1.90}, 20 | {"side": "left", "year": 1974, "miles": 5943, "gas": 2.34}, 21 | {"side": "bottom", "year": 1975, "miles": 6111, "gas": 2.31}, 22 | {"side": "bottom", "year": 1976, "miles": 6389, "gas": 2.32}, 23 | {"side": "top", "year": 1977, "miles": 6630, "gas": 2.36}, 24 | {"side": "bottom", "year": 1978, "miles": 6883, "gas": 2.23}, 25 | {"side": "left", "year": 1979, "miles": 6744, "gas": 2.68}, 26 | {"side": "left", "year": 1980, "miles": 6672, "gas": 3.30}, 27 | {"side": "right", "year": 1981, "miles": 6732, "gas": 3.30}, 28 | {"side": "right", "year": 1982, "miles": 6835, "gas": 2.92}, 29 | {"side": "right", "year": 1983, "miles": 6943, "gas": 2.66}, 30 | {"side": "right", "year": 1984, "miles": 7130, "gas": 2.48}, 31 | {"side": "right", "year": 1985, "miles": 7323, "gas": 2.36}, 32 | {"side": "left", "year": 1986, "miles": 7558, "gas": 1.76}, 33 | {"side": "top", "year": 1987, "miles": 7770, "gas": 1.76}, 34 | {"side": "bottom", "year": 1988, "miles": 8089, "gas": 1.68}, 35 | {"side": "left", "year": 1989, "miles": 8397, "gas": 1.75}, 36 | {"side": "top", "year": 1990, "miles": 8529, "gas": 1.88}, 37 | {"side": "right", "year": 1991, "miles": 8535, "gas": 1.78}, 38 | {"side": "right", "year": 1992, "miles": 8662, "gas": 1.69}, 39 | {"side": "left", "year": 1993, "miles": 8855, "gas": 1.60}, 40 | {"side": "bottom", "year": 1994, "miles": 8909, "gas": 1.59}, 41 | {"side": "bottom", "year": 1995, "miles": 9150, "gas": 
1.60}, 42 | {"side": "top", "year": 1996, "miles": 9192, "gas": 1.67}, 43 | {"side": "right", "year": 1997, "miles": 9416, "gas": 1.65}, 44 | {"side": "bottom", "year": 1998, "miles": 9590, "gas": 1.39}, 45 | {"side": "right", "year": 1999, "miles": 9687, "gas": 1.50}, 46 | {"side": "top", "year": 2000, "miles": 9717, "gas": 1.89}, 47 | {"side": "left", "year": 2001, "miles": 9699, "gas": 1.77}, 48 | {"side": "bottom", "year": 2002, "miles": 9814, "gas": 1.64}, 49 | {"side": "right", "year": 2003, "miles": 9868, "gas": 1.86}, 50 | {"side": "left", "year": 2004, "miles": 9994, "gas": 2.14}, 51 | {"side": "left", "year": 2005, "miles": 10067, "gas": 2.53}, 52 | {"side": "right", "year": 2006, "miles": 10037, "gas": 2.79}, 53 | {"side": "right", "year": 2007, "miles": 10025, "gas": 2.95}, 54 | {"side": "left", "year": 2008, "miles": 9880, "gas": 3.31}, 55 | {"side": "bottom", "year": 2009, "miles": 9657, "gas": 2.38}, 56 | {"side": "left", "year": 2010, "miles": 9596, "gas": 2.61} 57 | ] -------------------------------------------------------------------------------- /malfoy/learn/generator.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | import random 5 | 6 | import data_util 7 | from draco.spec import * 8 | 9 | from draco import spec 10 | 11 | 12 | def sample_partial_specs(specs, N=None): 13 | """ Given a list of full specs, sample partial specs from them 14 | Args: 15 | specs: full specs formed from (data, query) pairs, query is a vegalite json file 16 | Returns: 17 | a list of (data, partial_spec, spec) pairs, where the partial_spec is created from spec 18 | """ 19 | results = [] 20 | 21 | i = 0 22 | for key in specs: 23 | 24 | if N is not None and i >= N: 25 | break 26 | 27 | i += 1 28 | 29 | entry = specs[key] 30 | 31 | data, task, query = entry.data, entry.task, Query.from_vegalite(entry.positive) 32 | 33 | partial_query = insert_holes(query) 34 | 35 | if (not data.content) and 
def subst_w_prob(v1, v2, prob):
    """ Pick v1 with probability prob, otherwise v2.
    Args:
        v1: the value returned with probability prob
        v2: the value returned with probability 1 - prob
        prob: a probability in [0, 1]
    Returns:
        v1 or v2 itself, with its original type
    Raises:
        ValueError: if prob is not within [0, 1]
    """
    if not 0.0 <= prob <= 1.0:
        raise ValueError("prob should be between 0 and 1")

    # Do not hand [v1, v2] to np.random.choice: it first converts the pair
    # into a single numpy array, so mixed values are coerced to one dtype
    # (e.g. False next to a "?" hole comes back as the string 'False').
    # Drawing a number and selecting the object ourselves keeps the values
    # untouched while still using numpy's global random state.
    return v1 if np.random.random_sample() < prob else v2
def main(args):
    """Generate groups of specs per interaction and write one JSON file per interaction.

    Args:
        args: parsed command-line arguments. Reads ``args.interaction`` (an
            interaction name, or "all"), ``args.groups`` (groups per dimension,
            -1 to use each interaction's own "groups" value) and
            ``args.output_dir`` (directory the ``<name>.json`` files go to).
    """
    logger = logging.getLogger(__name__)

    interactions = load_json(INTERACTIONS_PATH)
    distributions = load_json(DISTRIBUTIONS_PATH)
    definitions = load_json(DEFINITIONS_PATH)
    dummy_schema = load_json(DUMMY_SCHEMA_PATH)
    type_distribution = load_json(TYPE_DISTRIBUTIONS)

    out_dir = args.output_dir

    generator = Generator(
        distributions, type_distribution, definitions, dummy_schema, DATA_URL
    )

    chosen = str(args.interaction)
    num_groups = int(args.groups)

    for interaction in interactions:
        specified = interaction["name"] == chosen or chosen == "all"
        if not (interaction["include"] and specified):
            continue

        cross = "nonCross" not in interaction  # TODO: change to cross
        base_num_groups = interaction["groups"] if num_groups == -1 else num_groups

        out = {}
        for d in range(1, MAX_DIMENSIONS + 1):
            # to not generate too many 1D visualizations
            n = base_num_groups // 4 if d == 1 else base_num_groups

            seen_base_specs = set()
            groups = []
            for _ in range(n):
                specs = generator.generate_interaction(
                    interaction["props"], d, seen_base_specs, cross
                )

                # a group needs at least two specs to form a pair; retry a
                # bounded number of times before giving up on this group
                tries = 0
                while len(specs) < 2 and tries < NUM_TRIES:
                    specs = generator.generate_interaction(
                        interaction["props"], d, seen_base_specs, cross
                    )
                    tries += 1

                # Test the result, not the counter: a group that only succeeds on
                # the last allowed retry has tries == NUM_TRIES but is still valid.
                if len(specs) < 2:
                    logger.warning(
                        "exceeded maximum tries for {0} with d={1}".format(
                            interaction["name"], d
                        )
                    )
                    continue

                groups.append(specs)

            out[d] = groups

        output_name = "{0}/{1}.json".format(out_dir, interaction["name"])
        with open(output_name, "w") as outfile:
            json.dump(out, outfile, indent=4)
"props": [ 13 | "channel" 14 | ], 15 | "groups": 20, 16 | "include": true 17 | }, 18 | { 19 | "name": "aggregate", 20 | "props": [ 21 | "aggregate" 22 | ], 23 | "groups": 20, 24 | "include": true 25 | }, 26 | { 27 | "name": "bin", 28 | "props": [ 29 | "bin" 30 | ], 31 | "groups": 20, 32 | "include": true 33 | }, 34 | { 35 | "name": "scale", 36 | "props": [ 37 | "scale" 38 | ], 39 | "groups": 20, 40 | "include": true 41 | }, 42 | { 43 | "name": "stack", 44 | "props": [ 45 | "stack" 46 | ], 47 | "groups": 30, 48 | "include": true 49 | }, 50 | { 51 | "name": "sort", 52 | "props": [ 53 | "sort" 54 | ], 55 | "groups": 20, 56 | "include": true 57 | }, 58 | { 59 | "name": "timeUnit", 60 | "props": [ 61 | "timeUnit" 62 | ], 63 | "groups": 20, 64 | "include": false 65 | }, 66 | { 67 | "name": "mark-type", 68 | "props": [ 69 | "mark", 70 | "type" 71 | ], 72 | "groups": 20, 73 | "include": true 74 | }, 75 | { 76 | "name": "mark-channel", 77 | "props": [ 78 | "mark", 79 | "channel" 80 | ], 81 | "groups": 20, 82 | "include": true 83 | }, 84 | { 85 | "name": "type-channel", 86 | "props": [ 87 | "type", 88 | "channel" 89 | ], 90 | "groups": 20, 91 | "include": true 92 | }, 93 | { 94 | "name": "mark-aggregate", 95 | "props": [ 96 | "mark", 97 | "aggregate" 98 | ], 99 | "groups": 10, 100 | "include": true 101 | }, 102 | { 103 | "name": "channel-aggregate", 104 | "props": [ 105 | "channel", 106 | "aggregate" 107 | ], 108 | "groups": 5, 109 | "include": true 110 | }, 111 | { 112 | "name": "channel-channel", 113 | "props": [ 114 | "channel", 115 | "channel" 116 | ], 117 | "groups": 5, 118 | "include": true 119 | }, 120 | { 121 | "name": "mark-scale", 122 | "props": [ 123 | "mark", 124 | "scale" 125 | ], 126 | "groups": 20, 127 | "include": true 128 | }, 129 | { 130 | "name": "channel-scale", 131 | "props": [ 132 | "channel", 133 | "scale" 134 | ], 135 | "groups": 10, 136 | "include": true 137 | }, 138 | { 139 | "name": "datatype-type", 140 | "props": [ 141 | "field", 142 | "type" 143 
| ], 144 | "groups": 10, 145 | "include": false 146 | }, 147 | { 148 | "name": "interesting-channel", 149 | "props": [ 150 | "interesting", 151 | "channel" 152 | ], 153 | "groups": 20, 154 | "include": true 155 | }, 156 | { 157 | "name": "mark-task", 158 | "props": [ 159 | "mark", 160 | "task" 161 | ], 162 | "groups": 20, 163 | "include": false 164 | }, 165 | { 166 | "name": "bin-channel", 167 | "props": [ 168 | "bin", 169 | "channel" 170 | ], 171 | "groups": 10, 172 | "include": true 173 | }, 174 | { 175 | "name": "type-type-mark", 176 | "props": [ 177 | "type", 178 | "type", 179 | "mark" 180 | ], 181 | "groups": 20, 182 | "include": false, 183 | "nonCross": true 184 | } 185 | ] 186 | -------------------------------------------------------------------------------- /malfoy/learn/playground.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | 5 | import numpy as np 6 | 7 | import data_util 8 | import linear 9 | from helper import current_weights 10 | from draco.run import run 11 | from draco.spec import Task 12 | 13 | 14 | logging.basicConfig(level=logging.INFO) 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | def absolute_path(p: str) -> str: 19 | return os.path.join(os.path.dirname(__file__), p) 20 | 21 | 22 | def play(partial_full_data, train_weights=True, output_file=None): 23 | 24 | init_weights = current_weights() 25 | 26 | if train_weights: 27 | train_dev, _ = data_util.load_data() 28 | 29 | X = train_dev.positive - train_dev.negative 30 | clf = linear.train_model(X) 31 | 32 | # columns where all X[i] are zero 33 | unused_features = np.nonzero(np.sum(np.abs(X), axis=0) == 0)[0] 34 | # if a feature is not used, its weight is 0 35 | learnt_weights = [ 36 | int(x * 1000) if (i not in unused_features) else None 37 | for i, x in enumerate(clf.coef_[0]) 38 | ] 39 | 40 | weights = {} 41 | for i, k in enumerate(init_weights): 42 | if learnt_weights[i] is not None: 43 | 
def generate_visual_pairs(partial_full_data, weights):
    """Pair Draco's recommendation with the reference spec for every case.

    Args:
        partial_full_data: mapping from a case key to a
            ``(partial_spec, full_spec)`` pair.
        weights: constants handed to ``run`` when solving each partial spec.
    Returns:
        a dict with "headers" (titles for the two sides) and "specs", a list
        with one entry per case; "first" is None when no recommendation was found.
    """
    headers = {
        "first": {"title": "Draco", "subtitle": "Draco Prediction"},
        "second": {"title": "CQL", "subtitle": "Compassql Prediction"},
    }

    comparisons = []
    for case in partial_full_data:
        partial_spec, full_spec = partial_full_data[case]

        draco_rec = run(Task.from_cql(partial_spec), constants=weights)

        if draco_rec is None:
            # keep the case in the output so the missing prediction stays visible
            logger.warning(f"Could not find a spec for {partial_spec}")
            prediction = None
        else:
            prediction = draco_rec.to_vegalite()

        comparisons.append(
            {
                "first": prediction,
                "second": full_spec,
                "properties": {"input": partial_spec},
            }
        )

    return {"headers": headers, "specs": comparisons}
109 | # dataset = data_util.load_partial_full_data(spec_dir) 110 | # output_file = absolute_path("../../data/spec_pairs/draco_cql.json") 111 | # play(dataset, train_weights=True, output_file=output_file) 112 | 113 | spec_dir = absolute_path("./data/compassql_examples") 114 | dataset = data_util.load_partial_full_data(spec_dir) 115 | output_file = absolute_path("./data/spec_pairs/draco_cql_default_weights.json") 116 | play(dataset, train_weights=False, output_file=output_file) 117 | # open `http://localhost:3000/specviewer?data=spec_pairs/draco_cql_default_weights.json` 118 | -------------------------------------------------------------------------------- /data/weights.json: -------------------------------------------------------------------------------- 1 | { 2 | "type_q_weight": 0, 3 | "type_o_weight": 1, 4 | "type_n_weight": 2, 5 | "aggregate_weight": 1, 6 | "bin_weight": 2, 7 | "bin_high_weight": 10, 8 | "bin_low_weight": 6, 9 | "encoding_weight": 0, 10 | "encoding_field_weight": 6, 11 | "same_field_2_weight": 8, 12 | "same_field_gte3_weight": 16, 13 | "count_twice_weight": 50, 14 | "shape_cardinality_weight": 5, 15 | "number_nominal_weight": 10, 16 | "bin_cardinality_weight": 5, 17 | "quant_bin_weight": 1, 18 | "agg_dim_weight": 2, 19 | "only_discrete_weight": 30, 20 | "multiple_non_pos_weight": 3, 21 | "non_positional_pref_weight": 10, 22 | "aggregate_group_by_raw_weight": 3, 23 | "x_y_raw_weight": 1, 24 | "log_weight": 1, 25 | "zero_weight": 1, 26 | "zero_size_weight": 3, 27 | "zero_positional_weight": 1, 28 | "zero_skew_weight": 5, 29 | "includes_zero_weight": 10, 30 | "only_x_weight": 1, 31 | "orientation_binned_weight": -1, 32 | "high_cardinality_ordinal_weight": 10, 33 | "high_cardinality_nominal_weight": 10, 34 | "high_cardinality_nominal_color_weight": 10, 35 | "horizontal_scrolling_weight": 20, 36 | "temporal_date_weight": 1, 37 | "quantitative_numbers_weight": 2, 38 | "position_entropy_weight": 2, 39 | "high_cardinality_size_weight": 1, 40 | 
"value_agg_weight": 1, 41 | "facet_summary_weight": 0, 42 | "x_row_weight": 1, 43 | "y_row_weight": 1, 44 | "x_column_weight": 1, 45 | "y_column_weight": 1, 46 | "color_entropy_high_weight": 0, 47 | "color_entropy_low_weight": 0, 48 | "size_entropy_high_weight": 0, 49 | "size_entropy_low_weight": 0, 50 | "c_d_column_weight": 5, 51 | "temporal_y_weight": -1, 52 | "d_d_overlap_weight": 20, 53 | "c_c_point_weight": 0, 54 | "c_c_line_weight": 20, 55 | "c_c_area_weight": 20, 56 | "c_c_text_weight": 2, 57 | "c_c_tick_weight": 5, 58 | "c_d_point_weight": 10, 59 | "c_d_bar_weight": 20, 60 | "c_d_line_weight": 20, 61 | "c_d_area_weight": 20, 62 | "c_d_text_weight": 50, 63 | "c_d_tick_weight": 0, 64 | "c_d_no_overlap_point_weight": 20, 65 | "c_d_no_overlap_bar_weight": 0, 66 | "c_d_no_overlap_line_weight": 20, 67 | "c_d_no_overlap_area_weight": 20, 68 | "c_d_no_overlap_text_weight": 30, 69 | "c_d_no_overlap_tick_weight": 25, 70 | "d_d_point_weight": 0, 71 | "d_d_text_weight": 1, 72 | "d_d_rect_weight": 0, 73 | "continuous_x_weight": 0, 74 | "continuous_y_weight": 0, 75 | "continuous_color_weight": 10, 76 | "continuous_size_weight": 1, 77 | "continuous_text_weight": 20, 78 | "ordered_x_weight": 1, 79 | "ordered_y_weight": 0, 80 | "ordered_color_weight": 8, 81 | "ordered_size_weight": 10, 82 | "ordered_text_weight": 32, 83 | "ordered_row_weight": 10, 84 | "ordered_column_weight": 10, 85 | "nominal_x_weight": 3, 86 | "nominal_y_weight": 0, 87 | "nominal_color_weight": 10, 88 | "nominal_shape_weight": 11, 89 | "nominal_text_weight": 12, 90 | "nominal_row_weight": 7, 91 | "nominal_column_weight": 10, 92 | "nominal_detail_weight": 20, 93 | "interesting_x_weight": 0, 94 | "interesting_y_weight": 1, 95 | "interesting_color_weight": 2, 96 | "interesting_size_weight": 2, 97 | "interesting_shape_weight": 3, 98 | "interesting_text_weight": 6, 99 | "interesting_row_weight": 6, 100 | "interesting_column_weight": 7, 101 | "interesting_detail_weight": 20, 102 | "aggregate_count_weight": 0, 
103 | "aggregate_sum_weight": 2, 104 | "aggregate_mean_weight": 1, 105 | "aggregate_median_weight": 3, 106 | "aggregate_min_weight": 4, 107 | "aggregate_max_weight": 4, 108 | "aggregate_stdev_weight": 5, 109 | "value_point_weight": 0, 110 | "value_bar_weight": 0, 111 | "value_line_weight": 0, 112 | "value_area_weight": 0, 113 | "value_text_weight": 0, 114 | "value_tick_weight": 0, 115 | "value_rect_weight": 0, 116 | "summary_point_weight": 0, 117 | "summary_bar_weight": 0, 118 | "summary_line_weight": 0, 119 | "summary_area_weight": 0, 120 | "summary_text_weight": 0, 121 | "summary_tick_weight": 0, 122 | "summary_rect_weight": 0, 123 | "value_continuous_x_weight": 0, 124 | "value_continuous_y_weight": 0, 125 | "value_continuous_color_weight": 0, 126 | "value_continuous_size_weight": 0, 127 | "value_continuous_text_weight": 0, 128 | "value_discrete_x_weight": 0, 129 | "value_discrete_y_weight": 0, 130 | "value_discrete_color_weight": 0, 131 | "value_discrete_shape_weight": 0, 132 | "value_discrete_size_weight": 0, 133 | "value_discrete_text_weight": 0, 134 | "value_discrete_row_weight": 0, 135 | "value_discrete_column_weight": 0, 136 | "summary_continuous_x_weight": 0, 137 | "summary_continuous_y_weight": 0, 138 | "summary_continuous_color_weight": 0, 139 | "summary_continuous_size_weight": 0, 140 | "summary_continuous_text_weight": 0, 141 | "summary_discrete_x_weight": 0, 142 | "summary_discrete_y_weight": 0, 143 | "summary_discrete_color_weight": 0, 144 | "summary_discrete_shape_weight": 0, 145 | "summary_discrete_size_weight": 0, 146 | "summary_discrete_text_weight": 0, 147 | "summary_discrete_row_weight": 0, 148 | "summary_discrete_column_weight": 0, 149 | "stack_zero_weight": 0, 150 | "stack_normalize_weight": 1 151 | } -------------------------------------------------------------------------------- /malfoy/generate/README.md: -------------------------------------------------------------------------------- 1 | # Data Generation 2 | 3 | The files in this 
repository are intended to generate groups of visualizations for pair-wise comparison. Note, **this is a work-in-progress**. 4 | 5 | ## Tools 6 | 7 | ### Generation Tools 8 | 9 | You can run generation by using `run.py`. It accepts an interaction, number of groups per interaction, and an output directory as optional command-line arguments. For example, to generate a series of files (1 file per interaction) in the data directory, each with 20 groups, run: 10 | 11 | ```python run.py --interaction all --groups 20 --output_dir ../../data/to_label``` 12 | 13 | ### Dataset Viewer 14 | 15 | To view the generated data, start the Draco Tools server (`yarn start` from `/draco-tools`) and navigate to the `Dataset Viewer`. You can view the various interactions, divided into pages by the number of dimensions in the visualizations. Note, this tool is bare-bones, as it is used internally for development at the moment. 16 | 17 | ### Labeler 18 | 19 | Getting this to function initially is a little involved. 20 | 21 | 1. In `/draco/learn/data_util.py`, comment out lines 281-283 and uncomment lines 286-288. Then run it. 22 | 2. Remove `/draco-tools/server/label_data.db` if it exists. 23 | 3. Run `/draco-tools/server/db_util.py`. 24 | 4. Start the server by running `/draco-tools/server/labeler.py`. 25 | 5. You should be good to go! Start the Draco Tools server (`yarn start` from `/draco-tools`) and navigate to the Labeler. 26 | 27 | The page displays two visualizations, their specs, and the data used. Use the mouse or arrow keys to classify a >, <, =, or both bad relationship between the two. 28 | 29 | ## Approach 30 | 31 | ### Probabilistic Specification Generation 32 | 33 | We define a probabilistic model for generation of individual specifications, roughly modeled after the distribution of visualizations we expect in the real-world.
This allows us to generate a wide breadth of visualization specifications via probabilistic sampling, approximating the true distribution without the requirement of matching the enormous space. There are two key ideas in the definition of this model. 34 | 35 | 1. For each property of a Vega-Lite specification (i.e. each JSON key, nested or not), we define a probability for that property appearing in the specification. For example, `mark` may have a probability of 1, as it is a required property. Meanwhile, `stack` may have a probability of 0.1, as it is relatively uncommon in the real-world. 36 | 2. For each value of a Vega-Lite specification (i.e. each JSON value, nested or not), we define a probability for that value appearing, conditioned on its property appearing. For example, a `center` value for `stack` may have a 0.1 probability, meaning that if `stack` is a property chosen by the model, we have a 0.1 chance of producing a `center` stacked visualization. The sum of the probabilities of each value conditioned on its parent property is constrained to 1 to preserve a valid distribution (though the code does not actually need this enforced to function properly). 37 | 38 | ### Group Generation 39 | 40 | **Groups** are a set of visualizations within which each individual will be pair-wise compared (better, worse, even) with each other individual in the group. It follows that a group with *n* visualizations will have *n* choose 2 pairs. We generate numerous groups for each of the parameters (e.g. each interaction, each field set) for the approaches described below, in order to increase diversity of specifications. 41 | 42 | We have thus far considered two approaches to group generation. In both approaches, all visualizations in a group have the same number of dimensions and fields visualized. 43 | 44 | 1. Interaction based generation. This is the approach currently in `master`. 45 | 2. Data based generation. 
This approach can be found in the branch `data_based_gen` (note that this was thrown together rather quickly by modifying the code for the approach described above, and so neither the generation code nor the UI is particularly clean or easy to understand. In the dataset viewer here, files are by number of dimensions, and the dimension selector is irrelevant). 46 | 47 | #### 1. Interaction-Based Generation 48 | 49 | Here, visualizations within a group differ on the basis of a pre-defined set of *interactions*. *Interactions* take the form of a set of specification properties. For example, visualizations in a group generated for the `mark` interaction will vary in the mark specified, while visualizations in a `mark-channel` group will vary by either mark, channel, or both. Our current approach is to guarantee enumeration over all property values for a given interaction (thus a group for `mark` will attempt to cover all mark types). 50 | 51 | In this approach, each group starts off as a single probabilistically generated **base specification**. This base specification is then modified according to the rules described above, and each valid result is added to the group. 52 | 53 | Because the specs in these groups vary in a pre-defined way, we expect pairs to be easier to label when compared to the Data-Based approach described below. 54 | 55 | Example Group 1 56 | ![example 1](screenshots/interactionbased_1.png) 57 | 58 | Example Group 2 59 | ![example 2](screenshots/interactionbased_2.png) 60 | 61 | #### 2. Data-Based Generation 62 | 63 | This approach generates groups that are more diverse in *design* compared to those generated by interaction-based generation. Visualizations in groups generated here are only guaranteed to show the same data. 64 | 65 | In this approach, each group starts off as a subset of field names from the data. Specs are then probabilistically generated and their field names filled-in such that all specs in a group may have vastly different encodings (e.g.
def train_model(X: pd.DataFrame, test_size: float = 0.3, C: float = 1, quiet=False):
    """ Train a linear SVM (no intercept) on feature vectors that all belong to class +1
    Args:
        X: a N x M matrix (DataFrame or ndarray), one feature vector per row;
           every row is treated as a +1 example
        test_size: the fraction of rows held out as dev data
        C: the C parameter passed to LinearSVC
        quiet: if True, do not print the train / dev scores
    Returns:
        the fitted LinearSVC classifier
    """

    X_train, X_dev = train_test_split(X, test_size=test_size, random_state=1)

    if isinstance(X_train, pd.DataFrame):
        # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
        # replacement. Copy so that the in-place sign flip below always works on
        # a writable array that is not shared with the frame.
        X_train = X_train.values.copy()

    size = len(X_train)

    y_train = np.ones(size)

    # All rows start out as +1 examples. Negate a random half of them (features
    # and labels) so that the classifier is given two classes to separate.
    idx = np.ones(size, dtype=bool)
    idx[: int(size / 2)] = False
    np.random.shuffle(idx)

    X_train[idx] = -X_train[idx]
    y_train[idx] = -y_train[idx]

    clf = svm.LinearSVC(C=C, fit_intercept=False)
    clf.fit(X_train, y_train)

    if not quiet:
        print("Train score: ", clf.score(X_train, y_train))
        if test_size > 0:
            # dev rows are not flipped, so all of them are expected to be +1
            print("Dev score: ", clf.score(X_dev, np.ones(len(X_dev))))

    return clf
N: {int(len(data))}", 109 | (0, 0), 110 | (0, -20), 111 | xycoords="axes fraction", 112 | textcoords="offset points", 113 | va="top", 114 | ) 115 | 116 | plt.legend(loc="lower right") 117 | plt.axis("tight") 118 | 119 | plt.show() 120 | 121 | return clf 122 | 123 | 124 | def project_and_plot(data: pd.DataFrame, test_size: float = 0.3): 125 | """ Reduce X, y into 2D using PCA and use SVM to classify them 126 | Then plot the decision boundary as well as raw data points 127 | """ 128 | X = data.negative - data.positive 129 | 130 | pca = PCA(n_components=2) 131 | X = pca.fit_transform(X) 132 | 133 | clf = train_model(X, test_size) 134 | 135 | # for plotting 136 | X0, X1 = X[:, 0], X[:, 1] 137 | xx, yy = make_meshgrid(X0, X1) 138 | 139 | cm_bright = ListedColormap(["#FF0000", "#0000FF"]) 140 | 141 | f, ax = plt.subplots(figsize=(8, 6)) 142 | 143 | plot_contours(ax, clf, xx, yy) 144 | 145 | # predictions made by the model 146 | pred = clf.predict(X) 147 | 148 | correct = pred > 0 149 | 150 | plt.scatter( 151 | X0[correct], 152 | X1[correct], 153 | c="g", 154 | cmap=cm_bright, 155 | alpha=0.5, 156 | marker=">", 157 | label="correct", 158 | ) 159 | plt.scatter( 160 | X0[~correct], 161 | X1[~correct], 162 | c="r", 163 | cmap=cm_bright, 164 | alpha=0.5, 165 | marker="<", 166 | label="incorrect", 167 | ) 168 | 169 | ax.set_xlim(xx.min(), xx.max()) 170 | ax.set_ylim(yy.min(), yy.max()) 171 | 172 | ax.set_xlabel("X0") 173 | ax.set_ylabel("X1") 174 | 175 | ax.set_xticks(()) 176 | ax.set_yticks(()) 177 | 178 | plt.title("Predictions of Linear Model") 179 | 180 | plt.annotate( 181 | f"Score: {clf.score(X, np.ones(len(X))):.{5}}. 
def make_meshgrid(x, y, h=0.01):
    """Build a regular 2-D grid covering the data plus a margin of 1 on every side
    Params:
        x: array whose min / max define the extent along the x-axis
        y: array whose min / max define the extent along the y-axis
        h: spacing between neighbouring grid points, optional
    Returns:
        xx, yy : coordinate ndarrays as produced by np.meshgrid
    """
    margin = 1
    x_ticks = np.arange(x.min() - margin, x.max() + margin, h)
    y_ticks = np.arange(y.min() - margin, y.max() + margin, h)
    grid = np.meshgrid(x_ticks, y_ticks)
    return grid[0], grid[1]
/data/spec_pairs/draco_cql.json: -------------------------------------------------------------------------------- 1 | {"headers": {"first": {"title": "Draco", "subtitle": "Draco Prediction"}, "second": {"title": "CQL", "subtitle": "Compassql Prediction"}}, "specs": [{"first": {"mark": "tick", "encoding": {"x": {"scale": {"zero": true}, "field": "Miles_per_Gallon", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "second": {"mark": "tick", "encoding": {"y": {"scale": {"zero": false}, "field": "Miles_per_Gallon", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "properties": {}}, {"first": {"mark": "tick", "encoding": {"x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "second": {"mark": "tick", "encoding": {"x": {"scale": {"zero": false}, "field": "Horsepower", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "properties": {}}, {"first": {"mark": "tick", "encoding": {"row": {"scale": {"zero": false}, "field": "Cylinders", "type": "ordinal"}, "x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "second": {"mark": "tick", "encoding": {"y": {"scale": {"zero": false}, "field": "Cylinders", "type": "ordinal"}, "x": {"scale": {"zero": false}, "field": "Horsepower", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "properties": {}}, {"first": {"mark": "tick", "encoding": {"x": {"scale": {"zero": true}, "field": "Miles_per_Gallon", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": 
"https://vega.github.io/schema/vega-lite/v2.0.json"}, "second": {"mark": "tick", "encoding": {"x": {"scale": {"zero": false}, "field": "Miles_per_Gallon", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "properties": {}}, {"first": {"mark": "rule", "encoding": {"row": {"scale": {"zero": false}, "field": "Origin", "type": "nominal"}, "x": {"scale": {"zero": true}, "type": "quantitative", "aggregate": "count"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "second": {"mark": "bar", "encoding": {"y": {"scale": {"zero": false}, "field": "Origin", "type": "nominal"}, "x": {"scale": {"zero": false}, "type": "quantitative", "aggregate": "count"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "properties": {}}, {"first": {"mark": "line", "encoding": {"x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative", "aggregate": "mean"}, "y": {"scale": {"zero": false}, "field": "Cylinders", "type": "ordinal", "aggregate": "min"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "second": {"mark": "bar", "encoding": {"x": {"scale": {"zero": false}, "field": "Horsepower", "type": "quantitative", "aggregate": "mean"}, "y": {"scale": {"zero": false}, "field": "Cylinders", "type": "ordinal"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "properties": {}}, {"first": {"mark": "area", "encoding": {"y": {"scale": {"zero": true}, "field": "Miles_per_Gallon", "type": "quantitative"}, "x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "second": {"mark": "point", "encoding": {"x": {"scale": {"zero": false}, "field": "Miles_per_Gallon", "type": 
"quantitative", "aggregate": "mean"}, "y": {"scale": {"zero": false}, "field": "Horsepower", "type": "quantitative", "aggregate": "mean"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "properties": {}}, {"first": {"mark": "rule", "encoding": {"x": {"scale": {"zero": false}, "field": "Origin", "type": "nominal"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "second": {"mark": "bar", "encoding": {"x": {"scale": {"zero": false}, "field": "Origin", "type": "nominal"}, "y": {"scale": {"zero": false}, "type": "quantitative", "aggregate": "count"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "properties": {}}, {"first": {"mark": "rule", "encoding": {"row": {"scale": {"zero": false}, "field": "Cylinders", "type": "ordinal", "aggregate": "min"}, "x": {"scale": {"zero": true}, "type": "quantitative", "aggregate": "count"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "second": {"mark": "bar", "encoding": {"y": {"scale": {"zero": false}, "field": "Cylinders", "type": "ordinal"}, "x": {"scale": {"zero": false}, "type": "quantitative", "aggregate": "count"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "properties": {}}, {"first": {"mark": "area", "encoding": {"x": {"scale": {"zero": true}, "type": "quantitative", "aggregate": "count"}, "y": {"scale": {"zero": true}, "type": "quantitative", "aggregate": "count"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "second": {"mark": "bar", "encoding": {"y": {"scale": {"zero": false}, "field": "Origin", "type": "nominal"}, "x": {"scale": {"zero": false}, "type": "quantitative", "aggregate": "count"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, 
"properties": {}}, {"first": {"mark": "area", "encoding": {"row": {"scale": {"zero": false}, "field": "Origin", "type": "ordinal", "aggregate": "min"}, "y": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative", "aggregate": "min"}, "x": {"scale": {"zero": true}, "field": "Acceleration", "type": "quantitative", "aggregate": "min"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "second": {"mark": "point", "encoding": {"y": {"scale": {"zero": false}, "field": "Origin", "type": "ordinal"}, "size": {"scale": {"zero": false}, "field": "Horsepower", "type": "quantitative", "aggregate": "mean"}, "x": {"scale": {"zero": false}, "field": "Acceleration", "type": "quantitative", "bin": {"maxbins": 10}}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "properties": {}}, {"first": {"mark": "area", "encoding": {"row": {"scale": {"zero": false}, "field": "Cylinders", "type": "nominal"}, "x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative", "aggregate": "min"}, "y": {"scale": {"zero": true}, "field": "Acceleration", "type": "quantitative", "aggregate": "min"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "second": {"mark": "point", "encoding": {"y": {"scale": {"zero": false}, "field": "Cylinders", "type": "nominal"}, "size": {"scale": {"zero": false}, "field": "Horsepower", "type": "quantitative", "aggregate": "mean"}, "x": {"scale": {"zero": false}, "field": "Acceleration", "type": "quantitative", "bin": {"maxbins": 10}}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "properties": {}}, {"first": {"mark": "bar", "encoding": {"x": {"scale": {"zero": false}, "field": "Miles_per_Gallon", "type": "quantitative", "bin": {"maxbins": 10}}}, "data": {"url": "data/cars.json"}, "$schema": 
"https://vega.github.io/schema/vega-lite/v2.0.json"}, "second": {"mark": "bar", "encoding": {"x": {"scale": {"zero": false}, "field": "Miles_per_Gallon", "type": "quantitative", "bin": {"maxbins": 5}}, "y": {"scale": {"zero": false}, "type": "quantitative", "aggregate": "count"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "properties": {}}, {"first": {"mark": "tick", "encoding": {"row": {"scale": {"zero": false}, "field": "Origin", "type": "nominal"}, "x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "second": {"mark": "tick", "encoding": {"y": {"scale": {"zero": false}, "field": "Origin", "type": "nominal"}, "x": {"scale": {"zero": false}, "field": "Horsepower", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "properties": {}}, {"first": {"mark": "line", "encoding": {"x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative", "aggregate": "mean"}, "y": {"scale": {"zero": false}, "field": "Cylinders", "type": "nominal"}, "row": {"scale": {"zero": false}, "field": "Origin", "type": "nominal"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "second": {"mark": "bar", "encoding": {"x": {"scale": {"zero": false}, "field": "Horsepower", "type": "quantitative", "aggregate": "mean"}, "y": {"scale": {"zero": false}, "field": "Cylinders", "type": "nominal"}, "row": {"scale": {"zero": false}, "field": "Origin", "type": "nominal"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "properties": {}}, {"first": {"mark": "line", "encoding": {"row": {"scale": {"zero": false}, "field": "Cylinders", "type": "nominal"}, "y": {"scale": {"zero": false}, "field": "Origin", "type": "ordinal", "aggregate": 
"min"}, "x": {"scale": {"zero": true}, "field": "Acceleration", "type": "quantitative", "aggregate": "min"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "second": {"mark": "point", "encoding": {"x": {"scale": {"zero": false}, "field": "Cylinders", "type": "nominal"}, "y": {"scale": {"zero": false}, "field": "Origin", "type": "ordinal"}, "size": {"scale": {"zero": false}, "field": "Acceleration", "type": "quantitative", "aggregate": "mean"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "properties": {}}, {"first": {"mark": "tick", "encoding": {"row": {"scale": {"zero": false}, "field": "Cylinders", "type": "ordinal"}, "x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "second": {"mark": "tick", "encoding": {"y": {"scale": {"zero": false}, "field": "Cylinders", "type": "ordinal"}, "x": {"scale": {"zero": false}, "field": "Horsepower", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json"}, "properties": {}}]} -------------------------------------------------------------------------------- /malfoy/learn/data_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Processing data for learning procedures. 
def absolute_path(p: str) -> str:
    """ Resolve ``p`` against the directory that contains this module.

    The result is a plain ``os.path.join`` of the module directory and ``p``;
    no normalisation is applied, so ``..`` segments are kept verbatim.
    """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, p)
def load_partial_full_data(path=compassql_data_path):
    """ Load partial-full spec pairs from a directory.

    ``path`` must contain an ``input`` and an ``output`` sub-directory. Every
    ``*.json`` file in ``input`` is paired with the file of the same name in
    ``output``.

    Returns a dictionary mapping each file name to a
    ``(partial spec, full spec)`` tuple. A ``KeyError`` is raised when an
    input file has no counterpart in ``output``.
    """

    def load_spec(input_dir):
        # Parse every JSON file in the directory, keyed by its bare file name.
        specs = {}
        for entry in os.listdir(input_dir):
            if not entry.endswith(".json"):
                continue
            with open(os.path.join(input_dir, entry), "r") as handle:
                specs[entry] = json.load(handle)
        return specs

    inputs = load_spec(os.path.join(path, "input"))
    outputs = load_spec(os.path.join(path, "output"))

    return {name: (partial, outputs[name]) for name, partial in inputs.items()}
def count_violations_memoized(processed_specs: Dict[str, Dict], task: "Task"):
    """ Return the violation counts for ``task``, computing them at most once.

    ``processed_specs`` is the memo table. It is keyed by the ASP encoding of
    the task (``task.to_asp()``) and is updated in place when a new task is
    evaluated.

    Raises:
        KeyError: if the task is not cached and ``count_violations`` returns
            ``None`` for it. The original code raised the same exception type
            implicitly (from the failed dict lookup); it is now raised
            explicitly with a message that says what went wrong.
    """
    key = task.to_asp()
    if key in processed_specs:
        return processed_specs[key]

    violations = count_violations(task)
    if violations is None:
        raise KeyError(f"count_violations returned no result for task:\n{key}")

    processed_specs[key] = violations
    # Hand back the freshly computed value instead of reading it out of the
    # memo table again (the table may be a manager proxy, see run_in_parallel).
    return violations
def run_in_parallel(
    func,
    data: "List[Union[PosNegExample, UnlabeledExample]]",
    fields: Tuple[str, str],
) -> pd.DataFrame:
    """ Like map, but parallel.

    ``data`` is cut into partitions of roughly ten items (at most
    ``cpu_count() * 20`` partitions) with ``np.array_split``. ``func`` is
    called once per partition, inside a manager-owned process pool, with a
    single tuple argument ``(shared_dict, fields, partition)``;
    ``shared_dict`` is a manager dict shared by all workers for memoization.
    Each call must return a data frame. The frames are concatenated and
    sorted by index.

    Returns an empty data frame when ``data`` is empty. Previously this case
    crashed inside ``np.array_split`` because zero partitions were requested.
    """
    if len(data) == 0:
        return pd.DataFrame()

    splits = min([cpu_count() * 20, math.ceil(len(data) / 10)])
    df_split = np.array_split(data, splits)
    processes = min(cpu_count(), splits)

    logger.info(
        f"Running {splits} partitions of {len(data)} items in parallel on {processes} processes."
    )

    with Manager() as manager:
        m: Any = manager  # fix for mypy
        d = m.dict()  # shared dict for memoization
        pool = m.Pool(processes=processes)

        # every worker call gets the shared dict and the field names as well
        tuples: List[Tuple[Dict, Tuple[str, str], Any]] = [
            (d, fields, s) for s in df_split
        ]

        try:
            frames = pool.map(func, tuples)
        finally:
            # release the workers even when one of the partitions failed
            pool.close()
            pool.join()

        df = pd.concat(frames)

    df = df.sort_index()

    logger.info(f"Hash of dataframe: {hash_pandas_object(df).sum()}")

    return df
def get_unlabeled_data() -> Tuple[Dict[str, UnlabeledExample], pd.DataFrame]:
    """ Load the unlabeled spec pairs together with their feature vectors.

    The specs come from ``load_unlabeled_specs``. The vectors are read from
    the pickle at ``unlabeled_pickle_path`` (written by this module's
    ``__main__`` section), with missing values replaced by 0.

    Returns a ``(specs, vectors)`` tuple; both must have the same length.
    """
    examples = load_unlabeled_specs()

    feature_vectors = pd.read_pickle(unlabeled_pickle_path)
    feature_vectors.fillna(0, inplace=True)

    assert len(examples) == len(feature_vectors)

    return examples, feature_vectors
def __init__(
    self,
    fields: List[Field],
    distributions: Dict,
    type_distribution: Dict,
    top_level_props: List[str],
    encoding_props: List[str],
) -> None:
    """
    fields -- the fields that encodings may be generated for
    distributions -- see distributions.json
    type_distribution -- stored as-is on the model
    top_level_props -- a list of top level properties
    encoding_props -- a list of encoding level properties
    """
    self.fields = fields
    self.distributions = distributions
    self.type_distribution = type_distribution
    self.top_level_props = set(top_level_props)
    self.encoding_props = set(encoding_props)

    # per prop: the enum names, their probabilities (same order),
    # and a name -> probability lookup
    self.enums: Dict[str, List[str]] = {}
    self.probs: Dict = {}
    self.enum_probs: Dict = {}

    for prop in distributions:
        values = distributions[prop]["values"]
        names = [value["name"] for value in values]
        weights = [value["probability"] for value in values]

        self.enums[prop] = names
        self.probs[prop] = weights

    for prop, names in self.enums.items():
        self.enum_probs[prop] = dict(zip(names, self.probs[prop]))
def mutate_prop(self, spec: Spec, prop: str, enum: str):
    """
    Mutates the prop in the given spec to the given enum.

    A top level prop is overwritten directly. For `channel`, one of the
    used channels is renamed to `enum` (unless `enum` is already in use).
    For an encoding prop, one of the existing encodings receives the value.
    Raises ValueError for any other prop.
    """
    is_top_level = prop in self.top_level_props
    is_channel = prop == "channel"
    is_encoding = prop in self.encoding_props

    if not (is_top_level or is_channel or is_encoding):
        raise ValueError("invalid prop {0}".format(prop))

    if is_top_level:
        spec[prop] = Model.build_value_from_enum(prop, enum)
        return

    if is_channel and enum not in spec["encoding"]:
        channels = list(spec["encoding"].keys())

        # the least likely channel has the highest prob of being replaced
        weights = [(1 - self.enum_probs["channel"][c]) for c in channels]
        victim, _ = Model.sample(channels, weights)

        # move the encoding over to the new channel
        moved = spec["encoding"][victim]
        del spec["encoding"][victim]
        spec["encoding"][enum] = moved
        return

    if is_encoding:
        channels = list(spec["encoding"].keys())

        # the most likely channel has the highest prob of being modified
        weights = [self.enum_probs["channel"][c] for c in channels]
        target, _ = Model.sample(channels, weights)

        spec["encoding"][target][prop] = Model.build_value_from_enum(prop, enum)
def __improve(self, spec: Spec, props: List[str], improvement_class):
    """
    Runs all improvements in the given improvement_class over the given spec.

    An improvement is any attribute of improvement_class that is a plain
    function; each one is called as `improvement(spec, props)`, in the
    name order produced by `dir()`.
    """
    members = (getattr(improvement_class, name) for name in dir(improvement_class))

    # collect everything first, then apply
    improvements = [member for member in members if inspect.isfunction(member)]

    for improvement in improvements:
        improvement(spec, props)
def __sample_field(self) -> Tuple[str, str]:
    """
    Removes a random field from `self.available_fields` and returns its
    name together with a vega-lite type for it.

    Raises ValueError (from `random.randrange`) when no fields are left,
    and Exception when the field has a data type that is not handled here.
    """
    field_index = random.randrange(len(self.available_fields))
    field = self.available_fields.pop(field_index)

    vl_type = None
    if field.ty == "string" or field.ty == "boolean":
        # strings and booleans are always nominal
        vl_type = "nominal"
    elif field.ty == "datetime":
        vl_type = "temporal"
    elif field.ty == "number":
        # we need to decide between nominal, ordinal, quantitative,
        # based off the cardinality
        vl_type = Model.sample_vl_type(field.cardinality)
    else:
        # Exceptions do not apply %-formatting to their arguments, so the
        # message has to be built before it is passed in.
        raise Exception("No type for {0}".format(field.ty))

    return field.name, vl_type
@staticmethod
def sample_vl_type(cardinality: int) -> str:
    """
    Samples a vega-lite type for a numerical field based
    off its cardinality.

    Returns "quantitative" with probability tanh(0.12 * cardinality);
    otherwise returns "nominal" or "ordinal" with equal probability.
    """
    # tanh with coef of 0.12, which gives us prob of picking
    # quantitative at 98.5% when cardinality is 20.
    coef = 0.12

    q_prob = np.tanh(coef * cardinality)

    if random.random() < q_prob:
        return "quantitative"

    # The quantitative draw has already failed at this point, so only the
    # two discrete types are left to choose from.
    return random.choice(["nominal", "ordinal"])
@staticmethod
def improve_bar(spec: "Spec", props: List[str]):
    """
    Adds `scale: { 'zero': True }` to the given spec
    if the mark is a bar.

    Nothing happens unless the spec has both an x and a y encoding.
    The scale is put on x when x is quantitative, otherwise on y.
    `props` is not used by this improvement.
    """
    if spec["mark"] != "bar":
        return

    x_enc = spec.get_enc_by_channel("x")
    # was looked up on channel "x" as well, so y was never considered
    y_enc = spec.get_enc_by_channel("y")

    if x_enc is None or y_enc is None:
        return

    zero_enc = x_enc if x_enc["type"] == "quantitative" else y_enc
    zero_enc["scale"] = {"zero": True}
383 | """ 384 | for channel in spec["encoding"]: 385 | enc = spec["encoding"][channel] 386 | if "stack" in enc: 387 | if not (channel == "x" or channel == "y"): 388 | del enc["stack"] 389 | elif "aggregate" not in enc: 390 | aggregate = "sum" 391 | enc["aggregate"] = aggregate 392 | -------------------------------------------------------------------------------- /data/training/saket2018.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "./schema.json", 3 | "source": "saket2018", 4 | "data": [ 5 | { 6 | "num_rows": 407, 7 | "significant": "completion", 8 | "fields": [ 9 | { 10 | "name": "n", 11 | "type": "string", 12 | "cardinality": 9 13 | }, 14 | { 15 | "name": "q1", 16 | "type": "number", 17 | "cardinality": 25 18 | }, 19 | { 20 | "name": "q2", 21 | "type": "number", 22 | "cardinality": 407 23 | } 24 | ], 25 | "task": "summary", 26 | "negative": { 27 | "mark": "point", 28 | "encoding": { 29 | "x": { 30 | "field": "n", 31 | "type": "nominal" 32 | }, 33 | "y": { 34 | "field": "q2", 35 | "type": "quantitative", 36 | "aggregate": "mean", 37 | "scale": { 38 | "zero": true 39 | } 40 | } 41 | } 42 | }, 43 | "positive": { 44 | "mark": "line", 45 | "encoding": { 46 | "x": { 47 | "field": "n", 48 | "type": "nominal" 49 | }, 50 | "y": { 51 | "field": "q2", 52 | "type": "quantitative", 53 | "aggregate": "mean", 54 | "scale": { 55 | "zero": true 56 | } 57 | } 58 | } 59 | }, 60 | "pvalue": 0.00693 61 | }, 62 | { 63 | "num_rows": 335, 64 | "significant": "accuracy", 65 | "fields": [ 66 | { 67 | "name": "n", 68 | "type": "string", 69 | "cardinality": 9 70 | }, 71 | { 72 | "name": "q1", 73 | "type": "number", 74 | "cardinality": 25 75 | }, 76 | { 77 | "name": "q2", 78 | "type": "number", 79 | "cardinality": 335 80 | } 81 | ], 82 | "task": "summary", 83 | "negative": { 84 | "mark": "line", 85 | "encoding": { 86 | "x": { 87 | "field": "q1", 88 | "type": "quantitative" 89 | }, 90 | "y": { 91 | "field": "q2", 92 | "type": 
"quantitative", 93 | "aggregate": "mean", 94 | "scale": { 95 | "zero": true 96 | } 97 | } 98 | } 99 | }, 100 | "positive": { 101 | "mark": "bar", 102 | "encoding": { 103 | "x": { 104 | "field": "q1", 105 | "type": "ordinal" 106 | }, 107 | "y": { 108 | "field": "q2", 109 | "type": "quantitative", 110 | "aggregate": "mean", 111 | "scale": { 112 | "zero": true 113 | } 114 | } 115 | } 116 | }, 117 | "pvalue": 0.002989 118 | }, 119 | { 120 | "num_rows": 407, 121 | "significant": "accuracy", 122 | "fields": [ 123 | { 124 | "name": "n", 125 | "type": "string", 126 | "cardinality": 9 127 | }, 128 | { 129 | "name": "q1", 130 | "type": "number", 131 | "cardinality": 25 132 | }, 133 | { 134 | "name": "q2", 135 | "type": "number", 136 | "cardinality": 407 137 | } 138 | ], 139 | "task": "summary", 140 | "negative": { 141 | "mark": "line", 142 | "encoding": { 143 | "x": { 144 | "field": "q1", 145 | "type": "quantitative" 146 | }, 147 | "y": { 148 | "field": "q2", 149 | "type": "quantitative", 150 | "aggregate": "mean", 151 | "scale": { 152 | "zero": true 153 | } 154 | } 155 | } 156 | }, 157 | "positive": { 158 | "mark": "bar", 159 | "encoding": { 160 | "x": { 161 | "field": "q1", 162 | "type": "ordinal" 163 | }, 164 | "y": { 165 | "field": "q2", 166 | "type": "quantitative", 167 | "aggregate": "mean", 168 | "scale": { 169 | "zero": true 170 | } 171 | } 172 | } 173 | }, 174 | "pvalue": 1e-06 175 | }, 176 | { 177 | "num_rows": 407, 178 | "significant": "accuracy", 179 | "fields": [ 180 | { 181 | "name": "n", 182 | "type": "string", 183 | "cardinality": 9 184 | }, 185 | { 186 | "name": "q1", 187 | "type": "number", 188 | "cardinality": 25 189 | }, 190 | { 191 | "name": "q2", 192 | "type": "number", 193 | "cardinality": 407 194 | } 195 | ], 196 | "task": "summary", 197 | "negative": { 198 | "mark": "line", 199 | "encoding": { 200 | "x": { 201 | "field": "q1", 202 | "type": "quantitative" 203 | }, 204 | "y": { 205 | "field": "q2", 206 | "type": "quantitative", 207 | "aggregate": 
"mean", 208 | "scale": { 209 | "zero": true 210 | } 211 | } 212 | } 213 | }, 214 | "positive": { 215 | "mark": "point", 216 | "encoding": { 217 | "x": { 218 | "field": "q1", 219 | "type": "quantitative" 220 | }, 221 | "y": { 222 | "field": "q2", 223 | "type": "quantitative", 224 | "aggregate": "mean", 225 | "scale": { 226 | "zero": true 227 | } 228 | } 229 | } 230 | }, 231 | "pvalue": 0.002024 232 | }, 233 | { 234 | "num_rows": 335, 235 | "significant": "accuracy", 236 | "fields": [ 237 | { 238 | "name": "n", 239 | "type": "string", 240 | "cardinality": 9 241 | }, 242 | { 243 | "name": "q1", 244 | "type": "number", 245 | "cardinality": 25 246 | }, 247 | { 248 | "name": "q2", 249 | "type": "number", 250 | "cardinality": 335 251 | } 252 | ], 253 | "task": "summary", 254 | "negative": { 255 | "mark": "line", 256 | "encoding": { 257 | "x": { 258 | "field": "n", 259 | "type": "ordinal" 260 | }, 261 | "y": { 262 | "field": "q2", 263 | "type": "quantitative", 264 | "aggregate": "mean", 265 | "scale": { 266 | "zero": true 267 | } 268 | } 269 | } 270 | }, 271 | "positive": { 272 | "mark": "bar", 273 | "encoding": { 274 | "x": { 275 | "field": "n", 276 | "type": "ordinal" 277 | }, 278 | "y": { 279 | "field": "q2", 280 | "type": "quantitative", 281 | "aggregate": "mean", 282 | "scale": { 283 | "zero": true 284 | } 285 | } 286 | } 287 | }, 288 | "pvalue": 0.001467 289 | }, 290 | { 291 | "num_rows": 335, 292 | "significant": "completion", 293 | "fields": [ 294 | { 295 | "name": "n", 296 | "type": "string", 297 | "cardinality": 9 298 | }, 299 | { 300 | "name": "q1", 301 | "type": "number", 302 | "cardinality": 25 303 | }, 304 | { 305 | "name": "q2", 306 | "type": "number", 307 | "cardinality": 335 308 | } 309 | ], 310 | "task": "value", 311 | "negative": { 312 | "mark": "bar", 313 | "encoding": { 314 | "x": { 315 | "field": "n", 316 | "type": "nominal" 317 | }, 318 | "y": { 319 | "field": "q2", 320 | "type": "quantitative", 321 | "aggregate": "mean", 322 | "scale": { 323 | 
"zero": true 324 | } 325 | } 326 | } 327 | }, 328 | "positive": { 329 | "mark": "line", 330 | "encoding": { 331 | "x": { 332 | "field": "n", 333 | "type": "nominal" 334 | }, 335 | "y": { 336 | "field": "q2", 337 | "type": "quantitative", 338 | "aggregate": "mean", 339 | "scale": { 340 | "zero": true 341 | } 342 | } 343 | } 344 | }, 345 | "pvalue": 0.006602 346 | }, 347 | { 348 | "num_rows": 335, 349 | "significant": "accuracy", 350 | "fields": [ 351 | { 352 | "name": "n", 353 | "type": "string", 354 | "cardinality": 9 355 | }, 356 | { 357 | "name": "q1", 358 | "type": "number", 359 | "cardinality": 25 360 | }, 361 | { 362 | "name": "q2", 363 | "type": "number", 364 | "cardinality": 335 365 | } 366 | ], 367 | "task": "value", 368 | "negative": { 369 | "mark": "line", 370 | "encoding": { 371 | "x": { 372 | "field": "q1", 373 | "type": "quantitative" 374 | }, 375 | "y": { 376 | "field": "q2", 377 | "type": "quantitative", 378 | "aggregate": "mean", 379 | "scale": { 380 | "zero": true 381 | } 382 | } 383 | } 384 | }, 385 | "positive": { 386 | "mark": "bar", 387 | "encoding": { 388 | "x": { 389 | "field": "q1", 390 | "type": "ordinal" 391 | }, 392 | "y": { 393 | "field": "q2", 394 | "type": "quantitative", 395 | "aggregate": "mean", 396 | "scale": { 397 | "zero": true 398 | } 399 | } 400 | } 401 | }, 402 | "pvalue": 0.000645 403 | }, 404 | { 405 | "num_rows": 335, 406 | "significant": "accuracy", 407 | "fields": [ 408 | { 409 | "name": "n", 410 | "type": "string", 411 | "cardinality": 9 412 | }, 413 | { 414 | "name": "q1", 415 | "type": "number", 416 | "cardinality": 25 417 | }, 418 | { 419 | "name": "q2", 420 | "type": "number", 421 | "cardinality": 335 422 | } 423 | ], 424 | "task": "value", 425 | "negative": { 426 | "mark": "point", 427 | "encoding": { 428 | "x": { 429 | "field": "q1", 430 | "type": "quantitative" 431 | }, 432 | "y": { 433 | "field": "q2", 434 | "type": "quantitative", 435 | "aggregate": "mean", 436 | "scale": { 437 | "zero": true 438 | } 439 | } 440 
| } 441 | }, 442 | "positive": { 443 | "mark": "bar", 444 | "encoding": { 445 | "x": { 446 | "field": "q1", 447 | "type": "ordinal" 448 | }, 449 | "y": { 450 | "field": "q2", 451 | "type": "quantitative", 452 | "aggregate": "mean", 453 | "scale": { 454 | "zero": true 455 | } 456 | } 457 | } 458 | }, 459 | "pvalue": 0.00504 460 | }, 461 | { 462 | "num_rows": 407, 463 | "significant": "accuracy", 464 | "fields": [ 465 | { 466 | "name": "n", 467 | "type": "string", 468 | "cardinality": 9 469 | }, 470 | { 471 | "name": "q1", 472 | "type": "number", 473 | "cardinality": 25 474 | }, 475 | { 476 | "name": "q2", 477 | "type": "number", 478 | "cardinality": 407 479 | } 480 | ], 481 | "task": "value", 482 | "negative": { 483 | "mark": "line", 484 | "encoding": { 485 | "x": { 486 | "field": "q1", 487 | "type": "quantitative" 488 | }, 489 | "y": { 490 | "field": "q2", 491 | "type": "quantitative", 492 | "aggregate": "mean", 493 | "scale": { 494 | "zero": true 495 | } 496 | } 497 | } 498 | }, 499 | "positive": { 500 | "mark": "bar", 501 | "encoding": { 502 | "x": { 503 | "field": "q1", 504 | "type": "ordinal" 505 | }, 506 | "y": { 507 | "field": "q2", 508 | "type": "quantitative", 509 | "aggregate": "mean", 510 | "scale": { 511 | "zero": true 512 | } 513 | } 514 | } 515 | }, 516 | "pvalue": 0.001245 517 | }, 518 | { 519 | "num_rows": 335, 520 | "significant": "completion", 521 | "fields": [ 522 | { 523 | "name": "n", 524 | "type": "string", 525 | "cardinality": 9 526 | }, 527 | { 528 | "name": "q1", 529 | "type": "number", 530 | "cardinality": 25 531 | }, 532 | { 533 | "name": "q2", 534 | "type": "number", 535 | "cardinality": 335 536 | } 537 | ], 538 | "task": "value", 539 | "negative": { 540 | "mark": "bar", 541 | "encoding": { 542 | "x": { 543 | "field": "n", 544 | "type": "ordinal" 545 | }, 546 | "y": { 547 | "field": "q2", 548 | "type": "quantitative", 549 | "aggregate": "mean", 550 | "scale": { 551 | "zero": true 552 | } 553 | } 554 | } 555 | }, 556 | "positive": { 557 | 
"mark": "line", 558 | "encoding": { 559 | "x": { 560 | "field": "n", 561 | "type": "ordinal" 562 | }, 563 | "y": { 564 | "field": "q2", 565 | "type": "quantitative", 566 | "aggregate": "mean", 567 | "scale": { 568 | "zero": true 569 | } 570 | } 571 | } 572 | }, 573 | "pvalue": 0.002807 574 | } 575 | ] 576 | } 577 | -------------------------------------------------------------------------------- /data/spec_pairs/draco_cql_default_weights.json: -------------------------------------------------------------------------------- 1 | {"headers": {"first": {"title": "Draco", "subtitle": "Draco Prediction"}, "second": {"title": "CQL", "subtitle": "Compassql Prediction"}}, "specs": [{"first": {"mark": "bar", "encoding": {"x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative", "aggregate": "mean"}, "y": {"field": "Cylinders", "type": "ordinal"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "bar", "encoding": {"x": {"aggregate": "mean", "field": "Horsepower", "type": "quantitative"}, "y": {"field": "Cylinders", "type": "nominal"}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "?", "aggregate": "mean", "field": "Horsepower"}, {"channel": "?", "field": "Cylinders"}]}}}, {"first": {"mark": "tick", "encoding": {"x": {"scale": {"zero": true}, "field": "Miles_per_Gallon", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "tick", "encoding": {"y": {"field": "Miles_per_Gallon", "type": "quantitative", "scale": {}}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "?", 
"bin": "?", "aggregate": "?", "field": "Miles_per_Gallon", "type": "quantitative"}]}}}, {"first": {"mark": "tick", "encoding": {"x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "tick", "encoding": {"x": {"field": "Horsepower", "type": "quantitative", "scale": {}}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "x", "field": "Horsepower", "type": "quantitative"}]}}}, {"first": {"mark": "tick", "encoding": {"y": {"field": "Cylinders", "type": "ordinal"}, "x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "tick", "encoding": {"y": {"field": "Cylinders", "type": "ordinal"}, "x": {"field": "Horsepower", "type": "quantitative", "scale": {}}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "?", "field": "Cylinders", "type": "ordinal"}, {"channel": "?", "bin": "?", "aggregate": "?", "field": "Horsepower", "type": "quantitative"}]}}}, {"first": {"mark": "tick", "encoding": {"x": {"scale": {"zero": true}, "field": "Miles_per_Gallon", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "tick", "encoding": {"x": {"field": "Miles_per_Gallon", "type": "quantitative", "scale": {}}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": 
"tick", "encodings": [{"channel": "x", "scale": {"type": "?"}, "field": "Miles_per_Gallon", "type": "quantitative"}]}}}, {"first": {"mark": "point", "encoding": {"x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative"}, "y": {"scale": {"zero": true}, "field": "Acceleration", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "point", "encoding": {"x": {"field": "Horsepower", "type": "quantitative", "scale": {}}, "y": {"field": "Acceleration", "type": "quantitative", "scale": {}}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "?", "field": "Horsepower"}, {"channel": "?", "field": "Acceleration"}]}}}, {"first": {"mark": "point", "encoding": {"x": {"scale": {"zero": true}, "field": "Acceleration", "type": "quantitative"}, "y": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "point", "encoding": {"x": {"field": "Acceleration", "type": "quantitative", "scale": {}}, "y": {"field": "Horsepower", "type": "quantitative", "scale": {}}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"field": "Acceleration", "channel": "?", "type": "?"}, {"field": "Horsepower", "channel": "?", "type": "?"}]}}}, {"first": {"mark": "bar", "encoding": {"y": {"field": "Origin", "type": "nominal"}, "x": {"scale": {"zero": true}, "type": "quantitative", "aggregate": "count"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, 
"mark": "bar", "encoding": {"y": {"field": "Origin", "type": "nominal"}, "x": {"aggregate": "count", "field": "*", "type": "quantitative"}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "?", "field": "Origin", "type": "nominal"}]}}}, {"first": {"mark": "bar", "encoding": {"x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative", "aggregate": "mean"}, "y": {"field": "Cylinders", "type": "ordinal"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "bar", "encoding": {"x": {"aggregate": "mean", "field": "Horsepower", "type": "quantitative"}, "y": {"field": "Cylinders", "type": "ordinal"}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "x", "aggregate": "mean", "field": "Horsepower", "type": "quantitative"}, {"channel": "y", "field": "Cylinders", "type": "ordinal"}]}}}, {"first": {"mark": "point", "encoding": {"y": {"scale": {"zero": true}, "field": "Miles_per_Gallon", "type": "quantitative"}, "x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "point", "encoding": {"x": {"field": "Miles_per_Gallon", "type": "quantitative"}, "y": {"field": "Horsepower", "type": "quantitative"}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "?", "bin": "?", "aggregate": "?", "field": "Miles_per_Gallon", "type": "quantitative"}, {"channel": "?", "bin": "?", "aggregate": "?", "field": 
"Horsepower", "type": "quantitative"}]}}}, {"first": {"mark": "bar", "encoding": {"x": {"field": "Origin", "type": "nominal"}, "y": {"scale": {"zero": true}, "type": "quantitative", "aggregate": "count"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "bar", "encoding": {"x": {"field": "Origin", "type": "nominal"}, "y": {"aggregate": "count", "field": "*", "type": "quantitative"}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "x", "field": "Origin", "type": "nominal"}]}}}, {"first": {"mark": "tick", "encoding": {"x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "tick", "encoding": {"x": {"field": "Horsepower", "type": "quantitative", "scale": {}}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "?", "field": "Horsepower"}]}}}, {"first": {"mark": "bar", "encoding": {"y": {"field": "Cylinders", "type": "ordinal"}, "x": {"scale": {"zero": true}, "type": "quantitative", "aggregate": "count"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "bar", "encoding": {"y": {"field": "Cylinders", "type": "ordinal"}, "x": {"aggregate": "count", "field": "*", "type": "quantitative"}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "?", "field": "Cylinders", "type": "ordinal"}]}}}, {"first": {"mark": 
"bar", "encoding": {"x": {"scale": {"zero": true}, "type": "quantitative", "aggregate": "count"}, "y": {"field": "Displacement", "type": "quantitative", "bin": {"maxbins": 10}}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "bar", "encoding": {"y": {"field": "Origin", "type": "nominal"}, "x": {"aggregate": "count", "field": "*", "type": "quantitative"}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "?", "bin": "?", "timeUnit": "?", "field": "?", "type": "?"}, {"channel": "?", "field": "*", "aggregate": "count", "type": "quantitative"}]}}}, {"first": {"mark": "point", "encoding": {"y": {"field": "Origin", "type": "nominal"}, "x": {"field": "Horsepower", "type": "quantitative", "bin": {"maxbins": 10}}, "size": {"scale": {"zero": true}, "field": "Acceleration", "type": "quantitative", "aggregate": "mean"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "point", "encoding": {"y": {"field": "Origin", "type": "nominal"}, "size": {"aggregate": "mean", "field": "Horsepower", "type": "quantitative"}, "x": {"bin": {}, "field": "Acceleration", "type": "quantitative"}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "?", "field": "Origin", "type": "nominal"}, {"channel": "?", "bin": "?", "aggregate": "?", "field": "Horsepower", "type": "quantitative"}, {"channel": "?", "bin": "?", "aggregate": "?", "field": "Acceleration", "type": "quantitative"}]}}}, {"first": {"mark": "point", "encoding": {"x": {"field": "Cylinders", "type": "ordinal"}, "size": {"scale": {"zero": true}, "field": "Horsepower", "type": 
"quantitative", "aggregate": "mean"}, "y": {"field": "Acceleration", "type": "quantitative", "bin": {"maxbins": 10}}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "point", "encoding": {"y": {"field": "Cylinders", "type": "ordinal"}, "size": {"aggregate": "mean", "field": "Horsepower", "type": "quantitative"}, "x": {"bin": {}, "field": "Acceleration", "type": "quantitative"}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "?", "field": "Cylinders", "type": "ordinal"}, {"channel": "?", "bin": "?", "aggregate": "?", "field": "Horsepower", "type": "quantitative"}, {"channel": "?", "bin": "?", "aggregate": "?", "field": "Acceleration", "type": "quantitative"}]}}}, {"first": {"mark": "bar", "encoding": {"x": {"field": "Miles_per_Gallon", "type": "quantitative", "bin": {"maxbins": 10}}, "y": {"scale": {"zero": true}, "type": "quantitative", "aggregate": "count"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "bar", "encoding": {"x": {"bin": {"maxbins": 5}, "field": "Miles_per_Gallon", "type": "quantitative"}, "y": {"aggregate": "count", "field": "*", "type": "quantitative"}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "bar", "encodings": [{"channel": "x", "bin": {"maxbins": "?"}, "field": "Miles_per_Gallon", "type": "quantitative"}]}}}, {"first": {"mark": "tick", "encoding": {"y": {"field": "Origin", "type": "nominal"}, "x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": 
"data/cars.json"}, "mark": "tick", "encoding": {"y": {"field": "Origin", "type": "nominal"}, "x": {"field": "Horsepower", "type": "quantitative", "scale": {}}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "?", "field": "Origin", "type": "nominal"}, {"channel": "?", "bin": "?", "aggregate": "?", "field": "Horsepower", "type": "quantitative"}]}}}, {"first": {"mark": "tick", "encoding": {"y": {"field": "Release_Date", "type": "temporal"}}, "data": {"url": "data/movies.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/movies.json"}, "mark": "point", "encoding": {"x": {"field": "Release_Date", "type": "temporal", "scale": {}}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/movies.json"}, "mark": "?", "encodings": [{"channel": "?", "timeUnit": "?", "field": "Release_Date", "type": "temporal"}]}}}, {"first": {"mark": "bar", "encoding": {"x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative", "aggregate": "mean"}, "y": {"field": "Cylinders", "type": "ordinal"}, "row": {"field": "Origin", "type": "nominal"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "bar", "encoding": {"x": {"aggregate": "mean", "field": "Horsepower", "type": "quantitative"}, "y": {"field": "Cylinders", "type": "ordinal", "scale": {"rangeStep": 12}}, "row": {"field": "Origin", "type": "nominal"}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "x", "aggregate": "mean", "field": "Horsepower", "type": "quantitative"}, {"channel": "y", "field": "Cylinders", "type": "ordinal"}, 
{"channel": "?", "field": "Origin", "type": "nominal"}]}}}, {"first": {"mark": "point", "encoding": {"x": {"scale": {"zero": true}, "field": "IMDB_Rating", "type": "quantitative"}, "y": {"field": "Release_Date", "type": "temporal"}}, "data": {"url": "data/movies.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/movies.json"}, "mark": "point", "encoding": {"y": {"field": "IMDB_Rating", "type": "quantitative", "scale": {}}, "x": {"field": "Release_Date", "type": "temporal", "scale": {}}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/movies.json"}, "mark": "?", "encodings": [{"channel": "?", "bin": "?", "aggregate": "?", "field": "IMDB_Rating", "type": "quantitative"}, {"channel": "?", "timeUnit": "?", "field": "Release_Date", "type": "temporal"}]}}}, {"first": {"mark": "bar", "encoding": {"x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative", "aggregate": "mean"}, "y": {"field": "Cylinders", "type": "ordinal"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "bar", "encoding": {"x": {"aggregate": "mean", "field": "Horsepower", "type": "quantitative"}, "y": {"field": "Cylinders", "type": "ordinal"}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "x", "aggregate": "mean", "field": "Horsepower", "type": "quantitative"}, {"channel": "y", "field": "Cylinders", "type": "ordinal"}]}}}, {"first": {"mark": "point", "encoding": {"x": {"field": "Cylinders", "type": "ordinal"}, "y": {"field": "Origin", "type": "nominal"}, "size": {"scale": {"zero": true}, "field": "Acceleration", "type": "quantitative", "aggregate": "mean"}}, "data": {"url": "data/cars.json"}, "$schema": 
"https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "point", "encoding": {"x": {"field": "Cylinders", "type": "ordinal"}, "y": {"field": "Origin", "type": "nominal"}, "size": {"aggregate": "mean", "field": "Acceleration", "type": "quantitative"}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "?", "field": "Cylinders", "type": "ordinal"}, {"channel": "?", "field": "Origin", "type": "nominal"}, {"channel": "?", "bin": "?", "aggregate": "?", "field": "Acceleration", "type": "quantitative"}]}}}, {"first": {"mark": "point", "encoding": {"y": {"field": "Major_Genre", "type": "nominal"}, "x": {"field": "Creative_Type", "type": "nominal"}, "size": {"scale": {"zero": true}, "type": "quantitative", "aggregate": "count"}}, "data": {"url": "data/movies.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/movies.json"}, "mark": "point", "encoding": {"x": {"field": "Major_Genre", "type": "nominal", "scale": {"rangeStep": 12}}, "y": {"field": "Creative_Type", "type": "nominal"}, "size": {"aggregate": "count", "field": "*", "type": "quantitative"}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/movies.json"}, "mark": "?", "encodings": [{"channel": "?", "field": "Major_Genre", "type": "nominal"}, {"channel": "?", "field": "Creative_Type", "type": "nominal"}]}}}, {"first": {"mark": "tick", "encoding": {"y": {"field": "Cylinders", "type": "ordinal"}, "x": {"scale": {"zero": true}, "field": "Horsepower", "type": "quantitative"}}, "data": {"url": "data/cars.json"}, "$schema": "https://vega.github.io/schema/vega-lite/v2.json"}, "second": {"data": {"url": "data/cars.json"}, "mark": "tick", "encoding": {"y": {"field": "Cylinders", "type": "ordinal"}, "x": {"field": 
"Horsepower", "type": "quantitative", "scale": {}}}, "config": {"overlay": {"line": true}, "scale": {"useUnaggregatedDomain": true}}}, "properties": {"input": {"data": {"url": "data/cars.json"}, "mark": "?", "encodings": [{"channel": "?", "field": "Cylinders", "type": "ordinal"}, {"channel": "?", "bin": "?", "aggregate": "?", "field": "Horsepower", "type": "quantitative"}]}}}]} -------------------------------------------------------------------------------- /data/random_data.json: -------------------------------------------------------------------------------- 1 | [{"q1":3.02,"q2":1,"q3":-1,"q4":73,"n1":"blue","n2":"dog","n3":"Seattle","n4":"home","o1":6,"o2":"sad","o3":"XS","o4":"light","t1":"03/04/2018","t2":"10/22/2017","t3":"01/28/2018","t4":"01/02/2018"}, 2 | {"q1":0.98,"q2":3,"q3":-1,"q4":78,"n1":"red","n2":"fish","n3":"San Francisco","n4":"home","o1":8,"o2":"neutral","o3":"XL","o4":"light","t1":"03/05/2018","t2":"04/27/2017","t3":"01/21/2018","t4":"01/04/2018"}, 3 | {"q1":0.23,"q2":6,"q3":-1,"q4":8,"n1":"green","n2":"dog","n3":"San Francisco","n4":"away","o1":6,"o2":"neutral","o3":"S","o4":"medium","t1":"03/03/2018","t2":"12/18/2017","t3":"01/14/2018","t4":"01/02/2018"}, 4 | {"q1":-1.15,"q2":1,"q3":-1,"q4":29,"n1":"green","n2":"dog","n3":"San Francisco","n4":"away","o1":8,"o2":"sad","o3":"M","o4":"light","t1":"03/03/2018","t2":"01/21/2018","t3":"01/08/2018","t4":"01/04/2018"}, 5 | {"q1":1.35,"q2":1,"q3":-1,"q4":36,"n1":"red","n2":"hamster","n3":"Seattle","n4":"away","o1":4,"o2":"neutral","o3":"L","o4":"dark","t1":"03/06/2018","t2":"04/21/2017","t3":"01/16/2018","t4":"01/01/2018"}, 6 | {"q1":-0.17,"q2":2,"q3":-1,"q4":67,"n1":"blue","n2":"hamster","n3":"New York","n4":"home","o1":4,"o2":"happy","o3":"S","o4":"dark","t1":"03/06/2018","t2":"08/20/2017","t3":"01/12/2018","t4":"01/04/2018"}, 7 | 
{"q1":0.98,"q2":1,"q3":-1,"q4":8,"n1":"red","n2":"cat","n3":"Seattle","n4":"away","o1":4,"o2":"happy","o3":"M","o4":"medium","t1":"03/07/2018","t2":"07/30/2017","t3":"01/23/2018","t4":"12/31/2017"}, 8 | {"q1":-1.57,"q2":5,"q3":-1,"q4":84,"n1":"red","n2":"fish","n3":"New York","n4":"away","o1":4,"o2":"happy","o3":"M","o4":"light","t1":"03/03/2018","t2":"08/24/2017","t3":"01/27/2018","t4":"12/31/2017"}, 9 | {"q1":0.34,"q2":1,"q3":-1,"q4":78,"n1":"green","n2":"dog","n3":"Seattle","n4":"home","o1":4,"o2":"happy","o3":"S","o4":"dark","t1":"03/09/2018","t2":"02/26/2018","t3":"01/21/2018","t4":"01/05/2018"}, 10 | {"q1":1.78,"q2":1,"q3":-1,"q4":71,"n1":"red","n2":"cat","n3":"New York","n4":"away","o1":8,"o2":"neutral","o3":"L","o4":"dark","t1":"03/03/2018","t2":"10/16/2017","t3":"01/02/2018","t4":"01/04/2018"}, 11 | {"q1":-1.27,"q2":3,"q3":-1,"q4":99,"n1":"red","n2":"hamster","n3":"San Francisco","n4":"away","o1":2,"o2":"happy","o3":"L","o4":"medium","t1":"03/07/2018","t2":"04/07/2017","t3":"01/24/2018","t4":"01/05/2018"}, 12 | {"q1":-0.87,"q2":2,"q3":-1,"q4":63,"n1":"green","n2":"dog","n3":"New York","n4":"away","o1":2,"o2":"sad","o3":"XL","o4":"medium","t1":"03/07/2018","t2":"04/12/2017","t3":"01/26/2018","t4":"01/03/2018"}, 13 | {"q1":-0.64,"q2":2,"q3":-1,"q4":45,"n1":"blue","n2":"hamster","n3":"New York","n4":"away","o1":4,"o2":"sad","o3":"XS","o4":"medium","t1":"03/09/2018","t2":"11/23/2017","t3":"01/30/2018","t4":"01/02/2018"}, 14 | {"q1":0.79,"q2":4,"q3":-1,"q4":76,"n1":"green","n2":"hamster","n3":"Seattle","n4":"away","o1":2,"o2":"sad","o3":"M","o4":"light","t1":"03/07/2018","t2":"06/29/2017","t3":"01/14/2018","t4":"01/01/2018"}, 15 | {"q1":0.95,"q2":1,"q3":-1,"q4":75,"n1":"green","n2":"cat","n3":"New York","n4":"away","o1":2,"o2":"neutral","o3":"L","o4":"light","t1":"03/07/2018","t2":"12/05/2017","t3":"01/04/2018","t4":"01/02/2018"}, 16 | 
{"q1":-0.65,"q2":1,"q3":-1,"q4":87,"n1":"red","n2":"dog","n3":"Seattle","n4":"home","o1":2,"o2":"neutral","o3":"S","o4":"medium","t1":"03/04/2018","t2":"06/15/2017","t3":"01/18/2018","t4":"01/04/2018"}, 17 | {"q1":0.9,"q2":2,"q3":-1,"q4":19,"n1":"green","n2":"hamster","n3":"San Francisco","n4":"home","o1":8,"o2":"sad","o3":"XL","o4":"light","t1":"03/06/2018","t2":"02/22/2018","t3":"01/15/2018","t4":"01/01/2018"}, 18 | {"q1":-0.51,"q2":3,"q3":-1,"q4":96,"n1":"green","n2":"cat","n3":"San Francisco","n4":"away","o1":6,"o2":"happy","o3":"M","o4":"light","t1":"03/09/2018","t2":"05/12/2017","t3":"01/06/2018","t4":"01/04/2018"}, 19 | {"q1":0.52,"q2":2,"q3":-1,"q4":59,"n1":"blue","n2":"hamster","n3":"Seattle","n4":"away","o1":2,"o2":"happy","o3":"L","o4":"light","t1":"03/04/2018","t2":"03/28/2017","t3":"01/08/2018","t4":"01/04/2018"}, 20 | {"q1":0.51,"q2":1,"q3":-1,"q4":85,"n1":"blue","n2":"fish","n3":"New York","n4":"home","o1":2,"o2":"neutral","o3":"XS","o4":"light","t1":"03/05/2018","t2":"05/16/2017","t3":"01/11/2018","t4":"01/05/2018"}, 21 | {"q1":-0.2,"q2":1,"q3":-1,"q4":7,"n1":"blue","n2":"hamster","n3":"San Francisco","n4":"home","o1":4,"o2":"neutral","o3":"XL","o4":"light","t1":"03/09/2018","t2":"12/14/2017","t3":"01/25/2018","t4":"01/05/2018"}, 22 | {"q1":-1.4,"q2":1,"q3":-1,"q4":73,"n1":"green","n2":"fish","n3":"New York","n4":"home","o1":2,"o2":"sad","o3":"XS","o4":"light","t1":"03/06/2018","t2":"11/16/2017","t3":"01/08/2018","t4":"01/02/2018"}, 23 | {"q1":1.18,"q2":3,"q3":-1,"q4":98,"n1":"red","n2":"dog","n3":"New York","n4":"home","o1":6,"o2":"sad","o3":"M","o4":"light","t1":"03/06/2018","t2":"09/08/2017","t3":"01/17/2018","t4":"01/01/2018"}, 24 | {"q1":-0.64,"q2":1,"q3":-1,"q4":92,"n1":"green","n2":"cat","n3":"Seattle","n4":"home","o1":8,"o2":"sad","o3":"XS","o4":"medium","t1":"03/05/2018","t2":"10/26/2017","t3":"01/24/2018","t4":"01/05/2018"}, 25 | {"q1":-1.27,"q2":3,"q3":-1,"q4":3,"n1":"blue","n2":"fish","n3":"San 
Francisco","n4":"away","o1":4,"o2":"neutral","o3":"L","o4":"medium","t1":"03/06/2018","t2":"11/20/2017","t3":"01/28/2018","t4":"01/05/2018"}, 26 | {"q1":-0.49,"q2":2,"q3":-1,"q4":76,"n1":"green","n2":"dog","n3":"Seattle","n4":"home","o1":8,"o2":"happy","o3":"L","o4":"light","t1":"03/05/2018","t2":"09/30/2017","t3":"01/19/2018","t4":"01/03/2018"}, 27 | {"q1":-0.87,"q2":4,"q3":-1,"q4":29,"n1":"blue","n2":"hamster","n3":"New York","n4":"home","o1":8,"o2":"sad","o3":"XS","o4":"dark","t1":"03/07/2018","t2":"02/04/2018","t3":"01/09/2018","t4":"12/31/2017"}, 28 | {"q1":-1.28,"q2":1,"q3":-1,"q4":58,"n1":"red","n2":"fish","n3":"San Francisco","n4":"home","o1":2,"o2":"sad","o3":"L","o4":"light","t1":"03/03/2018","t2":"04/22/2017","t3":"01/12/2018","t4":"01/02/2018"}, 29 | {"q1":0.98,"q2":2,"q3":-1,"q4":66,"n1":"green","n2":"cat","n3":"San Francisco","n4":"home","o1":6,"o2":"sad","o3":"M","o4":"dark","t1":"03/04/2018","t2":"02/03/2018","t3":"01/07/2018","t4":"01/02/2018"}, 30 | {"q1":-0.96,"q2":1,"q3":-1,"q4":28,"n1":"green","n2":"dog","n3":"Seattle","n4":"home","o1":6,"o2":"happy","o3":"M","o4":"medium","t1":"03/06/2018","t2":"11/24/2017","t3":"01/22/2018","t4":"01/03/2018"}, 31 | {"q1":-0.5,"q2":1,"q3":-1,"q4":36,"n1":"green","n2":"dog","n3":"Seattle","n4":"home","o1":4,"o2":"neutral","o3":"XL","o4":"medium","t1":"03/07/2018","t2":"06/15/2017","t3":"01/09/2018","t4":"01/01/2018"}, 32 | {"q1":1.2,"q2":1,"q3":-1,"q4":58,"n1":"green","n2":"cat","n3":"San Francisco","n4":"home","o1":8,"o2":"neutral","o3":"S","o4":"dark","t1":"03/04/2018","t2":"03/29/2017","t3":"01/13/2018","t4":"01/02/2018"}, 33 | {"q1":0.2,"q2":2,"q3":-1,"q4":77,"n1":"red","n2":"dog","n3":"New York","n4":"away","o1":2,"o2":"sad","o3":"L","o4":"dark","t1":"03/05/2018","t2":"03/14/2017","t3":"01/18/2018","t4":"01/03/2018"}, 34 | {"q1":0.25,"q2":2,"q3":-1,"q4":11,"n1":"green","n2":"dog","n3":"San 
Francisco","n4":"home","o1":2,"o2":"sad","o3":"XL","o4":"medium","t1":"03/07/2018","t2":"01/20/2018","t3":"01/23/2018","t4":"01/02/2018"}, 35 | {"q1":1.09,"q2":1,"q3":-1,"q4":56,"n1":"blue","n2":"dog","n3":"San Francisco","n4":"home","o1":6,"o2":"neutral","o3":"S","o4":"dark","t1":"03/04/2018","t2":"12/30/2017","t3":"01/19/2018","t4":"01/02/2018"}, 36 | {"q1":0.44,"q2":2,"q3":-1,"q4":64,"n1":"green","n2":"dog","n3":"Seattle","n4":"away","o1":4,"o2":"neutral","o3":"M","o4":"dark","t1":"03/05/2018","t2":"12/10/2017","t3":"01/17/2018","t4":"01/01/2018"}, 37 | {"q1":0.02,"q2":3,"q3":-1,"q4":96,"n1":"red","n2":"fish","n3":"New York","n4":"away","o1":4,"o2":"sad","o3":"M","o4":"light","t1":"03/07/2018","t2":"03/07/2017","t3":"01/27/2018","t4":"01/05/2018"}, 38 | {"q1":-0.24,"q2":4,"q3":-1,"q4":75,"n1":"blue","n2":"dog","n3":"San Francisco","n4":"away","o1":2,"o2":"sad","o3":"M","o4":"dark","t1":"03/06/2018","t2":"02/24/2018","t3":"01/01/2018","t4":"01/04/2018"}, 39 | {"q1":0.4,"q2":6,"q3":-1,"q4":78,"n1":"red","n2":"fish","n3":"New York","n4":"home","o1":6,"o2":"sad","o3":"XS","o4":"medium","t1":"03/09/2018","t2":"09/27/2017","t3":"01/25/2018","t4":"01/03/2018"}, 40 | {"q1":-1.14,"q2":4,"q3":-1,"q4":75,"n1":"blue","n2":"dog","n3":"San Francisco","n4":"home","o1":6,"o2":"happy","o3":"M","o4":"medium","t1":"03/09/2018","t2":"06/18/2017","t3":"01/23/2018","t4":"01/02/2018"}, 41 | {"q1":0.07,"q2":1,"q3":-1,"q4":76,"n1":"red","n2":"hamster","n3":"New York","n4":"away","o1":2,"o2":"neutral","o3":"XS","o4":"light","t1":"03/08/2018","t2":"09/15/2017","t3":"01/12/2018","t4":"01/04/2018"}, 42 | {"q1":-1.06,"q2":3,"q3":-1,"q4":87,"n1":"red","n2":"hamster","n3":"New York","n4":"home","o1":2,"o2":"neutral","o3":"S","o4":"light","t1":"03/03/2018","t2":"07/04/2017","t3":"01/26/2018","t4":"01/01/2018"}, 43 | 
{"q1":-0.37,"q2":1,"q3":-1,"q4":99,"n1":"red","n2":"dog","n3":"Seattle","n4":"away","o1":6,"o2":"happy","o3":"XL","o4":"medium","t1":"03/05/2018","t2":"10/17/2017","t3":"01/30/2018","t4":"01/01/2018"}, 44 | {"q1":0.06,"q2":2,"q3":-1,"q4":99,"n1":"red","n2":"cat","n3":"San Francisco","n4":"home","o1":4,"o2":"happy","o3":"L","o4":"medium","t1":"03/07/2018","t2":"11/07/2017","t3":"01/16/2018","t4":"01/04/2018"}, 45 | {"q1":-0.59,"q2":1,"q3":-1,"q4":15,"n1":"green","n2":"dog","n3":"New York","n4":"home","o1":6,"o2":"happy","o3":"S","o4":"medium","t1":"03/04/2018","t2":"07/08/2017","t3":"01/15/2018","t4":"01/01/2018"}, 46 | {"q1":-0.77,"q2":1,"q3":-1,"q4":5,"n1":"blue","n2":"fish","n3":"Seattle","n4":"away","o1":8,"o2":"sad","o3":"S","o4":"medium","t1":"03/04/2018","t2":"05/28/2017","t3":"01/21/2018","t4":"01/03/2018"}, 47 | {"q1":-1.19,"q2":4,"q3":-1,"q4":56,"n1":"blue","n2":"fish","n3":"Seattle","n4":"home","o1":8,"o2":"sad","o3":"S","o4":"medium","t1":"03/04/2018","t2":"07/26/2017","t3":"01/05/2018","t4":"01/05/2018"}, 48 | {"q1":-0.38,"q2":1,"q3":-1,"q4":74,"n1":"green","n2":"dog","n3":"Seattle","n4":"away","o1":4,"o2":"neutral","o3":"M","o4":"dark","t1":"03/06/2018","t2":"08/07/2017","t3":"01/03/2018","t4":"01/03/2018"}, 49 | {"q1":1.33,"q2":1,"q3":-1,"q4":67,"n1":"green","n2":"dog","n3":"San Francisco","n4":"home","o1":6,"o2":"happy","o3":"S","o4":"light","t1":"03/07/2018","t2":"10/07/2017","t3":"01/12/2018","t4":"01/01/2018"}, 50 | {"q1":-0.2,"q2":2,"q3":-1,"q4":21,"n1":"green","n2":"cat","n3":"New York","n4":"home","o1":8,"o2":"sad","o3":"M","o4":"dark","t1":"03/03/2018","t2":"02/08/2018","t3":"01/24/2018","t4":"01/04/2018"}, 51 | {"q1":0.45,"q2":2,"q3":-1,"q4":60,"n1":"red","n2":"hamster","n3":"San Francisco","n4":"home","o1":4,"o2":"sad","o3":"M","o4":"medium","t1":"03/03/2018","t2":"08/11/2017","t3":"01/12/2018","t4":"12/31/2017"}, 52 | 
{"q1":1.93,"q2":6,"q3":-1,"q4":6,"n1":"blue","n2":"hamster","n3":"Seattle","n4":"away","o1":4,"o2":"happy","o3":"XS","o4":"dark","t1":"03/06/2018","t2":"05/30/2017","t3":"01/01/2018","t4":"01/03/2018"}, 53 | {"q1":0.64,"q2":1,"q3":-1,"q4":74,"n1":"green","n2":"dog","n3":"Seattle","n4":"home","o1":4,"o2":"sad","o3":"XL","o4":"dark","t1":"03/07/2018","t2":"02/08/2018","t3":"01/09/2018","t4":"01/05/2018"}, 54 | {"q1":-0.99,"q2":3,"q3":-1,"q4":8,"n1":"green","n2":"cat","n3":"New York","n4":"away","o1":8,"o2":"happy","o3":"M","o4":"medium","t1":"03/06/2018","t2":"04/21/2017","t3":"01/27/2018","t4":"01/01/2018"}, 55 | {"q1":1.76,"q2":2,"q3":-1,"q4":59,"n1":"green","n2":"fish","n3":"Seattle","n4":"away","o1":6,"o2":"neutral","o3":"XL","o4":"medium","t1":"03/04/2018","t2":"10/19/2017","t3":"01/07/2018","t4":"01/02/2018"}, 56 | {"q1":0.55,"q2":1,"q3":-1,"q4":21,"n1":"green","n2":"hamster","n3":"New York","n4":"home","o1":4,"o2":"neutral","o3":"S","o4":"light","t1":"03/03/2018","t2":"01/25/2018","t3":"01/18/2018","t4":"01/03/2018"}, 57 | {"q1":0.09,"q2":1,"q3":-1,"q4":95,"n1":"red","n2":"hamster","n3":"New York","n4":"home","o1":2,"o2":"sad","o3":"XL","o4":"light","t1":"03/04/2018","t2":"08/20/2017","t3":"01/10/2018","t4":"01/02/2018"}, 58 | {"q1":0.45,"q2":3,"q3":-1,"q4":75,"n1":"blue","n2":"cat","n3":"New York","n4":"away","o1":8,"o2":"neutral","o3":"XL","o4":"dark","t1":"03/04/2018","t2":"03/20/2017","t3":"01/30/2018","t4":"01/02/2018"}, 59 | {"q1":1.37,"q2":1,"q3":-1,"q4":63,"n1":"blue","n2":"dog","n3":"Seattle","n4":"home","o1":2,"o2":"happy","o3":"S","o4":"light","t1":"03/07/2018","t2":"05/22/2017","t3":"01/10/2018","t4":"01/02/2018"}, 60 | {"q1":0.07,"q2":2,"q3":-1,"q4":72,"n1":"blue","n2":"dog","n3":"Seattle","n4":"home","o1":4,"o2":"neutral","o3":"L","o4":"dark","t1":"03/07/2018","t2":"02/16/2018","t3":"01/08/2018","t4":"01/02/2018"}, 61 | 
{"q1":0.59,"q2":1,"q3":-1,"q4":25,"n1":"blue","n2":"fish","n3":"Seattle","n4":"away","o1":2,"o2":"sad","o3":"S","o4":"dark","t1":"03/07/2018","t2":"09/20/2017","t3":"01/25/2018","t4":"01/04/2018"}, 62 | {"q1":2.11,"q2":2,"q3":-1,"q4":6,"n1":"green","n2":"cat","n3":"San Francisco","n4":"away","o1":6,"o2":"sad","o3":"M","o4":"medium","t1":"03/06/2018","t2":"10/11/2017","t3":"01/08/2018","t4":"01/02/2018"}, 63 | {"q1":0.21,"q2":3,"q3":-1,"q4":75,"n1":"red","n2":"fish","n3":"New York","n4":"away","o1":8,"o2":"sad","o3":"XS","o4":"dark","t1":"03/06/2018","t2":"03/05/2017","t3":"01/25/2018","t4":"01/01/2018"}, 64 | {"q1":-1.27,"q2":1,"q3":-1,"q4":3,"n1":"blue","n2":"dog","n3":"New York","n4":"home","o1":8,"o2":"sad","o3":"M","o4":"medium","t1":"03/09/2018","t2":"04/11/2017","t3":"01/19/2018","t4":"01/01/2018"}, 65 | {"q1":-1.3,"q2":3,"q3":-1,"q4":30,"n1":"blue","n2":"cat","n3":"Seattle","n4":"home","o1":2,"o2":"sad","o3":"XL","o4":"light","t1":"03/08/2018","t2":"05/17/2017","t3":"01/19/2018","t4":"01/04/2018"}, 66 | {"q1":0.09,"q2":1,"q3":-1,"q4":29,"n1":"blue","n2":"fish","n3":"New York","n4":"away","o1":4,"o2":"neutral","o3":"XL","o4":"light","t1":"03/06/2018","t2":"11/09/2017","t3":"01/27/2018","t4":"01/04/2018"}, 67 | {"q1":0.35,"q2":2,"q3":-1,"q4":95,"n1":"blue","n2":"hamster","n3":"San Francisco","n4":"home","o1":4,"o2":"neutral","o3":"XL","o4":"medium","t1":"03/09/2018","t2":"11/01/2017","t3":"01/15/2018","t4":"01/05/2018"}, 68 | {"q1":0.65,"q2":1,"q3":-1,"q4":4,"n1":"blue","n2":"dog","n3":"San Francisco","n4":"away","o1":8,"o2":"neutral","o3":"XS","o4":"medium","t1":"03/06/2018","t2":"10/21/2017","t3":"01/20/2018","t4":"12/31/2017"}, 69 | {"q1":-0.84,"q2":2,"q3":-1,"q4":23,"n1":"green","n2":"hamster","n3":"New York","n4":"away","o1":8,"o2":"neutral","o3":"S","o4":"medium","t1":"03/03/2018","t2":"05/10/2017","t3":"01/11/2018","t4":"12/31/2017"}, 70 | 
{"q1":-1.26,"q2":4,"q3":-1,"q4":92,"n1":"blue","n2":"dog","n3":"Seattle","n4":"away","o1":2,"o2":"happy","o3":"M","o4":"light","t1":"03/08/2018","t2":"08/12/2017","t3":"01/02/2018","t4":"01/02/2018"}, 71 | {"q1":0.17,"q2":1,"q3":-1,"q4":92,"n1":"blue","n2":"dog","n3":"San Francisco","n4":"home","o1":4,"o2":"sad","o3":"XL","o4":"dark","t1":"03/09/2018","t2":"12/23/2017","t3":"01/21/2018","t4":"01/02/2018"}, 72 | {"q1":0.75,"q2":1,"q3":-1,"q4":44,"n1":"red","n2":"hamster","n3":"San Francisco","n4":"home","o1":2,"o2":"happy","o3":"L","o4":"dark","t1":"03/03/2018","t2":"12/16/2017","t3":"01/17/2018","t4":"01/04/2018"}, 73 | {"q1":2.08,"q2":1,"q3":-1,"q4":33,"n1":"green","n2":"hamster","n3":"Seattle","n4":"away","o1":4,"o2":"neutral","o3":"L","o4":"light","t1":"03/03/2018","t2":"01/07/2018","t3":"01/19/2018","t4":"01/04/2018"}, 74 | {"q1":0.62,"q2":1,"q3":-1,"q4":88,"n1":"red","n2":"dog","n3":"New York","n4":"home","o1":2,"o2":"neutral","o3":"S","o4":"light","t1":"03/08/2018","t2":"11/24/2017","t3":"01/29/2018","t4":"12/31/2017"}, 75 | {"q1":-1.54,"q2":1,"q3":-1,"q4":19,"n1":"blue","n2":"cat","n3":"Seattle","n4":"home","o1":2,"o2":"neutral","o3":"XL","o4":"medium","t1":"03/04/2018","t2":"04/13/2017","t3":"01/30/2018","t4":"01/02/2018"}, 76 | {"q1":-0.4,"q2":2,"q3":-1,"q4":46,"n1":"green","n2":"hamster","n3":"San Francisco","n4":"away","o1":4,"o2":"sad","o3":"XS","o4":"medium","t1":"03/07/2018","t2":"09/23/2017","t3":"01/03/2018","t4":"01/02/2018"}, 77 | {"q1":0.21,"q2":6,"q3":-1,"q4":75,"n1":"red","n2":"hamster","n3":"Seattle","n4":"away","o1":2,"o2":"neutral","o3":"XL","o4":"light","t1":"03/03/2018","t2":"03/17/2017","t3":"01/05/2018","t4":"01/04/2018"}, 78 | {"q1":0.04,"q2":1,"q3":-1,"q4":53,"n1":"green","n2":"cat","n3":"New York","n4":"away","o1":6,"o2":"happy","o3":"S","o4":"light","t1":"03/09/2018","t2":"04/29/2017","t3":"01/03/2018","t4":"01/01/2018"}, 79 | {"q1":-0.36,"q2":4,"q3":-1,"q4":72,"n1":"green","n2":"fish","n3":"San 
Francisco","n4":"away","o1":2,"o2":"sad","o3":"S","o4":"light","t1":"03/04/2018","t2":"01/09/2018","t3":"01/21/2018","t4":"01/05/2018"}, 80 | {"q1":-1.6,"q2":1,"q3":-1,"q4":14,"n1":"blue","n2":"hamster","n3":"New York","n4":"away","o1":8,"o2":"happy","o3":"L","o4":"light","t1":"03/03/2018","t2":"08/27/2017","t3":"01/09/2018","t4":"01/04/2018"}, 81 | {"q1":-0.67,"q2":1,"q3":-1,"q4":13,"n1":"red","n2":"dog","n3":"New York","n4":"home","o1":6,"o2":"neutral","o3":"L","o4":"dark","t1":"03/03/2018","t2":"09/10/2017","t3":"01/05/2018","t4":"01/05/2018"}, 82 | {"q1":-0.51,"q2":2,"q3":-1,"q4":11,"n1":"green","n2":"fish","n3":"San Francisco","n4":"away","o1":2,"o2":"neutral","o3":"XS","o4":"medium","t1":"03/07/2018","t2":"02/08/2018","t3":"01/09/2018","t4":"01/02/2018"}, 83 | {"q1":0.36,"q2":2,"q3":-1,"q4":85,"n1":"red","n2":"dog","n3":"Seattle","n4":"away","o1":6,"o2":"happy","o3":"S","o4":"light","t1":"03/07/2018","t2":"05/18/2017","t3":"01/06/2018","t4":"01/05/2018"}, 84 | {"q1":-0.22,"q2":6,"q3":-1,"q4":16,"n1":"blue","n2":"fish","n3":"New York","n4":"away","o1":4,"o2":"neutral","o3":"XL","o4":"light","t1":"03/08/2018","t2":"10/28/2017","t3":"01/16/2018","t4":"01/05/2018"}, 85 | {"q1":0.18,"q2":2,"q3":-1,"q4":34,"n1":"blue","n2":"dog","n3":"San Francisco","n4":"away","o1":4,"o2":"happy","o3":"M","o4":"medium","t1":"03/05/2018","t2":"07/09/2017","t3":"01/23/2018","t4":"01/03/2018"}, 86 | {"q1":-1.71,"q2":2,"q3":-1,"q4":95,"n1":"green","n2":"hamster","n3":"Seattle","n4":"home","o1":6,"o2":"neutral","o3":"S","o4":"medium","t1":"03/09/2018","t2":"07/17/2017","t3":"01/19/2018","t4":"12/31/2017"}, 87 | {"q1":1.48,"q2":2,"q3":-1,"q4":78,"n1":"green","n2":"fish","n3":"San Francisco","n4":"home","o1":8,"o2":"sad","o3":"L","o4":"dark","t1":"03/08/2018","t2":"01/26/2018","t3":"01/07/2018","t4":"01/05/2018"}, 88 | {"q1":-0.53,"q2":1,"q3":-1,"q4":83,"n1":"blue","n2":"cat","n3":"New 
York","n4":"home","o1":8,"o2":"happy","o3":"S","o4":"dark","t1":"03/04/2018","t2":"08/29/2017","t3":"01/08/2018","t4":"01/04/2018"}, 89 | {"q1":-0.1,"q2":2,"q3":-1,"q4":61,"n1":"green","n2":"dog","n3":"New York","n4":"home","o1":2,"o2":"sad","o3":"XL","o4":"medium","t1":"03/03/2018","t2":"12/24/2017","t3":"01/21/2018","t4":"01/02/2018"}, 90 | {"q1":-0.58,"q2":3,"q3":-1,"q4":33,"n1":"blue","n2":"cat","n3":"New York","n4":"away","o1":6,"o2":"happy","o3":"L","o4":"light","t1":"03/09/2018","t2":"08/01/2017","t3":"01/25/2018","t4":"01/02/2018"}, 91 | {"q1":1.24,"q2":2,"q3":-1,"q4":38,"n1":"blue","n2":"cat","n3":"San Francisco","n4":"home","o1":4,"o2":"neutral","o3":"M","o4":"dark","t1":"03/05/2018","t2":"07/02/2017","t3":"01/03/2018","t4":"01/03/2018"}, 92 | {"q1":0.38,"q2":2,"q3":-1,"q4":36,"n1":"red","n2":"dog","n3":"Seattle","n4":"home","o1":6,"o2":"sad","o3":"XS","o4":"light","t1":"03/03/2018","t2":"12/16/2017","t3":"01/04/2018","t4":"01/02/2018"}, 93 | {"q1":1.08,"q2":1,"q3":-1,"q4":16,"n1":"green","n2":"cat","n3":"San Francisco","n4":"home","o1":2,"o2":"happy","o3":"M","o4":"light","t1":"03/09/2018","t2":"11/28/2017","t3":"01/24/2018","t4":"01/05/2018"}, 94 | {"q1":-0.8,"q2":4,"q3":-1,"q4":98,"n1":"green","n2":"cat","n3":"San Francisco","n4":"home","o1":4,"o2":"happy","o3":"XL","o4":"medium","t1":"03/08/2018","t2":"03/04/2017","t3":"01/21/2018","t4":"01/02/2018"}, 95 | {"q1":-0.55,"q2":1,"q3":-1,"q4":91,"n1":"red","n2":"fish","n3":"Seattle","n4":"home","o1":6,"o2":"sad","o3":"L","o4":"medium","t1":"03/06/2018","t2":"05/04/2017","t3":"01/21/2018","t4":"01/02/2018"}, 96 | {"q1":0.12,"q2":4,"q3":-1,"q4":59,"n1":"blue","n2":"cat","n3":"Seattle","n4":"home","o1":4,"o2":"neutral","o3":"XL","o4":"dark","t1":"03/09/2018","t2":"11/02/2017","t3":"01/16/2018","t4":"01/01/2018"}, 97 | {"q1":2.16,"q2":1,"q3":-1,"q4":23,"n1":"blue","n2":"fish","n3":"San 
Francisco","n4":"home","o1":4,"o2":"happy","o3":"L","o4":"medium","t1":"03/09/2018","t2":"08/24/2017","t3":"01/12/2018","t4":"01/01/2018"}, 98 | {"q1":-0.85,"q2":1,"q3":-1,"q4":1,"n1":"blue","n2":"cat","n3":"San Francisco","n4":"home","o1":4,"o2":"happy","o3":"S","o4":"light","t1":"03/04/2018","t2":"07/03/2017","t3":"01/18/2018","t4":"01/04/2018"}, 99 | {"q1":0.61,"q2":3,"q3":-1,"q4":95,"n1":"green","n2":"dog","n3":"New York","n4":"away","o1":4,"o2":"happy","o3":"S","o4":"dark","t1":"03/03/2018","t2":"07/17/2017","t3":"01/02/2018","t4":"12/31/2017"}, 100 | {"q1":-2.41,"q2":2,"q3":-1,"q4":21,"n1":"blue","n2":"hamster","n3":"New York","n4":"away","o1":6,"o2":"neutral","o3":"XL","o4":"light","t1":"03/03/2018","t2":"08/24/2017","t3":"01/27/2018","t4":"01/03/2018"}] 101 | -------------------------------------------------------------------------------- /data/cars.csv: -------------------------------------------------------------------------------- 1 | Name,Type,AWD,RWD,Retail Price,Dealer Cost,Engine Size (l),Cyl,Horsepower(HP),City Miles Per Gallon,Highway Miles Per Gallon,Weight,Wheel Base,Len,Width 2 | Acura 3.5 RL 4dr,Sedan,0,0,43755,39014,3.5,6,225,18,24,3880,115,197,72 3 | Acura 3.5 RL w/Navigation 4dr,Sedan,0,0,46100,41100,3.5,6,225,18,24,3893,115,197,72 4 | Acura MDX,SUV,1,0,36945,33337,3.5,6,265,17,23,4451,106,189,77 5 | Acura NSX coupe 2dr manual S,Sports Car,0,1,89765,79978,3.2,6,290,17,24,3153,100,174,71 6 | Acura RSX Type S 2dr,Sedan,0,0,23820,21761,2,4,200,24,31,2778,101,172,68 7 | Acura TL 4dr,Sedan,0,0,33195,30299,3.2,6,270,20,28,3575,108,186,72 8 | Acura TSX 4dr,Sedan,0,0,26990,24647,2.4,4,200,22,29,3230,105,183,69 9 | Audi A4 1.8T 4dr,Sedan,0,0,25940,23508,1.8,4,170,22,31,3252,104,179,70 10 | Audi A4 3.0 4dr,Sedan,0,0,31840,28846,3,0,220,20,28,3462,104,179,70 11 | Audi A4 3.0 convertible 2dr,Sedan,0,0,42490,38325,3,6,220,20,27,3814,105,180,70 12 | Audi A4 3.0 Quattro 4dr auto,Sedan,1,0,34480,31388,0,6,220,18,25,3627,104,179,70 13 | Audi A4 3.0 Quattro 
4dr manual,Sedan,1,0,33430,30366,3,6,220,17,26,3583,104,179,70 14 | Audi A4 3.0 Quattro convertible 2dr,Sedan,1,0,44240,40075,3,6,220,18,25,4013,105,180,70 15 | Audi A41.8T convertible 2dr,Sedan,0,0,35940,32506,1.8,4,170,23,30,3638,105,180,70 16 | Audi A6 2.7 Turbo Quattro 4dr,Sedan,1,0,42840,38840,2.7,6,250,18,25,3836,109,192,71 17 | Audi A6 3.0 4dr,Sedan,0,0,36640,33129,3,6,220,20,27,3561,109,192,71 18 | Audi A6 3.0 Avant Quattro,Wagon,1,0,40840,37060,3,6,220,18,25,4035,109,192,71 19 | Audi A6 3.0 Quattro 4dr,Sedan,1,0,39640,35992,3,6,220,18,25,3880,109,192,2 20 | Audi A6 4.2 Quattro 4dr,Sedan,1,0,49690,44936,4.2,8,300,17,24,4024,109,193,71 21 | Audi A8 L Quattro 4dr,Sedan,1,0,69190,64740,4.2,8,330,17,24,4399,121,204,75 22 | Audi RS 6 4dr,Sports Car,0,0,84600,76417,4.2,8,450,15,22,4024,109,191,78 23 | Audi S4 Avant Quattro,Wagon,1,0,49090,44446,4.2,8,340,15,21,3936,104,179,70 24 | Audi S4 Quattro 4dr,Sedan,1,0,48040,43556,4.2,8,340,14,20,3825,104,179,70 25 | Audi TT 1.8 convertible 2dr (coupe),Sports Car,0,0,35940,32512,1.8,4,180,20,28,3131,95,159,73 26 | Audi TT 1.8 Quattro 2dr (convertible),Sports Car,1,0,37390,33891,1.8,4,225,20,28,2921,96,159,73 27 | Audi TT 3.2 coupe 2dr (convertible),Sports Car,1,0,40590,36739,3.2,6,250,21,29,3351,96,159,73 28 | BMW 325Ci 2dr,Sedan,0,1,30795,28245,2.5,6,184,20,29,3197,107,177,69 29 | BMW 325Ci convertible 2dr,Sedan,0,1,37995,34800,2.5,6,184,19,27,3560,107,177,69 30 | BMW 325i 4dr,Sedan,0,1,28495,26155,2.5,6,184,20,29,3219,107,176,69 31 | BMW 325xi 4dr,Sedan,1,0,30245,27745,2.5,6,184,19,27,3461,107,176,69 32 | BMW 325xi Sport,Wagon,1,0,32845,30110,2.5,6,184,19,26,3594,107,176,69 33 | BMW 330Ci 2dr,Sedan,0,1,36995,33890,3,6,225,20,30,3285,107,176,69 34 | BMW 330Ci convertible 2dr,Sedan,0,1,44295,40530,3,6,225,19,28,3616,107,177,69 35 | BMW 330i 4dr,Sedan,0,1,35495,32525,3,6,225,20,30,3285,107,176,69 36 | BMW 330xi 4dr,Sedan,1,0,37245,34115,3,6,225,20,29,3483,107,176,69 37 | BMW 525i 
4dr,Sedan,0,1,39995,36620,2.5,6,184,19,28,3428,114,191,73 38 | BMW 530i 4dr,Sedan,0,1,44995,41170,3,6,225,20,30,3472,114,191,73 39 | BMW 545iA 4dr,Sedan,0,1,54995,50270,4.4,8,325,18,26,3814,114,191,73 40 | BMW 745i 4dr,Sedan,0,1,69195,63190,4.4,8,325,18,26,4376,118,198,75 41 | BMW 745Li 4dr,Sedan,0,1,73195,66830,4.4,8,325,18,26,4464,123,204,75 42 | BMW M3 convertible 2dr,Sports Car,0,1,56595,51815,3.2,6,333,16,23,3781,108,177,70 43 | BMW M3 coupe 2dr,Sports Car,0,1,48195,44170,3.2,6,333,16,24,3415,108,177,70 44 | BMW X3 3.0i,SUV,1,0,37000,33873,3,6,225,16,23,4023,110,180,73 45 | BMW X5 4.4i,SUV,1,0,52195,47720,4.4,8,325,16,22,4824,111,184,74 46 | BMW Z4 convertible 2.5i 2dr,Sports Car,0,1,33895,31065,2.5,6,184,20,28,2932,98,161,70 47 | BMW Z4 convertible 3.0i 2dr,Sports Car,0,1,41045,37575,3,6,225,21,29,2998,98,161,70 48 | Buick Century Custom 4dr,Sedan,0,0,22180,20351,3.1,6,175,20,30,3353,109,195,73 49 | Buick LeSabre Custom 4dr,Sedan,0,0,26470,24282,3.8,6,205,20,29,3567,112,200,74 50 | Buick LeSabre Limited 4dr,Sedan,0,0,32245,29566,3.8,6,205,20,29,3591,112,200,74 51 | Buick Park Avenue 4dr,Sedan,0,0,35545,32244,3.8,6,205,20,29,3778,0,207,75 52 | Buick Park Avenue Ultra 4dr,Sedan,0,0,40720,36927,3.8,6,240,18,28,3909,114,207,75 53 | Buick Rainier,SUV,1,0,37895,34357,4.2,6,275,15,21,4600,113,193,75 54 | Buick Regal GS 4dr,Sedan,0,0,28345,26047,3.8,6,240,18,28,3536,109,196,73 55 | Buick Regal LS 4dr,Sedan,0,0,24895,22835,3.8,6,200,20,30,3461,109,196,73 56 | Buick Rendezvous CX,SUV,0,0,26545,24085,3.4,6,185,19,26,4024,112,187,74 57 | Cadillac CTS VVT 4dr,Sedan,0,1,30835,28575,3.6,6,255,18,25,3694,113,190,71 58 | Cadillac Deville 4dr,Sedan,0,0,45445,41650,4.6,8,275,18,26,3984,115,207,74 59 | Cadillac Deville DTS 4dr,Sedan,0,0,50595,46362,4.6,8,300,18,26,4044,115,207,74 60 | Cadillac Escaladet,SUV,0,0,52795,48377,5.3,8,295,14,18,5367,116,199,79 61 | Cadillac Seville SLS 4dr,Sedan,0,0,47955,43841,4.6,8,275,18,26,3992,112,201,75 62 | Cadillac SRX 
V8,SUV,0,0,46995,43523,4.6,8,320,16,21,4302,116,195,73 63 | Cadillac XLR convertible 2dr,Sports Car,0,1,76200,70546,4.6,8,320,17,25,3647,106,178,72 64 | Chevrolet Astro,Minivan,1,0,26395,23954,4.3,6,190,14,17,4605,111,190,78 65 | Chevrolet Aveo 4dr,Sedan,0,0,11690,10965,1.6,4,103,28,34,2370,98,167,66 66 | Chevrolet Aveo LS 4dr hatch,Sedan,0,0,12585,11802,1.6,4,103,28,34,2348,98,153,66 67 | Chevrolet Cavalier 2dr,Sedan,0,0,14610,13697,2.2,4,140,26,37,2617,104,183,69 68 | Chevrolet Cavalier 4dr,Sedan,0,0,14810,13884,2.2,4,140,26,37,2676,104,183,68 69 | Chevrolet Cavalier LS 2dr,Sedan,0,0,16385,15357,2.2,4,140,26,37,2617,104,183,69 70 | Chevrolet Corvette 2dr,Sports Car,0,1,44535,39068,5.7,8,350,18,25,3246,105,180,74 71 | Chevrolet Corvette convertible 2dr,Sports Car,0,1,51535,45193,5.7,8,350,18,25,3248,105,180,74 72 | Chevrolet Impala 4dr,Sedan,0,0,21900,20095,3.4,6,180,21,32,3465,111,200,73 73 | Chevrolet Impala LS 4dr,Sedan,0,0,25000,22931,3.8,6,200,20,30,3476,111,200,73 74 | Chevrolet Impala SS 4dr,Sedan,0,0,27995,25672,3.8,6,240,18,28,3606,111,200,73 75 | Chevrolet Malibu 4dr,Sedan,0,0,18995,17434,2.2,4,145,24,34,3174,106,188,70 76 | Chevrolet Malibu LS 4dr,Sedan,0,0,20370,18639,3.5,6,200,22,30,3297,106,188,70 77 | Chevrolet Malibu LT 4dr,Sedan,0,0,23495,21551,3.5,6,200,23,32,3315,106,188,70 78 | Chevrolet Malibu Maxx LS,Wagon,0,0,22225,20394,3.5,6,200,22,30,3458,112,188,70 79 | Chevrolet Monte Carlo LS 2dr,Sedan,0,0,21825,20026,3.4,6,180,21,32,3340,111,198,73 80 | Chevrolet Monte Carlo SS 2dr,Sedan,0,0,24225,22222,3.8,6,200,18,28,3434,111,198,73 81 | Chevrolet Suburban 1500 LT,SUV,0,0,42735,37422,5.3,8,295,14,18,4947,130,219,79 82 | Chevrolet Tahoe LT,SUV,1,0,41465,36287,5.3,8,295,14,18,5050,116,197,79 83 | Chevrolet Tracker,SUV,0,0,20255,19108,2.5,6,165,19,22,2866,98,163,67 84 | Chevrolet TrailBlazer LT,SUV,0,0,30295,27479,4.2,6,275,16,21,4425,113,192,75 85 | Chevrolet Venture LS,Minivan,0,0,27020,24518,3.4,6,185,19,26,3699,112,187,72 86 | Chrvsler PT Cruiser 
GT 4dr,Sedan,0,0,25955,24172,2.4,4,220,21,27,3217,103,169,67 87 | Chrysler 300M 4dr,Sedan,0,0,29865,27797,3.5,6,250,18,27,3581,113,198,74 88 | Chrysler 300M Special Edition 4dr,Sedan,0,0,33295,30884,3.5,6,255,18,27,3650,113,198,74 89 | Chrysler Concorde LX 4dr,Sedan,0,0,24130,22452,2.7,6,200,21,29,3479,113,208,74 90 | Chrysler Concorde LXi 4dr,Sedan,0,0,26860,24909,3.5,6,232,19,27,3548,113,208,74 91 | Chrysler Crossfire 2dr,Sports Car,0,1,34495,32033,3.2,6,215,17,25,3060,95,160,70 92 | Chrysler Pacifica,Wagon,0,1,31230,28725,3.5,6,250,17,23,4675,116,199,79 93 | Chrysler PT Cruiser 4dr,Sedan,0,0,17985,16919,2.4,4,150,22,29,3101,103,169,67 94 | Chrysler PT Cruiser Limited 4dr,Sedan,0,0,22000,20573,2.4,4,150,22,29,3105,103,169,67 95 | Chrysler Sebring 4dr,Sedan,0,0,19090,17805,2.4,4,150,22,30,3173,108,191,71 96 | Chrysler Sebring convertible 2dr,Sedan,0,0,25215,23451,2.4,4,150,22,30,3357,106,194,64 97 | Chrysler Sebring Limited convertible 2dr,Sedan,0,0,30950,28613,2.7,6,200,21,28,3448,106,194,69 98 | Chrysler Sebring Touring 4dr,Sedan,0,0,21840,20284,2.7,6,200,21,28,3222,108,191,71 99 | Chrysler Town and Country Limited,Minivan,0,0,38380,35063,3.8,6,215,18,25,4331,119,201,79 100 | Chrysler Town and Country LX,Minivan,0,0,27490,25371,3.3,6,180,19,26,4068,119,201,79 101 | CMC Yukon 1500 SLE,SUV,0,0,35725,31361,4.8,8,285,16,19,5042,116,199,79 102 | Dodge Caravan SE,Minivan,0,0,21795,20508,2.4,4,150,20,26,3862,113,189,79 103 | Dodge Durango SLT,SUV,1,0,32235,29472,4.7,8,230,15,21,4987,119,201,76 104 | Dodge Grand Caravan SXT,Minivan,1,0,32660,29812,3.8,6,215,18,25,4440,119,201,79 105 | Dodge Intrepid ES 4dr,Sedan,0,0,24885,23058,3.5,6,232,18,27,3487,113,204,75 106 | Dodge Intrepid SE 4dr,Sedan,0,0,22035,20502,2.7,6,200,21,29,3469,113,204,75 107 | Dodge Neon SE 4dr,Sedan,0,0,13670,12849,2,4,132,29,36,2581,105,174,67 108 | Dodge Neon SXT 4dr,Sedan,0,0,15040,14086,2,4,132,29,36,2626,105,174,67 109 | Dodge Stratus SE 4dr,Sedan,0,0,20220,18821,2.4,4,150,21,28,3175,108,191,71 
110 | Dodge Stratus SXT 4dr,Sedan,0,0,18820,17512,2.4,4,150,21,28,3182,108,191,71 111 | Ford Crown Victoria 4dr,Sedan,0,1,24345,22856,4.6,8,224,17,25,4057,115,212,78 112 | Ford Crown Victoria LX 4dr,Sedan,0,1,27370,25105,4.6,8,224,17,25,4057,115,212,78 113 | Ford Crown Victoria LX Sport 4dr,Sedan,0,1,30315,27756,4.6,8,239,17,25,4057,115,212,78 114 | Ford Escape XLS,SUV,1,0,22515,20907,3,6,201,18,23,3346,103,173,70 115 | Ford Expedition 4.6 XLT,SUV,0,0,34560,30468,4.6,8,232,15,19,5000,119,206,79 116 | Ford Explorer XLT V6,SUV,1,0,29670,26983,4,6,210,15,20,4463,114,190,72 117 | Ford Focus LX 4dr,Sedan,0,0,13730,12906,2,4,110,27,36,2606,103,168,67 118 | Ford Focus SE 4dr,Sedan,0,0,15460,14496,2,4,130,26,33,2606,103,168,67 119 | Ford Focus SVT 2dr,Sedan,0,0,19135,17878,2,4,170,21,28,2750,103,168,67 120 | Ford Focus ZTW,Wagon,0,0,17475,16375,2,4,130,26,33,2702,103,178,67 121 | Ford Focus ZX3 2dr hatch,Sedan,0,0,13270,12482,2,4,130,26,33,2612,103,168,67 122 | Ford Focus ZX5 5dr,Sedan,0,0,15580,14607,2,4,130,26,33,2691,103,168,67 123 | Ford Freestar SE,Minivan,0,0,26930,24498,3.9,6,193,17,23,4275,121,201,77 124 | Ford Mustang 2dr (convertible),Sports Car,0,1,18345,16943,3.8,6,193,20,29,3290,101,183,73 125 | Ford Mustang GT Premium convertible 2dr,Sports Car,0,1,29380,26875,4.6,8,260,17,25,3347,101,183,73 126 | Ford Taurus LX 4dr,Sedan,0,0,20320,18881,3,6,155,20,27,3306,109,198,73 127 | Ford Taurus SE,Wagon,0,0,22290,20457,3,6,155,19,26,3497,109,198,73 128 | Ford Taurus SES Duratec 4dr,Sedan,0,0,22735,20857,3,6,201,19,26,3313,109,198,73 129 | Ford Thunderbird Deluxe convert w/hardtop 2dr,Sports Car,0,0,37530,34483,3.9,8,280,17,24,3780,107,186,72 130 | GMC Envoy XUV SLE,SUV,0,0,31890,28922,4.2,6,275,15,19,4945,129,208,75 131 | GMC Safari SLE,Minivan,0,1,25640,23215,4.3,6,190,16,20,4309,111,190,78 132 | GMC Yukon XL 2500 SLT,SUV,1,0,46265,40534,6,8,325,13,17,6133,130,219,79 133 | Honda Accord EX 2dr,Sedan,0,0,22260,20080,2.4,4,160,26,34,3047,105,188,71 134 | Honda Accord EX 
V6 2dr,Sedan,0,0,26960,24304,3,6,240,21,30,3294,105,188,71 135 | Honda Accord LX 2dr,Sedan,0,0,19860,17924,2.4,4,160,26,34,2994,105,188,71 136 | Honda Accord LX V6 4dr,Sedan,0,0,23760,21428,3,6,240,21,30,3349,108,190,72 137 | Honda Civic DX 2dr,Sedan,0,0,13270,12175,1.7,4,115,32,38,2432,103,175,67 138 | Honda Civic EX 4dr,Sedan,0,0,17750,16265,1.7,4,127,32,37,2601,103,175,68 139 | Honda Civic HX 2dr,Sedan,0,0,14170,12996,1.7,4,117,36,44,2500,103,175,67 140 | Honda Civic Hybrid 4dr manual (gas/electric),Sedan,0,0,20140,18451,1.4,4,93,46,51,2732,103,175,68 141 | Honda Civic LX 4dr,Sedan,0,0,15850,14531,1.7,4,115,32,38,2513,103,175,68 142 | Honda Civic Si 2dr hatch,Sedan,0,0,19490,17849,2,4,160,26,30,2782,101,166,67 143 | Honda CR-V LX,SUV,1,0,19860,18419,2.4,4,160,21,25,3258,103,179,70 144 | Honda Element LX,SUV,1,0,18690,17334,2.4,4,160,21,24,3468,101,167,72 145 | Honda Insight 2dr (gas/electric),Sedan,0,0,19110,17911,2,3,73,60,66,1850,95,155,67 146 | Honda Odyssey EX,Minivan,0,0,27450,24744,3.5,6,240,18,25,4365,118,201,76 147 | Honda Odyssey LX,Minivan,0,0,24950,22498,3.5,6,240,18,25,4310,118,201,76 148 | Honda Pilot LX,SUV,1,0,27560,24843,3.5,6,240,17,22,4387,106,188,77 149 | Honda S2000 convertible 2dr,Sports Car,0,1,33260,29965,2.2,4,240,20,25,2835,95,162,69 150 | Hummer H2,SUV,1,0,49995,45815,6,8,316,10,12,6400,123,190,81 151 | Hyundai Accent 2dr hatch,Sedan,0,0,10539,10107,1.6,4,103,29,33,2255,96,167,66 152 | Hyundai Accent GL 4dr,Sedan,0,0,11839,11116,1.6,4,103,29,33,2290,96,167,66 153 | Hyundai Accent GT 2dr hatch,Sedan,0,0,11939,11209,1.6,4,103,29,33,2339,96,167,66 154 | Hyundai Elantra GLS 4dr,Sedan,0,0,13839,12781,2,4,138,26,34,2635,103,178,68 155 | Hyundai Elantra GT 4dr,Sedan,0,0,15389,14207,2,4,138,26,34,2635,103,178,68 156 | Hyundai Elantra GT 4dr hatch,Sedan,0,0,15389,14207,2,4,138,26,34,2698,103,178,68 157 | Hyundai Santa Fe GLS,SUV,0,0,21589,20201,2.7,6,173,19,-1100,3549,103,177,73 158 | Hyundai Sonata GLS 
4dr,Sedan,0,0,19339,17574,2.7,6,170,19,27,3217,106,187,72 159 | Hyundai Sonata LX 4dr,Sedan,0,0,20339,18380,2.7,6,170,19,27,3217,106,187,72 160 | Hyundai Tiburon GT V6 2dr,Sports Car,0,0,18739,17101,2.7,6,172,19,26,3023,100,173,69 161 | Hyundai XG350 4dr,Sedan,0,0,24589,22055,3.5,6,194,17,26,3651,108,192,72 162 | Hyundai XG350 L 4dr,Sedan,0,0,26189,23486,3.5,6,194,17,26,3651,108,192,72 163 | Infiniti FX35,Wagon,0,1,34895,31756,3.5,6,280,16,22,4056,112,189,76 164 | Infiniti FX45,Wagon,1,0,36395,33121,4.5,8,315,1000,19,4309,112,189,76 165 | Infiniti G35 4dr,Sedan,0,1,28495,26157,3.5,6,260,18,26,3336,112,187,69 166 | Infiniti G35 4dr,Sedan,1,0,32445,29783,3.5,6,260,18,26,3677,112,187,69 167 | Infiniti G35 Sport Coupe 2dr,Sedan,0,1,29795,27536,3.5,6,280,18,26,3416,112,182,72 168 | Infiniti I35 4dr,Sedan,0,0,31145,28320,3.5,6,255,19,26,3306,108,194,70 169 | Infiniti M45 4dr,Sedan,0,1,42845,38792,4.5,8,340,17,23,3851,110,197,70 170 | Infiniti Q45 Luxury 4dr,Sedan,0,1,52545,47575,4.5,8,340,17,23,3977,113,200,73 171 | Isuzu Ascender S,SUV,1,0,31849,29977,4.2,6,275,15,20,4967,129,208,76 172 | Isuzu Rodeo S,SUV,0,0,20449,19261,3.2,6,193,17,21,3836,106,178,70 173 | Jaguar S-Type 3.0 4dr,Sedan,0,1,43895,40004,3,6,235,18,26,3777,115,192,72 174 | Jaguar S-Type 4.2 4dr,Sedan,0,1,49995,45556,4.2,8,294,18,28,3874,115,192,72 175 | Jaguar S-Type R 4dr,Sedan,0,1,63120,57499,4.2,8,390,17,24,4046,115,192,72 176 | Jaguar Vanden Plas 4dr,Sedan,0,1,68995,62846,4.2,8,294,18,28,3803,119,200,73 177 | Jaguar XJ8 4dr,Sedan,0,1,59995,54656,4.2,8,294,18,28,3803,119,200,73 178 | Jaguar XJR 4dr,Sedan,0,1,74995,68306,4.2,8,390,17,24,3948,119,200,73 179 | Jaguar XK8 convertible 2dr,Sports Car,0,1,74995,68306,4.2,8,294,18,26,3980,102,187,71 180 | Jaguar XK8 coupe 2dr,Sports Car,0,1,69995,63756,4.2,8,294,18,26,3779,102,187,71 181 | Jaguar XKR convertible 2dr,Sports Car,0,1,86995,79226,4.2,8,390,16,23,4042,102,187,71 182 | Jaguar XKR coupe 2dr,Sports Car,0,1,81995,74676,4.2,8,390,16,23,3865,102,187,71 
183 | Jaguar X-Type 2.5 4dr,Sedan,1,0,29995,27355,2.5,6,192,18,26,3428,107,184,70 184 | Jaguar X-Type 3.0 4dr,Sedan,1,0,33995,30995,3,6,227,18,25,3516,107,184,70 185 | Jeep Grand Cherokee Laredo,SUV,0,0,27905,25686,4,6,195,16,21,3790,106,181,72 186 | Jeep Liberty Sport,SUV,1,0,20130,18973,2.4,4,150,20,24,3826,104,174,72 187 | Jeep Wrangler Sahara convertible 2dr,SUV,1,0,25520,23275,4,6,190,16,19,3575,93,150,67 188 | Kia Optima LX 4dr,Sedan,0,0,16040,14910,2.4,4,138,23,30,3281,106,186,72 189 | Kia Optima LX V6 4dr,Sedan,0,0,18435,16850,2.7,6,170,20,27,3279,106,186,72 190 | Kia Rio 4dr auto,Sedan,0,0,11155,10705,1.6,4,104,25,32,2458,95,167,66 191 | Kia Rio 4dr manual,Sedan,0,0,10280,9875,1.6,4,104,26,33,2403,95,167,66 192 | Kia Rio Cinco,Wagon,0,0,11905,11410,1.6,4,104,26,33,2447,95,167,66 193 | Kia Sedona LX,Minivan,0,0,20615,19400,3.5,6,195,16,22,4802,115,194,75 194 | Kia Sorento LX,SUV,0,0,19635,18630,3.5,6,192,16,19,4112,107,180,73 195 | Kia Spectra 4dr,Sedan,0,0,12360,11630,1.8,4,124,24,32,2661,101,178,68 196 | Kia Spectra GS 4dr hatch,Sedan,0,0,13580,12830,1.8,4,124,24,32,2686,101,178,68 197 | Kia Spectra GSX 4dr hatch,Sedan,0,0,14630,13790,1.8,4,124,24,32,2697,101,178,68 198 | Land Rover Discovery SE,SUV,1,0,39250,35777,4.6,8,217,12,16,4576,100,185,74 199 | Land Rover Freelander SE,SUV,1,0,25995,23969,2.5,6,174,18,21,3577,101,175,71 200 | Land Rover Range Rover HSE,SUV,1,0,72250,65807,4.4,8,282,12,16,5379,113,195,76 201 | Lexus ES 330 4dr,Sedan,0,0,32350,28755,3.3,6,225,20,29,3460,107,191,71 202 | Lexus GS 300 4dr,Sedan,0,1,41010,36196,3,6,220,18,25,3649,110,189,71 203 | Lexus GS 430 4dr,Sedan,0,1,48450,42232,4.3,8,300,18,23,3715,110,189,71 204 | Lexus GX 470,SUV,1,0,45700,39838,4.7,8,235,15,19,4740,110,188,74 205 | Lexus IS 300 4dr auto,Sedan,0,1,32415,28611,3,6,215,18,24,3285,105,177,68 206 | Lexus IS 300 4dr manual,Sedan,0,1,31045,27404,3,6,215,18,25,3255,105,177,68 207 | Lexus IS 300 SportCross,Wagon,0,1,32455,28647,3,6,215,18,24,3410,105,177,68 208 | 
Lexus LS 430 4dr,Sedan,0,1,55750,48583,4.3,8,290,18,25,3990,115,197,72 209 | Lexus LX 470,SUV,1,0,64800,56455,4.7,8,235,13,17,5590,112,193,76 210 | Lexus RX 330,SUV,1,0,39195,34576,3.3,6,230,18,24,4065,107,186,73 211 | Lexus SC 430 convertible 2dr,Sports Car,0,1,63200,55063,4.3,8,300,18,23,3840,103,178,72 212 | Lincoln Aviator Ultimate,SUV,0,0,42915,39443,4.6,8,302,13,18,4834,114,193,76 213 | Lincoln LS V6 Luxury 4dr,Sedan,0,1,32495,29969,3,6,232,20,26,3681,115,194,73 214 | Lincoln LS V6 Premium 4dr,Sedan,0,1,36895,33929,3,6,232,20,26,3681,115,194,73 215 | Lincoln LS V8 Sport 4dr,Sedan,0,1,40095,36809,3.9,8,280,17,24,3768,115,194,73 216 | Lincoln LS V8 Ultimate 4dr,Sedan,0,1,43495,39869,3.9,8,280,17,24,3768,115,194,73 217 | Lincoln Navigator Luxury,SUV,1,0,52775,46360,5.4,8,300,13,18,5969,119,206,80 218 | Lincoln Town Car Signature 4dr,Sedan,0,1,41815,38418,4.6,8,239,17,25,4369,118,215,78 219 | Lincoln Town Car Ultimate 4dr,Sedan,0,1,44925,41217,4.6,8,239,17,25,4369,118,215,78 220 | Lincoln Town Car Ultimate L 4dr,Sedan,0,1,50470,46208,4.6,8,239,17,25,4474,124,221,78 221 | Mazda MPV ES,Minivan,0,0,28750,26600,3,6,200,18,25,3812,112,188,72 222 | Mazda MX-5 Miata convertible 2dr,Sports Car,0,1,22388,20701,1.8,4,142,23,28,2387,89,156,66 223 | Mazda MX-5 Miata LS convertible 2dr,Sports Car,0,1,25193,23285,1.8,4,142,23,28,2387,89,156,66 224 | Mazda Tribute DX 2.0,SUV,1,0,21087,19742,2,4,130,22,25,3091,103,173,72 225 | Mazda6 i 4dr,Sedan,0,0,19270,17817,2.3,4,160,24,32,3042,105,187,70 226 | Mercedes-Benz C230 Sport 2dr,Sedan,0,1,26060,24249,1.8,4,189,22,30,3250,107,178,68 227 | Mercedes-Benz C240,Wagon,0,1,33780,31466,2.6,6,168,19,25,3470,107,179,68 228 | Mercedes-Benz C240 4dr,Sedan,0,1,32280,30071,2.6,6,168,20,25,3360,107,178,68 229 | Mercedes-Benz C240 4dr,Sedan,1,0,33480,31187,2.6,6,168,19,25,3360,107,178,68 230 | Mercedes-Benz C32 AMG 4dr,Sedan,0,1,52120,48522,3.2,6,349,16,21,3540,107,178,68 231 | Mercedes-Benz C320 
4dr,Sedan,0,1,37630,35046,3.2,9,215,20,26,3450,107,178,68 232 | Mercedes-Benz C320 Sport 2dr,Sedan,0,1,28370,26435,3.2,6,215,19,26,3430,107,178,68 233 | Mercedes-Benz C320 Sport 4dr,Sedan,0,1,35920,33456,3.2,6,215,19,26,3430,107,178,68 234 | Mercedes-Benz CL500 2dr,Sedan,0,1,94820,88324,5,8,302,16,24,4085,114,196,73 235 | Mercedes-Benz CL600 2dr,Sedan,0,1,128420,119600,5.5,12,493,13,19,4473,114,196,73 236 | Mercedes-Benz CLK320 coupe 2dr (convertible),Sedan,0,1,45707,41966,3.2,6,215,20,26,3770,107,183,69 237 | Mercedes-Benz CLK500 coupe 2dr (convertible),Sedan,0,1,52800,49104,5,8,302,17,22,3585,107,183,69 238 | Mercedes-Benz E320,Wagon,0,1,50670,47174,3.2,6,221,19,27,3966,112,190,71 239 | Mercedes-Benz E320 4dr,Sedan,0,1,48170,44849,3.2,6,221,19,27,3635,112,190,71 240 | Mercedes-Benz E500,Wagon,1,0,60670,56474,5,8,302,16,24,4230,112,190,71 241 | Mercedes-Benz E500 4dr,Sedan,0,1,57270,53382,5,8,302,16,20,3815,112,190,71 242 | Mercedes-Benz G500,SUV,1,0,76870,71540,5,8,292,13,14,5423,112,186,71 243 | Mercedes-Benz ML500,SUV,1,0,46470,43268,5,8,288,14,17,4874,111,183,72 244 | Mercedes-Benz S430 4dr,Sedan,0,1,74320,69168,4.3,8,275,18,26,4160,122,203,73 245 | Mercedes-Benz S500 4dr,Sedan,1,0,86970,80939,5,8,302,16,24,4390,122,203,73 246 | Mercedes-Benz SL500 convertible 2dr,Sports Car,0,1,90520,84325,5,8,302,16,23,4065,101,179,72 247 | Mercedes-Benz SL55 AMG 2dr,Sports Car,0,1,121770,113388,5.5,8,493,14,21,4235,101,179,72 248 | Mercedes-Benz SL600 convertible 2dr,Sports Car,0,1,126670,117854,5.5,12,493,13,19,4429,101,179,72 249 | Mercedes-Benz SLK230 convertible 2dr,Sports Car,0,1,40320,37548,2.3,4,192,21,29,3055,95,158,68 250 | Mercedes-Benz SLK32 AMG 2dr,Sports Car,0,1,56170,52289,3.2,6,349,17,22,3220,95,158,68 251 | Mercury Grand Marquis GS 4dr,Sedan,0,1,24695,23217,4.6,8,224,17,25,4052,115,212,78 252 | Mercury Grand Marquis LS Premium 4dr,Sedan,0,1,29595,27148,4.6,8,224,17,25,4052,115,212,78 253 | Mercury Grand Marquis LS Ultimate 
4dr,Sedan,0,1,30895,28318,4.6,8,224,17,25,4052,115,212,78 254 | Mercury Marauder 4dr,Sedan,0,1,34495,31558,4.6,8,302,17,23,4195,115,212,78 255 | Mercury Monterey Luxury,Minivan,0,0,33995,30846,4.2,6,201,16,23,4340,121,202,77 256 | Mercury Mountaineer,SUV,0,0,29995,27317,4,6,210,16,21,4374,114,190,72 257 | Mercury Sable GS,Wagon,0,0,22595,20748,3,6,155,19,26,3488,109,198,73 258 | Mercury Sable GS 4dr,Sedan,0,0,21595,19848,3,6,155,20,27,3308,109,200,73 259 | Mercury Sable LS Premium 4dr,Sedan,0,0,23895,21918,3,6,201,19,26,3315,109,200,73 260 | Mini Cooper,Sedan,0,0,16999,15437,1.6,4,115,28,37,2524,97,143,67 261 | Mini Cooper S,Sedan,0,0,19999,18137,1.6,4,163,25,34,2678,97,144,67 262 | Mitsubishi Diamante LS 4dr,Sedan,0,0,29282,27250,3.5,6,205,18,25,3549,107,194,70 263 | Mitsubishi Eclipse GTS 2dr,Sports Car,0,0,25092,23456,3,6,210,21,28,3241,101,177,69 264 | Mitsubishi Eclipse Spyder GT convertible 2dr,Sports Car,0,0,26992,25218,3,6,210,21,28,3296,101,177,69 265 | Mitsubishi Endeavor XLS,SUV,1,0,30492,28330,3.8,6,215,17,21,4134,109,190,74 266 | Mitsubishi Galant GTS 4dr,Sedan,0,0,25700,23883,3.8,6,230,18,26,3649,108,191,72 267 | Mitsubishi Lancer Evolution 4dr,Sports Car,0,0,29562,27466,2,4,271,18,26,3263,103,179,70 268 | Mitsubishi Montero XLS,SUV,1,0,33112,30763,3.8,6,215,15,19,4718,110,190,75 269 | Mitsubishi Outlander LS,SUV,0,0,18892,17569,2.4,4,160,21,27,3240,103,179,69 270 | Nissan 350Z coupe 2dr,Sports Car,0,1,26910,25203,3.5,6,287,20,26,3188,104,169,72 271 | Nissan 350Z Enthusiast convertible 2dr,Sports Car,0,1,34390,31845,3.5,6,287,20,26,3428,104,169,72 272 | Nissan Altima S 4dr,Sedan,0,0,19240,18030,2.5,4,175,21,26,3039,110,192,70 273 | Nissan Altima SE 4dr,Sedan,0,0,23290,21580,3.5,6,245,21,26,3197,110,192,70 274 | Nissan Maxima SE 4dr,Sedan,0,0,27490,25182,3.5,6,265,20,28,3473,111,194,72 275 | Nissan Maxima SL 4dr,Sedan,0,0,29440,26966,3.5,6,265,20,28,3476,111,194,72 276 | Nissan Murano SL,Wagon,0,1,28739,27300,3.5,6,245,20,25,3801,111,188,74 277 | 
Nissan Pathfinder Armada SE,SUV,0,0,33840,30815,5.6,8,305,13,19,5013,123,207,79 278 | Nissan Pathfinder SE,SUV,0,0,27339,25972,3.5,6,240,16,21,3871,106,183,72 279 | Nissan Quest S,Minivan,0,0,24780,22958,3.5,6,240,19,26,4012,124,204,78 280 | Nissan Quest SE,Minivan,0,0,32780,30019,3.5,6,240,18,25,4175,124,204,78 281 | Nissan Sentra 1.8 4dr,Sedan,0,0,12740,12205,1.8,4,126,28,35,2513,100,178,67 282 | Nissan Sentra 1.8 S 4dr,Sedan,0,0,14740,13747,1.8,4,126,28,35,2581,100,178,67 283 | Nissan Sentra SE-R 4dr,Sedan,0,0,17640,16444,2.5,4,165,23,28,2761,100,178,67 284 | Nissan Xterra XE V6,SUV,0,0,20939,19512,3.3,6,180,17,20,3760,104,178,70 285 | Oldsmobile Alero GLS 2dr,Sedan,0,0,23675,21485,3.4,6,170,20,29,3085,107,187,70 286 | Oldsmobile Alero GX 2dr,Sedan,0,0,18825,17642,2.2,4,140,24,32,2946,107,187,70 287 | Oldsmobile Silhouette GL,Minivan,0,0,28790,26120,3.4,6,185,19,26,3948,120,201,72 288 | Pontiac Aztekt,SUV,0,0,21595,19810,3.4,6,185,19,26,3779,108,182,74 289 | Pontiac Grand Am GT 2dr,Sedan,0,0,22450,20595,3.4,6,175,20,29,3118,107,186,70 290 | Pontiac Grand Prix GT1 4dr,Sedan,0,0,22395,20545,3.8,6,200,20,30,3477,111,198,74 291 | Pontiac Grand Prix GT2 4dr,Sedan,0,0,24295,22284,3.8,6,200,20,30,3484,111,198,74 292 | Pontiac Montana,Minivan,0,0,23845,21644,3.4,6,185,19,26,3803,112,187,72 293 | Pontiac Montana EWB,Minivan,1,0,31370,28454,3.4,6,185,18,24,4431,121,201,72 294 | Pontiac Sunfire 1SA 2dr,Sedan,0,0,15495,14375,2.2,4,140,24,33,2771,104,182,68 295 | Pontiac Sunfire 1SC 2dr,Sedan,0,0,17735,16369,2.2,4,140,24,33,2771,104,182,68 296 | Pontiac Vibe,Wagon,0,1,17045,15973,1.8,4,130,29,36,2701,102,172,70 297 | Porsche 911 Carrera 4S coupe 2dr (convert),Sports Car,1,0,84165,72206,3.6,6,315,17,24,3240,93,175,72 298 | Porsche 911 Carrera convertible 2dr (coupe),Sports Car,0,1,79165,69229,3.6,6,315,18,26,3135,93,175,70 299 | Porsche 911 GT2 2dr,Sports Car,0,1,192465,173560,3.6,6,477,17,24,3131,93,175,72 300 | Porsche 911 Targa coupe 2dr,Sports 
Car,0,1,76765,67128,3.6,6,315,18,26,3119,93,175,70 301 | Porsche Boxster convertible 2dr,Sports Car,0,1,43365,37886,2.7,6,228,20,29,2811,95,170,70 302 | Porsche Boxster S convertible 2dr,Sports Car,0,1,52365,45766,3.2,6,258,18,26,2911,95,170,70 303 | Porsche Cayenne S,SUV,1,0,56665,49865,4.5,8,340,14,18,4950,112,188,76 304 | Saab 9-3 Aero 4dr,Sedan,0,0,33360,31562,2,4,210,20,28,3175,105,183,69 305 | Saab 9-3 Aero convertible 2dr,Sedan,0,0,43175,40883,2,4,210,21,30,3700,105,182,69 306 | Saab 9-3 Arc convertible 2dr,Sedan,0,0,40670,38520,2,4,210,21,29,3480,105,182,69 307 | Saab 9-3 Arc Sport 4dr,Sedan,0,0,30860,29269,2,4,210,20,28,3175,105,183,69 308 | Saab 9-5 Aero,Wagon,0,0,40845,38376,2.3,4,250,19,29,3620,106,190,71 309 | Saab 9-5 Aero 4dr,Sedan,0,0,39465,37721,2.3,4,250,21,29,3470,106,190,71 310 | Saab 9-5 Arc 4dr,Sedan,0,0,35105,33011,2.3,4,220,21,29,3470,106,190,71 311 | Saturn Ion1 4dr,Sedan,0,0,10995,10319,2.2,4,140,26,35,2692,103,185,67 312 | Saturn L300 2,Wagon,0,0,23560,21779,2.2,4,140,24,34,3109,107,190,69 313 | Saturn L300-2 4dr,Sedan,0,0,21410,19801,3,6,182,20,28,3197,107,190,69 314 | Saturn lon2 4dr,Sedan,0,0,14300,13393,2.2,4,140,26,35,2692,103,185,67 315 | Saturn lon2 quad coupe 2dr,Sedan,0,0,14850,13904,2.2,4,140,26,35,2751,103,185,68 316 | Saturn lon3 4dr,Sedan,0,0,15825,14811,2.2,4,140,26,35,2692,103,185,67 317 | Saturn lon3 quad coupe 2dr,Sedan,0,0,16350,15299,2.2,4,140,26,35,2751,103,185,68 318 | Saturn VUE,SUV,1,0,20585,19238,2.2,4,143,21,26,3381,107,181,72 319 | Scion xA 4dr hatch,Sedan,0,0,12965,12340,1.5,4,108,32,38,2340,93,154,67 320 | Scion xB,Wagon,0,0,14165,13480,1.5,4,108,31,35,2425,98,155,67 321 | Subaru Forester X,Wagon,1,0,21445,19646,2.5,4,165,21,28,3090,99,175,68 322 | Subaru Impreza 2.5 RS 4dr,Sedan,1,0,19945,18399,2.5,4,165,22,28,2965,99,174,69 323 | Subaru Impreza WRX 4dr,Sports Car,1,0,25045,23022,2,4,227,20,27,3085,99,174,69 324 | Subaru Impreza WRX STi 4dr,Sports Car,1,0,31545,29130,2.5,4,300,18,24,3263,100,174,69 325 | 
Subaru Legacy GT 4dr,Sedan,1,0,25645,23336,2.5,4,165,21,28,3395,104,184,69 326 | Subaru Legacy L 4dr,Sedan,1,0,20445,18713,2.5,4,165,21,28,3285,104,184,69 327 | Subaru Outback,Wagon,1,0,23895,21773,2.5,4,165,21,28,3430,104,187,69 328 | Subaru Outback H6 4dr,Sedan,1,0,29345,26660,3,6,212,19,26,3610,104,184,69 329 | Subaru Outback H-6 VDC 4dr,Sedan,1,0,31545,28603,3,6,212,19,26,3630,104,184,69 330 | Subaru Outback Limited Sedan 4dr,Sedan,1,0,27145,24687,2.5,4,165,20,27,3495,104,184,69 331 | Suzuki Aeno S 4dr,Sedan,0,0,12884,12719,2.3,4,155,25,31,2676,98,171,68 332 | Suzuki Aerio LX 4dr,Sedan,0,0,14500,14317,2.3,4,155,25,31,2676,98,171,68 333 | Suzuki Aerio SX,Wagon,1,0,16497,16291,2.3,4,155,24,29,2932,98,167,68 334 | Suzuki Forenza EX 4dr,Sedan,0,0,15568,15378,2,4,119,22,30,2756,102,177,68 335 | Suzuki Forenza S 4dr,Sedan,0,0,12269,12116,2,4,119,24,31,2701,102,177,68 336 | Suzuki Verona LX 4dr,Sedan,0,0,17262,17053,2.5,6,155,20,27,3380,106,188,72 337 | Suzuki Vitara LX,SUV,1,0,17163,16949,2.5,6,165,19,22,3020,98,163,67 338 | Suzuki XL-7 EX,SUV,0,0,23699,22307,2.7,6,185,18,22,3682,110,187,70 339 | Toyota 4Runner SR5 V6,SUV,0,0,27710,24801,4,6,245,18,21,4035,110,189,74 340 | Toyota Avalon XL 4dr,Sedan,0,0,26560,23693,3,6,210,21,29,3417,107,192,72 341 | Toyota Avalon XLS 4dr,Sedan,0,0,30920,27271,3,6,210,21,29,3439,107,192,72 342 | Toyota Camry LE 4dr,Sedan,0,0,19560,17558,2.4,4,157,24,33,3086,107,189,71 343 | Toyota Camry LE V6 4dr,Sedan,0,0,22775,20325,3,6,210,21,29,3296,107,189,71 344 | Toyota Camry Solara SE 2dr,Sedan,0,0,19635,17722,2.4,4,157,24,33,3175,107,193,72 345 | Toyota Camry Solara SE V6 2dr,Sedan,0,0,21965,19819,3.3,6,225,20,29,3417,107,193,72 346 | Toyota Camry Solara SLE V6 2dr,Sedan,0,0,26510,23908,3.3,6,225,20,29,3439,107,193,72 347 | Toyota Camry XLE V6 4dr,Sedan,0,0,25920,23125,3,6,210,21,29,3362,107,189,71 348 | Toyota Celica GT-S 2dr,Sports Car,0,0,22570,20363,1.8,4,180,24,33,2500,102,171,68 349 | Toyota Corolla CE 
4dr,Sedan,0,0,14085,13065,1.8,4,130,32,40,2502,102,178,67 350 | Toyota Corolla LE 4dr,Sedan,0,0,15295,13889,1.8,4,130,32,40,2524,102,178,67 351 | Toyota Corolla S 4dr,Sedan,0,0,15030,13650,1.8,4,130,32,40,2524,102,178,67 352 | Toyota Echo 2dr auto,Sedan,0,0,11560,10896,1.5,4,108,33,39,2085,93,163,65 353 | Toyota Echo 2dr manual,Sedan,0,0,10760,10144,1.5,4,108,35,43,2035,93,163,65 354 | Toyota Echo 4dr,Sedan,0,0,11290,10642,1.5,4,108,35,43,2055,93,163,65 355 | Toyota Highlander V6,SUV,1,0,27930,24915,3.3,6,230,18,24,3935,107,185,72 356 | Toyota Land Cruiser,SUV,1,0,54765,47986,4.7,8,325,13,17,5390,112,193,76 357 | Toyota Matrix XR,Wagon,0,0,16695,15156,1.8,4,130,29,36,2679,102,171,70 358 | Toyota MR2 Spyder convertible 2dr,Sports Car,0,1,25130,22787,1.8,4,138,26,32,2195,97,153,67 359 | Toyota Prius 4dr (gas/electric),Sedan,0,0,20510,18926,1.5,4,110,59,51,2890,106,175,68 360 | Toyota RAV4,SUV,1,0,20290,18553,2.4,4,161,22,27,3119,98,167,68 361 | Toyota Sequoia SR5,SUV,1,0,35695,31827,4.7,8,240,14,17,5270,118,204,78 362 | Toyota Sienna CE,Minivan,0,0,23495,21198,3.3,6,230,19,27,4120,119,200,77 363 | Toyota Sienna XLE Limited,Minivan,0,0,28800,25690,3.3,6,230,19,27,4165,119,200,77 364 | Volkswagen Golf GLS 4dr,Sedan,0,0,18715,17478,2,4,115,24,31,2897,99,165,68 365 | Volkswagen GTI 1.8T 2dr hatch,Sedan,0,0,19825,18109,1.8,4,180,24,31,2934,99,168,68 366 | Volkswagen Jetta GL,Wagon,0,0,19005,17427,2,4,115,24,30,3034,99,174,68 367 | Volkswagen Jetta GLI VR6 4dr,Sedan,0,0,23785,21686,2.8,6,200,21,30,3179,99,172,68 368 | Volkswagen Jetta GLS TDI 4dr,Sedan,0,0,21055,19638,1.9,4,100,38,46,3003,99,172,68 369 | Volkswagen New Beetle GLS 1.8T 2dr,Sedan,0,0,21055,19638,1.8,4,150,24,31,2820,99,161,68 370 | Volkswagen New Beetle GLS convertible 2dr,Sedan,0,0,23215,21689,2,4,115,24,30,3082,99,161,68 371 | Volkswagen Passat GLS 1.8T,Wagon,0,0,24955,22801,1.8,4,170,22,31,3338,106,184,69 372 | Volkswagen Passat GLS 4dr,Sedan,0,0,23955,21898,1.8,4,170,22,31,3241,106,185,69 373 | 
Volkswagen Passat GLX V6 4MOTION 4dr,Sedan,0,0,33180,30583,2.8,6,190,19,26,3721,106,185,69 374 | Volkswagen Passat W8,Wagon,0,0,40235,36956,4,8,270,18,25,4067,106,184,69 375 | Volkswagen Passat W8 4MOTION 4dr,Sedan,0,0,39235,36052,4,8,270,18,25,3953,106,185,69 376 | Volkswagen Touareg V6,SUV,1,0,35515,32243,3.2,6,220,15,20,5086,112,187,76 377 | Volvo C70 HPT convertible 2dr,Sedan,0,0,42565,40083,2.3,5,242,20,26,3450,105,186,72 378 | Volvo C70 LPT convertible 2dr,Sedan,0,0,40565,38203,2.4,5,197,21,28,3450,105,186,72 379 | Volvo S40 4dr,Sedan,0,0,25135,23701,1.9,4,170,22,29,2767,101,178,68 380 | Volvo S60 2.5 4dr,Sedan,1,0,31745,29916,2.5,5,208,20,27,3903,107,180,71 381 | Volvo S60 R 4dr,Sedan,1,0,37560,35382,2.5,5,300,18,25,3571,107,181,71 382 | Volvo S60 T5 4dr,Sedan,0,0,34845,32902,2.3,5,247,20,28,3766,107,180,71 383 | Volvo S80 2.5T 4dr,Sedan,1,0,37885,35688,2.5,5,194,20,27,3691,110,190,72 384 | Volvo S80 2.9 4dr,Sedan,0,0,37730,35542,2.9,6,208,20,28,3576,110,190,72 385 | Volvo S80 T6 4dr,Sedan,0,0,45210,42573,2.9,6,268,19,26,3653,110,190,72 386 | Volvo V40,Wagon,0,0,26135,24641,1.9,4,170,22,29,2822,101,180,68 387 | Volvo XC70,Wagon,1,0,35145,33112,2.5,5,208,20,27,3823,109,186,73 388 | Volvo XC90 T6,SUV,1,0,41250,38851,2.9,6,268,15,20,4638,113,189,75 --------------------------------------------------------------------------------