├── bobaserver ├── bobastats │ ├── __init__.py │ ├── sensitivity.py │ ├── bootstrap.py │ └── sampling.py ├── __init__.py ├── util.py ├── routes.py ├── run_server.py └── common.py ├── client ├── assets │ ├── null.png │ ├── blues.png │ ├── favicon.ico │ ├── simple.png │ ├── idl-logo.png │ ├── stacking.png │ ├── blues-thick.png │ └── style.css ├── .babelrc ├── webpack.prod.js ├── .eslintrc.js ├── webpack.dev.js ├── index.html ├── src │ ├── App.vue │ ├── components │ │ ├── LegendView.vue │ │ ├── HelpButton.vue │ │ ├── LoadingSpinner.vue │ │ ├── DetailTip.vue │ │ ├── TitleMenu.vue │ │ ├── monitor │ │ │ ├── DecisionProgressView.vue │ │ │ ├── OutcomeProgressView.vue │ │ │ ├── SnapshotButton.vue │ │ │ ├── MonitorAdgView.vue │ │ │ ├── MonitorModelFitView.vue │ │ │ ├── ErrorMessageView.vue │ │ │ └── ProgressCard.vue │ │ ├── InferenceConfig.vue │ │ ├── AdgView.vue │ │ ├── FilterOptionView.vue │ │ ├── OptionRatioView.vue │ │ └── SmallMultiplesView.vue │ ├── main.js │ ├── controllers │ │ ├── vis │ │ │ ├── raw_scale.js │ │ │ ├── dot_plot_scale.js │ │ │ ├── graph_scale.js │ │ │ ├── base_scale.js │ │ │ ├── brush.js │ │ │ └── brushX.js │ │ ├── config.js │ │ ├── constants.js │ │ ├── util.js │ │ ├── inference │ │ │ ├── infer_simple_plot.js │ │ │ ├── infer_null_plot.js │ │ │ └── infer_stacking_plot.js │ │ ├── raw_plot.js │ │ └── monitor │ │ │ └── outcome_progress_plot.js │ ├── archetype_vis │ │ ├── ChartsPage.vue │ │ ├── DensityPage.vue │ │ ├── HistPage.vue │ │ ├── VolcanoPage.vue │ │ ├── ContourPage.vue │ │ ├── ParallelLinePlot.vue │ │ ├── ForestPlotPage.vue │ │ ├── GridPage.vue │ │ ├── PCurvePage.vue │ │ ├── SpecCurvePage.vue │ │ ├── p_curve_plot.js │ │ ├── density_plot.js │ │ ├── parallel_line_plot.js │ │ ├── histogram.js │ │ ├── FacetPage.vue │ │ ├── forest_plot.js │ │ └── spec_curve_plot.js │ ├── router │ │ └── index.js │ └── pages │ │ ├── MonitorPage.vue │ │ └── MainPage.vue ├── webpack.common.js └── package.json ├── MANIFEST.in ├── deploy_package.sh ├── setup.cfg ├── .gitignore 
├── doc ├── CLI.rst ├── visualizer_config.md └── format.md ├── deploy_demo.sh ├── HISTORY.rst ├── LICENSE ├── setup.py ├── README.rst └── example └── mortgage └── overview.json /bobaserver/bobastats/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /client/assets/null.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwdata/boba-visualizer/HEAD/client/assets/null.png -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include client/dist/* 2 | include README.rst 3 | include LICENSE 4 | include HISTORY.rst 5 | -------------------------------------------------------------------------------- /client/assets/blues.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwdata/boba-visualizer/HEAD/client/assets/blues.png -------------------------------------------------------------------------------- /client/assets/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwdata/boba-visualizer/HEAD/client/assets/favicon.ico -------------------------------------------------------------------------------- /client/assets/simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwdata/boba-visualizer/HEAD/client/assets/simple.png -------------------------------------------------------------------------------- /client/assets/idl-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwdata/boba-visualizer/HEAD/client/assets/idl-logo.png 
-------------------------------------------------------------------------------- /client/assets/stacking.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwdata/boba-visualizer/HEAD/client/assets/stacking.png -------------------------------------------------------------------------------- /client/assets/blues-thick.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwdata/boba-visualizer/HEAD/client/assets/blues-thick.png -------------------------------------------------------------------------------- /deploy_package.sh: -------------------------------------------------------------------------------- 1 | cd client 2 | npm run build 3 | 4 | cd .. 5 | rm -rf boba_visualizer.egg-info/ 6 | rm -rf build/ 7 | rm -rf dist/ 8 | python3 setup.py sdist bdist_wheel 9 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 1.1.1 3 | commit = False 4 | tag = False 5 | 6 | [bumpversion:file:setup.py] 7 | search = version='{current_version}' 8 | replace = version='{new_version}' 9 | -------------------------------------------------------------------------------- /client/.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": [ 3 | ["@babel/env", { 4 | "modules": false, 5 | "targets": { 6 | "browsers": ["> 1%", "last 2 versions", "not ie <= 8"] 7 | } 8 | }] 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /client/webpack.prod.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | const merge = require('webpack-merge') 3 | const common = require('./webpack.common.js') 4 | 5 | module.exports = merge(common, { 6 | mode: 'production', 7 
| devtool: 'source-map' 8 | }) 9 | -------------------------------------------------------------------------------- /client/.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | parserOptions: { 3 | parser: 'babel-eslint' 4 | }, 5 | extends: [ 6 | 'plugin:vue/recommended', 7 | 'standard' 8 | ], 9 | plugins: [ 10 | 'vue' 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /client/webpack.dev.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | const merge = require('webpack-merge') 3 | const common = require('./webpack.common.js') 4 | 5 | module.exports = merge(common, { 6 | mode: "development", 7 | watch: true, 8 | devtool: 'cheap-module-eval-source-map' 9 | }) 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # javascript 2 | node_modules/ 3 | 4 | # IDE 5 | .idea/ 6 | 7 | # virtual env 8 | env/ 9 | 10 | # packaging 11 | *.egg-info/ 12 | __pycache__/ 13 | dist/ 14 | build/lib/ 15 | 16 | # other 17 | bobaserver/demo/ 18 | 19 | # derived data 20 | example/mortgage/sensitivity.json 21 | -------------------------------------------------------------------------------- /client/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Boba 7 | 8 | 9 |
"""Package entry point for the Boba visualizer server.

Creates the objects shared by the rest of the package: the Flask ``app``,
the Socket.IO wrapper ``socketio`` and the background ``scheduler``.
"""
from flask import Flask
from flask_socketio import SocketIO
from apscheduler.schedulers.background import BackgroundScheduler

# Folder holding the built front-end. The client build writes its bundle to
# bobaserver/dist (see client/webpack.common.js); a relative static_folder is
# resolved by Flask against the application's root path.
P_DIST = './dist/'

# static_url_path='' serves the static files from the site root instead of
# the default /static prefix.
app = Flask(__name__, static_url_path='', static_folder=P_DIST)
socketio = SocketIO(app)
# The scheduler is only constructed here; nothing in this module starts it.
scheduler = BackgroundScheduler()

# These imports are deliberately placed last, after the shared objects exist.
# Presumably routes/monitor import `app`, `socketio` or `scheduler` from this
# package and register their handlers as an import side effect -- confirm
# before moving them to the top of the file.
# NOTE(review): confirm that bobaserver/monitor.py is shipped with the
# package; this import fails at start-up otherwise.
from bobaserver import routes
from bobaserver import monitor
30 | -------------------------------------------------------------------------------- /deploy_demo.sh: -------------------------------------------------------------------------------- 1 | rm -rf dist 2 | 3 | # create dist 4 | python setup.py sdist bdist_wheel 5 | rm -rf build 6 | cd dist 7 | mkdir demo 8 | tar -xf boba-*.tar.gz -C demo --strip-components 1 9 | 10 | # copy data 11 | cp -R ~/code/multiverse-spec/example/hurricane/prototype/server ./ 12 | mv server data 13 | mv data demo/ 14 | 15 | # create init.sh 16 | text="virtualenv -p python3 env 17 | source env/bin/activate 18 | pip install -e . 19 | boba-server -i data/" 20 | echo "$text" > demo/init.sh 21 | 22 | echo "Done!" 23 | echo "Demo is in dist/demo/" 24 | echo "Run 'sh init.sh' in the demo folder to start" 25 | -------------------------------------------------------------------------------- /client/src/main.js: -------------------------------------------------------------------------------- 1 | import Vue from 'vue' 2 | import App from './App.vue' 3 | import router from './router' 4 | import BootstrapVue from 'bootstrap-vue' 5 | import 'bootstrap/dist/css/bootstrap.css' 6 | import 'bootstrap-vue/dist/bootstrap-vue.css' 7 | import '@fortawesome/fontawesome-free/js/all.min.js' 8 | import '../assets/style.css' 9 | import {log_debug} from './controllers/config' 10 | 11 | Vue.use(BootstrapVue) 12 | 13 | /* eslint-disable-next-line no-new */ 14 | new Vue({ 15 | el: '#app', 16 | router, 17 | render: h => h(App), 18 | mounted: function () { 19 | log_debug('main.js', 'mounted()') 20 | } 21 | }) 22 | -------------------------------------------------------------------------------- /client/src/components/HelpButton.vue: -------------------------------------------------------------------------------- 1 | 15 | 16 | 21 | 22 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | 
/**
 * Scale pair for the raw-data plot: a band scale on y separating the
 * 'actual' and 'pred' rows, and a linear scale on x.
 */
class RawScale extends BaseScale {
  /**
   * @param range Two-element array used as the x domain.
   * @param params Layout parameters forwarded to BaseScale.
   */
  constructor (range, params) {
    super(params)

    // both scales are created in init()
    this.x = null
    this.y = null

    this.init(range)
  }

  /**
   * Build the x and y scales for the current canvas size.
   * @param range Two-element array used as the x domain.
   */
  init (range) {
    const innerHeight = this.height()
    const innerWidth = this.width()

    // y: one band per category
    const bandScale = d3.scaleBand()
      .domain(['actual', 'pred'])
      .rangeRound([0, innerHeight])
      .padding(0.1)

    // x: linear over the supplied range
    // NOTE(review): nice() runs before the domain is assigned below, so it
    // only touches the default domain and does not round `range` -- confirm
    // whether the x domain was meant to be niced.
    const linearScale = d3.scaleLinear()
      .range([0, innerWidth])
      .nice()
    linearScale.domain(range)

    this.y = bandScale
    this.x = linearScale
  }
}
/**
 * Forward the arguments to console.log, but only in a development build
 * (DEBUG is true when NODE_ENV is 'development'); otherwise do nothing.
 */
function log_debug (...args) {
  if (!DEBUG) {
    return
  }
  console.log(...args)
}
/**
 * Scales for the graph view: a point scale on y with one position per
 * distinct level, and an identity scale on x.
 */
class GraphScale extends BaseScale {
  /**
   * @param nodes Graph nodes; their y_field values define the levels.
   * @param params Layout parameters forwarded to BaseScale, plus an optional
   *   node_radius.
   */
  constructor (nodes, params) {
    super(params)

    // falls back to 10 when node_radius is missing or falsy
    this.node_radius = params.node_radius || 10

    // both scales are created in init()
    this.x = null
    this.y = null

    this.init(nodes)
  }

  /**
   * Build the x and y scales from the nodes and the current canvas size.
   * @param nodes Graph nodes; their y_field values define the levels.
   */
  init (nodes) {
    // vertical space reserved per level, in pixels
    const LEVEL_H = 50
    const innerHeight = this.height()
    const innerWidth = this.width()

    // distinct level values, in order of first appearance
    const nodeLevels = _.map(nodes, this.y_field)
    const levels = _.uniq(nodeLevels)

    // y: never taller than the canvas, never more than LEVEL_H per level
    const ySpan = Math.min(innerHeight, levels.length * LEVEL_H)
    this.y = d3.scalePoint()
      .domain(levels)
      .rangeRound([0, ySpan])
      .padding(0.5)

    // x: identity mapping; the x position is computed manually and stored
    // in _x on each datum
    this.x = d3.scaleLinear()
      .domain([0, innerWidth])
      .range([0, innerWidth])
  }
}
this.margin.top - this.margin.bottom 45 | } 46 | } 47 | 48 | export default BaseScale 49 | -------------------------------------------------------------------------------- /client/webpack.common.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | const { VueLoaderPlugin } = require('vue-loader') 3 | const path = require('path') 4 | 5 | module.exports = { 6 | entry: [ 7 | './src/main.js' 8 | ], 9 | output: { 10 | filename: 'bundle.js', 11 | path: path.resolve(__dirname, '../bobaserver/dist') 12 | }, 13 | module: { 14 | rules: [ 15 | { 16 | test: /\.vue$/, 17 | use: 'vue-loader' 18 | }, 19 | { 20 | test: /\.css$/, 21 | use: [ 22 | 'vue-style-loader', 23 | 'css-loader' 24 | ] 25 | }, 26 | { 27 | test: /\.styl(us)?$/, 28 | use: [ 29 | 'vue-style-loader', 30 | 'css-loader', 31 | 'stylus-loader' 32 | ] 33 | }, 34 | { 35 | test: /\.js$/, 36 | use: 'babel-loader' 37 | }, 38 | { 39 | test: /\.(png|svg|jpg|gif)$/, 40 | use: [ 41 | 'file-loader' 42 | ] 43 | }, 44 | { 45 | test: /\.(woff|woff2|eot|ttf|otf)$/, 46 | use: [ 47 | 'file-loader' 48 | ] 49 | } 50 | ] 51 | }, 52 | plugins: [ 53 | new VueLoaderPlugin() 54 | ] 55 | } -------------------------------------------------------------------------------- /client/src/controllers/constants.js: -------------------------------------------------------------------------------- 1 | const UNC_TYPE = { 2 | AGG: 'Aggregated', 3 | PDF: 'PDFs', 4 | CDF: 'CDFs' 5 | } 6 | 7 | const COLOR_TYPE = { 8 | P: 'P-value', 9 | SIGN: 'Sign', 10 | FIT: 'Model Fit', 11 | CUSTOM: 'Custom' 12 | } 13 | 14 | const VIEW_TYPE = { 15 | FIT: 'Model Quality', 16 | ERROR: 'Error Messages' 17 | } 18 | 19 | const SCHEMA = { 20 | POINT: 'point_estimate', 21 | FIT: 'fit', 22 | P: 'p_value', 23 | STDERR: 'standard_error', 24 | ANNOTATE: 'annotation', 25 | UNC: 'uncertainty', 26 | NUL: 'null_distribution', 27 | WEIGHT: 'stacking_weight', 28 | RAW: 'prediction' 29 | } 30 | 31 | const RUN_STATUS = { 32 | EMPTY: 'New', 
33 | RUNNING: 'Running', 34 | STOPPING: 'Stopping', 35 | STOPPED: 'Stopped', 36 | DONE: 'Done' 37 | } 38 | 39 | const DTYPE = { 40 | POINT: 'float', 41 | FIT: 'float', 42 | P: 'float', 43 | STDERR: 'float', 44 | ANNOTATE: 'string', 45 | UNC: 'float', 46 | NUL: 'float', 47 | WEIGHT: 'float' 48 | } 49 | 50 | const SENSITIVITY = { 51 | F: 'f', 52 | KS: 'ks', 53 | AD: 'ad' 54 | } 55 | 56 | const sign = 0 57 | 58 | export {UNC_TYPE, COLOR_TYPE, VIEW_TYPE, SCHEMA, DTYPE, RUN_STATUS, 59 | SENSITIVITY, sign} 60 | -------------------------------------------------------------------------------- /client/src/archetype_vis/ChartsPage.vue: -------------------------------------------------------------------------------- 1 | 20 | 21 | 26 | 27 | 30 | -------------------------------------------------------------------------------- /client/src/archetype_vis/DensityPage.vue: -------------------------------------------------------------------------------- 1 | 16 | 17 | 51 | -------------------------------------------------------------------------------- /client/src/components/DetailTip.vue: -------------------------------------------------------------------------------- 1 | 9 | 10 | 42 | 43 | -------------------------------------------------------------------------------- /client/src/components/TitleMenu.vue: -------------------------------------------------------------------------------- 1 | 22 | 23 | 52 | 53 | -------------------------------------------------------------------------------- /client/assets/style.css: -------------------------------------------------------------------------------- 1 | .text-small { 2 | font-size: 0.8rem; 3 | } 4 | 5 | .text-large { 6 | font-size: 1.2rem; 7 | } 8 | 9 | .font-semi-bold { 10 | font-weight: 500; 11 | } 12 | 13 | .mouse-pointer { 14 | cursor: pointer 15 | } 16 | 17 | .bb-border-top { 18 | border-top: 1px solid rgba(0,0,0,0.1); 19 | } 20 | 21 | .bb-border-bottom { 22 | border-bottom: 1px solid rgba(0,0,0,0.1); 23 | } 24 | 25 | .bb-bar-title { 
26 | font-size: 0.8rem; 27 | font-weight: 500; 28 | text-transform: capitalize; 29 | } 30 | 31 | .mn-card { 32 | background-color: #fff; 33 | border: 1px solid rgba(0,0,0,0.1); 34 | border-radius: 2px; 35 | padding: 1rem; 36 | } 37 | 38 | .mn-card-title { 39 | font-size: 0.8em; 40 | font-weight: 500; 41 | color: #6c757d; 42 | } 43 | 44 | .mn-card-title-lg { 45 | font-size: 0.9em; 46 | font-weight: 500; 47 | color: #6c757d; 48 | } 49 | 50 | .mn-card-body { 51 | font-size: 0.8em; 52 | color: #6c757d; 53 | } 54 | 55 | .mn-drop-container .mn-drop-menu { 56 | font-size: 12px; 57 | line-height: 1.5; 58 | min-width: 100px; 59 | padding: 0.2rem 0; 60 | } 61 | 62 | .mn-drop-container .btn { 63 | font-size: 12px; 64 | } 65 | 66 | .mn-dot.muted { 67 | fill: #9ca5ad; 68 | } 69 | 70 | .axis.muted .domain{ 71 | stroke: #6c757d 72 | } 73 | .axis.muted .tick text { 74 | fill: #6c757d 75 | } 76 | text.axis-label.muted { 77 | fill: #6c757d 78 | } 79 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019, University of Washington Interactive Data Lab. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /client/src/archetype_vis/HistPage.vue: -------------------------------------------------------------------------------- 1 | 17 | 18 | 56 | 57 | 60 | -------------------------------------------------------------------------------- /client/src/archetype_vis/VolcanoPage.vue: -------------------------------------------------------------------------------- 1 | 20 | 21 | 57 | 58 | 61 | -------------------------------------------------------------------------------- /client/src/archetype_vis/ContourPage.vue: -------------------------------------------------------------------------------- 1 | 20 | 21 | 58 | 59 | 62 | -------------------------------------------------------------------------------- /client/src/router/index.js: -------------------------------------------------------------------------------- 1 | import Vue from 'vue' 2 | import Router from 'vue-router' 3 | 4 | import MainPage from '../pages/MainPage.vue' 5 | import MonitorPage from '../pages/MonitorPage.vue' 6 | import SpecCurvePage from '../archetype_vis/SpecCurvePage.vue' 7 | import GridPage from '../archetype_vis/GridPage.vue' 
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Packaging script for boba-visualizer.

Builds the ``bobaserver`` package (including the pre-built front-end in
``bobaserver/dist``) and installs the ``boba-server`` console command.
"""

from setuptools import setup, find_packages

# Read the long-description sources with an explicit encoding so the build
# does not depend on the locale's default encoding.
with open('README.rst', encoding='utf-8') as readme_file:
    readme = readme_file.read()

with open('HISTORY.rst', encoding='utf-8') as history_file:
    history = history_file.read()

requirements = ['flask>=1.1.1', 'Click>=7.0', 'pandas>=1.0.1', 'scipy>=1.4.1',
                'boba>=1.1.1', 'flask-socketio>=5.0.0', 'apscheduler>=3.7.0',
                'scikit-learn>=0.24.1']

setup_requirements = []

test_requirements = []

setup(
    name='boba-visualizer',
    url='https://github.com/yyyliu/boba-visualizer',
    # Keep this exact `version='...'` spelling: setup.cfg's bumpversion
    # section searches for it when bumping the version.
    version='1.1.1',
    author="Yang Liu",
    author_email='yliu0@uw.edu',
    license="BSD license",
    description="Visualize multiverse outcomes",
    keywords='multiverse analysis',
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Science/Research',
        'License :: OSI Approved :: BSD License',
        'Natural Language :: English',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
    ],
    entry_points={
        'console_scripts': [
            'boba-server = bobaserver.run_server:main',
        ],
    },
    install_requires=requirements,
    long_description=readme + '\n\n' + history,
    packages=find_packages(include=['bobaserver', 'bobaserver.*']),
    setup_requires=setup_requirements,
    test_suite='tests',
    tests_require=test_requirements,
    zip_safe=False,
    package_dir={'bobaserver': 'bobaserver/'},
    # Ship the built client (written to bobaserver/dist by the client build).
    package_data={'bobaserver': ['./dist/*']},
    include_package_data=True
)
-------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | boba-visualizer 3 | =============== 4 | 5 | The Boba Visualizer is a visual analysis interface for exploring multiverse outcomes. 6 | It helps users explore how analytic decisions impact statistical estimates, inferential uncertainty, and model fit. 7 | Watch the visualizer in action in this `video`_. 8 | 9 | .. image:: https://yangliu.life/build/images/boba-teaser.png 10 | :alt: Teaser image 11 | 12 | .. _video: https://youtu.be/NtHrUm4_kyw 13 | 14 | Installation 15 | ============ 16 | 17 | You can download and install the latest version of this software from the 18 | Python Package Index (PyPI):: 19 | 20 | pip install --upgrade boba-visualizer 21 | 22 | 23 | Usage 24 | ===== 25 | 26 | To start the visualizer, use the following command:: 27 | 28 | boba-server -i /path/to/file 29 | 30 | You will need to supply your own file path, which contains your multiverse outcomes and 31 | accompanying metadata. Learn more about the appropriate file format here_. This repository 32 | also includes an `example folder`_ with the outcomes from the `mortgage multiverse`_. You 33 | can explore the example by cloning this repo and running:: 34 | 35 | boba-server -i ./example/mortgage 36 | 37 | After running the above command in your console, open your browser and navigate to 38 | http://127.0.0.1:8080/ to start the user interface. 39 | 40 | You can also use a `configuration file`_ to control various aspects of the visualizer, 41 | and use `CLI options`_ to change the behavior of the server. 42 | 43 | .. _Boba DSL: https://github.com/uwdata/boba 44 | .. _here: https://github.com/uwdata/boba-visualizer/tree/master/doc/format.md 45 | .. _configuration file: https://github.com/uwdata/boba-visualizer/tree/master/doc/visualizer_config.md 46 | ..
_CLI options: https://github.com/uwdata/boba-visualizer/blob/master/doc/CLI.rst 47 | .. _example folder: https://github.com/uwdata/boba-visualizer/tree/master/example/mortgage 48 | .. _mortgage multiverse: https://github.com/uwdata/boba/tree/master/example/mortgage 49 | -------------------------------------------------------------------------------- /client/src/archetype_vis/PCurvePage.vue: -------------------------------------------------------------------------------- 1 | 18 | 19 | 65 | -------------------------------------------------------------------------------- /example/mortgage/overview.json: -------------------------------------------------------------------------------- 1 | { 2 | "decisions": [ 3 | {"var": "black", "options": ["+ black", ""]}, 4 | {"var": "housing_expense_ratio", "options": ["+ housing_expense_ratio", ""]}, 5 | {"var": "self_employed", "options": ["+ self_employed", ""]}, 6 | {"var": "married", "options": ["+ married", ""]}, 7 | {"var": "bad_history", "options": ["+ bad_history", ""]}, 8 | {"var": "PI_ratio", "options": ["+ PI_ratio", ""]}, 9 | {"var": "loan_to_value", "options": ["+ loan_to_value", ""]}, 10 | {"var": "denied_PMI", "options": ["+ denied_PMI", ""]} 11 | ], 12 | "graph": { 13 | "nodes": [ 14 | {"id": 0, "name": "black"}, 15 | {"id": 1, "name": "housing_expense_ratio"}, 16 | {"id": 2, "name": "self_employed"}, 17 | {"id": 3, "name": "married"}, 18 | {"id": 4, "name": "bad_history"}, 19 | {"id": 5, "name": "PI_ratio"}, 20 | {"id": 6, "name": "loan_to_value"}, 21 | {"id": 7, "name": "denied_PMI"} 22 | ], 23 | "edges": [ 24 | {"source": 0, "target": 1, "type": "order"}, 25 | {"source": 1, "target": 2, "type": "order"}, 26 | {"source": 2, "target": 3, "type": "order"}, 27 | {"source": 3, "target": 4, "type": "order"}, 28 | {"source": 4, "target": 5, "type": "order"}, 29 | {"source": 5, "target": 6, "type": "order"}, 30 | {"source": 6, "target": 7, "type": "order"} 31 | ] 32 | }, 33 | "visualizer": { 34 | "files": [ 35 | {"id": 
"est", "path": "estimates.csv"}, 36 | {"id": "unc", "path": "uncertainty.csv"}, 37 | {"id": "fit", "path": "raw/disagg_pred_{}.csv", "multi": true} 38 | ], 39 | "schema": { 40 | "point_estimate": {"file": "est", "field": "estimate"}, 41 | "p_value": {"file": "est", "field": "p.value"}, 42 | "fit": {"file": "est", "field": "NRMSE"}, 43 | "uncertainty": {"file": "unc", "field": "estimate"}, 44 | "prediction": {"file": "fit"} 45 | }, 46 | "labels": { 47 | "dataset": "mortgage", 48 | "x_axis": "Coefficient on female", 49 | "x_axis_fit": "Approved", 50 | "x_range": [-3, 8] 51 | }, 52 | "sensitivity": "ad" 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /client/src/components/monitor/DecisionProgressView.vue: -------------------------------------------------------------------------------- 1 | 16 | 17 | 68 | 69 | 72 | -------------------------------------------------------------------------------- /client/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "boba-visualizer", 3 | "version": "0.1.0", 4 | "license": "BSD-3-Clause", 5 | "description": "Visual analysis of multiverse outcomes", 6 | "author": { 7 | "name": "Yang Liu", 8 | "url": "http://yangliu.life" 9 | }, 10 | "repository": { 11 | "type": "git", 12 | "url": "https://github.com/yyyliu/boba-visualizer" 13 | }, 14 | "scripts": { 15 | "build": "npm run prebuild && npm run webpack-prod", 16 | "dev": "npm run prebuild && npm run webpack-dev", 17 | "prebuild": "rm -rf ../bobaserver/dist && mkdir ../bobaserver/dist && cp -R assets/ ../bobaserver/dist && cp index.html ../bobaserver/dist", 18 | "lint": "eslint --ext .js,.vue src", 19 | "webpack-dev": "webpack --config webpack.dev.js", 20 | "webpack-prod": "webpack --progress --hide-modules --config webpack.prod.js", 21 | "start": "cd ../bobaserver/dist && python -m SimpleHTTPServer 8080" 22 | }, 23 | "dependencies": { 24 | "@fortawesome/fontawesome-free": 
"^5.11.2", 25 | "axios": "^0.19.0", 26 | "bootstrap": "^4.4.1", 27 | "bootstrap-vue": "^2.1.0", 28 | "d3": "^5.14.2", 29 | "d3-contour": "^2.0.0", 30 | "d3-hexbin": "^0.2.2", 31 | "dagre": "^0.8.5", 32 | "lodash": "^4.17.15", 33 | "socket.io-client": "^3.1.2", 34 | "vue": "^2.6.10", 35 | "vue-loading-template": "^1.3.2", 36 | "vue-router": "^3.1.3", 37 | "vue-slider-component": "^3.1.1", 38 | "vuescroll": "^4.17.3" 39 | }, 40 | "devDependencies": { 41 | "@babel/core": "^7.7.4", 42 | "@babel/preset-env": "^7.7.4", 43 | "babel-eslint": "^10.0.3", 44 | "babel-loader": "^8.0.6", 45 | "css-loader": "^3.2.0", 46 | "eslint": "^6.7.2", 47 | "eslint-config-standard": "^14.1.0", 48 | "eslint-loader": "^3.0.2", 49 | "eslint-plugin-import": "^2.18.2", 50 | "eslint-plugin-node": "^10.0.0", 51 | "eslint-plugin-promise": "^4.2.1", 52 | "eslint-plugin-standard": "^4.0.1", 53 | "eslint-plugin-vue": "^6.0.1", 54 | "stylus": "^0.54.7", 55 | "stylus-loader": "^3.0.2", 56 | "vue-loader": "^15.7.2", 57 | "vue-style-loader": "^4.1.2", 58 | "vue-template-compiler": "^2.6.10", 59 | "webpack": "^4.41.2", 60 | "webpack-cli": "^3.3.10", 61 | "webpack-merge": "^4.2.2" 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /client/src/components/monitor/OutcomeProgressView.vue: -------------------------------------------------------------------------------- 1 | 7 | 8 | 70 | 71 | 82 | -------------------------------------------------------------------------------- /client/src/archetype_vis/SpecCurvePage.vue: -------------------------------------------------------------------------------- 1 | 6 | 7 | 75 | 76 | -------------------------------------------------------------------------------- /client/src/components/monitor/SnapshotButton.vue: -------------------------------------------------------------------------------- 1 | 11 | 12 | 83 | 84 | 97 | -------------------------------------------------------------------------------- 
/**
 * Two-dimensional rectangular brush over a scatter-style plot.
 *
 * While the user drags, every element matched by `selector` whose raw (x, y)
 * value falls inside the brushed rectangle gets the CSS class `brushed`.
 * When the drag ends, the matching data points are broadcast on the event
 * bus as a `brush` event.
 */
class Brush {
  /**
   * @param data Array of data points that can be selected.
   * @param scale Scale object exposing `x`, `y` (invertible d3 scales) and
   *   the accessors `getRawX(p)` / `getRawY(p)`.
   * @param selector CSS selector of the elements drawn for `data`.
   */
  constructor (data, scale, selector) {
    this.selector = selector
    this.brush = this.init(data, scale)
  }

  /**
   * Build the d3 brush and wire up its start / brush / end handlers.
   * @returns the configured d3 brush behavior
   */
  init (data, scale) {
    let selector = this.selector

    // Convert the current pixel-space selection into data-space bounds.
    function selectionBounds () {
      // a 2D brush reports [[x0, y0], [x1, y1]]; flatten to x0, y0, x1, y1
      let sel = _.flatten(d3.event.selection)
      let bounds = _.map(sel, (s, idx) => idx % 2 ? scale.y.invert(s) : scale.x.invert(s))
      return label_bounds(bounds)
    }

    // Whether point p lies inside the (inclusive) data-space bounds.
    function isInside (p, bounds) {
      return scale.getRawX(p) >= bounds.x0 &&
        scale.getRawX(p) <= bounds.x1 &&
        scale.getRawY(p) >= bounds.y0 &&
        scale.getRawY(p) <= bounds.y1
    }

    function brushstart () {
      // a new gesture discards the previous selection
      d3.selectAll(selector).classed('brushed', false)
      bus.$emit('brush', [])
    }

    function brushing () {
      // empty selection
      if (!d3.event.selection) return

      let bounds = selectionBounds()

      // change color of selected points
      d3.selectAll(selector)
        .classed('brushed', (p) => isInside(p, bounds))
    }

    function brushended () {
      // empty selection
      if (!d3.event.selection) return

      let bounds = selectionBounds()
      let pts = _.filter(data, (p) => isInside(p, bounds))

      bus.$emit('brush', pts)
    }

    // The handlers above need both x and y extents, so this must be the 2D
    // brush. d3.brushX() only reports [x0, x1], which leaves the y bounds
    // undefined and makes every containment test fail.
    return d3.brush()
      .on('start', brushstart)
      .on('brush', brushing)
      .on('end', brushended)
  }

  /**
   * Clear current brush selection
   */
  clear () {
    d3.select('.brush').call(this.brush.move, null)
  }

  /**
   * Remove brush div
   */
  remove () {
    d3.selectAll('.brush')
      .call(this.brush.move, null)
      .remove()
  }

  /**
   * Attach the brush to the parent svg as the top-most layer
   * @param svg
   */
  attach (svg) {
    svg.append('g').attr('class', 'brush').call(this.brush)
  }
}
def check_path(fn):
    """ Check if path exists.

    Returns None when `fn` exists, otherwise an error dict of the form
    {'status': 'fail', 'message': ...}.
    """
    if not os.path.exists(fn):
        msg = 'Error: {} does not exist.'.format(fn)
        return {'status': 'fail', 'message': msg}
    return None


def read_csv(fn, row_start=1):
    """ Read csv with path check, discarding (optionally) the first row.

    Parameters:
     - fn: path of the CSV file
     - row_start: index of the first row to keep (1 skips the header row)

    Returns: (err, rows). `err` is None on success; when the file does not
    exist it is the error dict from check_path and `rows` is None.
    """
    err = check_path(fn)
    if err:
        return err, None

    with open(fn, 'r', newline='') as f:
        res = list(csv.reader(f, delimiter=','))
    return err, res[row_start:]


def read_json(fn):
    """ Read a JSON file with path check.

    Returns: (err, data). `err` is None on success; otherwise it is an error
    dict {'status': 'fail', 'message': ...} and `data` is None.
    """
    err = check_path(fn)
    if err:
        return err, None

    with open(fn, 'rb') as f:
        try:
            res = json.load(f)
            return None, res
        except ValueError:
            # ValueError covers both json.JSONDecodeError (malformed JSON) and
            # UnicodeDecodeError (bytes that cannot be decoded), so any
            # unparsable file is reported through the error dict
            msg = 'Cannot parse the JSON file {}'.format(fn)
            err = {'status': 'fail', 'message': msg}
            return err, None


def write_json(data, fn, nice=False):
    """ Write data to fn as UTF-8 JSON.

    If `nice` is true, the output is indented by 4 spaces with sorted keys.
    """
    param = {'indent': 4, 'sort_keys': True} if nice else {}
    with open(fn, 'w', encoding='utf-8') as f:
        json.dump(data, f, **param)
def group_by(lst, func):
    """ Bucket the items of lst by the key func(item).

    Returns a dict mapping each key to the list of items that produced it,
    with items kept in their original order.
    """
    buckets = {}
    for entry in lst:
        buckets.setdefault(func(entry), []).append(entry)
    return buckets


def remove_na (df, col, dtype=str):
    """ Cast one column of a dataframe to dtype and drop the rows where that
    column is Inf or NA.

    For a non-string dtype the column is converted in place with
    pd.to_numeric; values that cannot be parsed become NaN. Returns the
    filtered dataframe."""

    if dtype != str:
        target = 'float' if dtype == float else 'integer'
        df[col] = pd.to_numeric(df[col], errors='coerce', downcast=target)

    # infinities are turned into NaN so that dropna discards them as well
    cleaned = df.replace([np.inf, -np.inf], np.nan)
    return cleaned.dropna(subset=[col])
def sensitivity_ks (df, dec, options, col):
    """ Median two-sample Kolmogorov-Smirnov statistic over all pairs of options.

    Rows of df are split by the value of column `dec`; the values in column
    `col` of every pair of groups are compared with the KS test. Returns 0
    when there are fewer than two options.
    """
    if len(options) < 2:
        return 0

    samples = [df.loc[df[dec] == opt][col].to_numpy() for opt in options]

    # KS statistic of every unordered pair (the p-value is not used)
    pair_stats = [
        stats.ks_2samp(first, second).statistic
        for i, first in enumerate(samples)
        for second in samples[i + 1:]
    ]

    return np.median(pair_stats)


def sensitivity_f (df, dec, options, col):
    """ One-way F statistic of column `col` across the options of `dec`.

    Returns the ratio of the between-group mean square to the within-group
    mean square, or 0 when there are fewer than two options.
    """
    if len(options) < 2:
        return 0

    grand_mean = df[col].mean()
    groups = [df.loc[df[dec] == opt][['uid', col]] for opt in options]

    # between-group mean square
    ms_between = sum(len(g) * (g[col].mean() - grand_mean)**2 for g in groups)
    ms_between /= len(groups) - 1

    # within-group mean square
    ms_within = sum(sum((g[col] - g[col].mean())**2) for g in groups)
    ms_within /= len(df) - len(groups)

    return ms_between / ms_within
-------------------------------------------------------------------------------- 1 | import * as d3 from 'd3' 2 | import {bus} from '../config' 3 | import _ from 'lodash' 4 | 5 | class BrushX { 6 | constructor (data, scale) { 7 | // the d3 brush object 8 | this.brush = this.init(data, scale) 9 | 10 | // attributes 11 | this.selector = '' 12 | this.brushstart_callback = null 13 | this.brushing_callback = null 14 | } 15 | 16 | init (data, scale) { 17 | let that = this 18 | 19 | function brushstart () { 20 | d3.selectAll(that.selector).classed('brushed', false) 21 | if (that.brushstart_callback) { 22 | that.brushstart_callback() 23 | } 24 | bus.$emit('brush-remove', that.selector) 25 | } 26 | 27 | function brushing () { 28 | // empty selection 29 | if (!d3.event.selection) return 30 | 31 | // x0, x1 32 | let sel = _.flatten(d3.event.selection) 33 | let bounds = _.map(sel, (s) => s) 34 | 35 | // change color of selected points 36 | let uids = {} 37 | d3.selectAll(that.selector) 38 | .classed('brushed', (p) => { 39 | let is_in = scale.getRawX(p) >= bounds[0] && 40 | scale.getRawX(p) <= bounds[1] 41 | uids[p.uid] = is_in 42 | return is_in 43 | }) 44 | 45 | // callback 46 | if (that.brushing_callback) { 47 | that.brushing_callback(uids) 48 | } 49 | } 50 | 51 | function brushended () { 52 | // empty selection 53 | if (!d3.event.selection) { 54 | bus.$emit('brush', []) 55 | return 56 | } 57 | 58 | // x0, x1 59 | let sel = _.flatten(d3.event.selection) 60 | let bounds = _.map(sel, (s) => s) 61 | 62 | let pts = _.filter(data, (p) => { 63 | return scale.getRawX(p) >= bounds[0] && 64 | scale.getRawX(p) <= bounds[1] 65 | }) 66 | 67 | bus.$emit('brush', pts) 68 | } 69 | 70 | return d3.brushX() 71 | .on('start', brushstart) 72 | .on('brush', brushing) 73 | .on("end", brushended) 74 | } 75 | 76 | /** 77 | * Clear current brush selection 78 | */ 79 | clear () { 80 | // manually reset brush styling because calling brush.move will invoke 81 | // brushstart, and brushstart will fire 
/**
 * Miscellaneous helpers: text measurement and wrapping, DOM re-ordering,
 * kernel density estimation, and quantiles.
 */
class Util {
  constructor () {
    // canvas context for calculating text width
    this._context = null
  }

  /**
   * Break text into rows that each fit within the given width.
   * @param text The string to wrap; it is split on single spaces.
   * @param width Maximum row width, in the units of getTextWidth.
   * @param font CSS font string used for measuring.
   * @returns Array of row strings. When wrapping happens, the first row
   *   starts with a space because words are appended as ' ' + word.
   */
  wrapText (text, width, font) {
    let l = this.getTextWidth(text, font)
    if (l < width) {
      return [text]
    }
    let words = text.split(' ')
    let res = ['']
    let row = 0
    let rl = 0
    _.each(words, (word) => {
      // measure with the caller's font rather than whatever was set last
      let wl = this.getTextWidth(word + ' ', font)
      if (rl + wl > width) {
        res.push(word)
        // the new row already holds this word, so its width counts
        rl = wl
        row += 1
      } else {
        rl += wl
        res[row] += ' ' + word
      }
    })
    return res
  }

  /**
   * Truncate text that is wider than width and append ' ...'.
   * The cut position is estimated proportionally from the measured width.
   */
  clipText (text, width, font) {
    let l = this.getTextWidth(text, font)
    let i = Math.floor(width / l * text.length) - 4
    return l > width ? text.substr(0, i) + ' ...' : text
  }

  /**
   * Measure the rendered width of text using a canvas 2d context, which is
   * created on first use and then reused.
   */
  getTextWidth (text, font) {
    if (!this._context) {
      this._context = document.createElement('canvas').getContext('2d')
    }

    this._context.font = font
    return this._context.measureText(text).width
  }

  /**
   * Move every element of the selection in front of its parent's first
   * child, i.e. to the back of the drawing order.
   */
  moveToBack (sel) {
    return sel.each(function () {
      let fc = this.parentNode.firstChild
      if (fc) {
        this.parentNode.insertBefore(this, fc)
      }
    })
  }

  /**
   * Build a kernel density estimator.
   * @param kernel Kernel function of the distance x - v.
   * @param X Positions at which the density is evaluated.
   * @returns Function mapping a sample array V to [[x, density], ...].
   */
  kde (kernel, X) {
    return (V) => {
      return X.map((x) => [x, d3.mean(V, (v) => kernel(x - v))])
    }
  }

  /**
   * Epanechnikov kernel with bandwidth k.
   */
  epanechnikov (k) {
    return (v) => Math.abs(v /= k) <= 1 ? 0.75 * (1 - v * v) / k : 0
  }

  /**
   * KDE which chooses range and bandwidth based on data.
   * @param u Input data array.
   * @param smooth Smoothing factor, larger makes smoother
   * @returns Array Density array.
   */
  kde_smart (u, smooth = 1) {
    u = _.sortBy(u)
    let n = u.length
    // evaluate on 40 steps across the data range, padded by 5 steps per side
    let step = (u[n - 1] - u[0]) / 40
    let rg = _.range(u[0] - step * 5, u[n - 1] + step * 5, step)
    let iqr = u[Math.floor(n * 0.75)] - u[Math.floor(n * 0.25)]
    // NOTE(review): Silverman's rule of thumb scales the bandwidth by
    // n^(-1/5); here the exponent is +0.2 and the result is damped by 0.2
    // below. Confirm this is intentional tuning before changing it.
    let bw = 0.9 * iqr / 1.34 * Math.pow(n, 0.2)
    bw *= smooth

    let estimator = this.kde(this.epanechnikov(bw * 0.2), rg)
    return estimator(u)
  }

  /**
   * Turn a density array [[x, density], ...] into a cumulative one by
   * accumulating density * step, where step is the spacing of the first
   * two x positions.
   */
  toCdf (pdf) {
    if (pdf.length < 2) {
      return pdf
    }
    let step = pdf[1][0] - pdf[0][0]
    let sum = 0
    return _.map(pdf, (d) => {
      sum += step * d[1]
      return [d[0], sum]
    })
  }

  /**
   * Get quantiles of an array.
   * @param arr The array
   * @param q A number within [0, 1]
   */
  quantile (arr, q) {
    let sorted = _.sortBy(arr)
    let pos = (sorted.length - 1) * q
    let base = Math.floor(pos)
    let rest = pos - base

    // linear interpolation between the two neighboring order statistics
    if (base + 1 < sorted.length) {
      return sorted[base] + rest * (sorted[base + 1] - sorted[base])
    } else {
      return sorted[base]
    }
  }
}
d3.select(parent) 28 | .append('svg') 29 | .attr('width', this.outerWidth) 30 | .attr('height', this.outerHeight) 31 | let svg = raw.append('g') 32 | .attr('transform', `translate(${this.margin.left},${this.margin.top})`) 33 | svg.append('rect') 34 | .attr('x', 0) 35 | .attr('y', -10) 36 | .attr('width', width) 37 | .attr('height', height + 10) 38 | .attr('fill', 'none') 39 | .attr('stroke', '#000') 40 | 41 | // bins and scale 42 | let xs = d3.scaleLinear() 43 | .domain([d3.min(data), d3.max(data)]) 44 | .range([10, width - 10]) 45 | let histogram = d3.histogram() 46 | .value((d) => d) 47 | .domain(xs.domain()) 48 | .thresholds(this.n_bins) 49 | let bins =histogram(data) 50 | 51 | // convert count to frequency 52 | bins = _.map(bins, (arr) => { 53 | return {x0: arr.x0, x1: arr.x1, y: arr.length / data.length} 54 | }) 55 | let ys = d3.scaleLinear() 56 | .domain([0, d3.max(bins, (d) => d.y)]).nice() 57 | .range([height - 20, 0]) 58 | 59 | // draw the poly-line 60 | let line = d3.line() 61 | .x((d) => 0.5 * xs(d.x0) + 0.5 * xs(d.x1)) 62 | .y((d) => ys(d.y)) 63 | svg.append('path') 64 | .datum(bins) 65 | .attr('d', line) 66 | .attr('stroke', '#000') 67 | .attr('fill', 'none') 68 | 69 | // draw the little squares 70 | svg.selectAll('.pcurve-square') 71 | .data(bins) 72 | .enter() 73 | .append('rect') 74 | .attr('x', (d) => 0.5 * xs(d.x0) + 0.5 * xs(d.x1) - this.radius) 75 | .attr('y', (d) => ys(d.y) - this.radius) 76 | .attr('width', this.radius * 2) 77 | .attr('height', this.radius * 2) 78 | .attr('fill', '#fff') 79 | .attr('stroke', '#000') 80 | 81 | // draw axis 82 | svg.append('g') 83 | .attr('transform', 'translate(0,' + height + ')') 84 | .call(d3.axisBottom(xs)) 85 | .call((g) => g.select('.domain').remove()) 86 | svg.append('g') 87 | .call(d3.axisLeft(ys).ticks(10)) 88 | .call((g) => g.select('.domain').remove()) 89 | 90 | // draw the dashed line 91 | if (this.cutoff != null) { 92 | svg.append('path') 93 | .attr('d', `M0,${ys(this.cutoff)}H${width}`) 94 | 
/**
 * Draws a kernel density curve (PDF) or its cumulative form (CDF) of the
 * `diff` field of the input data.
 */
class DensityPlot {
  constructor () {
    this.outerWidth = 450
    this.outerHeight = 400
    this.margin = {
      top: 15,
      right: 15,
      bottom: 50,
      left: 50
    }

    this.type = 0 // 0: PDF, 1: CDF
    this.x_label = 'Effect Size'
    this.label_font_size = 12

    this.label_cdf = 'Cumulative Frequency Distribution'
    this.label_pdf = 'Frequency'
  }

  /**
   * Render the plot into a new svg appended to parent.
   * @param parent Selector or node passed to d3.select.
   * @param data Array of objects; only the `diff` field is used.
   */
  draw (parent, data) {
    let height = this.outerHeight - this.margin.top - this.margin.bottom
    let width = this.outerWidth - this.margin.left - this.margin.right

    // prepare the canvas
    let raw = d3.select(parent)
      .append('svg')
      .attr('width', this.outerWidth)
      .attr('height', this.outerHeight)
    let svg = raw.append('g')
      .attr('transform', `translate(${this.margin.left},${this.margin.top})`)
    svg.append('rect')
      .attr('x', 0)
      .attr('y', 0)
      .attr('width', width)
      .attr('height', height)
      .attr('fill', 'none')
      .attr('stroke', '#ddd')
      .attr('stroke-width', 2)

    // compute density
    data = _.map(data, (d) => d.diff)
    let density = util.kde_smart(data)
    if (this.type === 1) {
      density = util.toCdf(density)
    }

    // scale
    let xs = d3.scaleLinear()
      .domain([density[0][0] * 1.1, density[density.length - 1][0] * 1.1])
      .range([0, width])
    let ys = d3.scaleLinear()
      .domain([-0.05, 1.05])
      .range([height, 0])

    // axis
    svg.append('g')
      .attr('transform', 'translate(0,' + height + ')')
      .call(d3.axisBottom(xs))
      .call((g) => g.select('.domain').remove())
    svg.append('g')
      .call(d3.axisLeft(ys))
      .call((g) => g.select('.domain').remove())

    // axis gridlines
    svg.append('g')
      .attr('transform', 'translate(0,' + height + ')')
      .call(d3.axisBottom(xs)
        .tickSize(-height)
        .tickFormat(''))
      .call((g) => g.selectAll('.tick')
        .attr('stroke-opacity', 0.07))
      .call((g) => g.select('.domain').remove())
    svg.append('g')
      .call(d3.axisLeft(ys)
        .tickSize(-width)
        .tickFormat(''))
      .call((g) => g.selectAll('.tick')
        .attr('stroke-opacity', 0.07))
      .call((g) => g.select('.domain').remove())

    // draw the density curve
    // (no logging in the accessors: they run once per density point)
    let line = d3.line().curve(d3.curveBasis)
      .x((d) => xs(d[0]))
      .y((d) => ys(d[1]))
    svg.selectAll('.density-curve')
      .data([density])
      .enter()
      .append('path')
      .classed('density-curve', true)
      .attr('d', line)
      .attr('fill', 'none')
      .attr('stroke-linejoin', 'round')
      .attr('stroke', '#000')

    // axis label
    let y_label = this.type === 1 ? this.label_cdf : this.label_pdf
    svg.append('text')
      .attr('transform', 'rotate(-90)')
      .attr('y', - 38)
      .attr('x', -(height / 2))
      .style('text-anchor', 'middle')
      .style('font-size', this.label_font_size)
      .text(y_label)
    svg.append('text')
      .attr('y', height + 38)
      .attr('x', width / 2)
      .style('text-anchor', 'middle')
      .style('font-size', this.label_font_size)
      .text(this.x_label)
  }
}
def _read_schema_csv(key):
    """ Load the CSV file registered under app.schema[key].

    Returns the reply dict for the route: the error dict from read_csv when
    reading fails, otherwise a success reply whose header has the schema
    field name replaced by `key`.
    """
    spec = app.schema[key]
    path = os.path.join(app.data_folder, spec['file'])
    err, rows = read_csv(path, 0)
    if err:
        return err

    header = [key if name == spec['field'] else name for name in rows[0]]
    return {'status': 'success', 'data': rows[1:], 'header': header}


# read uncertainty
@app.route('/api/get_uncertainty', methods=['POST'])
def get_uncertainty():
    """ Return the rows of the uncertainty file with a normalized header. """
    return jsonify(_read_schema_csv('uncertainty')), 200


# read the null distribution of point estimates
@app.route('/api/get_null', methods=['POST'])
def get_null():
    """ Return the rows of the null distribution file with a normalized header. """
    return jsonify(_read_schema_csv('null_distribution')), 200


# read the overview, including decisions and ADG
@app.route('/api/get_overview', methods=['POST'])
def get_overview():
    """ Return schema names, decisions, and the visualizer settings. """
    names = [app.schema[key]['name'] for key in app.schema]
    payload = {'schema': names, 'decisions': app.decisions}
    payload.update(app.visualizer)
    return jsonify({'status': 'success', 'data': payload}), 200
94 | qt = np.append(np.arange(0, 1, 1 / m), 1.0) 95 | data.append(np.quantile(long, qt).tolist()) 96 | else: 97 | data.append(long) 98 | 99 | # apply transform 100 | trans = read_key_safe(f, ['transform'], None) 101 | if trans: 102 | trans = trans.format('x') 103 | for i in range(2): 104 | data[i] = [eval(trans) for x in data[i]] 105 | 106 | reply['data'] = data 107 | 108 | return jsonify(reply), 200 109 | -------------------------------------------------------------------------------- /client/src/components/OptionRatioView.vue: -------------------------------------------------------------------------------- 1 | 27 | 28 | 112 | 113 | -------------------------------------------------------------------------------- /client/src/components/SmallMultiplesView.vue: -------------------------------------------------------------------------------- 1 | 28 | 29 | 128 | 129 | 152 | -------------------------------------------------------------------------------- /bobaserver/bobastats/bootstrap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import stats 3 | from time import perf_counter 4 | 5 | class bootstrap(): 6 | """ 7 | This class implements the bootstrap procedure and offers 3 type of CIs: 8 | basic, percentile, and bias-corrected and accelerated (BCa). 
9 | """ 10 | 11 | def __init__(self, func, ci_type='percentile', n=200, verbose=False): 12 | """ 13 | Parameters: 14 | - n: bootstrap how many times 15 | - func: function to compute the statistic of interest 16 | - ci_type: one of ['basic', 'percentile', 'bca', 'bc'] 17 | - verbose: if true, print elapsed time 18 | """ 19 | self.n = n 20 | self.stat = func 21 | self.ci_type = ci_type 22 | self.verbose = verbose 23 | 24 | 25 | def fit(self, data, *args, **kwargs): 26 | self.fit_with_probability(data, None, *args, **kwargs) 27 | 28 | 29 | def fit_with_probability(self, data, p, *args, **kwargs): 30 | """ 31 | Parameters: 32 | - data: the sample array to perform bootstrap on 33 | - p: probilities associated with each entry in data. If unspecified, draw 34 | uniformly when resampling. 35 | """ 36 | time_start = perf_counter() 37 | 38 | # sample statistics 39 | self.sample_stat = self.stat(data, *args, **kwargs) 40 | 41 | # fit bootstrap 42 | self.bootstrap_stats = [] 43 | for i in range(self.n): 44 | d = np.random.choice(data, size=len(data), replace=True, p=p) 45 | self.bootstrap_stats.append(self.stat(d, *args, **kwargs)) 46 | 47 | # jackknife 48 | self.jack_stats = [] 49 | if self.ci_type == 'bca': 50 | for i in range(len(data)): 51 | d = np.delete(data, i) 52 | self.jack_stats.append(self.stat(d, *args, **kwargs)) 53 | 54 | # elapsed time 55 | if self.verbose: 56 | print(f'Bootstrap time: {perf_counter() - time_start} seconds') 57 | 58 | 59 | def get_ci(self, alpha=0.05): 60 | """ Get the 100(1 - alpha)% confidence interval""" 61 | # drop NaN in the bootstrap statistics array 62 | arr = self._handle_null(self.bootstrap_stats) 63 | if not len(arr): 64 | return np.nan, np.nan 65 | 66 | if self.ci_type == 'basic': 67 | return self._get_ci_basic(arr, alpha) 68 | elif self.ci_type == 'bca': 69 | return self._get_ci_bca(arr, alpha) 70 | elif self.ci_type == 'bc': 71 | return self._get_ci_bc(arr, alpha) 72 | else: 73 | return self._get_ci_percentile(arr, alpha) 74 | 75 | 
def _get_ci_bc(self, arr, alpha):
    """ BCa bootstrap CI with the acceleration term set to 0

    Parameters:
     - arr: bootstrap statistics (get_ci passes them with NaN removed)
     - alpha: the interval is the 100(1 - alpha)% CI

    Returns: (lower, upper), two quantiles of arr
    """
    # bias-correction factor: normal quantile of the share of bootstrap
    # statistics below the sample statistic. It is +/-inf when that share
    # is 1 or 0; both cdf values below then saturate at 1 or 0.
    z0 = stats.norm.ppf(np.mean(np.asarray(arr) < self.sample_stat))

    # the CI
    ql, qu = stats.norm.ppf(alpha / 2), stats.norm.ppf(1 - alpha / 2)
    a1 = stats.norm.cdf(z0 * 2 + ql)
    a2 = stats.norm.cdf(z0 * 2 + qu)
    return np.quantile(arr, a1), np.quantile(arr, a2)


def _get_ci_bca(self, arr, alpha):
    """ Bias corrected and accelerated bootstrap CI

    Parameters:
     - arr: bootstrap statistics (get_ci passes them with NaN removed)
     - alpha: the interval is the 100(1 - alpha)% CI

    Reads self.sample_stat and self.jack_stats. When there are no usable
    jackknife statistics, or they are all identical, the acceleration is 0
    and the result equals the 'bc' interval.

    Returns: (lower, upper), two quantiles of arr
    """
    # bias-correction factor
    z0 = stats.norm.ppf(np.mean(np.asarray(arr) < self.sample_stat))
    z0 = np.sign(z0) * 5 if np.isinf(z0) else z0  # handle infinity

    # acceleration factor, estimated from the jackknife statistics
    jack = np.asarray(self._handle_null(self.jack_stats))
    a_hat = 0
    if len(jack):
        dev = jack.mean() - jack
        nom = np.sum(dev**3)
        denom = 6 * np.sum(dev**2)**1.5
        # identical jackknife statistics give 0 / 0 = NaN, and np.quantile
        # rejects a NaN quantile; keep a_hat at 0 in that case
        if denom != 0:
            a_hat = nom / denom

    # the CI
    ql, qu = stats.norm.ppf(alpha / 2), stats.norm.ppf(1 - alpha / 2)
    a1 = stats.norm.cdf(z0 + (z0 + ql) / (1 - a_hat * (z0 + ql)))
    a2 = stats.norm.cdf(z0 + (z0 + qu) / (1 - a_hat * (z0 + qu)))
    return np.quantile(arr, a1), np.quantile(arr, a2)


def _get_ci_basic(self, arr, alpha):
    """ Basic bootstrap CI: bootstrap quantiles reflected around the sample statistic """
    lower = 2 * self.sample_stat - np.quantile(arr, 1 - alpha / 2)
    upper = 2 * self.sample_stat - np.quantile(arr, alpha / 2)
    return lower, upper


def _get_ci_percentile(self, arr, alpha):
    """ Percentile CI: the alpha/2 and 1 - alpha/2 quantiles of arr """
    lower = np.quantile(arr, alpha / 2)
    upper = np.quantile(arr, 1 - alpha / 2)
    return lower, upper


def _handle_null(self, arr):
    """ Return the entries of arr that are not NaN, as a list """
    # drop NaN in the bootstrap/jackknife statistics array
    return [t for t in arr if not np.isnan(t)]

# --------------------------------------------------------------------------------
# /client/src/archetype_vis/parallel_line_plot.js:
-------------------------------------------------------------------------------- 1 | import * as d3 from 'd3' 2 | import _ from 'lodash' 3 | import {store} from '../controllers/config' 4 | import {SCHEMA} from '../controllers/constants' 5 | 6 | class ParallelLinePlot { 7 | constructor () { 8 | this.outerWidth = 450 9 | this.outerHeight = 400 10 | this.margin = { 11 | top: 15, 12 | right: 15, 13 | bottom: 60, 14 | left: 70 15 | } 16 | 17 | this.y_label = 'p-value' 18 | this.y_levels = ['p < 0.05', 'p >= 0.05'] 19 | this.x_label = '' 20 | 21 | // internal state, set by draw() and wrangle() 22 | this.dec = '' 23 | this.x_levels = [] 24 | } 25 | 26 | wrangle (data) { 27 | this.x_levels = store.getDecisionByName(this.dec).options 28 | let stride = this.x_levels.length 29 | 30 | // we need to find all universe groups that differ only by the decision 31 | // first, combine decisions and outcomes 32 | data = _.map(data, (d) => { 33 | let uni = store.getUniverseById(d.uid) 34 | let y = d[SCHEMA.P] < 0.05 ? this.y_levels[0] : this.y_levels[1] 35 | return _.defaults({'y': y}, uni) 36 | }) 37 | 38 | // sort by all other decisions 39 | let decs = _.keys(store.decisions) 40 | _.remove(decs, (d) => d === this.dec) 41 | data = _.sortBy(data, decs) 42 | 43 | // create a map to make the x order consistent 44 | let lookup = _.zipObject(this.x_levels, _.range(stride)) 45 | 46 | // combine into lines, taking `stride` consecutive rows as one group 47 | let res = [] 48 | for (let i = 0; i < data.length; i += stride) { 49 | // sort the group (NOTE(review): assumes data.length is a multiple of stride; otherwise data[i + j] is undefined) 50 | let line = [] 51 | for (let j = 0; j < stride; j++) { 52 | let datum = data[i + j] 53 | line[lookup[datum[this.dec]]] = datum 54 | } 55 | 56 | // fixme: here we throw away any incomplete line 57 | let complete = true 58 | line = _.map(line, (d) => { 59 | if (!d) { 60 | complete = false 61 | return {} 62 | } 63 | return {y: d.y, x: d[this.dec]} 64 | }) 65 | if (complete) { 66 | res.push(line) 67 | } 68 | } 69 | 70 | return res 71 | } 72 | 73 | draw (parent, data, dec) { 74 | this.dec = dec 75 | data = this.wrangle(data) 76 | 77 
| // prepare the canvas 78 | let height = this.outerHeight - this.margin.top - this.margin.bottom 79 | let width = this.outerWidth - this.margin.left - this.margin.right 80 | let raw = d3.select(parent) 81 | .append('svg') 82 | .attr('width', this.outerWidth) 83 | .attr('height', this.outerHeight) 84 | let svg = raw.append('g') 85 | .attr('transform', `translate(${this.margin.left},${this.margin.top})`) 86 | 87 | // scale 88 | let xs = d3.scaleBand() 89 | .domain(this.x_levels) 90 | .range([0, width]) 91 | let ys = d3.scaleBand() 92 | .domain(this.y_levels) 93 | .range([height, 0]) 94 | 95 | // draw the lines; each vertex is jittered vertically by up to +/-10% of the y band step 96 | let jitter = ys.step() * 0.2 97 | let line = d3.line() 98 | .x((d) => xs(d.x) + xs.step() * 0.5) 99 | .y((d) => ys(d.y) + ys.step() * 0.5 + _.random(0, jitter) - jitter * 0.5) 100 | svg.selectAll('.para-line') 101 | .data(data).enter() 102 | .append('path') 103 | .attr('d', (d) => line(d)) 104 | .attr('fill', 'none') 105 | .attr('stroke', '#000') 106 | .attr('stroke-opacity', 0.2) 107 | 108 | // axis 109 | svg.append('g') 110 | .attr('transform', 'translate(0,' + height + ')') 111 | .call(d3.axisBottom(xs)) 112 | svg.append('g') 113 | .call(d3.axisLeft(ys)) 114 | 115 | // axis label 116 | svg.append('text') 117 | .attr('y', height + 38) 118 | .attr('x', width / 2) 119 | .style('text-anchor', 'middle') 120 | .text(this.x_label || this.dec) 121 | 122 | // prepare the annotation data: number of line vertices at each (x, y) level 123 | data = _.flatten(data) 124 | let count = [] 125 | _.each(this.x_levels, (x, i) => { 126 | _.each(this.y_levels, (y) => { 127 | let n = _.filter(data, (d) => d.x === x && d.y === y).length 128 | let left = i < 1 129 | count.push({x: x, y: y, n: n, align_left: left}) 130 | }) 131 | }) 132 | console.log(count) // NOTE(review): leftover debug output 133 | 134 | // draw annotations 135 | svg.selectAll('.para-text') 136 | .data(count).enter() 137 | .append('text') 138 | .attr('x', (d) => { 139 | let padding = d.align_left ? 
-10 : 10 140 | return xs(d.x) + xs.step() * 0.5 + padding 141 | }) 142 | .attr('y', (d) => ys(d.y) + ys.step() * 0.5) 143 | .attr('font-size', 9) 144 | .attr('text-anchor', (d) => d.align_left ? 'end' : 'start') 145 | .text((d) => `N=${d.n}`) 146 | } 147 | } 148 | 149 | export default ParallelLinePlot 150 | -------------------------------------------------------------------------------- /client/src/controllers/inference/infer_simple_plot.js: -------------------------------------------------------------------------------- 1 | import * as d3 from 'd3' 2 | import {store, util} from '../config' 3 | import DotPlotScale from '../vis/dot_plot_scale' 4 | import _ from 'lodash' 5 | 6 | class InferSimplePlot { 7 | constructor () { 8 | this.outerWidth = 1050 9 | this.outerHeight = 250 10 | this.margin = { 11 | top: 2, 12 | right: 20, 13 | bottom: 30, 14 | left: 20 15 | } 16 | this.null = 0 17 | this.x_axis_label = 'Effect Size' 18 | this.label_font_size = 11 19 | this.smooth = true 20 | 21 | // internal 22 | this.svg = null 23 | this.scale = null 24 | this.data = null 25 | } 26 | 27 | draw (parent, data) { 28 | this.data = data 29 | 30 | // scale 31 | let scale_params = { 32 | 'outerWidth': this.outerWidth, 33 | 'outerHeight': this.outerHeight, 34 | 'margin': this.margin, 35 | 'x_field': 'diff' 36 | } 37 | 38 | // using a shared x range 39 | let scale = new DotPlotScale(store.x_range, scale_params) 40 | this.scale = scale 41 | 42 | // prepare the canvas 43 | this.svg = d3.select(parent) 44 | .append('svg') 45 | .attr('width', this.outerWidth) 46 | .attr('height', this.outerHeight) 47 | .append('g') 48 | .attr('transform', `translate(${this.margin.left},${this.margin.top})`) 49 | 50 | let obj = this.svg.append('svg') 51 | .classed('objects', true) 52 | .attr('width', scale.width()) 53 | .attr('height', scale.height()) 54 | 55 | // axis 56 | this.drawAxis() 57 | 58 | // envelope: smoothed density, or a histogram when the dataset is 'hurricane' 59 | this.smooth = store.configs.dataset !== 'hurricane' //fixme 60 | if (this.smooth) { 61 | 
this.drawEnvelope(obj) 62 | } else { 63 | this.drawHist(obj) 64 | } 65 | 66 | // draw a vertical reference line at the null effect (this.null) 67 | obj.append('line') 68 | .attr('x1', scale.x(this.null)) 69 | .attr('x2', scale.x(this.null)) 70 | .attr('y1', 0) 71 | .attr('y2', scale.height()) 72 | .attr('stroke', '#e45756') 73 | } 74 | 75 | drawEnvelope (svg) { 76 | let scale = this.scale 77 | let dp = this.data 78 | // remove null and outliers (outside 10x range) 79 | let dm = scale.x.domain() 80 | let xr = (dm[1] - dm[0]) * 10 81 | dp = _.filter(dp, (d) => d != null && d < dm[1] + xr && d > dm[0] - xr) 82 | 83 | let density = util.kde_smart(dp, 0.5) 84 | 85 | // y scale 86 | let ys = d3.scaleLinear().domain([0, d3.max(density, (d) => d[1])]) 87 | .range([0, scale.height()]) 88 | 89 | // area 90 | let area = d3.area() 91 | .x((d) => scale.x(d[0])) 92 | .y0(scale.height()) 93 | .y1((d) => scale.height() - ys(d[1])) 94 | 95 | // plot 96 | svg.append('path') 97 | .attr('class', 'density-observed') 98 | .datum(density) 99 | .attr('d', area) 100 | 101 | } 102 | 103 | /** 104 | * Draw the envelope as a histogram 105 | */ 106 | drawHist (svg) { 107 | let scale = this.scale 108 | let dp = this.data 109 | 110 | let dm = scale.x.domain() 111 | let step = (dm[1] - dm[0]) / (scale.width() / 2) 112 | let bins = _.range(dm[0], dm[1], step) 113 | let hist = d3.histogram().domain(scale.x.domain()) 114 | .thresholds(bins)(dp) 115 | 116 | // y scale 117 | let ys = d3.scaleLinear().domain([0, d3.max(hist, (d) => d.length)]) 118 | .range([0, scale.height()]) 119 | 120 | // area 121 | let area = d3.area() 122 | .x((d) => scale.x(d.x1)) 123 | .y0(scale.height()) 124 | .y1((d) => scale.height() - ys(d.length)) 125 | 126 | // plot 127 | svg.append('path') 128 | .attr('class', 'density-observed') 129 | .datum(hist) 130 | .attr('d', area) 131 | } 132 | 133 | drawAxis () { 134 | let scale = this.scale 135 | let xAxis = d3.axisBottom(scale.x).tickSize(-scale.height()) 136 | .ticks(Math.round(scale.width() / 30)) 137 | 138 
| this.svg.append("g") 139 | .classed("x axis muted", true) 140 | .attr('transform', `translate(0,${scale.height()})`) 141 | .call(xAxis) 142 | .call(g => g.selectAll('.tick line') 143 | .attr('stroke-opacity', 0.1) 144 | .attr('stroke-dasharray', '2, 2')) 145 | .call(g => g.selectAll('.domain') 146 | .attr('d', `M0.5,0H${scale.width()}`)) 147 | 148 | let th = scale.height() + this.label_font_size * 2 + 3 149 | this.svg.append('text') 150 | .classed('axis-label muted', true) 151 | .attr('transform', `translate(${scale.width() / 2}, ${th})`) 152 | .style('text-anchor', 'middle') 153 | .style('font-size', this.label_font_size) 154 | .text(this.x_axis_label) 155 | } 156 | } 157 | 158 | export default InferSimplePlot 159 | -------------------------------------------------------------------------------- /client/src/controllers/inference/infer_null_plot.js: -------------------------------------------------------------------------------- 1 | import * as d3 from 'd3' 2 | import {store} from '../config' 3 | import _ from 'lodash' 4 | import BaseScale from '../vis/base_scale' 5 | 6 | class InferNullPlot { 7 | constructor () { 8 | this.outerWidth = 1050 9 | this.outerHeight = 250 10 | this.margin = { 11 | top: 15, 12 | right: 20, 13 | bottom: 30, 14 | left: 20 15 | } 16 | this.x_axis_label = 'Universe' 17 | this.y_axis_label = 'Effect Size' 18 | this.label_font_size = 11 19 | 20 | // internal 21 | this.svg = null 22 | this.scale = null 23 | this.data = null 24 | } 25 | 26 | draw (parent, data) { 27 | this.data = data 28 | 29 | // make space for labels (NOTE(review): mutates this.margin, so calling draw() again grows the margins further) 30 | this.margin.bottom += this.x_axis_label ? this.label_font_size : 0 31 | this.margin.right += this.y_axis_label ? 
this.label_font_size : 0 32 | 33 | // scale parameters 34 | let scale_params = { 35 | 'outerWidth': this.outerWidth, 36 | 'outerHeight': this.outerHeight, 37 | 'margin': this.margin, 38 | 'x_field': 'diff' 39 | } 40 | 41 | // scale: x spans the data indices, y uses the shared store.x_range 42 | let l = data.length 43 | let scale = new BaseScale(scale_params) 44 | scale.x = d3.scaleLinear().range([0, scale.width()]) 45 | .domain([0, l]) 46 | scale.y = d3.scaleLinear().range([scale.height(), 0]) 47 | .domain(store.x_range) 48 | // .domain([data[0].diff, data[l - 1].diff]) 49 | this.scale = scale 50 | 51 | // prepare the canvas 52 | this.svg = d3.select(parent) 53 | .append('svg') 54 | .attr('width', this.outerWidth) 55 | .attr('height', this.outerHeight) 56 | .append('g') 57 | .attr('transform', `translate(${this.margin.left},${this.margin.top})`) 58 | 59 | let obj = this.svg.append('svg') 60 | .classed('objects', true) 61 | .attr('width', scale.width()) 62 | .attr('height', scale.height()) 63 | 64 | // axis 65 | this.drawAxis() 66 | 67 | // draw the spec curve 68 | this.drawSpecCurve(obj) 69 | } 70 | 71 | drawAxis () { 72 | let scale = this.scale 73 | 74 | // y axis, drawn on the right edge 75 | let yaxis = d3.axisRight(scale.y).tickSize(-scale.width()) 76 | .ticks(5) 77 | this.svg.append('g') 78 | .classed('y axis muted', true) 79 | .attr('transform', `translate(${scale.width()},0)`) 80 | .call(yaxis) 81 | .call(g => g.selectAll('.tick line') 82 | .attr('stroke-opacity', 0.1) 83 | .attr('stroke-dasharray', '2, 2')) 84 | .call(g => g.selectAll('.domain') 85 | .attr('d', `M0.5,${scale.height()} V0.5`)) 86 | 87 | // y axis label 88 | // fixme: replace 20 with the actual width of axis tick labels 89 | let tw = -scale.width() - this.label_font_size - 20 90 | this.svg.append('text') 91 | .classed('y axis-label muted', true) 92 | .attr('transform', 'rotate(90)') 93 | .attr('x', scale.height() / 2) 94 | .attr('y', tw) 95 | .style('text-anchor', 'middle') 96 | .style('font-size', this.label_font_size) 97 | .text(this.y_axis_label) 98 | 99 | // x axis title 100 | let 
th = scale.height() + this.label_font_size + 3 101 | this.svg.append('text') 102 | .classed('x axis-label muted', true) 103 | .attr('transform', `translate(${scale.width() / 2}, ${th})`) 104 | .style('text-anchor', 'middle') 105 | .style('font-size', this.label_font_size) 106 | .text(this.x_axis_label) 107 | } 108 | 109 | drawSpecCurve (svg) { 110 | let scale = this.scale 111 | let w = scale.width() / this.data.length 112 | w = Math.min(w, 1.5) 113 | 114 | // draw the CI as a box from d.lower to d.upper 115 | svg.selectAll('.null-box') 116 | .data(this.data) 117 | .enter() 118 | .append('rect') 119 | .classed('null-box', true) 120 | .attr('x', (d) => scale.x(d.i) - w/2) 121 | .attr('y', (d) => scale.y(d.upper)) 122 | .attr('width', w) 123 | .attr('height', (d) => Math.abs(scale.y(d.upper) - scale.y(d.lower))) 124 | 125 | // draw the point estimate; mark it when it falls outside [lower, upper] 126 | // this.drawDash(svg, 'null-median', 'i', 'median') 127 | let pts = this.drawDash(svg, 'null-point', 'i', 'diff') 128 | pts.filter((d) => d.diff > d.upper || d.diff < d.lower) 129 | .classed('null-outside', true) 130 | } 131 | 132 | drawDash (svg, cls, x_col, y_col) { 133 | let scale = this.scale 134 | let w = scale.width() / this.data.length 135 | w -= w > 2.5 ? 1 : 0 136 | 137 | return svg.selectAll('.' 
+ cls) 138 | .data(this.data) 139 | .enter() 140 | .append('line') 141 | .classed(cls, true) 142 | .attr('x1', (d) => scale.x(d[x_col]) - w/2) 143 | .attr('x2', (d) => scale.x(d[x_col]) + w/2) 144 | .attr('y1', (d) => scale.y(d[y_col])) 145 | .attr('y2', (d) => scale.y(d[y_col])) 146 | } 147 | } 148 | 149 | export default InferNullPlot 150 | -------------------------------------------------------------------------------- /client/src/archetype_vis/histogram.js: -------------------------------------------------------------------------------- 1 | import * as d3 from 'd3' 2 | import _ from 'lodash' 3 | 4 | class HistPlot { 5 | constructor () { 6 | this.outerWidth = 680 7 | this.outerHeight = 400 8 | this.margin = { 9 | top: 15, 10 | right: 15, 11 | bottom: 60, 12 | left: 90 13 | } 14 | this.padding_bottom = 10 15 | this.padding_left = 20 16 | 17 | this.y_label = 'Frequency' 18 | this.x_label = '' 19 | this.n_bins = 80 20 | this.count = true // true: raw count per bin; false: proportion of all values 21 | this.axis_font_size = 16 22 | this.label_font_size = 20 23 | 24 | // optional, for multi-view consistency 25 | this.x_range = null 26 | this.y_range = null 27 | } 28 | 29 | draw (parent, data, column, cutoff) { 30 | let height = this.outerHeight - this.margin.top - this.margin.bottom 31 | let width = this.outerWidth - this.margin.left - this.margin.right 32 | 33 | // prepare the canvas 34 | let raw = d3.select(parent) 35 | .append('svg') 36 | .attr('width', this.outerWidth) 37 | .attr('height', this.outerHeight) 38 | raw.append('rect') 39 | .attr('x', this.margin.left - this.padding_left) 40 | .attr('y', 0) 41 | .attr('width', this.outerWidth) 42 | .attr('height', this.outerHeight - this.margin.bottom + this.padding_bottom) 43 | .attr('fill', '#eee') 44 | let svg = raw.append('g') 45 | .attr('transform', `translate(${this.margin.left},${this.margin.top})`) 46 | 47 | // bins and scale 48 | data = _.map(data, (d) => d[column]) 49 | let xs = d3.scaleLinear() 50 | .domain(this.x_range || [d3.min(data), 
d3.max(data)]) 51 | .range([0, width]) 52 | let histogram = d3.histogram() 53 | .value((d) => d) 54 | .domain(xs.domain()) 55 | .thresholds(this.n_bins) 56 | let bins =histogram(data) 57 | bins = _.map(bins, (arr) => { 58 | return {x0: arr.x0, x1: arr.x1, 59 | y: this.count ? arr.length : arr.length / data.length} 60 | }) 61 | 62 | let ys = d3.scaleLinear() 63 | .domain(this.y_range || [0, d3.max(bins, (bin) => bin.y)]).nice() 64 | .range([height, 0]) 65 | 66 | // draw axis 67 | let x_axis = svg.append('g') 68 | .attr('transform', 'translate(0,' + (height + this.padding_bottom) + ')') 69 | .call(d3.axisBottom(xs)) 70 | let y_axis = svg.append('g') 71 | .attr('transform', 'translate(-' + this.padding_left + ', 0)') 72 | .call(d3.axisLeft(ys).ticks(10)) 73 | 74 | // manually draw the white tick lines 75 | svg.selectAll('.gridline-x') 76 | .data(x_axis.selectAll('.tick').data()) 77 | .enter() 78 | .append('path') 79 | .classed('gridline-x', true) 80 | .attr('d', (d) => `M${xs(d)},-${this.margin.top}V${height + this.padding_bottom}`) 81 | .attr('stroke', '#fff') 82 | svg.selectAll('.gridline-y') 83 | .data(y_axis.selectAll('.tick').data()) 84 | .enter() 85 | .append('path') 86 | .classed('gridline-y', true) 87 | .attr('d', (d) => `M-${this.padding_left},${ys(d)}H${width + this.padding_left}`) 88 | .attr('stroke', '#fff') 89 | 90 | // customize style 91 | svg.append('path') 92 | .attr('d', `M0,${height}H${width}`) 93 | .attr('stroke', '#222') 94 | x_axis.select('.domain') 95 | .remove() 96 | y_axis.select('.domain') 97 | .remove() 98 | x_axis.selectAll('text').attr('font-size', this.axis_font_size) 99 | y_axis.selectAll('text').attr('font-size', this.axis_font_size) 100 | 101 | // draw bars 102 | svg.selectAll('rect') 103 | .data(bins) 104 | .enter() 105 | .append('rect') 106 | .attr('x', (d) => xs(d.x0)) 107 | .attr('width', (d) => xs(d.x1) - xs(d.x0)) 108 | .attr('y', (d) => ys(d.y)) 109 | .attr('height', (d) => ys(0) - ys(d.y)) 110 | .attr('fill', '#fff') 111 | 
.attr('stroke', '#222') 112 | .attr('stroke-width', 1) 113 | 114 | // draw the dashed vertical line at the cutoff, if one is given 115 | if (cutoff != null) { 116 | svg.append('path') 117 | .attr('d', `M${xs(cutoff)},-${this.margin.top}V${height + this.padding_bottom}`) 118 | .attr('stroke', '#f00') 119 | .attr('stroke-dasharray', '4 3') 120 | } 121 | 122 | // axis labels 123 | let gap = 28 + this.axis_font_size 124 | svg.append('text') 125 | .attr('transform', 'rotate(-90)') 126 | .attr('y', - gap - this.padding_left) 127 | .attr('x', -(height / 2)) 128 | .style('text-anchor', 'middle') 129 | .text(this.y_label) 130 | .attr('font-size', this.label_font_size) 131 | svg.append('text') 132 | .attr('y', height + this.padding_bottom + gap) 133 | .attr('x', width / 2) 134 | .style('text-anchor', 'middle') 135 | .text(this.x_label || column) 136 | .attr('font-size', this.label_font_size) 137 | } 138 | } 139 | 140 | export default HistPlot 141 | -------------------------------------------------------------------------------- /doc/visualizer_config.md: -------------------------------------------------------------------------------- 1 | # Visualizer Config 2 | 3 | The visualizer config is a JSON file. It accepts the following top-level fields: `files`, `schema`, `labels`, and `sensitivity`. All of these fields are optional, and thus, the entire visualizer config is optional. We will describe these fields in later sections. Take a look at an example visualizer config file 4 | [here](https://github.com/uwdata/boba/blob/master/example/mortgage/visualizer_config.json). 5 | 6 | The visualizer config must be linked to your multiverse specification before compile time. 
To link the file, supply its path in the `BOBA_CONFIG` block: 7 | 8 | ```python 9 | # --- (BOBA_CONFIG) 10 | { 11 | "visualizer": "visualizer_config.json" 12 | } 13 | ``` 14 | 15 | If you do not link the visualizer config file before compiling the multiverse, or you do not use the Boba DSL, you can manually nest the content of this JSON into a top-level field named `visualizer` in `overview.json`. 16 | 17 | ## Files 18 | 19 | `files` is an array and it tells the visualizer where to find the outcomes of your multiverse. The array has the following format: 20 | 21 | ```json 22 | { 23 | "files": [ 24 | {"id": "ID1", "path": "my_outcomes.csv"}, 25 | {"id": "ID2", "path": "my_samples_{}.csv", "multi": true} 26 | ] 27 | } 28 | ``` 29 | 30 | Each item in the array has two required fields: `id`, which is a unique identifier manually chosen by you, and `path`, which is the path to your CSV file. If you use a relative path, the path is relative to the `-i` argument when invoking `boba-server`. 31 | 32 | In addition, there is an optional field `multi`, which indicates whether this is a collection of files instead of a single file. If `multi` is set to true, `path` must be a pattern where the universe ID is replaced by `{}`. 33 | 34 | ## Schema 35 | 36 | `schema` is a key-value object that tells the visualizer which columns in your CSV files contain specific types of outcomes. For example: 37 | 38 | ```json 39 | { 40 | "schema": { 41 | "point_estimate": {"file": "ID1", "field": "coefficient"} 42 | } 43 | } 44 | ``` 45 | 46 | It means that the point estimates are in the column named `coefficient` of the CSV file `ID1`. 47 | 48 | Note that you must specify at least one entry in `files` to use `schema`. The value in each key-value pair is an object and has two fields: `file`, which is the `id` of a file in your `files` array, and `field`, which is the column name in the CSV. The key in each key-value pair tells the visualizer what type of outcome this is. 
49 | 50 | Here are the available keys: 51 | - `point_estimate` (required): point estimate, one number per universe. 52 | - `p_value`: p-value, one number per universe. 53 | - `fit`: model fit quality, one number per universe. 54 | - `stacking_weight`: weight for the stacking algorithm, one number per universe. 55 | - `annotation`: additional text labels, one string per universe. 56 | - `prediction`: observed and predicted values per data point, one CSV file per universe. The corresponding file must supply a pattern as the `path` and set `multi` to `true`. In each file, the rows are the samples, and the first two columns are observed and predicted values (in this order). 57 | - `uncertainty`: draws from the sampling distribution, multiple numbers per universe. Keep each draw as a separate row in the CSV file. 58 | - `null_distribution`: draws from the null distribution, multiple numbers per universe. Keep each draw as a separate row in the CSV file. 59 | 60 | ## Labels 61 | 62 | `labels` is a key-value object. Its flags control the appearance and behavior of the visualizer interface. For example: 63 | 64 | ```json 65 | { 66 | "labels": { 67 | "dataset": "my project", 68 | "x_range": [-30, 40] 69 | } 70 | } 71 | ``` 72 | 73 | - `dataset`: name of the dataset to display in the header. Example: "hurricane". 74 | - `x_axis`: label of the x-axis of the overview plot, usually the meaning of the point estimate. Example: "Coefficient on Female". 75 | - `x_axis_fit`: label of the x-axis of the model fit plot, usually the meaning of the observed data points. Example: "Number of Deaths". 76 | - `x_range`: default range of the x-axis. Example: [-10, 50]. 77 | - `x_range_outer`: allowed range of the x-axis. This might be useful to hide extreme outliers and have a better resolution of the remaining data points. Example: [-100, 300]. 78 | - `fit_range`: range of the x-axis in the model fit plots. This might be useful to hide extreme outliers in order to have a better resolution. 
Example: [0, 1]. 79 | 80 | ## Sensitivity 81 | 82 | `sensitivity` is a string and it determines the algorithm for computing decision sensitivity. For example: 83 | 84 | ```json 85 | { 86 | "sensitivity": "f" 87 | } 88 | ``` 89 | 90 | Available options are: 91 | 92 | - `f`: an algorithm based on the F-test 93 | - `ks`: an algorithm based on the Kolmogorov–Smirnov statistic 94 | - `ad` (default): the standardized Anderson-Darling criterion for k samples 95 | 96 | For more details about these algorithms, please refer to the 97 | [paper](https://arxiv.org/pdf/2007.05551.pdf). 98 | -------------------------------------------------------------------------------- /client/src/archetype_vis/FacetPage.vue: -------------------------------------------------------------------------------- 1 | 23 | 24 | 140 | 141 | 144 | -------------------------------------------------------------------------------- /doc/format.md: -------------------------------------------------------------------------------- 1 | # Input Format 2 | 3 | To start the Boba Visualizer, you will need to supply your own file path, 4 | which points to a folder containing your multiverse outcomes and accompanying meta data. 5 | If you write the multiverse using the `Boba DSL`, the meta data will be 6 | generated for you, but you need to wrangle the multiverse outcomes in the 7 | correct format. 8 | 9 | ## The Simplest Scenario 10 | 11 | First, we will describe the simplest case: suppose you write the multiverse 12 | using the Boba DSL, and you would like to visualize only one field, the point 13 | estimates of the universes. 14 | 15 | In this case, you do not need to provide any additional metadata, but you still need 16 | to wrangle the point estimates in the following format. It should be a CSV file 17 | named `estimates.csv`. 
In this file, each row represents a universe, and 18 | there will be two columns: a column named `uid` that corresponds to the 19 | integer ID of the universe, and a column named `estimate` that corresponds to 20 | the point estimate. Here is an example: 21 | 22 | |uid|estimate| 23 | |---|--------| 24 | |1 |0.04 | 25 | |2 |-3.58 | 26 | |3 |1.29 | 27 | |4 |-0.87 | 28 | 29 | Note that the `uid` of a universe must match its filename. For example, if the 30 | filename is `universe_2.py`, the `uid` should be 2. 31 | 32 | Now place this CSV file in the `multiverse` folder generated by the `boba compile` 33 | command, and supply the path to this `multiverse` directory as the input argument 34 | to the visualizer: 35 | 36 | ``` 37 | boba-server -i multiverse/ 38 | ``` 39 | 40 | And you're good to go! 41 | 42 | 43 | ## Boba Merge 44 | 45 | But how do you produce this CSV? The Boba DSL provides support for you to (1) 46 | output a CSV file when you run each universe, and (2) merge these individual 47 | CSV files after you've finished running all universes. 48 | 49 | When authoring the multiverse specification, output 50 | a csv file with just one row and one column, where the column is `estimate` 51 | and it contains your point estimate. The trick is to use the built-in `{{_n}}` 52 | variable in your output CSV path to create a separate file per universe. 53 | 54 | For example, in this Boba DSL template file, we write: 55 | 56 | ```R 57 | library(tidyverse) 58 | library(broom.mixed) 59 | 60 | # fit model 61 | model <- lm(y ~ x + a + b, data=df) 62 | 63 | # get outcomes into a table 64 | result <- tidy(model) %>% 65 | filter(term == 'x') 66 | 67 | # output 68 | write_csv(result, 'estimate_{{_n}}.csv') 69 | ``` 70 | 71 | When we run the multiverse, `universe_1.R` will produce `estimate_1.csv`, 72 | `universe_2.R` will produce `estimate_2.csv`, and so on. 
73 | 74 | After we finish running the multiverse, we will need to merge these separate files into 75 | a single CSV called `estimates.csv`. This can be achieved with the `boba merge` 76 | command: 77 | 78 | ``` 79 | boba merge estimate_{}.csv --out estimates.csv 80 | ``` 81 | 82 | For more information about the merge command, use `boba merge --help`. 83 | 84 | ## Adding More Columns 85 | 86 | What if you would like to visualize more information besides the point estimates, 87 | such as p-values and model fit quality? In this case, you will need to provide 88 | a configuration file to tell the visualizer the file and the column of these fields. 89 | 90 | We first discuss adding a field with one number per universe, for example the p-value. 91 | The first step is to ask your multiverse to output p-values, and merge them 92 | afterwards as a single CSV. You may name the p-value column whatever you want. Also, 93 | p-values need not be in the same CSV as the point estimates; as long as all p-values 94 | are in one file that has a column `uid`, it is fine. But for illustration purposes, 95 | suppose we put p-value in the same file `estimates.csv`, like this: 96 | 97 | |uid|estimate|p.value| 98 | |---|--------|-------| 99 | |1 |0.04 |0.35 | 100 | |2 |-3.58 |0.0008 | 101 | |3 |1.29 |0.0012 | 102 | |4 |-0.87 |0.041 | 103 | 104 | Then, you will need to create a JSON file for visualizer config: 105 | 106 | ```JSON 107 | { 108 | "files": [ 109 | {"id": "EST", "path": "estimates.csv"} 110 | ], 111 | "schema": { 112 | "point_estimate": {"file": "EST", "field": "estimate"}, 113 | "p_value": {"file": "EST", "field": "p.value"} 114 | } 115 | } 116 | ``` 117 | 118 | In this config, `files` contains the path to our CSV file(s). In this case, we 119 | only have one CSV file, and we provide a relative path to our `multiverse` folder. 120 | `schema` tells the visualizer where the corresponding fields are located. 
121 | For example, `p_value` is located in the `p.value` column of the `estimates.csv` 122 | file. Note that we do not directly fill in the filename, but the `id` of the 123 | file, to reduce redundancy (it's like database normalization). 124 | 125 | For a complete list of valid schema fields, see 126 | [here](https://github.com/uwdata/boba-visualizer/tree/master/doc/visualizer_config.md). 127 | 128 | ## Without the Boba DSL 129 | 130 | What if you did not author the multiverse using the Boba DSL? ... 131 | 132 | (More content to come) -------------------------------------------------------------------------------- /bobaserver/run_server.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import click 4 | import os 5 | import pandas as pd 6 | import numpy as np 7 | from boba.bobarun import BobaRun 8 | from bobaserver import app, socketio, scheduler 9 | from .util import read_json, write_json, read_key_safe, print_fail 10 | import bobaserver.common as common 11 | 12 | 13 | def check_path(p, more=''): 14 | """Check if the path exists""" 15 | if not os.path.exists(p): 16 | msg = 'Error: {} does not exist.'.format(p) 17 | print_help(msg + more) 18 | 19 | 20 | def check_required_field(obj, key, prefix=''): 21 | """Check if a required field is in obj.""" 22 | if key not in obj: 23 | err = 'Error: cannot find required field "{}" in {}'.format(key, obj) 24 | print_help(prefix + err) 25 | 26 | def print_help(err=''): 27 | """Show help message and exit.""" 28 | ctx = click.get_current_context() 29 | click.echo(ctx.get_help()) 30 | 31 | if err: 32 | click.secho('\n' + err, fg='red') 33 | ctx.exit() 34 | 35 | 36 | def read_meta(): 37 | """ Read overview.json, verify, and store the meta data. 
""" 38 | fn = os.path.join(app.data_folder, 'overview.json') 39 | err, res = read_json(fn) 40 | if (err): 41 | print_help(err['message']) 42 | 43 | # check summary.csv 44 | fn = os.path.join(app.data_folder, 'summary.csv') 45 | check_path(fn) 46 | 47 | # check file definition 48 | vis = read_key_safe(res, ['visualizer'], {}) 49 | fs = read_key_safe(vis, ['files'], []) 50 | lookup = {} 51 | prefix = 'In parsing visualizer.files in overview.json:\n' 52 | for f in fs: 53 | check_required_field(f, 'id', prefix) 54 | check_required_field(f, 'path', prefix) 55 | f['multi'] = read_key_safe(f, ['multi'], False) 56 | lookup[f['id']] = f 57 | 58 | # read schema and join file 59 | schema = read_key_safe(vis, ['schema'], {}) 60 | prefix = 'In parsing visualizer.schema in overview.json:\n' 61 | check_required_field(schema, 'point_estimate', prefix) 62 | for key in schema: 63 | s = schema[key] 64 | check_required_field(s, 'file', prefix) 65 | # check_required_field(s, 'field', prefix) 66 | # todo: verify if file is valid CSV and if field exist in file 67 | fid = s['file'] 68 | if fid not in lookup: 69 | msg = 'In parsing visualizer.schema in overview.json:\n' 70 | msg += '{}\n'.format(s) 71 | msg += 'Error: file id "{}" is not defined.'.format(fid) 72 | print_help(msg) 73 | s['file'] = lookup[fid]['path'] 74 | s['multi'] = lookup[fid]['multi'] 75 | s['name'] = key 76 | 77 | # check sensitivity flag 78 | sen = read_key_safe(vis, ['sensitivity'], 'ad') 79 | if sen not in ('f', 'ks', 'ad'): 80 | msg = f'Invalid sensitivity flag "{sen}". 
def check_result_files():
    """Check that every single-file result listed in overview.json exists.

    Reads ``visualizer.files`` from overview.json in ``app.data_folder`` and
    verifies the path of each entry that is not flagged ``multi``. Any
    problem is reported through ``print_help``, which prints the message and
    exits.
    """
    fn = os.path.join(app.data_folder, 'overview.json')
    err, res = read_json(fn)
    if err:
        # same handling as read_meta: surface the read/parse error instead
        # of continuing with an unusable result
        print_help(err['message'])

    vis = read_key_safe(res, ['visualizer'], {})
    fs = read_key_safe(vis, ['files'], [])

    prefix = 'In parsing visualizer.files in overview.json:\n'
    for f in fs:
        # report a missing "path" with a readable message rather than a
        # bare KeyError
        check_required_field(f, 'path', prefix)
        if not read_key_safe(f, ['multi'], False):
            check_path(os.path.join(app.data_folder, f['path']))
def read_summary ():
    """ Read summary.csv and attach a 1-based ``uid`` column.

    Returns ``app.summary`` when that attribute has already been set;
    otherwise reads summary.csv from ``app.data_folder`` (empty strings are
    kept as-is because ``na_filter`` is off) and returns the new DataFrame.
    """
    if hasattr(app, 'summary'):
        return app.summary

    fn = os.path.join(app.data_folder, 'summary.csv')
    smr = pd.read_csv(fn, na_filter=False)

    # uid = row label + 1. Computed from the index directly: the previous
    # row-wise apply() was O(rows) Python calls and returns a DataFrame
    # instead of a Series when the table has no rows.
    smr['uid'] = (smr.index + 1).astype(int)
    return smr
def read_results_with_summary (field, dtype=str, diagnostics=True):
    """ Load one result field, join it with the summary and clean it.

    Args:
        field: key into ``app.schema`` naming the result to read.
        dtype: passed through to ``read_results`` and ``remove_na``.
        diagnostics: when true, print data-quality warnings about universes
            missing from the join and rows dropped by ``remove_na``.

    Returns:
        The joined DataFrame after ``remove_na`` has been applied.
    """
    summary = read_summary()
    outcome = read_results(field, dtype)
    column = app.schema[field]['field']
    merged = pd.merge(summary, outcome, on='uid')

    # take the row count before cleaning so dropped rows can be counted
    n_merged = merged.shape[0]
    cleaned = remove_na(merged, column, dtype)

    if not diagnostics:
        return cleaned

    total = summary.shape[0]
    n_failed = total - n_merged
    n_na = n_merged - cleaned.shape[0]

    def share (count):
        # percentage of all universes, one decimal place
        return round(count / total * 100, 1)

    if n_failed > 0 or n_na > 0:
        print_warn(f'Data quality warning: out of {total} universes, ')
    if n_failed > 0:
        print_warn(f' * {n_failed} universes ({share(n_failed)}%) failed to run')
    if n_na > 0:
        print_warn(f' * {n_na} {field} ({share(n_na)}%) contains Inf or NaN value')

    return cleaned
def sensitivity_f_test (df, col):
    """ Compute one-way F-test to estimate decision sensitivity.

    Args:
        df: DataFrame passed to ``sensitivity.sensitivity_f``.
        col: name of the outcome column in ``df``.

    Returns:
        dict mapping each decision's ``var`` to the value returned by
        ``sensitivity.sensitivity_f`` for that decision.

    Raises:
        SystemExit: with status 1 when any score is NaN.
    """
    # print_fail is not among the names this module imports from .util at
    # the top of the file, so bring it into scope here; without this the
    # error path below fails with NameError instead of reporting the problem.
    from .util import print_fail

    # compute one-way F-test
    res = {d['var']: sensitivity.sensitivity_f(df, d['var'], d['options'],
                                               col) for d in app.decisions}

    # check NaN
    for name, score in res.items():
        if np.isnan(score):
            print_fail('ERROR: cannot compute sensitivity')
            print(f'F-test returns NaN value for decision "{name}"')
            # the bare exit() helper is injected by the site module and is
            # not guaranteed to exist; SystemExit always is
            raise SystemExit(1)

    return res
'ad': 171 | # k-samples Anderson-Darling test 172 | score = sensitivity_ad(df, col) 173 | 174 | return score 175 | -------------------------------------------------------------------------------- /client/src/components/monitor/ErrorMessageView.vue: -------------------------------------------------------------------------------- 1 | 33 | 34 | 148 | 149 | 171 | -------------------------------------------------------------------------------- /client/src/controllers/inference/infer_stacking_plot.js: -------------------------------------------------------------------------------- 1 | import * as d3 from 'd3' 2 | import {store, util} from '../config' 3 | import DotPlotScale from '../vis/dot_plot_scale' 4 | import _ from 'lodash' 5 | 6 | class InferStackingPlot { 7 | constructor () { 8 | this.outerWidth = 1050 9 | this.outerHeight = 250 10 | this.margin = { 11 | top: 2, 12 | right: 20, 13 | bottom: 30, 14 | left: 20 15 | } 16 | this.x_axis_label = 'Effect Size' 17 | this.label_font_size = 11 18 | this.smooth = true 19 | 20 | // internal 21 | this.svg = null 22 | this.scale = null 23 | } 24 | 25 | draw (parent, uncertainty, nul_dist) { 26 | // scale 27 | let scale_params = { 28 | 'outerWidth': this.outerWidth, 29 | 'outerHeight': this.outerHeight, 30 | 'margin': this.margin, 31 | 'x_field': 'diff' 32 | } 33 | 34 | // using a shared x range 35 | let scale = new DotPlotScale(store.x_range, scale_params) 36 | this.scale = scale 37 | 38 | // prepare the canvas 39 | this.svg = d3.select(parent) 40 | .append('svg') 41 | .attr('width', this.outerWidth) 42 | .attr('height', this.outerHeight) 43 | .append('g') 44 | .attr('transform', `translate(${this.margin.left},${this.margin.top})`) 45 | 46 | let obj = this.svg.append('svg') 47 | .classed('objects', true) 48 | .attr('width', scale.width()) 49 | .attr('height', scale.height()) 50 | 51 | // axis 52 | this.drawAxis() 53 | 54 | // densities 55 | this.smooth = store.configs.dataset !== 'hurricane' //fixme 56 | if (typeof nul_dist === 
'number') { 57 | this.drawLine(obj, nul_dist) 58 | this.drawHist(obj, uncertainty, 'density-observed') 59 | } else if (this.smooth) { 60 | let den_unc = this.getDensity(uncertainty) 61 | let den_nul = this.getDensity(nul_dist) 62 | 63 | // shared y scale 64 | let ym = Math.max(d3.max(den_unc, (d) => d[1]), 65 | d3.max(den_nul, (d) => d[1])) 66 | let ys = d3.scaleLinear().domain([0, ym]) 67 | .range([0, scale.height()]) 68 | 69 | this.drawEnvelope(obj, den_unc, 'density-observed', ys) 70 | this.drawEnvelope(obj, den_nul, 'density-null', ys) 71 | } else { 72 | // figure out the ratio for shared y axis 73 | let u = _.find(uncertainty, (d) => d).length 74 | let n = _.find(nul_dist, (d) => d).length 75 | let ur = u >= n ? 1 : n / u 76 | let nr = n >= u ? 1 : u / n 77 | 78 | this.drawHist(obj, uncertainty, 'density-observed', ur) 79 | this.drawHist(obj, nul_dist, 'density-null', nr) 80 | } 81 | } 82 | 83 | drawLine (svg, effect) { 84 | // draw a line at the effect size 85 | let scale = this.scale 86 | svg.append('line') 87 | .attr('x1', scale.x(effect)) 88 | .attr('x2', scale.x(effect)) 89 | .attr('y1', 0) 90 | .attr('y2', scale.height()) 91 | .attr('stroke', '#e45756') 92 | } 93 | 94 | /** 95 | * Perform KDE and return the density 96 | */ 97 | getDensity (data) { 98 | let scale = this.scale 99 | let dp = _.flatten(_.map(data, (arr) => arr)) 100 | // remove null and outliers (outside 10x range) 101 | let dm = scale.x.domain() 102 | let xr = (dm[1] - dm[0]) * 10 103 | dp = _.filter(dp, (d) => d != null && d < dm[1] + xr && d > dm[0] - xr) 104 | 105 | return util.kde_smart(dp, 0.5) 106 | } 107 | 108 | /** 109 | * Draw the density from KDE as an envelope 110 | */ 111 | drawEnvelope (svg, density, cls, y_scale) { 112 | let scale = this.scale 113 | 114 | // area 115 | let area = d3.area() 116 | .x((d) => scale.x(d[0])) 117 | .y0(scale.height()) 118 | .y1((d) => scale.height() - y_scale(d[1])) 119 | 120 | // plot 121 | svg.append('path') 122 | .attr('class', cls) 123 | 
.datum(density) 124 | .attr('d', area) 125 | } 126 | 127 | stackHist (data) { 128 | let scale = this.scale 129 | 130 | let dm = scale.x.domain() 131 | let step = (dm[1] - dm[0]) / (scale.width() / 2) 132 | let bins = _.range(dm[0], dm[1], step) 133 | 134 | let res = _.zipObject(bins, _.map(bins, () => 0)) 135 | _.each(data, (arr) => { 136 | if (!arr) { 137 | return // continue 138 | } 139 | let hist = d3.histogram().domain(scale.x.domain()) 140 | .thresholds(bins)(arr) 141 | let w = arr.weight 142 | w = w == null ? 1 : w 143 | _.each(hist, (d) => { 144 | res[d.x1] += d.length * w 145 | }) 146 | }) 147 | res = _.map(res, (val, key) => { 148 | return {x: key, y: val} 149 | }) 150 | res = _.filter(res, (d) => !_.isNaN(d.y)) 151 | return res 152 | } 153 | 154 | /** 155 | * Draw the envelope as a histogram 156 | */ 157 | drawHist (svg, data, cls, ratio = 1) { 158 | let scale = this.scale 159 | let hist = this.stackHist(data) 160 | 161 | // y scale 162 | let ym = d3.max(hist, (d) => d.y) * ratio 163 | let ys = d3.scaleLinear().domain([0, ym]) 164 | .range([0, scale.height()]) 165 | 166 | // area 167 | let area = d3.area() 168 | .x((d) => scale.x(d.x)) 169 | .y0(scale.height()) 170 | .y1((d) => scale.height() - ys(d.y)) 171 | 172 | // plot 173 | svg.append('path') 174 | .attr('class', cls) 175 | .datum(hist) 176 | .attr('d', area) 177 | } 178 | 179 | drawAxis () { 180 | let scale = this.scale 181 | let xAxis = d3.axisBottom(scale.x).tickSize(-scale.height()) 182 | .ticks(Math.round(scale.width() / 30)) 183 | 184 | this.svg.append("g") 185 | .classed("x axis muted", true) 186 | .attr('transform', `translate(0,${scale.height()})`) 187 | .call(xAxis) 188 | .call(g => g.selectAll('.tick line') 189 | .attr('stroke-opacity', 0.1) 190 | .attr('stroke-dasharray', '2, 2')) 191 | .call(g => g.selectAll('.domain') 192 | .attr('d', `M0.5,0H${scale.width()}`)) 193 | 194 | let th = scale.height() + this.label_font_size * 2 + 3 195 | this.svg.append('text') 196 | .classed('axis-label 
/**
 * Forest plot: one row per universe with its point estimate and a 95%
 * interval (estimate +/- 1.96 * standard error), plus a small table on the
 * left. Only the first and last `cutoff` rows of the input are drawn; the
 * rows in between are summarised by a "... N more universes" label.
 */
class ForestPlot {
  constructor () {
    this.outerWidth = 680
    this.outerHeight = 0 // computed in draw()
    this.margin = {
      top: 30,
      right: 15,
      bottom: 60,
      left: 300
    }

    this.line_height = 18
    this.omit_index = 3 // number of blank rows left for the omitted label
    this.x_label = 'Regression Coefficient'

    this.CI_tick_length = 8
    this.dot_radius = 3
  }

  /**
   * Number of rows hidden between the first and last `cutoff` rows.
   * Never negative, so inputs shorter than 2 * cutoff are drawn in full.
   */
  _countOmitted (data, cutoff) {
    return Math.max(0, data.length - cutoff * 2)
  }

  /**
   * Select the rows to draw and compute their plotting fields.
   *
   * @param {Array} data Rows carrying `uid`, SCHEMA.POINT and SCHEMA.STDERR.
   * @param {number} cutoff Number of rows kept at each end.
   * @returns {Array} Objects with `uid`, `_y` (row position), `dot`,
   *   `upper` and `lower`.
   */
  wrangle (data, cutoff) {
    let omitted = this._countOmitted(data, cutoff) > 0

    // keep the head and the tail only when something is actually omitted;
    // otherwise every row is shown
    let res = omitted
      ? data.slice(0, cutoff).concat(data.slice(data.length - cutoff))
      : data

    return _.map(res, (d, i) => {
      return {
        'uid': d.uid,
        // rows after the gap are shifted down by omit_index rows
        '_y': omitted ? (i < cutoff ? i : i + this.omit_index) : i,
        'dot': d[SCHEMA.POINT],
        'upper': d[SCHEMA.POINT] + d[SCHEMA.STDERR] * 1.96,
        'lower': d[SCHEMA.POINT] - d[SCHEMA.STDERR] * 1.96,
      }
    })
  }

  /**
   * Draw the forest plot and its table into `parent`.
   *
   * @param {string|Element} parent Selector or node passed to d3.select.
   * @param {Array} data Rows as accepted by wrangle().
   * @param {number} cutoff Number of rows kept at each end.
   */
  draw (parent, data, cutoff) {
    let omitted = this._countOmitted(data, cutoff)
    // size the plot for the rows that are really drawn
    let n_rows = omitted ? cutoff * 2 + this.omit_index : data.length
    let height = this.line_height * n_rows
    let width = this.outerWidth - this.margin.left - this.margin.right
    this.outerHeight = height + this.margin.top + this.margin.bottom
    data = this.wrangle(data, cutoff)

    // prepare the canvas
    let raw = d3.select(parent)
      .append('svg')
      .attr('width', this.outerWidth)
      .attr('height', this.outerHeight)
    let svg = raw.append('g')
      .attr('transform', `translate(${this.margin.left},${this.margin.top})`)

    // scale: pad both ends outwards by 10% of their magnitude. Multiplying
    // by 1.1 only pads outwards when the lower bound is negative and the
    // upper bound positive; otherwise it would clip the intervals.
    let lo = d3.min(data, (d) => d.lower)
    let hi = d3.max(data, (d) => d.upper)
    let xs = d3.scaleLinear()
      .domain([lo - Math.abs(lo) * 0.1, hi + Math.abs(hi) * 0.1])
      .range([0, width])

    // dots
    svg.selectAll('.forest-dot')
      .data(data)
      .enter()
      .append('circle')
      .classed('forest-dot', true)
      .attr('cx', (d) => xs(d.dot))
      .attr('cy', (d) => this.line_height * d['_y'])
      .attr('r', this.dot_radius)

    // confidence intervals: left tick, horizontal bar, right tick
    let l = this.CI_tick_length
    svg.selectAll('.forest-ci')
      .data(data)
      .enter()
      .append('path')
      .classed('forest-ci', true)
      .attr('d', (d) => {
        return `M${xs(d.lower)},${this.line_height * d._y-l/2}` +
          `v${l}v-${l/2}H${xs(d.upper)}v${l/2}v-${l}`
      })
      .attr('stroke', '#000')
      .attr('fill', 'none')

    // axes
    let x_axis = svg.append('g')
      .attr('transform', `translate(0,${height})`)
      .call(d3.axisBottom(xs).ticks(5))
    x_axis.select('.domain')
      .remove()
    svg.append('path')
      .attr('d', `M0,-${this.margin.top}V${height}h${width}`)
      .attr('stroke', '#000')
      .attr('fill', 'none')

    // dashed line
    svg.append('path')
      .attr('d', `M${xs(SIGN)},-${this.margin.top}V${height}`)
      .attr('stroke', '#666')
      .attr('stroke-dasharray', '6 3')

    // axis label
    svg.append('text')
      .attr('y', height + 38)
      .attr('x', width / 2)
      .style('text-anchor', 'middle')
      .text(this.x_label)

    // table on the left
    // fixme: table header & content are hardcoded
    let margin_left = 15
    let header_height = this.margin.top - this.CI_tick_length/2
    let table = raw.append('g')
      .attr('transform', `translate(${margin_left},${header_height})`)
    let x0 = 5
    let x1 = 60
    let x2 = 200
    let fs = this.line_height - 4
    let rows = table.selectAll('.forest-row')
      .data(data)
      .enter()
      .append('g')
      .classed('forest-text', true)

    rows.append('text')
      .attr('x', x0)
      .attr('y', (d) => this.line_height * d['_y'])
      .style('alignment-baseline', 'hanging')
      .style('font-size', fs)
      .text((d) => d.uid)

    rows.append('text')
      .attr('x', x1)
      .attr('y', (d) => this.line_height * d['_y'])
      .style('alignment-baseline', 'hanging')
      .style('font-size', fs)
      .text('Linear Regression')

    rows.append('text')
      .attr('x', x2)
      .attr('y', (d) => this.line_height * d['_y'])
      .style('alignment-baseline', 'hanging')
      .style('font-size', fs)
      .text((d) => d.dot.toFixed(2))

    // table header
    let padding = 5
    table.append('path')
      .attr('d', `M0,-${padding}H${this.margin.left - margin_left - 10}`)
      .attr('stroke', '#000')
    table.append('path')
      .attr('d', `M0,-${header_height - 0.5}H${this.margin.left - margin_left - 10}`)
      .attr('stroke', '#000')
    table.append('text')
      .attr('x', x0)
      .attr('y', -padding * 2)
      .style('alignment-baseline', 'baseline')
      .style('font-size', fs)
      .text('ID')
    table.append('text')
      .attr('x', x1)
      .attr('y', -padding * 2)
      .style('alignment-baseline', 'baseline')
      .style('font-size', fs)
      .text('Analytic Approach')
    table.append('text')
      .attr('x', x2)
      .attr('y', -padding * 2)
      .style('alignment-baseline', 'baseline')
      .style('font-size', fs)
      .text('Coefficient')

    // omitted: label sits on the middle row of the gap, which starts at
    // row `cutoff` (row 11 for cutoff = 10 and omit_index = 3)
    if (omitted) {
      let gap_row = cutoff + Math.floor(this.omit_index / 2)
      table.append('text')
        .attr('x', x1)
        .attr('y', this.line_height * gap_row)
        .style('alignment-baseline', 'hanging')
        .style('font-size', fs)
        .attr('fill', '#666')
        .text(` ... ${omitted} more universes`)
    }
  }
}
this.label_font_size : 0 40 | 41 | // scale 42 | let scale_params = { 43 | 'outerWidth': this.outerWidth, 44 | 'outerHeight': this.outerHeight, 45 | 'margin': this.margin 46 | } 47 | 48 | let scale = new RawScale(range, scale_params) 49 | this.scale = scale 50 | 51 | // prepare the canvas 52 | let svg = d3.select(parent) 53 | .append('svg') 54 | .attr('width', outerWidth) 55 | .attr('height', outerHeight) 56 | .append('g') 57 | .attr('transform', `translate(${margin.left},${margin.top})`) 58 | 59 | // axis 60 | this._drawAxis(svg) 61 | 62 | // title and labels 63 | this._drawTitles(svg) 64 | 65 | let objects = svg.append('svg') 66 | .classed('objects', true) 67 | .attr('width', scale.width()) 68 | .attr('height', scale.height()) 69 | 70 | // first draw the actual data as dots 71 | this._drawDots(objects, data.actual, scale.y('actual'), 'raw-dot') 72 | this._drawViolin(objects, data.actual, scale.y('actual')) 73 | 74 | // then draw the predicted data as violin plots 75 | this._drawDots(objects, data.pred, scale.y('pred'), 'pred-dot') 76 | this._drawViolin(objects, data.pred, scale.y('pred')) 77 | } 78 | 79 | _drawDots (svg, data, y0, cls) { 80 | let scale = this.scale 81 | let bin_size = this.dot_radius * 2 // x-axis bin size 82 | 83 | // sort 84 | data = _.map(data, (d, i) => { 85 | return {value: d, _index: i} 86 | }) 87 | let sorted = _.sortBy(data, (d) => d.value) 88 | 89 | // dot density algorithm 90 | let i = 0 91 | let x = null 92 | let count = 0 93 | while (i < sorted.length) { 94 | let xi = sorted[i].value 95 | if (x != null && scale.x(xi) < scale.x(x) + bin_size) { 96 | count += 1 97 | } else { 98 | x = xi 99 | count = 0 100 | } 101 | 102 | let idx =sorted[i]._index 103 | data[idx]._x = x 104 | data[idx]._y = count 105 | 106 | i += 1 107 | } 108 | 109 | // compute y based on counts 110 | let h = scale.y.bandwidth() / 2 111 | let step = Math.min(h / (d3.max(data, (d) => d._y) + 1), 112 | this.dot_radius) 113 | _.each(data, (d) => { 114 | let delta = d._y * 
step + step * 0.5 115 | d._y = d._y % 2 ? y0 + h + delta : y0 + h - delta 116 | }) 117 | 118 | svg.selectAll('.' + cls) 119 | .data(data) 120 | .enter() 121 | .append('circle') 122 | .classed(cls, true) 123 | .attr('r', () => this.dot_radius) 124 | .attr('cx', (d) => scale.x(d._x)) 125 | .attr('cy', (d) => d._y) 126 | .attr('fill-opacity', this.dot_opacity) 127 | } 128 | 129 | _drawViolin (svg, data, y0) { 130 | let scale = this.scale 131 | 132 | // compute kernel density estimation 133 | let density = util.kde_smart(data) 134 | 135 | // scale 136 | let h = scale.y.bandwidth() / 2 137 | let ys = d3.scaleLinear().range([y0 + h, y0]) 138 | .domain([0, 1.05 * d3.max(_.map(density, (d) => d[1]))]) 139 | 140 | // line 141 | let line = d3.line().curve(d3.curveBasis) 142 | .x((d) => scale.x(d[0])) 143 | .y((d) => ys(d[1])) 144 | 145 | // plot the upper curve 146 | svg.append('path') 147 | .attr('class', 'violin-curve') 148 | .datum(density) 149 | .attr('d', line) 150 | 151 | // plot the lower curve 152 | ys.range([y0 + h, y0 + h * 2]) 153 | line = d3.line().curve(d3.curveBasis) 154 | .x((d) => scale.x(d[0])) 155 | .y((d) => ys(d[1])) 156 | 157 | svg.append('path') 158 | .attr('class', 'violin-curve') 159 | .datum(density) 160 | .attr('d', line) 161 | } 162 | 163 | _drawAxis (svg) { 164 | let scale = this.scale 165 | let xAxis = d3.axisBottom(scale.x).tickSize(-scale.height()) 166 | 167 | // X Axis 168 | svg.append("g") 169 | .classed("x axis muted", true) 170 | .attr('transform', `translate(0,${scale.height()})`) 171 | .call(xAxis) 172 | .call(g => g.selectAll('.tick line') 173 | .remove()) 174 | .call(g => g.selectAll('.domain') 175 | .attr('d', `M0.5,0H${scale.width()}`)) 176 | } 177 | 178 | _drawTitles (svg) { 179 | let scale = this.scale 180 | let th = scale.height() + this.label_font_size * 2 + 3 181 | 182 | // title 183 | if (this.title) { 184 | svg.append('text') 185 | .attr('transform', `translate(0, ${th})`) 186 | .style('font-size', this.label_font_size) 187 | 
/**
 * Specification curve: an upper panel plotting `diff` per specification
 * (with an optional lower/upper envelope) and a lower panel with one row per
 * decision option, marking which option each specification uses.
 */
class SpecCurvePlot {
  constructor () {
    this.outerWidth = 1050
    this.upperHeight = 130
    this.lowerHeight = 0 // will be computed
    this.margin = {
      top: 0,
      right: 50,
      bottom: 50,
      left: 250,
      inner: 15
    }
    this.dot_radius = 1.4
    this.row_height = 12
    this.axis_label = 'Effect Size'

    // assigned in calling draw
    this.parent = ''
    this.data = []

    // components
    this.svg = null
  }

  /**
   * Height of the lower panel including margins: each entry of
   * store.decisions contributes (its length + 2) rows.
   */
  getLowerHeight () {
    let h = 0
    _.each(store.decisions, (dec) => {
      h += (dec.length + 2) * this.row_height
    })

    return h + this.margin.top + this.margin.bottom +
      2 * this.margin.inner - this.row_height
  }

  /**
   * Draw both panels into `parent`.
   *
   * @param parent Selector or node handed to d3.select.
   * @param data Array of rows; the code reads `i`, `diff`, optional
   *   `lower`/`upper`, and one property per decision name.
   *   NOTE(review): the y domain is taken from the first and last rows, so
   *   data is presumably sorted by `diff` and non-empty -- confirm at caller.
   */
  draw (parent, data) {
    this.parent = parent
    this.data = data
    let l = data.length
    this.lowerHeight = this.getLowerHeight()

    // prepare the canvas
    let raw = d3.select(parent)
      .append('svg')
      .attr('width', this.outerWidth)
      .attr('height', this.upperHeight + this.lowerHeight)
    this.svg = raw.append('g')
      .attr('transform', `translate(${this.margin.left},${this.margin.top})`)

    // upper plot
    let width = this.outerWidth - this.margin.left - this.margin.right
    let height = this.upperHeight - this.margin.top
    let upper = this.svg.append('svg')
      .classed('upper', true)
      .attr('width', width)
      .attr('height', height)

    // scales
    let scale_x = d3.scaleLinear().range([this.margin.inner, width - this.margin.inner])
      .domain([0, l])
    let scale_y = d3.scaleLinear().range([height - 5, 5])
      .domain([data[0].diff, data[l - 1].diff])

    // the green background
    upper.append('rect')
      .attr('width', width)
      .attr('height', height)
      .attr('fill', '#C0DCC0')

    // draw CIs (if applicable)
    // only rows that carry an `upper` value take part in the envelope
    let ds = _.filter(data, (d) => d.upper != null)
    if (ds.length) {
      let area = d3.area()
        .x((d) => scale_x(d.i))
        .y0((d) => scale_y(d.lower))
        .y1((d) => scale_y(d.upper))
      upper.append('path')
        .datum(ds)
        .classed('spec-curve-envelope', true)
        .attr('d', area)
    }

    // draw dots
    upper.selectAll('.curve-dot')
      .data(data)
      .enter()
      .append('circle')
      .classed('curve-dot', true)
      .attr('r', () => this.dot_radius)
      .attr('cx', (d) => scale_x(d.i))
      .attr('cy', (d) => scale_y(d.diff))

    // the horizontal line at 0
    // drawn only when the y domain ends have opposite signs
    if (scale_y.domain()[0] * scale_y.domain()[1] < 0) {
      upper.append('path')
        .attr('d', `M0,${scale_y(0)}H${width}`)
        .attr('stroke', '#6c757d')
        .attr('shape-rendering', 'crispEdges')
    }

    // y axis
    let yaxis = d3.axisRight(scale_y).tickSize(-width)
      .ticks(3)
    let axis_container = this.svg.append("g")
      .classed("y axis muted", true)
      .attr('transform', `translate(${width},0)`)
    axis_container
      .call(yaxis)
      .call(g => g.selectAll('.tick line')
        .attr('stroke-opacity', 0.1)
        .attr('stroke-dasharray', '2, 2'))
      .call(g => g.selectAll('.domain')
        .attr('d', `M0.5,${height} V0.5`))

    // y axis label
    axis_container.append('text')
      .text(this.axis_label)
      .attr('x', 35)
      .attr('y', height / 2)
      .attr('text-anchor', 'middle')
      .attr('transform', `rotate(270, 35, ${height / 2})`)
      .classed('spec-curve-axis-title', true)

    // lower plot
    // `height` is reused: from here on it is the lower panel's inner height
    height = this.lowerHeight - this.margin.top - this.margin.bottom
    let lower = this.svg.append('g')
      .attr('transform', `translate(0,${this.upperHeight + this.margin.top})`)
      .append('svg')
      .classed('lower', true)
      .attr('width', width)
      .attr('height', height)
    // the white background
    lower.append('rect')
      .attr('width', width)
      .attr('height', height)
      .attr('fill', '#fff')
    // canvas for the decision labels
    let labels = raw.append('g')
      .attr('transform', `translate(0,${this.upperHeight + this.margin.top + this.margin.inner})`)
      .append('svg')
      .attr('width', this.margin.left)
      .attr('height', height)

    // h0: vertical offset of the current decision's group of rows
    // i: running index of the decision, used for class name and color
    let h0 = 0
    let i = 0
    _.each(store.decisions, (opts, dec) => {
      // dots
      let cl = 'dec-dot-' + i
      // presumably tableau10 is a string of concatenated 6-character hex
      // colors; this takes the i-th one -- verify against config
      let color = '#' + tableau10.substr(i * 6, 6)
      lower.selectAll('.' + cl)
        .data(data)
        .enter()
        .append('circle')
        .classed(cl, true)
        .attr('r', () => this.dot_radius)
        .attr('cx', (d) => scale_x(d.i))
        .attr('cy', (d) => {
          // row of the option this specification uses for decision `dec`
          let idx = _.indexOf(opts, d[dec])
          return h0 + (1.5 + idx) * this.row_height + this.margin.inner
        })
        .attr('fill', color)

      // labels
      // first label is the decision name (bold, larger), then its options
      let ls = _.concat([dec], opts)
      _.each(ls, (label, idx) => {
        labels.append('text')
          .attr('x', this.margin.left - 10)
          .attr('y', h0 + (idx + 0.5) * this.row_height)
          .attr('text-anchor', 'end')
          .attr('dominant-baseline', 'middle')
          .style('font-size', () => idx ? this.row_height - 3 : this.row_height)
          .style('font-weight', () => idx ? 'normal' : 'bold')
          .text(label)
      })

      // increment
      h0 += (opts.length + 2) * this.row_height
      i += 1
    })

    // bottom axis
    // `height` is reused again: now the y offset of the bottom axis
    height = this.upperHeight + this.lowerHeight - this.margin.bottom
    let xaxis = d3.axisBottom(scale_x).tickSize(5)
      .ticks(l / 50)
    axis_container = this.svg.append("g")
      .classed("x axis muted", true)
      .attr('transform', `translate(0, ${height})`)
    axis_container
      .call(xaxis)
      // removes ticks whose position index is strictly between 1 and
      // l/50 - 1, leaving only ticks near both ends
      .call(g => g.selectAll('.tick')
        .filter((d, i) => i > 1 && i < l/50-1)
        .remove())

    // bottom axis label
    axis_container
      .append('text')
      .text('Specification #')
      .attr('x', width / 2)
      .attr('y', 30)
      .attr('text-anchor', 'middle')
      .classed('spec-curve-axis-title', true)
  }
}
def round_robin(df, n=50):
    """
    Round robin baseline (similar to stratified sampling).

    In each round we go over every decision, and for every option of that
    decision we draw one universe uniformly at random from the universes that
    use the option and have not been drawn yet (sampling without replacement).

    Parameters:
     - df: multiverse dataframe; every column is a decision, every row is a
       universe. The index is expected to be unique because drawn rows are
       dropped and looked up by label.
     - n: target sample size. A round that has started is carried on over all
       decisions, so more than n indices may be returned; fewer are returned
       when df runs out of rows.

    Returns: (indices, weights) where indices is a numpy array of positions
    (iloc) into df and weights is the result of round_robin_weights(df).
    """
    decs = list(df.columns)

    # weights
    weights = round_robin_weights(df)

    remaining = df
    drawn = []
    while len(drawn) < n and remaining.shape[0] > 0:
        n_before = len(drawn)

        # in each round, go over each decision
        for dec in decs:
            if remaining.shape[0] == 0:
                break

            # one draw per option; the groups of a decision are disjoint, so
            # all the draws of this decision can be removed in a single drop
            groups = remaining.groupby(dec).groups
            picked = [ids[np.random.choice(len(ids))] for ids in groups.values()]
            drawn.extend(picked)
            remaining = remaining.drop(index=picked)

        # nothing can be drawn any more (no decision, or only NaN options
        # left, which groupby skips): stop instead of looping forever
        if len(drawn) == n_before:
            break

    # convert to position-based index (iloc)
    indices = df.index.get_indexer(drawn)
    return indices, weights


def uniform(df, n=50):
    """
    Sample universes uniformly without replacement.

    Parameters:
     - df: multiverse dataframe, one row per universe
     - n: sample size; capped at the number of rows in df

    Returns: (indices, None) where indices holds row positions. The second
    element is None because uniform sampling has no importance weights.
    """
    size = min(n, df.shape[0])
    indices = np.random.choice(df.shape[0], size, False)
    return indices, None


def sketching(df, n=50, interact=False):
    """
    Use a random sampling-based sketching algorithm without rescaling;
    It is algorithm 2 on page 8. Return the sketching indices and the
    normalized leverage scores.

    Parameters:
     - df: multiverse dataframe where every column is a decision
     - n: sample size; capped at the number of rows in df
     - interact: passed to one_hot_encode, whether to add two-way interactions

    Returns: (indices, dist) where indices are row positions drawn without
    replacement with probability dist, and dist sums to 1.
    """
    # one hot encoding
    X = np.asarray(one_hot_encode(df, interact=interact), dtype=float)
    n_rows = X.shape[0]

    # calculate leverage scores, i.e. the diagonal of U U^T restricted to the
    # column space of X. Directions with a (numerically) zero singular value
    # are arbitrary and must not contribute to the scores.
    U, s, _ = np.linalg.svd(X, full_matrices=False)
    tol = s.max() * max(X.shape) * np.finfo(float).eps if s.size else 0.0
    U = U[:, s > tol]
    # squared row norms equal the diagonal of U U^T without building N x N
    scores = np.sum(U ** 2, axis=1)

    # get distribution and sample; the scores sum to rank(X), which is not
    # necessarily the number of columns, so normalize by the actual sum
    total = scores.sum()
    if total > 0:
        dist = scores / total
    else:
        # degenerate encoding (all rows encoded as zeros): no row is preferred
        dist = np.full(n_rows, 1.0 / max(n_rows, 1))
    indices = np.random.choice(n_rows, min(n, n_rows), False, dist)

    return indices, dist


def round_robin_weights(df):
    """
    Compute the probability of drawing each universe in the first round of round
    robin. The probability is normalized (the sum of all universes is 1).

    For one decision, a universe is drawn with probability 1 / (number of
    universes sharing its option). The draws of different decisions are
    treated as independent, so the probability of being drawn by at least one
    decision is 1 - prod(1 - marginal). This is the closed form of the
    inclusion-exclusion sum over all subsets of decisions.

    Returns: a numpy array with the probability of drawing the universe where the
    index matches the index in the input df.
    """
    # a list of decisions
    decs = df.columns.tolist()
    # missing values count as an option of their own
    filled = df.fillna('')

    # probability that no decision draws the universe
    p_missed = np.ones(filled.shape[0])
    for d in decs:
        # size of the option group each universe belongs to
        group_size = filled[d].map(filled[d].value_counts())
        p_missed *= 1.0 - 1.0 / group_size.to_numpy(dtype=float)

    weights = 1.0 - p_missed
    weights = weights / np.sum(weights)  # normalize
    return weights


def get_outcome_mean(y, indices, weights=None, ignore_na=True):
    """
    Estimate outcome mean from sample.

    Parameters:
     - y: outcome of every universe (array-like of numbers)
     - indices: positions of the sampled universes in y
     - weights: likelihood ratio f(x)/g(x) for importance sampling, aligned
       with y; the sampled outcomes are multiplied by their weights
     - ignore_na: whether to drop NaN before averaging

    Returns: the mean as a float; NaN if nothing is left to average.
    """
    arr = np.asarray(y, dtype=float)[indices]
    if weights is not None:
        arr = np.asarray(weights, dtype=float)[indices] * arr
    if ignore_na:
        arr = arr[~np.isnan(arr)]

    # same value as np.mean on an empty array, without the runtime warning
    return np.mean(arr) if arr.size else np.nan


def bootstrap_outcome(df, COL, indices, weights=None, ignore_na=True):
    """
    Given a sample, compute the bootstrapped CI around outcome mean.

    Parameters:
     - df: multiverse dataframe
     - COL: the column in df
     - indices: sample index into the multiverse df
     - weights: importance sampling weights, if applicable
     - ignore_na: whether NaN outcomes are dropped, both for the sample mean
       and for every bootstrap replicate

    Returns: a list [mean, lower, upper]
    """
    y = df[COL].to_numpy()
    mean = get_outcome_mean(y, indices, weights, ignore_na)

    # we will pass the index array to bootstrap, so here we adjust the func API
    def stat(idx, w):
        return get_outcome_mean(y, idx, w, ignore_na)

    # bootstrap
    bs = bootstrap(stat, ci_type='percentile', n=200)
    bs.fit(indices, weights)  # sample uniformly, weighted mean
    lower, upper = bs.get_ci()

    return [mean, lower, upper]


def bootstrap_sensitivity(df, COL, indices, decs=None):
    """
    Sensitivity and bootstrapped CI for all decisions.

    Parameters:
     - df: multiverse dataframe
     - COL: the outcome column in df
     - indices: sample index (positions) into the multiverse df
     - decs: the decisions to score; by default every column except COL

    Returns: a dataframe with one row per decision and the columns decision,
    score, p, score_lower, score_upper. The CI columns are NaN when the score
    is NaN, because no bootstrap is run in that case.
    """
    if decs is None:
        # assuming all columns except "outcome" is a decision
        decs = list(df.columns)
        decs.remove(COL)

    # prep work
    out = []
    header = ['decision', 'score', 'p', 'score_lower', 'score_upper']

    # our bootstrap statistic is the AD score of a decision
    def stat(idx, d):
        return ad_wrapper(df.iloc[idx], d, COL)[0]

    # loop over all decisions
    for d in decs:
        # sample stats
        score, pval = ad_wrapper(df.iloc[indices], d, COL)
        row = [d, score, pval]

        # bootstrap
        if not np.isnan(score):
            bs = bootstrap(stat, ci_type='bc', n=200)
            bs.fit(indices, d)
            lower, upper = bs.get_ci()
            row += [lower, upper]

        # pad with NaN if we did not bootstrap
        row += [np.nan] * (len(header) - len(row))
        out.append(row)

    return pd.DataFrame(out, columns=header)


def one_hot_encode(df, interact=False):
    """
    Convert data into dummy coding for linear regression.

    Parameters:
     - df: multiverse dataframe where every column is a decision
     - interact: whether to add all possible two-way interactions

    Returns: the design matrix as a numpy array, one row per universe. Without
    interactions the first level of each decision is dropped; with
    interactions the options of the first row act as the reference level and
    every column involving the reference level is dropped.
    """
    # a list of decisions
    decisions = df.columns.tolist()

    # one-hot encoding of the main effects only
    if not interact:
        encoded = pd.get_dummies(df, columns=decisions, drop_first=True)
        return encoded.to_numpy()

    REF_LEVEL = 'reference_level'
    # the first row is only needed (and only required to exist) here
    ref = df.iloc[0]

    # helper function to create interaction terms
    def add_interaction(row):
        # work on an object copy: we must not mutate what apply passes in,
        # and the row must be able to hold the reference label
        row = row.astype(object)

        # first manually replace reference level
        for dec in decisions:
            if row[dec] == ref[dec]:
                row[dec] = REF_LEVEL

        # now add all two-way interactions
        for d1, d2 in itertools.combinations(decisions, 2):
            if row[d1] == REF_LEVEL or row[d2] == REF_LEVEL:
                val = REF_LEVEL
            else:
                val = f'{row[d1]}_{row[d2]}'
            row[f'{d1}_{d2}'] = val

        return row

    df = df.apply(add_interaction, axis=1)
    df = pd.get_dummies(df, columns=df.columns.tolist())
    # now manually remove any columns with ref level
    ref_cols = [c for c in df.columns.tolist() if REF_LEVEL in c]
    df = df.drop(columns=ref_cols)

    # get X
    return df.to_numpy()
/**
 * Line chart of the outcome mean with its CI band, plotted against the
 * progress (n_samples / x_max).
 *
 * Each datum is expected to have the fields `n_samples`, `mean`, `lower`
 * and `upper`. The caller must set `outerWidth`, `outerHeight` and `x_max`
 * before calling draw() or update().
 */
class OutcomeProgressPlot {
  /**
   * @param parent Selector or DOM node that will contain the svg.
   */
  constructor (parent) {
    this.outerWidth = 0 // will be set by caller
    this.outerHeight = 0 // will be set by caller
    this.margin = {
      top: 5,
      right: 0,
      bottom: 6,
      left: 20
    }
    this.parent = parent

    // style
    this.x_label = 'Progress'
    this.label_font_size = 10
    this.show_x_label = true

    // axis range
    this.x_max = null // must be set by the caller

    // internal
    this.svg = null
    this.xs = null // x scale
    this.ys = null // y scale
    this.trans = null // shared transition
  }

  /**
   * Create the svg and draw the axes, the CI band, the mean line and the
   * legend from scratch.
   */
  draw (data) {
    // prepare the canvas
    this.svg = d3.select(this.parent)
      .append('svg')
      .attr('width', this.outerWidth)
      .attr('height', this.outerHeight)
      .append('g')
      .attr('transform', `translate(${this.margin.left},${this.margin.top})`)

    // scale
    this._setScale(data)

    // shared transition config, for synchronization
    this.trans = this._newTransition()

    // draw y axis
    this.y_axis = this.svg.append('g')
      .attr('transform', 'translate(0, 0)')
      .classed('y axis muted', true)
    this._drawYAxis()

    // x axis label
    if (this.show_x_label) {
      let height = this.ys.range()[0]
      let width = this.xs.range()[1]
      this.svg.append('text')
        .classed('axis-label muted', true)
        .attr('transform', `translate(${width / 2}, ${height + 3})`)
        .style('text-anchor', 'middle')
        .style('font-size', this.label_font_size)
        .text(this.x_label)
    }
    this._drawXAxis()

    // draw mean line and CI band
    this.svg.append('g').classed('actual-plot', true)
    this._drawLineAndCI(data)

    // legend
    let width = this.xs.range()[1]
    let legend_width = 70
    let symbol_width = 20
    let legend_padding = 5
    let box = this.svg.append('g')
      .classed('legend-container', true)
      .attr('transform', `translate(${width - legend_width},${-this.margin.top})`)
    box.append('rect')
      .attr('x', 0)
      .attr('y', 0)
      .attr('height', this.label_font_size * 4)
      .attr('width', legend_width)
      .attr('fill', '#fff')
    box.append('rect')
      .classed('outcome-CI', true)
      .attr('x', legend_padding)
      .attr('y', legend_padding)
      .attr('height', this.label_font_size)
      .attr('width', symbol_width)
    box.append('text')
      .classed('axis-label muted', true)
      .attr('x', symbol_width + legend_padding + 5)
      .attr('y', legend_padding + 8)
      .style('font-size', this.label_font_size)
      .text('95% CI')
    let y_start = legend_padding + 5 + this.label_font_size * 1.5 - 1
    box.append('path')
      .classed('outcome-mean', true)
      .attr('d', `M${legend_padding},${y_start}h${symbol_width}`)
    box.append('text')
      .classed('axis-label muted', true)
      .attr('x', symbol_width + legend_padding + 5)
      .attr('y', legend_padding + 22)
      .style('font-size', this.label_font_size)
      .text('Mean')
  }

  /**
   * Remove everything inside the parent container and forget the chart.
   */
  clear () {
    if (this.svg) {
      d3.select(this.parent).selectAll('*').remove()
    }
    this.svg = null
  }

  /**
   * Bring the chart in sync with the data: clear it when there are fewer than
   * two data points, draw it when it does not exist, otherwise animate the
   * existing chart to the new data.
   */
  update (data) {
    if (!data || data.length < 2) {
      // the data has been cleared
      this.clear()
    } else if (!this.svg) {
      // the chart has been removed
      this.draw(data)
    } else {
      // update the current chart
      // The transition created by a previous draw/update has ended by now, so
      // its timing can no longer be inherited; start a fresh one that the
      // band and the line of this update share.
      this.trans = this._newTransition()
      this._setScale(data)
      this._drawYAxis()
      this._drawXAxis(true)
      this._drawLineAndCI(data, true)
    }
  }

  /**
   * Create the transition shared by the animated elements of one redraw.
   */
  _newTransition () {
    return d3.transition()
      .duration(1000)
      .ease(d3.easeLinear)
  }

  /**
   * Compute the x scale (progress fraction, domain upper bound snapped to
   * 0.25 / 0.5 / 0.75 / 1) and the y scale (from the smallest lower bound to
   * the largest upper bound).
   */
  _setScale (data) {
    let height = this.outerHeight - this.margin.top - this.margin.bottom
    let width = this.outerWidth - this.margin.left - this.margin.right

    let xmax = d3.max(data, (d) => d['n_samples']) / this.x_max
    xmax = xmax > 0.6 ? 1 : (xmax > 0.4 ? 0.75 : (xmax > 0.2 ? 0.5 : 0.25))
    this.xs = d3.scaleLinear()
      .domain([0, xmax])
      .range([0, width])
    this.ys = d3.scaleLinear()
      .domain([d3.min(data, (d) => d.lower), d3.max(data, (d) => d.upper)])
      .range([height, 0])
  }

  /**
   * (Re)draw the y axis with dashed grid lines spanning the plot width.
   */
  _drawYAxis () {
    let width = this.xs.range()[1]
    let func = d3.axisLeft(this.ys).ticks(6)
      .tickSize(-width)

    // animation will have a weird flashing bug, probably because we remove the domain ...
    // let y_axis = redraw ? this.y_axis.transition(this.trans) : this.y_axis

    this.y_axis
      .call(func)
      .call(g => g.selectAll('.domain').remove())
      .call(g => g.selectAll('.tick line')
        .attr('stroke-opacity', 0.1)
        .attr('stroke-dasharray', '2, 2'))
  }

  /**
   * Draw or update the single x axis label showing the upper bound of the x
   * domain as a percentage.
   *
   * @param redraw Whether the label already exists and only needs updating.
   */
  _drawXAxis (redraw = false) {
    let x_max = this.xs.domain()[1]
    let height = this.ys.range()[0]
    let sel = this.svg.selectAll('.axis-label.muted.x-axis')
      .data([x_max])
    if (!redraw) {
      sel = sel.enter().append('text')
        .classed('axis-label muted x-axis', true)
    }
    sel
      .attr('transform', `translate(0, ${height + 3})`)
      .attr('x', (d) => this.xs(d))
      .style('text-anchor', 'end')
      .style('font-size', this.label_font_size)
      .text((d) => d * 100 + '%')
  }

  /**
   * Draw the CI band and the mean line, or animate the existing ones.
   *
   * @param redraw Whether the paths already exist and should transition.
   */
  _drawLineAndCI (data, redraw = false) {
    let svg = this.svg.select('.actual-plot')

    // draw CIs
    let area = d3.area()
      .x((d) => this.xs(d['n_samples'] / this.x_max))
      .y0((d) => this.ys(d.lower))
      .y1((d) => this.ys(d.upper))
    if (redraw) {
      svg.select('.outcome-CI')
        .datum(data)
        .transition(this.trans)
        .attr('d', area)
    } else {
      svg.append('path')
        .datum(data)
        .classed('outcome-CI', true)
        .attr('d', area)
    }

    // draw the mean line
    let line_data = _.map(data, (d) => {
      return {
        x: d['n_samples'],
        y: d['mean']
      }
    })
    // convert to the progress fraction before scaling, exactly like the band
    let line = d3.line()
      .x((d) => this.xs(d.x / this.x_max))
      .y((d) => this.ys(d.y))
    if (redraw) {
      svg.select('.outcome-mean')
        .datum(line_data)
        .transition(this.trans)
        .attr('d', line)
    } else {
      svg.append('path')
        .datum(line_data)
        .attr('d', line)
        .attr('fill', 'none')
        .classed('outcome-mean', true)
    }
  }
}