├── .gitignore ├── Procfile ├── README.md ├── ch02 ├── Email Analysis.xlsb ├── README.md └── probability.pig ├── ch03 ├── README.md ├── cat_avro ├── gmail │ ├── email.avro.schema │ ├── email_utils.py │ ├── gmail.py │ └── gmail_slurper.py ├── pig │ ├── avro_to_mongo.pig │ ├── elasticsearch.pig │ ├── mongo.pig │ └── sent_counts.pig ├── python │ ├── elasticsearch.py │ ├── flask_echo.py │ ├── flask_mongo.py │ ├── mongo.py │ └── test_avro.py └── web │ ├── index.py │ ├── static │ ├── bootstrap │ │ ├── css │ │ │ ├── bootstrap-responsive.css │ │ │ ├── bootstrap-responsive.min.css │ │ │ ├── bootstrap.css │ │ │ └── bootstrap.min.css │ │ ├── img │ │ │ ├── glyphicons-halflings-white.png │ │ │ └── glyphicons-halflings.png │ │ └── js │ │ │ ├── bootstrap.js │ │ │ └── bootstrap.min.js │ ├── d3 │ │ ├── d3.v3.js │ │ └── d3.v3.min.js │ └── nvd3 │ │ ├── .gitignore │ │ ├── LICENSE.md │ │ ├── Makefile │ │ ├── README.md │ │ ├── build.bat │ │ ├── build.sh │ │ ├── deprecated │ │ ├── bar.html │ │ ├── bar.js │ │ ├── charts │ │ │ ├── cumulativeLineChart.js │ │ │ ├── discreteBarChart.js │ │ │ ├── lineChart.js │ │ │ ├── lineChartDaily.js │ │ │ └── stackedAreaChart.js │ │ ├── cumulativeLine.html │ │ ├── cumulativeLine.js │ │ ├── discreteBarChartWithEnabledTooltip.html │ │ ├── discreteBarChartWithEnabledTooltip.js │ │ ├── discreteBarWithAxes.html │ │ ├── discreteBarWithAxes.js │ │ ├── lineChart-old.html │ │ ├── lineChartDaily.html │ │ ├── linePlusBar.html │ │ ├── linePlusBar.js │ │ ├── lineWithFocus.html │ │ ├── lineWithFocus.js │ │ ├── lineWithFourAxes.html │ │ ├── lineWithFourAxes.js │ │ ├── lineWithLegend.html │ │ ├── lineWithLegend.js │ │ ├── monthendAxis.html │ │ ├── multiBarHorizontalWithLegend.html │ │ ├── multiBarHorizontalWithLegend.js │ │ ├── multiBarWithLegend.html │ │ ├── multiBarWithLegend.js │ │ ├── pie.js │ │ ├── scatterChart.html │ │ ├── scatterChart.js │ │ ├── scatterFisheyeChart.js │ │ ├── scatterWithLegend.html │ │ ├── scatterWithLegend.js │ │ ├── stackedArea.js │ │ ├── stackedAreaChart.html │ │ ├── stackedAreaChart_old.html │ │ ├── stackedAreaWithLegend.html │ │ └── stackedAreaWithLegend.js │ │ ├── examples │ │ ├── bullet.html │ │ ├── bulletChart.html │ │ ├── crossfilter.html │ │ ├── crossfilterWithDimentions.html │ │ ├── crossfilterWithTables.html │ │ ├── cumulativeLineChart.html │ │ ├── discreteBarChart.html │ │ ├── historicalBar.html │ │ ├── horizon.html │ │ ├── images │ │ │ ├── grey-minus.png │ │ │ └── grey-plus.png │ │ ├── indentedtree.html │ │ ├── legend.html │ │ ├── line.html │ │ ├── lineChart.html │ │ ├── lineChartSVGResize.html │ │ ├── linePlusBarChart.html │ │ ├── linePlusBarWithFocusChart.html │ │ ├── lineWithFisheyeChart.html │ │ ├── lineWithFocusChart.html │ │ ├── multiBar.html │ │ ├── multiBarChart.html │ │ ├── multiBarHorizontalChart.html │ │ ├── multiChart.html │ │ ├── nations.json │ │ ├── pie.html │ │ ├── pieChart.html │ │ ├── scatter.html │ │ ├── scatterChart.html │ │ ├── scatterPlusLineChart.html │ │ ├── sparkline.html │ │ ├── sparklinePlus.html │ │ ├── stackedArea.html │ │ ├── stackedAreaChart.html │ │ └── stream_layers.js │ │ ├── lib │ │ ├── cie.js │ │ ├── crossfilter.js │ │ ├── crossfilter.min.js │ │ ├── d3.v2.js │ │ ├── d3.v2.min.js │ │ ├── fisheye.js │ │ ├── hive.js │ │ ├── horizon.js │ │ └── sankey.js │ │ ├── nv.d3.js │ │ ├── nv.d3.min.js │ │ └── src │ │ ├── core.js │ │ ├── intro.js │ │ ├── models │ │ ├── axis.js │ │ ├── backup │ │ │ ├── bullet.js │ │ │ └── bulletChart.js │ │ ├── bullet.js │ │ ├── bulletChart.js │ │ ├── cumulativeLineChart.js │ │ ├── discreteBar.js │ │ ├── discreteBarChart.js │ │ ├── distribution.js │ │ ├── historicalBar.js │ │ ├── indentedTree.js │ │ ├── legend.js │ │ ├── line.js │ │ ├── lineChart.js │ │ ├── linePlusBarChart.js │ │ ├── linePlusBarWithFocusChart.js │ │ ├── lineWithFisheye.js │ │ ├── lineWithFisheyeChart.js │ │ ├── lineWithFocusChart.js │ │ ├── multiBar.js │ │ ├── multiBarChart.js │ │ ├── multiBarHorizontal.js │ │ ├── multiBarHorizontalChart.js │ │ ├── multiBarTimeSeries.js │ │ ├── multiBarTimeSeriesChart.js │ │ ├── multiChart.js │ │ ├── ohlcBar.js │ │ ├── pie.js │ │ ├── pieChart.js │ │ ├── scatter.js │ │ ├── scatterChart.js │ │ ├── scatterPlusLineChart.js │ │ ├── sparkline.js │ │ ├── sparklinePlus.js │ │ ├── stackedArea.js │ │ └── stackedAreaChart.js │ │ ├── nv.d3.css │ │ ├── outro.js │ │ ├── tooltip.js │ │ └── utils.js │ └── templates │ └── table.html ├── ch04 ├── .dotcloud │ └── config ├── README.md ├── __init__.py ├── dotcloud.yml ├── index.py ├── requirements.txt ├── test_dotcloud_mongo.pig └── wsgi.py ├── ch05 ├── README.md ├── avro_to_mongo.pig ├── elasticsearch.pig ├── elasticsearch.py ├── list_emails.mongo.js ├── mongo_list.py └── web │ ├── config.py │ ├── index.py │ ├── static │ ├── bootstrap │ │ ├── css │ │ │ ├── bootstrap-responsive.css │ │ │ ├── bootstrap-responsive.min.css │ │ │ ├── bootstrap.css │ │ │ └── bootstrap.min.css │ │ ├── img │ │ │ ├── glyphicons-halflings-white.png │ │ │ └── glyphicons-halflings.png │ │ └── js │ │ │ ├── bootstrap.js │ │ │ └── bootstrap.min.js │ ├── d3 │ │ ├── d3.v3.js │ │ └── d3.v3.min.js │ └── nvd3 │ │ ├── .gitignore │ │ ├── LICENSE.md │ │ ├── Makefile │ │ ├── README.md │ │ ├── build.bat │ │ ├── build.sh │ │ ├── deprecated │ │ ├── bar.html │ │ ├── bar.js │ │ ├── charts │ │ │ ├── cumulativeLineChart.js │ │ │ ├── discreteBarChart.js │ │ │ ├── lineChart.js │ │ │ ├── lineChartDaily.js │ │ │ └── stackedAreaChart.js │ │ ├── cumulativeLine.html │ │ ├── cumulativeLine.js │ │ ├── discreteBarChartWithEnabledTooltip.html │ │ ├── discreteBarChartWithEnabledTooltip.js │ │ ├── discreteBarWithAxes.html │ │ ├── discreteBarWithAxes.js │ │ ├── lineChart-old.html │ │ ├── lineChartDaily.html │ │ ├── linePlusBar.html │ │ ├── linePlusBar.js │ │ ├── lineWithFocus.html │ │ ├── lineWithFocus.js │ │ ├── lineWithFourAxes.html │ │ ├── lineWithFourAxes.js │ │ ├── lineWithLegend.html │ │ ├── lineWithLegend.js │ │ ├── monthendAxis.html │ │ ├── multiBarHorizontalWithLegend.html │ │ ├── multiBarHorizontalWithLegend.js │ │ ├── multiBarWithLegend.html │ │ ├── multiBarWithLegend.js │ │ ├── pie.js │ │ ├── scatterChart.html │ │ ├── scatterChart.js │ │ ├── scatterFisheyeChart.js │ │ ├── scatterWithLegend.html │ │ ├── scatterWithLegend.js │ │ ├── stackedArea.js │ │ ├── stackedAreaChart.html │ │ ├── stackedAreaChart_old.html │ │ ├── stackedAreaWithLegend.html │ │ └── stackedAreaWithLegend.js │ │ ├── examples │ │ ├── bullet.html │ │ ├── bulletChart.html │ │ ├── crossfilter.html │ │ ├── crossfilterWithDimentions.html │ │ ├── crossfilterWithTables.html │ │ ├── cumulativeLineChart.html │ │ ├── discreteBarChart.html │ │ ├── historicalBar.html │ │ ├── horizon.html │ │ ├── images │ │ │ ├── grey-minus.png │ │ │ └── grey-plus.png │ │ ├── indentedtree.html │ │ ├── legend.html │ │ ├── line.html │ │ ├── lineChart.html │ │ ├── lineChartSVGResize.html │ │ ├── linePlusBarChart.html │ │ ├── linePlusBarWithFocusChart.html │ │ ├── lineWithFisheyeChart.html │ │ ├── lineWithFocusChart.html │ │ ├── multiBar.html │ │ ├── multiBarChart.html │ │ ├── multiBarHorizontalChart.html │ │ ├── multiChart.html │ │ ├── nations.json │ │ ├── pie.html │ │ ├── pieChart.html │ │ ├── scatter.html │ │ ├── scatterChart.html │ │ ├── scatterPlusLineChart.html │ │ ├── sparkline.html │ │ ├── sparklinePlus.html │ │ ├── stackedArea.html │ │ ├── stackedAreaChart.html │ │ └── stream_layers.js │ │ ├── lib │ │ ├── cie.js │ │ ├── crossfilter.js │ │ ├── crossfilter.min.js │ │ ├── d3.v2.js │ │ ├── d3.v2.min.js │ │ ├── fisheye.js │ │ ├── hive.js │ │ ├── horizon.js │ │ └── sankey.js │ │ ├── nv.d3.js │ │ ├── nv.d3.min.js │ │ └── src │ │ ├── core.js │ │ ├── intro.js │ │ ├── models │ │ ├── axis.js │ │ ├── backup │ │ │ ├── bullet.js │ │ │ └── bulletChart.js │ │ ├── bullet.js │ │ ├── bulletChart.js │ │ ├── cumulativeLineChart.js │ │ ├── discreteBar.js │ │ ├── discreteBarChart.js │ │ ├── distribution.js │ │ ├── historicalBar.js │ │ ├── indentedTree.js │ │ ├── legend.js │ │ ├── line.js │ │ ├── lineChart.js │ │ ├── linePlusBarChart.js │ │ ├── linePlusBarWithFocusChart.js │ │ ├── lineWithFisheye.js │ │ ├── lineWithFisheyeChart.js │ │ ├── lineWithFocusChart.js │ │ ├── multiBar.js │ │ ├── multiBarChart.js │ │ ├── multiBarHorizontal.js │ │ ├── multiBarHorizontalChart.js │ │ ├── multiBarTimeSeries.js │ │ ├── multiBarTimeSeriesChart.js │ │ ├── multiChart.js │ │ ├── ohlcBar.js │ │ ├── pie.js │ │ ├── pieChart.js │ │ ├── scatter.js │ │ ├── scatterChart.js │ │ ├── scatterPlusLineChart.js │ │ ├── sparkline.js │ │ ├── sparklinePlus.js │ │ ├── stackedArea.js │ │ └── stackedAreaChart.js │ │ ├── nv.d3.css │ │ ├── outro.js │ │ ├── tooltip.js │ │ └── utils.js │ └── templates │ ├── layout.html │ ├── macros.jnj │ └── partials │ ├── email.html │ └── emails.html ├── ch06 ├── README.md ├── emails_per_email_address.pig ├── list_addresses.py ├── mongo.js ├── sent_distributions.pig └── web │ ├── config.py │ ├── index.py │ ├── static │ ├── bootstrap │ │ ├── css │ │ │ ├── bootstrap-responsive.css │ │ │ ├── bootstrap-responsive.min.css │ │ │ ├── bootstrap.css │ │ │ └── bootstrap.min.css │ │ ├── img │ │ │ ├── glyphicons-halflings-white.png │ │ │ └── glyphicons-halflings.png │ │ └── js │ │ │ ├── bootstrap.js │ │ │ └── bootstrap.min.js │ ├── d3 │ │ ├── d3.v3.js │ │ └── d3.v3.min.js │ └── nvd3 │ │ ├── .gitignore │ │ ├── LICENSE.md │ │ ├── Makefile │ │ ├── README.md │ │ ├── build.bat │ │ ├── build.sh │ │ ├── deprecated │ │ ├── bar.html │ │ ├── bar.js │ │ ├── charts │ │ │ ├── cumulativeLineChart.js │ │ │ ├── discreteBarChart.js │ │ │ ├── lineChart.js │ │ │ ├── lineChartDaily.js │ │ │ └── stackedAreaChart.js │ │ ├── cumulativeLine.html │ │ ├── cumulativeLine.js │ │ ├── discreteBarChartWithEnabledTooltip.html │ │ ├── discreteBarChartWithEnabledTooltip.js │ │ ├── discreteBarWithAxes.html │ │ ├── discreteBarWithAxes.js │ │ ├── lineChart-old.html │ │ ├── lineChartDaily.html │ │ ├── linePlusBar.html │ │ ├── linePlusBar.js │ │ ├── lineWithFocus.html │ │ ├── lineWithFocus.js │ │ ├── lineWithFourAxes.html │ │ ├── lineWithFourAxes.js │ │ ├── lineWithLegend.html │ │ ├── lineWithLegend.js │ │ ├── monthendAxis.html │ │ ├── multiBarHorizontalWithLegend.html │ │ ├── multiBarHorizontalWithLegend.js │ │ ├── multiBarWithLegend.html │ │ ├── multiBarWithLegend.js │ │ ├── pie.js │ │ ├── scatterChart.html │ │ ├── scatterChart.js │ │ ├── scatterFisheyeChart.js │ │ ├── scatterWithLegend.html │ │ ├── scatterWithLegend.js │ │ ├── stackedArea.js │ │ ├── stackedAreaChart.html │ │ ├── stackedAreaChart_old.html │ │ ├── stackedAreaWithLegend.html │ │ └── stackedAreaWithLegend.js │ │ ├── examples │ │ ├── bullet.html │ │ ├── bulletChart.html │ │ ├── crossfilter.html │ │ ├── crossfilterWithDimentions.html │ │ ├── crossfilterWithTables.html │ │ ├── cumulativeLineChart.html │ │ ├── discreteBarChart.html │ │ ├── historicalBar.html │ │ ├── horizon.html │ │ ├── images │ │ │ ├── grey-minus.png │ │ │ └── grey-plus.png │ │ ├── indentedtree.html │ │ ├── legend.html │ │ ├── line.html │ │ ├── lineChart.html │ │ ├── lineChartSVGResize.html │ │ ├── linePlusBarChart.html │ │ ├── linePlusBarWithFocusChart.html │ │ ├── lineWithFisheyeChart.html │ │ ├── lineWithFocusChart.html │ │ ├── multiBar.html │ │ ├── multiBarChart.html │ │ ├── multiBarHorizontalChart.html │ │ ├── multiChart.html │ │ ├── nations.json │ │ ├── pie.html │ │ ├── pieChart.html │ │ ├── scatter.html │ │ ├── scatterChart.html │ │ ├── scatterPlusLineChart.html │ │ ├── sparkline.html │ │ ├── sparklinePlus.html │ │ ├── stackedArea.html │ │ ├── stackedAreaChart.html │ │ └── stream_layers.js │ │ ├── lib │ │ ├── cie.js │ │ ├── crossfilter.js │ │ ├── crossfilter.min.js │ │ ├── d3.v2.js │ │ ├── d3.v2.min.js │ │ ├── fisheye.js │ │ ├── hive.js │ │ ├── horizon.js │ │ └── sankey.js │ │ ├── nv.d3.js │ │ ├── nv.d3.min.js │ │ └── src │ │ ├── core.js │ │ ├── intro.js │ │ ├── models │ │ ├── axis.js │ │ ├── backup │ │ │ ├── bullet.js │ │ │ └── bulletChart.js │ │ ├── bullet.js │ │ ├── bulletChart.js │ │ ├── cumulativeLineChart.js │ │ ├── discreteBar.js │ │ ├── discreteBarChart.js │ │ ├── distribution.js │ │ ├── historicalBar.js │ │ ├── indentedTree.js │ │ ├── legend.js │ │ ├── line.js │ │ ├── lineChart.js │ │ ├── linePlusBarChart.js │ │ ├── linePlusBarWithFocusChart.js │ │ ├── lineWithFisheye.js │ │ ├── lineWithFisheyeChart.js │ │ ├── lineWithFocusChart.js │ │ ├── multiBar.js │ │ ├── multiBarChart.js │ │ ├── multiBarHorizontal.js │ │ ├── multiBarHorizontalChart.js │ │ ├── multiBarTimeSeries.js │ │ ├── multiBarTimeSeriesChart.js │ │ ├── multiChart.js │ │ ├── ohlcBar.js │ │ ├── pie.js │ │ ├── pieChart.js │ │ ├── scatter.js │ │ ├── scatterChart.js │ │ ├── scatterPlusLineChart.js │ │ ├── sparkline.js │ │ ├── sparklinePlus.js │ │ ├── stackedArea.js │ │ └── stackedAreaChart.js │ │ ├── nv.d3.css │ │ ├── outro.js │ │ ├── tooltip.js │ │ └── utils.js │ └── templates │ ├── layout.html │ ├── macros.jnj │ └── partials │ ├── address.html │ ├── email.html │ ├── emails.html │ └── sent_distribution.html ├── ch07 ├── README.md ├── mongo │ ├── mongo.js │ └── sent_distribution_fix_mongo.js ├── pig │ ├── lda.pig │ ├── network.pig │ ├── ntfidf.macro │ ├── process_mcl.pig │ ├── process_topics.pig │ ├── publish_topics_per_email.pig │ ├── related_email_addresses.pig │ ├── sent_distributions_fix.pig │ ├── test_tokenizers.pig │ ├── topics.pig │ └── udfs.py ├── python │ ├── sent_distribution_fix.py │ └── token_extractor.py └── web │ ├── config.py │ ├── index.py │ ├── static │ ├── bootstrap │ │ ├── css │ │ │ ├── bootstrap-responsive.css │ │ │ ├── bootstrap-responsive.min.css │ │ │ ├── bootstrap.css │ │ │ └── bootstrap.min.css │ │ ├── img │ │ │ ├── glyphicons-halflings-white.png │ │ │ └── glyphicons-halflings.png │ │ └── js │ │ │ ├── bootstrap.js │ │ │ └── bootstrap.min.js │ ├── d3 │ │ ├── d3.v3.js │ │ └── d3.v3.min.js │ └── nvd3 │ │ ├── .gitignore │ │ ├── LICENSE.md │ │ ├── Makefile │ │ ├── README.md │ │ ├── build.bat │ │ ├── build.sh │ │ ├── deprecated │ │ ├── bar.html │ │ ├── bar.js │ │ ├── charts │ │ │ ├── cumulativeLineChart.js │ │ │ ├── discreteBarChart.js │ │ │ ├── lineChart.js │ │ │ ├── lineChartDaily.js │ │ │ └── stackedAreaChart.js │ │ ├── cumulativeLine.html │ │ ├── cumulativeLine.js │ │ ├── discreteBarChartWithEnabledTooltip.html │ │ ├── discreteBarChartWithEnabledTooltip.js │ │ ├── discreteBarWithAxes.html │ │ ├── discreteBarWithAxes.js │ │ ├── lineChart-old.html │ │ ├── lineChartDaily.html │ │ ├── linePlusBar.html │ │ ├── linePlusBar.js │ │ ├── lineWithFocus.html │ │ ├── lineWithFocus.js │ │ ├── lineWithFourAxes.html │ │ ├── lineWithFourAxes.js │ │ ├── lineWithLegend.html │ │ ├── lineWithLegend.js │ │ ├── monthendAxis.html │ │ ├── multiBarHorizontalWithLegend.html │ │ ├── multiBarHorizontalWithLegend.js │ │ ├── multiBarWithLegend.html │ │ ├── multiBarWithLegend.js │ │ ├── pie.js │ │ ├── scatterChart.html │ │ ├── scatterChart.js │ │ ├── scatterFisheyeChart.js │ │ ├── scatterWithLegend.html │ │ ├── scatterWithLegend.js │ │ ├── stackedArea.js │ │ ├── stackedAreaChart.html │ │ ├── stackedAreaChart_old.html │ │ ├── stackedAreaWithLegend.html │ │ └── stackedAreaWithLegend.js │ │ ├── examples │ │ ├── bullet.html │ │ ├── bulletChart.html │ │ ├── crossfilter.html │ │ ├── crossfilterWithDimentions.html │ │ ├── crossfilterWithTables.html │ │ ├── cumulativeLineChart.html │ │ ├── discreteBarChart.html │ │ ├── historicalBar.html │ │ ├── horizon.html │ │ ├── images │ │ │ ├── grey-minus.png │ │ │ └── grey-plus.png │ │ ├── indentedtree.html │ │ ├── legend.html │ │ ├── line.html │ │ ├── lineChart.html │ │ ├── lineChartSVGResize.html │ │ ├── linePlusBarChart.html │ │ ├── linePlusBarWithFocusChart.html │ │ ├── lineWithFisheyeChart.html │ │ ├── lineWithFocusChart.html │ │ ├── multiBar.html │ │ ├── multiBarChart.html │ │ ├── multiBarHorizontalChart.html │ │ ├── multiChart.html │ │ ├── nations.json │ │ ├── pie.html │ │ ├── pieChart.html │ │ ├── scatter.html │ │ ├── scatterChart.html │ │ ├── scatterPlusLineChart.html │ │ ├── sparkline.html │ │ ├── sparklinePlus.html │ │ ├── stackedArea.html │ │ ├── stackedAreaChart.html │ │ └── stream_layers.js │ │ ├── lib │ │ ├── cie.js │ │ ├── crossfilter.js │ │ ├── crossfilter.min.js │ │ ├── d3.v2.js │ │ ├── d3.v2.min.js │ │ ├── fisheye.js │ │ ├── hive.js │ │ ├── horizon.js │ │ └── sankey.js │ │ ├── nv.d3.js │ │ ├── nv.d3.min.js │ │ └── src │ │ ├── core.js │ │ ├── intro.js │ │ ├── models │ │ ├── axis.js │ │ ├── backup │ │ │ ├── bullet.js │ │ │ └── bulletChart.js │ │ ├── bullet.js │ │ ├── bulletChart.js │ │ ├── cumulativeLineChart.js │ │ ├── discreteBar.js │ │ ├── discreteBarChart.js │ │ ├── distribution.js │ │ ├── historicalBar.js │ │ ├── indentedTree.js │ │ ├── legend.js │ │ ├── line.js │ │ ├── lineChart.js │ │ ├── linePlusBarChart.js │ │ ├── linePlusBarWithFocusChart.js │ │ ├── lineWithFisheye.js │ │ ├── lineWithFisheyeChart.js │ │ ├── lineWithFocusChart.js │ │ ├── multiBar.js │ │ ├── multiBarChart.js │ │ ├── multiBarHorizontal.js │ │ ├── multiBarHorizontalChart.js │ │ ├── multiBarTimeSeries.js │ │ ├── multiBarTimeSeriesChart.js │ │ ├── multiChart.js │ │ ├── ohlcBar.js │ │ ├── pie.js │ │ ├── pieChart.js │ │ ├── scatter.js │ │ ├── scatterChart.js │ │ ├── scatterPlusLineChart.js │ │ ├── sparkline.js │ │ ├── sparklinePlus.js │ │ ├── stackedArea.js │ │ └── stackedAreaChart.js │ │ ├── nv.d3.css │ │ ├── outro.js │ │ ├── tooltip.js │ │ └── utils.js │ └── templates │ ├── layout.html │ ├── macros.jnj │ └── partials │ ├── address.html │ ├── email.html │ ├── emails.html │ └── sent_distribution.html ├── ch08 ├── README.md ├── mongo.js ├── p_reply_given_from_to.pig └── web │ ├── config.py │ ├── index.py │ ├── smoother.py │ ├── static │ ├── bootstrap │ │ ├── css │ │ │ ├── bootstrap-responsive.css │ │ │ ├── bootstrap-responsive.min.css │ │ │ ├── bootstrap.css │ │ │ └── bootstrap.min.css │ │ ├── img │ │ │ ├── glyphicons-halflings-white.png │ │ │ └── glyphicons-halflings.png │ │ └── js │ │ │ ├── bootstrap.js │ │ │ └── bootstrap.min.js │ ├── d3 │ │ ├── d3.v3.js │ │ └── d3.v3.min.js │ └── nvd3 │ │ ├── .gitignore │ │ ├── LICENSE.md │ │ ├── Makefile │ │ ├── README.md │ │ ├── build.bat │ │ ├── build.sh │ │ ├── deprecated │ │ ├── bar.html │ │ ├── bar.js │ │ ├── charts │ │ │ ├── cumulativeLineChart.js │ │ │ ├── discreteBarChart.js │ │ │ ├── lineChart.js │ │ │ ├── lineChartDaily.js │ │ │ └── stackedAreaChart.js │ │ ├── cumulativeLine.html │ │ ├── cumulativeLine.js │ │ ├── discreteBarChartWithEnabledTooltip.html │ │ ├── discreteBarChartWithEnabledTooltip.js │ │ ├── discreteBarWithAxes.html │ │ ├── discreteBarWithAxes.js │ │ ├── lineChart-old.html │ │ ├── lineChartDaily.html │ │ ├── linePlusBar.html │ │ ├── linePlusBar.js │ │ ├── lineWithFocus.html │ │ ├── lineWithFocus.js │ │ ├── lineWithFourAxes.html │ │ ├── lineWithFourAxes.js │ │ ├── lineWithLegend.html │ │ ├── lineWithLegend.js │ │ ├── monthendAxis.html │ │ ├── multiBarHorizontalWithLegend.html │ │ ├── multiBarHorizontalWithLegend.js │ │ ├── multiBarWithLegend.html │ │ ├── multiBarWithLegend.js │ │ ├── pie.js │ │ ├── scatterChart.html │ │ ├── scatterChart.js │ │ ├── scatterFisheyeChart.js │ │ ├── scatterWithLegend.html │ │ ├── scatterWithLegend.js │ │ ├── stackedArea.js │ │ ├── stackedAreaChart.html │ │ ├── stackedAreaChart_old.html │ │ ├── stackedAreaWithLegend.html │ │ └── stackedAreaWithLegend.js │ │ ├── examples │ │ ├── bullet.html │ │ ├── bulletChart.html │ │ ├── crossfilter.html │ │ ├── crossfilterWithDimentions.html │ │ ├── crossfilterWithTables.html │ │ ├── cumulativeLineChart.html │ │ ├── discreteBarChart.html │ │ ├── historicalBar.html │ │ ├── horizon.html │ │ ├── images │ │ │ ├── grey-minus.png │ │ │ └── grey-plus.png │ │ ├── indentedtree.html │ │ ├── legend.html │ │ ├── line.html │ │ ├── lineChart.html │ │ ├── lineChartSVGResize.html │ │ ├── linePlusBarChart.html │ │ ├── linePlusBarWithFocusChart.html │ │ ├── lineWithFisheyeChart.html │ │ ├── lineWithFocusChart.html │ │ ├── multiBar.html │ │ ├── multiBarChart.html │ │ ├── multiBarHorizontalChart.html │ │ ├── multiChart.html │ │ ├── nations.json │ │ ├── pie.html │ │ ├── pieChart.html │ │ ├── scatter.html │ │ ├── scatterChart.html │ │ ├── scatterPlusLineChart.html │ │ ├── sparkline.html │ │ ├── sparklinePlus.html │ │ ├── stackedArea.html │ │ ├── stackedAreaChart.html │ │ └── stream_layers.js │ │ ├── lib │ │ ├── cie.js │ │ ├── crossfilter.js │ │ ├── crossfilter.min.js │ │ ├── d3.v2.js │ │ ├── d3.v2.min.js │ │ ├── fisheye.js │ │ ├── hive.js │ │ ├── horizon.js │ │ └── sankey.js │ │ ├── nv.d3.js │ │ ├── nv.d3.min.js │ │ └── src │ │ ├── core.js │ │ ├── intro.js │ │ ├── models │ │ ├── axis.js │ │ ├── backup │ │ │ ├── bullet.js │ │ │ └── bulletChart.js │ │ ├── bullet.js │ │ ├── bulletChart.js │ │ ├── cumulativeLineChart.js │ │ ├── discreteBar.js │ │ ├── discreteBarChart.js │ │ ├── distribution.js │ │ ├── historicalBar.js │ │ ├── indentedTree.js │ │ ├── legend.js │ │ ├── line.js │ │ ├── lineChart.js │ │ ├── linePlusBarChart.js │ │ ├── linePlusBarWithFocusChart.js │ │ ├── lineWithFisheye.js │ │ ├── lineWithFisheyeChart.js │ │ ├── lineWithFocusChart.js │ │ ├── multiBar.js │ │ ├── multiBarChart.js │ │ ├── multiBarHorizontal.js │ │ ├── multiBarHorizontalChart.js │ │ ├── multiBarTimeSeries.js │ │ ├── multiBarTimeSeriesChart.js │ │ ├── multiChart.js │ │ ├── ohlcBar.js │ │ ├── pie.js │ │ ├── pieChart.js │ │ ├── scatter.js │ │ ├── scatterChart.js │ │ ├── scatterPlusLineChart.js │ │ ├── sparkline.js │ │ ├── sparklinePlus.js │ │ ├── stackedArea.js │ │ └── stackedAreaChart.js │ │ ├── nv.d3.css │ │ ├── outro.js │ │ ├── tooltip.js │ │ └── utils.js │ └── templates │ ├── layout.html │ ├── macros.jnj │ └── partials │ ├── address.html │ ├── email.html │ ├── emails.html │ └── sent_distribution.html ├── ch09 ├── README.md ├── mongo.js ├── pig │ ├── hamming.py │ ├── p_reply_given_from_to.pig │ ├── p_reply_given_time_of_day.pig │ ├── p_reply_given_topics.pig │ ├── publish_topics.pig │ ├── smooth_times.pig │ ├── test_results.pig │ └── udfs.py ├── tune_weights.py └── web │ ├── config.py │ ├── index.py │ ├── smoother.py │ ├── static │ ├── bootstrap │ │ ├── css │ │ │ ├── bootstrap-responsive.css │ │ │ ├── bootstrap-responsive.min.css │ │ │ ├── bootstrap.css │ │ │ └── bootstrap.min.css │ │ ├── img │ │ │ ├── glyphicons-halflings-white.png │ │ │ └── glyphicons-halflings.png │ │ └── js │ │ │ ├── bootstrap.js │ │ │ └── bootstrap.min.js │ ├── d3 │ │ ├── d3.v3.js │ │ └── d3.v3.min.js │ └── nvd3 │ │ ├── .gitignore │ │ ├── LICENSE.md │ │ ├── Makefile │ │ ├── README.md │ │ ├── build.bat │ │ ├── build.sh │ │ ├── deprecated │ │ ├── bar.html │ │ ├── bar.js │ │ ├── charts │ │ │ ├── cumulativeLineChart.js │ │ │ ├── discreteBarChart.js │ │ │ ├── lineChart.js │ │ │ ├── lineChartDaily.js │ │ │ └── stackedAreaChart.js │ │ ├── cumulativeLine.html │ │ ├── cumulativeLine.js │ │ ├── discreteBarChartWithEnabledTooltip.html │ │ ├── discreteBarChartWithEnabledTooltip.js │ │ ├── discreteBarWithAxes.html │ │ ├── discreteBarWithAxes.js │ │ ├── lineChart-old.html │ │ ├── lineChartDaily.html │ │ ├── linePlusBar.html │ │ ├── linePlusBar.js │ │ ├── lineWithFocus.html │ │ ├── lineWithFocus.js │ │ ├── lineWithFourAxes.html │ │ ├── lineWithFourAxes.js │ │ ├── lineWithLegend.html │ │ ├── lineWithLegend.js │ │ ├── monthendAxis.html │ │ ├── multiBarHorizontalWithLegend.html │ │ ├── multiBarHorizontalWithLegend.js │ │ ├── multiBarWithLegend.html │ │ ├── multiBarWithLegend.js │ │ ├── pie.js │ │ ├── scatterChart.html │ │ ├── scatterChart.js │ │ ├── scatterFisheyeChart.js │ │ ├── scatterWithLegend.html │ │ ├── scatterWithLegend.js │ │ ├── stackedArea.js │ │ ├── stackedAreaChart.html │ │ ├── stackedAreaChart_old.html │ │ ├── stackedAreaWithLegend.html │ │ └── stackedAreaWithLegend.js │ │ ├── examples │ │ ├── bullet.html │ │ ├── bulletChart.html │ │ ├── crossfilter.html │ │ ├── crossfilterWithDimentions.html │ │ ├── crossfilterWithTables.html │ │ ├── cumulativeLineChart.html │ │ ├── discreteBarChart.html │ │ ├── historicalBar.html │ │ ├── horizon.html │ │ ├── images │ │ │ ├── grey-minus.png │ │ │ └── grey-plus.png │ │ ├── indentedtree.html │ │ ├── legend.html │ │ ├── line.html │ │ ├── lineChart.html │ │ ├── lineChartSVGResize.html │ │ ├── linePlusBarChart.html │ │ ├── linePlusBarWithFocusChart.html │ │ ├── lineWithFisheyeChart.html │ │ ├── lineWithFocusChart.html │ │ ├── multiBar.html │ │ ├── multiBarChart.html │ │ ├── multiBarHorizontalChart.html │ │ ├── multiChart.html │ │ ├── nations.json │ │ ├── pie.html │ │ ├── pieChart.html │ │ ├── scatter.html │ │ ├── scatterChart.html │ │ ├── scatterPlusLineChart.html │ │ ├── sparkline.html │ │ ├── sparklinePlus.html │ │ ├── stackedArea.html │ │ ├── stackedAreaChart.html │ │ └── stream_layers.js │ │ ├── lib │ │ ├── cie.js │ │ ├── crossfilter.js │ │ ├── crossfilter.min.js │ │ ├── d3.v2.js │ │ ├── d3.v2.min.js │ │ ├── fisheye.js │ │ ├── hive.js │ │ ├── horizon.js │ │ └── sankey.js │ │ ├── nv.d3.js │ │ ├── nv.d3.min.js │ │ └── src │ │ ├── core.js │ │ ├── intro.js │ │ ├── models │ │ ├── axis.js │ │ ├── backup │ │ │ ├── bullet.js │ │ │ └── bulletChart.js │ │ ├── bullet.js │ │ ├── bulletChart.js │ │ ├── cumulativeLineChart.js │ │ ├── discreteBar.js │ │ ├── discreteBarChart.js │ │ ├── distribution.js │ │ ├── historicalBar.js │ │ ├── indentedTree.js │ │ ├── legend.js │ │ ├── line.js │ │ ├── lineChart.js │ │ ├── linePlusBarChart.js │ │ ├── linePlusBarWithFocusChart.js │ │ ├── lineWithFisheye.js │ │ ├── lineWithFisheyeChart.js │ │ ├── lineWithFocusChart.js │ │ ├── multiBar.js │ │ ├── multiBarChart.js │ │ ├── multiBarHorizontal.js │ │ ├── multiBarHorizontalChart.js │ │ ├── multiBarTimeSeries.js │ │ ├── multiBarTimeSeriesChart.js │ │ ├── multiChart.js │ │ ├── ohlcBar.js │ │ ├── pie.js │ │ ├── pieChart.js │ │ ├── scatter.js │ │ ├── scatterChart.js │ │ ├── scatterPlusLineChart.js │ │ ├── sparkline.js │ │ ├── sparklinePlus.js │ │ ├── stackedArea.js │ │ └── stackedAreaChart.js │ │ ├── nv.d3.css │ │ ├── outro.js │ │ ├── tooltip.js │ │ └── utils.js │ └── templates │ ├── layout.html │ ├── macros.jnj │ └── partials │ ├── address.html │ ├── email.html │ ├── emails.html │ ├── sent_distribution.html │ └── will_reply.html ├── pigrc └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | *.txt 2 | *.pyc 3 | venv 4 | pyelasticsearch 5 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: python ch04/index.py 2 | -------------------------------------------------------------------------------- /ch02/Email Analysis.xlsb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch02/Email Analysis.xlsb -------------------------------------------------------------------------------- /ch02/README.md: -------------------------------------------------------------------------------- 1 | Agile Data the Book 2 | =================== 3 | 4 | You can buy the book [here](http://shop.oreilly.com/product/0636920025054.do). You can read the book on [O'Reilly OFPS](http://ofps.oreilly.com/titles/9781449326265/) now. Work the chapter code examples as you go. Don't forget to initialize your python environment. Try linux (apt-get, yum) or OS X (brew, port) packages if any of the requirements don't install in your [virtualenv](http://www.virtualenv.org/en/latest/). 5 | -------------------------------------------------------------------------------- /ch03/cat_avro: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # 4 | # derived from example at http://www.harshj.com/2010/04/25/writing-and-reading-avro-data-files-using-python/ 5 | # 6 | from avro import schema, datafile, io 7 | import pprint 8 | import sys 9 | import json 10 | 11 | field_id = None 12 | # Optional key to print 13 | if (len(sys.argv) > 2): 14 | field_id = sys.argv[2] 15 | 16 | # Test reading avros 17 | rec_reader = io.DatumReader() 18 | 19 | # Create a 'data file' (avro file) reader 20 | df_reader = datafile.DataFileReader( 21 | open(sys.argv[1]), 22 | rec_reader 23 | ) 24 | 25 | # Read all records stored inside 26 | pp = pprint.PrettyPrinter() 27 | i = 0 28 | for record in df_reader: 29 | if i > 20: 30 | break 31 | i += 1 32 | if field_id: 33 | pp.pprint(record[field_id]) 34 | else: 35 | pp.pprint(record) 36 | 37 | obj = json.loads(df_reader.meta['avro.schema']) 38 | print "\nAvro Schema: " + json.dumps(obj) 39 | -------------------------------------------------------------------------------- /ch03/pig/avro_to_mongo.pig: -------------------------------------------------------------------------------- 1 | /* Set Home Directory - where we install software */ 2 | %default HOME `echo \$HOME/Software/` 3 | 4 | /* Load Avro jars and define shortcut */ 5 | REGISTER $HOME/pig/build/ivy/lib/Pig/avro-1.5.3.jar 6 | REGISTER $HOME/pig/build/ivy/lib/Pig/json-simple-1.1.jar 7 | REGISTER $HOME/pig/contrib/piggybank/java/piggybank.jar 8 | define AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage(); 9 | 10 | /* MongoDB libraries and configuration */ 11 | REGISTER $HOME/mongo-hadoop/mongo-2.10.1.jar 12 | REGISTER $HOME/mongo-hadoop/core/target/mongo-hadoop-core-1.1.0-SNAPSHOT.jar 13 | REGISTER $HOME/mongo-hadoop/pig/target/mongo-hadoop-pig-1.1.0-SNAPSHOT.jar 14 | 15 | /* Set speculative execution off so we don't have the chance of duplicate records in Mongo */ 16 | set mapred.map.tasks.speculative.execution false 17 | set mapred.reduce.tasks.speculative.execution false 18 | define MongoStorage com.mongodb.hadoop.pig.MongoStorage(); /* Shortcut */ 19 | 20 | avros = load '$avros' using AvroStorage(); /* For example, 'enron.avro' */ 21 | store avros into '$mongourl' using MongoStorage(); /* For example, 'mongodb://localhost/enron.emails' */ 22 | -------------------------------------------------------------------------------- /ch03/pig/elasticsearch.pig: -------------------------------------------------------------------------------- 1 | /* Set Home Directory - where we install software */ 2 | %default HOME `echo \$HOME/Software/` 3 | 4 | /* Avro uses json-simple, and is in piggybank until Pig 0.12, where AvroStorage and TrevniStorage are builtins */ 5 | REGISTER $HOME/pig/build/ivy/lib/Pig/avro-1.5.3.jar 6 | REGISTER $HOME/pig/build/ivy/lib/Pig/json-simple-1.1.jar 7 | REGISTER $HOME/pig/contrib/piggybank/java/piggybank.jar 8 | 9 | DEFINE AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage(); 10 | 11 | /* Elasticsearch's own jars */ 12 | REGISTER $HOME/elasticsearch-0.20.2/lib/*.jar 13 | 14 | /* Register wonderdog - elasticsearch integration */ 15 | REGISTER $HOME/wonderdog/target/wonderdog-1.0-SNAPSHOT.jar 16 | 17 | /* Remove the old json */ 18 | rmf /tmp/sent_count_json 19 | 20 | /* Nuke the elasticsearch sent_counts index, as we are about to replace it. */ 21 | sh curl -XDELETE 'http://localhost:9200/inbox/sent_counts' 22 | 23 | /* Load Avros, and store as JSON */ 24 | sent_counts = LOAD '/tmp/sent_counts.txt' AS (from:chararray, to:chararray, total:long); 25 | STORE sent_counts INTO '/tmp/sent_count_json' USING JsonStorage(); 26 | 27 | /* Now load the JSON as a single chararray field, and index it into ElasticSearch with Wonderdog from InfoChimps */ 28 | sent_count_json = LOAD '/tmp/sent_count_json' AS (sent_counts:chararray); 29 | STORE sent_count_json INTO 'es://inbox/sentcounts?json=true&size=1000' USING com.infochimps.elasticsearch.pig.ElasticSearchStorage( 30 | '$HOME/elasticsearch-0.20.2/config/elasticsearch.yml', 31 | '$HOME/elasticsearch-0.20.2/plugins'); 32 | 33 | /* Search for Hadoop to make sure we get a hit in our sent_count index */ 34 | sh curl -XGET 'http://localhost:9200/inbox/sentcounts/_search?q=russell&pretty=true&size=1' 35 | -------------------------------------------------------------------------------- /ch03/pig/mongo.pig: -------------------------------------------------------------------------------- 1 | /* Set Home Directory - where we install software */ 2 | %default HOME `echo \$HOME/Software/` 3 | 4 | REGISTER $HOME/mongo-hadoop/mongo-2.10.1.jar 5 | REGISTER $HOME/mongo-hadoop/core/target/mongo-hadoop-core-1.1.0-SNAPSHOT.jar 6 | REGISTER $HOME/mongo-hadoop/pig/target/mongo-hadoop-pig-1.1.0-SNAPSHOT.jar 7 | 8 | set mapred.map.tasks.speculative.execution false 9 | set mapred.reduce.tasks.speculative.execution false 10 | 11 | sent_counts = LOAD '/tmp/sent_counts.txt' AS (from:chararray, to:chararray, total:long); 12 | STORE sent_counts INTO 'mongodb://localhost/agile_data.sent_counts' USING com.mongodb.hadoop.pig.MongoStorage(); 13 | -------------------------------------------------------------------------------- /ch03/pig/sent_counts.pig: -------------------------------------------------------------------------------- 1 | /* Set Home Directory - where we install software */ 2 | %default HOME `echo \$HOME/Software/` 3 | 4 | REGISTER $HOME/pig/build/ivy/lib/Pig/avro-1.5.3.jar 5 | REGISTER $HOME/pig/build/ivy/lib/Pig/json-simple-1.1.jar 6 | REGISTER $HOME/pig/contrib/piggybank/java/piggybank.jar 7 | 8 | DEFINE AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage(); 9 | 10 | rmf /tmp/sent_counts.txt 11 | 12 | /* Load the emails in avro format (edit the path to match where you saved them) using the AvroStorage UDF from Piggybank */ 13 | messages = LOAD '/me/Data/test_mbox' USING AvroStorage(); 14 | 15 | /* Filter nulls, they won't help */ 16 | messages = FILTER messages BY (from IS NOT NULL) AND (tos IS NOT NULL); 17 | 18 | /* Emails can be 'to' more than one person. FLATTEN() will project our from with each 'to' that exists. */ 19 | addresses = FOREACH messages GENERATE from.address AS from, FLATTEN(tos.(address)) AS to; 20 | 21 | /* Lowercase the email addresses, so we don't count MiXed case of the same address as multiple addresses */ 22 | lowers = FOREACH addresses GENERATE LOWER(from) AS from, LOWER(to) AS to; 23 | 24 | /* GROUP BY each from/to pair into a bag (array), then count the bag's contents ($1 means the 2nd field) to get a total. 25 | Same as SQL: SELECT from, to, COUNT(*) FROM lowers GROUP BY (from, to); 26 | Note: COUNT_STAR differs from COUNT in that it counts nulls. */ 27 | by_from_to = GROUP lowers BY (from, to); 28 | sent_counts = FOREACH by_from_to GENERATE FLATTEN(group) AS (from, to), COUNT_STAR(lowers) AS total; 29 | 30 | /* Sort the data, highest sent count first */ 31 | sent_counts = ORDER sent_counts BY total DESC; 32 | STORE sent_counts INTO '/tmp/sent_counts.txt'; 33 | -------------------------------------------------------------------------------- /ch03/python/elasticsearch.py: -------------------------------------------------------------------------------- 1 | import pyelasticsearch 2 | elastic = pyelasticsearch.ElasticSearch('http://localhost:9200/inbox') 3 | results = elastic.search("hadoop", index="sentcounts") 4 | print results 5 | -------------------------------------------------------------------------------- /ch03/python/flask_echo.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | app = Flask(__name__) 3 | 4 | @app.route("/") 5 | def hello(input): 6 | return input 7 | 8 | if __name__ == "__main__": 9 | app.run(debug=True) 10 | -------------------------------------------------------------------------------- /ch03/python/flask_mongo.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | import pymongo 3 | import json 4 | 5 | # Setup Flask 6 | app = Flask(__name__) 7 | 8 | # Setup Mongo 9 | conn = pymongo.Connection() # defaults to localhost 10 | db = conn.agile_data 11 | sent_counts = db['sent_counts'] 12 | 13 | # Fetch from/to totals, given a pair of email addresses 14 | @app.route("/sent_counts//") 15 | def sent_count(from_address, to_address): 16 | sent_count = sent_counts.find_one( {'from': from_address, 'to': to_address} ) 17 | return json.dumps( {'from': sent_count['from'], 'to': sent_count['to'], 'total': sent_count['total']} ) 18 | 19 | if __name__ == "__main__": 20 | app.run(debug=True) 21 | -------------------------------------------------------------------------------- /ch03/python/mongo.py: -------------------------------------------------------------------------------- 1 | import pymongo 2 | import json 3 | 4 | conn = pymongo.Connection() # defaults to localhost 5 | db = conn.agile_data 6 | results = db['sent_counts'].find() 7 | for i in range(0, results.count()): # Loop and print all results 8 | print results[i] 9 | 10 | -------------------------------------------------------------------------------- /ch03/python/test_avro.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # 4 | # derived from helpful example at http://www.harshj.com/2010/04/25/writing-and-reading-avro-data-files-using-python/ 5 | # 6 | from avro import schema, datafile, io 7 | import pprint 8 | 9 | # Test writing avros 10 | OUTFILE_NAME = '/tmp/messages.avro' 11 | 12 | SCHEMA_STR = """{ 13 | "type": "record", 14 | "name": "Message", 15 | "fields" : [ 16 | {"name": "message_id", "type": "int"}, 17 | {"name": "topic", "type": "string"}, 18 | {"name": "user_id", "type": "int"} 19 | ] 20 | }""" 21 | 22 | SCHEMA = schema.parse(SCHEMA_STR) 23 | 24 | # Create a 'record' (datum) writer 25 | rec_writer = io.DatumWriter(SCHEMA) 26 | 27 | # Create a 'data file' (avro file) writer 28 | df_writer = datafile.DataFileWriter( 29 | open(OUTFILE_NAME, 'wb'), 30 | rec_writer, 31 | writers_schema = SCHEMA 32 | ) 33 | 34 | df_writer.append( {"message_id": 11, "topic": "Hello galaxy", "user_id": 1} ) 35 | df_writer.append( {"message_id": 12, "topic": "Jim is silly!", "user_id": 1} ) 36 | df_writer.append( {"message_id": 23, "topic": "I like apples.", "user_id": 2} ) 37 | df_writer.close() 38 | 39 | # Test reading avros 40 | rec_reader = io.DatumReader() 41 | 42 | # Create a 'data file' (avro file) reader 43 | df_reader = datafile.DataFileReader( 44 | open(OUTFILE_NAME), 45 | rec_reader 46 | ) 47 | 48 | # Read all records stored inside 49 | pp = pprint.PrettyPrinter() 50 | for record in df_reader: 51 | pp.pprint(record) 52 | -------------------------------------------------------------------------------- /ch03/web/index.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template 2 | import pymongo 3 | import json 4 | import re 5 | 6 | # Setup Flask 7 | app = Flask(__name__) 8 | 9 | # Setup Mongo 10 | conn = pymongo.Connection() # defaults to localhost 11 | db = conn.agile_data 12 | 13 | # Fetch from/to totals and list them 14 | @app.route("/sent_counts") 15 | def sent_counts(): 16 | sent_counts = db['sent_counts'].find() 17 | results = {} 18 | results['keys'] = 'from', 'to', 'total' 19 | results['values'] = [[s['from'], s['to'], s['total']] for s in sent_counts if re.search('apache', str(s['from'])) or re.search('apache', str(s['to']))] 20 | results['values'] = results['values'][0:17] 21 | return render_template('table.html', results=results) 22 | 23 | if __name__ == "__main__": 24 | app.run(debug=True) 25 | -------------------------------------------------------------------------------- /ch03/web/static/bootstrap/img/glyphicons-halflings-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch03/web/static/bootstrap/img/glyphicons-halflings-white.png -------------------------------------------------------------------------------- /ch03/web/static/bootstrap/img/glyphicons-halflings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch03/web/static/bootstrap/img/glyphicons-halflings.png -------------------------------------------------------------------------------- /ch03/web/static/nvd3/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Jekyll Files # 3 | ################ 4 | _site 5 | 6 | 7 | # Random Files # 8 | ################ 9 | *.swp 10 | *~ 11 | *.log 12 | 13 | 14 | # Private Test Data # 15 | ##################### 16 | *REALDATA* 17 | 18 | 19 | # OS generated files # 20 | ###################### 21 | .DS_Store* 22 | ehthumbs.db 23 | Icon? 24 | Thumbs.db 25 | -------------------------------------------------------------------------------- /ch03/web/static/nvd3/LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | ##nvd3.js License 3 | 4 | Copyright (c) 2011, 2012 [Novus Partners, Inc.][novus] 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | 18 | [novus]: https://www.novus.com/ 19 | 20 | 21 | 22 | ##d3.js License 23 | 24 | Copyright (c) 2012, Michael Bostock 25 | All rights reserved. 26 | 27 | Redistribution and use in source and binary forms, with or without 28 | modification, are permitted provided that the following conditions are met: 29 | 30 | * Redistributions of source code must retain the above copyright notice, this 31 | list of conditions and the following disclaimer. 32 | 33 | * Redistributions in binary form must reproduce the above copyright notice, 34 | this list of conditions and the following disclaimer in the documentation 35 | and/or other materials provided with the distribution. 36 | 37 | * The name Michael Bostock may not be used to endorse or promote products 38 | derived from this software without specific prior written permission. 39 | 40 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 41 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 43 | DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT, 44 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 45 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 46 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 47 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 48 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 49 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 50 | -------------------------------------------------------------------------------- /ch03/web/static/nvd3/Makefile: -------------------------------------------------------------------------------- 1 | JS_FILES = \ 2 | src/intro.js \ 3 | src/core.js \ 4 | src/tooltip.js \ 5 | src/utils.js \ 6 | src/models/axis.js \ 7 | src/models/historicalBar.js \ 8 | src/models/bullet.js \ 9 | src/models/bulletChart.js \ 10 | src/models/cumulativeLineChart.js \ 11 | src/models/discreteBar.js \ 12 | src/models/discreteBarChart.js \ 13 | src/models/distribution.js \ 14 | src/models/indentedTree.js \ 15 | src/models/legend.js \ 16 | src/models/line.js \ 17 | src/models/lineChart.js \ 18 | src/models/linePlusBarChart.js \ 19 | src/models/lineWithFocusChart.js \ 20 | src/models/multiBar.js \ 21 | src/models/multiBarChart.js \ 22 | src/models/multiBarHorizontal.js \ 23 | src/models/multiBarHorizontalChart.js \ 24 | src/models/multiChart.js \ 25 | src/models/ohlcBar.js \ 26 | src/models/pie.js \ 27 | src/models/pieChart.js \ 28 | src/models/scatter.js \ 29 | src/models/scatterChart.js \ 30 | src/models/scatterPlusLineChart.js \ 31 | src/models/sparkline.js \ 32 | src/models/sparklinePlus.js \ 33 | src/models/stackedArea.js \ 34 | src/models/stackedAreaChart.js \ 35 | src/outro.js 36 | 37 | JS_COMPILER = \ 38 | uglifyjs 39 | 40 | all: nv.d3.js nv.d3.min.js 41 | nv.d3.js: $(JS_FILES) 42 | nv.d3.min.js: $(JS_FILES) 43 | 44 | nv.d3.js: Makefile 45 | rm -f $@ 46 | cat $(filter %.js,$^) >> $@ 47 | 48 | %.min.js:: Makefile 49 | rm -f $@ 50 | cat $(filter %.js,$^) | $(JS_COMPILER) >> $@ 51 | 52 | clean: 53 | rm -rf nv.d3.js nv.d3.min.js 54 | 55 | 56 | -------------------------------------------------------------------------------- /ch03/web/static/nvd3/README.md: -------------------------------------------------------------------------------- 1 | Please see Novus' official statement on nvd3 with an explanation, 2 | apology, and commitment to its permanent status as an open-source 3 | project. 4 | [http://nvd3.org/statement.html](http://nvd3.org/statement.html) 5 | 6 | # nvd3 - v0.0.1 7 | 8 | A reusable chart library for d3.JS. 9 | 10 | Currently in an early stage of development, but will be a very active project. It may change quite a bit from its current state, but will always try to follow the style in which d3.js was done. 11 | 12 | You can also check out the [examples page](http://nvd3.org/ghpages/examples.html) 13 | 14 | --- 15 | 16 | If one of [the existing models](https://github.com/novus/nvd3/tree/master/src/models) doesn't meet your needs, fork the project, implement the model and an example using it, send us a pull request, for consideration for inclusion in the project. 17 | 18 | --- 19 | 20 | Minifying your fork: 21 | 22 | The Makefile requires [UglifyJS](https://github.com/mishoo/UglifyJS). 23 | 24 | The easist way to install is to install via npm. Run `npm install 25 | uglify-js` from your home directory, then add the output from `npm bin` 26 | into your path so that you have access to `uglifyjs` from the command 27 | line (remember to restart your terminal window when adding to the path.) 28 | 29 | Once you have `uglifyjs` command available, running `make` from your 30 | fork's root directory will rebuild both `nv.d3.js` and `nv.d3.min.js`. 31 | 32 | Without UglifyJS, you won't get the minified version when running make. 33 | 34 | **We ask that you DO NOT minify pull requests... 35 | If you need to minify please build pull request in separate branch, and 36 | merge and minify in yout master. 37 | 38 | -------------------------------------------------------------------------------- /ch03/web/static/nvd3/build.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | copy src\intro.js /B + src\core.js /B + src\tooltip.js /B temp1.js /B 3 | copy src\models\*.js /B temp2.js /B 4 | copy temp1.js /B + temp2.js /B + src\outro.js /B nv.d3.js /B 5 | del temp1.js 6 | del temp2.js 7 | -------------------------------------------------------------------------------- /ch03/web/static/nvd3/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | COMPRESSOR=`which yui-compressor` 3 | cat src/intro.js src/core.js src/tooltip.js src/utils.js src/models/*.js src/outro.js > nv.d3.js 4 | if [ -e $COMPRESSOR ]; then 5 | $COMPRESSOR --type js -o nv.d3.min.js nv.d3.js 6 | fi 7 | -------------------------------------------------------------------------------- /ch03/web/static/nvd3/deprecated/lineChart-old.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 29 | 30 | 31 |
32 |
33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 84 | -------------------------------------------------------------------------------- /ch03/web/static/nvd3/examples/images/grey-minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch03/web/static/nvd3/examples/images/grey-minus.png -------------------------------------------------------------------------------- /ch03/web/static/nvd3/examples/images/grey-plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch03/web/static/nvd3/examples/images/grey-plus.png -------------------------------------------------------------------------------- /ch03/web/static/nvd3/examples/legend.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 76 | -------------------------------------------------------------------------------- /ch03/web/static/nvd3/examples/line.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 96 | -------------------------------------------------------------------------------- /ch03/web/static/nvd3/examples/lineWithFocusChart.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 33 | 34 | 35 |
36 | 37 |
38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 88 | -------------------------------------------------------------------------------- /ch03/web/static/nvd3/examples/multiBar.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 18 | 19 | 20 |
21 | 22 |
23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 93 | -------------------------------------------------------------------------------- /ch03/web/static/nvd3/examples/multiBarChart.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 29 | 30 | 31 |
32 | 33 |
34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 81 | -------------------------------------------------------------------------------- /ch03/web/static/nvd3/examples/pie.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 94 | -------------------------------------------------------------------------------- /ch03/web/static/nvd3/examples/sparkline.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 21 | 22 | 23 |

Sparkline:

24 | 25 | 26 | 27 | 28 | 29 | 63 | -------------------------------------------------------------------------------- /ch03/web/static/nvd3/examples/sparklinePlus.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 18 | 19 | 20 |

SparklinePlus:

21 |

22 | 23 | 24 | 25 | 26 | 27 | 28 | 68 | -------------------------------------------------------------------------------- /ch03/web/static/nvd3/examples/stream_layers.js: -------------------------------------------------------------------------------- 1 | 2 | /* Inspired by Lee Byron's test data generator. */ 3 | function stream_layers(n, m, o) { 4 | if (arguments.length < 3) o = 0; 5 | function bump(a) { 6 | var x = 1 / (.1 + Math.random()), 7 | y = 2 * Math.random() - .5, 8 | z = 10 / (.1 + Math.random()); 9 | for (var i = 0; i < m; i++) { 10 | var w = (i / m - y) * z; 11 | a[i] += x * Math.exp(-w * w); 12 | } 13 | } 14 | return d3.range(n).map(function() { 15 | var a = [], i; 16 | for (i = 0; i < m; i++) a[i] = o + o * Math.random(); 17 | for (i = 0; i < 5; i++) bump(a); 18 | return a.map(stream_index); 19 | }); 20 | } 21 | 22 | /* Another layer generator using gamma distributions. */ 23 | function stream_waves(n, m) { 24 | return d3.range(n).map(function(i) { 25 | return d3.range(m).map(function(j) { 26 | var x = 20 * j / m - i / 3; 27 | return 2 * x * Math.exp(-.5 * x); 28 | }).map(stream_index); 29 | }); 30 | } 31 | 32 | function stream_index(d, i) { 33 | return {x: i, y: Math.max(0, d)}; 34 | } 35 | 36 | -------------------------------------------------------------------------------- /ch03/web/static/nvd3/src/intro.js: -------------------------------------------------------------------------------- 1 | (function(){ 2 | -------------------------------------------------------------------------------- /ch03/web/static/nvd3/src/outro.js: -------------------------------------------------------------------------------- 1 | })(); -------------------------------------------------------------------------------- /ch04/.dotcloud/config: -------------------------------------------------------------------------------- 1 | { 2 | "push_branch": null, 3 | "application": "testola", 4 | "version": "0.9.4", 5 | "push_protocol": "rsync" 6 | } -------------------------------------------------------------------------------- /ch04/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch04/__init__.py -------------------------------------------------------------------------------- /ch04/dotcloud.yml: -------------------------------------------------------------------------------- 1 | www: 2 | type: python 3 | systempackages: 4 | - libatlas-base-dev 5 | - gfortran 6 | - libsnappy1 7 | - libsnappy-dev 8 | data: 9 | type: mongodb -------------------------------------------------------------------------------- /ch04/index.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | import os 3 | 4 | # Setup Flask 5 | app = Flask(__name__) 6 | 7 | # Simple echo service 8 | @app.route("/") 9 | def hello(input): 10 | return input 11 | 12 | if __name__ == "__main__": 13 | port = int(os.environ.get('PORT', 5000)) 14 | app.run(host='0.0.0.0', port=port) 15 | -------------------------------------------------------------------------------- /ch04/requirements.txt: -------------------------------------------------------------------------------- 1 | ################################################################################################## 2 | # 2013.12.31 - Added requirements.txt to allow dotcloud to build and obtain all dependent package 3 | ################################################################################################## 4 | 5 | #BareNecessities==0.2.8 6 | #ESClient==0.5.3 7 | Flask==0.9 8 | Jinja2==2.6 9 | ##LEPL==5.1.3 10 | ##Mail==2.1.0 11 | #Werkzeug==0.8.3 12 | ##distribute==0.6.31 13 | ##python-snappy 14 | ##avro==1.7.3 15 | -e git+https://github.com/rhec/pyelasticsearch.git#egg=pyelasticsearch 16 | pymongo==2.4.1 17 | requests==1.0.4 18 | simplejson==2.6.2 19 | wsgiref==0.1.2 20 | ##numpy 21 | ##honcho 22 | ##scipy 23 | dotcloud 24 | ##python-dateutil 25 | ##nltk 26 | -------------------------------------------------------------------------------- /ch04/wsgi.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('/home/dotcloud/current') 3 | from index import app as application -------------------------------------------------------------------------------- /ch05/README.md: -------------------------------------------------------------------------------- 1 | Agile Data the Book 2 | =================== 3 | 4 | You can buy the book [here](http://shop.oreilly.com/product/0636920025054.do). You can read the book on [O'Reilly OFPS](http://ofps.oreilly.com/titles/9781449326265/) now. Work the chapter code examples as you go. Don't forget to initialize your python environment. Try linux (apt-get, yum) or OS X (brew, port) packages if any of the requirements don't install in your [virtualenv](http://www.virtualenv.org/en/latest/). 5 | 6 | Agile Data - Chapter 5: Collecting and Displaying Atomic Records 7 | =============================================================== 8 | 9 | ## Setup Python Virtual Environment ## 10 | 11 | ``` 12 | # From project root 13 | 14 | # Setup python virtualenv 15 | virtualenv -p `which python2.7` venv --distribute 16 | source venv/bin/activate 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | ## Store Emails in MongoDB ## 21 | 22 | ``` 23 | pig -l /tmp -x local -param avros= -param mongourl=mongodb://localhost/agile_data.emails -v -w avro_to_mongo.pig 24 | ``` 25 | 26 | ## Create the date and message_id indexes in MongoDB ## 27 | 28 | ``` 29 | mongo < list_emails.mongo.js 30 | ``` 31 | 32 | Or paste that file into the mongo shell. 33 | 34 | ## Access Emails from Python ## 35 | 36 | To test the 'pymongo' module by listing emails, run: 37 | 38 | ``` 39 | python ./mongo_list.py 40 | ``` 41 | 42 | ## Store Emails in ElasticSearch ## 43 | 44 | pig -l /tmp -x local -v -w ./elasticsearch.pig 45 | 46 | ## Search Emails from Python ## 47 | 48 | Test pyelastic and the ElasticSearch query/sort APIs via: 49 | 50 | ``` 51 | python elasticsearch.py 52 | ``` 53 | 54 | ## Run Inbox Application ## 55 | 56 | Finally, run our Python/Flask web application. 57 | 58 | ``` 59 | python web/index.py 60 | ``` 61 | 62 | -------------------------------------------------------------------------------- /ch05/avro_to_mongo.pig: -------------------------------------------------------------------------------- 1 | /* Set Home Directory - where we install software */ 2 | %default HOME `echo \$HOME/Software/` 3 | 4 | /* Load Avro jars and define shortcut */ 5 | REGISTER $HOME/pig/build/ivy/lib/Pig/avro-1.5.3.jar 6 | REGISTER $HOME/pig/build/ivy/lib/Pig/json-simple-1.1.jar 7 | REGISTER $HOME/pig/contrib/piggybank/java/piggybank.jar 8 | define AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage(); 9 | 10 | /* MongoDB libraries and configuration */ 11 | REGISTER $HOME/mongo-hadoop/mongo-2.10.1.jar 12 | REGISTER $HOME/mongo-hadoop/core/target/mongo-hadoop-core-1.1.0-SNAPSHOT.jar 13 | REGISTER $HOME/mongo-hadoop/pig/target/mongo-hadoop-pig-1.1.0-SNAPSHOT.jar 14 | 15 | set mapred.map.tasks.speculative.execution false 16 | set mapred.reduce.tasks.speculative.execution false 17 | 18 | /* Set speculative execution off so we don't have the chance of duplicate records in Mongo */ 19 | set mapred.map.tasks.speculative.execution false 20 | set mapred.reduce.tasks.speculative.execution false 21 | define MongoStorage com.mongodb.hadoop.pig.MongoStorage(); /* Shortcut */ 22 | 23 | avros = load '$avros' using AvroStorage(); /* For example, 'enron.avro' */ 24 | store avros into '$mongourl' using MongoStorage(); /* For example, 'mongodb://localhost/enron.emails' */ 25 | -------------------------------------------------------------------------------- /ch05/elasticsearch.pig: -------------------------------------------------------------------------------- 1 | /* Set Home Directory - where we install software */ 2 | %default HOME `echo \$HOME/Software/` 3 | 4 | /* Avro uses json-simple, and is in piggybank until Pig 0.12, where AvroStorage and TrevniStorage are builtins */ 5 | REGISTER $HOME/pig/build/ivy/lib/Pig/avro-1.5.3.jar 6 | REGISTER $HOME/pig/build/ivy/lib/Pig/json-simple-1.1.jar 7 | REGISTER $HOME/pig/contrib/piggybank/java/piggybank.jar 8 | 9 | DEFINE AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage(); 10 | 11 | /* Elasticsearch's own jars */ 12 | REGISTER $HOME/elasticsearch-0.20.2/lib/*.jar 13 | 14 | /* Register wonderdog - elasticsearch integration */ 15 | REGISTER $HOME/wonderdog/target/wonderdog-1.0-SNAPSHOT.jar 16 | 17 | /* Remove the old email json */ 18 | rmf /tmp/inbox_json 19 | 20 | /* Nuke the elasticsearch emails index, as we are about to replace it. */ 21 | sh curl -XDELETE 'http://localhost:9200/inbox/emails' 22 | 23 | /* Load Avros, and store as JSON */ 24 | emails = LOAD '/me/Data/test_mbox' USING AvroStorage(); 25 | STORE emails INTO '/tmp/inbox_json' USING JsonStorage(); 26 | 27 | /* Now load the JSON as a single chararray field, and index it into ElasticSearch with Wonderdog from InfoChimps */ 28 | email_json = LOAD '/tmp/inbox_json' AS (email:chararray); 29 | STORE email_json INTO 'es://inbox/emails?json=true&size=1000' USING com.infochimps.elasticsearch.pig.ElasticSearchStorage( 30 | '$HOME/elasticsearch-0.20.2/config/elasticsearch.yml', 31 | '$HOME/elasticsearch-0.20.2/plugins'); 32 | 33 | /* Search for Hadoop to make sure we get a hit in our email index */ 34 | sh curl -XGET 'http://localhost:9200/inbox/emails/_search?q=hadoop&pretty=true&size=1' 35 | -------------------------------------------------------------------------------- /ch05/elasticsearch.py: -------------------------------------------------------------------------------- 1 | import pyelasticsearch 2 | elastic = pyelasticsearch.ElasticSearch('http://localhost:9200/inbox') 3 | results = elastic.search("hadoop", index="emails") 4 | print results 5 | results2 = elastic.search({'query': {"term": { "body": query}}, 'from': 0, 'size': 20}, index="emails") 6 | print results2 7 | 8 | 9 | -------------------------------------------------------------------------------- /ch05/list_emails.mongo.js: -------------------------------------------------------------------------------- 1 | use agile_data; 2 | show collections; 3 | db.emails.findOne(); 4 | db.emails.find(); 5 | db.emails.find().sort({date: 1}); 6 | // error: { 7 | // "$err" : "too much data for sort() with no index. add an index or specify a smaller limit", 8 | // "code" : 10128 9 | } 10 | db.emails.getIndexes(); 11 | // [ 12 | // { 13 | // "v" : 1, 14 | // "key" : { 15 | // "_id" : 1 16 | // }, 17 | // "ns" : "agile_data.emails", 18 | // "name" : "_id_" 19 | // } 20 | // ] 21 | db.emails.ensureIndex({date: 1}); // Add an index on date 22 | db.emails.getIndexes(); 23 | // [ 24 | // { 25 | // "v" : 1, 26 | // "key" : { 27 | // "_id" : 1 28 | // }, 29 | // "ns" : "agile_data.emails", 30 | // "name" : "_id_" 31 | // }, 32 | // { 33 | // "v" : 1, 34 | // "key" : { 35 | // "date" : 1 36 | // }, 37 | // "ns" : "agile_data.emails", 38 | // "name" : "date_1" 39 | // } 40 | // ] 41 | db.emails.find().sort({date: 1}); 42 | // ... lots of sorted emails ... 43 | db.emails.ensureIndex({message_id: 1}); // Add message_id index 44 | db.emails.getIndexes(); 45 | // [ 46 | // { 47 | // "v" : 1, 48 | // "key" : { 49 | // "_id" : 1 50 | // }, 51 | // "ns" : "agile_data.emails", 52 | // "name" : "_id_" 53 | // }, 54 | // { 55 | // "v" : 1, 56 | // "key" : { 57 | // "date" : 1 58 | // }, 59 | // "ns" : "agile_data.emails", 60 | // "name" : "date_1" 61 | // }, 62 | // { 63 | // "v" : 1, 64 | // "key" : { 65 | // "message_id" : 1 66 | // }, 67 | // "ns" : "agile_data.emails", 68 | // "name" : "message_id_1" 69 | // } 70 | // ] 71 | db.emails.find().sort({date:0}).limit(10).pretty(); // Fetch last 10 emails, pretty format 72 | -------------------------------------------------------------------------------- /ch05/mongo_list.py: -------------------------------------------------------------------------------- 1 | import pymongo 2 | 3 | # Setup Mongo 4 | conn = pymongo.Connection() # defaults to localhost 5 | db = conn.agile_data 6 | emails = db['emails'] 7 | 8 | email_list = emails.find()[0:20] 9 | for email in email_list: 10 | print email 11 | -------------------------------------------------------------------------------- /ch05/web/config.py: -------------------------------------------------------------------------------- 1 | EMAILS_PER_PAGE=16 2 | ELASTIC_URL='http://localhost:9200/inbox' -------------------------------------------------------------------------------- /ch05/web/static/bootstrap/img/glyphicons-halflings-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch05/web/static/bootstrap/img/glyphicons-halflings-white.png -------------------------------------------------------------------------------- /ch05/web/static/bootstrap/img/glyphicons-halflings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch05/web/static/bootstrap/img/glyphicons-halflings.png -------------------------------------------------------------------------------- /ch05/web/static/nvd3/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Jekyll Files # 3 | ################ 4 | _site 5 | 6 | 7 | # Random Files # 8 | ################ 9 | *.swp 10 | *~ 11 | *.log 12 | 13 | 14 | # Private Test Data # 15 | ##################### 16 | *REALDATA* 17 | 18 | 19 | # OS generated files # 20 | ###################### 21 | .DS_Store* 22 | ehthumbs.db 23 | Icon? 24 | Thumbs.db 25 | -------------------------------------------------------------------------------- /ch05/web/static/nvd3/LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | ##nvd3.js License 3 | 4 | Copyright (c) 2011, 2012 [Novus Partners, Inc.][novus] 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | 18 | [novus]: https://www.novus.com/ 19 | 20 | 21 | 22 | ##d3.js License 23 | 24 | Copyright (c) 2012, Michael Bostock 25 | All rights reserved. 26 | 27 | Redistribution and use in source and binary forms, with or without 28 | modification, are permitted provided that the following conditions are met: 29 | 30 | * Redistributions of source code must retain the above copyright notice, this 31 | list of conditions and the following disclaimer. 32 | 33 | * Redistributions in binary form must reproduce the above copyright notice, 34 | this list of conditions and the following disclaimer in the documentation 35 | and/or other materials provided with the distribution. 36 | 37 | * The name Michael Bostock may not be used to endorse or promote products 38 | derived from this software without specific prior written permission. 39 | 40 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 41 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 43 | DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT, 44 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 45 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 46 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 47 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 48 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 49 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 50 | -------------------------------------------------------------------------------- /ch05/web/static/nvd3/Makefile: -------------------------------------------------------------------------------- 1 | JS_FILES = \ 2 | src/intro.js \ 3 | src/core.js \ 4 | src/tooltip.js \ 5 | src/utils.js \ 6 | src/models/axis.js \ 7 | src/models/historicalBar.js \ 8 | src/models/bullet.js \ 9 | src/models/bulletChart.js \ 10 | src/models/cumulativeLineChart.js \ 11 | src/models/discreteBar.js \ 12 | src/models/discreteBarChart.js \ 13 | src/models/distribution.js \ 14 | src/models/indentedTree.js \ 15 | src/models/legend.js \ 16 | src/models/line.js \ 17 | src/models/lineChart.js \ 18 | src/models/linePlusBarChart.js \ 19 | src/models/lineWithFocusChart.js \ 20 | src/models/multiBar.js \ 21 | src/models/multiBarChart.js \ 22 | src/models/multiBarHorizontal.js \ 23 | src/models/multiBarHorizontalChart.js \ 24 | src/models/multiChart.js \ 25 | src/models/ohlcBar.js \ 26 | src/models/pie.js \ 27 | src/models/pieChart.js \ 28 | src/models/scatter.js \ 29 | src/models/scatterChart.js \ 30 | src/models/scatterPlusLineChart.js \ 31 | src/models/sparkline.js \ 32 | src/models/sparklinePlus.js \ 33 | src/models/stackedArea.js \ 34 | src/models/stackedAreaChart.js \ 35 | src/outro.js 36 | 37 | JS_COMPILER = \ 38 | uglifyjs 39 | 40 | all: nv.d3.js nv.d3.min.js 41 | nv.d3.js: $(JS_FILES) 42 | nv.d3.min.js: $(JS_FILES) 43 | 44 | nv.d3.js: Makefile 45 | rm -f $@ 46 | cat $(filter %.js,$^) >> $@ 47 | 48 | %.min.js:: Makefile 49 | rm -f $@ 50 | cat $(filter %.js,$^) | $(JS_COMPILER) >> $@ 51 | 52 | clean: 53 | rm -rf nv.d3.js nv.d3.min.js 54 | 55 | 56 | -------------------------------------------------------------------------------- /ch05/web/static/nvd3/README.md: -------------------------------------------------------------------------------- 1 | Please see Novus' official statement on nvd3 with an explanation, 2 | apology, and commitment to its permanent status as an open-source 3 | project. 4 | [http://nvd3.org/statement.html](http://nvd3.org/statement.html) 5 | 6 | # nvd3 - v0.0.1 7 | 8 | A reusable chart library for d3.JS. 9 | 10 | Currently in an early stage of development, but will be a very active project. It may change quite a bit from its current state, but will always try to follow the style in which d3.js was done. 11 | 12 | You can also check out the [examples page](http://nvd3.org/ghpages/examples.html) 13 | 14 | --- 15 | 16 | If one of [the existing models](https://github.com/novus/nvd3/tree/master/src/models) doesn't meet your needs, fork the project, implement the model and an example using it, send us a pull request, for consideration for inclusion in the project. 17 | 18 | --- 19 | 20 | Minifying your fork: 21 | 22 | The Makefile requires [UglifyJS](https://github.com/mishoo/UglifyJS). 23 | 24 | The easist way to install is to install via npm. Run `npm install 25 | uglify-js` from your home directory, then add the output from `npm bin` 26 | into your path so that you have access to `uglifyjs` from the command 27 | line (remember to restart your terminal window when adding to the path.) 28 | 29 | Once you have `uglifyjs` command available, running `make` from your 30 | fork's root directory will rebuild both `nv.d3.js` and `nv.d3.min.js`. 31 | 32 | Without UglifyJS, you won't get the minified version when running make. 33 | 34 | **We ask that you DO NOT minify pull requests... 35 | If you need to minify please build pull request in separate branch, and 36 | merge and minify in yout master. 37 | 38 | -------------------------------------------------------------------------------- /ch05/web/static/nvd3/build.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | copy src\intro.js /B + src\core.js /B + src\tooltip.js /B temp1.js /B 3 | copy src\models\*.js /B temp2.js /B 4 | copy temp1.js /B + temp2.js /B + src\outro.js /B nv.d3.js /B 5 | del temp1.js 6 | del temp2.js 7 | -------------------------------------------------------------------------------- /ch05/web/static/nvd3/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | COMPRESSOR=`which yui-compressor` 3 | cat src/intro.js src/core.js src/tooltip.js src/utils.js src/models/*.js src/outro.js > nv.d3.js 4 | if [ -e $COMPRESSOR ]; then 5 | $COMPRESSOR --type js -o nv.d3.min.js nv.d3.js 6 | fi 7 | -------------------------------------------------------------------------------- /ch05/web/static/nvd3/deprecated/lineChart-old.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 29 | 30 | 31 |
32 |
33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 84 | -------------------------------------------------------------------------------- /ch05/web/static/nvd3/examples/images/grey-minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch05/web/static/nvd3/examples/images/grey-minus.png -------------------------------------------------------------------------------- /ch05/web/static/nvd3/examples/images/grey-plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch05/web/static/nvd3/examples/images/grey-plus.png -------------------------------------------------------------------------------- /ch05/web/static/nvd3/examples/legend.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 76 | -------------------------------------------------------------------------------- /ch05/web/static/nvd3/examples/line.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 96 | -------------------------------------------------------------------------------- /ch05/web/static/nvd3/examples/lineWithFocusChart.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 33 | 34 | 35 |
36 | 37 |
38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 88 | -------------------------------------------------------------------------------- /ch05/web/static/nvd3/examples/multiBar.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 18 | 19 | 20 |
21 | 22 |
23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 93 | -------------------------------------------------------------------------------- /ch05/web/static/nvd3/examples/multiBarChart.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 29 | 30 | 31 |
32 | 33 |
34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 81 | -------------------------------------------------------------------------------- /ch05/web/static/nvd3/examples/pie.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 94 | -------------------------------------------------------------------------------- /ch05/web/static/nvd3/examples/sparkline.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 21 | 22 | 23 |

Sparkline:

24 | 25 | 26 | 27 | 28 | 29 | 63 | -------------------------------------------------------------------------------- /ch05/web/static/nvd3/examples/sparklinePlus.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 18 | 19 | 20 |

SparklinePlus:

21 |

22 | 23 | 24 | 25 | 26 | 27 | 28 | 68 | -------------------------------------------------------------------------------- /ch05/web/static/nvd3/examples/stream_layers.js: -------------------------------------------------------------------------------- 1 | 2 | /* Inspired by Lee Byron's test data generator. */ 3 | function stream_layers(n, m, o) { 4 | if (arguments.length < 3) o = 0; 5 | function bump(a) { 6 | var x = 1 / (.1 + Math.random()), 7 | y = 2 * Math.random() - .5, 8 | z = 10 / (.1 + Math.random()); 9 | for (var i = 0; i < m; i++) { 10 | var w = (i / m - y) * z; 11 | a[i] += x * Math.exp(-w * w); 12 | } 13 | } 14 | return d3.range(n).map(function() { 15 | var a = [], i; 16 | for (i = 0; i < m; i++) a[i] = o + o * Math.random(); 17 | for (i = 0; i < 5; i++) bump(a); 18 | return a.map(stream_index); 19 | }); 20 | } 21 | 22 | /* Another layer generator using gamma distributions. */ 23 | function stream_waves(n, m) { 24 | return d3.range(n).map(function(i) { 25 | return d3.range(m).map(function(j) { 26 | var x = 20 * j / m - i / 3; 27 | return 2 * x * Math.exp(-.5 * x); 28 | }).map(stream_index); 29 | }); 30 | } 31 | 32 | function stream_index(d, i) { 33 | return {x: i, y: Math.max(0, d)}; 34 | } 35 | 36 | -------------------------------------------------------------------------------- /ch05/web/static/nvd3/src/intro.js: -------------------------------------------------------------------------------- 1 | (function(){ 2 | -------------------------------------------------------------------------------- /ch05/web/static/nvd3/src/outro.js: -------------------------------------------------------------------------------- 1 | })(); -------------------------------------------------------------------------------- /ch05/web/templates/partials/email.html: -------------------------------------------------------------------------------- 1 | 2 | {% extends "layout.html" %} 3 | 4 | 5 | {% import "macros.jnj" as common %} 6 | 7 | 8 | {% macro display_in_reply_to(key, name) %} 9 | {% if email[key] != 'None' -%} 10 |
11 | {{ common.display_label(name)|safe }} 12 | {{ common.display_link(email[key], '/email', email[key])|safe }} 13 |
14 | {% endif -%} 15 | {% endmacro -%} 16 | 17 | 18 | {% macro convert_body(body) -%} 19 | {{ body.replace('\r\n', '
')|safe }} 20 | {% endmacro -%} 21 | 22 | 23 | {% macro display_email_body(record) -%} 24 | {% if(record['body']) -%} 25 |
26 |
27 | {{ convert_body(record['body']) }} 28 |
29 |
30 | {% endif -%} 31 | {% endmacro -%} 32 | 33 | 34 | {% block content -%} 35 | 38 |

Email ID: {{email['message_id']}}

39 |
40 | {{ common.display_email_addresses('From', email['from'])|safe }} 41 | {{ common.display_email_addresses('To', email['tos'])|safe }} 42 | {{ common.display_email_addresses('Cc', email['ccs'])|safe }} 43 | {{ common.display_email_addresses('Bcc', email['bccs'])|safe }} 44 | {{ common.display_email_addresses('Reply-To', email['reply_tos'])|safe }} 45 | 46 | {{ display_in_reply_to('in_reply_to', 'In-Reply-To') }} 47 | {{ common.display_field(email['date'], 'Date')|safe }} 48 | {{ common.display_field(email['subject'], 'Subject')|safe }} 49 | 50 | {{ display_email_body(email) }} 51 |
52 | {% endblock -%} -------------------------------------------------------------------------------- /ch05/web/templates/partials/emails.html: -------------------------------------------------------------------------------- 1 | 2 | {% extends "layout.html" %} 3 | 4 | 5 | {% import "macros.jnj" as common %} 6 | 7 | 8 | 9 | {% block content -%} 10 | 13 |
Emails 14 |
17 |
18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | {% for email in emails %} 26 | 27 | 28 | 29 | 30 | 31 | {% endfor %} 32 | 33 |
FromSubjectDate
{{ common.display_email_address(email['from'])|safe }}{{ common.display_link(email['message_id'], '/email', email['subject'])|safe }}{{ email['date'] }}
34 | {% if nav_offsets and nav_path -%} 35 | {{ common.display_nav(nav_offsets, nav_path, query)|safe }} 36 | {% endif -%} 37 |
38 | {% endblock -%} 39 | -------------------------------------------------------------------------------- /ch06/list_addresses.py: -------------------------------------------------------------------------------- 1 | import pymongo 2 | 3 | # Setup Mongo 4 | conn = pymongo.Connection() # defaults to localhost 5 | db = conn.agile_data 6 | addresses_per_email = db['addresses_per_email'] 7 | 8 | address_lists = addresses_per_email.find()[0:20] 9 | for addresses in address_lists: 10 | print addresses 11 | 12 | emails_per_address = db['emails_per_address'] 13 | email_list = emails_per_address.find_one() 14 | for email in email_list: 15 | print email 16 | -------------------------------------------------------------------------------- /ch06/mongo.js: -------------------------------------------------------------------------------- 1 | use agile_data 2 | show collections 3 | db.emails_per_address.ensureIndex({address: 1}); 4 | db.emails_per_address.findOne() 5 | // { 6 | // "_id" : ObjectId("50f1cfe93004acab8d0340ea"), 7 | // "address" : "user@pig.apache.org", 8 | // "messages" : [ 9 | // { 10 | // "message_id" : "2CC96549-8E00-46BF-998E-5606B6952467@gmail.com", 11 | // "subject" : "Re: Group by with count", 12 | // "date" : "2012-12-27T15:36:58" 13 | // }, 14 | // { 15 | // "message_id" : "2CC96549-8E00-46BF-998E-5606B6952467@gmail.com", 16 | // "subject" : "Re: Group by with count", 17 | // "date" : "2012-12-27T15:36:58" 18 | // }, 19 | // { 20 | // "message_id" : "2CC96549-8E00-46BF-998E-5606B6952467@gmail.com", 21 | // "subject" : "Re: Group by with count", 22 | // "date" : "2012-12-27T15:36:58" 23 | // }, 24 | // ... 25 | db.addresses_per_email.ensureIndex({message_id: 1}); 26 | db.addresses_per_email.findOne() 27 | // { 28 | // "_id" : ObjectId("50f1d8453004db7be37cffb0"), 29 | // "message_id" : "kl59ip.iuzmp1@", 30 | // "addresses" : [ 31 | // { 32 | // "address" : "artifacts@computerhistory.org" 33 | // }, 34 | // { 35 | // "address" : "russell.jurney@gmail.com" 36 | // }, 37 | // { 38 | // "address" : "russell.jurney@gmail.com" 39 | // } 40 | // ] 41 | // } 42 | db.sent_distributions.ensureIndex({address: 1}) 43 | db.sent_distributions.findOne() 44 | // { 45 | // "_id" : ObjectId("50f365ba30042ade8f22cb86"), 46 | // "address" : "russell.jurney@gmail.com", 47 | // "sent_distribution" : [ 48 | // { 49 | // "sent_hour" : "00", 50 | // "total" : NumberLong(435) 51 | // }, 52 | // { 53 | // "sent_hour" : "01", 54 | // "total" : NumberLong(307) 55 | // }, 56 | // ... 57 | // ] 58 | // } 59 | -------------------------------------------------------------------------------- /ch06/web/config.py: -------------------------------------------------------------------------------- 1 | EMAILS_PER_PAGE=15 2 | ELASTIC_URL='http://localhost:9200/inbox' -------------------------------------------------------------------------------- /ch06/web/static/bootstrap/img/glyphicons-halflings-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch06/web/static/bootstrap/img/glyphicons-halflings-white.png -------------------------------------------------------------------------------- /ch06/web/static/bootstrap/img/glyphicons-halflings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch06/web/static/bootstrap/img/glyphicons-halflings.png -------------------------------------------------------------------------------- /ch06/web/static/nvd3/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Jekyll Files # 3 | ################ 4 | _site 5 | 6 | 7 | # Random Files # 8 | ################ 9 | *.swp 10 | *~ 11 | *.log 12 | 13 | 14 | # Private Test Data # 15 | ##################### 16 | *REALDATA* 17 | 18 | 19 | # OS generated files # 20 | ###################### 21 | .DS_Store* 22 | ehthumbs.db 23 | Icon? 24 | Thumbs.db 25 | -------------------------------------------------------------------------------- /ch06/web/static/nvd3/LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | ##nvd3.js License 3 | 4 | Copyright (c) 2011, 2012 [Novus Partners, Inc.][novus] 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | 18 | [novus]: https://www.novus.com/ 19 | 20 | 21 | 22 | ##d3.js License 23 | 24 | Copyright (c) 2012, Michael Bostock 25 | All rights reserved. 26 | 27 | Redistribution and use in source and binary forms, with or without 28 | modification, are permitted provided that the following conditions are met: 29 | 30 | * Redistributions of source code must retain the above copyright notice, this 31 | list of conditions and the following disclaimer. 32 | 33 | * Redistributions in binary form must reproduce the above copyright notice, 34 | this list of conditions and the following disclaimer in the documentation 35 | and/or other materials provided with the distribution. 36 | 37 | * The name Michael Bostock may not be used to endorse or promote products 38 | derived from this software without specific prior written permission. 39 | 40 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 41 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 43 | DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT, 44 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 45 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 46 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 47 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 48 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 49 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 50 | -------------------------------------------------------------------------------- /ch06/web/static/nvd3/Makefile: -------------------------------------------------------------------------------- 1 | JS_FILES = \ 2 | src/intro.js \ 3 | src/core.js \ 4 | src/tooltip.js \ 5 | src/utils.js \ 6 | src/models/axis.js \ 7 | src/models/historicalBar.js \ 8 | src/models/bullet.js \ 9 | src/models/bulletChart.js \ 10 | src/models/cumulativeLineChart.js \ 11 | src/models/discreteBar.js \ 12 | src/models/discreteBarChart.js \ 13 | src/models/distribution.js \ 14 | src/models/indentedTree.js \ 15 | src/models/legend.js \ 16 | src/models/line.js \ 17 | src/models/lineChart.js \ 18 | src/models/linePlusBarChart.js \ 19 | src/models/lineWithFocusChart.js \ 20 | src/models/multiBar.js \ 21 | src/models/multiBarChart.js \ 22 | src/models/multiBarHorizontal.js \ 23 | src/models/multiBarHorizontalChart.js \ 24 | src/models/multiChart.js \ 25 | src/models/ohlcBar.js \ 26 | src/models/pie.js \ 27 | src/models/pieChart.js \ 28 | src/models/scatter.js \ 29 | src/models/scatterChart.js \ 30 | src/models/scatterPlusLineChart.js \ 31 | src/models/sparkline.js \ 32 | src/models/sparklinePlus.js \ 33 | src/models/stackedArea.js \ 34 | src/models/stackedAreaChart.js \ 35 | src/outro.js 36 | 37 | JS_COMPILER = \ 38 | uglifyjs 39 | 40 | all: nv.d3.js nv.d3.min.js 41 | nv.d3.js: $(JS_FILES) 42 | nv.d3.min.js: $(JS_FILES) 43 | 44 | nv.d3.js: Makefile 45 | rm -f $@ 46 | cat $(filter %.js,$^) >> $@ 47 | 48 | %.min.js:: Makefile 49 | rm -f $@ 50 | cat $(filter %.js,$^) | $(JS_COMPILER) >> $@ 51 | 52 | clean: 53 | rm -rf nv.d3.js nv.d3.min.js 54 | 55 | 56 | -------------------------------------------------------------------------------- /ch06/web/static/nvd3/README.md: -------------------------------------------------------------------------------- 1 | Please see Novus' official statement on nvd3 with an explanation, 2 | apology, and commitment to its permanent status as an open-source 3 | project. 4 | [http://nvd3.org/statement.html](http://nvd3.org/statement.html) 5 | 6 | # nvd3 - v0.0.1 7 | 8 | A reusable chart library for d3.JS. 9 | 10 | Currently in an early stage of development, but will be a very active project. It may change quite a bit from its current state, but will always try to follow the style in which d3.js was done. 11 | 12 | You can also check out the [examples page](http://nvd3.org/ghpages/examples.html) 13 | 14 | --- 15 | 16 | If one of [the existing models](https://github.com/novus/nvd3/tree/master/src/models) doesn't meet your needs, fork the project, implement the model and an example using it, send us a pull request, for consideration for inclusion in the project. 17 | 18 | --- 19 | 20 | Minifying your fork: 21 | 22 | The Makefile requires [UglifyJS](https://github.com/mishoo/UglifyJS). 23 | 24 | The easist way to install is to install via npm. Run `npm install 25 | uglify-js` from your home directory, then add the output from `npm bin` 26 | into your path so that you have access to `uglifyjs` from the command 27 | line (remember to restart your terminal window when adding to the path.) 28 | 29 | Once you have `uglifyjs` command available, running `make` from your 30 | fork's root directory will rebuild both `nv.d3.js` and `nv.d3.min.js`. 31 | 32 | Without UglifyJS, you won't get the minified version when running make. 33 | 34 | **We ask that you DO NOT minify pull requests... 35 | If you need to minify please build pull request in separate branch, and 36 | merge and minify in yout master. 37 | 38 | -------------------------------------------------------------------------------- /ch06/web/static/nvd3/build.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | copy src\intro.js /B + src\core.js /B + src\tooltip.js /B temp1.js /B 3 | copy src\models\*.js /B temp2.js /B 4 | copy temp1.js /B + temp2.js /B + src\outro.js /B nv.d3.js /B 5 | del temp1.js 6 | del temp2.js 7 | -------------------------------------------------------------------------------- /ch06/web/static/nvd3/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | COMPRESSOR=`which yui-compressor` 3 | cat src/intro.js src/core.js src/tooltip.js src/utils.js src/models/*.js src/outro.js > nv.d3.js 4 | if [ -e $COMPRESSOR ]; then 5 | $COMPRESSOR --type js -o nv.d3.min.js nv.d3.js 6 | fi 7 | -------------------------------------------------------------------------------- /ch06/web/static/nvd3/deprecated/lineChart-old.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 29 | 30 | 31 |
32 |
33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 84 | -------------------------------------------------------------------------------- /ch06/web/static/nvd3/examples/images/grey-minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch06/web/static/nvd3/examples/images/grey-minus.png -------------------------------------------------------------------------------- /ch06/web/static/nvd3/examples/images/grey-plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch06/web/static/nvd3/examples/images/grey-plus.png -------------------------------------------------------------------------------- /ch06/web/static/nvd3/examples/legend.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 76 | -------------------------------------------------------------------------------- /ch06/web/static/nvd3/examples/line.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 96 | -------------------------------------------------------------------------------- /ch06/web/static/nvd3/examples/lineWithFocusChart.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 33 | 34 | 35 |
36 | 37 |
38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 88 | -------------------------------------------------------------------------------- /ch06/web/static/nvd3/examples/multiBar.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 18 | 19 | 20 |
21 | 22 |
23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 93 | -------------------------------------------------------------------------------- /ch06/web/static/nvd3/examples/multiBarChart.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 29 | 30 | 31 |
32 | 33 |
34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 81 | -------------------------------------------------------------------------------- /ch06/web/static/nvd3/examples/pie.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 94 | -------------------------------------------------------------------------------- /ch06/web/static/nvd3/examples/sparkline.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 21 | 22 | 23 |

Sparkline:

24 | 25 | 26 | 27 | 28 | 29 | 63 | -------------------------------------------------------------------------------- /ch06/web/static/nvd3/examples/sparklinePlus.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 18 | 19 | 20 |

SparklinePlus:

21 |

22 | 23 | 24 | 25 | 26 | 27 | 28 | 68 | -------------------------------------------------------------------------------- /ch06/web/static/nvd3/examples/stream_layers.js: -------------------------------------------------------------------------------- 1 | 2 | /* Inspired by Lee Byron's test data generator. */ 3 | function stream_layers(n, m, o) { 4 | if (arguments.length < 3) o = 0; 5 | function bump(a) { 6 | var x = 1 / (.1 + Math.random()), 7 | y = 2 * Math.random() - .5, 8 | z = 10 / (.1 + Math.random()); 9 | for (var i = 0; i < m; i++) { 10 | var w = (i / m - y) * z; 11 | a[i] += x * Math.exp(-w * w); 12 | } 13 | } 14 | return d3.range(n).map(function() { 15 | var a = [], i; 16 | for (i = 0; i < m; i++) a[i] = o + o * Math.random(); 17 | for (i = 0; i < 5; i++) bump(a); 18 | return a.map(stream_index); 19 | }); 20 | } 21 | 22 | /* Another layer generator using gamma distributions. */ 23 | function stream_waves(n, m) { 24 | return d3.range(n).map(function(i) { 25 | return d3.range(m).map(function(j) { 26 | var x = 20 * j / m - i / 3; 27 | return 2 * x * Math.exp(-.5 * x); 28 | }).map(stream_index); 29 | }); 30 | } 31 | 32 | function stream_index(d, i) { 33 | return {x: i, y: Math.max(0, d)}; 34 | } 35 | 36 | -------------------------------------------------------------------------------- /ch06/web/static/nvd3/src/intro.js: -------------------------------------------------------------------------------- 1 | (function(){ 2 | -------------------------------------------------------------------------------- /ch06/web/static/nvd3/src/outro.js: -------------------------------------------------------------------------------- 1 | })(); -------------------------------------------------------------------------------- /ch06/web/templates/partials/address.html: -------------------------------------------------------------------------------- 1 | 2 | {% extends "layout.html" %} 3 | 4 | 5 | {% import "macros.jnj" as common %} 6 | 7 | 8 | 9 | {% block content -%} 10 |

Email Address

11 |
12 | 13 | 14 | 15 | 16 | 17 | 18 | {% for email in emails %} 19 | 20 | 21 | 22 | 23 | {% endfor %} 24 | 25 |
SubjectDate
{{ common.display_link(email['message_id'], '/email', email['subject'])|safe }}{{ email['date'] }}
26 | {% if nav_offsets and nav_path -%} 27 | {{ common.display_nav(nav_offsets, nav_path, query)|safe }} 28 | {% endif -%} 29 |
30 | {% endblock -%} 31 | -------------------------------------------------------------------------------- /ch06/web/templates/partials/emails.html: -------------------------------------------------------------------------------- 1 | 2 | {% extends "layout.html" %} 3 | 4 | 5 | {% import "macros.jnj" as common %} 6 | 7 | 8 | 9 | {% block content -%} 10 |

Emails 11 |

14 |
15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | {% for email in emails %} 23 | 24 | 25 | 26 | 27 | 28 | {% endfor %} 29 | 30 |
FromSubjectDate
{{ common.display_email_address(email['from'])|safe }}{{ common.display_link(email['message_id'], '/email', email['subject'])|safe }}{{ email['date'] }}
31 | {% if nav_offsets and nav_path -%} 32 | {{ common.display_nav(nav_offsets, nav_path, query)|safe }} 33 | {% endif -%} 34 |
35 | {% endblock -%} 36 | -------------------------------------------------------------------------------- /ch07/mongo/sent_distribution_fix_mongo.js: -------------------------------------------------------------------------------- 1 | function range(start, stop, step){ 2 | if (typeof stop=='undefined'){ 3 | // one param defined 4 | stop = start; 5 | start = 0; 6 | }; 7 | if (typeof step=='undefined'){ 8 | step = 1; 9 | }; 10 | if ((step>0 && start>=stop) || (step<0 && start<=stop)){ 11 | return []; 12 | }; 13 | var result = []; 14 | for (var i=start; step>0 ? istop; i+=step){ 15 | result.push(i); 16 | }; 17 | return result; 18 | }; 19 | 20 | // Get "00" - "23" 21 | function makeHourRange(num) { 22 | return num < 10 ? "0" + num.toString() : num.toString(); 23 | } 24 | 25 | function fillBlanks(rawData) { 26 | var hourRange = range(0,24); 27 | var ourData = Array(); 28 | for (hour in hourRange) 29 | { 30 | var hourString = makeHourRange(hour); 31 | var found = false; 32 | for(x in rawData) 33 | { 34 | if(rawData[x]['sent_hour'] == hourString) 35 | { 36 | found = true; 37 | break; 38 | } 39 | } 40 | if(found == true) 41 | { 42 | ourData.push(rawData[x]); 43 | } 44 | else 45 | { 46 | ourData.push({'sent_hour': hourString, 'total': 0}) 47 | } 48 | } 49 | return ourData; 50 | } 51 | 52 | use agile_data 53 | data = sent_dist.findOne(); 54 | fillBlanks(data['sent_distribution']); 55 | -------------------------------------------------------------------------------- /ch07/pig/lda.pig: -------------------------------------------------------------------------------- 1 | /* Set Home Directory - where we install software */ 2 | %default HOME `echo \$HOME/Software/` 3 | 4 | /* Avro uses json-simple, and is in piggybank until Pig 0.12, where AvroStorage and TrevniStorage are builtins */ 5 | REGISTER $HOME/pig/build/ivy/lib/Pig/avro-1.5.3.jar 6 | REGISTER $HOME/pig/build/ivy/lib/Pig/json-simple-1.1.jar 7 | REGISTER $HOME/pig/contrib/piggybank/java/piggybank.jar 8 | 9 | DEFINE AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage(); 10 | 11 | REGISTER $HOME/varaha/lib/*.jar /* */ 12 | REGISTER $HOME/varaha/target/varaha-1.0-SNAPSHOT.jar 13 | 14 | define TokenizeText varaha.text.TokenizeText(); 15 | define LDATopics varaha.topic.LDATopics(); 16 | define RangeConcat org.pygmalion.udf.RangeBasedStringConcat('0', ' '); 17 | 18 | set default_parallel 10 19 | set mapred.map.tasks.speculative.execution false 20 | set mapred.reduce.tasks.speculative.execution false 21 | 22 | -- 23 | -- Load the docs 24 | -- 25 | emails = load '/me/Data/test_mbox' using AvroStorage(); 26 | raw_documents = foreach emails generate message_id, body; 27 | -- 28 | -- Tokenize text to remove stopwords 29 | -- 30 | tokenized = foreach raw_documents generate message_id, flatten(TokenizeText(body)) as (token:chararray); 31 | 32 | -- 33 | -- Concat the text for a given doc with spaces 34 | -- 35 | documents = foreach (group tokenized by message_id) generate group as message_id, RangeConcat(tokenized.token) as text; 36 | 37 | -- 38 | -- Ensure all our documents are sane 39 | -- 40 | for_lda = filter documents by message_id IS NOT NULL and text IS NOT NULL; 41 | 42 | -- 43 | -- Group the docs by all and find topics 44 | -- 45 | -- WARNING: This is, in general, not appropriate in a production environment. 46 | -- Instead it is best to group by some piece of metadata which partitions 47 | -- the documents into smaller groups. 48 | -- 49 | topics = foreach (group for_lda all) generate 50 | FLATTEN(LDATopics(20, for_lda)) as ( 51 | topic_num:int, 52 | keywords:bag {t:tuple(keyword:chararray, weight:int)} 53 | ); 54 | 55 | 56 | store topics into '/tmp/lda_topics.txt'; -------------------------------------------------------------------------------- /ch07/pig/ntfidf.macro: -------------------------------------------------------------------------------- 1 | /* Derived from TF-IDF by Jacob Perkins at http://thedatachef.blogspot.com/2011/04/tf-idf-with-apache-pig.html with 2 | help from Mat Kelcey who referred me to http://nlp.stanford.edu/IR-book/html/htmledition/maximum-tf-normalization-1.html */ 3 | 4 | DEFINE ntf_idf(token_records, id_field, token_field) RETURNS out_relation { 5 | 6 | /* Calculate the term count per document */ 7 | doc_word_totals = foreach (group $token_records by ($id_field, $token_field)) generate 8 | FLATTEN(group) as ($id_field, token), 9 | COUNT_STAR($token_records) as doc_total; 10 | 11 | /* Calculate the document size */ 12 | pre_term_counts = foreach (group doc_word_totals by $id_field) generate 13 | group AS $id_field, 14 | FLATTEN(doc_word_totals.(token, doc_total)) as (token, doc_total), 15 | SUM(doc_word_totals.doc_total) as doc_size, 16 | MAX(doc_word_totals.doc_total) as max_freq; 17 | 18 | /* Calculate the TF */ 19 | term_freqs = foreach pre_term_counts generate 20 | $id_field as $id_field, 21 | token as token, 22 | ((double)doc_total / (double)doc_size / (double) max_freq) AS term_freq; 23 | 24 | /* Get count of documents using each token, for idf */ 25 | token_usages = foreach (group term_freqs by token) generate 26 | FLATTEN(term_freqs) as ($id_field:chararray, token:chararray, term_freq:double), 27 | COUNT_STAR(term_freqs) as num_docs_with_token; 28 | 29 | /* Get document count */ 30 | just_ids = foreach $token_records generate $id_field; 31 | just_ids = DISTINCT just_ids; 32 | ndocs = foreach (group just_ids all) generate COUNT_STAR(just_ids) as total_docs; 33 | 34 | /* Note the use of Pig Scalars to calculate idf */ 35 | scores = foreach token_usages { 36 | idf = LOG((double)ndocs.total_docs/(double)num_docs_with_token); 37 | ntf_idf = (double)term_freq * idf; 38 | generate $id_field as $id_field, 39 | token as token, 40 | (double)ntf_idf as score:double; 41 | }; 42 | 43 | $out_relation = filter scores by token IS NOT NULL and token != '' and LENGTH(token) > 2; -- score > 0.10 and 44 | }; 45 | -------------------------------------------------------------------------------- /ch07/pig/process_mcl.pig: -------------------------------------------------------------------------------- 1 | sent_counts = load '/tmp/sent_counts.tsv' as (from:chararray, to:chararray, weight:int); 2 | -------------------------------------------------------------------------------- /ch07/pig/publish_topics_per_email.pig: -------------------------------------------------------------------------------- 1 | /* Set Home Directory - where we install software */ 2 | %default HOME `echo \$HOME/Software/` 3 | 4 | /* MongoDB libraries and configuration */ 5 | REGISTER $HOME/mongo-hadoop/mongo-2.10.1.jar 6 | REGISTER $HOME/mongo-hadoop/core/target/mongo-hadoop-core-1.1.0-SNAPSHOT.jar 7 | REGISTER $HOME/mongo-hadoop/pig/target/mongo-hadoop-pig-1.1.0-SNAPSHOT.jar 8 | 9 | DEFINE MongoStorage com.mongodb.hadoop.pig.MongoStorage(); 10 | 11 | per_document_scores = LOAD '/tmp/topics_per_document.txt' AS (message_id:chararray, topics:bag{topic:tuple(word:chararray, score:double)}); 12 | store per_document_scores into 'mongodb://localhost/agile_data.topics_per_email' using MongoStorage(); 13 | -------------------------------------------------------------------------------- /ch07/pig/test_tokenizers.pig: -------------------------------------------------------------------------------- 1 | /* Set Home Directory - where we install software */ 2 | %default HOME `echo \$HOME/Software/` 3 | 4 | /* Avro uses json-simple, and is in piggybank until Pig 0.12, where AvroStorage and TrevniStorage are builtins */ 5 | REGISTER $HOME/pig/build/ivy/lib/Pig/avro-1.5.3.jar 6 | REGISTER $HOME/pig/build/ivy/lib/Pig/json-simple-1.1.jar 7 | REGISTER $HOME/pig/contrib/piggybank/java/piggybank.jar 8 | 9 | DEFINE AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage(); 10 | 11 | REGISTER $HOME/varaha/lib/*.jar /* */ 12 | REGISTER $HOME/varaha/target/varaha-1.0-SNAPSHOT.jar 13 | 14 | DEFINE TokenizeText varaha.text.TokenizeText(); 15 | DEFINE StanfordTokenize varaha.text.StanfordTokenize(); 16 | 17 | rmf /tmp/test_lucene.txt 18 | rmf /tmp/test_stanford.txt 19 | 20 | set default_parallel 5 21 | set mapred.map.tasks.speculative.execution false 22 | set mapred.reduce.tasks.speculative.execution false 23 | 24 | emails = load '/me/Data/test_mbox' using AvroStorage(); 25 | emails = limit emails 10; 26 | id_body = foreach emails generate message_id, body; 27 | 28 | token_records = foreach id_body generate message_id, FLATTEN(TokenizeText(body)) as tokens; 29 | token_records_2 = foreach id_body generate message_id, FLATTEN(StanfordTokenize(body)) as tokens; 30 | store token_records into '/tmp/test_lucene.txt'; 31 | store token_records_2 into '/tmp/test_stanford.txt'; -------------------------------------------------------------------------------- /ch07/pig/topics.pig: -------------------------------------------------------------------------------- 1 | /* Set Home Directory - where we install software */ 2 | %default HOME `echo \$HOME/Software/` 3 | 4 | /* Avro uses json-simple, and is in piggybank until Pig 0.12, where AvroStorage and TrevniStorage are builtins */ 5 | REGISTER $HOME/pig/build/ivy/lib/Pig/avro-1.7.4.jar 6 | REGISTER $HOME/pig/build/ivy/lib/Pig/json-simple-1.1.jar 7 | REGISTER $HOME/pig/contrib/piggybank/java/piggybank.jar 8 | 9 | DEFINE AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage(); 10 | DEFINE LENGTH org.apache.pig.piggybank.evaluation.string.LENGTH(); 11 | 12 | REGISTER $HOME/varaha/lib/*.jar /* Varaha has a good tokenizer */ 13 | REGISTER $HOME/varaha/target/varaha-1.0-SNAPSHOT.jar 14 | 15 | DEFINE TokenizeText varaha.text.TokenizeText('1', '1'); 16 | 17 | set default_parallel 20 18 | 19 | rmf /tmp/tf_idf_scores.txt 20 | rmf /tmp/ntf_idf_scores.txt 21 | rmf /tmp/trimmed_tokens.txt 22 | 23 | register 'udfs.py' using jython as funcs; 24 | import 'ntfidf.macro'; 25 | 26 | /* Load emails and trim unneeded fields */ 27 | emails = load '/me/Data/test_mbox' using AvroStorage(); 28 | -- emails = FILTER emails BY body IS NOT NULL; 29 | id_body_address = foreach emails generate message_id, body, from.address as address; 30 | 31 | /* Project and latten to message_id/address/token and basic filter */ 32 | token_records_address = foreach id_body_address generate message_id, address, FLATTEN(TokenizeText(body)) as token; 33 | trimmed_tokens = filter token_records_address by token is not null and token != '' and LENGTH(token) > 2; 34 | store trimmed_tokens into '/tmp/trimmed_tokens.txt'; 35 | 36 | /* Run topics per message */ 37 | ntf_idf_scores_per_message = ntf_idf(trimmed_tokens, 'message_id', 'token'); 38 | store ntf_idf_scores_per_message into '/tmp/ntf_idf_scores_per_message.txt'; 39 | 40 | /* Run topics per email address */ 41 | ntf_idf_scores_per_address = ntf_idf(trimmed_tokens, 'address', 'token'); 42 | store ntf_idf_scores_per_address into '/tmp/ntf_idf_scores_per_address.txt'; 43 | -------------------------------------------------------------------------------- /ch07/pig/udfs.py: -------------------------------------------------------------------------------- 1 | @outputSchema("sent_dist:bag{t:(sent_hour:chararray, total:int)}") 2 | def fill_in_blanks(sent_dist): 3 | print sent_dist 4 | out_data = list() 5 | hours = [ '%02d' % i for i in range(24) ] 6 | for hour in hours: 7 | entry = [x for x in sent_dist if x[0] == hour] 8 | if entry: 9 | entry = entry[0] 10 | print entry.__class__ 11 | out_data.append(tuple([entry[0], entry[1]])) 12 | else: 13 | out_data.append(tuple([hour, 0])) 14 | return out_data 15 | 16 | @outputSchema("token:chararray") 17 | def lower(token): 18 | return token.lower() 19 | 20 | import re, sys 21 | 22 | @outputSchema("token:chararray") 23 | def remove_punctuation(token): 24 | #word = re.sub(r'([^\w\s]|_)+(?=\s|$)', '', token) 25 | #punctuation = re.compile(r'[-.@&$#`\'?!,> 2: 29 | words.append(token) 30 | 31 | def main(): 32 | te = TokenExtractor() 33 | for line in sys.stdin: 34 | message_id, token = line.split('\t') 35 | lowers = te.lower(token) 36 | no_punc = te.remove_punctuation(lowers) 37 | no_shorts = te.short_filter(no_punc) 38 | print message_id + "\t" + no_shorts 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /ch07/web/config.py: -------------------------------------------------------------------------------- 1 | EMAILS_PER_LIST_PAGE=15 2 | EMAILS_PER_ADDRESS_PAGE=6 3 | ELASTIC_URL='http://localhost:9200/inbox' 4 | -------------------------------------------------------------------------------- /ch07/web/static/bootstrap/img/glyphicons-halflings-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch07/web/static/bootstrap/img/glyphicons-halflings-white.png -------------------------------------------------------------------------------- /ch07/web/static/bootstrap/img/glyphicons-halflings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch07/web/static/bootstrap/img/glyphicons-halflings.png -------------------------------------------------------------------------------- /ch07/web/static/nvd3/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Jekyll Files # 3 | ################ 4 | _site 5 | 6 | 7 | # Random Files # 8 | ################ 9 | *.swp 10 | *~ 11 | *.log 12 | 13 | 14 | # Private Test Data # 15 | ##################### 16 | *REALDATA* 17 | 18 | 19 | # OS generated files # 20 | ###################### 21 | .DS_Store* 22 | ehthumbs.db 23 | Icon? 24 | Thumbs.db 25 | -------------------------------------------------------------------------------- /ch07/web/static/nvd3/LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | ##nvd3.js License 3 | 4 | Copyright (c) 2011, 2012 [Novus Partners, Inc.][novus] 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | 18 | [novus]: https://www.novus.com/ 19 | 20 | 21 | 22 | ##d3.js License 23 | 24 | Copyright (c) 2012, Michael Bostock 25 | All rights reserved. 26 | 27 | Redistribution and use in source and binary forms, with or without 28 | modification, are permitted provided that the following conditions are met: 29 | 30 | * Redistributions of source code must retain the above copyright notice, this 31 | list of conditions and the following disclaimer. 32 | 33 | * Redistributions in binary form must reproduce the above copyright notice, 34 | this list of conditions and the following disclaimer in the documentation 35 | and/or other materials provided with the distribution. 36 | 37 | * The name Michael Bostock may not be used to endorse or promote products 38 | derived from this software without specific prior written permission. 39 | 40 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 41 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 43 | DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT, 44 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 45 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 46 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 47 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 48 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 49 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 50 | -------------------------------------------------------------------------------- /ch07/web/static/nvd3/Makefile: -------------------------------------------------------------------------------- 1 | JS_FILES = \ 2 | src/intro.js \ 3 | src/core.js \ 4 | src/tooltip.js \ 5 | src/utils.js \ 6 | src/models/axis.js \ 7 | src/models/historicalBar.js \ 8 | src/models/bullet.js \ 9 | src/models/bulletChart.js \ 10 | src/models/cumulativeLineChart.js \ 11 | src/models/discreteBar.js \ 12 | src/models/discreteBarChart.js \ 13 | src/models/distribution.js \ 14 | src/models/indentedTree.js \ 15 | src/models/legend.js \ 16 | src/models/line.js \ 17 | src/models/lineChart.js \ 18 | src/models/linePlusBarChart.js \ 19 | src/models/lineWithFocusChart.js \ 20 | src/models/multiBar.js \ 21 | src/models/multiBarChart.js \ 22 | src/models/multiBarHorizontal.js \ 23 | src/models/multiBarHorizontalChart.js \ 24 | src/models/multiChart.js \ 25 | src/models/ohlcBar.js \ 26 | src/models/pie.js \ 27 | src/models/pieChart.js \ 28 | src/models/scatter.js \ 29 | src/models/scatterChart.js \ 30 | src/models/scatterPlusLineChart.js \ 31 | src/models/sparkline.js \ 32 | src/models/sparklinePlus.js \ 33 | src/models/stackedArea.js \ 34 | src/models/stackedAreaChart.js \ 35 | src/outro.js 36 | 37 | JS_COMPILER = \ 38 | uglifyjs 39 | 40 | all: nv.d3.js nv.d3.min.js 41 | nv.d3.js: $(JS_FILES) 42 | nv.d3.min.js: $(JS_FILES) 43 | 44 | nv.d3.js: Makefile 45 | rm -f $@ 46 | cat $(filter %.js,$^) >> $@ 47 | 48 | %.min.js:: Makefile 49 | rm -f $@ 50 | cat $(filter %.js,$^) | $(JS_COMPILER) >> $@ 51 | 52 | clean: 53 | rm -rf nv.d3.js nv.d3.min.js 54 | 55 | 56 | -------------------------------------------------------------------------------- /ch07/web/static/nvd3/README.md: -------------------------------------------------------------------------------- 1 | Please see Novus' official statement on nvd3 with an explanation, 2 | apology, and commitment to its permanent status as an open-source 3 | project. 4 | [http://nvd3.org/statement.html](http://nvd3.org/statement.html) 5 | 6 | # nvd3 - v0.0.1 7 | 8 | A reusable chart library for d3.JS. 9 | 10 | Currently in an early stage of development, but will be a very active project. It may change quite a bit from its current state, but will always try to follow the style in which d3.js was done. 11 | 12 | You can also check out the [examples page](http://nvd3.org/ghpages/examples.html) 13 | 14 | --- 15 | 16 | If one of [the existing models](https://github.com/novus/nvd3/tree/master/src/models) doesn't meet your needs, fork the project, implement the model and an example using it, send us a pull request, for consideration for inclusion in the project. 17 | 18 | --- 19 | 20 | Minifying your fork: 21 | 22 | The Makefile requires [UglifyJS](https://github.com/mishoo/UglifyJS). 23 | 24 | The easist way to install is to install via npm. Run `npm install 25 | uglify-js` from your home directory, then add the output from `npm bin` 26 | into your path so that you have access to `uglifyjs` from the command 27 | line (remember to restart your terminal window when adding to the path.) 28 | 29 | Once you have `uglifyjs` command available, running `make` from your 30 | fork's root directory will rebuild both `nv.d3.js` and `nv.d3.min.js`. 31 | 32 | Without UglifyJS, you won't get the minified version when running make. 33 | 34 | **We ask that you DO NOT minify pull requests... 35 | If you need to minify please build pull request in separate branch, and 36 | merge and minify in yout master. 37 | 38 | -------------------------------------------------------------------------------- /ch07/web/static/nvd3/build.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | copy src\intro.js /B + src\core.js /B + src\tooltip.js /B temp1.js /B 3 | copy src\models\*.js /B temp2.js /B 4 | copy temp1.js /B + temp2.js /B + src\outro.js /B nv.d3.js /B 5 | del temp1.js 6 | del temp2.js 7 | -------------------------------------------------------------------------------- /ch07/web/static/nvd3/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | COMPRESSOR=`which yui-compressor` 3 | cat src/intro.js src/core.js src/tooltip.js src/utils.js src/models/*.js src/outro.js > nv.d3.js 4 | if [ -e $COMPRESSOR ]; then 5 | $COMPRESSOR --type js -o nv.d3.min.js nv.d3.js 6 | fi 7 | -------------------------------------------------------------------------------- /ch07/web/static/nvd3/deprecated/lineChart-old.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 29 | 30 | 31 |
32 |
33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 84 | -------------------------------------------------------------------------------- /ch07/web/static/nvd3/examples/images/grey-minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch07/web/static/nvd3/examples/images/grey-minus.png -------------------------------------------------------------------------------- /ch07/web/static/nvd3/examples/images/grey-plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch07/web/static/nvd3/examples/images/grey-plus.png -------------------------------------------------------------------------------- /ch07/web/static/nvd3/examples/legend.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 76 | -------------------------------------------------------------------------------- /ch07/web/static/nvd3/examples/line.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 96 | -------------------------------------------------------------------------------- /ch07/web/static/nvd3/examples/lineWithFocusChart.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 33 | 34 | 35 |
36 | 37 |
38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 88 | -------------------------------------------------------------------------------- /ch07/web/static/nvd3/examples/multiBar.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 18 | 19 | 20 |
21 | 22 |
23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 93 | -------------------------------------------------------------------------------- /ch07/web/static/nvd3/examples/multiBarChart.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 29 | 30 | 31 |
32 | 33 |
34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 81 | -------------------------------------------------------------------------------- /ch07/web/static/nvd3/examples/pie.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 94 | -------------------------------------------------------------------------------- /ch07/web/static/nvd3/examples/sparkline.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 21 | 22 | 23 |

Sparkline:

24 | 25 | 26 | 27 | 28 | 29 | 63 | -------------------------------------------------------------------------------- /ch07/web/static/nvd3/examples/sparklinePlus.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 18 | 19 | 20 |

SparklinePlus:

21 |

22 | 23 | 24 | 25 | 26 | 27 | 28 | 68 | -------------------------------------------------------------------------------- /ch07/web/static/nvd3/examples/stream_layers.js: -------------------------------------------------------------------------------- 1 | 2 | /* Inspired by Lee Byron's test data generator. */ 3 | function stream_layers(n, m, o) { 4 | if (arguments.length < 3) o = 0; 5 | function bump(a) { 6 | var x = 1 / (.1 + Math.random()), 7 | y = 2 * Math.random() - .5, 8 | z = 10 / (.1 + Math.random()); 9 | for (var i = 0; i < m; i++) { 10 | var w = (i / m - y) * z; 11 | a[i] += x * Math.exp(-w * w); 12 | } 13 | } 14 | return d3.range(n).map(function() { 15 | var a = [], i; 16 | for (i = 0; i < m; i++) a[i] = o + o * Math.random(); 17 | for (i = 0; i < 5; i++) bump(a); 18 | return a.map(stream_index); 19 | }); 20 | } 21 | 22 | /* Another layer generator using gamma distributions. */ 23 | function stream_waves(n, m) { 24 | return d3.range(n).map(function(i) { 25 | return d3.range(m).map(function(j) { 26 | var x = 20 * j / m - i / 3; 27 | return 2 * x * Math.exp(-.5 * x); 28 | }).map(stream_index); 29 | }); 30 | } 31 | 32 | function stream_index(d, i) { 33 | return {x: i, y: Math.max(0, d)}; 34 | } 35 | 36 | -------------------------------------------------------------------------------- /ch07/web/static/nvd3/src/intro.js: -------------------------------------------------------------------------------- 1 | (function(){ 2 | -------------------------------------------------------------------------------- /ch07/web/static/nvd3/src/outro.js: -------------------------------------------------------------------------------- 1 | })(); -------------------------------------------------------------------------------- /ch07/web/templates/partials/emails.html: -------------------------------------------------------------------------------- 1 | 2 | {% extends "layout.html" %} 3 | 4 | 5 | {% import "macros.jnj" as common %} 6 | 7 | 8 | 9 | {% block content -%} 10 |

Emails 11 |

14 |
15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | {% for email in emails %} 23 | 24 | 25 | 26 | 27 | 28 | {% endfor %} 29 | 30 |
FromSubjectDate
{{ common.display_email_address(email['from'])|safe }}{{ common.display_link(email['message_id'], '/email', email['subject'])|safe }}{{ email['date'] }}
31 | {% if nav_offsets and nav_path -%} 32 | {{ common.display_nav(nav_offsets, nav_path, query)|safe }} 33 | {% endif -%} 34 |
35 | {% endblock -%} 36 | -------------------------------------------------------------------------------- /ch08/README.md: -------------------------------------------------------------------------------- 1 | Agile Data the Book 2 | =================== 3 | 4 | You can buy the book [here](http://shop.oreilly.com/product/0636920025054.do). You can read the book on [O'Reilly OFPS](http://ofps.oreilly.com/titles/9781449326265/) now. Work the chapter code examples as you go. Don't forget to initialize your python environment. Try linux (apt-get, yum) or OS X (brew, port) packages if any of the requirements don't install in your [virtualenv](http://www.virtualenv.org/en/latest/). 5 | 6 | Agile Data - Chapter 8: Making Predictions 7 | =============================================================== 8 | 9 | ## Setup Python Virtual Environment ## 10 | 11 | ``` 12 | # From project root 13 | 14 | # Setup python virtualenv 15 | virtualenv -p `which python2.7` venv --distribute 16 | source venv/bin/activate 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | ## Run Analytic Inbox Application ## 21 | 22 | Most of this chapter will involve running our Python/Flask web application. 23 | 24 | ``` 25 | python web/index.py 26 | ``` 27 | 28 | ## Smooth Email Sent Time Distributions ## 29 | 30 | See previous - start the web app, the fix is applied to 'web/index.py'. 31 | 32 | ## Calculate Reply Probability ## 33 | 34 | To calculate, run: 35 | 36 | ``` 37 | pig -l /tmp -x local -v -w p_reply.pig 38 | ``` 39 | 40 | This will create a mongodb store: 'mongodb://localhost/agile_data.related_addresses' 41 | 42 | ## Check MongoDB for P(reply|from) and P(reply|to) ## 43 | 44 | Run 'mongo.js', or in the mongo terminal: 45 | 46 | ``` 47 | mongo agile_data 48 | db.reply_ratios.ensureIndex({from: 1, to: 1}); 49 | db.reply_ratios.findOne(); 50 | ``` 51 | 52 | -------------------------------------------------------------------------------- /ch08/web/config.py: -------------------------------------------------------------------------------- 1 | EMAILS_PER_LIST_PAGE=15 2 | EMAILS_PER_ADDRESS_PAGE=6 3 | ELASTIC_URL='http://localhost:9200/inbox' 4 | MY_EMAIL='russell.jurney@gmail.com' 5 | -------------------------------------------------------------------------------- /ch08/web/smoother.py: -------------------------------------------------------------------------------- 1 | # Based on http://www.scipy.org/Cookbook/SignalSmooth 2 | 3 | import numpy as np 4 | 5 | class Smoother(): 6 | 7 | """Takes an array of objects as input, and the data key of the object for access.""" 8 | def __init__(self, raw_data, data_key): 9 | self.raw_data = raw_data 10 | print self.raw_data 11 | self.data = self.to_array(raw_data, data_key) 12 | 13 | """Given an array of objects with values, return a numpy array of values.""" 14 | def to_array(self, in_data, data_key): 15 | data_array = list() 16 | for datum in in_data: 17 | data_array.append(datum[data_key]) 18 | return np.array(data_array) 19 | 20 | """Smoothing method from SciPy SignalSmooth Cookbook: http://www.scipy.org/Cookbook/SignalSmooth""" 21 | def smooth(self, window_len=5, window='hamming'): 22 | x = self.data 23 | s=np.r_[2*x[0]-x[window_len:1:-1], x, 2*x[-1]-x[-1:-window_len:-1]] 24 | w = getattr(np, window)(window_len) 25 | y = np.convolve(w/w.sum(), s, mode='same') 26 | self.smoothed = y[window_len-1:-window_len+1] 27 | 28 | def to_objects(self): 29 | objects = list() 30 | hours = [ '%02d' % i for i in range(24) ] 31 | for idx, val in enumerate(hours): 32 | objects.append({"sent_hour": val, "total": round(self.smoothed[idx], 0)}) 33 | return objects 34 | -------------------------------------------------------------------------------- /ch08/web/static/bootstrap/img/glyphicons-halflings-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch08/web/static/bootstrap/img/glyphicons-halflings-white.png -------------------------------------------------------------------------------- /ch08/web/static/bootstrap/img/glyphicons-halflings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch08/web/static/bootstrap/img/glyphicons-halflings.png -------------------------------------------------------------------------------- /ch08/web/static/nvd3/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Jekyll Files # 3 | ################ 4 | _site 5 | 6 | 7 | # Random Files # 8 | ################ 9 | *.swp 10 | *~ 11 | *.log 12 | 13 | 14 | # Private Test Data # 15 | ##################### 16 | *REALDATA* 17 | 18 | 19 | # OS generated files # 20 | ###################### 21 | .DS_Store* 22 | ehthumbs.db 23 | Icon? 24 | Thumbs.db 25 | -------------------------------------------------------------------------------- /ch08/web/static/nvd3/Makefile: -------------------------------------------------------------------------------- 1 | JS_FILES = \ 2 | src/intro.js \ 3 | src/core.js \ 4 | src/tooltip.js \ 5 | src/utils.js \ 6 | src/models/axis.js \ 7 | src/models/historicalBar.js \ 8 | src/models/bullet.js \ 9 | src/models/bulletChart.js \ 10 | src/models/cumulativeLineChart.js \ 11 | src/models/discreteBar.js \ 12 | src/models/discreteBarChart.js \ 13 | src/models/distribution.js \ 14 | src/models/indentedTree.js \ 15 | src/models/legend.js \ 16 | src/models/line.js \ 17 | src/models/lineChart.js \ 18 | src/models/linePlusBarChart.js \ 19 | src/models/lineWithFocusChart.js \ 20 | src/models/multiBar.js \ 21 | src/models/multiBarChart.js \ 22 | src/models/multiBarHorizontal.js \ 23 | src/models/multiBarHorizontalChart.js \ 24 | src/models/multiChart.js \ 25 | src/models/ohlcBar.js \ 26 | src/models/pie.js \ 27 | src/models/pieChart.js \ 28 | src/models/scatter.js \ 29 | src/models/scatterChart.js \ 30 | src/models/scatterPlusLineChart.js \ 31 | src/models/sparkline.js \ 32 | src/models/sparklinePlus.js \ 33 | src/models/stackedArea.js \ 34 | src/models/stackedAreaChart.js \ 35 | src/outro.js 36 | 37 | JS_COMPILER = \ 38 | uglifyjs 39 | 40 | all: nv.d3.js nv.d3.min.js 41 | nv.d3.js: $(JS_FILES) 42 | nv.d3.min.js: $(JS_FILES) 43 | 44 | nv.d3.js: Makefile 45 | rm -f $@ 46 | cat $(filter %.js,$^) >> $@ 47 | 48 | %.min.js:: Makefile 49 | rm -f $@ 50 | cat $(filter %.js,$^) | $(JS_COMPILER) >> $@ 51 | 52 | clean: 53 | rm -rf nv.d3.js nv.d3.min.js 54 | 55 | 56 | -------------------------------------------------------------------------------- /ch08/web/static/nvd3/README.md: -------------------------------------------------------------------------------- 1 | Please see Novus' official statement on nvd3 with an explanation, 2 | apology, and commitment to its permanent status as an open-source 3 | project. 4 | [http://nvd3.org/statement.html](http://nvd3.org/statement.html) 5 | 6 | # nvd3 - v0.0.1 7 | 8 | A reusable chart library for d3.JS. 9 | 10 | Currently in an early stage of development, but will be a very active project. It may change quite a bit from its current state, but will always try to follow the style in which d3.js was done. 11 | 12 | You can also check out the [examples page](http://nvd3.org/ghpages/examples.html) 13 | 14 | --- 15 | 16 | If one of [the existing models](https://github.com/novus/nvd3/tree/master/src/models) doesn't meet your needs, fork the project, implement the model and an example using it, send us a pull request, for consideration for inclusion in the project. 17 | 18 | --- 19 | 20 | Minifying your fork: 21 | 22 | The Makefile requires [UglifyJS](https://github.com/mishoo/UglifyJS). 23 | 24 | The easist way to install is to install via npm. Run `npm install 25 | uglify-js` from your home directory, then add the output from `npm bin` 26 | into your path so that you have access to `uglifyjs` from the command 27 | line (remember to restart your terminal window when adding to the path.) 28 | 29 | Once you have `uglifyjs` command available, running `make` from your 30 | fork's root directory will rebuild both `nv.d3.js` and `nv.d3.min.js`. 31 | 32 | Without UglifyJS, you won't get the minified version when running make. 33 | 34 | **We ask that you DO NOT minify pull requests... 35 | If you need to minify please build pull request in separate branch, and 36 | merge and minify in yout master. 37 | 38 | -------------------------------------------------------------------------------- /ch08/web/static/nvd3/build.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | copy src\intro.js /B + src\core.js /B + src\tooltip.js /B temp1.js /B 3 | copy src\models\*.js /B temp2.js /B 4 | copy temp1.js /B + temp2.js /B + src\outro.js /B nv.d3.js /B 5 | del temp1.js 6 | del temp2.js 7 | -------------------------------------------------------------------------------- /ch08/web/static/nvd3/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | COMPRESSOR=`which yui-compressor` 3 | cat src/intro.js src/core.js src/tooltip.js src/utils.js src/models/*.js src/outro.js > nv.d3.js 4 | if [ -e $COMPRESSOR ]; then 5 | $COMPRESSOR --type js -o nv.d3.min.js nv.d3.js 6 | fi 7 | -------------------------------------------------------------------------------- /ch08/web/static/nvd3/deprecated/lineChart-old.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 29 | 30 | 31 |
32 |
33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 84 | -------------------------------------------------------------------------------- /ch08/web/static/nvd3/examples/images/grey-minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch08/web/static/nvd3/examples/images/grey-minus.png -------------------------------------------------------------------------------- /ch08/web/static/nvd3/examples/images/grey-plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch08/web/static/nvd3/examples/images/grey-plus.png -------------------------------------------------------------------------------- /ch08/web/static/nvd3/examples/legend.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 76 | -------------------------------------------------------------------------------- /ch08/web/static/nvd3/examples/line.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 96 | -------------------------------------------------------------------------------- /ch08/web/static/nvd3/examples/lineWithFocusChart.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 33 | 34 | 35 |
36 | 37 |
38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 88 | -------------------------------------------------------------------------------- /ch08/web/static/nvd3/examples/multiBar.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 18 | 19 | 20 |
21 | 22 |
23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 93 | -------------------------------------------------------------------------------- /ch08/web/static/nvd3/examples/multiBarChart.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 29 | 30 | 31 |
32 | 33 |
34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 81 | -------------------------------------------------------------------------------- /ch08/web/static/nvd3/examples/pie.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 94 | -------------------------------------------------------------------------------- /ch08/web/static/nvd3/examples/sparkline.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 21 | 22 | 23 |

Sparkline:

24 | 25 | 26 | 27 | 28 | 29 | 63 | -------------------------------------------------------------------------------- /ch08/web/static/nvd3/examples/sparklinePlus.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 18 | 19 | 20 |

SparklinePlus:

21 |

22 | 23 | 24 | 25 | 26 | 27 | 28 | 68 | -------------------------------------------------------------------------------- /ch08/web/static/nvd3/examples/stream_layers.js: -------------------------------------------------------------------------------- 1 | 2 | /* Inspired by Lee Byron's test data generator. */ 3 | function stream_layers(n, m, o) { 4 | if (arguments.length < 3) o = 0; 5 | function bump(a) { 6 | var x = 1 / (.1 + Math.random()), 7 | y = 2 * Math.random() - .5, 8 | z = 10 / (.1 + Math.random()); 9 | for (var i = 0; i < m; i++) { 10 | var w = (i / m - y) * z; 11 | a[i] += x * Math.exp(-w * w); 12 | } 13 | } 14 | return d3.range(n).map(function() { 15 | var a = [], i; 16 | for (i = 0; i < m; i++) a[i] = o + o * Math.random(); 17 | for (i = 0; i < 5; i++) bump(a); 18 | return a.map(stream_index); 19 | }); 20 | } 21 | 22 | /* Another layer generator using gamma distributions. */ 23 | function stream_waves(n, m) { 24 | return d3.range(n).map(function(i) { 25 | return d3.range(m).map(function(j) { 26 | var x = 20 * j / m - i / 3; 27 | return 2 * x * Math.exp(-.5 * x); 28 | }).map(stream_index); 29 | }); 30 | } 31 | 32 | function stream_index(d, i) { 33 | return {x: i, y: Math.max(0, d)}; 34 | } 35 | 36 | -------------------------------------------------------------------------------- /ch08/web/static/nvd3/src/intro.js: -------------------------------------------------------------------------------- 1 | (function(){ 2 | -------------------------------------------------------------------------------- /ch08/web/static/nvd3/src/outro.js: -------------------------------------------------------------------------------- 1 | })(); -------------------------------------------------------------------------------- /ch08/web/templates/partials/emails.html: -------------------------------------------------------------------------------- 1 | 2 | {% extends "layout.html" %} 3 | 4 | 5 | {% import "macros.jnj" as common %} 6 | 7 | 8 | 9 | {% block content -%} 10 |

Emails 11 |

14 |
15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | {% for email in emails %} 23 | 24 | 25 | 26 | 27 | 28 | {% endfor %} 29 | 30 |
FromSubjectDate
{{ common.display_email_address(email['from'])|safe }}{{ common.display_link(email['message_id'], '/email', email['subject'])|safe }}{{ email['date'] }}
31 | {% if nav_offsets and nav_path -%} 32 | {{ common.display_nav(nav_offsets, nav_path, query)|safe }} 33 | {% endif -%} 34 |
35 | {% endblock -%} 36 | -------------------------------------------------------------------------------- /ch09/mongo.js: -------------------------------------------------------------------------------- 1 | // Drop all relations, to recreate 2 | db.overall_reply_ratio.drop(); 3 | db.from_to_reply_ratios.drop(); 4 | db.p_sent_from_to.drop(); 5 | db.hourly_from_reply_probs.drop(); 6 | db.p_sent_hour.drop(); 7 | db.token_reply_rates.drop(); 8 | 9 | db.p_token.ensureIndex({'token': 1}) 10 | db.token_reply_rates.ensureIndex({'token': 1}) 11 | db.token_no_reply_rates.ensureIndex({'token': 1}) 12 | db.from_to_reply_ratios.ensureIndex({from: 1, to: 1}) 13 | db.from_to_no_reply_ratios.ensureIndex({from: 1, to: 1}) 14 | 15 | // { 16 | // "_id" : ObjectId("511700c330048b60597e7c04"), 17 | // "token" : "public", 18 | // "reply_rate" : 0.6969366812896153 19 | // } -------------------------------------------------------------------------------- /ch09/pig/hamming.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Based on http://www.scipy.org/Cookbook/SignalSmooth 3 | 4 | import numpy as np 5 | import sys, os 6 | 7 | def smooth(data, window_len=5, window='hamming'): 8 | x = data 9 | s=np.r_[2*x[0]-x[window_len:1:-1], x, 2*x[-1]-x[-1:-window_len:-1]] 10 | w = getattr(np, window)(window_len) 11 | y = np.convolve(w/w.sum(), s, mode='same') 12 | return y[window_len-1:-window_len+1] 13 | 14 | def main(): 15 | for line in sys.stdin: 16 | email, hour_dist = line.split('\t') 17 | vals = hour_dist[2:-3].rsplit('),(') 18 | data = [] 19 | for val in vals: 20 | hour, p_reply = val.rsplit(',') 21 | data.append(float(p_reply)) 22 | smoothed = smooth(np.array(data)).flatten() 23 | for i in range(0,len(smoothed)): 24 | hour = vals[i].rsplit(',')[0] 25 | print email + "\t" + hour + "\t" + str(smoothed[i]) 26 | 27 | if __name__ == "__main__": 28 | main() 29 | -------------------------------------------------------------------------------- /ch09/pig/publish_topics.pig: -------------------------------------------------------------------------------- 1 | /* Set Home Directory - where we install software */ 2 | %default HOME `echo \$HOME/Software/` 3 | 4 | /* MongoDB libraries and configuration */ 5 | REGISTER $HOME/mongo-hadoop/mongo-2.10.1.jar 6 | REGISTER $HOME/mongo-hadoop/core/target/mongo-hadoop-core-1.1.0-SNAPSHOT.jar 7 | REGISTER $HOME/mongo-hadoop/pig/target/mongo-hadoop-pig-1.1.0-SNAPSHOT.jar 8 | 9 | DEFINE MongoStorage com.mongodb.hadoop.pig.MongoStorage(); 10 | 11 | token_reply_rates = LOAD '/tmp/reply_rates.txt' AS (token:chararray, reply_rate:double); 12 | store token_reply_rates into 'mongodb://localhost/agile_data.token_reply_rates' using MongoStorage(); 13 | 14 | token_no_reply_rates = LOAD '/tmp/no_reply_rates.txt' AS (token:chararray, reply_rate:double); 15 | store token_no_reply_rates into 'mongodb://localhost/agile_data.token_no_reply_rates' using MongoStorage(); 16 | 17 | p_token = LOAD '/tmp/p_token.txt' AS (token:chararray, prob:double); 18 | store p_token into 'mongodb://localhost/agile_data.p_token' using MongoStorage(); 19 | -------------------------------------------------------------------------------- /ch09/pig/smooth_times.pig: -------------------------------------------------------------------------------- 1 | /* Set Home Directory - where we install software */ 2 | %default HOME `echo \$HOME/Software/` 3 | 4 | /* Avro uses json-simple, and is in piggybank until Pig 0.12, where AvroStorage and TrevniStorage are builtins */ 5 | REGISTER $HOME/pig/build/ivy/lib/Pig/avro-1.5.3.jar 6 | REGISTER $HOME/pig/build/ivy/lib/Pig/json-simple-1.1.jar 7 | REGISTER $HOME/pig/contrib/piggybank/java/piggybank.jar 8 | 9 | DEFINE AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage(); 10 | 11 | /* MongoDB libraries and configuration */ 12 | REGISTER $HOME/mongo-hadoop/mongo-2.10.1.jar 13 | REGISTER $HOME/mongo-hadoop/core/target/mongo-hadoop-core-1.1.0-SNAPSHOT.jar 14 | REGISTER $HOME/mongo-hadoop/pig/target/mongo-hadoop-pig-1.1.0-SNAPSHOT.jar 15 | 16 | DEFINE MongoStorage com.mongodb.hadoop.pig.MongoStorage(); 17 | 18 | set default_parallel 10 19 | set mapred.map.tasks.speculative.execution false 20 | set mapred.reduce.tasks.speculative.execution false 21 | 22 | rmf /tmp/smoothed_sent_dists.avro 23 | rmf /tmp/smoothed_sent_dists.txt 24 | 25 | time_dists_per_email = LOAD '/tmp/date_filled_dist.avro' using AvroStorage(); -- as (address:chararray, sent_distribution:bag{t:tuple(hour:chararray, p_reply:double)}); 26 | 27 | DEFINE smooth_stream `hamming.py` SHIP ('hamming.py'); 28 | smoothed_time_dists_per_email = STREAM time_dists_per_email THROUGH smooth_stream as (address:chararray, hour:chararray, p_reply:double); 29 | 30 | answer = foreach (group smoothed_time_dists_per_email by address) { 31 | sorted = order smoothed_time_dists_per_email by hour; 32 | generate group as address, sorted.(hour, p_reply) as sent_distribution; 33 | }; 34 | store answer into '/tmp/smoothed_sent_dists.avro' using AvroStorage(); 35 | store answer into '/tmp/smoothed_sent_dists.txt'; 36 | store answer into 'mongodb://localhost/agile_data.hourly_from_reply_probs' using MongoStorage(); 37 | 38 | /*p_sent_hour = load '/tmp/p_sent_hour.txt' as (from:chararray, distribution:bag{t:tuple(sent_hour:chararray, ratio:double)}); 39 | store p_sent_hour into 'mongodb://localhost/agile_data.p_sent_hour' using MongoStorage(); 40 | 41 | */ -------------------------------------------------------------------------------- /ch09/pig/test_results.pig: -------------------------------------------------------------------------------- 1 | /* Set Home Directory - where we install software */ 2 | %default HOME `echo \$HOME/Software/` 3 | 4 | /* Avro uses json-simple, and is in piggybank until Pig 0.12, where AvroStorage and TrevniStorage are builtins */ 5 | REGISTER $HOME/pig/build/ivy/lib/Pig/avro-1.5.3.jar 6 | REGISTER $HOME/pig/build/ivy/lib/Pig/json-simple-1.1.jar 7 | REGISTER $HOME/pig/contrib/piggybank/java/piggybank.jar 8 | 9 | DEFINE AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage(); 10 | DEFINE ABS org.apache.pig.piggybank.evaluation.math.ABS(); 11 | 12 | rmf /tmp/final_answer.txt 13 | 14 | results = load '../results.txt' as (message_id:chararray, p_tokens_weight:double, p_from_to_reply_weight:double, p_reply:double); 15 | 16 | emails = load '/me/Data/test_mbox' using AvroStorage(); 17 | emails = foreach emails generate message_id, in_reply_to; 18 | 19 | with_results = join results by message_id left outer, emails by in_reply_to; 20 | 21 | test_results = foreach with_results generate (double)((emails::message_id is not null) ? 1 : 0) as result:double, *; 22 | errors = foreach test_results generate p_tokens_weight as p_tokens_weight, 23 | p_from_to_reply_weight as p_from_to_reply_weight, 24 | (double)ABS(result - p_reply) as error:double; 25 | answer = foreach (group errors by (p_tokens_weight, p_from_to_reply_weight)) generate flatten(group) as (p_tokens_weight, p_from_to_reply_weight), 26 | SUM(errors.error)/COUNT(errors.error) as avg_error; 27 | final_answer = order answer by avg_error desc; 28 | store final_answer into '/tmp/final_answer.txt'; 29 | -------------------------------------------------------------------------------- /ch09/tune_weights.py: -------------------------------------------------------------------------------- 1 | import pymongo 2 | from datetime import datetime 3 | from avro import schema, datafile, io 4 | import pprint 5 | import sys 6 | import json 7 | from nltk.tokenize import word_tokenize 8 | 9 | import dateutil.parser 10 | 11 | pp = pprint.PrettyPrinter() 12 | 13 | conn = pymongo.Connection() # defaults to localhost 14 | db = conn.agile_data 15 | from_to_reply_ratios = db['from_to_reply_ratios'] 16 | hourly_from_reply_probs = db['hourly_from_reply_probs'] 17 | token_reply_rates = db['token_reply_rates'] 18 | 19 | # Test reading avros 20 | rec_reader = io.DatumReader() 21 | # Create a 'data file' (avro file) reader 22 | df_reader = datafile.DataFileReader( 23 | open("/me/Data/test_mbox/part-1.avro"), 24 | rec_reader 25 | ) 26 | 27 | # Go through all the avro emails... 28 | for record in df_reader: 29 | # Get the message_id, from, first to, and message body 30 | message_id = record['message_id'] 31 | froms = record['from']['address'] 32 | if record['tos']: 33 | if record['tos'][0]: 34 | to = record['tos'][0]['address'] 35 | 36 | # For each token in the body, if there's a match in MongoDB, 37 | # append it and average all of them at the end 38 | word_probs = [] 39 | body = record['body'] 40 | for token in word_tokenize(body): 41 | search = token_reply_rates.find_one({'token': token}) 42 | if search: 43 | word_probs.append(search['reply_rate']) 44 | len_probs = float(len(probs)) 45 | if(len_probs > 0): 46 | token_rate = sum(probs) / len_probs 47 | else: 48 | continue 49 | 50 | # Use from/to probabilities when available 51 | ftrr = from_to_reply_ratios.find_one({'from': froms, 'to': to}) 52 | if ftrr: 53 | p_from_to_reply = ftrr['ratio'] 54 | else: 55 | continue 56 | 57 | # Now try 0.1 increments of weights between these two vectors to weight them 58 | for i in [x / 10.0 for x in range(0, 11, 1)]: 59 | result = (token_rate * i) + (p_from_to_reply * (1 - i)) 60 | print message_id + "\t" + str(i) + "\t" + str(1 - i) + "\t" + str(result) 61 | 62 | # Tada - followup with test_results.pig to find proper weight. Zoom in more as needed. 63 | -------------------------------------------------------------------------------- /ch09/web/config.py: -------------------------------------------------------------------------------- 1 | EMAILS_PER_LIST_PAGE=15 2 | EMAILS_PER_ADDRESS_PAGE=6 3 | ELASTIC_URL='http://localhost:9200/inbox' 4 | MY_EMAIL='russell.jurney@gmail.com' 5 | -------------------------------------------------------------------------------- /ch09/web/smoother.py: -------------------------------------------------------------------------------- 1 | # Based on http://www.scipy.org/Cookbook/SignalSmooth 2 | 3 | import numpy as np 4 | 5 | class Smoother(): 6 | 7 | """Takes an array of objects as input, and the data key of the object for access.""" 8 | def __init__(self, raw_data, data_key): 9 | self.raw_data = raw_data 10 | print self.raw_data 11 | self.data = self.to_array(raw_data, data_key) 12 | 13 | """Given an array of objects with values, return a numpy array of values.""" 14 | def to_array(self, in_data, data_key): 15 | data_array = list() 16 | for datum in in_data: 17 | data_array.append(datum[data_key]) 18 | return np.array(data_array) 19 | 20 | """Smoothing method from SciPy SignalSmooth Cookbook: http://www.scipy.org/Cookbook/SignalSmooth""" 21 | def smooth(self, window_len=5, window='blackman'): 22 | x = self.data 23 | s=np.r_[2*x[0]-x[window_len:1:-1], x, 2*x[-1]-x[-1:-window_len:-1]] 24 | w = getattr(np, window)(window_len) 25 | y = np.convolve(w/w.sum(), s, mode='same') 26 | self.smoothed = y[window_len-1:-window_len+1] 27 | 28 | def to_objects(self): 29 | objects = list() 30 | hours = [ '%02d' % i for i in range(24) ] 31 | for idx, val in enumerate(hours): 32 | objects.append({"sent_hour": val, "total": round(self.smoothed[idx], 0)}) 33 | return objects 34 | -------------------------------------------------------------------------------- /ch09/web/static/bootstrap/img/glyphicons-halflings-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch09/web/static/bootstrap/img/glyphicons-halflings-white.png -------------------------------------------------------------------------------- /ch09/web/static/bootstrap/img/glyphicons-halflings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch09/web/static/bootstrap/img/glyphicons-halflings.png -------------------------------------------------------------------------------- /ch09/web/static/nvd3/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Jekyll Files # 3 | ################ 4 | _site 5 | 6 | 7 | # Random Files # 8 | ################ 9 | *.swp 10 | *~ 11 | *.log 12 | 13 | 14 | # Private Test Data # 15 | ##################### 16 | *REALDATA* 17 | 18 | 19 | # OS generated files # 20 | ###################### 21 | .DS_Store* 22 | ehthumbs.db 23 | Icon? 24 | Thumbs.db 25 | -------------------------------------------------------------------------------- /ch09/web/static/nvd3/Makefile: -------------------------------------------------------------------------------- 1 | JS_FILES = \ 2 | src/intro.js \ 3 | src/core.js \ 4 | src/tooltip.js \ 5 | src/utils.js \ 6 | src/models/axis.js \ 7 | src/models/historicalBar.js \ 8 | src/models/bullet.js \ 9 | src/models/bulletChart.js \ 10 | src/models/cumulativeLineChart.js \ 11 | src/models/discreteBar.js \ 12 | src/models/discreteBarChart.js \ 13 | src/models/distribution.js \ 14 | src/models/indentedTree.js \ 15 | src/models/legend.js \ 16 | src/models/line.js \ 17 | src/models/lineChart.js \ 18 | src/models/linePlusBarChart.js \ 19 | src/models/lineWithFocusChart.js \ 20 | src/models/multiBar.js \ 21 | src/models/multiBarChart.js \ 22 | src/models/multiBarHorizontal.js \ 23 | src/models/multiBarHorizontalChart.js \ 24 | src/models/multiChart.js \ 25 | src/models/ohlcBar.js \ 26 | src/models/pie.js \ 27 | src/models/pieChart.js \ 28 | src/models/scatter.js \ 29 | src/models/scatterChart.js \ 30 | src/models/scatterPlusLineChart.js \ 31 | src/models/sparkline.js \ 32 | src/models/sparklinePlus.js \ 33 | src/models/stackedArea.js \ 34 | src/models/stackedAreaChart.js \ 35 | src/outro.js 36 | 37 | JS_COMPILER = \ 38 | uglifyjs 39 | 40 | all: nv.d3.js nv.d3.min.js 41 | nv.d3.js: $(JS_FILES) 42 | nv.d3.min.js: $(JS_FILES) 43 | 44 | nv.d3.js: Makefile 45 | rm -f $@ 46 | cat $(filter %.js,$^) >> $@ 47 | 48 | %.min.js:: Makefile 49 | rm -f $@ 50 | cat $(filter %.js,$^) | $(JS_COMPILER) >> $@ 51 | 52 | clean: 53 | rm -rf nv.d3.js nv.d3.min.js 54 | 55 | 56 | -------------------------------------------------------------------------------- /ch09/web/static/nvd3/README.md: -------------------------------------------------------------------------------- 1 | Please see Novus' official statement on nvd3 with an explanation, 2 | apology, and commitment to its permanent status as an open-source 3 | project. 4 | [http://nvd3.org/statement.html](http://nvd3.org/statement.html) 5 | 6 | # nvd3 - v0.0.1 7 | 8 | A reusable chart library for d3.JS. 9 | 10 | Currently in an early stage of development, but will be a very active project. It may change quite a bit from its current state, but will always try to follow the style in which d3.js was done. 11 | 12 | You can also check out the [examples page](http://nvd3.org/ghpages/examples.html) 13 | 14 | --- 15 | 16 | If one of [the existing models](https://github.com/novus/nvd3/tree/master/src/models) doesn't meet your needs, fork the project, implement the model and an example using it, send us a pull request, for consideration for inclusion in the project. 17 | 18 | --- 19 | 20 | Minifying your fork: 21 | 22 | The Makefile requires [UglifyJS](https://github.com/mishoo/UglifyJS). 23 | 24 | The easist way to install is to install via npm. Run `npm install 25 | uglify-js` from your home directory, then add the output from `npm bin` 26 | into your path so that you have access to `uglifyjs` from the command 27 | line (remember to restart your terminal window when adding to the path.) 28 | 29 | Once you have `uglifyjs` command available, running `make` from your 30 | fork's root directory will rebuild both `nv.d3.js` and `nv.d3.min.js`. 31 | 32 | Without UglifyJS, you won't get the minified version when running make. 33 | 34 | **We ask that you DO NOT minify pull requests... 35 | If you need to minify please build pull request in separate branch, and 36 | merge and minify in yout master. 37 | 38 | -------------------------------------------------------------------------------- /ch09/web/static/nvd3/build.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | copy src\intro.js /B + src\core.js /B + src\tooltip.js /B temp1.js /B 3 | copy src\models\*.js /B temp2.js /B 4 | copy temp1.js /B + temp2.js /B + src\outro.js /B nv.d3.js /B 5 | del temp1.js 6 | del temp2.js 7 | -------------------------------------------------------------------------------- /ch09/web/static/nvd3/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | COMPRESSOR=`which yui-compressor` 3 | cat src/intro.js src/core.js src/tooltip.js src/utils.js src/models/*.js src/outro.js > nv.d3.js 4 | if [ -e $COMPRESSOR ]; then 5 | $COMPRESSOR --type js -o nv.d3.min.js nv.d3.js 6 | fi 7 | -------------------------------------------------------------------------------- /ch09/web/static/nvd3/deprecated/lineChart-old.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 29 | 30 | 31 |
32 |
33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 84 | -------------------------------------------------------------------------------- /ch09/web/static/nvd3/examples/images/grey-minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch09/web/static/nvd3/examples/images/grey-minus.png -------------------------------------------------------------------------------- /ch09/web/static/nvd3/examples/images/grey-plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rjurney/Agile_Data_Code/5a5a5f11de5ed0c4949bf1e7c80fba329fc72120/ch09/web/static/nvd3/examples/images/grey-plus.png -------------------------------------------------------------------------------- /ch09/web/static/nvd3/examples/legend.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 76 | -------------------------------------------------------------------------------- /ch09/web/static/nvd3/examples/line.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 96 | -------------------------------------------------------------------------------- /ch09/web/static/nvd3/examples/lineWithFocusChart.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 33 | 34 | 35 |
36 | 37 |
38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 88 | -------------------------------------------------------------------------------- /ch09/web/static/nvd3/examples/multiBar.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 18 | 19 | 20 |
21 | 22 |
23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 93 | -------------------------------------------------------------------------------- /ch09/web/static/nvd3/examples/multiBarChart.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 29 | 30 | 31 |
32 | 33 |
34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 81 | -------------------------------------------------------------------------------- /ch09/web/static/nvd3/examples/pie.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 94 | -------------------------------------------------------------------------------- /ch09/web/static/nvd3/examples/sparkline.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 21 | 22 | 23 |

Sparkline:

24 | 25 | 26 | 27 | 28 | 29 | 63 | -------------------------------------------------------------------------------- /ch09/web/static/nvd3/examples/sparklinePlus.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 18 | 19 | 20 |

SparklinePlus:

21 |

22 | 23 | 24 | 25 | 26 | 27 | 28 | 68 | -------------------------------------------------------------------------------- /ch09/web/static/nvd3/examples/stream_layers.js: -------------------------------------------------------------------------------- 1 | 2 | /* Inspired by Lee Byron's test data generator. */ 3 | function stream_layers(n, m, o) { 4 | if (arguments.length < 3) o = 0; 5 | function bump(a) { 6 | var x = 1 / (.1 + Math.random()), 7 | y = 2 * Math.random() - .5, 8 | z = 10 / (.1 + Math.random()); 9 | for (var i = 0; i < m; i++) { 10 | var w = (i / m - y) * z; 11 | a[i] += x * Math.exp(-w * w); 12 | } 13 | } 14 | return d3.range(n).map(function() { 15 | var a = [], i; 16 | for (i = 0; i < m; i++) a[i] = o + o * Math.random(); 17 | for (i = 0; i < 5; i++) bump(a); 18 | return a.map(stream_index); 19 | }); 20 | } 21 | 22 | /* Another layer generator using gamma distributions. */ 23 | function stream_waves(n, m) { 24 | return d3.range(n).map(function(i) { 25 | return d3.range(m).map(function(j) { 26 | var x = 20 * j / m - i / 3; 27 | return 2 * x * Math.exp(-.5 * x); 28 | }).map(stream_index); 29 | }); 30 | } 31 | 32 | function stream_index(d, i) { 33 | return {x: i, y: Math.max(0, d)}; 34 | } 35 | 36 | -------------------------------------------------------------------------------- /ch09/web/static/nvd3/src/intro.js: -------------------------------------------------------------------------------- 1 | (function(){ 2 | -------------------------------------------------------------------------------- /ch09/web/static/nvd3/src/outro.js: -------------------------------------------------------------------------------- 1 | })(); -------------------------------------------------------------------------------- /ch09/web/templates/partials/emails.html: -------------------------------------------------------------------------------- 1 | 2 | {% extends "layout.html" %} 3 | 4 | 5 | {% import "macros.jnj" as common %} 6 | 7 | 8 | 9 | {% block content -%} 10 |

Emails 11 |

14 |
15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | {% for email in emails %} 23 | 24 | 25 | 26 | 27 | 28 | {% endfor %} 29 | 30 |
FromSubjectDate
{{ common.display_email_address(email['from'])|safe }}{{ common.display_link(email['message_id'], '/email', email['subject'])|safe }}{{ email['date'] }}
31 | {% if nav_offsets and nav_path -%} 32 | {{ common.display_nav(nav_offsets, nav_path, query)|safe }} 33 | {% endif -%} 34 |
35 | {% endblock -%} 36 | -------------------------------------------------------------------------------- /ch09/web/templates/partials/will_reply.html: -------------------------------------------------------------------------------- 1 | 2 | {% extends "layout.html" %} 3 | 4 | 5 | {% import "macros.jnj" as common %} 6 | 7 | {% block content -%} 8 | 9 |
10 |
11 | 12 | 13 | 14 | 15 | 16 | 17 |
18 | 19 |
20 | 21 |

{{ result }}

22 | 23 | {% endblock -%} 24 | -------------------------------------------------------------------------------- /pigrc: -------------------------------------------------------------------------------- 1 | /* Setup for Piggybank */ 2 | %default PIGGYBANK_LIB '/me/Software/pig/contrib/piggybank/java' 3 | REGISTER $PIBBYBANK_LIB/piggybank.jar 4 | 5 | /* Setup for Avro */ 6 | %default PIG_LIB '/me/Software/pig/build/ivy/lib/Pig'; 7 | REGISTER $PIG_LIB/avro-1.5.3.jar 8 | REGISTER $PIG_LIB/json-simple-1.1.jar 9 | DEFINE AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage(); 10 | 11 | /* Setup for MongoDB */ 12 | $default MONGO_LIB '/me/Software/mongo-hadoop/' 13 | REGISTER $MONGO_LIB/mongo-2.10.1.jar 14 | REGISTER $MONGO_LIB/core/target/mongo-hadoop-core-1.1.0-SNAPSHOT.jar 15 | REGISTER $MONGO_LIB/pig/target/mongo-hadoop-pig-1.1.0-SNAPSHOT.jar 16 | DEFINE MongoStorage com.mongodb.hadoop.pig.MongoStorage(); 17 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | BareNecessities==0.2.8 2 | ESClient==0.5.3 3 | Flask==0.9 4 | Jinja2==2.6 5 | LEPL==5.1.3 6 | Mail==2.1.0 7 | Werkzeug==0.8.3 8 | distribute==0.6.31 9 | python-snappy 10 | avro==1.7.3 11 | -e git+https://github.com/rhec/pyelasticsearch.git#egg=pyelasticsearch 12 | pymongo==2.4.1 13 | requests==1.0.4 14 | simplejson==2.6.2 15 | wsgiref==0.1.2 16 | numpy 17 | honcho 18 | scipy 19 | dotcloud 20 | python-dateutil 21 | nltk 22 | --------------------------------------------------------------------------------