├── 2017 ├── acl-weather │ ├── README.md │ └── acl-weather.ipynb ├── austin-taco-transit │ ├── README.md │ ├── austin-taco-transit.ipynb │ ├── busstops.csv │ └── tacos.csv └── names-pareto │ ├── README.md │ └── names-pareto.ipynb ├── 2018 └── python-community-insights │ └── Python Community Insights.ipynb ├── 2019 ├── avro-to-arrow │ ├── AUTHORS │ ├── LICENSE │ ├── README.rst │ ├── avro_to_arrow │ │ ├── __init__.py │ │ ├── decoder.py │ │ └── generator.py │ ├── example.py │ ├── setup.py │ └── tests │ │ └── test_decoder.py └── python-auth-samples │ ├── README.md │ ├── api_key.py │ ├── iam.py │ ├── requirements.txt │ ├── scopes.py │ └── slides.pdf ├── 2020 └── bigquery-python │ └── user-credentials-magics.ipynb ├── 2021 └── 06-python-decorator │ └── decorator_sample.py ├── 2024 ├── 12-bigframes-usa-names │ └── usa_names.ipynb ├── 12-bigquery-utils │ └── bigquery-utils.ipynb └── 12-pydata-global │ ├── img │ ├── iowa-categories.png │ ├── iowa-counties-map.png │ ├── iowa-pop-volume-line.png │ ├── iowa-pop-volume-scatter.png │ ├── iowa-volume-per-pop-lines.png │ ├── iowa-volume-per-pop-scatter.png │ ├── iowa-volumes.png │ ├── iowa-words.png │ ├── iowa-zips-dirty.png │ └── iowa-zips.png │ └── index.md ├── 2025 └── overcastdata_freeze_dates.ipynb ├── .github └── workflows │ ├── bigquery.yml │ └── static.yml ├── .gitignore ├── LICENSE ├── README.md ├── index.html └── requirements.txt /.github/workflows/bigquery.yml: -------------------------------------------------------------------------------- 1 | # Workflow that executes the BigQuery utilities notebook as a test 2 | name: Run BigQuery snippets as a test 3 | 4 | on: 5 | push: 6 | paths-ignore: 7 | - ".gitignore" 8 | branches: 9 | - main 10 | pull_request_target: 11 | types: 12 | - labeled 13 | 14 | jobs: 15 | build: 16 | runs-on: ubuntu-latest 17 | if: github.event_name == 'push' || github.event.label.name == 'ci-run-cloud' 18 | 19 | # Add "id-token" with the intended permissions. 
20 | permissions: 21 | contents: 'read' 22 | # required for GCP workload identity federation 23 | id-token: 'write' 24 | 25 | steps: 26 | - name: checkout 27 | uses: actions/checkout@v4 28 | if: github.event.label.name != 'ci-run-cloud' 29 | 30 | - name: checkout 31 | if: github.event.label.name == 'ci-run-cloud' 32 | uses: actions/checkout@v4 33 | with: 34 | fetch-depth: 0 35 | ref: ${{ github.event.pull_request.head.sha }} 36 | 37 | - name: Set up Python 38 | # This is the version of the action for setting up Python, not the Python version. 39 | uses: actions/setup-python@v5 40 | with: 41 | # Semantic version range syntax or exact version of a Python version 42 | python-version: '3.12' 43 | # Optional - x64 or x86 architecture, defaults to x64 44 | architecture: 'x64' 45 | 46 | - uses: 'google-github-actions/auth@v2' 47 | with: 48 | project_id: 'friendliness-dev-demo' 49 | workload_identity_provider: 'projects/368174856750/locations/global/workloadIdentityPools/github/providers/code-snippets' 50 | 51 | - name: Display Python version 52 | run: python -c "import sys; print(sys.version)" 53 | 54 | - name: Install packages 55 | run: python -m pip install -r requirements.txt 56 | 57 | - name: Test the notebook 58 | run: python -m jupyter nbconvert --to html --execute 2024/12-bigquery-utils/bigquery-utils.ipynb 59 | -------------------------------------------------------------------------------- /.github/workflows/static.yml: -------------------------------------------------------------------------------- 1 | # Simple workflow for deploying static content to GitHub Pages 2 | name: Deploy static content to Pages 3 | 4 | on: 5 | # Runs on pushes targeting the default branch 6 | push: 7 | branches: ["main"] 8 | 9 | # Allows you to run this workflow manually from the Actions tab 10 | workflow_dispatch: 11 | 12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 13 | permissions: 14 | contents: read 15 | pages: write 16 | id-token: write 17 | 18 | # 
Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 19 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 20 | concurrency: 21 | group: "pages" 22 | cancel-in-progress: false 23 | 24 | jobs: 25 | # Single deploy job since we're just deploying 26 | deploy: 27 | environment: 28 | name: github-pages 29 | url: ${{ steps.deployment.outputs.page_url }} 30 | runs-on: ubuntu-latest 31 | steps: 32 | - name: Checkout 33 | uses: actions/checkout@v4 34 | - name: Setup Pages 35 | uses: actions/configure-pages@v5 36 | - name: Upload artifact 37 | uses: actions/upload-pages-artifact@v3 38 | with: 39 | # Upload entire repository 40 | path: '.' 41 | - name: Deploy to GitHub Pages 42 | id: deployment 43 | uses: actions/deploy-pages@v4 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /2017/acl-weather/README.md: -------------------------------------------------------------------------------- 1 | # Did Austin City Limits Music Festival Change to October to Avoid the Heat? 2 | 3 | Jupyter notebook for [this blog post](https://medium.com/@TimSwast/did-austin-city-limits-music-festival-change-to-october-to-avoid-the-heat-44b57ec7328c). 4 | 5 | - [acl-weather.ipynb](acl-weather.ipynb) 6 | -------------------------------------------------------------------------------- /2017/austin-taco-transit/README.md: -------------------------------------------------------------------------------- 1 | # Finding the perfect place to live in Austin, TX 2 | 3 | A Jupyter notebook to demonstrate [visualizing big data with Google Cloud](https://medium.com/@TimSwast/visualizing-big-data-with-google-cloud-fe323a03f85c). I use data visualization to find the perfect place to live in Austin: close to reliable public transit and delicious tacos. 
4 | 5 | - [austin-taco-transit.ipynb](http://nbviewer.jupyter.org/github/tswast/code-snippets/blob/master/2017/austin-taco-transit/austin-taco-transit.ipynb) 6 | -------------------------------------------------------------------------------- /2017/austin-taco-transit/tacos.csv: -------------------------------------------------------------------------------- 1 | Torchy's,30.4528565,-97.82776 2 | Torchy's,30.3707033,-97.7564423 3 | Torchy's,30.3234146,-97.7393933 4 | Torchy's,30.2935625,-97.7419348 5 | Torchy's,30.3031357,-97.6991092 6 | Torchy's,30.2506632,-97.7545106 7 | Torchy's,30.2454379,-97.7517204 8 | Torchy's,30.2367433,-97.7628865 9 | Torchy's,30.2223925,-97.840664 10 | Torchy's,30.3468437,-97.9672742 11 | Tacodeli,30.4076255,-97.7134925 12 | Tacodeli,30.3487391,-97.7351564 13 | Tacodeli,30.3104193,-97.7403106 14 | Tacodeli,30.2584636,-97.7876652 15 | Tacodeli,30.2903258,-97.8262127 16 | El Chilito,30.284025,-97.718475 17 | El Chilito,30.2631467,-97.7245702 -------------------------------------------------------------------------------- /2017/names-pareto/README.md: -------------------------------------------------------------------------------- 1 | # How to pick a name with big data? 2 | 3 | Jupyter notebook for [this blog post](https://medium.com/@TimSwast/how-to-pick-a-name-with-big-data-fdbc68205449). 4 | 5 | - [names-pareto.ipynb](names-pareto.ipynb) 6 | 7 | -------------------------------------------------------------------------------- /2017/names-pareto/names-pareto.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How to pick a name with big data?\n", 8 | "\n", 9 | "Picking a name is a very personal thing. A name can honor past family members and friends. It can remind you of people you’ve met and their personalities. 
But, I’m going to take a very utilitarian approach.\n", 10 | "\n", 11 | "First, what are we optimizing for? We could try to find a unique name by looking at [the distribution of names by starting letter](https://medium.com/@TimSwast/what-letters-most-commonly-start-first-names-ef5f7f8e50d2#.kzv4g7fho). We might choose a name beginning with the letter U based on that analysis.\n", 12 | "\n", 13 | "But consider the situation where an elementary school teacher lines students up alphabetically. With a name beginning with U, the child would always be at the end. Personally, I find being at the end of a line uncomfortable. It’s much better to be near the center of a line. Let’s see if we can use data to optimize for comfort in this situation." 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": { 20 | "collapsed": false, 21 | "deletable": true, 22 | "editable": true 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "# Copyright 2017 Google Inc.\n", 27 | "#\n", 28 | "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", 29 | "# you may not use this file except in compliance with the License.\n", 30 | "# You may obtain a copy of the License at\n", 31 | "#\n", 32 | "# http://www.apache.org/licenses/LICENSE-2.0\n", 33 | "#\n", 34 | "# Unless required by applicable law or agreed to in writing, software\n", 35 | "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", 36 | "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 37 | "# See the License for the specific language governing permissions and\n", 38 | "# limitations under the License.\n", 39 | "\n", 40 | "from matplotlib import pyplot as plt\n", 41 | "from pandas.io import gbq\n", 42 | "import pandas as pd" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "First, use [BigQuery](https://cloud.google.com/bigquery/) to query the [USA Names public 
dataset](https://cloud.google.com/bigquery/public-data/usa-names). Summarize how many people have had each name." 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 2, 55 | "metadata": { 56 | "collapsed": false, 57 | "deletable": true, 58 | "editable": true 59 | }, 60 | "outputs": [ 61 | { 62 | "name": "stdout", 63 | "output_type": "stream", 64 | "text": [ 65 | "Requesting query... ok.\n", 66 | "Query running...\n", 67 | "Query done.\n", 68 | "Processed: 84.1 Mb\n", 69 | "\n", 70 | "Retrieving results...\n", 71 | "Got 29828 rows.\n", 72 | "\n", 73 | "Total time taken 4.94 s.\n", 74 | "Finished at 2017-03-03 03:03:58.\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "q = '''#standardSQL\n", 80 | "SELECT\n", 81 | " name,\n", 82 | " name_total,\n", 83 | " SUM(name_total) OVER(ORDER BY name ASC) AS name_cumulative\n", 84 | "FROM (\n", 85 | " SELECT\n", 86 | " name,\n", 87 | " SUM(number) AS name_total\n", 88 | " FROM\n", 89 | " `bigquery-public-data.usa_names.usa_1910_2013`\n", 90 | " GROUP BY\n", 91 | " name )\n", 92 | "ORDER BY\n", 93 | " name ASC'''\n", 94 | "df = gbq.read_gbq(q, project_id='swast-scratch', dialect='standard')" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 3, 100 | "metadata": { 101 | "collapsed": false, 102 | "deletable": true, 103 | "editable": true 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "# Add a column that converts the cumulative total to percentages\n", 108 | "total_names = df.name_cumulative.tail(1).values\n", 109 | "df = df.assign(name_percent=pd.Series((df.name_cumulative * 100.0) / total_names).values)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 4, 115 | "metadata": { 116 | "collapsed": true, 117 | "deletable": true, 118 | "editable": true 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "def pareto(df, nletters):\n", 123 | " dff = df.groupby(by=lambda x: df.name[x][:nletters])\n", 124 | " dff = dff.agg({'name_total': 'sum', 'name_percent': 
'max'})\n", 125 | "\n", 126 | " # Make a pareto plot (two y-axes)\n", 127 | " dff.name_total.plot.bar()\n", 128 | " return dff.name_percent.plot(secondary_y=True)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "Next, build a [Pareto chart](https://en.wikipedia.org/wiki/Pareto_chart) of the distribution of names by starting letter." 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 5, 141 | "metadata": { 142 | "collapsed": false, 143 | "deletable": true, 144 | "editable": true 145 | }, 146 | "outputs": [ 147 | { 148 | "data": { 149 | "text/plain": [ 150 | "" 151 | ] 152 | }, 153 | "execution_count": 5, 154 | "metadata": {}, 155 | "output_type": "execute_result" 156 | }, 157 | { 158 | "data": { 159 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAf0AAAFdCAYAAAAJ0LRmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XdYVFf+P/D3FHoZUJoCEQVRVFCjMaKxorDBxliTmLqb\nmPysURONpO2m6KZHE/NNTLNkd1NUNJaoERVNFI0FUYMVG+BQpNdp9/cHkUgUpzBwp7xfz+PjzNw7\n536ob+6595wjEQRBABEREdk9qdgFEBERUetg6BMRETkIhj4REZGDYOgTERE5CIY+ERGRg2DoExER\nOQi5mAdPTk7Gnj170LZtW2zatOmO+y5ZsgQHDx6ERCJBdXU1SkpKcOjQoVaqlIiIyPZJxBynf/jw\nYXh4eGDBggUGQ/9m33zzDbKysvDmm2+2YHVERET2RdTu/b59+8Lb27vRa1evXsWTTz6JCRMm4OGH\nH8bFixdved/mzZsxevTo1iqTiIjILojavX87L7/8Ml577TXcddddyMzMxD//+U+sWrWqYXteXh5y\nc3PRv39/EaskIiKyPVYV+tXV1Th27BjmzJmDG1cdtFpto322bNmChIQESCQSMUokIiKyWVYV+nq9\nHt7e3khJSWlyn61bt+LVV19txaqIiIjsg9HX9PV6PZRKJZ555plbtqWkpCA2NhZKpRJKpRJr1641\nuoCb7yP09PRESEgItm3b1vDa6dOnGx5nZ2ejvLwcvXr1Mrp9IiIiW5GcnIwBAwZgzJgxDa+VlZXh\n73//OxISEvCPf/wDFRUVDdveeOMNxMfHY9y4ccjKyjLYvtGhv3r1aoSHhze5fdSoUUhJSUFKSgom\nTpxoVJvz58/HAw88gIsXL2Lo0KFYt24d3n33Xaxduxbjxo3D6NGjsWvXrob9t27dilGjRhlbMhER\nkU0ZP348vvzyy0avrVixArGxsdi+fTvuvfdefPbZZwCAtLQ0XLlyBTt27MBrr71mVC+4Ud37KpUK\naWlpeOaZZ/D111/fdh9zRv699957t339iy++uO3rM2fONPkYREREtqJv377Izc1t9Fpqaiq++eYb\nAIBSqcSjjz6K5557DqmpqUhKSgIA9OzZExUVFSgqKoKfn1+T7Rt1pr948WIsWLDgjjfP7dixA+PG\njcOcOXOgUqmMaZaIiIgMKC4ub
ghyf39/FBcXAwAKCgoQFBTUsF9gYCDy8/Pv2JbB0N+zZw/8/PwQ\nFRXV5Nn88OHDsWvXLmzcuBGxsbFYuHCh0R8MERERme52mWxoZJvB7v2jR49i165dSEtLQ11dHaqq\nqrBgwQK8/fbbDfsoFIqGx5MnT8a7775rsFitVge5XGZwPyIiciyVNRrkX6+CIAB6QYBeECDob3os\nCNDrBegFNDxu2Pcvj7U6PdQaHdTa+v/rNDpoNI1fU2t1UN94TfPHY62u0T41dVqYM3+tq7MM7q5O\ncHeVQ+Hpgn/PuM/kNtq2bdvQbV9YWIg2bdoAqD+zv7lnXaVSISAg4I5tGQz9efPmYd68eQCAQ4cO\n4auvvmoU+ABQWFgIf39/APXXHiIiIgx+ECUl1Qb3ISIix3ElvwI7j+Tg4O/50Gj1otQgk0rgJJfC\nWS6Fk1wGVycZvN2c4Oosg6uLHO4ucrg1/JP9+dj5z+fuLnK4/rFdJjV94tu/nsEPHz4c69evx7Rp\n05CSkoK4uDgAQFxcHP7zn/8gMTERGRkZ8Pb2vuP1fKAZ4/SXLVuG6OhoDBs2DGvWrMGuXbsgl8uh\nUCiwZMkSc5slIiIHotXpcfRsIVKP5OBcThkAwN/HFTGd/CCTSSCVSCCR1HdbS6WABBJIpTe9JsEf\n+9Q/rt/vz+1yqQROTlI4y2UNYS6X1z+vD/bG/8wJaUuaP38+Dh48iNLSUgwdOhSzZs3CtGnTMGfO\nHKxbtw7t27fH0qVLAQBDhgxBWloaRo4cCTc3N6OyV7QFdwoLKwzvREREdqm8So20jFzsychDSUUd\nAKBHxzaI6xOC6PC2kNrprKv+/l6iHt+qZuQjIiL7lp1XjtQjV/Hb6QJodQJcnWWI6xOC4XcHo11b\nD7HLs3sMfSIialEarR6HTxdg55EcXLxWDgAIauOOuD4hGNAjCG4ujKLWws80ERG1iJKKOuw5lou0\njFyUV2sgAdArwg9xfULQLcyXC6eJgKFPREQWIwgCzuWUYdfRHBw5UwidXoC7ixwJ/UIx7O4QBPi4\niV2iQ2PoExFRs2i0OlxSVeB8ThkO/p6PKwWVAIBgfw/E9QlBbLcguDhzXhZrwNAnIiKTlFWpcT6n\nDOdzS3E+twyXVRXQ6uoHgkklEvTp4o8RfUIQGerDLnwrw9AnIqIm6QUBeUVVOJ9bVh/0OWUoKK1p\n2C6VSHBXoCcighWICFGgS6gPFJ4uIlZMd8LQJyKiBnVqHbKvleN8TinO5ZbhQm45auq0DdvdXeSI\n7tQWESEKdA5WoGM7b3bd2xCGPhGRjaqu1eLwmQKoNToIQP3c8ILw52MAAurnor8xD5sg4I/t9Q9u\nPK6q1eJ8bhmu5ldCf9OcbQG+bri7sx8iQhSICFagnZ+H3U6c4wgY+kRENujs1VJ8vul3XC+vtVib\ncpkEndp7N3TVRwQr4O3hbLH2SXwMfSIiG6LV6fHjr5ew5cAlAEBi/w4IC/JC/cn3H3PO448lVhse\n32Eb6ndwcZIhNMADTlz91K4x9ImIbERBSTVWbPod2Xnl8FO44qkx3dA5xEfsssiGMPSJiKycIAj4\n9YQK/9l5FnVqHfp3D8TDI7vA3ZW/wsk0/I4hIrJiVbUarN52Br+dLoCbiwxPjemG2O5BYpdFNoqh\nT0Rkpc5cKcGKTb+jpKIOESEKTBvdDX6cxpaagaFPRGRltDo9Nv5yEVsPXIZEIkHSoI4YFdsBMqlU\n7NLIxjH0iYisSH5xNT778RQuqSrgp3DFtLHdERGsELssshMMfSIiKyAIAvZlXsP/dp5DnUaHgT2C\n8NDISK41TxbF7yYiIpFV1miwattpHDlTCDcXOZ4Z1x39ogLFLovsEEOfiEhEWZeK8cWWLJRU1CEy\nRIGnxnRHW4Wr2GWRnWLoExGJQKvTI2VvNrYdvAKpVILxgzshsX8HSKWc155aDkOfiKiVlVbWYen
a\nTFxWVSDA1w3TxnRHp/beYpdFDoChT0TUiiprNHjv2wzkFlVhYI8gTI2PhKszfxVT6+B3GhFRK6mp\n0+KD7+sDf0SfEDw4onP94jdErYQzPRARtQK1RoeP1mXi4rUKDOwRhAcY+CQCo0Nfr9dDqVTimWee\nuWWbWq3G3LlzER8fjylTpiAvL8+iRRIR2TKtTo9PN57C6Sul6BPpj8cTu0LKwCcRGB36q1evRnh4\n+G23rV27FgqFAjt27MBjjz2Gd955x2IFEhHZMr0g4KstWcg4X4TuYb6YNrY7p9Ml0Rj1nadSqZCW\nloZJkybddntqaiqUSiUAICEhAQcOHLBchURENkoQBHyz4yzSf89HRLACM8fHwEnOwCfxGHUj3+LF\ni7FgwQJUVFTcdntBQQGCguqXepTJZPD29kZpaSl8fHwsVymRmXQ6HS5dyja4X1hYJ8hkslaoiBzF\nurRs7DmWi9AATzw7KQYuzvz+InEZDP09e/bAz88PUVFROHjw4G33EQThlue8QYWsxaVL2Zjzzo9w\nVwQ0uU91WQGWPj8W4eGdW7EysmdbDlzC1vTLCPR1w7wpveDu6iR2SUSGQ//o0aPYtWsX0tLSUFdX\nh6qqKixYsABvv/12wz5BQUFQqVQIDAyETqdDZWUlFAquCkXWw10RAE/fYLHLIAex+1gu1qVlo423\nC557oDcUHs5il0QEwIhr+vPmzcOePXuQmpqK999/H/fee2+jwAeAYcOGISUlBQCwbds29O/fv2Wq\nJSKycumnVPhm+xl4uTvhuQd6cx59sipm31GybNky7N69GwAwadIklJSUID4+HqtWrcL8+fMtViAR\nka3IOFeELzZnwdVFjvlTeiGojbvYJRE1YtKMfP369UO/fv0AALNnz2543dnZGUuXLrVsZURENiTr\ncgk+2XAScrkEz06KwV2BXmKXRHQLjh0hImqm7LxyLFuXCUDAzPHR6BzCkUtknRj6RETNkFNYiQ++\nz4Bao8PTY7ujR8e2YpdE1CSGPhGRmQpKqvHedxmoqtXiifuj0KdL08NCiawBQ5+IyAwlFXV499sM\nlFWq8WBcZ9wX007skogMYugTEZmoolqN977LQFFZLZLu64iR94SKXRKRURj6REQmqKnT4v3vjyOv\nqArx94RizMAwsUsiMhpDn4jISBqtHsvWZuKyqgKDYtphyvAITjlONoWhT0RkBEEQsGb7GZy5Woq+\nXfzx2N+6MvDJ5jD0iYiMsPNwDn45cQ1hQV54cnQ3SKUMfLI9DH0iIgNOXSrGd7vOw9vDGTPHR8PZ\niUvkkm1i6BMR3UF+STU+3XASUikwc3w02nhzAR2yXQx9IqIm1NRp8dG6E6iq1eKRhC6ICOaS4WTb\nGPpERLehFwR8vul35BVVYUTfEAyKaS92SUTNxtAnIrqNDfuykXG+CN3CfDFleITY5RBZBEOfiOgv\nDmXlY/P+ywjwccMz43pAJuWvSrIP/E4mIrrJZVUFvtqSBRdnGWZNiIanm5PYJRFZDEOfiOgP5VVq\nfLQ+E2qtHtNGd0Owv6fYJRFZFEOfiAiAVqfHJyknUFxeB+Wgjugd6S92SUQWx9AnIgLw35/P4mxO\nGfp2DcDoAWFil0PUIuRiF0BEJLbdR3OwJyMPdwV44h+JUZxTn0SzcuVKrF27FhKJBJGRkViyZAkK\nCgowb948lJWVoXv37nj77bchl5sX3zzTJyKHduZKCf678xy83J0wc0I0XJw5xS6JIz8/H2vWrMH6\n9euxadMm6HQ6bNmyBe+++y6eeOIJbN++HV5eXli7dq3Zx2DoE5HDKiqtwfKUkwCAGcpo+CncRK6I\nHJ1er0dNTQ20Wi1qa2sREBCAgwcPIiEhAQCgVCrx888/m90+u/eJyCHVqrVYtu4EKms0eDShCyJD\nfcQuiRxcYGAgnnjiCQwdOhRubm4YOHAgunXrBm9vb0j/mCs
iKCgIBQUFZh9DtND39XWHXM5uNGp5\n/v53Y3f/u8Uug6yIXi/grTW/IaewEvcPCMOk+K5il0SE8vJypKamYvfu3fDy8sKcOXOwd+/eW/Zr\nzj0nooV+SUm1WIcmB3PhwjksWpEOT9/gJvepLMnFkmn9ER7euRUrI7H8+MtF7M+8hshQHygHhqGw\nsELskshB+Pt7Nblt//79CA0NhY9Pfa/TiBEjcOzYMZSXl0Ov10MqlUKlUiEgIMDs4xsMfbVajalT\np0Kj0UCn0yEhIQEzZ85stE9KSgrefvttBAUFAQCmTp2KiRMnml0UEVFLOXKmEBt+uYi23q6YruwB\nuYy3NpF1aN++PY4fP466ujo4OzsjPT0d0dHRKC0txbZt25CYmIiUlBTExcWZfQyDoe/s7IzVq1fD\nzc0NOp0ODz74IAYPHoyYmJhG+40aNQovvfSS2YUQEbW0nIJKfLH5dzg7STFrQjS83Z3FLomoQUxM\nDBISEpCUlAS5XI5u3bph8uTJGDx4MObNm4elS5ciKiqqWSfVRnXvu7nV39GqVquh1Wpvu48gCGYX\nQUTU0iqq1Vi2LhN1Gh2mJ/XAXYFNd7MSiWXmzJm39KaHhobihx9+sEj7RoW+Xq/H+PHjceXKFUyd\nOvWWs3wA2LFjBw4fPoywsDAsWrSooaufiMhcNXVapGXk4WpBJQQIEIT6m/AE4Y/Ht/2/8X561J+U\nlFbU4Xp5HcYMCEPfruZfEyWyZUaFvlQqxYYNG1BZWYnp06fj/PnziIj4c33p4cOHY/To0XBycsK3\n336LhQsXYtWqVS1WNBHZt8oaDVKP5GDn4auoqr1976IxJKi/01kiqf//vph2GDeoo+UKJbIxJt29\n7+npiX79+mHfvn2NQl+hUDQ8njx5Mt59913LVUhEDqO8So3tv13B7qO5qFXr4OnmBOXgTri3WyDk\nUgkkEgmk0voQl0okkEoah7pU8ue2G68R0Z8Mhn5xcTGcnJzg5eWF2tpaHDhwANOmTWu0T2FhIfz9\n61ekSk1NbfQHARGRISUVdfjp4GXszciDWquHwsMZYwd2xNDe7eHqzDnEiCzF4E9TYWEhXnjhBej1\neuj1eiQmJmLIkCFYtmwZoqOjMWzYMKxZswa7du2CXC6HQqHAkiVLWqN2IrJxhaU1+Cn9Mn45cQ1a\nnYA23i64/94OGBTTDs5OnLyLyNIkgki33XMyDGotnJzH+ly7XoWtBy7jwKl86AUBAT5uGBXbAbE9\ngjhunuzanSbnaQ3sNyOiVnO1oBJbDlzCb1kFEAC09/PA6NgOuCcqADIpw56opTH0iajFZeeVY/P+\nS8g4XwQAuCvQE2MGhKF3pD+kvNmOqNUw9ImoxZy9WopN+y/h1MViAEB4e2+MGRiG6E5teWc9kQgY\n+kRktDqNDhXValRUa/74p0ZlzZ+PK6o1qKj5c3tNXf0Y+653+WDMgDB07eDLsCcSEUOfiBro9Hqc\nuliCrMvFKK/6M8Ar/whztUZvsA2ZVAJPNye08XZBUBtfxN8Tis4hXKueyBow9IkIVwsq8euJa0j/\nPR/lVepG25zkUni5O6FdGw94uTv98c8Znm5/Pr75f3cXOc/miawUQ5/IQZVV1iH993zsP6nC1YJK\nAICHqxzD7g5Gv64BaOvtCk93J7g4yRjiRHaCoU/kQNQaHTLOF2H/SRVOZhdDLwiQSSXo3dkPA3q0\nQ0x4WzjJOXSOyF4x9InsnCAIOJdThv0nVfjtdEHDzXUd23lhQI926BcVAC+uK0/kEBj6RHaqoLQG\nB06qsP/kNRSW1gIAfL1cMKx3MGJ7BCHYz0PkComotTH0iexIda0Wh88U4NcT13AupwwA4OIkw4Ae\nQRjQIwhd7/KFVMrr80SOiqFPZCdOXSzGR+syodbqIQEQ1cEXA3oEoU8Xf65UR0QAGPpEdqFOrcPK\nn05DpxcwfnAnxHYPQlu
Fq9hlEZGVYegT2YFN+y/henktEvt3wOgBYWKXQ0RWimNziGxcblEVth+6\ngrberhgzMEzscojIijH0iWyYIAhYs/0MdHoBU0dGwsVJJnZJRGTFGPpENmz/SRXOXi1F785+6NXZ\nT+xyiMjKMfSJbFRljQbf7ToPZycpHhoRKXY5RGQDGPpENmpd2gVU1mgw7r6OvFOfiIzC0CeyQedz\ny5CWkYdgfw+M7BsqdjlEZCMY+kQ2RqfXY832MwCAR+K7QC7jjzERGYe/LYhsTOrhHFwtqMR9Me0Q\nGeojdjlEZEMY+kQ2pLi8Fim/XISHqxyThoaLXQ4R2RiGPpEN+V/qOdSpdZg0LILL4RKRyRj6RDYi\n88J1HDlTiIhgBe6LaSd2OURkgwzOva9WqzF16lRoNBrodDokJCRg5syZt+yzcOFCnDp1Cr6+vvjg\ngw/Qvn37FiuayNGoNTr85+czkEokeDShC6QSLo9LRKYzeKbv7OyM1atXY8OGDdiwYQP27t2LzMzM\nRvusXbsWCoUCO3bswGOPPYZ33nmnxQomckSbD1xGYWkt4u8JRUiAp9jlEJGNMqp7383NDUD9Gb1W\nq71le2pqKpRKJQAgISEBBw4csGCJRI7t2vUq/JR+GW28XTD2vjCxyyEiG2ZU6Ov1eiQlJWHgwIEY\nOHAgYmJiGm0vKChAUFAQAEAmk8Hb2xulpaWWr5bIwdy8oM6DcZFwdeZq2ERkPqNCXyqVNnTtHz9+\nHOfPn2+0XRCEW55LeM2RqNnSf8/H6SuliAlvi7sjuaAOETWPSXfve3p6ol+/fti3b1+j14OCgqBS\nqQAAOp0OlZWVUCgUlquSyAFV1WrwXeo5OMulmDoykn9IE1GzGQz94uJiVFRUAABqa2tx4MABdOrU\nqdE+w4YNQ0pKCgBg27Zt6N+/fwuUSuRY1qdlo7xagzEDw+Dv4yZ2OURkBwxeICwsLMQLL7wAvV4P\nvV6PxMREDBkyBMuWLUN0dDSGDRuGSZMm4fnnn0d8fDx8fHzw/vvvt0btRHYrO68ce47lol1bdyT0\nu0vscojIThgM/S5dujScxd9s9uzZDY+dnZ2xdOlSy1ZG5KB0ej1Wbz8NAVxQh4gsi79NiKzMrqO5\nuJJfiQE9gtC1g6/Y5RCRHWHoE1mRkoo6pOzNhoerHJOHRYhdDhHZGYY+kRX5btc51Kp1mDAkHN4e\nXFCHiCyLoU9kJU5evI5DWQXo1N4bg3tx7QoisjyGPpEV0Gh1+GbHWUgk9TfvcUEdImoJDH0iK7Dl\nwGUUlNRgRJ9QdAjyErscIrJTDH0ikZ3LKcXW9Mvw8XRG0qCOYpdDRHaMq3cQiejImUKs2HQKej3w\naEJXuLnwR5KIWg5/wxCJJPVIDv7781k4O8kwY2I0YsLbil0SEdk5hj5RKxMEAevSsrE1/TK83Z0w\nZ1JPdGznbXI7Op0Oly5lG9wvLKwTZDKZOaUSkZ1h6BO1Iq1Oj6+3nsaBUyoE+rph7pReCDBzMZ1L\nl7Ix550f4a4IaHKf6rICLH1+LMLDO5tbMhHZEYY+USupqdNiecoJ/H6pBJ3ae2P2xBh4uzdvAh53\nRQA8fYMtVCER2TuGPlErKK2sw4ffH8eVgkr0ivDD0+O6w8WJXe5E1FhFRQVefPFFnDt3DlKpFIsX\nL0ZYWBjmzp2L3NxchISE4MMPP4SXl3lDezlkj6iFXbtehTdXH8GVgkoM6dUeM8b3YOAT0W29+eab\nGDJkCH766Sds3LgRnTp1wooVKxAbG4vt27fj3nvvxWeffWZ2+wx9ohZ0LqcUi9ccwfXyWigHdcSj\nCV0gk/LHjohuVVlZicOHD2PChAkAALlcDi8vL6SmpkKpVAIAlEoldu7cafYx2L1P1EKOni3EZz+e\ngk4n4InErhgUw/n0iahpOTk58PX1xaJFi3D69Gn06NEDycnJuH79Ovz8/AAA/v7+KCkpM
fsYPOUg\nagG7juZgecoJSCUSzJ4Yw8AnIoO0Wi1+//13PPTQQ0hJSYGbmxtWrFgBiQXX4hDtTN/X1x1yOa9r\nUsvz978bu/vf3SrHEgQBa37Kwg+p5+Dj6YJXnrwXnUN9W+RYrflxEVHLCwoKQlBQEKKjowEA8fHx\n+Pzzz9G2bVsUFRXBz88PhYWFaNOmjdnHEC30S0qqxTo0OZgLF85h0Yr0Ow5tqyzJxZJp/Zs1nl2r\n02PlT6ex/+SfY/B9XOUoLKwwu807aa2Pi4gsx9+/6bvu/fz80K5dO1y8eBEdO3ZEeno6IiIiEBER\ngfXr12PatGlISUlBXFyc2cfnNX0iC6ip0+KTlBM4ZcEx+ETkeF566SU899xz0Gq1CA0NxZIlS6DT\n6fDss89i3bp1aN++PZYuXWp2+wx9omYqrazDhz8cx5V8jsEnoubp2rUr1q1bd8vrK1eutEj7DH2i\nZrh2vQrvf3cc18trMbRXe0yNj+SQPCKyWgx9IjNlXS7BJyknUFWrhXJwJ4yO7WDRu2yJiCyNoU9k\nht1Hc/DfnecAAH9PjMJ9Me1EroiIyDCGPpEJtDo9/pd6DruP5sLL3QkzlNGIDPURuywiIqMw9ImM\nVFmjwScpJ3D6SilC/D0xe2I0/BTmLYtLRCQGg6GvUqmwYMECFBUVQSaTYdKkSXj00Ucb7XPo0CFM\nnz4doaGhAICRI0di+vTpLVMxkQhyi6rw0dpMFJTWoHdnPzw1phtcnfk3MxHZFoO/tWQyGRYtWoSo\nqChUVVVh/PjxGDhwIMLDwxvt17dvX3z66actViiRWDIvFOHTjadQq9Zh9IAwJA3qCClv2CMiG2Qw\n9P39/eHv7w8A8PDwQHh4OAoKCm4JfSJ7IwgCth+6ih92n4dcLsXTY7vj3m6BYpdFRGQ2k/onc3Jy\ncPr0acTExNyyLSMjA0lJSQgICMCCBQsQERFhsSKJWptGq8OqbWew/6QKPp7OmDUhBh3beYtdFhFR\nsxgd+lVVVZg9ezaSk5Ph4eHRaFv37t2xe/duuLm5IS0tDTNmzMD27dstXixRayirrMPH60/gQl45\nOrbzxszx0fD1chG7LCKiZjNq6jCtVovZs2dj3LhxGDFixC3bPTw84OZWfxfzkCFDoNFoUFpaatlK\niVrBZVUFXlt1GBfyytG/eyAWPtSbgU9EdsOoM/3k5GRERETgscceu+32G0v+AUBmZiYAwMeHY5fJ\ntmRdrcbmw0eg0eoxYUgnJPbnDHtEZF8Mhv6RI0ewadMmREZGIikpCRKJBHPnzkVeXh4kEgmmTJmC\n7du343//+x/kcjlcXV3xwQcftEbtRBbj7OqF9enX4eIsw6wJMejV2U/skoiILM5g6Pfp0wdZWVl3\n3Gfq1KmYOnWqxYoiam3Obt7w8ZBh3gN9EOLvKXY5REQtgsuBEQHQaerwRFwgA5+I7BpDnwhATWUR\n3F1kYpdBRNSiGPpEREQOgqFPRETkIBj6REREDkK0ZcIuXDh3y2thYZ0gk/G6KhERUUsQLfQXrUhv\n9Ly6rABLnx+L8PDOIlVERERk30QLfU/fYLEOTURE5JB4TZ+IiMhBMPTJQXAOfSIihj45BCcXd7FL\nICISHUOf7J4gCHBy4fS6REQMfbJ751W1kMpEu2eViMhqMPTJ7h0+Vyl2CUREVoGhT3Ytr6gK2fm1\n0GnqxC6FiEh0DH2ya6lHcwAA6jqe7RMROdyFTp1Oh0uXsg3uxymBbV91rQb7T6jg7SZDZUmt2OUQ\nEYnO4UL/0qVszHnnR7grAprch1MC24dfMq+hTqPDgK4K5OWJXQ0RkfgcLvQBwF0RwGmA7ZxeLyD1\naA6c5FL06uiBTXvFroiISHwOGfqWwMsE1i0z+zoKS2sxuGc7uLvw809EBDD0zcbLBNYt9fBVAEBc\nn1CoK66JXA0RkXVg6DcDLxNYp9yiKpy6VIIuoT4ID
fDEhQqxKyJbxR49sjcMfbI7u47UD9Mb0TdE\n5ErI1rFHj+wNQ5/sSnWtBvtPqtDW2wW9OvuJXQ7ZAfbokT3h5DxkV24M0xt+dwhkUn57ExHdzOBv\nRZVKhUfmtJtGAAAgAElEQVQffRSJiYkYM2YMVq9efdv93njjDcTHx2PcuHHIysqyeKFEhtwYpucs\nl2JQz/Zil0NEZHUMdu/LZDIsWrQIUVFRqKqqwvjx4zFw4ECEh4c37JOWloYrV65gx44dOH78OF59\n9VV8//33LVo40V9lXvhzmJ6nm5PY5RARWR2DZ/r+/v6IiooCAHh4eCA8PBwFBQWN9klNTUVSUhIA\noGfPnqioqEBRUVELlEvUtJ1H/hymR0REtzLpomdOTg5Onz6NmJiYRq8XFBQgKCio4XlgYCDy8/Mt\nUyGREXKLqvD7pRJ0vat+mB4REd3K6NCvqqrC7NmzkZycDA8Pj0bbBEG4ZX+JRNL86oiMdGOYHs/y\niYiaZlToa7VazJ49G+PGjcOIESNu2R4YGAiVStXwXKVSISCg6XGtRJZUXavBryevoa23K3p1bit2\nOUREVsuo0E9OTkZERAQee+yx226Pi4vDhg0bAAAZGRnw9vaGnx/HSFPr2Jd5DWqNHsP7BHOYHhHR\nHRi8e//IkSPYtGkTIiMjkZSUBIlEgrlz5yIvLw8SiQRTpkzBkCFDkJaWhpEjR8LNzQ1LlixpjdqJ\n6ofpHfljmF4Mh+kREd2JwdDv06ePUePuX3nlFYsURGSK4xeKUFRWi8E923OYHhGRAewLJZuWemOe\n/T6cZ5+IyBCGPtmsm4fphXCYHhGRQQx9slkNZ/l9OUyPiMgYDH2ySVW1Guy/MUwvgiNFiIiMwdAn\nm7Tv+J/D9KRSTgRFRPZDr9dDqVTimWeeAVA/G+7kyZORkJCAefPmQavVmt02Q59sjl4vYNdRDtMj\nIvu0evXqRovavfvuu3jiiSewfft2eHl5Ye3atWa3zdAnm3NjmF5sjyAO0yOboNPpcOHCOYP/dDqd\n2KWSyFQqFdLS0jBp0qSG19LT05GQkAAAUCqV+Pnnn81u3+A4fSJrs/PwjXn2OUyPbMOlS9mY886P\ncFc0PT15dVkBlj4/FuHhnVuxMrI2ixcvxoIFC1BRUQEAKCkpgUKhgPSP2UaDgoJuWenWFAx9sim5\nhZXIulyCqA6+CPHnMD2yHe6KAHj6BotdBlmxPXv2wM/PD1FRUTh48CCA+gXt/rqoXXMWtJMIt1si\nrxVcvlaODu28xTg02RCtTo/84mpcK6pCXmEl9p+4hlPZ15H8eD/ERrcTuzwiIot5//338eOPP0Im\nk6Gurg5VVVWIi4vDr7/+il9//RVSqRQZGRn4+OOP8cUXX5h1DNFCf8z8jY2eV5bkYsm0/i3etXXh\nwjksWpF+x7+4janFUu0QoNPrcb2sFvklNcgvrq7/v6QaBcU1KCqrhf4v36Lt2rrj9X/ca/Rd+/b6\ntbLXj8ua8PcFWZq/v5dR+x06dAhfffUVPv30Uzz77LOIj49HYmIiXn31VXTt2hUPPvigWcdn9z61\nCr0g/BHs1cgv/iPUS2qQX1KDotIa6PS3/u3p7e6ETsHeCPR1Q6CvOwL++L+9nzuH6RGRw5g/fz7m\nzZuHpUuXIioqChMnTjS7LYY+taji8lr8cuIafsm8hqKy2lu2e7o5IaydFwJ83BHYpj7UA9u4IcDH\nHe6u/PYkIsfUr18/9OvXDwAQGhqKH374wSLt8rcqWZxWp0fGuSLsy7yGkxevQxAAZycp+nYNQLCf\nR/2Ze5v6M3cPVw65IyJqLQx9spi8oirsy8zD/pMqVFRrAACd2ntjUEw79IsKhJsLv92IiMTE38LU\nLLVqLX7LKsDezDxcyC0HUN9lP7JvKAb1bMdhdUREVoShTyYTBAHZeeXYezwPh04XoE6tgwRAj45t\nMKhne/SK8IOTn
JM9EhFZG4Y+Ga28Wo0DJ1XYl3kNeUVVAIC23i74W7+7MDA6CH4KN5ErJCKiO2Ho\n0x3pBQGnLhZj3/E8HDtXBJ1egFwmwT1dAzCoZzt069CGw+eIiGwEQ59uSxAEHL9wHevTLiCnsP6s\nPtjfA4Ni2iO2eyC83J1FrpCIiEzF0KdbnLlSgnVp2TifWwYJgNjugYjrE4qO7byaNeczERGJi6FP\nDa7kV2BdWjZOZF8HAPTu7IfxgzshmHfgExHZBYY+Ib+4Gin7snEoq365xq53+WDCkHCEBytEroyI\niCyJoe/ASirq8OOvF7Hv+DXoBQEdgrwwcUg4uoX5shufiMgOMfQdUGWNBlvTLyP1SA40Wj2C2rhj\n/OBO6NPFn2FPRGTHDIZ+cnIy9uzZg7Zt22LTpk23bD906BCmT5+O0NBQAMDIkSMxffp0y1dKzVar\n1uLn365i26ErqKnTwdfLBePu64iB0UGQSTmZDhGRvTMY+uPHj8cjjzyCBQsWNLlP37598emnn1q0\nMLIcjVaPtIxcbN5/CeXVGni6OeGB4R0x7O5gOMllYpfncHQ6HS5dyja4X1hYJ8hk/PoQkeUYDP2+\nffsiNze3NWohC9PrBRw4pcKGfRdxvbwWLs4yjB0YhoR+d3HxGxFdupSNOe/8CHdFQJP7VJcVYOnz\nYxEe3rkVKyMie2eR3/wZGRlISkpCQEAAFixYgIiICEs0S2aoU+twIa8MZ6+W4vCZQuQVVUEuk2Bk\n31CMGtAB3pxUxyq4KwLg6RssdhlE5GCaHfrdu3fH7t274ebmhrS0NMyYMQPbt2+3RG1khMoaDc7n\n1If82ZxSXFZVQKcXAAASCXBfTDuMG9gRbRWuIldKRERia3boe3h4NDweMmQI/vWvf6G0tBQ+Pj7N\nbZpuo6SiDudySnHmainOXS1tmCIXAGRSCToEeSEy1AeRIT6ICFHA081JxGqJiMiaGBX6giA0ua2o\nqAh+fn4AgMzMTABg4FuIIAgoKK2pP4u/WopzV8tQUFrTsN1ZLkXXu3zqQz7UB+HtFXBx5o1fRER0\newZDf/78+Th48CBKS0sxdOhQzJo1CxqNBhKJBFOmTMH27dvxv//9D3K5HK6urvjggw9arFhHuOtZ\nq9Pj1xPXkHW5BGeulqKsUt2wzc1FjpjwtugS6oPOoT4IC/KCXMahdkREZByDof/ee+/dcfvUqVMx\ndepUixV0J/Z+13N5lRqfbDiJs1dLAQAKD2f07RpQH/IhCoT4e3IZWyIiMpvNjduy17ueL14rx8fr\nT6Ckog59uvhj4pBwBPi6cYY8K+IIPU1EZN9sLvTt0S+Z17B6+xnodHpMGNIJif07MOytkL33NBGR\n/WPoi0ir0+Pb1HPYdTQX7i5yPD0hGtGd2opdFt2BvfY0EZFjYOiLpKyyDp9sOIlzOWUIUDhh4gA/\nuAvFuHCh+Lb7s8uYiIiai6Evggt5ZVi+/gRKK9WICnFD2q/pyL7k1+T+7DImIiJLYOi3sr3H8/DN\njjPQ6QVMGhaOzm3r8NsJP3YZExFRi2PotxKNVo//7TyLPRl58HCV45lxPdC9YxtcuHBO7NKIiMhB\nMPRbQUlFHT7ZcAIXcssRGuCJmeOj4e/jJnZZRETkYBj6LSynqA4fb/0NZVVq3NstEI/f3xUuTrwh\nj4iIWh9DvwXJnd2xZk8BBABThkcg/p5Qjr8nIiLRMPRbkKuHL1ycpJg5PgZRYW3ELoeIiBwcV2tp\nQTqtGv8YEcjAJyIiq8DQb0E1FYVQeLAzhYiIrAMTiYjIRnDRJ2ouhj4RkY3gok/UXAx9IiIbwkWf\nqDl4TZ+IiMhBMPSbQe7iIXYJRERERmPom+lKYS1c3BRil0FERGQ0hr4Zistrse7AdbHLICIiMglD\n30RqjQ4frT+B6jo91DVlYpdDRERkNIa+CQRBwOrtZ3BZVYGYMA9o6qrELomIiMh
oDH0T7Dycg/0n\nVejYzhv33+0rdjlEREQmYegbKetSMb7bdR7eHs6YOT4achlXyyMiItvC0DdCUWkN/m/jKUgkwAxl\nD/h6uYhdEhERkckMhn5ycjIGDBiAMWPGNLnPG2+8gfj4eIwbNw5ZWVkWLVBsdRodPl5/ApU1Gkwd\nGYnOIT5il0RERGQWg6E/fvx4fPnll01uT0tLw5UrV7Bjxw689tprePXVVy1aoJgEQcDKn07jSkEl\nhvRqj6G9OfUlERHZLoOh37dvX3h7eze5PTU1FUlJSQCAnj17oqKiAkVFRZarUETbD13Fwd/zERGs\nwEMjIsUuh4iIqFmafU2/oKAAQUFBDc8DAwORn5/f3GZFl62qxQ97zsPH0xnTlT3gJOftD0REZNua\nnWSCINzymkRi23e2S6QypKRfh0wqwYzx0fDx5I17RERk+5od+oGBgVCpVA3PVSoVAgKaXuvZFrh6\ntkWtRo9H4rsgvD3n1yciIvtgVOjf7mz+hri4OGzYsAEAkJGRAW9vb/j5+VmmOpHIZE7oE+6JQT3b\ni10KERGRxcgN7TB//nwcPHgQpaWlGDp0KGbNmgWNRgOJRIIpU6ZgyJAhSEtLw8iRI+Hm5oYlS5a0\nRt0tSqepw8heIWKXQUREDkSlUmHBggUoKiqCTCbDpEmT8Oijj6KsrAxz585Fbm4uQkJC8OGHH8LL\ny8usYxgM/ffee89gI6+88opZB7dWtVXFkEkjxC6DiIgciEwmw6JFixAVFYWqqiqMHz8eAwcOxPr1\n6xEbG4unnnoKK1aswGeffYbnnnvOrGPwlvTbEAS92CUQEZGD8ff3R1RUFADAw8MD4eHhyM/PR2pq\nKpRKJQBAqVRi586dZh/D4Jk+ORadTodLl7IN7hcW1gkymawVKiIicjw5OTk4ffo0evbsievXrzfc\nK+fv74+SkhKz25UId7pLrwVdvlaODu2anvSnNQiCgH+v/g37M69h7KBOeCopWtR6iIiIqqqq8Mgj\nj2D69OkYMWIE+vXrh0OHDjVsv/fee3Hw4EGz2hbtTH/mu7sbPa8sycWSaf0RHt65yfdcuHAOi1ak\nw9O36elwDbXTVBs/7svGj/uyW7UWS7ZjKdZWjyVY29fK2tqhplnb14pfc9vn73/nG/C0Wi1mz56N\ncePGYcSIEQCAtm3boqioCH5+figsLESbNm3MPj6v6RMREVmJ5ORkRERE4LHHHmt4bfjw4Vi/fj0A\nICUlBXFxcWa375DX9CVSXosmIiLrcuTIEWzatAmRkZFISkqCRCLB3Llz8dRTT+HZZ5/FunXr0L59\neyxdutTsYzhc6Gt0erh6mN81QkQtizeTkqPq06dPk8vTr1y50iLHcKjQFwQB246WQiZ3FrsUImrC\npUvZmPPOj3BXND2dd3VZAZY+P5bXrYlM5FChn5aRh8xLVdBp1Qx+Iivmrgi4481qRGQeh7mR70Ju\nGf7z81m4O0tRW1ksdjlEREStziFCv6xKjeUpJ6AXBCT1bwtB0IldEhERUauz++59rU6P/9twEqWV\nakwaFo6OfmqxSyIiIhKF3Z/p/7D7As5eLUXfLv74W7+7xC6HiIhINHZ9pp9+SoWfD19Fez8PPJEY\nBYlEInZJRET0F8YM0+QQTcuw29C/WlCJlT+dhpuLDDOUPeDmYrcfKhGRTTM0TJNDNC3HLpOwRq3H\nivWZUGv1mDU2Gu3aeohdEhER3QGHabYOu7ymv/HgdRSW1mL0gA7oHekvdjlERERWwe7O9J1dvXBB\nVYseHdsg6b5OYpfjsDiVKhGR9bG/0Hfzho+HDNPGdodUyhv3xMKpVImIrI/dhb4gCJgwwA+ebk5i\nl+LweI2OiMi62F3o11WXIMiH4/GJjMVLMUSOw+5CX6uuEbsEIpvCSzFEjsPuQp+ITMdLMUSOwS6H\n7BEREdGtGPpEREQOwqjQ37t3L/72t78hISE
BK1asuGV7SkoKYmNjoVQqoVQqsXbtWosXSkRERM1j\n8Jq+Xq/H66+/jpUrVyIgIAATJ05EXFwcwsPDG+03atQovPTSSy1WKADo9AJc3H1a9BhERET2yuCZ\nfmZmJjp06IDg4GA4OTlh1KhRSE1NvWU/QRBapMAbKms0+O/eQji5cB59IiIicxgM/fz8fLRr167h\neWBgIAoKCm7Zb8eOHRg3bhzmzJkDlUpl0SKvXa/CG6sO40phHYfkERERmclg6BtzBj98+HDs2rUL\nGzduRGxsLBYuXGiR4gDg1MVivLH6CApKazAwyhu1VcUWa5uIiMiRGAz9oKAg5OXlNTzPz89HQEDj\nSTwUCgWcnOqnvZ08eTJOnTplkeJ2Hc3BB98fh0arw1NjumFoD4VF2iUiInJEBkM/OjoaV65cQW5u\nLtRqNbZs2YK4uLhG+xQWFjY8Tk1NRURERLOK0un1+M+Os/hmx1l4usmx4KG7Eds9qFltEhEROTqD\nd+/LZDK8/PLL+Pvf/w5BEDBx4kSEh4dj2bJliI6OxrBhw7BmzRrs2rULcrkcCoUCS5YsMbug6lot\nPt14EicvFiPY3wNzJsbAT+FmdntERERUz6hpeAcPHozBgwc3em327NkNj+fNm4d58+Y1u5iSSi2+\nWnMY165XIya8LZ4e2x1uLpwpmIiIyBKsJlGlcmd8nZqPGrUe8feEYvKwCEilErHLIiIishtWE/pu\nnn6o0+jx2N+6YEgvLvxhKi6PSkREhlhN6EMQ8ODgAAa+mbg8KhG1Np5s2B6rCf3qigKEBXQQuwyb\nxuVRiag18WTD9lhN6At6ndglEBGRiXiyYVu4tC4REZGDYOgTERE5CIY+ERGRg2DoExEROQiGPhER\nkYNg6BMRETkIhj4REZGDsJpx+kS3wxm/iIgsh6FPVo0zfhERWQ5Dn6weZ/wiIrIMXtMnIiJyEAx9\nIiIiB8HQJyIichAMfSIiIgfBG/mIyCI4vJLI+jH0icgiOLySyPox9InIYji8ksi6MfSJiIgsrKnL\nXf7+d4tQzZ8Y+kRERBbW1OWu3f0Z+kRERHbHGi93GRX6e/fuxeLFiyEIAiZMmIBp06Y12q5Wq7Fw\n4UKcOnUKvr6++OCDD9C+ffsWKZiIiKiliD0KxVDeNpfB0Nfr9Xj99dexcuVKBAQEYOLEiYiLi0N4\neHjDPmvXroVCocCOHTuwdetWvPPOO/jggw8sWigREVFLE3MUijF521wGJ+fJzMxEhw4dEBwcDCcn\nJ4waNQqpqamN9klNTYVSqQQAJCQk4MCBAxYrkIiIqDXd6JZv6t+d/iBoDmPytrkMnunn5+ejXbt2\nDc8DAwNx4sSJRvsUFBQgKCgIACCTyeDt7Y3S0lL4+PhYtFgiImOI3UVr7ezx82MPH5MxedtcBkNf\nEASDjfx1H0EQIJFI7vieypLcRs+rywoMHseY/YxpxxJtsB3baseaamE7Ld/OpUvZmPbyF3D1bNPk\nPrWVxVjx+pMGu2it5WOyZDu29vmxx4/pdozJ2+aSCAaOkpGRgY8++ghffvklAGDFihUA0Ojmgief\nfBKzZs1Cz549odPpcN9997GLn4iIyATG5G1zGbymHx0djStXriA3NxdqtRpbtmxBXFxco32GDRuG\nlJQUAMC2bdvQv39/ixVIRETkCIzJ2+YyeKYP1A8hePPNNyEIAiZOnIhp06Zh2bJliI6OxrBhw6BW\nq/H8888jKysLPj4+eP/99xESEmLRQomIiOzd7fLWkowKfSIiIrJ9Brv3iYiIyD4w9ImIiBwEQ5+I\niMhBiBL6P//8M7p27YqLFy+a3UZUVBSUSiXGjRuH8ePHIyMjw6x2ioqKMG/ePMTHx2PChAl4+umn\ncfnyZbNqGT16NJKSkrBy5UqzxlveaCcpKQlKpRKff/65yW001VZeXp7JbVy/fh3z58/HyJEjMWHC\nBDzwwAP
YuXOnSW307t270fOUlBS8/vrrJtdypzbFaufm96elpSEhIQEqlarV6+jatSsWLlzY8Fyn\n06F///545plnzGrrrbfeanj+1Vdf4eOPPza5nfz8fEyfPh0JCQmIj4/H4sWLodVqTW7nxvfxmDFj\n8Oyzz6Kurs7kNv5az8iRI/HGG29Ao9GYXcv/+3//D5WVlWbVAgD/93//h9GjR2Ps2LFQKpXIzMw0\n6f2lpaUNP9v33XcfBg8e3PDc1M9zbm4uxowZ0+i1jz/+GF9//bXRbTzyyCP49ddfG722atUqvPba\na0a9f8mSJVi9enXD83/84x94+eWXG56/9dZbWLlypdH1qFQqxMXFoby8HABQVlaGuLg4XLt2zeg2\nbnjooYewd+/ehudbt27FU089ZVIbO3fubPj63Pi9HBUVhX379plcT7MJIpgzZ44wdepU4aOPPjK7\njd69ezc83rdvn/Dwww+b1c6UKVOE7777ruH56dOnhcOHD5tdy/Xr14XHH39cWLZsmcm13NxOc1mi\nrb9+bvLy8oRvvvmmWXWsX79eeP3115tVl6U+T81t58b79+/fL4wcOVK4evWqKHX06tVLUCqVQl1d\nnSAIgpCWliYkJSUJTz/9tMltRUdHC3FxcUJJSYkgCILw5ZdfmvVzOnHiRCElJUUQBEHQ6/VCcnKy\n8NZbb5nczs2fm/nz5wtff/21yW00Vc8bb7xhdi0LFy4UPv30U7NqOXbsmDBlyhRBo9EIgiAIJSUl\nQkFBgVltCYIgfPTRR8JXX31l9vtzcnKE0aNHN6vN7777TnjhhRcavTZ58mThyJEjRr3/p59+Ep59\n9llBEOq/PkqlUpgyZUrD9ilTpgjHjx83uh5BEIQvvvhCePnllwVBEISXX35ZWLFihUnvv+Hs2bPC\n/fffL9TV1QlVVVVCfHy82T/rN3z33XdmZ1ZztfqZfnV1NY4dO4Y333wTW7ZsMbsd4aYz6YqKCigU\nCpPbSE9Ph5OTEyZPntzwWpcuXdCnTx+z62rTpg1ee+01fPPNNya/V7DgQIrmtnXgwIFbPjft2rXD\n1KlTm1ua3RAEAYcPH8Yrr7yCFStWiDpMddCgQdizZw8AYMuWLRg1apRZ7chkMkyePNmks7y/OnDg\nAFxdXZGUlAQAkEgkWLRoEdatW2f2mToA9O3bF1euXLFYPRs3bkRNTY1ZtfTq1Qv5+flmvbewsBC+\nvr6Qy+snRPXx8YG/v79ZbVmL+Ph4pKWlNfSe5ObmorCwEHffbdza8XfffTeOHj0KADh37hwiIyPh\n4eGBiooKqNVqZGdno1u3bibV9Nhjj+H48eNYtWoVjh07hieeeMK0D+oPnTt3xvDhw7FixQosX74c\nSUlJzfpZv3jxIpYvX453333X7Daaw6ildS1p586dGDRoEDp06AAfHx9kZWUhKirK5Hbq6uqgVCpR\nW1uLoqIirFq1yuQ2zp07h+7du5v8PkNCQ0MBAMXFxWjTpukpIf/qxsck/DGN8bRp03D//febVcPN\nbYWGhuKjjz4y6f3nz5+3yOemtra2YTEmQRBQXl6O4cOHN7tda6DRaDBjxgysWbMGYWFhotUhkUgw\natQofPzxxxg6dCjOnDmDiRMn4vDhw2a1NXXqVIwZM8bkLswbbve94+npieDgYFy+fBmRkZFGt3Xj\nj1etVou9e/di8ODBFqsnJCQEly9fRteuXU2qRafT4cCBA5g0aZLJtQDAwIEDsXz5cvztb39DbGws\nEhMTcc8995jVlrXw8fFBTEwM9u3bh+HDh2PLli0m/e4KCAiAXC6HSqXCsWPH0Lt3b+Tn5+PYsWPw\n9PREly5dGv5IMpZcLsfzzz+PJ598El9//bXJ77/ZjBkzoFQq4ezsjHXr1pndjlarxXPPPYcXXngB\ngYGBZrfTHK0e+lu2bMHjjz8OAEhMTMSmTZvMCn1XV9eGWQAzMjKwYMECb
N682ZKlNoterzf5PTd/\nTM1lybYA4LXXXsORI0fg7OyMH374wew6UlJScOrUKYvVJSa5XI7evXvjhx9+wIsvvihqLZGRkcjN\nzcXmzZsxZMiQZvX0eHh4QKlUYvXq1XB1dTX5/UITa2/o9XqDa3L81Y0/XgGgT58+mDhxokXrMacW\nlUqFiIgIDBw40ORaAMDd3R0pKSk4fPgw0tPTMXfuXDz33HMNPRGtramvialfq8TERGzZsgXDhw/H\n1q1bsWTJEpPef+Ns/8ZZuUqlwtGjR+Hl5WX2fS9paWkICAjA2bNnERsba1YbAODm5obExER4eHjA\nycnJ7HY+/PBDREZGmn0yZwmt2r1fWlqK9PR0vPjii4iLi8NXX32Fbdu2NbvdXr16oaSkBMXFxSa9\nLyIiAidPnmz28f/q6tWrkMlkJp3lW5uIiIhG4fzKK69g5cqVJn+O7ZlUKsXSpUtx4sQJfPbZZ2KX\ng+HDh+Ptt9/G6NGjm93Wo48+irVr15rV/d25c+dbVgarrKyESqXCXXfdZVJbN/5oTElJwUsvvWTW\n2VpT9Vy/fh2dOnUyuZY9e/ZAEASzLuHdIJFIcM8992DWrFl4+eWXsX37drPbai4fHx+UlZU1es2c\nVVJHjBiB9PR0/P7776irqzP5ZK5Xr144evQozp49i8jISPTq1QsZGRnIyMgw+jLBzbKyspCeno7v\nv/8eK1euRFFRkclt3EwqlUIqNT8yDx48iJ9//hmvvPJKs+porlYN/W3btiEpKQm7du1Camoqdu/e\njeDgYBw5csTktm4+k7lw4QL0ej18fX1NaiM2NhYajabRmeuZM2dMrufmWoqLi/HPf/4TDz/8sElt\n/LWd5mpuW7GxsVCr1fj2228bXjMnACz5MbVkm+YQBAEuLi747LPPsHnzZqxdu9bsdppbBwBMnDgR\nM2bMQOfOd15BzJi2FAoF7r//frM+ptjYWNTW1mLjxo0A6rvD33rrLYwfPx4uLi5m1dMcTdXzyCOP\nwNnZ2eRaXFxc8OKLL+Krr76CTqczuZ6LFy82GiGUlZWF4OBgk9uxFHd3dwQEBDQsklZaWopffvnF\n5Hub3N3dcc899yA5Odmse0r69OmDPXv2wMfHBxKJBAqFAuXl5Th27Bh69eplcnv/+te/kJycjKCg\nIDz55JP497//bXIbllJWVobk5GS8/fbbcHNzE60OoJW797du3XrLPMLx8fHYvHmzyd9garW64Zo1\nUD+kw9TuKKB+aMqbb76JFStWwNXVFcHBwUhOTjarFo1GA7lcjqSkpIZLGOa0c6M7ctCgQZg3b57J\n7Sg+lZsAAAGTSURBVACmd83dzvLly7F48WJ88cUXaNOmDdzc3PD888+3eh0t0aZOpzPpF/6d6lAo\nFPj888/x8MMPo02bNibfs1BXV4ehQ4c2fN0ff/xxk75/btQRGBiIRx55xKRjN9UWAPz973/Hf//7\nX7M+38uXL8err76KTz75BIIgYPDgwZg7d26z6mmO5cuX45///Cc++eQTFBcXIzEx0eQ5zW+uJSoq\nCl27dsWWLVswduxYk9qprq7G66+/jsrKSshkMnTo0MHooW0t5a233sK//vWvht+js2bNarg3yRSj\nR4/GrFmz8MEHH5j83sjISJSWljb6fHbp0gW1tbUm9zp8//33aN++fUOX/oMPPoj169fj8OHD6Nu3\nr8m1Ndd3333XcEIIwCL3bZmLc++TQzp9+jReeeUVfP/992KXQq0sIyMD8+bNw8cff2zyHeFEto6h\nTw7n22+/xTfffIMXX3yxWTf3EBHZGoY+ERGRg+Dc+0RERA6CoU9EROQgGPpEREQOgqFPRETkIBj6\nREREDoKhT0RE5CD+P80B3muNn09gAAAAAElFTkSuQmCC\n", 160 | "text/plain": [ 161 | "" 162 | ] 163 | }, 164 | 
"metadata": {}, 165 | "output_type": "display_data" 166 | } 167 | ], 168 | "source": [ 169 | "pareto(df, 1)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "The bars show the relative distributions of names, and the line shows the cumulative total. For example, since the line is at 40 for the letter I, we know that 40% of people have names beginning with the letter A through I. We’re looking for where the line crosses 50%, because we want the name that puts us right in the middle of an alphabetized line of people.\n", 177 | "\n", 178 | "We see that the line crosses 50% at the letter J, so let’s zoom in on that." 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 6, 184 | "metadata": { 185 | "collapsed": false, 186 | "deletable": true, 187 | "editable": true 188 | }, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/plain": [ 193 | "" 194 | ] 195 | }, 196 | "execution_count": 6, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | }, 200 | { 201 | "data": { 202 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAfcAAAFdCAYAAAAe8iSvAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt8U2Wex/FvmrZcWoqUpgWKoBQF1CpoRYaBKpSb1ALl\nIuso44iKl0FcbyjsoigMvERhV9dRZCgq4I4jINei4KDAIAIy6KJyGa1CIaU3CqU3aJuc/QNbKbek\nNGmS08/7r+bk5JdfkybfPk9OnmMxDMMQAAAwjSBfNwAAADyLcAcAwGQIdwAATIZwBwDAZAh3AABM\nhnAHAMBkgn1555MnT9bGjRvVsmVLrV69+qL7zpw5U9u3b5fFYlFpaamOHTumHTt21FOnAAAEDosv\nv+e+c+dOhYWFaeLEiS7D/UyLFy/W3r179ac//cmL3QEAEJh8Oi2fkJCgiIiIGtsOHTqkBx54QCNG\njNA999yjn3/++ZzbrVmzRnfccUd9tQkAQEDx6bT8+UyZMkUvvfSS2rVrp927d2vq1Kl67733qq/P\nysqS3W5Xjx49fNglAAD+y6/CvbS0VF9//bUef/xxVX1aUFlZWWOf9PR0DRw4UBaLxRctAgDg9/wq\n3J1OpyIiIrR8+fIL7rN27Vq98MIL9dgVAACBxeVn7pMnT1bPnj2VkpJywX22b9+uYcOG6Y477tCY\nMWNq1cCZx/OFh4erbdu2+uSTT6q37du3r/rnn376SSdOnFDXrl1rdR8AAASCvn37asiQIRo2bJhG\njhxZ47q0tDR17txZx48fd1nH5ch9+PDhGjNmjCZOnHje64uKivTSSy9pwYIFiomJUUFBgZu/gvTU\nU09p+/btOn78uG677TY99thjevXVV/XCCy/orbfeksPh0ODBg9W5c2dJp0ftycnJbtcHACCQWCwW\nLVq0SM2bN6+xPTs7W1u3blWbNm3cquMy3BMSEmS32y94/erVqzVgwADFxMRIkiIjI926Y0maPXv2\nebfPnz//vNvHjx/vdm0AAAKNYRhyOp3nbJ8xY4YmTpyoRx55xK06df4q3IEDB1RYWKgxY8ZoxIgR\nWrFiRV1LAgDQIFksFt1///0aMWKEPvzwQ0nSZ599ptatW6tTp05u16nzAXUOh0N79uzRe++9p9LS\nUv3bv/2bunXrpvbt29e1NAAADcoHH3wgm82mgoICjR07Vh06dNDcuXO1YMGC6n3cWXuuziP3mJgY\n9erVS40aNVKLFi2UkJBQ4yC4C6msdNT1rgEAMBWbzSbp9Efc/fr1044dO2S32zV06FD17dtXOTk5\nGjFihI4ePXrROm6N3C/2X0JSUpKmT58uh8Oh8vJy7d69W/fdd5/LmseOlbpz1wAAmIbN1uyC15WV\nlcnpdCosLEylpaXasmWLxo8fr0cffbR6n759+2r58uXnHHB3Npfhfr4j2isqKmSxWDR69GjFxcWp\nV69eGjJkiIKCgnTnnXeqY8eOtfhVAQBAfn6+xo8fL4vFIofDoZSUFPXq1avGPhaLxa1peZ+dOCYv\nr8gXdwsAgM9cbOTuSZzPHQAAkyHcAQAwGcIdAACTIdwBADAZwh0AAJMh3AEAMBnCHQAAkyHcAQAw\nGcIdAACTIdwBADAZwh0AAJMh3AEAMBnCHQAAkyHcAQAwGcIdAACTIdwBADCZYF83AABAbTgNQ++u\n3afvDxT4upVaCbEGKW3KgHq5L8IdABBQ/r7zsLZ8e0TNmoaoSWjgxFhIcP1NlgfOowIAl8jhcOjA\ngZ/c3v+KKzrIarV6sSNcqpyCUn20KUPNmoZo2gO3KKJpqK9b8kuEOwDTO3DgJz3+yio1bR7tct/S\nwly99swQxcVdVQ+doTacTkNpa/eqvNKpB+64hmC/CMIdQIPQtHm0wlvE+roN1MGnOw/px8OFurlz\ntBI6u/5HrSHjaHkAgN87crREH23+Sc2ahujuAVf7uh2/R7gDA
Pya02lowdq9qqh0asyATkzHu4Fw\nBwD4tfVfHVKG/YS6d2E63l2EOwDAbx05WqLl//hJEU1DdHd/puPdRbgDAPxSjen4gZ3UjOl4txHu\nAAC/dOZ0/E2dmI6vDcIdAOB3svJPHx0fERaqewZ08nU7AYdwBwD4larp+EqHU78f2EnhTUJ83VLA\nIdwBAH5l3Y5M/ZR1Qj2uidGNV9t83U5AchnukydPVs+ePZWSknLR/Xbv3q1rrrlG69ev91hzAICG\nJSu/RMv/8bMiwkL1O46Ov2Quw3348OFKS0u76D5Op1OzZ89W7969PdYYAKBhcTidSks/PR1/L9Px\ndeIy3BMSEhQREXHRfRYtWqSBAwcqMjLSY40BABqWdTsO6ecjJ9Tj2hh1Yzq+Tur8mXtOTo7+/ve/\n66677vJEPwCABsieX6IV//hJzcNC9bt+TMfXVZ3DfcaMGXrmmWdksVgkSYZh1LkpAEDD4XA6lbZm\njyodhn4/iOl4T6jzKV+/++47PfHEEzIMQ8eOHdPmzZsVHByspKQkT/QHADC5T7Zn6kB2kX5zbYy6\nXcV0vCe4Fe4XG41v2LCh+udJkyapT58+BDsAwC2H84q1csvPah4WqruYjvcYl+H+1FNPafv27Tp+\n/Lhuu+02PfbYY6qoqJDFYtHo0aPro0cAgAk5nE4tSN+rSoehewd1Zjreg1yG++zZs90uNnPmzDo1\nAwBoOD7edno6vud1rdT1qihft2MqrFAHAKh3h3NPT8dfFh6qu/pd5et2TIdwBwDUq0rH6cVqHM7T\n0/FhjZmO9zTCHQBQrz7edlAHc4r02+ta6YaOTMd7Q52/CgcAgLsO5RZr1RcHmI6/gL59+yo8PFxB\nQUEKDg7W0qVLNWvWLH3++ecKDQ1Vu3btNHPmTIWHh1+0DiN3AEC9OD0dv0cOp6E/3N5ZTZmOP4fF\nYtGiRYu0YsUKLV26VJLUq1cvpaena+XKlWrfvr3efvttl3UIdwBAvVi77aAyc4r12/hWuj6O6fjz\nMQxDTqezxraePXsqKOh0XHft2lXZ2dku6xDuAACvO5RbrNVfHFCLZo10VxLT8RdisVh0//33a8SI\nEfrwww/PuX7p0qVKTEx0WYfP3AEAXlXpOL12fNXR8UzHX9gHH3wgm82mgoIC3XffferQoYMSEhIk\nSW+99ZZCQkKUkpLiso7Pwr1Fi6YKDrb66u4BNCA22436vMeNvm6jwfrrun3KzC1W/+7tlNTjCl+3\n49dsttNr60dGRqp///769ttvlZCQoOXLl2vTpk1auHChW3V8Fu7HjpX66q4BNDAZGT9o0rxtCm8R\n63Lf4mN2zRzXQ3FxTB17QmZOkf7293+pRbNGGtrzCuXlFfm6JZ+y2Zpd8LqysjI5nU6FhYWptLRU\nW7Zs0fjx47V582bNnz9fixcvVmhoqFv3w7Q8AJwhpHEzbf6+UN8d+fmSa3Dq61/t3J8nh9PQfbd3\nVtPGRM7F5Ofna/z48bJYLHI4HEpJSVGvXr00YMAAVVRUaOzYsZKkG264QVOnTr1oLYvho7/Chv7f\nG4D6U5uROzzv1q5tdO+gzr5uwy9cbOTuSfwbBQBnKCvK0wN3XKM2bdrWqY7FQ/0EupDgIHVoE+Hr\nNhocwh0AzuCoLNcV0Y0V176Fr1sBLhnfcwcAwGR8NnLPyPjhvNuvuKKDrFa+IgcAwKXyWbhPmrft\nnG2lhbl67ZkhfAUFAIA68Fm4c9QqAADewWfuAACYDOEOAIDJEO4AAJgM4Q4AgMkQ7gAAmAzhDgCA\nyRDuAACYDOEOAIDJEO4AAJgM4Q4AgMkQ7gAAmIzLcJ88ebJ69uyplJSU816/evVqDRkyREOHDtVd\nd92l/fv3e7xJAADgPpfhPnz4cKWlpV3w+ssvv1zvv/++Vq5cqUceeURTpkzxaIMAAKB2XIZ7QkKC\nIiIiLnh9165d1axZs+qfc
3JyPNcdAHiMxdcNAPXGo5+5L1myRImJiZ4sCQAeEWT12RmugXrnsb/2\nbdu26aOPPtL//u//eqokAHhMkDXE1y0A9cYj4b5v3z49//zzmj9/vpo3b+6JkgDgUYQ7GhK3puUN\nw7jgdVlZWZowYYJmzZqldu3aeawxAPAkpuXRkLj8a3/qqae0fft2HT9+XLfddpsee+wxVVRUyGKx\naPTo0XrzzTdVWFioF198UYZhKDg4WEuXLq2P3gHAbYzc0ZC4DPfZs2df9Prp06dr+vTpHmsIADyt\n5JRDQUFWX7cB1BtWqANgenmFFb5uAahXhDsA0yPc0dAQ7gBML5dwRwNDuAMwvbzCiot+6wcwG8Id\ngKk5DUN5JyrkdFb6uhWg3hDuAEytoPCkyisNOR1MzaPhINwBmNrhvBJJItzRoBDuAEzNnl8sSXI6\nmJZHw0G4AzA1Ru5oiAh3AKZ2OK9YIVaLDKfD160A9YZwB2BalQ6nso+WytacdeXRsBDuAEwru6BU\nDqdBuKPBIdwBmJb9l8/boyMIdzQshDsA0zqcd/pIeUbuaGgIdwCmVT1yJ9zRwBDuAEzrcF6xmjUN\nUVhjzuWOhiXY1w0AgDeUnapUfuFJdWnfwtetAG7r27evwsPDFRQUpODgYC1dulSFhYV64oknZLfb\n1bZtW/33f/+3mjVrdtE6jNwBmFLW0dNT8rFRYT7uBHCfxWLRokWLtGLFCi1dulSSNG/ePP3mN7/R\nunXrdMstt+jtt992WYdwB2BKVZ+3t40O93EngPsMw5DT6ayxbcOGDUpNTZUkpaam6u9//7vLOoQ7\nAFOqOlI+1sbIHYHDYrHo/vvv14gRI7RkyRJJ0tGjRxUVFSVJstlsOnbsmMs6fOYOwJSqRu5tWoYp\n67CPmwHc9MEHH8hms6mgoEBjx47VlVdeKYvFUus6Pgv3N57uo/atI3x19wBMLutoiWIim6pd2xZq\n17aFPu9xo69bAlyy2WySpMjISPXr10+7d+9Wy5YtlZ+fr6ioKOXl5SkyMtJlHZ+F+/hXPz9nW/Ex\nu2aO66G4uKt80BEAszhRUq7C4nJd2TFCeXlFysj4QZPmbVN4i1iXt+V9CN5ks134KPeysjI5nU6F\nhYWptLRUW7Zs0fjx49W3b1999NFHGjdunJYvX66kpCSX98O0PADT4fN2BKL8/HyNHz9eFotFDodD\nKSkp6tWrl6677jr9+7//u5YtW6Y2bdrotddec1mLcAdgOlWftxPuCCSXX365Vq5cec72yy67TO++\n+26tanG0PADTqRq5t7XxNTg0TIQ7ANOx55fIGmRRq8imvm4F8AnCHYCpOA1D9rwStWrZVMFW3uLQ\nMPGXD8BU8gtP6lSFgyl5NGguw33y5Mnq2bOnUlJSLrjP9OnTNWDAAA0dOlR79+71aIMAUBv2qiPl\nWVMeDZjLcB8+fLjS0tIueP2mTZuUmZmp9evX66WXXtILL7zg0QYBoDYOV60pz8gdDZjLcE9ISFBE\nxIVXktuwYYOGDRsmSbrhhhtUVFSk/Px8z3UIALVgrz5SnpE7Gq46f+aem5urVq1aVV+OiYlRTk5O\nXcsCwCWx55WoUahVkc0b+7oVwGfqHO6GYZyz7VIWuQeAuqp0OJVdUKq2UWEK4n0IDVidwz0mJkbZ\n2dnVl7OzsxUdHV3XsgBQa9lHS+VwGqxMhwbPrXA/3+i8SlJSklasWCFJ+uabbxQREVF93lkAqE+/\nrinPwXRo2FyuLf/UU09p+/btOn78uG677TY99thjqqiokMVi0ejRo3Xrrbdq06ZN6t+/v5o0aaKZ\nM2fWR98AcA57/i9HyvM1ODRwLsN99uzZLos8//zzHmkGAOricO4vI/doRu5o2FihDoBp2PNLFBEW\nqoimob5uBfApwh2AKZSdqlR+4UlWpgNEuAMwiax8VqYDqhDuAEzh1yPlGbkDhDsAU2BNeeB
XhDsA\nU+BscMCvCHcAAc8wDB3OK5HtssZqFGr1dTuAzxHuAALeidIKFZdVMCUP/IJwBxDwOJgOqIlwBxDw\n7LlV53Bn5A5IhDsAEzj8y3fcOZgOOI1wBxDw7HnFsgZZFBPZ1NetAH6BcAcQ0JyGIXt+iVq3DFOw\nlbc0QCLcAQS4/ONlKq9wqi0H0wHVCHcAAc3+y8p0HCkP/IpwBxDQfv0aHEfKA1UIdwAB7dc15Rm5\nA1UIdwABzZ5fosahVrWMaOzrVgC/QbgDCFgVlU5lHy1VrC1MFovF1+0AfoNwBxCwsgtK5TQMVqYD\nzkK4AwhYhznNK3BehDuAgGWvPpiOkTtwJsIdQMDibHDA+RHuAAKWPa9YzcNC1axpqK9bAfwK4Q4g\nIJWdqtTRE6f4fjtwHoQ7gID067KzfN4OnI1wBxCQDufzeTtwIYQ7gIBkz+VIeeBCCHcAAcmeXyyL\npDZ8xx04B+EOIOAYhqHDeSWytWiiRiFWX7cD+B23wn3z5s0aNGiQBg4cqHnz5p1z/ZEjR/T73/9e\nqampGjp0qDZt2uTxRgGgSmFJuYrLKliZDqbkdDo1bNgwPfzww5KkL7/8UsOHD9ewYcN0991369Ch\nQy5ruAx3p9OpadOmKS0tTWvWrFF6eroyMjJq7PPWW29p8ODBWr58uebMmaMXX3zxEn8lAHCNlelg\nZgsXLlTHjh2rL0+dOlWzZ8/WihUrlJycrDfffNNlDZfhvnv3brVv316xsbEKCQlRcnKyNmzYUGMf\ni8Wi4uLTR66eOHFCMTExtf1dAMBtrEwHs8rOztamTZs0atSo6m1BQUEqKiqSJBUXFys6OtplnWBX\nO+Tk5Kh169bVl2NiYvTtt9/W2Gf8+PEaO3asFi1apJMnT+qdd95x+xcBgNpi5A6zmjFjhiZOnFgd\n5pI0bdo0Pfjgg2rSpInCw8P1t7/9zWUdl+FuGIbLIunp6RoxYoT+8Ic/6JtvvtEzzzyj9PT0i97m\njaf7qH3rCJe1AeBs2cfLFGwN0nVXR8tqdX3okM12oz7vcWM9dAZcuo0bNyoqKkpdunTR9u3bq7e/\n9957mj9/vuLj47VgwQLNnDlT06dPv2gtl+HeqlUrZWVlVV/Oyck5Z0pg6dKlSktLkyR17dpVp06d\nUkFBgSIjIy9Yd/yrn5+zrfiYXTPH9VBc3FWu2gLQQDmdhjKPnFDrlk1VUFDi1m0yMn7QpHnbFN4i\n1uW+vA/Bm2y2Zhe8bteuXfrss8+0adMmnTp1SiUlJXrooYf0888/Kz4+XpJ0++2368EHH3R5Py7/\n5Y2Pj1dmZqbsdrvKy8uVnp6upKSkGvu0adNGW7dulSRlZGSovLz8osEOAJcqr7BM5ZVO1pSH6Tz5\n5JPauHGjNmzYoDlz5uiWW27RW2+9paKiIh08eFCStGXLFnXo0MFlLZcjd6vVqilTpmjs2LEyDEMj\nR45UXFycXn/9dcXHx6tPnz569tln9Z//+Z969913FRQUpJdffrnuvyUAnMfhXNaUR8MRFBSkadOm\nafz48bJarYqIiNCMGTNc3s5luEtSYmKiEhMTa2ybMGFC9c9xcXH661//WsuWAaD27L+sKc/IHWbW\nvXt3de/eXZLUr18/9evXr1a3Z4U6AAHlcNXZ4KIYuQMX4tbIHf7N4XDowIGf3N7/iis6yGplyU4E\nJntesZo0ClZkRCNftwL4LcLdBA4c+EmPv7JKTZu7XtigtDBXrz0zhCOBEZAqKp3KKShTh9gIWSwW\nX7cD+C3C3SSaNo9262s+QCA7crRETsNQW9aUBy6Kz9wBBIyqlek4Uh64OMIdQMCoWlOeI+WBiyPc\nAQQMez4jd8AdhDuAgHE4r1iXhYcqvEmIr1sB/BrhDiAglJ6sVMGJU4zaATcQ7gACAivTAe4j3AEE\nBFamA9xHuAMICPaqI+WjGbkDrhDuAALC4bwSWSS1bkm
4A64Q7gD8nmEYsucVK7pFEzUK4bwIgCuE\nOwC/d7y4XCUnK9WWI+UBtxDuAPxe1eftsRwpD7iFcAfg96qOlGfkDriHcAfg9xi5A7VDuAPwe4fz\nSxRsDVJ0iya+bgUICIQ7AL/mdBrKyi9Rm6imsgbxlgW4g1cKAL+Wd7xMFZVOVqYDaoFwB+DXDrMy\nHVBrhDsAv8aa8kDtBfu6AcDbTpSW68fDhXI6DV+3gkuw90CBJM4GB9QG4Q5TKiw+pV3/ytPO/Xna\nl3lMBrke0MKbhKhFs0a+bgMIGIQ7TONY0Sn9c3+udu7P0w+Hjqsqz+NiI3RDXJSaNOLPPVBd2TpC\nFovF120AAYN3OwS0o4UnqwP9R3uhJMki6aq2zXVT52jddLVNkRGNfdskANQzwh0BJ/d42elA35en\nn4+ckCRZLFLndpcpoXO0brzapsvCmcIF0HAR7ggI2QWl1YF+MKdIkhRksejaK1rops7RuvEqmyLC\nQn3cJQD4B7fCffPmzZoxY4YMw9CIESM0bty4c/ZZu3at/vznPysoKEidOnXSq6++6vFm0bBk5Zdo\n5y+BXvVdZ2uQRfEdWiqhk03drrYpvEmIj7sEAP/jMtydTqemTZumd999V9HR0Ro5cqSSkpIUFxdX\nvc/Bgwc1f/58/e1vf1N4eLgKCgq82jTMwTjrEHZDUlbeL4G+P09Z+ae/3xxstahrxyjd1MmmrldF\nKawxgQ4AF+My3Hfv3q327dsrNjZWkpScnKwNGzbUCPcPP/xQv/vd7xQefnqRicjISC+1i7oKu6yN\nZiw9JIsOX3INQ+f5Xplx0Yu1FhIcpBuvtimhk003dORIdwCoDZfvmDk5OWrdunX15ZiYGH377bc1\n9jlw4IAk6a677pJhGPrjH/+o3r17e7ZTeITTUa7Lo5qpcZO6nV3rfF9KOmfbeb66dPaWs3eJCAvV\njVfbFN+hJYEOAJfI5bvn2VOn5+NwOJSZman3339fWVlZuvvuu5Wenl49kof/KCvK1713dVRc3FW+\nbgUA4CUu15Zv1aqVsrKyqi/n5OQoOjq6xj4xMTFKSkpSUFCQ2rZtqyuvvLJ6NA8AAOqXy3CPj49X\nZmam7Ha7ysvLlZ6erqSkpBr79OvXT9u2bZMkFRQU6ODBg7r88su90zEAALgol9PyVqtVU6ZM0dix\nY2UYhkaOHKm4uDi9/vrrio+PV58+fdS7d2998cUXSk5OltVq1cSJE9W8efP66B8AAJzFrSOWEhMT\nlZiYWGPbhAkTalx+7rnn9Nxzz3muMwAAcEk4nzsAACZDuAMAYDKEOwAAJkO4AwBgMoS7aZxvzTgA\nQKBxOp1KTU3Vww8/XL3tv/7rvzRw4EAlJydr8eLFLmuwvqdJBIc29nULAAAPWLhwoeLi4lRcfPps\nmMuWLVNOTo7WrVsnSW6dnI2Ru0kEhzT1dQsAgDrKzs7Wpk2bNGrUqOptH3zwgf74xz9WX3bn5GyE\nuwmUlTtlDWnk6zYAAHU0Y8YMTZw4UZYzzqqVmZmp9PR0jRgxQuPGjdPBgwdd1iHcTeBfWWU1/hAA\nAIFn48aNioqKUpcuXWqctK28vFyNGzfWsmXLNGrUKE2ePNllLYvhzmnfvODgkRNq3zrCF3dtOi/8\n5Uvt2pereZP6qXVUmK/bAQBcgjlz5mjVqlWyWq06deqUSkpK1K9fP33//feaP3++2rRpI0lKSEjQ\nzp07L1rLZ+Ge8tTKc7YVH7Nr5rgenI60ForLKvTvr/9DFRXlsgaHut6fxxgNUEbGD5o0b5vCW8S6\n3JfXCLzJZmvm1n47duzQggULNHfuXM2ZM0ft27fXiBEjtH37dr366qtasmTJRW/P0fIBbte/8uQ0\npMryMrfCHQAQWB588EE9/fTTevfddxUWFqbp06e7vA3hHuC+2psjSaqsKFMjcSY+ADCD7t27q3v3\n7pKkZs2a6e23367
V7TmgLoCdKC3X3oPH1SYyVIbT4et2AAB+gnAPYLv258lpGLrmcr7jDgD4FeEe\nwL7alytJ6tK2iY87AQD4E8I9QBWWlGtf5jF1jG2uiKYcOgEA+BXhHqD+uT9XhiHd3Dna160AAPwM\n4R6gduzNlUVSAuEOADgL4R6AjhWd0g+Hjuuqts3VohlrygMAaiLcA9A/9+fKkHRzlxhftwIA8EOE\newDase+XKflONl+3AgDwQ4R7gCk4cVI/Hi5Up3aXqXk4U/IAgHMR7gFm5y/fbecoeQDAhRDuAear\nfbmyWKSbOhHuAIDzI9wDSH5hmTKyTqhzuxaKCOMMcACA8yPcA8jOfXmSpO5dGLUDAC6McA8gX+3L\nUZDFohuv5ih5AMCFEe4BIvd4mX4+UqRrrmihZk2ZkgcAXJhb4b5582YNGjRIAwcO1Lx58y643yef\nfKLOnTvr+++/91iDOI2j5AEA7nIZ7k6nU9OmTVNaWprWrFmj9PR0ZWRknLNfSUmJFi9erK5du3ql\n0Ybuq725sgZZ1I0peQCACy7Dfffu3Wrfvr1iY2MVEhKi5ORkbdiw4Zz9XnvtNT344IMKCQnxSqMN\nWc6xUh3MKdK1V0YqvAmPLwDg4lyGe05Ojlq3bl19OSYmRrm5uTX22bt3r7Kzs3Xrrbd6vkPoq71M\nyQMA3BfsagfDMFxeP2PGDL388stu3wa1s2NvroKtFnW7KsrXrQAAAoDLkXurVq2UlZVVfTknJ0fR\n0b+OIEtKSvTjjz9qzJgx6tu3r/7v//5Pjz76KAfVeciRoyU6nFes665sqaaNmZIHALjmcuQeHx+v\nzMxM2e122Ww2paena86cOdXXh4eH68svv6y+PGbMGE2aNEnXXHONdzpuYJiSBwDUlstwt1qtmjJl\nisaOHSvDMDRy5EjFxcXp9ddfV3x8vPr06VNjf4vFwrS8B321L1fB1iB1ZUoeQAPkcDh04MBPtbrN\nFVd0kNVq9VJHgcFluEtSYmKiEhMTa2ybMGHCefdduHBh3buCJMmeVyx7fom6XRWlJo3ceqoAwFQO\nHPhJj7+ySk2buzd7WVqYq9eeGaK4uKu83Jl/IzH82Fe/LFzTvUuMjzsBAN9p2jxa4S1ifd1GQGH5\nWT9lGIa+2per0OAg3dCxpa/bAQAEEMLdTx3OK9GRo6W6Pq6lGocywQIAcB/h7qe+2pcjSbqZKXkA\nQC0R7n7IMAx9tTdXoSFBur4DU/IAgNoh3P1QZk6xco6VqWvHKDUKbdhf5wAA1B7h7oe+4vSuAIA6\nINz9jGENKQzkAAAQJUlEQVQY2rE3R41CrYpnSh4AcAkIdz9zILtI+YUn1a1jlEJDmJIHANQe4e5n\nmJIHANQV4e5Hqo6Sb9LIqus6RPq6HQBAgCLc/chPR07o6ImT6trRppBgpuQBAJeGcPcjVad37d6F\nKXkAwKUj3P2E85e15Js2Cta1VzIlDwC4dIS7n8iwF+pY0SndeLVNwVaeFgDApSNF/ETVlPzNTMkD\nAOqIcPcDTsPQV/tzFdY4WF3at/B1OwCAAEe4+4EfDh1XYXG5burElDwANHROp1Opqal6+OGHa2yf\nNm2aunXr5lYNksQP/LpwDad3BYCGbuHChYqLi6ux7bvvvlNRUZEsFotbNQh3H3M6De3cn6fwJiHq\n3P4yX7cDAPCh7Oxsbdq0SaNGjare5nQ6NWvWLE2cONHtOoS7j+0/dFwnSsqV0MkmaxBPBwA0ZDNm\nzNDEiRNrjNAXL16sfv36KSoqSoZhuFUn2FsNXoqmETF66+MjCgnJv+Qa7v3a/qP0ZKUk1pIHgIZu\n48aNioqKUpcuXbR9+3ZJUm5urj755BMtXry4VrUshrv/BnjYmBc+kfcGqu59JuEv2rVqpqkP/kbW\noMDqGwDgOXPmzNGqVatktVp16tQplZSUKDQ0VCEhIWrUqJEMw9CRI0fUrl07rVu37
qK1fBbuKU+t\nPGdb8TG7Zo7robi4q3zQUeDKyPhBk+ZtU3iLWJf78hijIeI1Erhq89xJ/v/82WzN3Npvx44dWrBg\ngebOnVtje7du3fT111+7vD0f8gIAECDcPVrerz5zBwAAUvfu3dW9e/dztu/atcut2zNyBwDAZAh3\nAABMhnAHAMBkCHcAAEzGrXDfvHmzBg0apIEDB2revHnnXP/uu+8qOTlZQ4cO1X333acjR454vFEA\nAOAel+HudDo1bdo0paWlac2aNUpPT1dGRkaNfa655hp99NFHWrlypQYMGKBZs2Z5rWEAAHBxLsN9\n9+7dat++vWJjYxUSEqLk5GRt2LChxj7du3dXo0aNJEldu3ZVTk6Od7oFAAAuuQz3nJwctW7duvpy\nTEyMcnNzL7j/0qVLlZiY6JnuAABArblcxKY2q9OuXLlS33//vRYtWlSnpgAAwKVzGe6tWrVSVlZW\n9eWcnBxFR597BrOtW7dq3rx5Wrx4sUJCQjzbJQAAcJvLafn4+HhlZmbKbrervLxc6enpSkpKqrHP\nnj179MILL+itt95SixYtvNYsAABwzeXI3Wq1asqUKRo7dqwMw9DIkSMVFxen119/XfHx8erTp49e\neeUVlZWV6fHHH5dhGGrTpo3efPPN+ugfAACcxa0TxyQmJp5zkNyECROqf37nnXc82xUAALhkrFAH\nAIDJEO4AAJgM4Q4AgMkQ7gAAmAzhDgCAyRDuAACYDOEOAIDJEO4AAJgM4Q4AgMkQ7gAAmAzhDgCA\nyRDuAACYDOEOAIDJEO4AAJgM4Q4AgMkQ7gAAmAzhDgCAyQT7ugHAkxwOhw4c+Mnt/a+4ooOsVqsX\nOwKA+ke4w1QOHPhJj7+ySk2bR7vct7QwV689M0RxcVfVQ2cAUH8Id5hO0+bRCm8R6+s2AMBn+Mwd\nAACTIdwBADAZwh0AAJMh3AEAMBnCHQAAkyHcAQAwGcIdAACTIdwBADAZwh0AAJNxK9w3b96sQYMG\naeDAgZo3b94515eXl+uJJ57QgAEDNHr0aGVlZXm8UQAAGgKn06lhw4bp4YcfliQ9/fTTGjRokFJS\nUvQf//EfcjgcLmu4DHen06lp06YpLS1Na9asUXp6ujIyMmrss3TpUjVv3lzr16/Xvffeq1deeeUS\nfyUAABq2hQsXqmPHjtWXhwwZok8++USrV6/WyZMntWTJEpc1XIb77t271b59e8XGxiokJETJycna\nsGFDjX02bNig1NRUSdLAgQP15Zdf1vZ3AQCgwcvOztamTZs0atSo6m2JiYnVP8fHxys7O9tlHZfh\nnpOTo9atW1dfjomJUW5ubo19cnNz1apVK0mS1WpVRESEjh8/7vq3AAAA1WbMmKGJEyfKYrGcc11l\nZaVWrVql3r17u6zj8qxwhmG4LHL2PoZhnLexMxUfs5+zrbQw9zx7XlhGxg9u71ub03p6q643a7v7\n2PnLY+zN2jwWDaNubWsH2t+FNx8Lf+jZG89dbfeV6v+xsNluvOBtNm7cqKioKHXp0kXbt28/5/oX\nX3xRN998s2666SaX928xXKT3N998o//5n/9RWlqaJFUfUDdu3LjqfR544AE99thjuuGGG+RwONSr\nVy+m5gEAqIU5c+Zo1apVslqtOnXqlEpKStS/f3/NmjVLb7zxhvbt26c33njDrVoup+Xj4+OVmZkp\nu92u8vJypaenKykpqcY+ffr00fLlyyVJn3zyiXr06HEJvxYAAA3Xk08+qY0bN2rDhg2aM2eObrnl\nFs2aNUtLlizRli1bNGfOHLdruZyWt1qtmjJlisaOHSvDMDRy5EjFxcXp9ddfV3x8vPr06aNRo0bp\nmWee0YABA3TZZZfVqgEAAHBhU6dOVWxsrO68805ZLBb1799fjz766EVv43JaHgAABBZWqAMAwGQI\ndwAATIZwBwDAZOo13Lt16+a1ujt27Kheh9fTt
b3hfHUnTZqk9evXe7yupwTi81dVPy8vT48//rjf\n162Px8JutyslJcUjtbypPl97Dbm2t3vOysrSmjVrPFrTGwLxubuYeg13Vwvb1KWuN2tT17u1vfn8\nVdW32Wx67bXX/L5ufTwWnqrvzT69WT8QXyPnq+10Or1S15MsFosOHz7s0XDn78I99T4tX1ZWpj/8\n4Q8aPny4hgwZcs469Zei6oD/4uJiPfTQQxo0aJCmTp1a57pn+stf/qKUlBQNGzbMo1/1e+mll3T7\n7bdr7NixOnr0qMfqpqWlaeTIkRo6dKjbix64a+rUqRo8eLDuv/9+jRs3rs6zDVXPX0lJiSZMmKDb\nb79dzzzzjCdareap0aq365795ZXdu3crNTVVhw8f9th9eIphGPrqq680ZswYPfroo+rfv79mz56t\n1atXa9SoURoyZIgOHTrkkfre+Lt4+eWXlZKSoiFDhmjt2rUeqyvVfF174jUi/fp43H333XrkkUc0\nePBgD3R6Wl5enu655x6lpqYqJSVF//znPz1S1zAMzZkzRzt37lRqaqree+89j9Q8e3Zr2rRpWrFi\nRZ3rPvnkk9q8eXP1tkmTJunTTz+tU92q2s8++2yNvHv66af1+eef17n2hbj8nrunNWrUSH/+858V\nFhamY8eOafTo0ecsilNbFotFhmFo9+7d+vjjj9WmTRvdf//9Wr9+vQYMGFCn2oZhaPPmzfrss8+0\nbNkyhYaG6sSJE3WqWVX3008/1cGDB/Xxxx8rNzdXycnJGjlyZJ3rfvHFFzp48KCWLl0qwzD0yCOP\naOfOnUpISKhz7XXr1unIkSNau3at8vPzNXjw4Dr3XPX87du3T+np6bLZbLrrrru0a9cu3XjjhZdq\nrO19eIM3/yP/+uuvNX36dM2dO1cxMTFeu59LVfW87d+/Xx9//LGaNWumfv366c4779SSJUu0cOFC\nLV68WJMmTapTfU//XRiGofXr1+tf//qXVq9eraNHj2rkyJHq3r27oqKiLrluVW1vvK6lXx+PPXv2\nKD09XW3atKlzTel0z+np6erdu7ceeughGYahsrIyj9SWpKeeekppaWmaO3eux2p663U3ePBgrV27\nVomJiaqoqNC2bdv04osveqT2qFGj9M477ygpKUnFxcX65ptvNGvWLI/UPp96H7kbhqHZs2dryJAh\nuu+++5Sbm1vnEWvVWvY33HCDYmNjZbFYlJyc7LH/Pr/88ksNHz5coaGhkqSIiAiP1N25c6eSk5Ml\nSdHR0R5b2W/Lli364osvlJqaqtTUVP388886ePCgR2rv2rVLgwYNkiRFRUXplltuqXPNqufv+uuv\nV3R0tCwWizp37iy7/dzzDzQUGRkZev755/022KVfn7f4+Hi1bNlSoaGhateunX77299Kkq6++uo6\nPYfe/LvYtWtX9WuvZcuW6t69u7799ts615W897o+8/HwVLBXue6667Rs2TK98cYb2r9/v5o2berR\n+oHAYrEoMTFR27ZtU0VFhTZv3qyEhITq9/261k5ISFBmZqYKCgq0Zs0aDRgwQEFB3ovgeh25G4ah\nVatW6fjx41qxYoWCgoLUt29fnTp1yiv354vPOWrLWz0+9NBDuvPOOz1e15trHoWEhFT/bLVa5XA4\nvHZf/s5ms6m8vFx79uzRrbfe6ut2LurM581isVS/GQYFBamystKj9T31d3G+k115kjffe5o0aeLR\nelXB8/7772vjxo167rnndN9992no0KEevR9PslqtNY458FSGhIaG6pZbbtE//vEPrV271uMf5Q0d\nOlSrVq3S2rVrNXPmTI/WPlu9j9yLi4sVGRmpoKAgbdu2TVlZWXWueea0vN1ul9Pp1Nq1a906c447\nfvvb32rZsmU6efKkJKmwsNAjdW+++WatWbNGTqdTubm55z0L0KXo3bu3li5dqtLSUkmnT9tbUFDg\nkdo33XST1
q1bJ8MwlJ+frx07dnikLmqKiIjQvHnzNGfOHB5jL7j55puVnp4up9OpgoIC7dy5U9df\nf73Hanvjde0thmHoyJEjioyM1KhRozRq1Cjt2bPHI7UtFovCwsJUUlLikXpVNWNjY/Xjjz+qoqJC\nRUVFHj1R2eDBg/XRRx9p165d6tWrl8fqSlJqaqoWLlwoi8WiuLg4j9Y+W72N3B0Ohxo1aqSUlBQ9\n/PDDGjJkiK677ro6/4IOh0MhISHV01XTpk3TwYMH1aNHD/Xv398jPffq1Ut79uzRiBEjFBoaqsTE\nRD3xxBN1rtuvXz99+eWXSk5OVps2ber8dYmquj179tSPP/6o0aNHS5LCwsL0yiuvKDIyss61Bw4c\nqK1btyo5OVmtW7fWtddeq2bNmtW57tk8NfJxOBwemVarj7pnPxaRkZGaO3euxo0bpz/96U91Dh9P\n9lwfz5s36p/52tu1a5eGDh0qi8WiiRMnqmXLlh6r7cnX9Zm1Pa3qb2L79u1KS0tTcHCwwsLC9PLL\nL3usdqdOnWS1WjVs2DClpqbq3nvvrXPNmJgY3X777brjjjvUtm1bXXvttR7pVTo9mHv22WeVlJSk\n4OC6R+SZtVu2bKkOHTrUOZvcYtSTvXv3GqNGjQqYut6sHWh1z65dUlJiGIZhHDt2zOjfv7+Rn5/v\nkbre4O3H+rvvvjPuuecej9b0Fk/WD6Re66PuxWo/99xzxrp167xSu64C7f0z0B+H0tJSo3///kZR\nUZFX7utM9RLuf/3rX43k5GRj69atAVHXm7UDre75at9zzz3G0KFDjeTkZGP58uUeq+tp3n6s58+f\nbwwcOND49NNPPVYzEB6LQOq1Puq6ql3XcDfb4+FPNb1Z98zaX3zxhbF161bjtttuMxYuXOjx+zkf\nzgoHAIDJsLY8AAAmQ7gDAGAyhDsAACZDuAMAYDKEOwAAJkO4AwBgMv8PtThcLKoRnJUAAAAASUVO\nRK5CYII=\n", 203 | "text/plain": [ 204 | "" 205 | ] 206 | }, 207 | "metadata": {}, 208 | "output_type": "display_data" 209 | } 210 | ], 211 | "source": [ 212 | "j = df.select(lambda x: df.name.values[x].startswith(\"J\"))\n", 213 | "pareto(j, 2)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "Now the line crosses 50% at names beginning with Jo, so let’s zoom in again." 
221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 7, 226 | "metadata": { 227 | "collapsed": false, 228 | "deletable": true, 229 | "editable": true 230 | }, 231 | "outputs": [ 232 | { 233 | "data": { 234 | "text/plain": [ 235 | "" 236 | ] 237 | }, 238 | "execution_count": 7, 239 | "metadata": {}, 240 | "output_type": "execute_result" 241 | }, 242 | { 243 | "data": { 244 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhMAAAFXCAYAAAAYgszDAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XtclHXe//HXzAAKCChnPKZQ6ZbaQVMXs1ZUUkPBQ3Xv\n1t2qt3ZYa8s1d21/3bZZ+tgsW++tvTc7bLnrfXevB7grNU1M1DxUehuuZVueUJSTHBxAOc31+4OY\ntJAZmIGBmffz8Sjhy3V9rg8wMG+u6zvX12QYhoGIiIhIC5k93YCIiIh0bAoTIiIi4hKFCREREXGJ\nwoSIiIi4RGFCREREXKIwISIiIi5xKkxYrVYeffRRxo8fz8SJE/n8888pKytj5syZJCcnM2vWLKxW\nq337Z599lnHjxjF58mS+/PJL+3h6ejrJyckkJyeTkZFhHz98+DApKSkkJyfz3HPP2cdbcgwRERFx\nzujRo5k0aRKpqalMmzYNgOeff57x48czefJkHnnkEcrLyx0XMpzw61//2li7dq1hGIZRU1NjnD9/\n3nj++eeNlStXGoZhGK+++qqxbNkywzAMY/v27cbs2bMNwzCMgwcPGtOnTzcMwzBKS0uNpKQk4/z5\n80ZZWZn9bcMwjGnTphmff/65YRiG8W//9m/Gjh07DMMwmn0MERERcd7o0aON0tLSy8Y+/vhjo66u\nzjAMw1i2bJnxwgsvOKzj8MxEeXk5n332GVOnTgXAz8+PkJAQMjMzSUtLAyAtLY3MzEwAMjMzSU1N\nBWDw4MFYrVaKiorYtWsXiYmJhISEEBoaSmJiIjt37qSwsJCKigoGDRoEQGpqKlu3brXXas4xRERE\nxHmGYWCz2S4b+/GPf4zZXB8PbrjhBvLy8hzWcRgmTp8+Tbdu3Vi4cCFpaWk89dRTXLhwgXPnzhEZ\nGQlAVFQUxcXFABQUFBAbG2vfPzY2lvz8fPLz84mLi7OPx8TE2Mcv3b5hHHD6GJfuIyIiIs4xmUzM\nmjWLqVOn8ve///0HH1+7di2jRo1yWMdhmKitreWLL77gpz/9Kenp6QQGBrJy5UpMJlOj2xvfuzu3\nYRiYTKYfjDd8Elcab0pL9hEREZHLvfPOO6xfv57XXnuN1atX89lnn9k/9p//+Z/4+/uTkpLisI7D\nMBEbG0tsbCwDBw4EYNy4cXzxxRdERETYLy0UFhYSHh4O1J8luPSUSF5eHtHR0cTGxnLmzJlGx8+e\nPWsfz8/PJzo6GoDIyMhmHaMptbV1jj5VERERnxIVFQVAeHg4Y8eO5dChQ0D9CyaysrJ48cUXnarj\n52iDyMhI4uLiOH78OH379mXv3r0kJCSQkJDA+vXrmTNnDunp6SQlJQGQlJTE6tWrmTBhAgcPHiQ0\nNJTIyEhGjhzJSy+9hNVqxWazsXv3bubPn09oaChdunQhOzubgQMHkpGRwX333QfUzzJtzjGaUlJS\n6dQXRERExBtERYU0+fELFy5gs9kIDg6msrKSXbt2MXfuXHbs2MHrr7/O3/72NwICApw6lslo7JrB\n9xw5coTf/va31NbW0qtXL5YuX
UpdXR2PPfYYZ8+epXv37qxYsYLQ0FAAnnnmGXbu3ElgYCBLly7l\nuuuuA2D9+vX8+c9/xmQy8dBDD9knUf7jH/9g4cKFVFVVMWrUKP7f//t/AJSWljb7GFdSWGht8uMi\nIiLexFGYOHXqFHPnzsVkMlFXV0dKSgpz5sxh3Lhx1NTU0LVrV6D+hQ5PP/10k7WcChPeQGFCRER8\niaMw4U66A6aIiIi4xOGcCREREWld5yurOXHWCtRfLGi4ZnDZpQPj0jeNK4x/J7kNz0woTIiIiHjI\nhapatnx6ig8+yaGq2r2vOkxO7OfWek1RmBAREWljtXU2dnx+hnd3Hed8ZQ2hQf6MHdKLwADLdxuZ\nLn3zh/dSMpka3fTyD7QRhQkREZE2YhgGn31VyLqsoxSUXKCTv4XJI/uSfEsvOgd03Kfkjtu5iIhI\nB/JVTgl//+gox8+ex2I2MfqmHqQk9iUs2Ll7ObRnChMiIiKt6HRBOWuzjpJ99BwAQ/pHM3VUP2LC\ngzzcmfsoTIiIiLSC4vMXSd95jN2H8jCA/r27Mu32BPp1D/V0a26nMCEiIuJGFRdr2LDnJFs/O01t\nnY0eUcFMvz2egf0ivHZRSoUJERERN6iprSNzfy4b9pyg4mIt4aGdSLu1HyOui8Vs9s4Q0UBhQkRE\nxAU2m8Gew3mk7zxG8fkqgjv7cddPEki6uQf+fhbHBbyAwoSIiEgLGIbBoWPnWLv9KKcLK/CzmLlj\nWG8mjuhDcGd/T7fXphQmRESkw7HZDOpsNmrrDGrr6v+tq7NRa6t/v84+Xj9WV2fDncta1tTa2Hbg\nNEdySjEBiQNjSbu1H+Ghnd13kA5EYUJExIfYDIOMnccpsV50a13DqP9L3dbwr83AMOqP1/CvzTAw\nbJdsY//3221sl29rDwgNYcHWEBrqP94eDIqPYNpt8fSM7uLpVjxKYUJExId8c7qM93ef8HQbdiYT\nmE0mTCYTZhOYzPX/mk0mLBYzfhYTAX4WAjuZ8Pv2fYvFjJ+54X0zFsu3b5u/28c+bv5uH3e/kCK+\nexjX9Orq3qIdlMKEiIgPOZJTAsDPx/dnQJ9ubqtrAszmS0KByfTt+w1h4ZLQYK7/uAm89qWSvkZh\nQkTEhxw5WYIJuOmaKLoE+tYkQWk9Zk83ICIibaOmto6jZ87TM7qLgoS4lcKEiIiPOHbmPDW1Nvr3\ndt/lDRFQmBAR8RlHckoB6N9HkwbFvRQmRER8RMN8Cb0CQdxNYUJExAdU19Rx9EwZvWNCfO7ujNL6\n9GoOEaCuro4TJ445te1VV/XDYvGN++2L9zh65jy1dQbX9tZZCXE/hQkR4MSJY/xy2bsEhUU3uV1l\nWQErnphEfPzVbdSZiHscOVl/f4n+bry3hEgDhQmRbwWFRdOlWw9PtyHSKr7KKcFkgmt66syEuJ/m\nTIiIeLmqmvr7S/SJCSGos/6GFPdTmBAR8XLf5JZRZzN0iUNajcKEiIiX++rb9Tj6a/KltBKFCRER\nL3fkZClmk4mrNV9CWonChIiIF6uqruP42fP0iQ0hsJPmS0jrUJgQEfFiX+eWfjtfQmclpPUoTIiI\neLGvvl2PY4AW95JWpDAhIuLFjpwswWI2kdAzzNOtiBdTmBAR8VIXqmo5ftbKVXEhdA7QfAlpPQoT\nIiJe6pvcMmyGQX9d4pBWpjAhIuKl7OtxKExIK1OYEBHxUkdySuvnS/TQfAlpXQoTIiJe6EJVLSfz\nrPTtHkqnAIun2xEvpzAhIuKFvj5dqvkS0mYUJkREvNCRk/X3l9B6HNIWFCZERLzQlzkl+FlMxGu+\nhLQBhQkRES9TebGGnHwr/eJC6eSv+RLS+pwKE6NHj2bSpEmkpqYybdo0AMrKypg5cybJycnMmjUL\nq9Vq3/7ZZ59l3LhxTJ48mS+//NI+np6eTnJyMsnJyWRkZNjHDx8+TEpKCsnJyTz33HP28ZYcQ0T
E\n1/3zVBmGAf37aL6ENK2x5/cPPviAO++8kwEDBnD48GGn6jgVJkwmE3/961/JyMhg7dq1AKxcuZIR\nI0awefNmhg0bxquvvgpAVlYWOTk5bNmyhWeeeYZFixYB9cHglVdeYe3ataxZs4aXX37ZHg6efvpp\nnnvuOTZv3syJEyfYuXNni44hIiJwJKf+/hLXavKlONDY8/s111zDyy+/zNChQ52u41SYMAwDm812\n2VhmZiZpaWkApKWlkZmZaR9PTU0FYPDgwVitVoqKiti1axeJiYmEhIQQGhpKYmIiO3fupLCwkIqK\nCgYNGgRAamoqW7dubdExRESkPkz4Wcwk9Aj1dCvSzjX2/N6vXz+uuuoqDMNwuo7TZyZmzZrF1KlT\nWbNmDQDnzp0jMjISgKioKIqLiwEoKCggNjbWvm9sbCz5+fnk5+cTFxdnH4+JibGPX7p9w3hzjnHp\nPiIivqz8Qg2n8stJ6BGKv5/mS0jTLn1+//vf/97iOk6t/PLOO+/Yn8xnzpxJ3759MZlMjW77/SRj\nGAYmk6nRhNPUeFNaso+IiC/4+lQpBrrEIc659Pl9xowZ9OvXjyFDhjS7jlNhIioqCoDw8HDGjBlD\ndnY2ERERFBUVERkZSWFhIeHh4UD9WYK8vDz7vnl5eURHRxMbG8u+ffsuGx8+fDixsbGcPXvWPp6f\nn090dDQAkZGRzTpGU7p1C8JPKV2uICrqJj4afpOn2xBx2cmPTwAwfFB3oqJCPNuMtHuXPr+PHTuW\nQ4cOtU6YuHDhAjabjeDgYCorK9m1axdz585l9OjRrF+/njlz5pCenk5SUhIASUlJrF69mgkTJnDw\n4EFCQ0OJjIxk5MiRvPTSS1itVmw2G7t372b+/PmEhobSpUsXsrOzGThwIBkZGdx3330AzT5GU0pK\nKpv9xRHfcfTo1yxcuZcu3Xo0uV15SS5L5wwnPv7qNupMpHn+76sC/P3MhAf5U1hodbyDeC1HYfJK\nz++XcnbehMMwUVRUxNy5czGZTNTV1ZGSksLIkSO5/vrreeyxx1i3bh3du3dnxYoVANx2221kZWUx\nduxYAgMDWbp0KQBhYWE8/PDDTJ06FZPJxNy5cwkNrZ8ctGjRIhYuXEhVVRWjRo1i1KhRAMyePbtZ\nxxAR8WXlF2o4VVDOgD7d8PfTbYSkaVd6ft+6dSuLFy+mpKSEBx98kP79+/P66683WctkNGe6Zgem\nhC5N0ZkJ8Qb7vyrklfRDpN3al5TEvp5uRzysLS9zKbqKiHgJ3V9CPEVhQkTESxzJKSHAz0y/7rq/\nhLQthQkRES9wvrKa3MIKEnqG4WfRr3ZpW3rEiYh4gX/mNCw5rksc0vYUJkREvEDDfAmFCfEEhQkR\nES9wJKeUTv4WrorTjaqk7SlMiIh0cGUV1ZwpquBqzZcQD9GjTkSkg/vK/pLQrh7uRHyVwoSISAd3\npGHyZR/NlxDPUJgQEengvsopoVOAhT4xmi8hnqEwISLSgZWWV3H2XCXX9Oyq+RLiMXrkiYh0YF/Z\nL3FovoR4jsKEiEgHpvtLSHvgcAlyERFpv46cLCGwk4XeMV0a/XhdXR0nThxzqtZVV/XDYrG4sz3x\nEQoTIiIdVIm1ivySCwyKj8BibvxE84kTx/jlsncJCotuslZlWQErnphEfPzVrdGqeDmFCRGRDsrZ\nSxxBYdF06dajLVoSH6U5EyIiHVTDzao0+VI8TWFCRKSDOnKylKBOfvSO1v0lxLMUJkREOqDi8xcp\nKL3ANb26YjabPN2O+DiFCRGRDui7+RK6xCGepzAhItIBHTmp9Tik/VCYEBHpgI7klBDc2Y+e0Y3f\nX0KkLSlMiIh0MEVlFygqu1g/X8Kk+RLieQoTIiIdzFdaclzaGYUJEZEO5shJrcch7YvChIhIB2IY\nBkdySugS6E+PqGBPtyMCKEyIiHQoRWUXOXe+ims1X0LaEYU
JEZEOxH6JQ/MlpB1RmBAR6UCOfDv5\n8lrdrEraEYUJEZEOomG+REiQPz0iNV9C2g+FCRGRDqKw9AIl1iqu7d0Nk+ZLSDuiMCEi0kE0XOLQ\nehzS3ihMiIh0ELq/hLRXChMiIh1Aw3yJ0OAA4iKCPN2OyGUUJkREOoD8kguUllfTv3dXzZeQdkdh\nQkSkAziSo0sc0n4pTIiIdAAN8yV0fwlpjxQmRETaufr5EqWEdQkgNlzzJaT9UZgQEWnn8oorOV9R\nTX/dX0LaKYUJEZF27ruXhOoSh7RPChMiIu3cdzer0uRLaZ8UJkRE2jHDMPgqp4RuIZ2I7hbo6XZE\nGuXn6QZEROTKzpyr5HxlDSOui9F8CXG70aNH06VLF8xmM35+fqxdu5aysjIef/xxcnNz6dmzJ3/4\nwx8ICQlpso7TZyZsNhtpaWk8+OCDAJw+fZq77rqL5ORk5s2bR21tLQDV1dU8/vjjjBs3jrvvvpsz\nZ87Ya7z66quMGzeO8ePHs2vXLvv4jh07uOOOO0hOTmblypX28ZYcQ0TEW3xzuow33v8CgGt1iUNa\ngclk4q9//SsZGRmsXbsWgJUrVzJixAg2b97MsGHDePXVVx3WcTpMrFq1ivj4ePv7L7zwAjNmzGDz\n5s2EhITYm1i7di1hYWFs2bKF+++/n2XLlgHwzTffsGnTJjZu3Mhrr73G7373OwzDwGazsXjxYt54\n4w3ef/99NmzYwNGjR1t0DBERb1BUeoH/zPgHS/62nxN5Vm4ZEM2I62I83ZZ4oYbn4UtlZmaSlpYG\nQFpaGlu3bnVYx6kwkZeXR1ZWFtOnT7eP7d27l+Tk5B8c7NImkpOT2bt3LwDbtm1jwoQJ+Pn50bNn\nT/r06UN2djbZ2dn06dOHHj164O/vz8SJE8nMzGzWMfbs2ePMpyEi0q5dqKpl7fajPPnaPj49UkDf\nuFCevPdmHpx8Pf5+Fk+3J17IZDIxa9Yspk6dypo1awA4d+4ckZGRAERFRVFSUuKwjlNzJpYsWcKC\nBQuwWq0AlJSUEBYWhtlcn0ViY2PJz88HoKCggNjYWAAsFgshISGUlpaSn5/PDTfcYK8ZExNDfn4+\nhmEQFxd32fihQ4eadYzQ0FBKS0vp2lUvmxKRjsdmM9iZfYb0Hcc4X1lDt5BOTLs9nmE/isGseRLS\nit555x2ioqIoLi5m5syZ9O3bt0VzcxyGie3btxMZGcmAAQPYt28fUH9axDCMy7ZrOPj3xxs+dqXx\n759eadCcYxiG4fCT79YtCD8le7mCqKib+Gj4TZ5uQ3zQ5/8s5PV3/8GJs+fpFGDhZ3f0J/W2eDoH\nuGd+vB7b0pSoqCgAwsPDGTNmDNnZ2URERFBUVERkZCSFhYWEh4c7rOPw0XrgwAG2bdtGVlYWVVVV\nVFRUsGTJEqxWKzabDbPZTF5eHtHR0UD9mYW8vDxiYmKoq6vDarUSFhZGbGwsZ8+etddt2McwjMsm\nUObn5xMdHU14eDjnz5936hjl5eWEhYU1+XmUlFQ6/GKI7zp69GsWrtxLl249mtyuvCSXpXOGEx9/\ndRt1Jt7q7LkK1nx0lIPfFGECEgfGMmVUPN1COmEtu4DVTcfRY9t3RUU1/QqMCxcuYLPZCA4OprKy\nkl27djF37lxGjx7N+vXrmTNnDunp6SQlJTk8lsM5E/PmzWP79u1kZmayfPlyhg0bxgsvvMCwYcP4\n4IMPAC472OjRo0lPTwfggw8+YPjw4fbxjRs3Ul1dzalTp8jJyWHQoEEMHDiQnJwccnNzqa6uZsOG\nDfZaw4cPb9YxRETau/ILNfzXh//k39/4hIPfFHFNr678+8+HMmvij+gW0snT7YkPKSoq4qc//Smp\nqancfffdjB49mpEjRzJ79mx2795tn5M4Z84ch7VafB7tV7/6FfPmzWPFihUMGDCAadOmATB9+nSe\neOIJxo0bR9euXVm+fDk
ACQkJjB8/nokTJ+Ln58eiRYswmUxYLBaeeuopZs6ciWEYTJs2zf6qkeYe\nQ0Skvaqts/HRgVze/fg4FRdrie4ayPSfJHDTNZG6f4R4RK9evfjf//3fH4x37dqVt956q1m1TEZj\nkxm8UGGhu04aijfSqWBpLYZh8Pk35/ifj74hv7iSwE5+pPz4KpJu7om/X+vfhFiPbd/l6DKHO+kO\nmCIireRUQTnvZH7NlydLMJtM/OSmHkwe2ZfQoABPtybiVgoTIiJuVlZRTfqOY+zMPoNhwMB+Edw1\nOoEekcGebk2kVShMiEiHUVB6gYydx6i8WOvpVpr0z1OlXKyuo3tkMPeMTuD6fhGebkmkVSlMiEiH\ncPZcBS+8c5ASa5WnW3EoJMif6bfHM+qG7ljMWpxZvJ/ChIi0e6cLy3nhnYOcr6jmrp8kcPuN3T3d\nUpMC/CyYzXqFhvgOhQkRaddO5ll58X8OUn6hhnvHXcPom3p6uiUR+R6FCRFpt47mlrH8759zsaqW\nGeP7c+vg9n1GQsRXKUyISLv0VU4Jf1ibTU2NjdmTfsTwH8V6uiURuQKFCRFpdw4fL+aP67Kpsxk8\nlHodN18b7emWRKQJChMi0q4c/LqIP2UcAkzMnTKQwQmRnm5JRBxQmBCRduOzIwW8+u5hLBYTj04d\nxI+ucrz0sYh4nsKEiLQLe/6Rx+sbvqCTv4XHpg/mml5dPd2SiDhJYUJEPG7H52d4e9MRAjv5Me/u\nG+jXPdTTLYlIMyhMiIhHZe4/zeoP/0mXQH/m33MDvWPabqVDEXEPhQkR8ZhN+06y5qOjhAUHMP+e\nG+gR1cXTLYlICyhMiEibMwyDdz8+wf/uOk63kE4s+JcbiQkP8nRbItJCChMi0qYMw2Bt1lE27c0h\nMqwzT/zLjUR1DfR0WyLiAoUJEWkzhmHw31u/Zuv+08SEB/HEPTcQHtrZ022JiIsUJkSkTdgMg1Uf\nfMWOz8/QIzKY+ffcQFiXTp5uS0TcQGFCRFpdnc3GmxuOsOdwHr1juvCru28gJCjA022JiJsoTIhI\nq6qts7HyvS/47EgB8d1DefyuwQR19vd0WyLiRgoTIj7MMAzqbAbVNTZq6mzU1NRRZxjf26iR/a5Q\nqzFrPjrKwW+KuKZXV345bRCBnfRrR8Tb6KdapBn8OgXz2TdWTpSeBr57AjXs/2t4+7vxKzzHuoWB\nQW2tjepaGzWX/FddW3fJ2zZqvn2/upFtWrO/Btdd1Y25UwfRyd/S+gcTkTanMCHSDJ2DurL5/0qB\nUk+30iz+fmb8LWb8/c0E+JkJ7BRQP+ZX/36AnwW/b982m02YGqlhamywkS2/v123kE6MH9Ybfz8F\nCRFvpTAh0gwXy4v5lzFXExMTi8l0+ZPud0+iJkymS55mTWDfstEnZNf4W8yXBQN/fwv+FjMB/vUB\nws/PjLnxJCAi4hYKEyLNUFtzgR/1CiI+PsbTrYiItBtmTzcgIiIiHZvChIiIiLhEYUJERERcojAh\nIiIiLlGYEBEREZcoTIiIiIhLFCZERETEJQoTIiIi4hKFCREREXGJwoSIiIi4RGFCREREXKIwISIi\nIi5RmBARERGXKEyIfMtsCfB0CyIiHZLChMi3LH7+nm5BRKRDUpgQ+ZbZT2cmRERawmGYqK6uZvr0\n6aSmppKSksLLL78MwOnTp7nrrrtITk5m3rx51NbW2rd//PHHGTduHHfffTdnzpyx13r11VcZN24c\n48ePZ9euXfbxHTt2cMcdd5CcnMzKlSvt4y05hkhLWXSZQ0R8kM1mIzU1lQcffBCAPXv2MGXKFFJS\nUli4cCE2m81hDYdhIiAggFWrVpGRkUFGRgY7duzg888/54UXXmDGjBls3ryZkJAQ1q5dC8DatWsJ\nCwtjy5Yt3H///SxbtgyAb775hk2bNrFx40Zee+01fve732EYBjabjcWLF/PGG2/w/vvvs
2HDBo4e\nPQrQ7GOItFRFVR1mi5+n2xARaXOrVq0iISEBAMMwWLhwIX/4wx9477336N69O+vXr3dYw6nLHIGB\ngUD9GYHa2lpMJhP79u0jOTkZgLS0NLZu3QpAZmYmaWlpACQnJ7N3714Atm3bxoQJE/Dz86Nnz570\n6dOH7OxssrOz6dOnDz169MDf35+JEyeSmZkJwN69e506xp49e5z5NESu6ExxtadbEBFpc3l5eWRl\nZTF9+nQASkpKCAgIoHfv3gCMGDGCLVu2OKzjVJhoOAWSmJhIYmIivXr1IjQ0FLO5fvfY2Fjy8/MB\nKCgoIDY2FgCLxUJISAilpaXk5+cTFxdnrxkTE0N+fn6j4wUFBZSUlBAWFubUMUJDQyktLXXmUxFp\n1JlzChMi4nuWLFnCggULMJlMAISHh1NbW8vhw4cB2Lx5M3l5eQ7rOHVe12w2k5GRQXl5Ob/4xS/s\nlyEu1dCIYRiNfuxK41e6FmMYxg/2udIxDMOwf+xKunULws/P0uQ24rsq6joB8Lff3UFYl04e7kbE\nfaKibuKj4Td5ug1ph7Zv305kZCQDBgxg37599vHly5ezZMkSampqSExMxM/PcVRo1kXiLl26MHTo\nUD7//HPOnz+PzWbDbDaTl5dHdHQ0UH9mIS8vj5iYGOrq6rBarYSFhREbG8vZs2fttRr2MQzjsgmU\n+fn5REdHEx4e7vQxysvLCQsLa7L3kpLK5nyq4kNshsGXx4uw1dVy76IPmty2vCSXpXOGEx9/dRt1\nJ+Kao0e/ZuHKvXTp1qPJ7fTY9j5RUSFNfvzAgQNs27aNrKwsqqqqqKioYMGCBTz//POsXr0agI8/\n/pgTJ044PJbDyxzFxcVYrVYALl68yJ49e0hISGDYsGF88EH9L9709HSSkpIAGD16NOnp6QB88MEH\nDB8+3D6+ceNGqqurOXXqFDk5OQwaNIiBAweSk5NDbm4u1dXVbNiwwV5r+PDhzTqGSEvkF1dyscag\nrlaXOkTEd8ybN4/t27eTmZnJ8uXLGTZsGM8//zzFxcVA/TzJ1157jXvuucdhLYdnJgoLC/nNb36D\nzWbDZrMxYcIEbrvtNvr168e8efNYsWIFAwYMYNq0aQBMnz6dJ554gnHjxtG1a1eWL18OQEJCAuPH\nj2fixIn4+fmxaNEiTCYTFouFp556ipkzZ2IYBtOmTSM+Ph6AX/3qV806hkhLHD97HgBbXTUQ5Nlm\nREQ87PXXX2f79u0YhsFPf/pThg0b5nAfk9HYZAYvVFho9XQL0k79bctXbDuQS+X5AoJCo5vcVqeC\npaPRZQ7f5egyhzvpDpji846dOY/ZBLa6Gk+3IiLSISlMiE+rqa3jVEE5MV1190sRkZZSmBCfdjK/\nnDqbQY8IhQkRkZZSmBCfduxM/eTL7uEKEyIiLaUwIT7t2JkyAHooTIiItJjChPi042fPE9zZj25d\ntMiXiEhLKUyIzzpfWU1h6UX6dg91eDt2ERG5MoUJ8VnHv50v0S8u1MOdiIh0bAoT4rMaJl/2664w\nISLiCoUJ8VnHvr2Ndl+dmRARcYnChPgkm2Fw/Mx5orsGEhKkV3KIiLhCYUJ8UkHJBSqranWJQ0TE\nDRQmxCc13F+ir8KEiIjLFCbEJ2nypYiI+yhMiE86duY8FrOJ3tFtt0SviIi3UpgQn9OwUmjvmC74\n++lHQET7GZcYAAAc/ElEQVTEVfpNKj6nYaXQfnFhnm5FRMQrKEyIz9F8CRER91KYEJ9z/KzChIiI\nOylMiM85dqaM4M5+RHcL9HQrIiJeQWFCfIpWChURcT+FCfEpWilURMT9FCbEp2jypYiI+ylMiE/R\nSqEiIu6nMCE+QyuFioi0DoUJ8RlaKVREpHUoTIjP0EqhIiKtQ2FCfIYmX4qItA6FCfEZWilURKR1\nKEyIT9BKoSIirUe/VcUnaKVQEZHWozAhPuG45kuIi
LQahQnxCce0UqiISKtRmBCfoJVCRURaj8KE\neD2tFCoi0roUJsTraaVQEZHWpTAhXk83qxIRaV0KE+L1tFKoiEjrUpgQr2ZopVARkVanMCFeLV8r\nhYqItDqFCfFqWilURKT1KUyIV9PkSxGR1ucwTOTl5fGv//qvTJgwgZSUFFatWgVAWVkZM2fOJDk5\nmVmzZmG1Wu37PPvss4wbN47Jkyfz5Zdf2sfT09NJTk4mOTmZjIwM+/jhw4dJSUkhOTmZ5557zj7e\nkmOIXOrYmfP4WbRSqIjIldhsNlJTU3nwwQcB2LNnD1OmTCE1NZWf/exnnDp1ymENh2HCYrGwcOFC\nNm7cyDvvvMPq1as5evQoK1euZMSIEWzevJlhw4bx6quvApCVlUVOTg5btmzhmWeeYdGiRUB9MHjl\nlVdYu3Yta9as4eWXX7aHg6effprnnnuOzZs3c+LECXbu3AnQ7GOIXKphpdBe0SFaKVRE5ApWrVpF\nQkKC/f2nn36aF198kYyMDCZOnMif/vQnhzUc/oaNiopiwIABAAQHBxMfH09+fj6ZmZmkpaUBkJaW\nRmZmJgCZmZmkpqYCMHjwYKxWK0VFRezatYvExERCQkIIDQ0lMTGRnTt3UlhYSEVFBYMGDQIgNTWV\nrVu32ms15xgil/pupVBd4hARaUxeXh5ZWVlMnz7dPmY2m+1/7JeXlxMdHe2wjl9zDnr69GmOHDnC\n4MGDOXfuHJGRkUB94CguLgagoKCA2NhY+z6xsbHk5+eTn59PXFycfTwmJsY+fun2DeOA08do2Kdh\nWxHQSqEiIo4sWbKEBQsWXDaNYPHixcyePZvAwEC6dOnC//zP/zis4/S534qKCh599FGefPJJgoOD\nr7jGgWEYP3jfZDL9YBxocrwpLdlHfI9WChURubLt27cTGRnJgAEDLnteffvtt3n99dfZvn07U6ZM\nYenSpQ5rOXVmora2lkcffZTJkyczZswYACIiIigqKiIyMpLCwkLCw8OB+rMEeXl59n3z8vKIjo4m\nNjaWffv2XTY+fPhwYmNjOXv2rH08Pz/ffkolMjKyWcdoSrduQfj5WZz5dMVLnMy3EhLkz3XXRDsM\nm1FRN/HR8JvaqDORtqPHtlzJgQMH2LZtG1lZWVRVVVFRUcEDDzzA8ePHGThwIADjx49n9uzZDms5\nFSaefPJJEhISuP/+++1jo0ePZv369cyZM4f09HSSkpIASEpKYvXq1UyYMIGDBw8SGhpKZGQkI0eO\n5KWXXsJqtWKz2di9ezfz588nNDSULl26kJ2dzcCBA8nIyOC+++5r0TGaUlJS6cynKl7ifGU1eecq\nub5fOEVF5Q63P3r0axau3EuXbj2a3K68JJelc4YTH3+1u1oVaVV6bPuuqKimX8U2b9485s2bB8An\nn3zCm2++yZ/+9CcSExM5efIkffr0YdeuXfTr18/hsRyGif379/Pee+9xzTXXkJqaislk4vHHH2f2\n7Nk89thjrFu3ju7du7NixQoAbrvtNrKyshg7diyBgYH20yNhYWE8/PDDTJ06FZPJxNy5cwkNrT/9\nvGjRIhYuXEhVVRWjRo1i1KhRAM0+hkgDrRQqItJ8ZrOZxYsXM3fuXCwWC6GhoSxZssThfiajsQkI\nXqiw0Op4I/Ea6TuO8d7uEzw2fTCD4iMcbq+/3sRb6bHtuxydmXAnvfhevNJ3K4XqZlUiIq1NYUK8\njlYKFRFpWwoT4nW0UqiISNtSmBCvo5VCRUTalsKEeB2tFCoi0rYUJsTraKVQEZG2pTAhXkUrhYqI\ntD39thWvkqOVQkVE2pzChHgVzZcQEWl7ChPiVbRSqIhI21OYEK9y7EwZwZ39iO4W6OlWRER8hsKE\neI3zldUUll6kb/dQh0uOi4iI+yhMiNfQSqEiIp6hMCFe47vJl2Ee7kRExLcoTIjXOK6VQkVEPMLP\n0w2Ib/omt4wtn
+RQXWtzW81/nirVSqEiIh6gMCFt6mJ1LeuzjpG5/zRGK9S/+dqoVqgqIiJNUZiQ\nNvOPY+d4+4OvOHf+IjHhQfz8jmu5KtaNkyVN0Mnf4r56IiLiFIUJaXXlF2p4J/Nrdv8jD7PJxMQR\nfZiUeBX+fnriFxHxBgoT0moMw+DTIwX814f/5HxlDX1iQpgxoT+9YzRBUkTEmyhMSKsosVbx181f\ncfCbIvz9zEz/STzjhvbCYtYLiEREvI3ChLiVzTDY8fkZ1nz0DReq6ujfuyv3j+9PTLcgT7cmIiKt\nRGFC3Ca/uJK3Nh3hq1OlBHaycP8d13Lr4O6YdWtrERGvpjAhLquz2djyySkydh2nptbGDQmR3Jd8\nLd1COnm6NRERaQMKE+KSnHwrf9l4hJP5VkKD/Jk1cQBD+0droS0RER+iMCEtUlNbx7sfn2DT3hxs\nhkHi9bHcnXQ1XQL9Pd2aiIi0MYUJabZ/nirlL5uOkF9cSURoZ+6/41qu7xfh6bZERMRDFCbEaReq\nalm7/Sgf/V8uJmDMzT2Zcls/OgfoYSQi4sv0LCBXVF1TR15xJXnFlZwpqmBn9llKrFV0jwzm5+P7\nk9BDS32LiIjChM8zDANrZQ1nz1Vw9lxl/X/FFeSdq+Rc2cXLFuOymE1MSryKiSOuwt9PN58SEZF6\nChM+os5mo7D0ImfP1QeFS0NDxcXaH2wfFhzAtb27EhsRTFx4EHERQfSKCSEsWMt7i4jI5RQmvIBh\nGFTV1HG+sgZrRTXnK6uxVtZQWHrh27MNFRSUXKDOdvmi3xaziehugVzTqytxEcHERQQRGxFEXHgQ\nQZ31qgwREXGOwkQ7VVtnw1pZw/mKaqyV9QHhfEWN/e3LP1ZDTa3tirUCO/nRJzaEuIig+tAQXh8a\noroG4mfR5QoREXGNwkQL/PNUKdsOnKau7vK/9I1GtjWMxkYb2w7KLzacWajhQtUPLz18n5/FTFiw\nPz0igwkNDiAkyJ/QoABCgurfDg/tTPeIIEKDA3QTKRERaTUKE820/WAuq7f88weXDNzBZIKQoAAi\nQjsREhTyvYDw7b/BAYQG+RMSFEDnAItCgoiIeJzChJNq62y8k/k12w7k0iXQnwcmXUef2BCn9m3s\n+f6HQyY6d7JoUSwREelwFCacUH6hhj+lH+JITik9o4J5ZOogoroGerotERGRdkFhwoHTheX8x9ps\nisouctM1UfzbnQN0x0cREZFL6FmxCf/3dSEr3/uCquo6JiVexaSRfXUZQkRE5HsUJhphGAYb9pwk\nfccx/P3MPJR6PUP7R3u6LRGfUldXx4kTx5ze/qqr+mGxWFqxIxG5EoWJ76mqqeMvG7/kky8LCA/t\nxCNTBjk90VJE3OfEiWP8ctm7BIU5DvKVZQWseGIS8fFXt0FnIvJ9ChOXKD5/kT+uO8TJfCsJPcP4\nRdpA3T5axIOCwqLp0q2Hp9sQEQcc3v7wySef5Mc//jEpKSn2sbKyMmbOnElycjKzZs3CarXaP/bs\ns88ybtw4Jk+ezJdffmkfT09PJzk5meTkZDIyMuzjhw8fJiUlheTkZJ577jmXjuGKb3LLeObtzziZ\nb+XWQXE8cc+NChIiIuL1bDYbqampPPjggwD87Gc/Iy0tjdTUVG699Vbmzp3rsIbDMDFlyhTeeOON\ny8ZWrlzJiBEj2Lx5M8OGDePVV18FICsri5ycHLZs2cIzzzzDokWLgPpg8Morr7B27VrWrFnDyy+/\nbA8HTz/9NM899xybN2/mxIkT7Ny5s0XHcMWu7LM8/18HKK+s4adjrubn4/trVUwREfEJq1atIiEh\nwf7+6tWrSU9PJyMjgxtvvJGxY8c6rOHwGXPIkCGEhoZeNpaZmUlaWhoAaWlpZGZm2sdTU1MBGDx4\nMFarlaKiInbt2kViYiIhISGEhoaSmJjIzp07KSwspKKigkGDBgGQmprK1q1bW3S
Mlqiz1d+I6s2N\nX9LJ38Ljdw9mzJBeuqukiIj4hLy8PLKyspg+ffoPPlZeXs7evXsZM2aMwzotmjNRXFxMZGQkAFFR\nURQXFwNQUFBAbGysfbvY2Fjy8/PJz88nLi7OPh4TE2Mfv3T7hnGAc+fOOXWMhn0atnVWxcUa/vy/\nhzl8vJi4iCAenTqImPCgZtUQERHpyJYsWcKCBQsum0rQYOvWrYwYMYLg4GCHddx6Lv/7i1oZhoHJ\nZGp0saumxptzDGf2+b6z5yp4dtV+Dh8vZlB8BL+9b4iChIiI+JTt27cTGRnJgAEDGn1u3bBhA3fe\neadTtVp0ZiIiIoKioiIiIyMpLCwkPDwcqD9LkJeXZ98uLy+P6OhoYmNj2bdv32Xjw4cPJzY2lrNn\nz9rH8/PziY6ufxlYZGRks47hSLduQfj5Wfjsy3xe+Nt+Ki7WMvUnCdw34UdYzLqs4euiom7io+E3\neboNuYS+J+6hr6NcyYEDB9i2bRtZWVlUVVVRUVHBggULeP755yktLeXQoUO88sorTtVyKkx8P7GM\nHj2a9evXM2fOHNLT00lKSgIgKSmJ1atXM2HCBA4ePEhoaCiRkZGMHDmSl156CavVis1mY/fu3cyf\nP5/Q0FC6dOlCdnY2AwcOJCMjg/vuu69Fx3CkuLiCzZ+cYs32b7CYzcxO+REjroul+Fx5k/s158Y5\numlOx3X06NcsXLnX4csQy0tyWTpnuO5n0Aac/Z6Avi9N0WPbd0VFNX2PpHnz5jFv3jwAPvnkE958\n802ef/55ADZt2sRPfvITAgKce1WjwzDxq1/9in379lFaWsrtt9/OI488wpw5c/jlL3/JunXr6N69\nOytWrADgtttuIysri7FjxxIYGMjSpUsBCAsL4+GHH2bq1KmYTCbmzp1rn9S5aNEiFi5cSFVVFaNG\njWLUqFEAzJ49m8cee8zpYzjyxoYv2f2PPLp2CeCRqYPoGxfqeCecv3GObpojIiLeYtOmTcyZM8fp\n7R2GiRdffLHR8bfeeqvR8X//939vdHzKlClMmTLlB+PXX38977333g/Gu3bt2uxjNGX3P/LoGxfK\n3CkD6RbSqVn76sY5IiLizW655RZuueUW+/urVq1q1v4+cwfM8cN6k3prX/z9dBlCRETEnXwmTEz/\nSYLjjURERKTZdJtHERERcYnChIiIiLhEYUJERERcojAhIiIiLlGYEBEREZf4zKs5xHOacxdR0J1E\nRUQ6GoUJaXXO3kUUdCdREZGOSGFC2oTuIioi4r00Z0JERERcojAhIiIiLtFlDhGRdkKTlaWjUpgQ\nEWknNFnZNzQnNHaUwKgwISLSjmiysvdzNjR2pMCoMCEiItLGvC00agKmiIiIuERhQkRERFyiMCEi\nIiIuUZgQERERlyhMiIiIiEsUJkRERMQlChMiIiLiEoUJERERcYnChIiIiLhEYUJERERcojAhIiIi\nLlGYEBEREZcoTIiIiIhLFCZERETEJQoTIiIi4hKFCREREXGJwoSIiIi4RGFCREREXKIwISIiIi5R\nmBARERGX+Hm6AWl/6urqOHHimFPbXnVVPywWSyt3JCIi7ZnPhImjR7/+wZieCBt34sQxfrnsXYLC\nopvcrrKsgBVPTCI+/uo26kxERNojnwkTC1fuvex9PRE2LSgsmi7deni6DRER6QB8JkzoiVFERKR1\naAKmiIiIuMRnzkx4M02YFBERT+qwYWLHjh0sWbIEwzCYOnUqc+bM8XRLHqMJkyIi0lI2m42pU6cS\nExPDn//8ZwBeeuklPvjgA/z8/PiXf/kX7r333iZrdMgwYbPZWLx4MW+99RbR0dFMmzaNpKQk4uPj\nPd2ax2jCpIiItMSqVauIj4+nvLwcgHXr1pGfn8/mzZsBKC4udlijQ4aJ7Oxs+vTpQ48e9U+eEydO\nJDMz06fDhEhz6fKYeCs9tp2Xl5dHVlYWDz7
4IH/5y18AeOedd1i+fLl9m/DwcId1OmSYyM/PJy4u\nzv5+TEwMhw4d8mBHIh2PLo9Je9AaT/x6bDtvyZIlLFiwAKvVah/Lyclhw4YNfPjhh0RERPDb3/6W\nPn36NFmnQ4YJwzCavU95Se5l71eWFTi9rzPbNqceNH4TrcY4+yB3d4+eqNfcmu39a+ju/lqrpjt5\n4nvSnO3A/T12hM+5PT+2T5w4xpynXqdzl6b/+r1YXszKxf/mU49tV78nUVE3NbnP9u3biYyMZMCA\nAezbt88+Xl1dTefOnVm3bh0ffvghTz75JKtXr26ylsloyTOzhx08eJA//vGPvPHGGwCsXLkSwKcn\nYYqIiDTH8uXLeffdd7FYLFRVVVFRUcGYMWM4fPgwr7/+Ot27dwdgyJAhfPbZZ03W6pD3mRg4cCA5\nOTnk5uZSXV3Nhg0bSEpK8nRbIiIiHca8efPYvn07mZmZLF++nGHDhrFs2TLGjBnDnj17ANi3bx99\n+/Z1WKtDXuawWCw89dRTzJw5E8MwmDZtmiZfioiIuMHs2bOZP38+b731FsHBwTz77LMO9+mQlzlE\nRESk/eiQlzlERESk/VCYEBEREZcoTIiIiIhLvDpM3Hjjje2iRnNqfvLJJzz44INuq9cSzam3cOFC\ntmzZ4taaznJU8/e//z0pKSksW7bMpVot+Z44U7egoIBf/vKXHq3nqGZubi4pKSku1WipjlBTPf5Q\nbm4u77//vltqtZQ7a7b3/lqzZnN0yFdzOMtkMrWLGq1ds73X81TNNWvW8Omnnzp17Nboz1Hd6Oho\nVqxY4dF6jmq2ZY2OWNOTPdpsNsxmx38PtnWPp0+f5v333+fOO+90uVZLubNme++vNWs2h1efmWjQ\n8BfqpEmT2Lhxo9tqVFZW8vOf/5wpU6YwadIkMjMz3dJXeXk5DzzwAHfccQdPP/20y/Vee+01UlJS\nSE1Nvex+6y2t98wzzzB+/HhmzpzJuXPnnK7XVM033niDadOmMXnyZF5++eUW19y0aRMADz30EJWV\nlUyZMsU+5kp/FRUVPProo4wfP54nnniiWf1dqa6zf/W3Rb0r1XRHjU8++YT77ruPhx9+mLFjx/Li\niy/y3nvvMX36dCZNmsSpU6daXLOl35Mrfa4t/Rpcqcd77723RT/LTdX82c9+xkMPPcSECRNcrldY\nWMi9995LWloaKSkp7N+/v8U1G37Oli9fzv79+0lLS+Ptt992qb/vnxVcvHgxGRkZLvX3+OOPs2PH\nDvs2Cxcu5MMPP2xRrd/97nd89NFHAPziF7/gt7/9LQBr1651Ktg3VnPBggVs27bNvs38+fPtx3BG\nY1/H//iP/yA1NZW0tDRGjRrFk08+6XQ9pxle7IYbbjA2b95szJw50zAMwygqKjJuv/12o7Cw0C01\namtrjfLycsMwDKO4uNgYO3asyzX37dtnDBo0yDh9+rRhs9mMGTNmGJs3b25xvaysLOOee+4xqqqq\nDMMwjLKyMpf627Jli308Pz/fGDJkiMP+HNXctWuX8dRTTxmGYRg2m8144IEHjE8//dSlmoZhGDfe\neKPDGs7U2rdvnzFkyBAjPz/fsNlsxt13323s37/f5bqnT5827rzzTqd7bI167qrp6Os3dOhQo6io\nyKiqqjJuvfVW449//KNhGIbx9ttvG0uWLGlRzZZ8T5qq2ZLfFe7+WXam5g033GDk5uY6rONMvTff\nfNP485//bBhG/c9eRUWFyzX37dtnPPDAA27p7/u1nnnmGSM9Pd2lmh9++KHx61//2jAMw6iurjZu\nv/12++/H5tbasGGD8fzzzxuGYRjTpk0z7r77bsMwDOM3v/mNsWvXrhbV/OSTT4yHH37YMAzDsFqt\nRlJSklFXV+fS59zAarUakyZNMr744guH9ZrL689MHDhwgIkTJwIQERHBLbfc0uxFwa5UwzAMXnzx\nRSZNmsS
MGTMoKChw+i/1pvoaNGgQPXr0wGQyMXHiRKf+WmisXnZ2Nnv27GHKlCkEBAQAEBoa2uL+\nsrOz+fTTT+3j0dHRDB8+3Kl6TX3Ou3bt4uOPPyYtLY20tDSOHz/OyZMnXaoJzV/DxdH3JDo6GpPJ\nRP/+/cnNzW2qlNN1W8Ld9dxVs6kaAwcOJCIigoCAAHr37k1iYiIA11xzTZNfy9b4nlzpsb1///4W\nfQ3c/bPsTM2G2xw760r1Bg4cyLp163j55Zf56quvCAoKcrlmS7TlY3rUqFHs27ePmpoaduzYwZAh\nQ+y/H5tb6+abb+azzz7j6NGjJCQkEBERQWFhIQcPHnQ4h+FKNYcOHUpOTg7FxcW8//77jBs3zqnL\nWU3VbDB//nxmzJjBgAEDnKrXHF49ZwJ++ITS3CeYpmq89957lJaWkpGRgdlsZvTo0VRVVbm9L2eu\nhTVWr2G/llxLc3e9K9Vs8MADD3DXXXe5tWZz+2yqlr+/v/1ti8VCXV2dW+q2hLvruaums18/k8lk\n/+VtNpupra11uWZzvidXemy39Gvg7p9lRzUDAwOdquFMvSFDhrB69Wq2b9/Ob37zG2bMmMHkyZNd\n7tFd/VksFmw2m33c2d+vTdUMCAjglltuYefOnWzcuNGpS4NXqhUTE0NZWRk7d+5k6NChlJWVsWnT\nJoKDgx0Gs6a+fpMnT+bdd99l48aNLF261GF/ztT84x//SFxcHKmpqU7Xaw6vPzMxdOhQNmzYgM1m\no7i4mM8++4xBgwa5pYbVaiU8PByz2czevXs5c+aMW/rKzs4mNzcXm83Gxo0bufnmm1tc78c//jHr\n1q3j4sWLAJSVlbnU36XjBQUFl60019KaI0eOZN26dVRWVgL1S8wXFxe3uObgwYOB5v9yc8djpS3q\ntkafrflz4um+nK05dOhQNm7c2OxjNdXjoUOHmv2z3Bqf95XqnTlzhvDwcKZPn8706dP54osvXK4Z\nHBxMRUWFW/rr0aMHR48epaamBqvVal8vwpWaABMmTGD9+vUcOHCAkSNHulTrxhtv5O2332bo0KHc\nfPPNvPnmmwwZMsSlmmlpaaxatQqTydSspSKuVPOjjz7i448/ts/paA1ee2airq6OTp06MWbMGA4c\nOMDkyZMxmUwsWLCAiIgIt9RISUnhoYceYtKkSVx//fVOfdMd1Tx69CiDBg1i8eLFnDx5kuHDhzN2\n7NgW17v11ls5cuQIU6dOJSAggFGjRvH444+3uN7YsWPZu3cvEydOpHv37k69HMlRzcTERI4dO8bd\nd98NQHBwMMuWLSM8/MpLEjdVs2E/Z/8KdOZ7cil31W3OpRJn6zX3bIw7emyNr58nao4dO5aDBw86\n/bvCmR6vv/56p3+WW/J5u/o5Z2Rk8MYbb+Dn50dwcDC///3vXa4ZFhaGxWKxT/i7//77W1wLYPz4\n8dx555307NmT6667zuX+ABITE/n1r39NUlISfn5Xfhp0ptbNN9/Mxx9/TK9evYiLi6OsrKzJMOFM\nzYiICPr16+fw8eJszb/85S8UFhYybdo0TCYTo0eP5pFHHnGqttPcPAej3fjyyy+N6dOne7xGa9ds\n7/U6Qs3W6K816jqqd+jQIePee+91a822qtERazqq19yJiM7UbK72/nVUf42rrKw0xo4da1itVrfV\nbG1eGSb++7//25g4caKxe/duj9Zo7ZrtvV5HqNka/bVGXUf1Dh06ZCQnJxsffvhhm/bY3r+/rVXT\nmXrNDRO+9vOs/hq3e/du4/bbbzdWrVrltpptQauGioiIiEu8fgKmiIiItC6FCREREXGJwoSIiIi4\nRGFCREREXKIwISIiIi5RmBARERGX/H+T3S+yvT5Q6wAAAABJRU5ErkJggg==\n", 245 | "text/plain": [ 246 | "" 247 | ] 248 | }, 249 | "metadata": {}, 
250 | "output_type": "display_data" 251 | } 252 | ], 253 | "source": [ 254 | "jo = df.select(lambda x: df.name.values[x].startswith(\"Jo\"))\n", 255 | "pareto(jo, 3)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "The line crosses 50% at Jor. Looking at all the names beginning with Jor, we see that Jordan has a cumulative total of 50.002042%." 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 8, 268 | "metadata": { 269 | "collapsed": false, 270 | "deletable": true, 271 | "editable": true 272 | }, 273 | "outputs": [ 274 | { 275 | "data": { 276 | "text/html": [ 277 | "
\n", 278 | "\n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | 
" \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | "
namename_totalname_cumulativename_percent
13702Jordain514740264949.844152
13703Jordan46692114786957050.002042
13704Jordana145314787102350.002533
13705Jordann12514787114850.002575
13706Jordanna3014787117850.002585
13707Jordanne4914787122750.002602
13708Jordany9714787132450.002635
13709Jorden494414787626850.004307
13710Jordi134214787761050.004760
13711Jordin330514788091550.005878
13712Jordis4314788095850.005892
13713Jordon1127414789223250.009705
13714Jordy289514789512750.010684
13715Jordyn4480514793993250.025834
13716Jordynn187914794181150.026470
13717Jorel19714794200850.026536
13718Jorell2914794203750.026546
13719Jorene514794204250.026548
13720Joretta31614794235850.026655
13721Jorge12062914806298750.067445
13722Jorgeluis15214806313950.067497
13723Jorgen1014806314950.067500
13724Jorgina514806315450.067502
13725Jori47714806363150.067663
13726Jorie36114806399250.067785
13727Jorja116314806515550.068179
13728Jorje60314806575850.068382
13729Jorley1614806577450.068388
13730Jorman1014806578450.068391
13731Jory86114806664550.068682
\n", 501 | "
" 502 | ], 503 | "text/plain": [ 504 | " name name_total name_cumulative name_percent\n", 505 | "13702 Jordain 5 147402649 49.844152\n", 506 | "13703 Jordan 466921 147869570 50.002042\n", 507 | "13704 Jordana 1453 147871023 50.002533\n", 508 | "13705 Jordann 125 147871148 50.002575\n", 509 | "13706 Jordanna 30 147871178 50.002585\n", 510 | "13707 Jordanne 49 147871227 50.002602\n", 511 | "13708 Jordany 97 147871324 50.002635\n", 512 | "13709 Jorden 4944 147876268 50.004307\n", 513 | "13710 Jordi 1342 147877610 50.004760\n", 514 | "13711 Jordin 3305 147880915 50.005878\n", 515 | "13712 Jordis 43 147880958 50.005892\n", 516 | "13713 Jordon 11274 147892232 50.009705\n", 517 | "13714 Jordy 2895 147895127 50.010684\n", 518 | "13715 Jordyn 44805 147939932 50.025834\n", 519 | "13716 Jordynn 1879 147941811 50.026470\n", 520 | "13717 Jorel 197 147942008 50.026536\n", 521 | "13718 Jorell 29 147942037 50.026546\n", 522 | "13719 Jorene 5 147942042 50.026548\n", 523 | "13720 Joretta 316 147942358 50.026655\n", 524 | "13721 Jorge 120629 148062987 50.067445\n", 525 | "13722 Jorgeluis 152 148063139 50.067497\n", 526 | "13723 Jorgen 10 148063149 50.067500\n", 527 | "13724 Jorgina 5 148063154 50.067502\n", 528 | "13725 Jori 477 148063631 50.067663\n", 529 | "13726 Jorie 361 148063992 50.067785\n", 530 | "13727 Jorja 1163 148065155 50.068179\n", 531 | "13728 Jorje 603 148065758 50.068382\n", 532 | "13729 Jorley 16 148065774 50.068388\n", 533 | "13730 Jorman 10 148065784 50.068391\n", 534 | "13731 Jory 861 148066645 50.068682" 535 | ] 536 | }, 537 | "execution_count": 8, 538 | "metadata": {}, 539 | "output_type": "execute_result" 540 | } 541 | ], 542 | "source": [ 543 | "jor = df.select(lambda x: df.name.values[x].startswith(\"Jor\"))\n", 544 | "jor" 545 | ] 546 | }, 547 | { 548 | "cell_type": "markdown", 549 | "metadata": {}, 550 | "source": [ 551 | " So, to optimize for comfort in an alphabetized line, we’ve found the perfectly unbiased name: Jordan." 
552 | ] 553 | }, 554 | { 555 | "cell_type": "code", 556 | "execution_count": null, 557 | "metadata": { 558 | "collapsed": true, 559 | "deletable": true, 560 | "editable": true 561 | }, 562 | "outputs": [], 563 | "source": [] 564 | } 565 | ], 566 | "metadata": { 567 | "kernelspec": { 568 | "display_name": "Python 2", 569 | "language": "python", 570 | "name": "python2" 571 | }, 572 | "language_info": { 573 | "codemirror_mode": { 574 | "name": "ipython", 575 | "version": 2 576 | }, 577 | "file_extension": ".py", 578 | "mimetype": "text/x-python", 579 | "name": "python", 580 | "nbconvert_exporter": "python", 581 | "pygments_lexer": "ipython2", 582 | "version": "2.7.9" 583 | } 584 | }, 585 | "nbformat": 4, 586 | "nbformat_minor": 2 587 | } 588 | -------------------------------------------------------------------------------- /2019/avro-to-arrow/AUTHORS: -------------------------------------------------------------------------------- 1 | # This is the list of Avro-to-Arrow authors for copyright purposes. 2 | # 3 | # This does not necessarily list everyone who has contributed code, since in 4 | # some cases, their employer may be the copyright holder. To see the full list 5 | # of contributors, see the revision history in source control. 6 | Apache Software Foundation (ASF) 7 | Google LLC 8 | Jeffrey Evan Hammerbacher 9 | Doug Cutting 10 | Philip Zeyliger 11 | Douglass Cutting 12 | Ryan Blue 13 | Miki Tebeka 14 | Thomas White 15 | Niels Basjes 16 | Michael A. Smith 17 | Daniel Kulp 18 | shiraeeshi 19 | Thiruvalluvan M G 20 | Prem Santosh 21 | Matthieu Monsch 22 | Fokko Driesprong 23 | -------------------------------------------------------------------------------- /2019/avro-to-arrow/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 
9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | 204 | ---------------------------------------------------------------------- 205 | License for the Jansson C JSON parser used in the C implementation: 206 | 207 | Copyright (c) 2009-2011 Petri Lehtinen 208 | 209 | Some files include an additional copyright notice: 210 | * lang/c/jansson/src/pack_unpack.c 211 | Copyright (c) 2011 Graeme Smecher 212 | * lang/c/jansson/test/suites/api/test_unpack.c 213 | Copyright (c) 2011 Graeme Smecher 214 | * lang/c/jansson/src/memory.c 215 | Copyright (c) 2011 Basile Starynkevitch 216 | 217 | | Permission is hereby granted, free of charge, to any person obtaining a copy 218 | | of this software and associated documentation files (the "Software"), to deal 219 | | in the Software without restriction, including without limitation the rights 220 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 221 | | copies of the Software, and to permit persons to whom the Software is 222 | | furnished to do so, subject to the following conditions: 223 | | 224 | | The above copyright notice and this permission notice shall be included in 225 | | all copies or substantial portions 
of the Software. 226 | | 227 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 228 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 229 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 230 | | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 231 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 232 | | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 233 | | THE SOFTWARE. 234 | 235 | ---------------------------------------------------------------------- 236 | License for msinttypes.h and msstdint.h used in the C implementation: 237 | Source from: 238 | http://code.google.com/p/msinttypes/downloads/detail?name=msinttypes-r26.zip 239 | 240 | Copyright (c) 2006-2008 Alexander Chemeris 241 | 242 | | Redistribution and use in source and binary forms, with or without 243 | | modification, are permitted provided that the following conditions are met: 244 | | 245 | | 1. Redistributions of source code must retain the above copyright notice, 246 | | this list of conditions and the following disclaimer. 247 | | 248 | | 2. Redistributions in binary form must reproduce the above copyright 249 | | notice, this list of conditions and the following disclaimer in the 250 | | documentation and/or other materials provided with the distribution. 251 | | 252 | | 3. The name of the author may be used to endorse or promote products 253 | | derived from this software without specific prior written permission. 254 | | 255 | | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 256 | | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 257 | | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO 258 | | EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 259 | | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 260 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 261 | | OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 262 | | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 263 | | OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 264 | | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 265 | 266 | ---------------------------------------------------------------------- 267 | License for st.c and st.h used in the C implementation: 268 | 269 | | This is a public domain general purpose hash table package written by 270 | | Peter Moore @ UCB. 271 | 272 | ---------------------------------------------------------------------- 273 | License for Dirent API for Microsoft Visual Studio used in the C implementation: 274 | Source from: 275 | http://www.softagalleria.net/download/dirent/dirent-1.11.zip 276 | 277 | Copyright (C) 2006 Toni Ronkko 278 | 279 | | Permission is hereby granted, free of charge, to any person obtaining 280 | | a copy of this software and associated documentation files (the 281 | | ``Software''), to deal in the Software without restriction, including 282 | | without limitation the rights to use, copy, modify, merge, publish, 283 | | distribute, sublicense, and/or sell copies of the Software, and to 284 | | permit persons to whom the Software is furnished to do so, subject to 285 | | the following conditions: 286 | | 287 | | The above copyright notice and this permission notice shall be included 288 | | in all copies or substantial portions of the Software. 289 | | 290 | | THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS 291 | | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 292 | | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
293 | | IN NO EVENT SHALL TONI RONKKO BE LIABLE FOR ANY CLAIM, DAMAGES OR 294 | | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 295 | | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 296 | | OTHER DEALINGS IN THE SOFTWARE. 297 | 298 | ---------------------------------------------------------------------- 299 | License for simplejson used in the python implementation: 300 | 301 | Source from: https://github.com/simplejson/simplejson 302 | 303 | Copyright (c) 2006 Bob Ippolito 304 | 305 | | Permission is hereby granted, free of charge, to any person obtaining a copy of 306 | | this software and associated documentation files (the "Software"), to deal in 307 | | the Software without restriction, including without limitation the rights to 308 | | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 309 | | of the Software, and to permit persons to whom the Software is furnished to do 310 | | so, subject to the following conditions: 311 | | 312 | | The above copyright notice and this permission notice shall be included in all 313 | | copies or substantial portions of the Software. 314 | | 315 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 316 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 317 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 318 | | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 319 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 320 | | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 321 | | SOFTWARE. 
322 | 323 | ---------------------------------------------------------------------- 324 | License for ivy-2.2.0.jar used in the python implementation: 325 | 326 | Apache License version 2.0 (see above) 327 | 328 | ---------------------------------------------------------------------- 329 | License for pyAntTasks-1.3.jar used in the python implementation: 330 | 331 | Apache License version 2.0 (see above) 332 | 333 | ---------------------------------------------------------------------- 334 | License for NUnit binary included with the C# implementation: 335 | File: nunit.framework.dll 336 | 337 | | NUnit License 338 | | 339 | | Copyright © 2002-2015 Charlie Poole 340 | | Copyright © 2002-2004 James W. Newkirk, Michael C. Two, Alexei A. Vorontsov 341 | | Copyright © 2000-2002 Philip A. Craig 342 | | 343 | | This software is provided 'as-is', without any express or implied warranty. In 344 | | no event will the authors be held liable for any damages arising from the use 345 | | of this software. 346 | | 347 | | Permission is granted to anyone to use this software for any purpose, including 348 | | commercial applications, and to alter it and redistribute it freely, subject to 349 | | the following restrictions: 350 | | 351 | | The origin of this software must not be misrepresented; you must not claim that 352 | | you wrote the original software. If you use this software in a product, an 353 | | acknowledgment (see the following) in the product documentation is required. 354 | | 355 | | Portions Copyright © 2002-2012 Charlie Poole or Copyright © 2002-2004 James W. 356 | | Newkirk, Michael C. Two, Alexei A. Vorontsov or Copyright © 2000-2002 Philip A. 357 | | Craig 358 | | 359 | | Altered source versions must be plainly marked as such, and must not be 360 | | misrepresented as being the original software. 361 | | 362 | | This notice may not be removed or altered from any source distribution. 
363 | | License Note 364 | | 365 | | This license is based on the open source zlib/libpng license. The idea was to 366 | | keep the license as simple as possible to encourage use of NUnit in free and 367 | | commercial applications and libraries, but to keep the source code together and 368 | | to give credit to the NUnit contributors for their efforts. While this license 369 | | allows shipping NUnit in source and binary form, if shipping a NUnit variant is 370 | | the sole purpose of your product, please let us know. 371 | 372 | ---------------------------------------------------------------------- 373 | License for the Json.NET binary included with the C# implementation: 374 | File: Newtonsoft.Json.dll 375 | 376 | Copyright (c) 2007 James Newton-King 377 | 378 | | Permission is hereby granted, free of charge, to any person obtaining 379 | | a copy of this software and associated documentation files (the 380 | | "Software"), to deal in the Software without restriction, including 381 | | without limitation the rights to use, copy, modify, merge, publish, 382 | | distribute, sublicense, and/or sell copies of the Software, and to 383 | | permit persons to whom the Software is furnished to do so, subject to 384 | | the following conditions: 385 | | 386 | | The above copyright notice and this permission notice shall be 387 | | included in all copies or substantial portions of the Software. 388 | | 389 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 390 | | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 391 | | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 392 | | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 393 | | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 394 | | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 395 | | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
396 | 397 | ---------------------------------------------------------------------- 398 | License for the Castle Core binary included with the C# implementation: 399 | File: Castle.Core.dll 400 | 401 | Copyright (c) 2004-2015 Castle Project 402 | 403 | License: Apache License version 2.0 (see above) 404 | URL: http://opensource.org/licenses/Apache-2.0 405 | 406 | ---------------------------------------------------------------------- 407 | License for the log4net binary included with the C# implementation: 408 | File: log4net.dll 409 | 410 | Copyright 2004-2015 The Apache Software Foundation. 411 | 412 | License: Apache License version 2.0 (see above) 413 | 414 | ---------------------------------------------------------------------- 415 | License for the m4 macros used by the C++ implementation: 416 | 417 | Files: 418 | * lang/c++/m4/m4_ax_boost_system.m4 419 | Copyright (c) 2008 Thomas Porschberg 420 | Copyright (c) 2008 Michael Tindal 421 | Copyright (c) 2008 Daniel Casimiro 422 | * lang/c++/m4/m4_ax_boost_asio.m4 423 | Copyright (c) 2008 Thomas Porschberg 424 | Copyright (c) 2008 Pete Greenwell 425 | * lang/c++/m4/m4_ax_boost_filesystem.m4 426 | Copyright (c) 2009 Thomas Porschberg 427 | Copyright (c) 2009 Michael Tindal 428 | Copyright (c) 2009 Roman Rybalko 429 | * lang/c++/m4/m4_ax_boost_thread.m4 430 | Copyright (c) 2009 Thomas Porschberg 431 | Copyright (c) 2009 Michael Tindal 432 | * lang/c++/m4/m4_ax_boost_regex.m4 433 | Copyright (c) 2008 Thomas Porschberg 434 | Copyright (c) 2008 Michael Tindal 435 | * lang/c++/m4/m4_ax_boost_base.m4 436 | Copyright (c) 2008 Thomas Porschberg 437 | 438 | License text: 439 | | Copying and distribution of this file, with or without modification, are 440 | | permitted in any medium without royalty provided the copyright notice 441 | | and this notice are preserved. This file is offered as-is, without any 442 | | warranty. 
443 | 444 | ---------------------------------------------------------------------- 445 | License for the AVRO_BOOT_NO_TRAIT code in the C++ implementation: 446 | File: lang/c++/api/Boost.hh 447 | 448 | | Boost Software License - Version 1.0 - August 17th, 2003 449 | | 450 | | Permission is hereby granted, free of charge, to any person or organization 451 | | obtaining a copy of the software and accompanying documentation covered by 452 | | this license (the "Software") to use, reproduce, display, distribute, 453 | | execute, and transmit the Software, and to prepare derivative works of the 454 | | Software, and to permit third-parties to whom the Software is furnished to 455 | | do so, all subject to the following: 456 | | 457 | | The copyright notices in the Software and this entire statement, including 458 | | the above license grant, this restriction and the following disclaimer, 459 | | must be included in all copies of the Software, in whole or in part, and 460 | | all derivative works of the Software, unless such copies or derivative 461 | | works are solely in the form of machine-executable object code generated by 462 | | a source language processor. 463 | | 464 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 465 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 466 | | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 467 | | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 468 | | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 469 | | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 470 | | DEALINGS IN THE SOFTWARE. 
471 | 472 | ---------------------------------------------------------------------- 473 | License for jquery.tipsy.js, tipsy.js, and tipsy.css used by the Java IPC implementation: 474 | 475 | Copyright (c) 2008 Jason Frame (jason@onehackoranother.com) 476 | 477 | | Permission is hereby granted, free of charge, to any person obtaining a copy 478 | | of this software and associated documentation files (the "Software"), to deal 479 | | in the Software without restriction, including without limitation the rights 480 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 481 | | copies of the Software, and to permit persons to whom the Software is 482 | | furnished to do so, subject to the following conditions: 483 | | 484 | | The above copyright notice and this permission notice shall be included in 485 | | all copies or substantial portions of the Software. 486 | | 487 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 488 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 489 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 490 | | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 491 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 492 | | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 493 | | THE SOFTWARE. 494 | 495 | ---------------------------------------------------------------------- 496 | License for protovis-r3.2.js used by the Java IPC implementation: 497 | 498 | Copyright (c) 2010, Stanford Visualization Group 499 | All rights reserved. 500 | 501 | | Redistribution and use in source and binary forms, with or without modification, 502 | | are permitted provided that the following conditions are met: 503 | | 504 | | * Redistributions of source code must retain the above copyright notice, 505 | | this list of conditions and the following disclaimer. 
506 | | 507 | | * Redistributions in binary form must reproduce the above copyright notice, 508 | | this list of conditions and the following disclaimer in the documentation 509 | | and/or other materials provided with the distribution. 510 | | 511 | | * Neither the name of Stanford University nor the names of its contributors 512 | | may be used to endorse or promote products derived from this software 513 | | without specific prior written permission. 514 | | 515 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 516 | | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 517 | | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 518 | | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 519 | | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 520 | | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 521 | | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 522 | | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 523 | | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 524 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 525 | 526 | ---------------------------------------------------------------------- 527 | License for g.Raphael 0.4.1 used by the Java IPC implementation: 528 | 529 | Copyright (c) 2009 Dmitry Baranovskiy (http://g.raphaeljs.com) 530 | Licensed under the MIT (http://www.opensource.org/licenses/mit-license.php) license. 531 | 532 | ---------------------------------------------------------------------- 533 | License for jQuery v1.4.2 used by the Java IPC implementation: 534 | 535 | Copyright 2010, John Resig 536 | Dual licensed under the MIT or GPL Version 2 licenses. 
537 | http://jquery.org/license 538 | 539 | jQuery includes Sizzle.js 540 | http://sizzlejs.com/ 541 | Copyright 2010, The Dojo Foundation 542 | Released under the MIT, BSD, and GPL Licenses. 543 | 544 | Both are included under the terms of the MIT license: 545 | 546 | | Permission is hereby granted, free of charge, to any person obtaining a copy 547 | | of this software and associated documentation files (the "Software"), to deal 548 | | in the Software without restriction, including without limitation the rights 549 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 550 | | copies of the Software, and to permit persons to whom the Software is 551 | | furnished to do so, subject to the following conditions: 552 | | 553 | | The above copyright notice and this permission notice shall be included in 554 | | all copies or substantial portions of the Software. 555 | | 556 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 557 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 558 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 559 | | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 560 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 561 | | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 562 | | THE SOFTWARE. 563 | 564 | ---------------------------------------------------------------------- 565 | License for portions of idl.jj in the Java compiler implementation: 566 | 567 | Portions of idl.jj were modeled after the example Java 1.5 568 | parser included with JavaCC. For those portions: 569 | 570 | Copyright (c) 2006, Sun Microsystems, Inc. 571 | All rights reserved. 
572 | 573 | | Redistribution and use in source and binary forms, with or without 574 | | modification, are permitted provided that the following conditions are met: 575 | | 576 | | * Redistributions of source code must retain the above copyright notice, 577 | | this list of conditions and the following disclaimer. 578 | | * Redistributions in binary form must reproduce the above copyright 579 | | notice, this list of conditions and the following disclaimer in the 580 | | documentation and/or other materials provided with the distribution. 581 | | * Neither the name of the Sun Microsystems, Inc. nor the names of its 582 | | contributors may be used to endorse or promote products derived from 583 | | this software without specific prior written permission. 584 | | 585 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 586 | | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 587 | | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 588 | | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 589 | | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 590 | | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 591 | | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 592 | | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 593 | | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 594 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 595 | | THE POSSIBILITY OF SUCH DAMAGE. 
596 | -------------------------------------------------------------------------------- /2019/avro-to-arrow/README.rst: -------------------------------------------------------------------------------- 1 | # Avro-to-Arrow 2 | 3 | This package provides a fast path for converting from Avro (as serialized by 4 | the BigQuery Storage API) to the Arrow Table in-memory format for fast 5 | analytics. 6 | 7 | This package is created for educational / experimental purposes. 8 | 9 | ## About the fork 10 | 11 | This project is a fork of the [Apache Avro™](https://github.com/apache/avro) project. 12 | 13 | To learn more about Avro, please visit their website at: 14 | 15 | http://avro.apache.org/ 16 | 17 | This package optimizes Avro parsing of the schemaless blocks provided by the 18 | BigQuery Storage API. Many of these optimizations don't make sense in a 19 | general-purpose parser package. 20 | 21 | ## Contributing 22 | 23 | This package is made for educational purposes. Direct your efforts towards the 24 | official Arrow and Avro packages, instead. 25 | 26 | ## License 27 | 28 | Apache Version 2.0 29 | 30 | -------------------------------------------------------------------------------- /2019/avro-to-arrow/avro_to_arrow/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The Avro-to-Arrow Project 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
# --------------------------------------------------------------------------------
# /2019/avro-to-arrow/avro_to_arrow/decoder.py:
# --------------------------------------------------------------------------------
# Copyright 2019 The Avro-to-Arrow Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Private module."""

import numba
import numpy


@numba.jit(nopython=True, nogil=True)
def read_boolean(position, block):
    """Read a single byte whose value is either 0 (false) or 1 (true).

    Args:
        position: Index into ``block`` of the byte to read.
        block: Indexable sequence of byte values.

    Returns:
        Tuple[int, numba.uint8]:
            (new position, boolean). The boolean is ``0xFF`` for true and
            ``0`` for false.
    """
    # We store bool as a bit array. Return 0xff so that we can bitwise AND with
    # the mask that says which bit to write to.
    value = numba.uint8(0xFF if block[position] != 0 else 0)
    return (position + 1, value)


@numba.jit(nopython=True, nogil=True)
def read_double(position, block):
    """A double is written as 8 bytes.

    Args:
        position: Index into ``block`` of the first of the 8 bytes.
        block: Indexable sequence of byte values.

    Returns:
        Tuple[int, numpy.float64]:
            (new position, double precision floating point)
    """
    # Temporarily use an integer data type for bit shifting purposes. Encoded
    # as little-endian IEEE 754 floating point.
    value = numpy.uint64(block[position])
    value = numpy.uint64(
        value
        | (numpy.uint64(block[position + 1]) << 8)
        | (numpy.uint64(block[position + 2]) << 16)
        | (numpy.uint64(block[position + 3]) << 24)
        | (numpy.uint64(block[position + 4]) << 32)
        | (numpy.uint64(block[position + 5]) << 40)
        | (numpy.uint64(block[position + 6]) << 48)
        | (numpy.uint64(block[position + 7]) << 56)
    )
    # Reinterpret the assembled 64 bits as a float; no numeric conversion.
    return (position + 8, value.view(numpy.float64))


# @numba.jit(nopython=True, nogil=True)
def read_long(position, block):
    """Read an int64 using variable-length, zig-zag coding.

    Each byte contributes its low 7 bits, least-significant group first; a set
    high bit (0x80) means another byte follows.

    Args:
        position: Index into ``block`` of the first byte of the value.
        block: Indexable sequence of byte values (``bytes`` or a NumPy uint8
            array).

    Returns:
        Tuple[int, numpy.int64]:
            (new position, long integer)
    """
    # This function runs as plain Python (the jit decorator above is commented
    # out). Convert each byte to a Python int so that a numpy.uint8 element
    # does not overflow in the shifts and the negation below.
    b = int(block[position])
    n = b & 0x7F
    shift = 7

    while (b & 0x80) != 0:
        position += 1
        b = int(block[position])
        n |= (b & 0x7F) << shift
        shift += 7

    # Undo the zig-zag mapping: even n -> n / 2, odd n -> -(n + 1) / 2.
    datum = numpy.int64((n >> 1) ^ -(n & 1))
    return (position + 1, datum)


# --------------------------------------------------------------------------------
# /2019/avro-to-arrow/avro_to_arrow/generator.py:
# --------------------------------------------------------------------------------
# Copyright 2019 The Avro-to-Arrow Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Private module."""

import copy

import numpy
import numba
import pandas
import pyarrow


# Scalar types to support.
# INT64
# FLOAT64
# BOOL
#
# Later:
# NUMERIC (decimal) ??? how is this actually serialized. Let's wait.
# DATE
# TIME
# TIMESTAMP
#
# Even later:
# DATETIME - need to parse from string


def generate_avro_to_arrow_parser(avro_schema):
    """Return a parser that takes a ReadRowsResponse message and returns a
    :class:`pyarrow.Table` object.

    The parser is built by generating Python source specialized for the
    fields in ``avro_schema`` and executing it.

    Args:
        avro_schema (Map):
            Avro schema in JSON format. Only ``avro_schema["fields"]`` is
            read; each field must provide ``"name"`` and a ``"type"`` list
            whose second entry is ``"long"``, ``"double"`` or ``"boolean"``.

    Returns:
        A function that takes a message and returns a table.

    Raises:
        NotImplementedError: If a field has an unsupported Avro type.
    """
    # Execute the generated source in ONE namespace. With separate globals and
    # locals dicts, exec() behaves like a class body: message_to_buffers would
    # be stored in the locals dict, while message_to_table resolves it through
    # its globals and would fail with NameError when called.
    namespace = copy.copy(globals())

    gen_code = _generate_message_to_buffers(avro_schema) + """
def message_to_table(message):
    row_count = message.avro_rows.row_count
    block = message.avro_rows.serialized_binary_rows
    buffers = message_to_buffers(row_count, block)
"""

    # Index into the returned tuple rather than tuple-unpacking it, so a schema
    # with exactly one field works too.
    for field_index, field in enumerate(avro_schema["fields"]):
        gen_code += (
            "    field_{field_index}_nullmask, field_{field_index}_rows = buffers[{field_index}]\n"
            "    field_{field_index}_array = pyarrow.Array.from_buffers({pyarrow_type}, row_count, [\n"
            "        pyarrow.py_buffer(field_{field_index}_nullmask),\n"
            "        pyarrow.py_buffer(field_{field_index}_rows),\n"
            "    ])\n"
        ).format(
            field_index=field_index,
            pyarrow_type=_generate_pyarrow_type(field),
        )

    field_arrays = ", ".join(
        "field_{}_array".format(field_index)
        for field_index in range(len(avro_schema["fields"]))
    )
    field_names = ", ".join(repr(field["name"]) for field in avro_schema["fields"])
    gen_code += "    return pyarrow.Table.from_arrays([{}], names=[{}])\n".format(
        field_arrays, field_names
    )

    exec(gen_code, namespace)
    return namespace["message_to_table"]


def _generate_pyarrow_type(avro_field):
    """Return source text for the pyarrow type matching ``avro_field``.

    Raises:
        NotImplementedError: If the Avro type is not long, double or boolean.
    """
    # TODO: Verify first is "null", since all fields should be nullable.
    avro_type = avro_field["type"][1]

    if avro_type == "long":
        return "pyarrow.int64()"
    elif avro_type == "double":
        return "pyarrow.float64()"
    elif avro_type == "boolean":
        return "pyarrow.bool_()"
    else:
        raise NotImplementedError("Got unexpected type: {}.".format(avro_type))


def _generate_populate_data_array(field_index, avro_field):
    """Return source lines that decode one value of the field for row ``i``.

    The lines are indented to sit inside the generated
    ``if union_type != 0:`` branch, so data bytes are only consumed for
    non-null values.

    Raises:
        NotImplementedError: If the Avro type is not long, double or boolean.
    """
    # TODO: Verify first is "null", since all fields should be nullable.
    avro_type = avro_field["type"][1]
    lines = []

    if avro_type == "long":
        lines.append(
            "            position, field_{}_data[i] = _read_long(position, block)".format(field_index)
        )
    elif avro_type == "double":
        lines.append(
            "            position, field_{}_data[i] = _read_double(position, block)".format(field_index)
        )
    elif avro_type == "boolean":
        lines.append(
            "            position, boolmask = _read_boolean(position, block)"
        )
        # Booleans are bit-packed: boolmask is 0xFF or 0, so AND-ing with the
        # current row's bit sets that bit only when the value is true.
        lines.append(
            "            field_{field_index}_data[nullbyte] = field_{field_index}_data[nullbyte] | (boolmask & nullbit)".format(field_index=field_index)
        )
    else:
        raise NotImplementedError("Got unexpected type: {}.".format(avro_type))
    return "\n".join(lines)


def _generate_data_array(field_index, avro_field):
    """Return the source line that allocates the data buffer for a field.

    Raises:
        NotImplementedError: If the Avro type is not long, double or boolean.
    """
    # TODO: Verify first is "null", since all fields should be nullable.
    avro_type = avro_field["type"][1]

    if avro_type == "long":
        constructor = "numpy.empty(row_count, dtype=numpy.int64)"
    elif avro_type == "double":
        constructor = "numpy.empty(row_count, dtype=numpy.float64)"
    elif avro_type == "boolean":
        constructor = "_make_bitarray(row_count)"
    else:
        raise NotImplementedError("Got unexpected type: {}.".format(avro_type))
    return "    field_{}_data = {}".format(field_index, constructor)


def _generate_message_to_buffers(avro_schema):
    """Return source for a jitted ``message_to_buffers(row_count, block)``.

    The generated function walks ``block`` row by row and, for every field in
    ``avro_schema["fields"]``, fills a validity bitmask and a data buffer.
    """
    gen_lines = ["""
@numba.jit(nopython=True, nogil=True)
def message_to_buffers(row_count, block):
    '''Decode all rows of a serialized Avro block into per-column buffers.

    Args:
        row_count:
            Number of rows encoded in the block.
        block:
            The serialized Avro rows.

    Returns:
        A tuple holding one (nullmask, data) pair per schema field.
    '''
    position = 0
    nullbit = numba.uint8(0)
"""]

    # Each column needs a nullmask and a data array.
    for field_index, field in enumerate(avro_schema["fields"]):
        gen_lines.append("    field_{}_nullmask = _make_bitarray(row_count)".format(field_index))
        gen_lines.append(_generate_data_array(field_index, field))

    gen_lines.append("""
    for i in range(row_count):
        nullbit = _rotate_nullbit(nullbit)
        nullbyte = i // 8
""")

    for field_index, field in enumerate(avro_schema["fields"]):
        # Every field is read as a union: a long selecting the branch, followed
        # by the value when the branch is not 0.
        gen_lines.append("""
        position, union_type = _read_long(position, block)
        if union_type != 0:
            field_{field_index}_nullmask[nullbyte] = field_{field_index}_nullmask[nullbyte] | nullbit
""".format(field_index=field_index))
        gen_lines.append(_generate_populate_data_array(field_index, field))

    gen_lines.append("""
    return (
""")

    for field_index in range(len(avro_schema["fields"])):
        gen_lines.append(
            "        (field_{field_index}_nullmask, field_{field_index}_data),".format(field_index=field_index)
        )
    gen_lines.append("    )")
    return "\n".join(gen_lines)


@numba.jit(nopython=True, nogil=True)
def _copy_bytes(input_bytes, input_start, output_bytes, output_start, strlen):
    """Copy ``strlen`` items from ``input_bytes`` into ``output_bytes``.

    Reading starts at ``input_start`` and writing at ``output_start``.
    """
    input_pos = input_start
    output_pos = output_start
    input_end = input_start + strlen
    while input_pos < input_end:
        output_bytes[output_pos] = input_bytes[input_pos]
        input_pos += 1
        output_pos += 1


@numba.jit(nopython=True, nogil=True)
def _read_boolean(position, block):
    """Read a single byte whose value is either 0 (false) or 1 (true).

    Returns:
        Tuple[int, numba.uint8]:
            (new position, boolean). The boolean is ``0xff`` for true and
            ``0`` for false.
    """
    # We store bool as a bit array. Return 0xff so that we can bitwise AND with
    # the mask that says which bit to write to.
    value = numba.uint8(0xff if block[position] != 0 else 0)
    return (position + 1, value)


@numba.jit(nopython=True, nogil=True)
def _read_bytes(position, block):
    """Read a length-prefixed run of bytes.

    The length is decoded with ``_read_long``; that many bytes are then copied
    into a new uint8 array.

    Returns:
        Tuple[int, numpy.ndarray]:
            (new position, copied bytes)
    """
    position, strlen = _read_long(position, block)
    value = numpy.empty(strlen, dtype=numpy.uint8)
    for i in range(strlen):
        value[i] = block[position + i]
    return (position + strlen, value)


@numba.jit(nopython=True, nogil=True)
def _read_double(position, block):
    """A double is written as 8 bytes.

    Returns:
        Tuple[int, numpy.float64]:
            (new position, double precision floating point)
    """
    # Temporarily use an integer data type for bit shifting purposes. Encoded
    # as little-endian IEEE 754 floating point.
    value = numpy.uint64(block[position])
    value = (value
             | (numpy.uint64(block[position + 1]) << 8)
             | (numpy.uint64(block[position + 2]) << 16)
             | (numpy.uint64(block[position + 3]) << 24)
             | (numpy.uint64(block[position + 4]) << 32)
             | (numpy.uint64(block[position + 5]) << 40)
             | (numpy.uint64(block[position + 6]) << 48)
             | (numpy.uint64(block[position + 7]) << 56))
    # Reinterpret the assembled 64 bits as a float; no numeric conversion.
    return (position + 8, numpy.uint64(value).view(numpy.float64))


@numba.jit(nopython=True, nogil=True)
def _read_long(position, block):
    """Read an int64 using variable-length, zig-zag coding.

    Each byte contributes its low 7 bits, least-significant group first; a set
    high bit (0x80) means another byte follows.

    Returns:
        Tuple[int, int]:
            (new position, long integer)
    """
    b = block[position]
    n = b & 0x7F
    shift = 7

    while (b & 0x80) != 0:
        position += 1
        b = block[position]
        n |= (b & 0x7F) << shift
        shift += 7

    # Undo the zig-zag mapping.
    return (position + 1, (n >> 1) ^ -(n & 1))


@numba.jit(nopython=True, nogil=True)
def _make_bitarray(row_count):
    """Return a zeroed uint8 array with at least one bit per row."""
    # Round up so a partial final byte is still allocated.
    extra_byte = 0
    if (row_count % 8) != 0:
        extra_byte = 1
    return numpy.zeros(row_count // 8 + extra_byte, dtype=numpy.uint8)


@numba.jit(nopython=True, nogil=True)
def _rotate_nullbit(nullbit):
    """Advance a single-bit mask to the next bit, wrapping from 128 to 1.

    An input of 0 (the initial state) also yields 1.
    """
    # TODO: Arrow assumes little endian. Detect big endian machines and modify
    # rotation direction.
    nullbit = (nullbit << 1) & 255

    # Have we looped?
    if nullbit == 0:
        return numba.uint8(1)

    return nullbit


# --------------------------------------------------------------------------------
# /2019/avro-to-arrow/example.py:
# --------------------------------------------------------------------------------
# Copyright 2019 The Avro-to-Arrow Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 | 15 | """Example of code that is generated by avro-to-arrow package.""" 16 | 17 | 18 | @numba.jit(nopython=True, nogil=True) 19 | def message_to_buffers(row_count, block): #, avro_schema): 20 | '''Parse all rows in a stream block. 21 | 22 | Args: 23 | block ( ~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse ): 24 | A block containing Avro bytes to parse into rows. 25 | avro_schema (fastavro.schema): 26 | A parsed Avro schema, used to deserialized the bytes in the 27 | block. 28 | 29 | Returns: 30 | Iterable[Mapping]: 31 | A sequence of rows, represented as dictionaries. 32 | ''' 33 | position = 0 34 | nullbit = numba.uint8(0) 35 | 36 | field_0_nullmask = _make_bitarray(row_count) 37 | field_0_data = numpy.empty(row_count, dtype=numpy.int64) 38 | field_1_nullmask = _make_bitarray(row_count) 39 | field_1_data = numpy.empty(row_count, dtype=numpy.float64) 40 | field_2_nullmask = _make_bitarray(row_count) 41 | field_2_data = _make_bitarray(row_count) 42 | 43 | for i in range(row_count): 44 | nullbit = _rotate_nullbit(nullbit) 45 | nullbyte = i // 8 46 | 47 | position, union_type = _read_long(position, block) 48 | if union_type != 0: 49 | field_0_nullmask[nullbyte] = field_0_nullmask[nullbyte] | nullbit 50 | position, field_0_data[i] = _read_long(position, block) 51 | 52 | position, union_type = _read_long(position, block) 53 | if union_type != 0: 54 | field_1_nullmask[nullbyte] = field_1_nullmask[nullbyte] | nullbit 55 | position, field_1_data[i] = _read_double(position, block) 56 | 57 | position, union_type = _read_long(position, block) 58 | if union_type != 0: 59 | field_2_nullmask[nullbyte] = field_2_nullmask[nullbyte] | nullbit 60 | position, boolmask = _read_boolean(position, block) 61 | field_2_data[nullbyte] = field_2_data[nullbyte] | (boolmask & nullbit) 62 | 63 | return ( 64 | (field_0_nullmask, field_0_data), 65 | (field_1_nullmask, field_1_data), 66 | (field_2_nullmask, field_2_data), 67 | ) 68 | 69 | 70 | def message_to_table(message): 71 | 
global message_to_buffers 72 | 73 | row_count = message.avro_rows.row_count 74 | block = message.avro_rows.serialized_binary_rows 75 | field_0, field_1, field_2 = message_to_buffers(row_count, block) 76 | field_0_nullmask, field_0_rows = field_0 77 | field_0_array = pyarrow.Array.from_buffers(pyarrow.int64(), row_count, [ 78 | pyarrow.py_buffer(field_0_nullmask), 79 | pyarrow.py_buffer(field_0_rows), 80 | ]) 81 | field_1_nullmask, field_1_rows = field_1 82 | field_1_array = pyarrow.Array.from_buffers(pyarrow.float64(), row_count, [ 83 | pyarrow.py_buffer(field_1_nullmask), 84 | pyarrow.py_buffer(field_1_rows), 85 | ]) 86 | field_2_nullmask, field_2_rows = field_2 87 | field_2_array = pyarrow.Array.from_buffers(pyarrow.bool_(), row_count, [ 88 | pyarrow.py_buffer(field_2_nullmask), 89 | pyarrow.py_buffer(field_2_rows), 90 | ]) 91 | return pyarrow.Table.from_arrays([field_0_array, field_1_array, field_2_array], names=['int_col', 'float_col', 'bool_col']) 92 | -------------------------------------------------------------------------------- /2019/avro-to-arrow/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2019 The Avro-to-Arrow Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | import io 18 | import os 19 | 20 | import setuptools 21 | 22 | name = 'avro-to-arrow' 23 | description = 'Fast parser from Avro bytes (as provided by BigQuery Storage API) to in-memory Arrow table.' 24 | version = '0.1.0' 25 | release_status = 'Development Status :: 3 - Alpha' 26 | dependencies = [ 27 | 'numba', 28 | 'numpy', 29 | ] 30 | 31 | package_root = os.path.abspath(os.path.dirname(__file__)) 32 | 33 | readme_filename = os.path.join(package_root, 'README.rst') 34 | with io.open(readme_filename, encoding='utf-8') as readme_file: 35 | readme = readme_file.read() 36 | 37 | packages = setuptools.find_packages() 38 | 39 | setuptools.setup( 40 | name=name, 41 | version=version, 42 | description=description, 43 | long_description=readme, 44 | author='The Avro-to-Arrow Project', 45 | author_email='googleapis-packages@google.com', 46 | license='Apache 2.0', 47 | url='https://github.com/googleapis/python-avro-to-arrow', 48 | classifiers=[ 49 | release_status, 50 | 'Intended Audience :: Developers', 51 | 'License :: OSI Approved :: Apache Software License', 52 | 'Programming Language :: Python', 53 | 'Programming Language :: Python :: 2', 54 | 'Programming Language :: Python :: 2.7', 55 | 'Programming Language :: Python :: 3', 56 | 'Programming Language :: Python :: 3.5', 57 | 'Programming Language :: Python :: 3.6', 58 | 'Programming Language :: Python :: 3.7', 59 | 'Operating System :: OS Independent', 60 | 'Topic :: Internet', 61 | ], 62 | platforms='Posix; MacOS X; Windows', 63 | packages=packages, 64 | install_requires=dependencies, 65 | python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*', 66 | include_package_data=True, 67 | zip_safe=False, 68 | ) 69 | -------------------------------------------------------------------------------- /2019/avro-to-arrow/tests/test_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The Avro-to-Arrow Project 2 | # 3 | # Licensed under the Apache License, 
Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import struct 16 | 17 | import pytest 18 | 19 | 20 | @pytest.fixture 21 | def module_under_test(): 22 | from avro_to_arrow import decoder 23 | 24 | return decoder 25 | 26 | 27 | @pytest.mark.parametrize( 28 | "block,position,expected", 29 | [ 30 | (b"\x00", 0, 0x00), 31 | (b"\x01", 0, 0xFF), 32 | (b"\xff", 0, 0xFF), 33 | (b"\x00\x01\x00", 1, 0xFF), 34 | (b"\xff\x00\xff", 1, 0x00), 35 | ], 36 | ) 37 | def test_read_boolean(module_under_test, block, position, expected): 38 | actual_position, actual_bitmask = module_under_test.read_boolean(position, block) 39 | assert actual_position == position + 1 40 | assert actual_bitmask == expected 41 | 42 | 43 | @pytest.mark.parametrize( 44 | "expected", 45 | [ 46 | 0.0, 47 | 1.0, 48 | (2 ** 1023 * (1 + (1.0 - (2 ** -52)))), # maximum double 49 | float("inf"), 50 | float("-inf"), 51 | ], 52 | ) 53 | def test_read_double(module_under_test, expected): 54 | block = struct.pack("\n", 56 | "\n", 69 | "\n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | "
xhex
010000000000000001
1123456000000000001e240
29876543210000000024cb016ea
3-1001fffffffffffffc17
\n", 100 | "" 101 | ], 102 | "text/plain": [ 103 | " x hex\n", 104 | "0 1 0000000000000001\n", 105 | "1 123456 000000000001e240\n", 106 | "2 9876543210 000000024cb016ea\n", 107 | "3 -1001 fffffffffffffc17" 108 | ] 109 | }, 110 | "execution_count": 4, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "%%bigquery\n", 117 | "SELECT\n", 118 | " x,\n", 119 | " bqutil.fn.to_hex(x) AS hex\n", 120 | "FROM\n", 121 | " UNNEST([1, 123456, 9876543210, -1001]) AS x;" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "Try a function that uses a JS library hosted in GCS. See: https://github.com/GoogleCloudPlatform/bigquery-utils/pull/434 and https://github.com/GoogleCloudPlatform/bigquery-utils/blob/master/udfs/community/README.md#xml_to_jsonxml-string\n" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 3, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "application/vnd.jupyter.widget-view+json": { 139 | "model_id": "9a7df3ece0584d909681ec621cdbbfd6", 140 | "version_major": 2, 141 | "version_minor": 0 142 | }, 143 | "text/plain": [ 144 | "Query is running: 0%| |" 145 | ] 146 | }, 147 | "metadata": {}, 148 | "output_type": "display_data" 149 | }, 150 | { 151 | "data": { 152 | "application/vnd.jupyter.widget-view+json": { 153 | "model_id": "38ff038524e84cc39d7c6adf5ae33807", 154 | "version_major": 2, 155 | "version_minor": 0 156 | }, 157 | "text/plain": [ 158 | "Downloading: 0%| |" 159 | ] 160 | }, 161 | "metadata": {}, 162 | "output_type": "display_data" 163 | }, 164 | { 165 | "data": { 166 | "text/html": [ 167 | "
\n", 168 | "\n", 181 | "\n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | "
output_json
0{\"xml\":{\"_attributes\":{\"foo\":\"FOO\"},\"bar\":{\"ba...
\n", 195 | "
" 196 | ], 197 | "text/plain": [ 198 | " output_json\n", 199 | "0 {\"xml\":{\"_attributes\":{\"foo\":\"FOO\"},\"bar\":{\"ba..." 200 | ] 201 | }, 202 | "execution_count": 3, 203 | "metadata": {}, 204 | "output_type": "execute_result" 205 | } 206 | ], 207 | "source": [ 208 | "%%bigquery\n", 209 | "SELECT bqutil.fn.xml_to_json(\n", 210 | " 'BAZ'\n", 211 | ") AS output_json" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [] 220 | } 221 | ], 222 | "metadata": { 223 | "kernelspec": { 224 | "display_name": "scratch", 225 | "language": "python", 226 | "name": "python3" 227 | }, 228 | "language_info": { 229 | "codemirror_mode": { 230 | "name": "ipython", 231 | "version": 3 232 | }, 233 | "file_extension": ".py", 234 | "mimetype": "text/x-python", 235 | "name": "python", 236 | "nbconvert_exporter": "python", 237 | "pygments_lexer": "ipython3", 238 | "version": "3.12.6" 239 | } 240 | }, 241 | "nbformat": 4, 242 | "nbformat_minor": 2 243 | } 244 | -------------------------------------------------------------------------------- /2024/12-pydata-global/img/iowa-categories.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tswast/code-snippets/bf46bc3e489ca3f6793327c93ebb3f677a3087d0/2024/12-pydata-global/img/iowa-categories.png -------------------------------------------------------------------------------- /2024/12-pydata-global/img/iowa-counties-map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tswast/code-snippets/bf46bc3e489ca3f6793327c93ebb3f677a3087d0/2024/12-pydata-global/img/iowa-counties-map.png -------------------------------------------------------------------------------- /2024/12-pydata-global/img/iowa-pop-volume-line.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tswast/code-snippets/bf46bc3e489ca3f6793327c93ebb3f677a3087d0/2024/12-pydata-global/img/iowa-pop-volume-line.png -------------------------------------------------------------------------------- /2024/12-pydata-global/img/iowa-pop-volume-scatter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tswast/code-snippets/bf46bc3e489ca3f6793327c93ebb3f677a3087d0/2024/12-pydata-global/img/iowa-pop-volume-scatter.png -------------------------------------------------------------------------------- /2024/12-pydata-global/img/iowa-volume-per-pop-lines.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tswast/code-snippets/bf46bc3e489ca3f6793327c93ebb3f677a3087d0/2024/12-pydata-global/img/iowa-volume-per-pop-lines.png -------------------------------------------------------------------------------- /2024/12-pydata-global/img/iowa-volume-per-pop-scatter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tswast/code-snippets/bf46bc3e489ca3f6793327c93ebb3f677a3087d0/2024/12-pydata-global/img/iowa-volume-per-pop-scatter.png -------------------------------------------------------------------------------- /2024/12-pydata-global/img/iowa-volumes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tswast/code-snippets/bf46bc3e489ca3f6793327c93ebb3f677a3087d0/2024/12-pydata-global/img/iowa-volumes.png -------------------------------------------------------------------------------- /2024/12-pydata-global/img/iowa-words.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tswast/code-snippets/bf46bc3e489ca3f6793327c93ebb3f677a3087d0/2024/12-pydata-global/img/iowa-words.png 
-------------------------------------------------------------------------------- /2024/12-pydata-global/img/iowa-zips-dirty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tswast/code-snippets/bf46bc3e489ca3f6793327c93ebb3f677a3087d0/2024/12-pydata-global/img/iowa-zips-dirty.png -------------------------------------------------------------------------------- /2024/12-pydata-global/img/iowa-zips.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tswast/code-snippets/bf46bc3e489ca3f6793327c93ebb3f677a3087d0/2024/12-pydata-global/img/iowa-zips.png -------------------------------------------------------------------------------- /2024/12-pydata-global/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: bigquery-dataframes-iowa-liquor-sales 3 | summary: In this lab, you will use BigQuery DataFrames from a Python notebook in BigQuery Studio to clean and analyze the Iowa liquor sales public dataset. 4 | status: [draft] 5 | authors: Tim Swena 6 | categories: bigquery,python,ml 7 | tags: web 8 | duration: 0 9 | 10 | --- 11 | 12 | # Exploratory data analysis of Iowa liquor sales using the BigQuery DataFrames package 13 | 14 | 15 | ## Overview 16 | 17 | 18 | In this lab, you will use BigQuery DataFrames from a Python notebook in BigQuery Studio to clean and analyze the Iowa liquor sales public dataset. 19 | Make use of BigQuery ML and remote function capabilities to discover insights. 20 | 21 | You will create a Python notebook to compare sales across geographic areas. This can be adapted to work on any structured data. 
22 | 23 | ### Objectives 24 | 25 | In this lab, you learn how to perform the following tasks: 26 | 27 | * Activate and use Python notebooks in BigQuery Studio 28 | * Connect to BigQuery using the BigQuery DataFrames package 29 | * Create a linear regression using BigQuery ML 30 | * Perform complex aggregations and joins using a familiar pandas-like syntax 31 | 32 | 33 | ## Requirements 34 | 35 | * A browser, such as [Chrome](https://www.google.com/chrome/browser/desktop/) or [Firefox](https://www.mozilla.org/firefox/) 36 | * A Google Cloud project with billing enabled 37 | 38 | 39 | ### Before you begin 40 | 41 | To follow the instructions in this codelab, you'll need a Google Cloud Project with BigQuery Studio enabled and a connected billing account. 42 | 43 | 1. In the [Google Cloud Console](https://console.cloud.google.com/), on the project selector page, select or create a Google Cloud [project](https://cloud.google.com/resource-manager/docs/creating-managing-projects) 44 | 2. Ensure that billing is enabled for your Google Cloud project. Learn how to [check if billing is enabled on a project](https://cloud.google.com/billing/docs/how-to/verify-billing-enabled) 45 | 3. Follow the instructions to [Enable BigQuery Studio for asset management](https://cloud.google.com/bigquery/docs/enable-assets). 46 | 47 | ### Prepare BigQuery Studio 48 | 49 | Create an empty notebook and connect it to a runtime. 50 | 51 | 1. Go to [BigQuery Studio](https://console.cloud.google.com/bigquery) in the Google Cloud Console. 52 | 2. Click the **▼** next to the **+** button. 53 | 3. Select **Python notebook**. 54 | 4. Close the template selector. 55 | 5. Select **+ Code** to create a new code cell. 56 | 6. Install the latest version of the BigQuery DataFrames package from the code cell. 57 | 58 | Type the following command. 59 | 60 | ``` 61 | %pip install --upgrade bigframes --quiet 62 | ``` 63 | 64 | Click the **Run cell** button or press *Shift + Enter* to run the code cell. 
65 | 66 | 67 | ## Read a public dataset 68 | 69 | Initialize the BigQuery DataFrames package by running the following in a new code cell: 70 | 71 | ```python 72 | import bigframes.pandas as bpd 73 | 74 | bpd.options.bigquery.ordering_mode = "partial" 75 | bpd.options.display.repr_mode = "deferred" 76 | ``` 77 | 78 | Note: in this tutorial, we use the experimental "partial ordering mode", which 79 | allows for more efficient queries when used with pandas-like filtering. Some 80 | pandas features that require a strict ordering or index may not work. 81 | 82 | Check your `bigframes` package version with 83 | 84 | ``` 85 | bpd.__version__ 86 | ``` 87 | 88 | This tutorial requires version 1.27.0 or later. 89 | 90 | ### Iowa liquor retail sales 91 | 92 | The [Iowa liquor retail sales dataset](https://console.cloud.google.com/marketplace/product/iowa-department-of-commerce/iowa-liquor-sales) 93 | is provided on BigQuery through [Google Cloud's public dataset program](https://cloud.google.com/datasets). 94 | This dataset contains every wholesale purchase of liquor in the State of Iowa by retailers for sale to individuals since January 1, 2012. 95 | Data are collected by the Alcoholic Beverages Division within the Iowa Department of Commerce. 96 | 97 | 98 | In BigQuery, query the 99 | [bigquery-public-data.iowa_liquor_sales.sales](https://console.cloud.google.com/bigquery?p=bigquery-public-data&d=iowa_liquor_sales&t=sales&page=table) 100 | to analyze the Iowa liquor retail sales. Use the `bigframes.pandas.read_gbq()` 101 | method to create a DataFrame from a query string or table ID. 102 | 103 | Run the following in a new code cell to create a DataFrame named "df": 104 | 105 | ``` 106 | df = bpd.read_gbq_table("bigquery-public-data.iowa_liquor_sales.sales") 107 | ``` 108 | 109 | 110 | ### Discover basic information about a DataFrame 111 | 112 | Use the `DataFrame.peek()` method to download a small sample of the data. 
113 | 114 | **Run this cell:** 115 | 116 | ``` 117 | df.peek() 118 | ``` 119 | 120 | **Expected output:** 121 | 122 | ``` 123 | index invoice_and_item_number date store_number store_name ... 124 | 0 RINV-04620300080 2023-04-28 10197 SUNSHINE FOODS / HAWARDEN 125 | 1 RINV-04864800097 2023-09-25 2621 HY-VEE FOOD STORE #3 / SIOUX CITY 126 | 2 RINV-05057200028 2023-12-28 4255 FAREWAY STORES #058 / ORANGE CITY 127 | 3 ... 128 | ``` 129 | 130 | Note: `head()` requires ordering and is generally less efficient than `peek()` 131 | if you want to visualize a sample of data. 132 | 133 | Just as with pandas, use the `DataFrame.dtypes` property to see all available 134 | columns and their corresponding data types. These are exposed in a 135 | pandas-compatible way. 136 | 137 | **Run this cell:** 138 | 139 | ``` 140 | df.dtypes 141 | ``` 142 | 143 | **Expected output:** 144 | 145 | ``` 146 | invoice_and_item_number string[pyarrow] 147 | date date32[day][pyarrow] 148 | store_number string[pyarrow] 149 | store_name string[pyarrow] 150 | address string[pyarrow] 151 | city string[pyarrow] 152 | zip_code string[pyarrow] 153 | store_location geometry 154 | county_number string[pyarrow] 155 | county string[pyarrow] 156 | category string[pyarrow] 157 | category_name string[pyarrow] 158 | vendor_number string[pyarrow] 159 | vendor_name string[pyarrow] 160 | item_number string[pyarrow] 161 | item_description string[pyarrow] 162 | pack Int64 163 | bottle_volume_ml Int64 164 | state_bottle_cost Float64 165 | state_bottle_retail Float64 166 | bottles_sold Int64 167 | sale_dollars Float64 168 | volume_sold_liters Float64 169 | volume_sold_gallons Float64 170 | 171 | dtype: object 172 | ``` 173 | 174 | The `DataFrame.describe()` method queries some basic statistics from the DataFrame. 175 | Run `DataFrame.to_pandas()` to download these summary statistics as a pandas DataFrame. 
176 | 177 | **Run this cell:** 178 | 179 | ``` 180 | df.describe("all").to_pandas() 181 | ``` 182 | 183 | **Expected output:** 184 | 185 | ``` 186 | invoice_and_item_number date store_number store_name ... 187 | nunique 30305765 3158 3353 ... 188 | std ... 189 | mean ... 190 | 75% ... 191 | 25% ... 192 | count 30305765 30305765 30305765 ... 193 | min ... 194 | 50% ... 195 | max ... 196 | 9 rows × 24 columns 197 | ``` 198 | 199 | ## Visualize and clean the data 200 | 201 | The Iowa liquor retail sales dataset provides fine-grained geographic information, 202 | including where the retail stores are located. Use these data to identify trends 203 | and differences across geographic areas. 204 | 205 | ### Visualize sales per zip code 206 | 207 | There are several built-in visualization methods such as [DataFrame.plot.hist()](https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.operations.plotting.PlotAccessor#bigframes_operations_plotting_PlotAccessor_hist). 208 | Use this method to compare liquor sales by ZIP code. 209 | 210 | ``` 211 | volume_by_zip = df.groupby("zip_code").agg({"volume_sold_liters": "sum"}) 212 | volume_by_zip.plot.hist(bins=20) 213 | ``` 214 | 215 | **Expected output:** 216 | 217 | ![Histogram of volumes](img/iowa-volumes.png) 218 | 219 | Use a bar chart to see which zip codes sold the most alcohol. 220 | 221 | ``` 222 | ( 223 | volume_by_zip 224 | .sort_values("volume_sold_liters", ascending=False) 225 | .head(25) 226 | .to_pandas() 227 | .plot.bar(rot=80) 228 | ) 229 | ``` 230 | 231 | **Expected output:** 232 | 233 | ![Bar chart of volumes of alcohol in the top selling zip codes](img/iowa-zips-dirty.png) 234 | 235 | ### Clean the data 236 | 237 | Some ZIP codes have a trailing `.0`. Possibly somewhere in the data collection 238 | the ZIP codes were accidentally converted into floating point values. Use 239 | regular expressions to clean up the ZIP codes and repeat the analysis. 
240 | 241 | ``` 242 | df = ( 243 | bpd.read_gbq_table("bigquery-public-data.iowa_liquor_sales.sales") 244 | .assign( 245 | zip_code=lambda _: _["zip_code"].str.replace(".0", "") 246 | ) 247 | ) 248 | volume_by_zip = df.groupby("zip_code").agg({"volume_sold_liters": "sum"}) 249 | ( 250 | volume_by_zip 251 | .sort_values("volume_sold_liters", ascending=False) 252 | .head(25) 253 | .to_pandas() 254 | .plot.bar(rot=80) 255 | ) 256 | ``` 257 | 258 | **Expected output:** 259 | 260 | ![Bar chart of volumes of alcohol in the top selling zip codes](img/iowa-zips.png) 261 | 262 | ## Discover correlations in sales 263 | 264 | Why do some zip codes sell more than others? One hypothesis is that it's due to 265 | population size differences. A zip code with more population will likely sell 266 | more liquor. 267 | 268 | Test this hypothesis by calculating the correlation between population and liquor 269 | sales volume. 270 | 271 | ### Join with other datasets 272 | 273 | Join with a population dataset 274 | such as the [US Census Bureau's American Community Survey](https://console.cloud.google.com/marketplace/product/united-states-census-bureau/acs) ZIP code tabulation area survey. 275 | 276 | ``` 277 | census_acs = bpd.read_gbq_table("bigquery-public-data.census_bureau_acs.zcta_2020_5yr") 278 | ``` 279 | 280 | The American Community Survey identifies states by GEOID. In the case of ZIP code 281 | tabulation areas, the GEOID equals the ZIP code. 282 | 283 | ``` 284 | volume_by_pop = volume_by_zip.join( 285 | census_acs.set_index("geo_id") 286 | ) 287 | ``` 288 | 289 | Create a scatter plot to compare ZIP code tabulation area populations with 290 | liters of alcohol sold. 
291 | 292 | ``` 293 | ( 294 | volume_by_pop[["volume_sold_liters", "total_pop"]] 295 | .to_pandas() 296 | .plot.scatter(x="total_pop", y="volume_sold_liters") 297 | ) 298 | ``` 299 | 300 | **Expected output:** 301 | 302 | ![Scatter plot of zip code tabulation areas by the population and liters of liquor sold](img/iowa-pop-volume-scatter.png) 303 | 304 | ### Calculate correlations 305 | 306 | The trend looks roughly linear. Fit a linear regression model to this to check 307 | how well population can predict liquor sales. 308 | 309 | ``` 310 | from bigframes.ml.linear_model import LinearRegression 311 | 312 | feature_columns = volume_by_pop[["total_pop"]] 313 | label_columns = volume_by_pop[["volume_sold_liters"]] 314 | 315 | # Create the linear model 316 | model = LinearRegression() 317 | model.fit(feature_columns, label_columns) 318 | ``` 319 | 320 | Check how good the fit is by using the `score` method. 321 | 322 | ``` 323 | model.score(feature_columns, label_columns).to_pandas() 324 | ``` 325 | 326 | **Sample output:** 327 | 328 | ``` 329 | mean_absolute_error mean_squared_error mean_squared_log_error median_absolute_error r2_score explained_variance 330 | 0 245065.664095 224398167097.364288 5.595021 178196.31289 0.380096 0.380096 331 | ``` 332 | 333 | Draw the best fit line by calling the `predict` function on a range of population 334 | values. 
335 | 336 | ``` 337 | import matplotlib.pyplot as pyplot 338 | import numpy as np 339 | import pandas as pd 340 | 341 | line = pd.Series(np.arange(0, 50_000), name="total_pop") 342 | predictions = model.predict(line).to_pandas() 343 | 344 | zips = volume_by_pop[["volume_sold_liters", "total_pop"]].to_pandas() 345 | pyplot.scatter(zips["total_pop"], zips["volume_sold_liters"]) 346 | pyplot.plot( 347 | line, 348 | predictions.sort_values("total_pop")["predicted_volume_sold_liters"], 349 | marker=None, 350 | color="red", 351 | ) 352 | ``` 353 | 354 | **Expected output:** 355 | 356 | ![Scatter plot with a best fit line](img/iowa-pop-volume-line.png) 357 | 358 | 359 | ### Addressing heteroscedasticity 360 | 361 | The data in the previous chart appears to be heteroscedastic. The variance around 362 | the best fit line grows with the population. 363 | 364 | Perhaps the amount of alcohol purchased per person is relatively constant. 365 | 366 | ``` 367 | volume_per_pop = ( 368 | volume_by_pop[volume_by_pop['total_pop'] > 0] 369 | .assign(liters_per_pop=lambda df: df["volume_sold_liters"] / df["total_pop"]) 370 | ) 371 | 372 | ( 373 | volume_per_pop[["liters_per_pop", "total_pop"]] 374 | .to_pandas() 375 | .plot.scatter(x="total_pop", y="liters_per_pop") 376 | ) 377 | ``` 378 | 379 | **Expected output:** 380 | 381 | ![Scatter plot of liters per population](img/iowa-volume-per-pop-scatter.png) 382 | 383 | Calculate the average liters of alcohol purchased in two different ways: 384 | 385 | 1. What is the average amount of alcohol purchased per person in Iowa? 386 | 2. What is the average over all zip codes of the amount of alcohol purchased per person? 387 | 388 | In (1), it reflects how much alcohol is purchased in the whole state. In (2), 389 | it reflects the average zip code, which won't necessarily be the same as (1) 390 | because different zip codes have different populations. 
391 | 392 | ``` 393 | df = ( 394 | bpd.read_gbq_table("bigquery-public-data.iowa_liquor_sales.sales") 395 | .assign( 396 | zip_code=lambda _: _["zip_code"].str.replace(".0", "") 397 | ) 398 | ) 399 | census_state = bpd.read_gbq( 400 | "bigquery-public-data.census_bureau_acs.state_2020_5yr", 401 | index_col="geo_id", 402 | ) 403 | 404 | volume_per_pop_statewide = ( 405 | df['volume_sold_liters'].sum() 406 | / census_state["total_pop"].loc['19'] 407 | ) 408 | volume_per_pop_statewide 409 | ``` 410 | 411 | **Expected output:** `87.997` 412 | 413 | ``` 414 | average_per_zip = volume_per_pop["liters_per_pop"].mean() 415 | average_per_zip 416 | ``` 417 | 418 | **Expected output:** `67.139` 419 | 420 | Plot these averages, similar to above. 421 | 422 | ``` 423 | import numpy as np 424 | import pandas as pd 425 | from matplotlib import pyplot 426 | 427 | line = pd.Series(np.arange(0, 50_000), name="total_pop") 428 | 429 | zips = volume_per_pop[["liters_per_pop", "total_pop"]].to_pandas() 430 | pyplot.scatter(zips["total_pop"], zips["liters_per_pop"]) 431 | pyplot.plot(line, np.full(line.shape, volume_per_pop_statewide), marker=None, color="magenta") 432 | pyplot.plot(line, np.full(line.shape, average_per_zip), marker=None, color="red") 433 | ``` 434 | 435 | **Expected output:** 436 | 437 | ![Scatter plot of liters per population](img/iowa-volume-per-pop-lines.png) 438 | 439 | There are still some zip codes that are quite large outliers, especially in areas 440 | with less population. It is left as an exercise to hypothesize why this is. For 441 | example, it could be that some zip codes are low population but high consumption 442 | because they contain the only liquor store in the area. If so, calculating based on 443 | the population of surrounding zip codes may even these outliers out. 
444 | 445 | 446 | ## Comparing types of liquor sold 447 | 448 | In addition to geographic data, the Iowa liquor retail sales database also 449 | contains detailed information about the item sold. Perhaps by analyzing these, 450 | we can reveal differences in tastes across geographic areas. 451 | 452 | ### Explore categories 453 | 454 | Items are categorized in the database. How many categories are there? 455 | 456 | ``` 457 | import bigframes.pandas as bpd 458 | 459 | bpd.options.bigquery.ordering_mode = "partial" 460 | bpd.options.display.repr_mode = "deferred" 461 | 462 | df = bpd.read_gbq_table("bigquery-public-data.iowa_liquor_sales.sales") 463 | df.category_name.nunique() 464 | ``` 465 | 466 | **Expected output:** `103` 467 | 468 | Which are the most popular categories by volume? 469 | 470 | ``` 471 | counts = ( 472 | df.groupby("category_name") 473 | .agg({"volume_sold_liters": "sum"}) 474 | .sort_values(["volume_sold_liters"], ascending=False) 475 | .to_pandas() 476 | ) 477 | counts.head(25).plot.bar(rot=80) 478 | ``` 479 | 480 | ![Bar chart of top categories of liquor sold](img/iowa-categories.png) 481 | 482 | ### Working with the ARRAY data type 483 | 484 | There are several categories each of whiskey, rum, vodka, and more. I'd like to 485 | group these together somehow. 486 | 487 | Start by splitting the category names into separate words by using the 488 | [Series.str.split()](https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.operations.strings.StringMethods#bigframes_operations_strings_StringMethods_split) method. 489 | Unnest the array this creates by using the `explode()` method. 
490 | 491 | ``` 492 | category_parts = df.category_name.str.split(" ").explode() 493 | counts = ( 494 | category_parts 495 | .groupby(category_parts) 496 | .size() 497 | .sort_values(ascending=False) 498 | .to_pandas() 499 | ) 500 | counts.head(25).plot.bar(rot=80) 501 | ``` 502 | 503 | ![Words by count from categories](img/iowa-words.png) 504 | 505 | ``` 506 | category_parts.nunique() 507 | ``` 508 | 509 | **Expected output:** `113` 510 | 511 | Looking at the chart above, the data still have VODKA separate from VODKAS. More 512 | grouping is needed to collapse categories into a smaller set. 513 | 514 | ## Using NLTK with BigQuery DataFrames 515 | 516 | With only about 100 categories, it would be feasible to write some heuristics or 517 | even manually create a mapping from category to the wider liquor type. Alternatively, 518 | one could use a large language model such as Gemini to create such a mapping. 519 | Try the codelab [Get insights from unstructured data using BigQuery DataFrames](/bigquery-dataframes-clustering-unstructured-data) to use BigQuery DataFrames with Gemini. 520 | 521 | Instead, use a more traditional natural language processing package, NLTK, to 522 | process these data. Technology called a "stemmer" can merge plural and singular 523 | nouns into the same value, for example. 524 | 525 | 526 | ### Using NLTK to stem words 527 | 528 | The [NLTK package](https://www.nltk.org/) provides natural language processing 529 | methods that are accessible from Python. Install the package to try it out. 530 | 531 | ``` 532 | %pip install nltk 533 | ``` 534 | 535 | Next, import the package. Inspect the version. It will be used later on in the 536 | tutorial. 537 | 538 | ``` 539 | import nltk 540 | 541 | nltk.__version__ 542 | ``` 543 | 544 | One way of standardizing words is to "stem" the word. This removes any suffixes, 545 | such as a trailing "s" for plurals.
546 | 547 | ``` 548 | def stem(word: str) -> str: 549 | # https://www.nltk.org/howto/stem.html 550 | import nltk.stem.snowball 551 | 552 | # Avoid failure if a NULL is passed in. 553 | if not word: 554 | return word 555 | 556 | stemmer = nltk.stem.snowball.SnowballStemmer("english") 557 | return stemmer.stem(word) 558 | ``` 559 | 560 | Try this out on a few words. 561 | 562 | ``` 563 | stem("WHISKEY") 564 | ``` 565 | 566 | **Expected output:** `whiskey` 567 | 568 | ``` 569 | stem("WHISKIES") 570 | ``` 571 | 572 | **Expected output:** `whiski` 573 | 574 | Unfortunately, this didn't map whiskies to the same stem as whiskey. 575 | Stemmers don't work well with irregular plurals. Try a lemmatizer, which uses 576 | more sophisticated techniques to identify the base word, called a "lemma". 577 | 578 | ``` 579 | def lemmatize(word: str) -> str: 580 | # https://stackoverflow.com/a/18400977/101923 581 | # https://www.nltk.org/api/nltk.stem.wordnet.html#module-nltk.stem.wordnet 582 | import nltk 583 | import nltk.stem.wordnet 584 | 585 | 586 | # Avoid failure if a NULL is passed in. 587 | if not word: 588 | return word 589 | 590 | nltk.download('wordnet') 591 | wnl = nltk.stem.wordnet.WordNetLemmatizer() 592 | return wnl.lemmatize(word.lower()) 593 | ``` 594 | 595 | Try this out on a few words. 596 | 597 | ``` 598 | lemmatize("WHISKIES") 599 | ``` 600 | 601 | **Expected output:** `whisky` 602 | 603 | ``` 604 | lemmatize("WHISKY") 605 | ``` 606 | 607 | **Expected output:** `whisky` 608 | 609 | ``` 610 | lemmatize("WHISKEY") 611 | ``` 612 | 613 | **Expected output:** `whiskey` 614 | 615 | Unfortunately, this lemmatizer doesn't map "whiskey" to the same lemma as 616 | "whiskies". Since this word is particularly important for the Iowa retail liquor 617 | sales database, manually map it to the American spelling by using a dictionary.
618 | 619 | ``` 620 | def lemmatize(word: str) -> str: 621 | # https://stackoverflow.com/a/18400977/101923 622 | # https://www.nltk.org/api/nltk.stem.wordnet.html#module-nltk.stem.wordnet 623 | import nltk 624 | import nltk.stem.wordnet 625 | 626 | 627 | # Avoid failure if a NULL is passed in. 628 | if not word: 629 | return word 630 | 631 | nltk.download('wordnet') 632 | wnl = nltk.stem.wordnet.WordNetLemmatizer() 633 | lemma = wnl.lemmatize(word.lower()) 634 | 635 | table = { 636 | "whisky": "whiskey", # Use the American spelling. 637 | } 638 | return table.get(lemma, lemma) 639 | ``` 640 | 641 | Try this out on a few words. 642 | 643 | ``` 644 | lemmatize("WHISKIES") 645 | ``` 646 | 647 | **Expected output:** `whiskey` 648 | 649 | ``` 650 | lemmatize("WHISKEY") 651 | ``` 652 | 653 | **Expected output:** `whiskey` 654 | 655 | Congrats! This lemmatizer should work well for narrowing the categories. To use 656 | it with BigQuery, you must deploy it to the cloud. 657 | 658 | ### Setup your project for function deployment 659 | 660 | Before you deploy this to the cloud so that BigQuery can access this function, 661 | you'll need to do some one time setup. 662 | 663 | Create a new code cell and replace `your-project-id` with the Google Cloud project 664 | ID you're using for this tutorial. 665 | 666 | ``` 667 | project_id = "your-project-id" 668 | ``` 669 | 670 | Create a service account without any permissions, since this function doesn't 671 | need access to any cloud resources. 
672 | 673 | ``` 674 | from google.cloud import iam_admin_v1 675 | from google.cloud.iam_admin_v1 import types 676 | 677 | iam_admin_client = iam_admin_v1.IAMClient() 678 | request = types.CreateServiceAccountRequest() 679 | 680 | account_id = "bigframes-no-permissions" 681 | request.account_id = account_id 682 | request.name = f"projects/{project_id}" 683 | 684 | display_name = "bigframes remote function (no permissions)" 685 | service_account = types.ServiceAccount() 686 | service_account.display_name = display_name 687 | request.service_account = service_account 688 | 689 | account = iam_admin_client.create_service_account(request=request) 690 | print(account.email) 691 | ``` 692 | 693 | **Expected output:** `bigframes-no-permissions@your-project-id.iam.gserviceaccount.com` 694 | 695 | Create a BigQuery dataset to hold the function. 696 | 697 | ``` 698 | from google.cloud import bigquery 699 | 700 | bqclient = bigquery.Client(project=project_id) 701 | dataset = bigquery.Dataset(f"{project_id}.functions") 702 | bqclient.create_dataset(dataset, exists_ok=True) 703 | ``` 704 | 705 | ### Deploying a remote function 706 | 707 | Enable the Cloud Functions API if not yet already enabled. 708 | 709 | ``` 710 | !gcloud services enable cloudfunctions.googleapis.com 711 | ``` 712 | 713 | Now, deploy your function to the dataset you just created. Add a 714 | `@bpd.remote_function` decorator to the function you created in the previous 715 | steps. 716 | 717 | ``` 718 | @bpd.remote_function( 719 | dataset=f"{project_id}.functions", 720 | name="lemmatize", 721 | # TODO: Replace this with your version of nltk. 
722 | packages=["nltk==3.9.1"], 723 | cloud_function_service_account=f"bigframes-no-permissions@{project_id}.iam.gserviceaccount.com", 724 | cloud_function_ingress_settings="internal-only", 725 | ) 726 | def lemmatize(word: str) -> str: 727 | # https://stackoverflow.com/a/18400977/101923 728 | # https://www.nltk.org/api/nltk.stem.wordnet.html#module-nltk.stem.wordnet 729 | import nltk 730 | import nltk.stem.wordnet 731 | 732 | 733 | # Avoid failure if a NULL is passed in. 734 | if not word: 735 | return word 736 | 737 | nltk.download('wordnet') 738 | wnl = nltk.stem.wordnet.WordNetLemmatizer() 739 | lemma = wnl.lemmatize(word.lower()) 740 | 741 | table = { 742 | "whisky": "whiskey", # Use the American spelling. 743 | } 744 | return table.get(lemma, lemma) 745 | ``` 746 | 747 | Deployment should take about two minutes. 748 | 749 | ### Using the remote functions 750 | 751 | Once the deployment completes, you can test this function. 752 | 753 | ``` 754 | lemmatize = bpd.read_gbq_function(f"{project_id}.functions.lemmatize") 755 | 756 | words = bpd.Series(["whiskies", "whisky", "whiskey", "vodkas", "vodka"]) 757 | words.apply(lemmatize).to_pandas() 758 | ``` 759 | 760 | **Expected output:** 761 | 762 | ``` 763 | 0 whiskey 764 | 1 whiskey 765 | 2 whiskey 766 | 3 vodka 767 | 4 vodka 768 | 769 | dtype: string 770 | ``` 771 | 772 | ## Comparing alcohol consumption by county 773 | 774 | Now that the `lemmatize` function is available, use it to combine categories. 775 | 776 | ### Finding the word to best summarize the category 777 | 778 | First, create a DataFrame of all categories in the database. 
779 | 780 | ``` 781 | df = bpd.read_gbq_table("bigquery-public-data.iowa_liquor_sales.sales") 782 | 783 | categories = ( 784 | df['category_name'] 785 | .groupby(df['category_name']) 786 | .size() 787 | .to_frame() 788 | .rename(columns={"category_name": "total_orders"}) 789 | .reset_index(drop=False) 790 | ) 791 | categories.to_pandas() 792 | ``` 793 | 794 | **Expected output:** 795 | 796 | ``` 797 | category_name total_orders 798 | 0 100 PROOF VODKA 99124 799 | 1 100% AGAVE TEQUILA 724374 800 | 2 AGED DARK RUM 59433 801 | 3 AMARETTO - IMPORTED 102 802 | 4 AMERICAN ALCOHOL 24351 803 | ... ... ... 804 | 98 WATERMELON SCHNAPPS 17844 805 | 99 WHISKEY LIQUEUR 1442732 806 | 100 WHITE CREME DE CACAO 7213 807 | 101 WHITE CREME DE MENTHE 2459 808 | 102 WHITE RUM 436553 809 | 103 rows × 2 columns 810 | ``` 811 | 812 | Next, create a DataFrame of all words in the categories, except for a few 813 | filler words like punctuation and "item". 814 | 815 | ``` 816 | words = ( 817 | categories.assign( 818 | words=categories['category_name'] 819 | .str.lower() 820 | .str.split(" ") 821 | ) 822 | .assign(num_words=lambda _: _['words'].str.len()) 823 | .explode("words") 824 | .rename(columns={"words": "word"}) 825 | ) 826 | words = words[ 827 | # Remove punctuation and "item", unless it's the only word 828 | (words['word'].str.isalnum() & ~(words['word'].str.startswith('item'))) 829 | | (words['num_words'] == 1) 830 | ] 831 | words.to_pandas() 832 | ``` 833 | 834 | **Expected output:** 835 | 836 | ``` 837 | category_name total_orders word num_words 838 | 0 100 PROOF VODKA 99124 100 3 839 | 1 100 PROOF VODKA 99124 proof 3 840 | 2 100 PROOF VODKA 99124 vodka 3 841 | ... ... ... ... ... 842 | 252 WHITE RUM 436553 white 2 843 | 253 WHITE RUM 436553 rum 2 844 | 254 rows × 4 columns 845 | ``` 846 | 847 | Note that by lemmatizing after grouping, you are reducing the load on your Cloud 848 | Function. 
It is possible to apply the lemmatize function on each of the several 849 | million rows in the database, but it would cost more than applying it after 850 | grouping and may require quota increases. 851 | 852 | ``` 853 | lemmas = words.assign(lemma=lambda _: _["word"].apply(lemmatize)) 854 | lemmas.to_pandas() 855 | ``` 856 | 857 | **Expected output:** 858 | 859 | ``` 860 | category_name total_orders word num_words lemma 861 | 0 100 PROOF VODKA 99124 100 3 100 862 | 1 100 PROOF VODKA 99124 proof 3 proof 863 | 2 100 PROOF VODKA 99124 vodka 3 vodka 864 | ... ... ... ... ... ... 865 | 252 WHITE RUM 436553 white 2 white 866 | 253 WHITE RUM 436553 rum 2 rum 867 | 254 rows × 5 columns 868 | ``` 869 | 870 | Now that the words have been lemmatized, you need to select the lemma that best 871 | summarizes the category. Since there aren't many function words in the categories, 872 | use the heuristic that if a word appears in multiple other categories, it's 873 | likely better as a summarizing word (e.g. whiskey). 
874 | 875 | ``` 876 | lemma_counts = ( 877 | lemmas 878 | .groupby("lemma", as_index=False) 879 | .agg({"total_orders": "sum"}) 880 | .rename(columns={"total_orders": "total_orders_with_lemma"}) 881 | ) 882 | 883 | categories_with_lemma_counts = lemmas.merge(lemma_counts, on="lemma") 884 | 885 | max_lemma_count = ( 886 | categories_with_lemma_counts 887 | .groupby("category_name", as_index=False) 888 | .agg({"total_orders_with_lemma": "max"}) 889 | .rename(columns={"total_orders_with_lemma": "max_lemma_count"}) 890 | ) 891 | 892 | categories_with_max = categories_with_lemma_counts.merge( 893 | max_lemma_count, 894 | on="category_name" 895 | ) 896 | 897 | categories_mapping = categories_with_max[ 898 | categories_with_max['total_orders_with_lemma'] == categories_with_max['max_lemma_count'] 899 | ].groupby("category_name", as_index=False).max() 900 | categories_mapping.to_pandas() 901 | ``` 902 | 903 | **Expected output:** 904 | 905 | ``` 906 | category_name total_orders word num_words lemma total_orders_with_lemma max_lemma_count 907 | 0 100 PROOF VODKA 99124 vodka 3 vodka 7575769 7575769 908 | 1 100% AGAVE TEQUILA 724374 tequila 3 tequila 1601092 1601092 909 | 2 AGED DARK RUM 59433 rum 3 rum 3226633 3226633 910 | ... ... ... ... ... ... ... ... 911 | 100 WHITE CREME DE CACAO 7213 white 4 white 446225 446225 912 | 101 WHITE CREME DE MENTHE 2459 white 4 white 446225 446225 913 | 102 WHITE RUM 436553 rum 2 rum 3226633 3226633 914 | 103 rows × 7 columns 915 | ``` 916 | 917 | Now that there is a single lemma summarizing each category, merge this to the 918 | original DataFrame. 919 | 920 | ``` 921 | df_with_lemma = df.merge( 922 | categories_mapping, 923 | on="category_name", 924 | how="left" 925 | ) 926 | df_with_lemma[df_with_lemma['category_name'].notnull()].peek() 927 | ``` 928 | 929 | **Expected output:** 930 | 931 | ``` 932 | invoice_and_item_number ... lemma total_orders_with_lemma max_lemma_count 933 | 0 S30989000030 ... 
vodka 7575769 7575769 934 | 1 S30538800106 ... vodka 7575769 7575769 935 | 2 S30601200013 ... vodka 7575769 7575769 936 | 3 S30527200047 ... vodka 7575769 7575769 937 | 4 S30833600058 ... vodka 7575769 7575769 938 | 5 rows × 30 columns 939 | ``` 940 | 941 | ### Comparing counties 942 | 943 | Compare sales in each county to see what differences there are. 944 | 945 | ``` 946 | county_lemma = ( 947 | df_with_lemma 948 | .groupby(["county", "lemma"]) 949 | .agg({"volume_sold_liters": "sum"}) 950 | # Cast to an integer for more deterministic equality comparisons. 951 | .assign(volume_sold_int64=lambda _: _['volume_sold_liters'].astype("Int64")) 952 | ) 953 | ``` 954 | 955 | Find the most sold product (lemma) in each county. 956 | 957 | ``` 958 | county_max = ( 959 | county_lemma 960 | .reset_index(drop=False) 961 | .groupby("county") 962 | .agg({"volume_sold_int64": "max"}) 963 | ) 964 | 965 | county_max_lemma = county_lemma[ 966 | county_lemma["volume_sold_int64"] == county_max["volume_sold_int64"] 967 | ] 968 | 969 | county_max_lemma.to_pandas() 970 | ``` 971 | 972 | **Expected output:** 973 | 974 | ``` 975 | volume_sold_liters volume_sold_int64 976 | county lemma 977 | SCOTT vodka 6044393.1 6044393 978 | APPANOOSE whiskey 292490.44 292490 979 | HAMILTON whiskey 329118.92 329118 980 | ... ... ... ... 981 | WORTH whiskey 100542.85 100542 982 | MITCHELL vodka 158791.94 158791 983 | RINGGOLD whiskey 65107.8 65107 984 | 101 rows × 2 columns 985 | ``` 986 | 987 | How different are the counties from each other? 988 | 989 | ``` 990 | county_max_lemma.groupby("lemma").size().to_pandas() 991 | ``` 992 | 993 | **Expected output:** 994 | 995 | ``` 996 | lemma 997 | american 1 998 | liqueur 1 999 | vodka 15 1000 | whiskey 83 1001 | 1002 | dtype: Int64 1003 | ``` 1004 | 1005 | In most counties, whiskey is the most popular product by volume, with vodka most 1006 | popular in 15 counties. Compare this to the most popular liquor types statewide. 
1007 | 1008 | ``` 1009 | total_liters = ( 1010 | df_with_lemma 1011 | .groupby("lemma") 1012 | .agg({"volume_sold_liters": "sum"}) 1013 | .sort_values("volume_sold_liters", ascending=False) 1014 | ) 1015 | total_liters.to_pandas() 1016 | ``` 1017 | 1018 | **Expected output:** 1019 | 1020 | ``` 1021 | volume_sold_liters 1022 | lemma 1023 | vodka 85356422.950001 1024 | whiskey 85112339.980001 1025 | rum 33891011.72 1026 | american 19994259.64 1027 | imported 14985636.61 1028 | tequila 12357782.37 1029 | cocktails/rtd 7406769.87 1030 | ... 1031 | ``` 1032 | 1033 | Whiskey and vodka have nearly the same volume, with vodka a bit higher than 1034 | whiskey statewide. 1035 | 1036 | ### Comparing proportions 1037 | 1038 | What is unique about the sales in each county? What makes the county different 1039 | from the rest of the state? 1040 | 1041 | Use the [Cohen's h measure](https://en.wikipedia.org/wiki/Cohen%27s_h) to find 1042 | which liquor sales volumes differ the most proportionally from what would be 1043 | expected based on the proportion of sales statewide. 1044 | 1045 | ``` 1046 | import numpy as np 1047 | 1048 | total_proportions = total_liters / total_liters.sum() 1049 | total_phi = 2 * np.arcsin(np.sqrt(total_proportions)) 1050 | 1051 | county_liters = df_with_lemma.groupby(["county", "lemma"]).agg({"volume_sold_liters": "sum"}) 1052 | county_totals = df_with_lemma.groupby(["county"]).agg({"volume_sold_liters": "sum"}) 1053 | county_proportions = county_liters / county_totals 1054 | county_phi = 2 * np.arcsin(np.sqrt(county_proportions)) 1055 | 1056 | cohens_h = ( 1057 | (county_phi - total_phi) 1058 | .rename(columns={"volume_sold_liters": "cohens_h"}) 1059 | .assign(cohens_h_int=lambda _: (_['cohens_h'] * 1_000_000).astype("Int64")) 1060 | ) 1061 | ``` 1062 | 1063 | Now that the Cohen's h has been measured for each lemma, find the largest 1064 | difference from the statewide proportion in each county. 
1065 | 1066 | ``` 1067 | # Note: one might want to use the absolute value here if interested in counties 1068 | # that drink _less_ of a particular liquor than expected. 1069 | largest_per_county = cohens_h.groupby("county").agg({"cohens_h_int": "max"}) 1070 | counties = cohens_h[cohens_h['cohens_h_int'] == largest_per_county["cohens_h_int"]] 1071 | counties.sort_values('cohens_h', ascending=False).to_pandas() 1072 | ``` 1073 | 1074 | **Expected output:** 1075 | 1076 | ``` 1077 | cohens_h cohens_h_int 1078 | county lemma 1079 | EL PASO liqueur 1.289667 1289667 1080 | ADAMS whiskey 0.373591 373590 1081 | IDA whiskey 0.306481 306481 1082 | OSCEOLA whiskey 0.295524 295523 1083 | PALO ALTO whiskey 0.293697 293696 1084 | ... ... ... ... 1085 | MUSCATINE rum 0.053757 53757 1086 | MARION rum 0.053427 53427 1087 | MITCHELL vodka 0.048212 48212 1088 | WEBSTER rum 0.044896 44895 1089 | CERRO GORDO cocktails/rtd 0.027496 27495 1090 | 100 rows × 2 columns 1091 | ``` 1092 | 1093 | The larger the Cohen's h value, the more likely it is that there is a 1094 | statistically significant difference in the amount of that type of alcohol 1095 | consumed compared to the state averages. For the smaller positive values, the 1096 | consumption differs from the statewide average, but the difference may 1097 | be due to random variation. 1098 | 1099 | An aside: EL PASO county doesn't appear to be a 1100 | [county in Iowa](https://en.wikipedia.org/wiki/List_of_counties_in_Iowa). 1101 | This may indicate another need for data cleanup before fully depending on these 1102 | results. 1103 | 1104 | ### Visualizing counties 1105 | 1106 | Join with 1107 | the [`bigquery-public-data.geo_us_boundaries.counties` table](https://console.cloud.google.com/bigquery?ws=!1m5!1m4!4m3!1sbigquery-public-data!2sgeo_us_boundaries!3scounties) 1108 | to get the geographic area for each county. County names are not unique across 1109 | the United States, so filter to only include counties from Iowa.
The FIPS code 1110 | for Iowa is '19'. 1111 | 1112 | ``` 1113 | counties_geo = ( 1114 | bpd.read_gbq("bigquery-public-data.geo_us_boundaries.counties") 1115 | .assign(county=lambda _: _['county_name'].str.upper()) 1116 | ) 1117 | counties_plus = ( 1118 | counties 1119 | .reset_index(drop=False) 1120 | .merge(counties_geo[counties_geo['state_fips_code'] == '19'], on="county", how="left") 1121 | .dropna(subset=["county_geom"]) 1122 | .to_pandas() 1123 | ) 1124 | counties_plus 1125 | ``` 1126 | 1127 | **Expected output:** 1128 | 1129 | ``` 1130 | county lemma cohens_h cohens_h_int geo_id state_fips_code ... 1131 | 0 ALLAMAKEE american 0.087931 87930 19005 19 ... 1132 | 1 BLACK HAWK american 0.106256 106256 19013 19 ... 1133 | 2 WINNESHIEK american 0.093101 93101 19191 19 ... 1134 | ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 1135 | 96 CLINTON tequila 0.075708 75707 19045 19 ... 1136 | 97 POLK tequila 0.087438 87438 19153 19 ... 1137 | 98 LEE schnapps 0.064663 64663 19111 19 ... 1138 | 99 rows × 23 columns 1139 | ``` 1140 | 1141 | Use GeoPandas to visualize these differences on a map. 
1142 | 1143 | ``` 1144 | import geopandas 1145 | 1146 | counties_plus = geopandas.GeoDataFrame(counties_plus, geometry="county_geom") 1147 | 1148 | # https://stackoverflow.com/a/42214156/101923 1149 | ax = counties_plus.plot(figsize=(14, 14)) 1150 | counties_plus.apply( 1151 | lambda row: ax.annotate( 1152 | text=row['lemma'], 1153 | xy=row['county_geom'].centroid.coords[0], 1154 | ha='center' 1155 | ), 1156 | axis=1, 1157 | ) 1158 | ``` 1159 | 1160 | ![A map of the alcohol that is most different from statewide sales volume proportions in each county](img/iowa-counties-map.png) 1161 | 1162 | ## Clean up 1163 | 1164 | If you have created a new Google Cloud project for this tutorial, you can [delete it](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) to prevent additional charges for tables or other resources created. 1165 | 1166 | Alternatively, delete the Cloud Functions, service accounts, and datasets created for this tutorial. 1167 | 1168 | 1169 | ## Congratulations! 1170 | 1171 | You have cleaned and analyzed structured data using BigQuery DataFrames. 1172 | Along the way you've explored Google Cloud's Public Datasets, Python notebooks 1173 | in BigQuery Studio, BigQuery ML, BigQuery Remote Functions, and the power of 1174 | BigQuery DataFrames. Fantastic job! 1175 | 1176 | 1177 | ### Next steps 1178 | 1179 | * Apply these steps to other data, such as the [USA names database](https://github.com/tswast/code-snippets/blob/main/2024/12-bigframes-usa-names/usa_names.ipynb). 1180 | * Try [generating Python code in your notebook](https://cloud.google.com/colab/docs/use-code-completion). Python notebooks in BigQuery Studio are powered by Colab Enterprise. Hint: I find asking for help generating test data to be quite useful. 1181 | * Explore the [sample notebooks for BigQuery DataFrames](https://github.com/googleapis/python-bigquery-dataframes/tree/main/notebooks) on GitHub. 
1182 | * Create a [schedule to run a notebook in BigQuery Studio](https://cloud.google.com/bigquery/docs/orchestrate-notebooks). 1183 | * Deploy a [Remote Function with BigQuery DataFrames](https://cloud.google.com/bigquery/docs/samples/bigquery-dataframes-remote-function) to integrate third-party Python packages with BigQuery. 1184 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # code-snippets 2 | Snippets of code used in blog posts and other media. 3 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Code Snippets 5 | 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | bigquery-magics==0.5.0 2 | nbconvert==7.16.4 3 | pandas-gbq==0.26.0 4 | --------------------------------------------------------------------------------