├── CH7.ipynb ├── DSP-CH1.ipynb ├── DSP-CH2.ipynb ├── DSP-Ch8.ipynb ├── DSP_CH3.ipynb ├── DSP_CH5.ipynb ├── README.md ├── Redis-py-NHL.ipynb ├── Stackdriver.ipynb ├── append.py ├── apply.py ├── book_sample.pdf ├── ch6_pyspark.html ├── ch6_pyspark.ipynb ├── dash_app.py ├── dataflow_read.py ├── echo.py ├── keras_games.py ├── logit.py ├── natality.py ├── predict.py └── stream.py /CH7.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## append.py" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "scrolled": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import apache_beam as beam\n", 19 | "import argparse\n", 20 | "from apache_beam.options.pipeline_options import PipelineOptions\n", 21 | "from apache_beam.io import ReadFromText\n", 22 | "from apache_beam.io import WriteToText\n", 23 | "\n", 24 | "# define a DoFn for transforming the data \n", 25 | "class AppendDoFn(beam.DoFn):\n", 26 | " def process(self, element):\n", 27 | " return [element + \" - Hello World!\"]\n", 28 | " \n", 29 | "# set up pipeline parameters \n", 30 | "parser = argparse.ArgumentParser()\n", 31 | "parser.add_argument('--input', dest='input',\n", 32 | " default='gs://dataflow-samples/shakespeare/kinglear.txt')\n", 33 | "parser.add_argument('--output', dest='output',\n", 34 | " default='gs://dsp_model_store/shakespeare/kinglear.txt')\n", 35 | "known_args, pipeline_args = parser.parse_known_args(None)\n", 36 | "pipeline_options = PipelineOptions(pipeline_args)\n", 37 | "\n", 38 | "# define the pipeline steps \n", 39 | "p = beam.Pipeline(options=pipeline_options)\n", 40 | "lines = p | 'read' >> ReadFromText(known_args.input)\n", 41 | "appended = lines | 'append' >> beam.ParDo(AppendDoFn())\n", 42 | "appended | 'write' >> WriteToText(known_args.output)\n", 43 | "\n", 44 | "# run the pipeline \n", 45 | "result = p.run()\n", 46 | "result.wait_until_finish()\n" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "## dataflow_read.py" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 2, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "name": "stderr", 63 | "output_type": "stream", 64 | "text": [ 65 | "ERROR:root:Exception at bundle , due to an exception.\n", 66 | " Traceback (most recent call last):\n", 67 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\", line 343, in call\n", 68 | " finish_state)\n", 69 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\", line 383, in attempt_call\n", 70 | " result = evaluator.finish_bundle()\n", 71 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/transform_evaluator.py\", line 319, in finish_bundle\n", 72 | " with self._source.reader() as reader:\n", 73 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery.py\", line 495, in reader\n", 74 | " kms_key=self.kms_key)\n", 75 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery_tools.py\", line 864, in __init__\n", 76 | " 'Missing executing project information. Please use the --project '\n", 77 | "RuntimeError: Missing executing project information. 
Please use the --project command line option to specify it.\n", 78 | "\n", 79 | "ERROR:root:Exception at bundle , due to an exception.\n", 80 | " Traceback (most recent call last):\n", 81 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\", line 343, in call\n", 82 | " finish_state)\n", 83 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\", line 383, in attempt_call\n", 84 | " result = evaluator.finish_bundle()\n", 85 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/transform_evaluator.py\", line 319, in finish_bundle\n", 86 | " with self._source.reader() as reader:\n", 87 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery.py\", line 495, in reader\n", 88 | " kms_key=self.kms_key)\n", 89 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery_tools.py\", line 864, in __init__\n", 90 | " 'Missing executing project information. Please use the --project '\n", 91 | "RuntimeError: Missing executing project information. Please use the --project command line option to specify it.\n", 92 | "\n", 93 | "ERROR:root:Exception at bundle , due to an exception.\n", 94 | " Traceback (most recent call last):\n", 95 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\", line 343, in call\n", 96 | " finish_state)\n", 97 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\", line 383, in attempt_call\n", 98 | " result = evaluator.finish_bundle()\n", 99 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/transform_evaluator.py\", line 319, in finish_bundle\n", 100 | " with self._source.reader() as reader:\n", 101 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery.py\", line 495, in reader\n", 102 | " kms_key=self.kms_key)\n", 103 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery_tools.py\", line 864, in __init__\n", 104 | " 'Missing executing project information. Please use the --project '\n", 105 | "RuntimeError: Missing executing project information. Please use the --project command line option to specify it.\n", 106 | "\n", 107 | "ERROR:root:Exception at bundle , due to an exception.\n", 108 | " Traceback (most recent call last):\n", 109 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\", line 343, in call\n", 110 | " finish_state)\n", 111 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\", line 383, in attempt_call\n", 112 | " result = evaluator.finish_bundle()\n", 113 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/transform_evaluator.py\", line 319, in finish_bundle\n", 114 | " with self._source.reader() as reader:\n", 115 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery.py\", line 495, in reader\n", 116 | " kms_key=self.kms_key)\n", 117 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery_tools.py\", line 864, in __init__\n", 118 | " 'Missing executing project information. Please use the --project '\n", 119 | "RuntimeError: Missing executing project information. 
Please use the --project command line option to specify it.\n", 120 | "\n", 121 | "ERROR:root:Giving up after 4 attempts.\n", 122 | "WARNING:root:A task failed with exception: Missing executing project information. Please use the --project command line option to specify it.\n" 123 | ] 124 | }, 125 | { 126 | "ename": "RuntimeError", 127 | "evalue": "Missing executing project information. Please use the --project command line option to specify it.", 128 | "output_type": "error", 129 | "traceback": [ 130 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 131 | "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", 132 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0;31m# run the pipeline\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 29\u001b[0;31m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait_until_finish\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 133 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/apache_beam/runners/direct/direct_runner.py\u001b[0m in \u001b[0;36mwait_until_finish\u001b[0;34m(self, duration)\u001b[0m\n\u001b[1;32m 429\u001b[0m 'DirectRunner does not support duration argument.')\n\u001b[1;32m 430\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 431\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_executor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mawait_completion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 432\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mPipelineState\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDONE\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 433\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# pylint: disable=broad-except\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 134 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\u001b[0m in \u001b[0;36mawait_completion\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 398\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 399\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mawait_completion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 400\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_executor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mawait_completion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 401\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 402\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mshutdown\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 135 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\u001b[0m in 
\u001b[0;36mawait_completion\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 444\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mupdate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexception\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 445\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mupdate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexc_info\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 446\u001b[0;31m \u001b[0mraise_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 447\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 448\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecutor_service\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshutdown\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 136 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/future/utils/__init__.py\u001b[0m in \u001b[0;36mraise_\u001b[0;34m(tp, value, tb)\u001b[0m\n\u001b[1;32m 411\u001b[0m \u001b[0mexc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__traceback__\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mtb\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 413\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 414\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 415\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 137 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, state_sampler)\u001b[0m\n\u001b[1;32m 341\u001b[0m \u001b[0mstart_state\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 342\u001b[0m \u001b[0mprocess_state\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 343\u001b[0;31m finish_state)\n\u001b[0m\u001b[1;32m 344\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 345\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 138 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\u001b[0m in \u001b[0;36mattempt_call\u001b[0;34m(self, metrics_container, side_input_values, start_state, process_state, finish_state)\u001b[0m\n\u001b[1;32m 381\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 382\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mfinish_state\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 383\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mevaluator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfinish_bundle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 384\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlogical_metric_updates\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmetrics_container\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_cumulative\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 385\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 139 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/apache_beam/runners/direct/transform_evaluator.py\u001b[0m in \u001b[0;36mfinish_bundle\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 317\u001b[0m \u001b[0mbundles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_read_values_to_bundles\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 318\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 319\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_source\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 320\u001b[0m \u001b[0mbundles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_read_values_to_bundles\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 140 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery.py\u001b[0m in \u001b[0;36mreader\u001b[0;34m(self, test_bigquery_client)\u001b[0m\n\u001b[1;32m 493\u001b[0m \u001b[0muse_legacy_sql\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_legacy_sql\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 494\u001b[0m \u001b[0mflatten_results\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mflatten_results\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 495\u001b[0;31m kms_key=self.kms_key)\n\u001b[0m\u001b[1;32m 496\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 497\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 141 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery_tools.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, source, test_bigquery_client, use_legacy_sql, flatten_results, kms_key)\u001b[0m\n\u001b[1;32m 862\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecuting_project\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mtest_bigquery_client\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 863\u001b[0m raise RuntimeError(\n\u001b[0;32m--> 864\u001b[0;31m \u001b[0;34m'Missing executing project information. 
Please use the --project '\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 865\u001b[0m 'command line option to specify it.')\n\u001b[1;32m 866\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrow_as_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcoder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRowAsDictJsonCoder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 142 | "\u001b[0;31mRuntimeError\u001b[0m: Missing executing project information. Please use the --project command line option to specify it." 143 | ] 144 | } 145 | ], 146 | "source": [ 147 | "import apache_beam as beam\n", 148 | "import argparse\n", 149 | "from apache_beam.options.pipeline_options import PipelineOptions\n", 150 | "\n", 151 | "parser = argparse.ArgumentParser()\n", 152 | "known_args, pipeline_args = parser.parse_known_args(None)\n", 153 | "pipeline_options = PipelineOptions(pipeline_args)\n", 154 | "\n", 155 | "class ApplyDoFn(beam.DoFn):\n", 156 | " def process(self, element):\n", 157 | " print(element)\n", 158 | "\n", 159 | "\n", 160 | "query = \"\"\"\n", 161 | "select *\n", 162 | "from `bigquery-public-data.samples.natality`\n", 163 | "order by rand()\n", 164 | "limit 100\n", 165 | "\"\"\"\n", 166 | "\n", 167 | "# define the pipeline steps\n", 168 | "p = beam.Pipeline(options=pipeline_options)\n", 169 | "data = p | 'Read from BigQuery' >> beam.io.Read(\n", 170 | " beam.io.BigQuerySource(query=query, use_standard_sql=True))\n", 171 | "scored = data | 'Apply Model' >> beam.ParDo(ApplyDoFn())\n", 172 | "\n", 173 | "# run the pipeline\n", 174 | "result = p.run()\n", 175 | "result.wait_until_finish()" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "## Query Natality" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 16, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/html": [ 193 | "
\n", 194 | "\n", 207 | "\n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | "
yearpluralityapgar_5minmother_agefather_agegestation_weeksever_bornmother_marriedweight
019700.00.0374638.0817.625790
119711.00.0434738.01217.438397
219721.00.0464841.01318.437091
319721.00.0383499.01017.374463
419731.00.0424999.01015.813590
\n", 285 | "
" 286 | ], 287 | "text/plain": [ 288 | " year plurality apgar_5min mother_age father_age gestation_weeks \\\n", 289 | "0 1970 0.0 0.0 37 46 38.0 \n", 290 | "1 1971 1.0 0.0 43 47 38.0 \n", 291 | "2 1972 1.0 0.0 46 48 41.0 \n", 292 | "3 1972 1.0 0.0 38 34 99.0 \n", 293 | "4 1973 1.0 0.0 42 49 99.0 \n", 294 | "\n", 295 | " ever_born mother_married weight \n", 296 | "0 8 1 7.625790 \n", 297 | "1 12 1 7.438397 \n", 298 | "2 13 1 8.437091 \n", 299 | "3 10 1 7.374463 \n", 300 | "4 10 1 5.813590 " 301 | ] 302 | }, 303 | "execution_count": 16, 304 | "metadata": {}, 305 | "output_type": "execute_result" 306 | } 307 | ], 308 | "source": [ 309 | "from google.cloud import bigquery\n", 310 | "client = bigquery.Client()\n", 311 | "\n", 312 | "sql = \"\"\"\n", 313 | "SELECT year, plurality, apgar_5min, \n", 314 | " mother_age, father_age, \n", 315 | " gestation_weeks, ever_born\n", 316 | " ,case when mother_married = true \n", 317 | " then 1 else 0 end as mother_married\n", 318 | " ,weight_pounds as weight\n", 319 | " FROM `bigquery-public-data.samples.natality`\n", 320 | " limit 10000\n", 321 | "\"\"\"\n", 322 | "\n", 323 | "natalityDF = client.query(sql).to_dataframe().fillna(0)\n", 324 | "natalityDF.head()" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "## Train and Save Model" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 20, 337 | "metadata": {}, 338 | "outputs": [], 339 | "source": [ 340 | "from sklearn.linear_model import LinearRegression\n", 341 | "import pickle\n", 342 | "from google.cloud import storage\n", 343 | "\n", 344 | "# fit and pickle a model \n", 345 | "model = LinearRegression()\n", 346 | "model.fit(natalityDF.iloc[:,1:8], natalityDF['weight'])\n", 347 | "pickle.dump(model, open(\"natality.pkl\", 'wb'))\n", 348 | "\n", 349 | "# Save to GCS\n", 350 | "bucket = storage.Client().get_bucket('dsp_model_store')\n", 351 | "blob = bucket.blob('natality/sklearn-linear')\n", 352 | "blob.upload_from_filename('natality.pkl')\n" 353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "metadata": {}, 358 | "source": [ 359 | "## Test Model Loading " 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": 12, 365 | "metadata": {}, 366 | "outputs": [ 367 | { 368 | "data": { 369 | "text/plain": [ 370 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" 371 | ] 372 | }, 373 | "execution_count": 12, 374 | "metadata": {}, 375 | "output_type": "execute_result" 376 | } 377 | ], 378 | "source": [ 379 | "\n", 380 | "from google.cloud import storage\n", 381 | "import pickle \n", 382 | "\n", 383 | "bucket = storage.Client().get_bucket('dsp_model_store')\n", 384 | "blob = bucket.get_blob('natality/sklearn-linear')\n", 385 | "model = pickle.loads(blob.download_as_string())\n", 386 | "model\n" 387 | ] 388 | }, 389 | { 390 | "cell_type": "markdown", 391 | "metadata": {}, 392 | "source": [ 393 | "## Prediction Pipeline " 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 26, 399 | "metadata": {}, 400 | "outputs": [ 401 | { 402 | "name": "stderr", 403 | "output_type": "stream", 404 | "text": [ 405 | "/home/ec2-user/.local/lib/python3.7/site-packages/ipykernel_launcher.py:45: BeamDeprecationWarning: parse_table_schema_from_json is deprecated since 2.11.0. 
Use bigquery_tools.parse_table_schema_from_json instead.\n", 406 | "/home/ec2-user/.local/lib/python3.7/site-packages/ipykernel_launcher.py:73: BeamDeprecationWarning: BigQuerySink is deprecated since 2.11.0. Use WriteToBigQuery instead.\n", 407 | "ERROR:root:Exception at bundle , due to an exception.\n", 408 | " Traceback (most recent call last):\n", 409 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\", line 343, in call\n", 410 | " finish_state)\n", 411 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\", line 383, in attempt_call\n", 412 | " result = evaluator.finish_bundle()\n", 413 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/transform_evaluator.py\", line 319, in finish_bundle\n", 414 | " with self._source.reader() as reader:\n", 415 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery.py\", line 495, in reader\n", 416 | " kms_key=self.kms_key)\n", 417 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery_tools.py\", line 864, in __init__\n", 418 | " 'Missing executing project information. Please use the --project '\n", 419 | "RuntimeError: Missing executing project information. Please use the --project command line option to specify it.\n", 420 | "\n", 421 | "ERROR:root:Exception at bundle , due to an exception.\n", 422 | " Traceback (most recent call last):\n", 423 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\", line 343, in call\n", 424 | " finish_state)\n", 425 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\", line 383, in attempt_call\n", 426 | " result = evaluator.finish_bundle()\n", 427 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/transform_evaluator.py\", line 319, in finish_bundle\n", 428 | " with self._source.reader() as reader:\n", 429 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery.py\", line 495, in reader\n", 430 | " kms_key=self.kms_key)\n", 431 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery_tools.py\", line 864, in __init__\n", 432 | " 'Missing executing project information. Please use the --project '\n", 433 | "RuntimeError: Missing executing project information. Please use the --project command line option to specify it.\n", 434 | "\n", 435 | "ERROR:root:Exception at bundle , due to an exception.\n", 436 | " Traceback (most recent call last):\n", 437 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\", line 343, in call\n", 438 | " finish_state)\n", 439 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\", line 383, in attempt_call\n", 440 | " result = evaluator.finish_bundle()\n", 441 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/transform_evaluator.py\", line 319, in finish_bundle\n", 442 | " with self._source.reader() as reader:\n", 443 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery.py\", line 495, in reader\n", 444 | " kms_key=self.kms_key)\n", 445 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery_tools.py\", line 864, in __init__\n", 446 | " 'Missing executing project information. 
Please use the --project '\n", 447 | "RuntimeError: Missing executing project information. Please use the --project command line option to specify it.\n", 448 | "\n", 449 | "ERROR:root:Exception at bundle , due to an exception.\n", 450 | " Traceback (most recent call last):\n", 451 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\", line 343, in call\n", 452 | " finish_state)\n", 453 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\", line 383, in attempt_call\n", 454 | " result = evaluator.finish_bundle()\n", 455 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/runners/direct/transform_evaluator.py\", line 319, in finish_bundle\n", 456 | " with self._source.reader() as reader:\n", 457 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery.py\", line 495, in reader\n", 458 | " kms_key=self.kms_key)\n", 459 | " File \"/home/ec2-user/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery_tools.py\", line 864, in __init__\n", 460 | " 'Missing executing project information. Please use the --project '\n", 461 | "RuntimeError: Missing executing project information. Please use the --project command line option to specify it.\n", 462 | "\n", 463 | "ERROR:root:Giving up after 4 attempts.\n", 464 | "WARNING:root:A task failed with exception: Missing executing project information. Please use the --project command line option to specify it.\n" 465 | ] 466 | }, 467 | { 468 | "ename": "RuntimeError", 469 | "evalue": "Missing executing project information. Please use the --project command line option to specify it.", 470 | "output_type": "error", 471 | "traceback": [ 472 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 473 | "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", 474 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[0;31m# run the pipeline\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 78\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 79\u001b[0;31m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait_until_finish\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 475 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/apache_beam/runners/direct/direct_runner.py\u001b[0m in \u001b[0;36mwait_until_finish\u001b[0;34m(self, duration)\u001b[0m\n\u001b[1;32m 429\u001b[0m 'DirectRunner does not support duration argument.')\n\u001b[1;32m 430\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 431\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_executor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mawait_completion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 432\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mPipelineState\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDONE\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 433\u001b[0m 
\u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# pylint: disable=broad-except\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 476 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\u001b[0m in \u001b[0;36mawait_completion\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 398\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 399\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mawait_completion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 400\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_executor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mawait_completion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 401\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 402\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mshutdown\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 477 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\u001b[0m in \u001b[0;36mawait_completion\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 444\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mupdate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexception\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 445\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mupdate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexc_info\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 446\u001b[0;31m \u001b[0mraise_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 447\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 448\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecutor_service\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshutdown\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 478 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/future/utils/__init__.py\u001b[0m in \u001b[0;36mraise_\u001b[0;34m(tp, value, tb)\u001b[0m\n\u001b[1;32m 411\u001b[0m \u001b[0mexc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__traceback__\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mtb\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 413\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 414\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 415\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 479 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\u001b[0m in 
\u001b[0;36mcall\u001b[0;34m(self, state_sampler)\u001b[0m\n\u001b[1;32m 341\u001b[0m \u001b[0mstart_state\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 342\u001b[0m \u001b[0mprocess_state\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 343\u001b[0;31m finish_state)\n\u001b[0m\u001b[1;32m 344\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 345\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 480 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/apache_beam/runners/direct/executor.py\u001b[0m in \u001b[0;36mattempt_call\u001b[0;34m(self, metrics_container, side_input_values, start_state, process_state, finish_state)\u001b[0m\n\u001b[1;32m 381\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 382\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mfinish_state\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 383\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mevaluator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfinish_bundle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 384\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlogical_metric_updates\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmetrics_container\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_cumulative\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 385\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 481 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/apache_beam/runners/direct/transform_evaluator.py\u001b[0m in \u001b[0;36mfinish_bundle\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 317\u001b[0m \u001b[0mbundles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_read_values_to_bundles\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 318\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 319\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_source\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 320\u001b[0m \u001b[0mbundles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_read_values_to_bundles\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 482 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery.py\u001b[0m in \u001b[0;36mreader\u001b[0;34m(self, test_bigquery_client)\u001b[0m\n\u001b[1;32m 493\u001b[0m \u001b[0muse_legacy_sql\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_legacy_sql\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 494\u001b[0m 
\u001b[0mflatten_results\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mflatten_results\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 495\u001b[0;31m kms_key=self.kms_key)\n\u001b[0m\u001b[1;32m 496\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 497\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 483 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/apache_beam/io/gcp/bigquery_tools.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, source, test_bigquery_client, use_legacy_sql, flatten_results, kms_key)\u001b[0m\n\u001b[1;32m 862\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecuting_project\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mtest_bigquery_client\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 863\u001b[0m raise RuntimeError(\n\u001b[0;32m--> 864\u001b[0;31m \u001b[0;34m'Missing executing project information. Please use the --project '\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 865\u001b[0m 'command line option to specify it.')\n\u001b[1;32m 866\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrow_as_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcoder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRowAsDictJsonCoder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 484 | "\u001b[0;31mRuntimeError\u001b[0m: Missing executing project information. Please use the --project command line option to specify it." 
485 | ] 486 | } 487 | ], 488 | "source": [ 489 | "import apache_beam as beam\n", 490 | "import argparse\n", 491 | "from apache_beam.options.pipeline_options import PipelineOptions\n", 492 | "from apache_beam.options.pipeline_options import SetupOptions\n", 493 | "from apache_beam.io.gcp.bigquery import parse_table_schema_from_json\n", 494 | "import json\n", 495 | "\n", 496 | "query = \"\"\"\n", 497 | " SELECT year, plurality, apgar_5min, \n", 498 | " mother_age, father_age, \n", 499 | " gestation_weeks, ever_born\n", 500 | " ,case when mother_married = true \n", 501 | " then 1 else 0 end as mother_married\n", 502 | " ,weight_pounds as weight\n", 503 | " ,current_timestamp as time\n", 504 | " ,GENERATE_UUID() as guid\n", 505 | " FROM `bigquery-public-data.samples.natality` \n", 506 | " limit 100 \n", 507 | "\"\"\"\n", 508 | "\n", 509 | "class ApplyDoFn(beam.DoFn):\n", 510 | "\n", 511 | " def __init__(self):\n", 512 | " self._model = None\n", 513 | " from google.cloud import storage\n", 514 | " import pandas as pd\n", 515 | " import pickle as pkl\n", 516 | " self._storage = storage\n", 517 | " self._pkl = pkl\n", 518 | " self._pd = pd\n", 519 | " \n", 520 | " def process(self, element):\n", 521 | " if self._model is None:\n", 522 | " bucket = self._storage.Client().get_bucket('dsp_model_store')\n", 523 | " blob = bucket.get_blob('natality/sklearn-linear')\n", 524 | " self._model = self._pkl.loads(blob.download_as_string())\n", 525 | " \n", 526 | " new_x = self._pd.DataFrame.from_dict(element, orient = \"index\").transpose().fillna(0) \n", 527 | " weight = self._model.predict(new_x.iloc[:,1:8])[0]\n", 528 | " return [ { 'guid': element['guid'], 'weight': weight, 'time': str(element['time']) } ]\n", 529 | "\n", 530 | "schema = parse_table_schema_from_json(json.dumps({'fields':\n", 531 | " [ { 'name': 'guid', 'type': 'STRING'},\n", 532 | " { 'name': 'weight', 'type': 'FLOAT64'},\n", 533 | " { 'name': 'time', 'type': 'STRING'} ]}))\n", 534 | "\n", 535 | "class PublishDoFn(beam.DoFn):\n", 536 | " \n", 537 | " def __init__(self):\n", 538 | " from google.cloud import datastore \n", 539 | " self._ds = datastore\n", 540 | " \n", 541 | " def process(self, element):\n", 542 | " client = self._ds.Client()\n", 543 | " key = client.key('natality-guid', element['guid'])\n", 544 | " entity = self._ds.Entity(key)\n", 545 | " entity['weight'] = element['weight'] \n", 546 | " entity['time'] = element['time']\n", 547 | " client.put(entity)\n", 548 | "\n", 549 | "parser = argparse.ArgumentParser()\n", 550 | "known_args, pipeline_args = parser.parse_known_args(None)\n", 551 | "pipeline_options = PipelineOptions(pipeline_args)\n", 552 | "\n", 553 | "# define the pipeline steps\n", 554 | "p = beam.Pipeline(options=pipeline_options)\n", 555 | "data = p | 'Read from BigQuery' >> beam.io.Read(\n", 556 | " beam.io.BigQuerySource(query=query, use_standard_sql=True))\n", 557 | "scored = data | 'Apply Model' >> beam.ParDo(ApplyDoFn())\n", 558 | "scored | 'Save to BigQuery' >> beam.io.Write(beam.io.BigQuerySink(\n", 559 | " 'weight_preds', 'dsp_demo', schema = schema,\n", 560 | " create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,\n", 561 | " write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND))\n", 562 | "\n", 563 | "scored | 'Create entities' >> beam.ParDo(PublishDoFn())\n", 564 | "\n", 565 | "# run the pipeline\n", 566 | "result = p.run()\n", 567 | "result.wait_until_finish()\n" 568 | ] 569 | }, 570 | { 571 | "cell_type": "markdown", 572 | "metadata": {}, 573 | "source": [ 574 | "## Read from 
Datastore" 575 | ] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "execution_count": 25, 580 | "metadata": {}, 581 | "outputs": [ 582 | { 583 | "name": "stdout", 584 | "output_type": "stream", 585 | "text": [ 586 | "\n" 587 | ] 588 | } 589 | ], 590 | "source": [ 591 | "\n", 592 | "from google.cloud import datastore\n", 593 | "client = datastore.Client()\n", 594 | "query = client.query(kind='natality-guid')\n", 595 | "\n", 596 | "query_iter = query.fetch()\n", 597 | "for entity in query_iter:\n", 598 | " print(entity)\n", 599 | " break\n", 600 | "\n" 601 | ] 602 | } 603 | ], 604 | "metadata": { 605 | "kernelspec": { 606 | "display_name": "Python 3", 607 | "language": "python", 608 | "name": "python3" 609 | }, 610 | "language_info": { 611 | "codemirror_mode": { 612 | "name": "ipython", 613 | "version": 3 614 | }, 615 | "file_extension": ".py", 616 | "mimetype": "text/x-python", 617 | "name": "python", 618 | "nbconvert_exporter": "python", 619 | "pygments_lexer": "ipython3", 620 | "version": "3.7.4" 621 | } 622 | }, 623 | "nbformat": 4, 624 | "nbformat_minor": 2 625 | } 626 | -------------------------------------------------------------------------------- /DSP-Ch8.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Spark Streaming" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Kafka Producer" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "from kafka import KafkaProducer\n", 24 | "from json import dumps\n", 25 | "import time\n", 26 | " \n", 27 | "producer = KafkaProducer(bootstrap_servers=['localhost:9092'],\n", 28 | " value_serializer=lambda x: dumps(x).encode('utf-8'))\n", 29 | " \n", 30 | "data = {'hello' : 'world', 'time': time.time()}\n", 31 | "producer.send('dsp', data)\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Kafka Consumer" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "from kafka import KafkaConsumer\n", 48 | "from json import loads\n", 49 | " \n", 50 | "consumer = KafkaConsumer('dsp',\n", 51 | " bootstrap_servers=['localhost:9092'],\n", 52 | " value_deserializer=lambda x: loads(x.decode('utf-8')))\n", 53 | " \n", 54 | "for x in consumer:\n", 55 | " print(x.value)\n" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "## Model Producer" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 1, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "RecordMetadata(topic='dsp', partition=0, topic_partition=TopicPartition(topic='dsp', partition=0), offset=109, timestamp=1576709681368, checksum=None, serialized_key_size=-1, serialized_value_size=142, serialized_header_size=-1)" 74 | ] 75 | }, 76 | "execution_count": 1, 77 | "metadata": {}, 78 | "output_type": "execute_result" 79 | } 80 | ], 81 | "source": [ 82 | "\n", 83 | "from kafka import KafkaProducer\n", 84 | "from json import dumps\n", 85 | "import time\n", 86 | "import uuid\n", 87 | "\n", 88 | "producer = KafkaProducer(bootstrap_servers=['54.166.148.190:9092'],\n", 89 | " value_serializer=lambda x: dumps(x).encode('utf-8'))\n", 90 | "\n", 91 | "data = { 'G1': 1, 'G2': 0, 'G3': 0, 'G4': 0, 'G5': 0, \n", 92 | " 
'G6': 0, 'G7': 0, 'G8': 0, 'G9': 0, 'G10': 0, \n", 93 | " 'User_ID': str(uuid.uuid1())}\n", 94 | "result = producer.send('dsp', data)\n", 95 | "result.get()\n" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "## Streaming Pipeline " 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "from pyspark.sql.functions import udf\n", 112 | "from pyspark.sql.types import StringType\n", 113 | "import json \n", 114 | "import pandas as pd\n", 115 | "from sklearn.linear_model import LogisticRegression\n", 116 | "\n", 117 | "# build a logistic regression model \n", 118 | "gamesDF = pd.read_csv(\"https://github.com/bgweber/Twitch/raw/master/Recommendations/games-expand.csv\")\n", 119 | "model = LogisticRegression() \n", 120 | "model.fit(gamesDF.iloc[:,0:10], gamesDF['label'])\n", 121 | "\n", 122 | "# read from Kafka \n", 123 | "df = spark.readStream.format(\"kafka\") \\\n", 124 | " .option(\"kafka.bootstrap.servers\", \"54.166.148.190:9092\") \\\n", 125 | " .option(\"subscribe\", \"dsp\").load()\n", 126 | "\n", 127 | "# define the UDF for scoring users \n", 128 | "def score(row):\n", 129 | " d = json.loads(row)\n", 130 | " p = pd.DataFrame.from_dict(d, orient = \"index\").transpose() \n", 131 | " pred = model.predict_proba(p.iloc[:,0:10])[0][0]\n", 132 | " result = {'User_ID': d['User_ID'], 'pred': pred }\n", 133 | " return str(json.dumps(result))\n", 134 | " \n", 135 | "# select the value field and apply the UDF \n", 136 | "df = df.selectExpr(\"CAST(value AS STRING)\")\n", 137 | "score_udf = udf(score, StringType()) \n", 138 | "df = df.select( score_udf(\"value\").alias(\"value\"))\n", 139 | "\n", 140 | "# Write results to Kafka \n", 141 | "query = df.writeStream.format(\"kafka\") \\\n", 142 | " .option(\"kafka.bootstrap.servers\", \"54.166.148.190:9092\") \\\n", 143 | " .option(\"topic\", \"preds\") \\\n", 144 | " .option(\"checkpointLocation\", \"/temp\").start()\n" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "## Model Consumer" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "from kafka import KafkaConsumer\n", 161 | "from json import loads\n", 162 | "\n", 163 | "consumer = KafkaConsumer('preds',\n", 164 | " bootstrap_servers=['54.166.148.190:9092'],\n", 165 | " value_deserializer=lambda x: loads(x))\n", 166 | "\n", 167 | "for x in consumer:\n", 168 | " print(x.value)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "# Dataflow Streaming" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "## PubSub Consumer" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "import time\n", 192 | "from google.cloud import pubsub_v1\n", 193 | "\n", 194 | "subscriber = pubsub_v1.SubscriberClient()\n", 195 | "subscription_path = subscriber.subscription_path(\"gameanalytics-199018\", \"dsp\")\n", 196 | "\n", 197 | "def callback(message):\n", 198 | " print(message.data)\n", 199 | " message.ack()\n", 200 | "\n", 201 | "subscriber.subscribe(subscription_path, callback=callback)\n", 202 | "\n", 203 | "while True:\n", 204 | " time.sleep(10)" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "## PubSub 
Producer" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "from google.cloud import pubsub_v1\n", 221 | "\n", 222 | "publisher = pubsub_v1.PublisherClient()\n", 223 | "topic_path = publisher.topic_path(\"gameanalytics-199018\", \"natality\")\n", 224 | "\n", 225 | "data = \"Hello World!\".encode('utf-8')\n", 226 | "publisher.publish(topic_path, data=data)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "## Streaming Pipeline" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "import apache_beam as beam\n", 243 | "import argparse\n", 244 | "from apache_beam.options.pipeline_options import PipelineOptions\n", 245 | "from apache_beam.io.gcp.bigquery import parse_table_schema_from_json\n", 246 | "import json\n", 247 | "\n", 248 | "class ApplyDoFn(beam.DoFn):\n", 249 | "\n", 250 | " def __init__(self):\n", 251 | " self._model = None\n", 252 | " from google.cloud import storage\n", 253 | " import pandas as pd\n", 254 | " import pickle as pkl\n", 255 | " import json as js\n", 256 | " self._storage = storage\n", 257 | " self._pkl = pkl\n", 258 | " self._pd = pd\n", 259 | " self._json = js\n", 260 | " \n", 261 | " def process(self, element):\n", 262 | " if self._model is None:\n", 263 | " bucket = self._storage.Client().get_bucket(\n", 264 | " 'dsp_model_store')\n", 265 | " blob = bucket.get_blob('natality/sklearn-linear')\n", 266 | " self._model =self._pkl.loads(blob.download_as_string())\n", 267 | " \n", 268 | " element = self._json.loads(element.decode('utf-8'))\n", 269 | " new_x = self._pd.DataFrame.from_dict(element, \n", 270 | " orient = \"index\").transpose().fillna(0) \n", 271 | " weight = self._model.predict(new_x.iloc[:,1:8])[0]\n", 272 | " return [ { 'guid': element['guid'], 'weight': weight, \n", 273 | " 'time': str(element['time']) } ]\n", 274 | " \n", 275 | "class PublishDoFn(beam.DoFn):\n", 276 | " \n", 277 | " def __init__(self):\n", 278 | " from google.cloud import datastore \n", 279 | " self._ds = datastore\n", 280 | " \n", 281 | " def process(self, element):\n", 282 | " client = self._ds.Client()\n", 283 | " key = client.key('natality-guid', element['guid'])\n", 284 | " entity = self._ds.Entity(key)\n", 285 | " entity['weight'] = element['weight'] \n", 286 | " entity['time'] = element['time']\n", 287 | " client.put(entity)\n", 288 | " \n", 289 | "# set up pipeline parameters \n", 290 | "parser = argparse.ArgumentParser()\n", 291 | "known_args, pipeline_args = parser.parse_known_args(None)\n", 292 | "pipeline_options = PipelineOptions(pipeline_args)\n", 293 | "\n", 294 | "# define the topics \n", 295 | "topic = \"projects/{project}/topics/{topic}\"\n", 296 | "topic = topic.format(project = \"gameanalytics-199018\", topic = \"natality\")\n", 297 | "\n", 298 | "schema = parse_table_schema_from_json(json.dumps({'fields':\n", 299 | " [ { 'name': 'guid', 'type': 'STRING'},\n", 300 | " { 'name': 'weight', 'type': 'FLOAT64'},\n", 301 | " { 'name': 'time', 'type': 'STRING'} ]}))\n", 302 | "\n", 303 | "# define the pipeline steps \n", 304 | "p = beam.Pipeline(options=pipeline_options)\n", 305 | "lines = p | 'Read PubSub' >> beam.io.ReadFromPubSub(topic=topic)\n", 306 | "scored = lines | 'apply' >> beam.ParDo(ApplyDoFn())\n", 307 | "scored | 'Create entities' >> beam.ParDo(PublishDoFn())\n", 308 | "\n", 309 | "# run the pipeline \n", 310 | 
"result = p.run()\n", 311 | "result.wait_until_finish()" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "## Streaming Producer" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 2, 324 | "metadata": {}, 325 | "outputs": [ 326 | { 327 | "data": { 328 | "text/plain": [ 329 | "" 330 | ] 331 | }, 332 | "execution_count": 2, 333 | "metadata": {}, 334 | "output_type": "execute_result" 335 | } 336 | ], 337 | "source": [ 338 | "\n", 339 | "import json\n", 340 | "from google.cloud import pubsub_v1\n", 341 | "import time \n", 342 | "\n", 343 | "publisher = pubsub_v1.PublisherClient()\n", 344 | "topic_path = publisher.topic_path(\"gameanalytics-199018\", \"natality\")\n", 345 | "\n", 346 | "data = json.dumps({'year': 2001, 'plurality': 1, 'apgar_5min': 99, 'mother_age': 33, \n", 347 | " 'father_age': 40, 'gestation_weeks': 38, 'ever_born': 8, \n", 348 | " 'mother_married': 1, 'weight': 6.8122838958, \n", 349 | " 'time': str(time.time()), \n", 350 | " 'guid': 'b281c5e8-85b2-4cbd-a2d8-e501ca816363'}\n", 351 | ").encode('utf-8') \n", 352 | "\n", 353 | "publisher.publish(topic_path, data=data)" 354 | ] 355 | } 356 | ], 357 | "metadata": { 358 | "kernelspec": { 359 | "display_name": "Python 3", 360 | "language": "python", 361 | "name": "python3" 362 | }, 363 | "language_info": { 364 | "codemirror_mode": { 365 | "name": "ipython", 366 | "version": 3 367 | }, 368 | "file_extension": ".py", 369 | "mimetype": "text/x-python", 370 | "name": "python", 371 | "nbconvert_exporter": "python", 372 | "pygments_lexer": "ipython3", 373 | "version": "3.7.4" 374 | } 375 | }, 376 | "nbformat": 4, 377 | "nbformat_minor": 2 378 | } 379 | -------------------------------------------------------------------------------- /DSP_CH3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "ename": "ModuleNotFoundError", 10 | "evalue": "No module named 'google.appengine'", 11 | "output_type": "error", 12 | "traceback": [ 13 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 14 | "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", 15 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;31m#import webapp2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mgoogle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappengine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapi\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mapp_identity\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 16 | "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'google.appengine'" 17 | ] 18 | } 19 | ], 20 | "source": [ 21 | "\n", 22 | "\n", 23 | "# CLOUD STORAGE\n", 24 | "# https://cloud.google.com/appengine/docs/standard/python/googlecloudstorageclient/read-write-to-cloud-storage\n", 25 | "\n", 26 | "#import logging\n", 27 | "#import os\n", 28 | "#import cloudstorage as gcs\n", 29 | "#import webapp2\n", 30 | "\n", 31 | "#from google.appengine.api import app_identity\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [] 40 | }, 41 | { 42 | "cell_type": "code", 43 
| "execution_count": 1, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "{'response': 'Hello from Cloud Function', 'success': True}\n" 51 | ] 52 | } 53 | ], 54 | "source": [ 55 | "\n", 56 | "import requests\n", 57 | "\n", 58 | "result = requests.post(\"https://us-central1-gameanalytics-199018.cloudfunctions.net/echo\", json = { 'msg': 'Hello from Cloud Function' })\n", 59 | "print(result.json())\n", 60 | "\n" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "{'response': 'Hello from Auth', 'success': True}\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "\n", 78 | "\n", 79 | "import requests\n", 80 | "\n", 81 | "result = requests.post(\"https://us-central1-gameanalytics-199018.cloudfunctions.net/auth\", json = { 'msg': 'Hello from Auth' })\n", 82 | "print(result.json())\n", 83 | "\n", 84 | "\n", 85 | "\n" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 5, 91 | "metadata": {}, 92 | "outputs": [ 93 | { 94 | "ename": "JSONDecodeError", 95 | "evalue": "Expecting value: line 2 column 1 (char 1)", 96 | "output_type": "error", 97 | "traceback": [ 98 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 99 | "\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)", 100 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m result = requests.post(\"https://us-central1-gameanalytics-199018.cloudfunctions.net/predict\", \\\n\u001b[1;32m 4\u001b[0m json = { 'G1': '1', 'G2': '0', 'G3': '0', 'G4': '0', 'G5': '0', 'G6': '0', 'G7': '0', 'G8': '0', 'G9': '0', 'G10': '0' })\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 101 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/requests/models.py\u001b[0m in \u001b[0;36mjson\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 895\u001b[0m \u001b[0;31m# used.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 896\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 897\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mcomplexjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloads\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 898\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 899\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 102 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/simplejson/__init__.py\u001b[0m in \u001b[0;36mloads\u001b[0;34m(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, use_decimal, **kw)\u001b[0m\n\u001b[1;32m 516\u001b[0m \u001b[0mparse_constant\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mobject_pairs_hook\u001b[0m 
\u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 517\u001b[0m and not use_decimal and not kw):\n\u001b[0;32m--> 518\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_default_decoder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 519\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcls\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 520\u001b[0m \u001b[0mcls\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mJSONDecoder\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 103 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/simplejson/decoder.py\u001b[0m in \u001b[0;36mdecode\u001b[0;34m(self, s, _w, _PY3)\u001b[0m\n\u001b[1;32m 368\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_PY3\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbytes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 369\u001b[0m \u001b[0ms\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoding\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 370\u001b[0;31m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraw_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 371\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_w\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 372\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 104 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/simplejson/decoder.py\u001b[0m in \u001b[0;36mraw_decode\u001b[0;34m(self, s, idx, _w, _PY3)\u001b[0m\n\u001b[1;32m 398\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mord0\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0xef\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0midx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'\\xef\\xbb\\xbf'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 399\u001b[0m \u001b[0midx\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 400\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscan_once\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0m_w\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0midx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 105 | "\u001b[0;31mJSONDecodeError\u001b[0m: Expecting value: line 2 column 1 (char 1)" 106 | ] 107 | } 108 | ], 109 | "source": [ 110 | "\n", 111 | "import requests\n", 112 | "\n", 113 | "result = requests.post(\"https://us-central1-gameanalytics-199018.cloudfunctions.net/predict\", \\\n", 114 | " json = { 'G1': '1', 'G2': '0', 'G3': '0', 'G4': '0', 'G5': '0', 'G6': '0', 'G7': '0', 'G8': '0', 'G9': '0', 'G10': '0' })\n", 115 | "print(result.json())\n", 116 | "\n", 117 | "\n" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 7, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "name": "stdout", 134 | "output_type": "stream", 135 | "text": [ 136 | "Prediction 0.06745113592634559\n" 137 | ] 138 | } 139 | ], 140 | "source": [ 141 | "\n", 142 | "import requests\n", 143 | "\n", 144 | "result = requests.post(\"https://3z5btf0ucb.execute-api.us-east-1.amazonaws.com/default/logit\", \\\n", 145 | " json = { 'G1': '1', 'G2': '0', 'G3': '0', 'G4': '0', 'G5': '0', 'G6': '0', 'G7': '0', 'G8': '0', 'G9': '0', 'G10': '0' })\n", 146 | "print(result.text)\n", 147 | "\n", 148 | "\n", 149 | "\n" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 5, 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "name": "stdout", 166 | "output_type": "stream", 167 | "text": [ 168 | "ben-df-test\n", 169 | "dataproc-de8793b2-5dc6-4500-9aff-cfd3d4b60aa8-us\n", 170 | "dsp_model_store\n", 171 | "dsp_model_store_1\n", 172 | "dsp_pmodel_store\n", 173 | "gameanalytics-199018.appspot.com\n", 174 | "staging.gameanalytics-199018.appspot.com\n" 175 | ] 176 | } 177 | ], 178 | "source": [ 179 | "from google.cloud import storage\n", 180 | "bucket_name = \"dsp_model_store_1\"\n", 181 | "\n", 182 | "storage_client = storage.Client()\n", 183 | "#storage_client.create_bucket(bucket_name)\n", 184 | "\n", 185 | "for bucket in storage_client.list_buckets():\n", 186 | " print(bucket.name)\n" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "\n", 196 | "# save to GCS\n", 197 | "bucket = storage_client.get_bucket(bucket_name)\n", 198 | "blob = bucket.blob(\"serverless/keras/v1\")\n", 199 | "blob.upload_from_filename(\"logit.pkl\")\n", 200 | "blob.upload_from_filename(\"games.h5\")\n", 201 | "\n" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 7, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/plain": [ 212 | "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", 213 | " intercept_scaling=1, l1_ratio=None, max_iter=100,\n", 214 | " multi_class='warn', n_jobs=None, penalty='l2',\n", 215 | " random_state=None, solver='warn', tol=0.0001, verbose=0,\n", 216 | " warm_start=False)" 217 | ] 218 | }, 219 | "execution_count": 7, 220 | "metadata": {}, 221 | "output_type": "execute_result" 222 | } 223 | ], 224 | "source": [ 225 | "\n", 226 | "# load from GCS\n", 227 | "blob = bucket.blob(\"serverless/logit/v1\")\n", 228 | 
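"# note: the blob name is the object key within the bucket, not a local path;\n", "# a model can also be unpickled straight from memory, e.g.\n", "#   model = pk.loads(blob.download_as_string())\n",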
"blob.download_to_filename(\"/tmp/local_logit.pkl\")\n", 229 | "\n", 230 | "import pickle as pk\n", 231 | "model = pk.load(open(\"/tmp/local_logit.pkl\", 'rb'))\n", 232 | "model\n" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 3, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "from google.cloud import storage\n", 249 | "\n", 250 | "storage_client = storage.Client()\n", 251 | "bucket_name = \"dsp_model_store_1\"\n", 252 | "\n", 253 | "storage_client = storage.Client()\n", 254 | "bucket = storage_client.get_bucket(bucket_name)\n", 255 | "\n", 256 | "blob = bucket.blob(\"serverless/logit/v1\")\n", 257 | "blob.download_to_filename(\"/tmp/local_logit.pkl\")\n", 258 | "\n", 259 | "model = pickle.load(open(\"/tmp/local_logit.pkl\", 'rb'))\n", 260 | "\n", 261 | "\n" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 28, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "data": { 278 | "text/plain": [ 279 | "b'\\x80\\x03csklearn.linear_model.logistic\\nLogisticRegression\\nq\\x00)\\x81q\\x01}q\\x02(X\\x07\\x00\\x00\\x00penaltyq\\x03X\\x02\\x00\\x00\\x00l2q\\x04X\\x04\\x00\\x00\\x00dualq\\x05\\x89X\\x03\\x00\\x00\\x00tolq\\x06G?\\x1a6\\xe2\\xeb\\x1cC-X\\x01\\x00\\x00\\x00Cq\\x07G?\\xf0\\x00\\x00\\x00\\x00\\x00\\x00X\\r\\x00\\x00\\x00fit_interceptq\\x08\\x88X\\x11\\x00\\x00\\x00intercept_scalingq\\tK\\x01X\\x0c\\x00\\x00\\x00class_weightq\\nNX\\x0c\\x00\\x00\\x00random_stateq\\x0bNX\\x06\\x00\\x00\\x00solverq\\x0cX\\x04\\x00\\x00\\x00warnq\\rX\\x08\\x00\\x00\\x00max_iterq\\x0eKdX\\x0b\\x00\\x00\\x00multi_classq\\x0fh\\rX\\x07\\x00\\x00\\x00verboseq\\x10K\\x00X\\n\\x00\\x00\\x00warm_startq\\x11\\x89X\\x06\\x00\\x00\\x00n_jobsq\\x12NX\\x08\\x00\\x00\\x00l1_ratioq\\x13NX\\x08\\x00\\x00\\x00classes_q\\x14cnumpy.core.multiarray\\n_reconstruct\\nq\\x15cnumpy\\nndarray\\nq\\x16K\\x00\\x85q\\x17C\\x01bq\\x18\\x87q\\x19Rq\\x1a(K\\x01K\\x02\\x85q\\x1bcnumpy\\ndtype\\nq\\x1cX\\x02\\x00\\x00\\x00i8q\\x1dK\\x00K\\x01\\x87q\\x1eRq\\x1f(K\\x03X\\x01\\x00\\x00\\x00b\\x89C\\x04\\x05\\x00\\x00\\x00q?tq@bX\\x10\\x00\\x00\\x00_sklearn_versionqAX\\x06\\x00\\x00\\x000.21.3qBub.'" 280 | ] 281 | }, 282 | "execution_count": 28, 283 | "metadata": {}, 284 | "output_type": "execute_result" 285 | } 286 | ], 287 | "source": [ 288 | "from google.cloud import storage\n", 289 | "\n", 290 | "storage_client = storage.Client()\n", 291 | "bucket_name = \"dsp_model_store_1\"\n", 292 | "\n", 293 | "storage_client = storage.Client()\n", 294 | "bucket = storage_client.get_bucket(bucket_name)\n", 295 | "blob = bucket.blob(\"serverless/logit/v1\")\n", 296 | "\n", 297 | "contents = blob.download_as_string()\n", 298 | "contents" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 24, 304 | "metadata": {}, 305 | "outputs": [ 306 | { 307 | "data": { 308 | "text/plain": [ 309 | "" 310 | ] 311 | }, 312 | "execution_count": 24, 313 | "metadata": {}, 314 | "output_type": "execute_result" 315 | } 316 | ], 317 | "source": [ 318 | "blob" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": {}, 332 
| "outputs": [], 333 | "source": [] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": null, 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "from google.cloud import storage\n", 356 | "\n", 357 | "storage_client = storage.Client()\n", 358 | "\n", 359 | "def hello_gcs_generic(data, context):\n", 360 | " bucket = storage_client.get_bucket(data['bucket'])\n", 361 | " blob = bucket.blob(data['name'])\n", 362 | " contents = blob.download_as_string()\n", 363 | " # Process the file contents, etc..." 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": 10, 376 | "metadata": {}, 377 | "outputs": [], 378 | "source": [ 379 | "import cloudstorage as gcs" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": 11, 385 | "metadata": {}, 386 | "outputs": [ 387 | { 388 | "ename": "AttributeError", 389 | "evalue": "module 'cloudstorage' has no attribute 'open'", 390 | "output_type": "error", 391 | "traceback": [ 392 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 393 | "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", 394 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mfilenamee\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"gs://dsp_model_store_1/serverless/logit/v1/local_logit.pkl\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mgcs_file\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgcs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mcontents\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgcs_file\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mgcs_file\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 395 | "\u001b[0;31mAttributeError\u001b[0m: module 'cloudstorage' has no attribute 'open'" 396 | ] 397 | } 398 | ], 399 | "source": [ 400 | "filenamee = \"gs://dsp_model_store_1/serverless/logit/v1/local_logit.pkl\"\n", 401 | "\n", 402 | "gcs_file = gcs.open(filename)\n", 403 | "contents = gcs_file.read()\n", 404 | "gcs_file.close()" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "metadata": {}, 411 | "outputs": [], 412 | "source": [] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": 1, 417 | "metadata": {}, 418 | "outputs": [], 419 | "source": [ 420 | "from google.cloud import storage\n", 421 | "bucket_name = \"dsp_model_store_1\"\n", 422 | "\n", 423 | "storage_client = storage.Client()\n", 424 | "bucket = storage_client.get_bucket(bucket_name)\n", 425 | "\n", 426 | "blob = bucket.blob(\"serverless/logit/v1\")\n", 427 | 
"blob.download_to_filename(\"local_logit.pkl\")\n", 428 | "\n" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "import pickle \n", 438 | "\n", 439 | "blob = bucket.blob(\"serverless/logit/v1\")\n", 440 | "#blob.download_to_filename(\"local_logit.pkl\")\n", 441 | "#model = pickle.load(open(\"local_logit.pkl\", 'rb'))\n", 442 | "\n", 443 | "sm = blob.download_as_string(\"local_logit.pkl\")\n", 444 | "#model = pickle.load(sm)\n", 445 | "\n", 446 | "\n", 447 | "#model\n" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": null, 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": 2, 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [ 463 | "from google.resumable_media.requests import Download\n", 464 | "\n" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": null, 470 | "metadata": {}, 471 | "outputs": [], 472 | "source": [ 473 | "\n", 474 | "download = Download(\n", 475 | " download_url, stream=file_obj, headers=headers, start=start, end=end\n", 476 | " )\n", 477 | " download.consume(transport)" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "metadata": {}, 484 | "outputs": [], 485 | "source": [] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": null, 490 | "metadata": {}, 491 | "outputs": [], 492 | "source": [] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": 30, 497 | "metadata": {}, 498 | "outputs": [ 499 | { 500 | "ename": "AttributeError", 501 | "evalue": "'str' object has no attribute '_http'", 502 | "output_type": "error", 503 | "traceback": [ 504 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 505 | "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", 506 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m#model = pickle.load(open(\"local_logit.pkl\", 'rb'))\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0msm\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mblob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdownload_as_string\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"local_logit.pkl\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0;31m#model = pickle.load(sm)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 507 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/google/cloud/storage/blob.py\u001b[0m in \u001b[0;36mdownload_as_string\u001b[0;34m(self, client, start, end)\u001b[0m\n\u001b[1;32m 705\u001b[0m \"\"\"\n\u001b[1;32m 706\u001b[0m \u001b[0mstring_buffer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBytesIO\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 707\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdownload_to_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstring_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclient\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mclient\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mstart\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstart\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mend\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 708\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mstring_buffer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetvalue\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 709\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 508 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/google/cloud/storage/blob.py\u001b[0m in \u001b[0;36mdownload_to_file\u001b[0;34m(self, file_obj, client, start, end)\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"accept-encoding\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"gzip\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 643\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 644\u001b[0;31m \u001b[0mtransport\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_transport\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclient\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 645\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 646\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_do_download\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtransport\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile_obj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdownload_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstart\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 509 | "\u001b[0;32m~/.local/lib/python3.7/site-packages/google/cloud/storage/blob.py\u001b[0m in \u001b[0;36m_get_transport\u001b[0;34m(self, client)\u001b[0m\n\u001b[1;32m 525\u001b[0m \"\"\"\n\u001b[1;32m 526\u001b[0m \u001b[0mclient\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_require_client\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclient\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 527\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mclient\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_http\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 528\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 529\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_get_download_url\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 510 | "\u001b[0;31mAttributeError\u001b[0m: 'str' object has no attribute '_http'" 511 | ] 512 | } 513 | ], 514 | "source": [ 515 | "import pickle \n", 516 | "\n", 517 | "blob = bucket.blob(\"serverless/logit/v1\")\n", 518 | "#blob.download_to_filename(\"local_logit.pkl\")\n", 519 | "#model = pickle.load(open(\"local_logit.pkl\", 'rb'))\n", 520 | "\n", 521 | "sm = blob.download_as_string(\"local_logit.pkl\")\n", 522 | "#model = pickle.load(sm)\n", 523 | "\n", 524 | "\n", 525 | "#model\n" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": null, 531 | "metadata": {}, 532 | "outputs": [], 533 | "source": [] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": 25, 538 
| "metadata": {}, 539 | "outputs": [ 540 | { 541 | "data": { 542 | "text/plain": [ 543 | "" 544 | ] 545 | }, 546 | "execution_count": 25, 547 | "metadata": {}, 548 | "output_type": "execute_result" 549 | } 550 | ], 551 | "source": [ 552 | "blob = bucket.blob(\"serverless/logit/v1\")\n", 553 | "blob" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": null, 559 | "metadata": {}, 560 | "outputs": [], 561 | "source": [] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": null, 566 | "metadata": {}, 567 | "outputs": [], 568 | "source": [] 569 | }, 570 | { 571 | "cell_type": "code", 572 | "execution_count": null, 573 | "metadata": {}, 574 | "outputs": [], 575 | "source": [] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "execution_count": null, 580 | "metadata": {}, 581 | "outputs": [], 582 | "source": [ 583 | "storage_client = storage.Client()\n", 584 | " bucket = storage_client.get_bucket(bucket_name)\n", 585 | " blob = bucket.blob(source_blob_name)\n", 586 | "\n", 587 | " blob.download_to_filename(destination_file_name)\n", 588 | "\n", 589 | " print('Blob {} downloaded to {}.'.format(\n", 590 | " source_blob_name,\n", 591 | " destination_file_name))" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "metadata": {}, 598 | "outputs": [], 599 | "source": [] 600 | } 601 | ], 602 | "metadata": { 603 | "kernelspec": { 604 | "display_name": "Python 3", 605 | "language": "python", 606 | "name": "python3" 607 | }, 608 | "language_info": { 609 | "codemirror_mode": { 610 | "name": "ipython", 611 | "version": 3 612 | }, 613 | "file_extension": ".py", 614 | "mimetype": "text/x-python", 615 | "name": "python", 616 | "nbconvert_exporter": "python", 617 | "pygments_lexer": "ipython3", 618 | "version": "3.7.3" 619 | } 620 | }, 621 | "nbformat": 4, 622 | "nbformat_minor": 2 623 | } 624 | -------------------------------------------------------------------------------- /DSP_CH5.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "/home/ec2-user/.local/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. 
Specify a solver to silence this warning.\n", 13 | "  FutureWarning)\n", 14 | "1it [00:03, 3.35s/it]\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "import pandas as pd\n", 20 | "import numpy as np\n", 21 | "from google.oauth2 import service_account\n", 22 | "from sklearn.linear_model import LogisticRegression\n", 23 | "from datetime import datetime\n", 24 | "import pandas_gbq\n", 25 | "\n", 26 | "# fetch the data set and add IDs \n", 27 | "gamesDF = pd.read_csv(\"https://github.com/bgweber/Twitch/raw/master/Recommendations/games-expand.csv\")\n", 28 | "gamesDF['User_ID'] = gamesDF.index \n", 29 | "gamesDF['New_User'] = np.floor(np.random.randint(0, 10, gamesDF.shape[0])/9)\n", 30 | "\n", 31 | "# train and test groups \n", 32 | "train = gamesDF[gamesDF['New_User'] == 0]\n", 33 | "x_train = train.iloc[:,0:10]\n", 34 | "y_train = train['label']\n", 35 | "test = gamesDF[gamesDF['New_User'] == 1]\n", 36 | "x_test = test.iloc[:,0:10]\n", 37 | "\n", 38 | "# build a model\n", 39 | "model = LogisticRegression()\n", 40 | "model.fit(x_train, y_train)\n", 41 | "y_pred = model.predict_proba(x_test)[:, 1]\n", 42 | "\n", 43 | "# build a predictions data frame\n", 44 | "resultDF = pd.DataFrame({'User_ID': test['User_ID'], 'Pred': y_pred})\n", 45 | "resultDF['time'] = str(datetime.now())\n", 46 | "\n", 47 | "# save predictions to BigQuery \n", 48 | "table_id = \"dsp_demo.user_scores\"\n", 49 | "project_id = \"gameanalytics-199018\"\n", 50 | "credentials = service_account.Credentials.from_service_account_file('dsdemo.json')\n", 51 | "pandas_gbq.to_gbq(resultDF, table_id, project_id=project_id, if_exists = 'replace', credentials=credentials)\n" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 10, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/html": [ 62 | "
\n", 63 | "\n", 76 | "\n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | "
User_IDPredtime
027350.1121542019-11-04 01:06:02.929789
165970.2096462019-11-04 01:06:02.929789
2131590.2096462019-11-04 01:06:02.929789
3181790.2096462019-11-04 01:06:02.929789
44660.2839512019-11-04 01:06:02.929789
\n", 118 | "
" 119 | ], 120 | "text/plain": [ 121 | " User_ID Pred time\n", 122 | "0 2735 0.112154 2019-11-04 01:06:02.929789\n", 123 | "1 6597 0.209646 2019-11-04 01:06:02.929789\n", 124 | "2 13159 0.209646 2019-11-04 01:06:02.929789\n", 125 | "3 18179 0.209646 2019-11-04 01:06:02.929789\n", 126 | "4 466 0.283951 2019-11-04 01:06:02.929789" 127 | ] 128 | }, 129 | "execution_count": 10, 130 | "metadata": {}, 131 | "output_type": "execute_result" 132 | } 133 | ], 134 | "source": [ 135 | "from google.oauth2 import service_account\n", 136 | "import pandas_gbq\n", 137 | "\n", 138 | "credentials = service_account.Credentials.from_service_account_file(\n", 139 | " 'dsdemo.json',\n", 140 | ")\n", 141 | "\n", 142 | "project_id = \"gameanalytics-199018\"\n", 143 | "sql = \"SELECT * FROM dsp_demo.user_scores\"\n", 144 | "df = pandas_gbq.read_gbq(sql, project_id=project_id, credentials=credentials)\n", 145 | "df.head()\n" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [] 154 | } 155 | ], 156 | "metadata": { 157 | "kernelspec": { 158 | "display_name": "Python 3", 159 | "language": "python", 160 | "name": "python3" 161 | }, 162 | "language_info": { 163 | "codemirror_mode": { 164 | "name": "ipython", 165 | "version": 3 166 | }, 167 | "file_extension": ".py", 168 | "mimetype": "text/x-python", 169 | "name": "python", 170 | "nbconvert_exporter": "python", 171 | "pygments_lexer": "ipython3", 172 | "version": "3.7.4" 173 | } 174 | }, 175 | "nbformat": 4, 176 | "nbformat_minor": 2 177 | } 178 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DS_Production -------------------------------------------------------------------------------- /Redis-py-NHL.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "FakeStrictRedis,db=0>>>\n", 13 | "None\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "import fakeredis\n", 19 | "import json\n", 20 | "\n", 21 | "server = fakeredis.FakeServer()\n", 22 | "redis = fakeredis.FakeStrictRedis(server=server)\n", 23 | "print(redis)\n", 24 | "\n", 25 | "userID = 12345\n", 26 | "record = redis.get(userID)\n", 27 | "print(record)\n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | "Sessions:0\n" 40 | ] 41 | }, 42 | { 43 | "data": { 44 | "text/plain": [ 45 | "1" 46 | ] 47 | }, 48 | "execution_count": 2, 49 | "metadata": {}, 50 | "output_type": "execute_result" 51 | } 52 | ], 53 | "source": [ 54 | "\n", 55 | "# Create \n", 56 | "record = redis.get(userID)\n", 57 | "if record is None:\n", 58 | " profile = {\"sessions\": 0 }\n", 59 | " redis.set(userID, json.dumps(profile))\n", 60 | "\n", 61 | "# Read\n", 62 | "record = redis.get(userID)\n", 63 | "profile = json.loads(record)\n", 64 | "print(\"Sessions:\" + str(profile['sessions']))\n", 65 | "\n", 66 | "# Update\n", 67 | "profile['sessions'] += 1\n", 68 | "redis.set(userID, json.dumps(profile))\n", 69 | "\n", 70 | "# Expire/Delete \n", 71 | "redis.expire(userID, 15)\n", 72 | "redis.delete(userID)\n" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | 
"execution_count": 3, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/html": [ 83 | "
\n", 84 | "\n", 97 | "\n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | "
game_idplayer_idteam_idtimeOnIceassistsgoalsshotshitspowerPlayGoalspowerPlayAssists...faceoffTakentakeawaysgiveawaysshortHandedGoalsshortHandedAssistsblockedplusMinusevenTimeOnIceshortHandedTimeOnIcepowerPlayTimeOnIce
0201103022184674121999001300...000000-18859816
36201103022284674121993001400...00000109195420
902011030223846741211091114300...10000011023680
\n", 199 | "

3 rows × 22 columns

\n", 200 | "
" 201 | ], 202 | "text/plain": [ 203 | " game_id player_id team_id timeOnIce assists goals shots hits \\\n", 204 | "0 2011030221 8467412 1 999 0 0 1 3 \n", 205 | "36 2011030222 8467412 1 993 0 0 1 4 \n", 206 | "90 2011030223 8467412 1 1091 1 1 4 3 \n", 207 | "\n", 208 | " powerPlayGoals powerPlayAssists ... faceoffTaken takeaways giveaways \\\n", 209 | "0 0 0 ... 0 0 0 \n", 210 | "36 0 0 ... 0 0 0 \n", 211 | "90 0 0 ... 1 0 0 \n", 212 | "\n", 213 | " shortHandedGoals shortHandedAssists blocked plusMinus evenTimeOnIce \\\n", 214 | "0 0 0 0 -1 885 \n", 215 | "36 0 0 1 0 919 \n", 216 | "90 0 0 0 1 1023 \n", 217 | "\n", 218 | " shortHandedTimeOnIce powerPlayTimeOnIce \n", 219 | "0 98 16 \n", 220 | "36 54 20 \n", 221 | "90 68 0 \n", 222 | "\n", 223 | "[3 rows x 22 columns]" 224 | ] 225 | }, 226 | "execution_count": 3, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 231 | "source": [ 232 | "import pandas as pd\n", 233 | "\n", 234 | "df = pd.read_csv(\"game_skater_stats.csv\")\n", 235 | "df = df[df['player_id'] == 8467412]\n", 236 | "df.head(3)" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 4, 242 | "metadata": {}, 243 | "outputs": [ 244 | { 245 | "name": "stdout", 246 | "output_type": "stream", 247 | "text": [ 248 | "{'playerID': 8467412, 'Game_ID': 2011030221, 'goals': 0, 'assists': 0, 'shots': 1, 'hits': 3}\n" 249 | ] 250 | } 251 | ], 252 | "source": [ 253 | "import requests\n", 254 | "\n", 255 | "for index, row in df.iterrows():\n", 256 | " event = { \"playerID\": int(row['player_id']), \"Game_ID\": int(row['game_id']),\n", 257 | " \"goals\": int(row['goals']), \"assists\": int(row['assists']), \n", 258 | " \"shots\": int(row['shots']), \"hits\": int(row['hits']) }\n", 259 | " print(event)\n", 260 | " \n", 261 | " #requests.post(\"http://localhost:5000/update\", json = event) \n", 262 | " #requests.get(\"http://localhost:5000/score?player=8467412\") \n", 263 | " break\n", 264 | " \n" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 5, 270 | "metadata": {}, 271 | "outputs": [ 272 | { 273 | "name": "stdout", 274 | "output_type": "stream", 275 | "text": [ 276 | " * Serving Flask app \"__main__\" (lazy loading)\n", 277 | " * Environment: production\n", 278 | " WARNING: Do not use the development server in a production environment.\n", 279 | " Use a production WSGI server instead.\n", 280 | " * Debug mode: off\n" 281 | ] 282 | }, 283 | { 284 | "name": "stderr", 285 | "output_type": "stream", 286 | "text": [ 287 | " * Running on http://0.0.0.0:5000/ (Press CTRL+C to quit)\n" 288 | ] 289 | } 290 | ], 291 | "source": [ 292 | "import flask\n", 293 | "import fakeredis\n", 294 | "import json\n", 295 | "\n", 296 | "server = fakeredis.FakeServer()\n", 297 | "redis = fakeredis.FakeStrictRedis(server=server)\n", 298 | "app = flask.Flask(__name__)\n", 299 | "\n", 300 | "# endpoint for profile updates\n", 301 | "@app.route(\"/update\", methods=[\"GET\",\"POST\"])\n", 302 | "def update():\n", 303 | " \n", 304 | " # get the player ID to update\n", 305 | " event = flask.request.json\n", 306 | " playerID = event.get('playerID')\n", 307 | " \n", 308 | " # CREATE: heck if a record exists \n", 309 | " record = redis.get(playerID)\n", 310 | " if record is None:\n", 311 | " profile = {\"goals\": 0, \"shots\": 0, \"assists\": 0, \"hits\": 0 }\n", 312 | " redis.set(playerID, json.dumps(profile))\n", 313 | "\n", 314 | " # READ: get the user summary\n", 315 | " record = redis.get(playerID)\n", 316 | " profile = json.loads(record)\n", 
317 | "\n", 318 | " # UPDATE: add the new attributes\n", 319 | " profile['goals'] += event['goals']\n", 320 | " profile['shots'] += event['shots']\n", 321 | " profile['assists'] += event['assists']\n", 322 | " profile['hits'] += event['hits']\n", 323 | " redis.set(playerID, json.dumps(profile))\n", 324 | " \n", 325 | " # return the updated profile\n", 326 | " return flask.jsonify(profile)\n", 327 | "\n", 328 | "# endpoint for model serving\n", 329 | "@app.route(\"/score\", methods=[\"GET\"])\n", 330 | "def score():\n", 331 | " result = {}\n", 332 | "\n", 333 | " try:\n", 334 | " # get the user profile \n", 335 | " playerID = flask.request.args['playerID']\n", 336 | " record = redis.get(playerID)\n", 337 | " profile = json.loads(record)\n", 338 | " \n", 339 | " # calculate a regression value\n", 340 | " score = 1 + profile['goals'] * 10.0 \\\n", 341 | " + profile['shots'] * 1.0 \\\n", 342 | " + profile['assists'] * 2.0 \\\n", 343 | " + profile['hits'] * 0.5\n", 344 | " \n", 345 | " result['score'] = score\n", 346 | " except:\n", 347 | " None\n", 348 | " \n", 349 | " return flask.jsonify(result)\n", 350 | "\n", 351 | "# start the flask app, allow remote connections\n", 352 | "if __name__ == '__main__':\n", 353 | " app.run(host='0.0.0.0')" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 9, 359 | "metadata": {}, 360 | "outputs": [ 361 | { 362 | "name": "stdout", 363 | "output_type": "stream", 364 | "text": [ 365 | "{'playerID': 8467412, 'Game_ID': 2011030221, 'goals': 0, 'assists': 0, 'shots': 1, 'hits': 3}\n" 366 | ] 367 | } 368 | ], 369 | "source": [ 370 | "\n", 371 | "for index, row in df.iterrows():\n", 372 | " event = { \"playerID\": int(row['player_id']), \"Game_ID\": int(row['game_id']),\n", 373 | " \"goals\": int(row['goals']), \"assists\": int(row['assists']), \n", 374 | " \"shots\": int(row['shots']), \"hits\": int(row['hits']) }\n", 375 | " print(event)\n", 376 | " \n", 377 | " requests.post(\"http://localhost:5000/update\", json = event) \n", 378 | " prediction = requests.get(\"http://localhost:5000/score?playerID=8467412\") \n", 379 | " print(prediction.json())\n", 380 | " break" 381 | ] 382 | } 383 | ], 384 | "metadata": { 385 | "kernelspec": { 386 | "display_name": "Python 3", 387 | "language": "python", 388 | "name": "python3" 389 | }, 390 | "language_info": { 391 | "codemirror_mode": { 392 | "name": "ipython", 393 | "version": 3 394 | }, 395 | "file_extension": ".py", 396 | "mimetype": "text/x-python", 397 | "name": "python", 398 | "nbconvert_exporter": "python", 399 | "pygments_lexer": "ipython3", 400 | "version": "3.7.3" 401 | } 402 | }, 403 | "nbformat": 4, 404 | "nbformat_minor": 2 405 | } 406 | -------------------------------------------------------------------------------- /Stackdriver.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Initial Endpoint" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "\n", 17 | "import pandas as pd\n", 18 | "from sklearn.linear_model import LogisticRegression\n", 19 | "import flask\n", 20 | "\n", 21 | "df = pd.read_csv(\"https://github.com/bgweber/Twitch/raw/master/Recommendations/games-expand.csv\")\n", 22 | "model = LogisticRegression()\n", 23 | "model.fit(df.drop(['label'], axis=1), df['label'])\n", 24 | "\n", 25 | "app = flask.Flask(__name__)\n", 26 | "\n", 27 | "@app.route(\"/\", 
methods=[\"GET\",\"POST\"])\n", 28 | "def predict():\n", 29 | " data = {\"success\": False}\n", 30 | " \n", 31 | " params = flask.request.json\n", 32 | " if params is None:\n", 33 | " params = flask.request.args\n", 34 | "\n", 35 | " if \"G1\" in params.keys(): \n", 36 | " new_row = { \"G1\": params.get(\"G1\"), \"G2\": params.get(\"G2\"), \n", 37 | " \"G3\": params.get(\"G3\"), \"G4\": params.get(\"G4\"), \n", 38 | " \"G5\": params.get(\"G5\"), \"G6\": params.get(\"G6\"), \n", 39 | " \"G7\": params.get(\"G7\"), \"G8\": params.get(\"G8\"), \n", 40 | " \"G9\": params.get(\"G9\"), \"G10\": params.get(\"G10\") }\n", 41 | "\n", 42 | " new_x = pd.DataFrame.from_dict(new_row, orient = \"index\").transpose() \n", 43 | " data[\"response\"] = str(model.predict_proba(new_x)[0][1])\n", 44 | " data[\"success\"] = True\n", 45 | "\n", 46 | " return flask.jsonify(data)\n", 47 | "\n", 48 | "if __name__ == '__main__':\n", 49 | " app.run(host='0.0.0.0')" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "# Production Endpoint" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "from google.cloud import monitoring_v3\n", 66 | "from google.oauth2 import service_account\n", 67 | "from google.cloud import logging\n", 68 | "import socket \n", 69 | "import random\n", 70 | "import time\n", 71 | "import pandas as pd\n", 72 | "from sklearn.linear_model import LogisticRegression\n", 73 | "import flask\n", 74 | "from multiprocessing import Value\n", 75 | "import threading\n", 76 | "\n", 77 | "# create a unique host name for the pod \n", 78 | "host = socket.gethostbyname(socket.gethostname()) + \" - \" + str(random.randint(0, 1000000))\n", 79 | "\n", 80 | "# load GCP credentials and set up the Stackdriver monitor connection\n", 81 | "credentials = service_account.Credentials.from_service_account_file('serving.json')\n", 82 | "client = monitoring_v3.MetricServiceClient(credentials = credentials)\n", 83 | "project_name = client.project_path('serving-268422')\n", 84 | "\n", 85 | "# set up the Stackdriver logging connection\n", 86 | "logging_client = logging.Client(project = 'serving-268422', credentials = credentials)\n", 87 | "logger = logging_client.logger('model_service')\n", 88 | "logger.log_text(\"(\" + host + \") Launching model service\")\n", 89 | "\n", 90 | "# train a scikit-learn model \n", 91 | "df = pd.read_csv(\"https://github.com/bgweber/Twitch/raw/master/Recommendations/games-expand.csv\")\n", 92 | "model = LogisticRegression()\n", 93 | "model.fit(df.drop(['label'], axis=1), df['label'])\n", 94 | "\n", 95 | "# set up the app and a request tracker \n", 96 | "counter = Value('i', 0)\n", 97 | "app = flask.Flask(__name__)\n", 98 | "\n", 99 | "# define a function for writing metrics to Stackdriver \n", 100 | "def write_metric_value(value):\n", 101 | " series = monitoring_v3.types.TimeSeries()\n", 102 | " series.metric.type = 'custom.googleapis.com/serving/requests'\n", 103 | " series.metric.labels['ip'] = host\n", 104 | " point = series.points.add()\n", 105 | " point.value.double_value = value\n", 106 | " now = time.time()\n", 107 | " point.interval.end_time.seconds = int(now)\n", 108 | " client.create_time_series(project_name, [series])\n", 109 | "\n", 110 | "# set up a callback for recording requests per minute to Stackdriver \n", 111 | "def log_requests():\n", 112 | " threading.Timer(60.0, log_requests).start() \n", 113 | "\n", 114 | " requests = 0\n", 115 | " with 
counter.get_lock():\n", 116 | " requests = counter.value\n", 117 | " counter.value = 0 \n", 118 | " \n", 119 | " print(\"writing value: \" + str(requests))\n", 120 | " write_metric_value(requests)\n", 121 | "\n", 122 | "# initiate the request per minute tracking \n", 123 | "log_requests()\n", 124 | " \n", 125 | "# define the model endpoint \n", 126 | "@app.route(\"/\", methods=[\"GET\",\"POST\"])\n", 127 | "def predict():\n", 128 | " try :\n", 129 | " \n", 130 | " # update the number of requests \n", 131 | " with counter.get_lock():\n", 132 | " counter.value += 1 \n", 133 | " \n", 134 | " data = {\"success\": False}\n", 135 | "\n", 136 | " # check for passed in parameters \n", 137 | " params = flask.request.json\n", 138 | " if params is None:\n", 139 | " params = flask.request.args\n", 140 | " \n", 141 | " # get a model prediction \n", 142 | " if \"G1\" in params.keys(): \n", 143 | " new_row = { \"G1\": params.get(\"G1\"), \"G2\": params.get(\"G2\"), \n", 144 | " \"G3\": params.get(\"G3\"), \"G4\": params.get(\"G4\"), \n", 145 | " \"G5\": params.get(\"G5\"), \"G6\": params.get(\"G6\"), \n", 146 | " \"G7\": params.get(\"G7\"), \"G8\": params.get(\"G8\"), \n", 147 | " \"G9\": params.get(\"G9\"), \"G10\": params.get(\"G10\") }\n", 148 | "\n", 149 | " new_x = pd.DataFrame.from_dict(new_row, orient = \"index\").transpose() \n", 150 | " data[\"response\"] = str(model.predict_proba(new_x)[0][1])\n", 151 | " data[\"success\"] = True\n", 152 | " \n", 153 | " return flask.jsonify(data)\n", 154 | " except:\n", 155 | " \n", 156 | " # log any invalid requests \n", 157 | " logger.log_text(\"(\" + host + \") Error servicing request: \" + str(flask.request) + \" \" + str(params))\n", 158 | " flask.abort(400)\n", 159 | "\n", 160 | "# let gunicorn manage the ports to use \n", 161 | "if __name__ == '__main__':\n", 162 | " app.run(host='0.0.0.0')\n", 163 | "\n" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "# Dockerfile" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "FROM ubuntu:latest\n", 180 | "MAINTAINER Ben Weber \n", 181 | "\n", 182 | "RUN apt-get update \\ \n", 183 | " && apt-get install -y python3-pip python3-dev \\ \n", 184 | " && cd /usr/local/bin \\ \n", 185 | " && ln -s /usr/bin/python3 python \n", 186 | "\n", 187 | "RUN pip3 install flask \n", 188 | "RUN pip3 install pandas \n", 189 | "RUN pip3 install gunicorn \n", 190 | "RUN pip3 install scikit-learn \n", 191 | "RUN pip3 install google-cloud-logging \n", 192 | "RUN pip3 install google-cloud-monitoring \n", 193 | " \n", 194 | "COPY serving.json serving.json\n", 195 | "COPY app.py app.py\n", 196 | "\n", 197 | "ENTRYPOINT [\"gunicorn\", \"--bind\", \"0.0.0.0\", \"app:app\"]" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "# Stackdriver Monitoring" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 5, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [ 213 | "\n", 214 | "# connect to the monitoring service \n", 215 | "from google.cloud import monitoring_v3\n", 216 | "from google.oauth2 import service_account\n", 217 | "import time\n", 218 | "\n", 219 | "credentials = service_account.Credentials.from_service_account_file('serving.json')\n", 220 | "client = monitoring_v3.MetricServiceClient(credentials = credentials)\n", 221 | "project_name = client.project_path('serving-268422')\n" 222 | ] 223 | }, 224 | 
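{ "cell_type": "markdown", "metadata": {}, "source": [ "Writing a custom metric takes two steps: define and register a metric descriptor once, then append timestamped points against its type. The next cells sketch both steps, reusing the client and project_name created above." ] }, 224 | 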
{ 225 | "cell_type": "code", 226 | "execution_count": 6, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "\n", 231 | "# create a custom metric\n", 232 | "descriptor = monitoring_v3.types.MetricDescriptor()\n", 233 | "descriptor.type = 'custom.googleapis.com/serving/requests' \n", 234 | "descriptor.metric_kind = (monitoring_v3.enums.MetricDescriptor.MetricKind.GAUGE)\n", 235 | "descriptor.value_type = (monitoring_v3.enums.MetricDescriptor.ValueType.DOUBLE)\n", 236 | "descriptor.description = 'Model serving requests.'\n" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "\n", 246 | "# record a data point to the custom metric \n", 247 | "series = monitoring_v3.types.TimeSeries()\n", 248 | "series.metric.type = 'custom.googleapis.com/serving/requests'\n", 249 | "series.metric.labels['ip'] = \"1.2.3.4\"\n", 250 | "point = series.points.add()\n", 251 | "point.value.double_value = 50\n", 252 | "now = time.time()\n", 253 | "point.interval.end_time.seconds = int(now)\n", 254 | "client.create_time_series(project_name, [series])\n" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "# Stackdriver Logging" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "\n", 271 | "# connect to the monitoring service \n", 272 | "from google.cloud import logging\n", 273 | "\n", 274 | "logging_client = logging.Client(project = 'serving-268422', credentials = credentials)\n", 275 | "logger = logging_client.logger('model_service')\n" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [ 284 | "\n", 285 | "# log a message to stack driver \n", 286 | "logger.log_text('Hello World!')\n" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": {}, 292 | "source": [ 293 | "# Endpoint Testing" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 11, 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 | "name": "stdout", 303 | "output_type": "stream", 304 | "text": [ 305 | "\n", 306 | "{'response': '0.06730006696024816', 'success': True}\n" 307 | ] 308 | } 309 | ], 310 | "source": [ 311 | "import requests\n", 312 | "\n", 313 | "result = requests.post(\"http://35.226.218.118/\", json = { 'G1':'1', 'G2':'0', 'G3':'0', 'G4':'0', 'G5':'0', \\\n", 314 | " 'G6':'0', 'G7':'0', 'G8':'0', 'G9':'0', 'G10':'0'}) \n", 315 | "print(result)\n", 316 | "print(result.json())\n" 317 | ] 318 | } 319 | ], 320 | "metadata": { 321 | "kernelspec": { 322 | "display_name": "Python 3", 323 | "language": "python", 324 | "name": "python3" 325 | }, 326 | "language_info": { 327 | "codemirror_mode": { 328 | "name": "ipython", 329 | "version": 3 330 | }, 331 | "file_extension": ".py", 332 | "mimetype": "text/x-python", 333 | "name": "python", 334 | "nbconvert_exporter": "python", 335 | "pygments_lexer": "ipython3", 336 | "version": "3.8.1" 337 | } 338 | }, 339 | "nbformat": 4, 340 | "nbformat_minor": 4 341 | } 342 | -------------------------------------------------------------------------------- /append.py: -------------------------------------------------------------------------------- 1 | import apache_beam as beam 2 | import argparse 3 | from apache_beam.options.pipeline_options import PipelineOptions 4 | from apache_beam.io import ReadFromText 5 | from 
apache_beam.io import WriteToText 6 | 7 | # define a function for transforming the data 8 | class AppendDoFn(beam.DoFn): 9 | def process(self, element): 10 | return "Hello World! " + element 11 | 12 | # set up pipeline parameters 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--input', dest='input', 15 | default='gs://dataflow-samples/shakespeare/kinglear.txt') 16 | parser.add_argument('--output', dest='output', 17 | default='gs://dsp_model_store/shakespeare/kinglear3.txt') 18 | known_args, pipeline_args = parser.parse_known_args(None) 19 | pipeline_options = PipelineOptions(pipeline_args) 20 | 21 | # define the pipeline steps 22 | p = beam.Pipeline(options=pipeline_options) 23 | lines = p | 'read' >> ReadFromText(known_args.input) 24 | appended = lines | 'append' >> beam.ParDo(AppendDoFn()) 25 | appended | 'write' >> WriteToText(known_args.output) 26 | 27 | # run the pipeline 28 | result = p.run() 29 | result.wait_until_finish() 30 | 31 | -------------------------------------------------------------------------------- /apply.py: -------------------------------------------------------------------------------- 1 | import apache_beam as beam 2 | import argparse 3 | from apache_beam.options.pipeline_options import PipelineOptions 4 | from apache_beam.options.pipeline_options import SetupOptions 5 | from apache_beam.io.gcp.bigquery import parse_table_schema_from_json 6 | import json 7 | 8 | query = """ 9 | SELECT year, plurality, apgar_5min, 10 | mother_age, father_age, 11 | gestation_weeks, ever_born 12 | ,case when mother_married = true 13 | then 1 else 0 end as mother_married 14 | ,weight_pounds as weight 15 | ,current_timestamp as time 16 | ,GENERATE_UUID() as guid 17 | FROM `bigquery-public-data.samples.natality` 18 | limit 100 19 | """ 20 | 21 | class ApplyDoFn(beam.DoFn): 22 | 23 | def __init__(self): 24 | self._model = None 25 | from google.cloud import storage 26 | import pandas as pd 27 | import pickle as pkl 28 | self._storage = storage 29 | self._pkl = pkl 30 | self._pd = pd 31 | 32 | def process(self, element): 33 | if self._model is None: 34 | bucket = self._storage.Client().get_bucket('dsp_model_store') 35 | blob = bucket.get_blob('natality/sklearn-linear') 36 | self._model = self._pkl.loads(blob.download_as_string()) 37 | 38 | new_x = self._pd.DataFrame.from_dict(element, orient = "index").transpose().fillna(0) 39 | weight = self._model.predict(new_x.iloc[:,1:8])[0] 40 | return [ { 'guid': element['guid'], 'weight': weight, 'time': str(element['time']) } ] 41 | 42 | schema = parse_table_schema_from_json(json.dumps({'fields': 43 | [ { 'name': 'guid', 'type': 'STRING'}, 44 | { 'name': 'weight', 'type': 'FLOAT64'}, 45 | { 'name': 'time', 'type': 'STRING'} ]})) 46 | 47 | class PublishDoFn(beam.DoFn): 48 | 49 | def __init__(self): 50 | from google.cloud import datastore 51 | self._ds = datastore 52 | 53 | def process(self, element): 54 | client = self._ds.Client() 55 | key = client.key('natality-guid', element['guid']) 56 | entity = self._ds.Entity(key) 57 | entity['weight'] = element['weight'] 58 | entity['time'] = element['time'] 59 | client.put(entity) 60 | 61 | parser = argparse.ArgumentParser() 62 | known_args, pipeline_args = parser.parse_known_args(None) 63 | pipeline_options = PipelineOptions(pipeline_args) 64 | 65 | # define the pipeline steps 66 | p = beam.Pipeline(options=pipeline_options) 67 | data = p | 'Read from BigQuery' >> beam.io.Read( 68 | beam.io.BigQuerySource(query=query, use_standard_sql=True)) 69 | scored = data | 'Apply Model' >> 
beam.ParDo(ApplyDoFn()) 70 | scored | 'Save to BigQuery' >> beam.io.Write(beam.io.BigQuerySink( 71 | 'weight_preds', 'dsp_demo', schema = schema, 72 | create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED, 73 | write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND)) 74 | 75 | scored | 'Create entities' >> beam.ParDo(PublishDoFn()) 76 | 77 | # run the pipeline 78 | result = p.run() 79 | result.wait_until_finish() 80 | 81 | -------------------------------------------------------------------------------- /book_sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bgweber/DS_Production/3b909e326f18caddd6562d6a9d54c64e52b1fa48/book_sample.pdf -------------------------------------------------------------------------------- /dash_app.py: -------------------------------------------------------------------------------- 1 | import dash 2 | import dash_html_components as html 3 | import dash_core_components as dcc 4 | from dash.dependencies import Input, Output 5 | import pandas as pd 6 | import mlflow.sklearn 7 | 8 | app = dash.Dash(__name__) 9 | 10 | app.layout = html.Div(children=[ 11 | html.H1(children='Model UI'), 12 | html.P([ 13 | html.Label('Game 1 '), 14 | dcc.Input(value='1', type='text', id='g1'), 15 | ]), 16 | html.Div([ 17 | html.Label('Game 2 '), 18 | dcc.Input(value='0', type='text', id='g2'), 19 | ]), 20 | html.P([ 21 | html.Label('Prediction '), 22 | dcc.Input(value='0', type='text', id='pred') 23 | ]), 24 | ]) 25 | 26 | model_path = "models/logit_games_v1" 27 | model = mlflow.sklearn.load_model(model_path) 28 | 29 | @app.callback( 30 | Output(component_id='pred', component_property='value'), 31 | [Input(component_id='g1', component_property='value'), 32 | Input(component_id='g2', component_property='value')] 33 | ) 34 | def update_prediction(game1, game2): 35 | 36 | new_row = { "G1": float(game1), "G2": float(game2), 37 | "G3": 0, "G4": 0, 38 | "G5": 0, "G6": 0, 39 | "G7": 0, "G8": 0, 40 | "G9": 0, "G10":0 } 41 | 42 | new_x = pd.DataFrame.from_dict(new_row, orient = "index").transpose() 43 | return str(model.predict_proba(new_x)[0][1]) 44 | 45 | if __name__ == '__main__': 46 | app.run_server(host='0.0.0.0') 47 | -------------------------------------------------------------------------------- /dataflow_read.py: -------------------------------------------------------------------------------- 1 | 2 | import apache_beam as beam 3 | import argparse 4 | from apache_beam.options.pipeline_options import PipelineOptions 5 | 6 | parser = argparse.ArgumentParser() 7 | known_args, pipeline_args = parser.parse_known_args(None) 8 | pipeline_options = PipelineOptions(pipeline_args) 9 | 10 | class ApplyDoFn(beam.DoFn): 11 | def process(self, element): 12 | print(element) 13 | 14 | 15 | query = """ 16 | select * 17 | from `bigquery-public-data.samples.natality` 18 | order by rand() 19 | limit 100 20 | """ 21 | 22 | # define the pipeline steps 23 | p = beam.Pipeline(options=pipeline_options) 24 | data = p | 'Read from BigQuery' >> beam.io.Read( 25 | beam.io.BigQuerySource(query=query, use_standard_sql=True)) 26 | scored = data | 'Apply Model' >> beam.ParDo(ApplyDoFn()) 27 | 28 | # run the pipeline 29 | result = p.run() 30 | result.wait_until_finish() 31 | 32 | -------------------------------------------------------------------------------- /echo.py: -------------------------------------------------------------------------------- 1 | # load Flask 2 | import flask 3 | app = flask.Flask(__name__) 4 | 5 | # define a 
--------------------------------------------------------------------------------
/book_sample.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bgweber/DS_Production/3b909e326f18caddd6562d6a9d54c64e52b1fa48/book_sample.pdf
--------------------------------------------------------------------------------
/dash_app.py:
--------------------------------------------------------------------------------
import dash
import dash_html_components as html
import dash_core_components as dcc
from dash.dependencies import Input, Output
import pandas as pd
import mlflow.sklearn

app = dash.Dash(__name__)

# simple UI: two input fields and a read-only prediction field
app.layout = html.Div(children=[
    html.H1(children='Model UI'),
    html.P([
        html.Label('Game 1 '),
        dcc.Input(value='1', type='text', id='g1'),
    ]),
    html.P([
        html.Label('Game 2 '),
        dcc.Input(value='0', type='text', id='g2'),
    ]),
    html.P([
        html.Label('Prediction '),
        dcc.Input(value='0', type='text', id='pred')
    ]),
])

model_path = "models/logit_games_v1"
model = mlflow.sklearn.load_model(model_path)

# update the prediction field whenever either input changes
@app.callback(
    Output(component_id='pred', component_property='value'),
    [Input(component_id='g1', component_property='value'),
     Input(component_id='g2', component_property='value')]
)
def update_prediction(game1, game2):
    new_row = {"G1": float(game1), "G2": float(game2),
               "G3": 0, "G4": 0, "G5": 0, "G6": 0,
               "G7": 0, "G8": 0, "G9": 0, "G10": 0}
    new_x = pd.DataFrame.from_dict(new_row, orient="index").transpose()
    return str(model.predict_proba(new_x)[0][1])

if __name__ == '__main__':
    app.run_server(host='0.0.0.0')
--------------------------------------------------------------------------------
/dataflow_read.py:
--------------------------------------------------------------------------------
import apache_beam as beam
import argparse
from apache_beam.options.pipeline_options import PipelineOptions

parser = argparse.ArgumentParser()
known_args, pipeline_args = parser.parse_known_args(None)
pipeline_options = PipelineOptions(pipeline_args)

# print each record for debugging; no output PCollection is produced
class ApplyDoFn(beam.DoFn):
    def process(self, element):
        print(element)

# sample 100 random rows from the natality data set
query = """
    select *
    from `bigquery-public-data.samples.natality`
    order by rand()
    limit 100
"""

# define the pipeline steps
p = beam.Pipeline(options=pipeline_options)
data = p | 'Read from BigQuery' >> beam.io.Read(
    beam.io.BigQuerySource(query=query, use_standard_sql=True))
scored = data | 'Apply Model' >> beam.ParDo(ApplyDoFn())

# run the pipeline; reading from BigQuery requires the --project flag
result = p.run()
result.wait_until_finish()
--------------------------------------------------------------------------------
/echo.py:
--------------------------------------------------------------------------------
# load Flask
import flask
app = flask.Flask(__name__)

# define a predict function as an endpoint
@app.route("/", methods=["GET", "POST"])
def predict():
    data = {"success": False}

    # check for passed in parameters
    params = flask.request.json
    if params is None:
        params = flask.request.args

    # if parameters are found, echo the msg parameter
    if "msg" in params.keys():
        data["response"] = params.get("msg")
        data["success"] = True

    # return a response in json format
    return flask.jsonify(data)

# start the flask app, allow remote connections
if __name__ == '__main__':
    app.run(host='0.0.0.0')
--------------------------------------------------------------------------------
/keras_games.py:
--------------------------------------------------------------------------------
import pandas as pd
import mlflow
import mlflow.keras
import flask
import tensorflow as tf
import keras as k

# custom AUC metric needed to deserialize the saved model (TF 1.x style)
def auc(y_true, y_pred):
    auc = tf.metrics.auc(y_true, y_pred)[1]
    k.backend.get_session().run(tf.local_variables_initializer())
    return auc

# capture the default graph so the model can be used across Flask threads
global graph
graph = tf.get_default_graph()
model_path = "models/keras_games_v1"
model = mlflow.keras.load_model(model_path, custom_objects={'auc': auc})

app = flask.Flask(__name__)

@app.route("/", methods=["GET", "POST"])
def predict():
    data = {"success": False}
    params = flask.request.args

    if "G1" in params.keys():
        new_row = {"G1": params.get("G1"), "G2": params.get("G2"),
                   "G3": params.get("G3"), "G4": params.get("G4"),
                   "G5": params.get("G5"), "G6": params.get("G6"),
                   "G7": params.get("G7"), "G8": params.get("G8"),
                   "G9": params.get("G9"), "G10": params.get("G10")}

        new_x = pd.DataFrame.from_dict(new_row, orient="index").transpose()

        with graph.as_default():
            data["response"] = str(model.predict(new_x)[0][0])
            data["success"] = True

    return flask.jsonify(data)

if __name__ == '__main__':
    app.run(host='0.0.0.0')
--------------------------------------------------------------------------------
/logit.py:
--------------------------------------------------------------------------------
import pandas as pd
from sklearn.linear_model import LogisticRegression
import pickle

model = pickle.load(open("logit.pkl", 'rb'))

# load Flask
import flask
app = flask.Flask(__name__)

# define a predict function as an endpoint
@app.route("/", methods=["GET"])
def predict():
    data = {"success": False}

    # get the request parameters
    params = flask.request.args

    # if parameters are found, score the model
    if params is not None:
        new_row = {"G1": params.get("G1"), "G2": params.get("G2"),
                   "G3": params.get("G3"), "G4": params.get("G4"),
                   "G5": params.get("G5"), "G6": params.get("G6"),
                   "G7": params.get("G7"), "G8": params.get("G8"),
                   "G9": params.get("G9"), "G10": params.get("G10")}

        new_x = pd.DataFrame.from_dict(new_row, orient="index").transpose()
        data["response"] = str(model.predict_proba(new_x)[0][1])
        data["success"] = True

    # return a response in json format
    return flask.jsonify(data)

# start the flask app, allow remote connections
app.run(host='0.0.0.0')
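The Flask apps above (echo.py, keras_games.py, logit.py) all read query-string parameters. A hypothetical client-side check, not part of the repo, assuming one of the model servers is running locally on Flask's default port 5000:

import requests

# send ten game features as query parameters and print the model's score
params = {"G" + str(i): "1" for i in range(1, 11)}
result = requests.get("http://localhost:5000/", params=params)
print(result.json())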
--------------------------------------------------------------------------------
/natality.py:
--------------------------------------------------------------------------------
import apache_beam as beam
import argparse
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.io.gcp.bigquery import parse_table_schema_from_json
import json

class ApplyDoFn(beam.DoFn):

    def __init__(self):
        self._model = None
        from google.cloud import storage
        import pandas as pd
        import pickle as pkl
        import json as js
        self._storage = storage
        self._pkl = pkl
        self._pd = pd
        self._json = js

    def process(self, element):
        # lazily load the model from Cloud Storage on the first message
        if self._model is None:
            bucket = self._storage.Client().get_bucket('dsp_model_store')
            blob = bucket.get_blob('natality/sklearn-linear')
            self._model = self._pkl.loads(blob.download_as_string())

        # PubSub delivers bytes; decode and parse the JSON payload
        element = self._json.loads(element.decode('utf-8'))
        new_x = self._pd.DataFrame.from_dict(element,
                        orient="index").transpose().fillna(0)
        weight = self._model.predict(new_x.iloc[:, 1:8])[0]
        print(str(weight))
        return [{'guid': element['guid'], 'weight': weight,
                 'time': str(element['time'])}]

class PublishDoFn(beam.DoFn):

    def __init__(self):
        from google.cloud import datastore
        self._ds = datastore

    def process(self, element):
        client = self._ds.Client()
        key = client.key('natality-guid', element['guid'])
        entity = self._ds.Entity(key)
        entity['weight'] = element['weight']
        entity['time'] = element['time']
        print("publish")
        print(entity)
        client.put(entity)

# set up pipeline parameters
parser = argparse.ArgumentParser()
known_args, pipeline_args = parser.parse_known_args(None)
pipeline_options = PipelineOptions(pipeline_args)

# define the topics
topic = "projects/{project}/topics/{topic}"
topic = topic.format(project="gameanalytics-199018", topic="natality")

# output schema (retained from the batch version; unused in this script)
schema = parse_table_schema_from_json(json.dumps({'fields':
            [{'name': 'guid', 'type': 'STRING'},
             {'name': 'weight', 'type': 'FLOAT64'},
             {'name': 'time', 'type': 'STRING'}]}))

# define the pipeline steps
p = beam.Pipeline(options=pipeline_options)
lines = p | 'Read PubSub' >> beam.io.ReadFromPubSub(topic=topic)
scored = lines | 'apply' >> beam.ParDo(ApplyDoFn())
scored | 'Create entities' >> beam.ParDo(PublishDoFn())

# run the pipeline
result = p.run()
result.wait_until_finish()
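To drive the streaming pipeline above, a message with the fields ApplyDoFn expects can be published to the natality topic. A minimal sketch using the google-cloud-pubsub client; the snippet is illustrative, not part of the repo, and the record values are made up:

import json
from google.cloud import pubsub_v1

publisher = pubsub_v1.PublisherClient()
topic_path = publisher.topic_path("gameanalytics-199018", "natality")

# fabricate a record with the fields ApplyDoFn expects
record = {"guid": "test-guid", "year": 2001, "plurality": 1,
          "apgar_5min": 9, "mother_age": 33, "father_age": 40,
          "gestation_weeks": 38, "ever_born": 1, "mother_married": 1,
          "weight": 7.5, "time": "2020-01-01 00:00:00"}

# publish the JSON payload as bytes and block until it is accepted
future = publisher.publish(topic_path, json.dumps(record).encode('utf-8'))
print(future.result())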
--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
import pandas as pd
from sklearn.linear_model import LogisticRegression
import mlflow
import mlflow.sklearn

# load the model from the MLflow model store
model_path = "models/logit_games_v1"
model = mlflow.sklearn.load_model(model_path)

import flask
app = flask.Flask(__name__)

@app.route("/", methods=["GET", "POST"])
def predict():
    data = {"success": False}
    params = flask.request.args

    if "G1" in params.keys():
        new_row = {"G1": params.get("G1"), "G2": params.get("G2"),
                   "G3": params.get("G3"), "G4": params.get("G4"),
                   "G5": params.get("G5"), "G6": params.get("G6"),
                   "G7": params.get("G7"), "G8": params.get("G8"),
                   "G9": params.get("G9"), "G10": params.get("G10")}

        new_x = pd.DataFrame.from_dict(new_row, orient="index").transpose()
        data["response"] = str(model.predict_proba(new_x)[0][1])
        data["success"] = True

    return flask.jsonify(data)

if __name__ == '__main__':
    app.run(host='0.0.0.0')
--------------------------------------------------------------------------------
/stream.py:
--------------------------------------------------------------------------------
import apache_beam as beam
import argparse
from apache_beam.options.pipeline_options import PipelineOptions

# define a DoFn for transforming the data
class AppendDoFn(beam.DoFn):
    def process(self, element):
        # PubSub delivers bytes; decode before concatenating
        print("Hello World! - " + element.decode('utf-8'))

# set up pipeline parameters
parser = argparse.ArgumentParser()
known_args, pipeline_args = parser.parse_known_args(None)
pipeline_options = PipelineOptions(pipeline_args)

# define the topics
topic = "projects/{project}/topics/{topic}"
topic = topic.format(project="gameanalytics-199018", topic="natality")

# define the pipeline steps
p = beam.Pipeline(options=pipeline_options)
lines = p | 'Read PubSub' >> beam.io.ReadFromPubSub(topic=topic)
appended = lines | 'append' >> beam.ParDo(AppendDoFn())

# run the pipeline
result = p.run()
result.wait_until_finish()
--------------------------------------------------------------------------------
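Once the natality.py pipeline has scored a message, the prediction can be read back from Datastore. A hypothetical verification snippet, not part of the repo, assuming the made-up 'test-guid' key published above:

from google.cloud import datastore

# look up the entity written by PublishDoFn, keyed by guid
client = datastore.Client()
key = client.key('natality-guid', 'test-guid')
entity = client.get(key)
if entity is not None:
    print(entity['weight'], entity['time'])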