├── .gitignore
├── Chapter01
    ├── arrow.png
    └── chapter1-time-series-analysis-overview.ipynb
├── Chapter03
    ├── arrow.png
    └── chapter3-dataset-preparation.ipynb
├── Assets
    └── kaggle_api.png
├── LICENSE
├── Chapter11
    └── generate_inference_data.py
├── README.md
└── Chapter09
    ├── create_schema.py
    └── chapter9-preparing-l4e-dataset.ipynb


/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints
2 | Data/
3 | 


--------------------------------------------------------------------------------
/Chapter01/arrow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Time-Series-Analysis-on-AWS/HEAD/Chapter01/arrow.png


--------------------------------------------------------------------------------
/Chapter03/arrow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Time-Series-Analysis-on-AWS/HEAD/Chapter03/arrow.png


--------------------------------------------------------------------------------
/Assets/kaggle_api.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Time-Series-Analysis-on-AWS/HEAD/Assets/kaggle_api.png


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Packt
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Chapter11/generate_inference_data.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import datetime
 3 | import os
 4 | import pandas as pd
 5 | import pytz
 6 | 
 7 | def main():
 8 |     os.makedirs('inference-data/input', exist_ok=True)
 9 | 
10 |     # How many sequences of data we want to extract:
11 |     num_sequences = 3
12 | 
13 |     # The scheduling frequency in minutes: this **MUST** match the
14 |     # resampling rate used to train the model:
15 |     frequency = 5
16 | 
17 |     # Set current timezone to UTC:
18 |     utc_timezone = pytz.timezone("UTC")
19 | 
20 |     for root, dirs, files in os.walk('train-data'):
21 |         for f in files:
22 |             component = root.split('/')[-1]
23 |             print(f'Creating inference data from component {component}')
24 | 
25 |             component_fname = os.path.join(root, f)
26 |             inference_df = pd.read_csv(component_fname)
27 |             inference_df['Timestamp'] = pd.to_datetime(inference_df['Timestamp'])
28 |             inference_df = inference_df.set_index('Timestamp')
29 | 
30 |             # We know that some events of interest are happening after this date:
31 |             start = pd.to_datetime('2018-12-27 02:05:00')
32 |             for i in range(num_sequences):
33 |                 end = start + datetime.timedelta(minutes=+frequency - 1)
34 |                 inference_input = inference_df.loc[start:end, :]
35 |                 start = start + datetime.timedelta(minutes=+frequency)
36 | 
37 |                 # Rounding time to the previous X minutes 
38 |                 # where X is the selected frequency:
39 |                 filename_tm = datetime.datetime.now(utc_timezone)
40 |                 filename_tm = filename_tm - datetime.timedelta(
41 |                     minutes=filename_tm.minute % frequency,
42 |                     seconds=filename_tm.second,
43 |                     microseconds=filename_tm.microsecond
44 |                 )
45 |                 filename_tm = filename_tm + datetime.timedelta(minutes=+frequency * (i))
46 |                 current_timestamp = (filename_tm).strftime(format='%Y%m%d%H%M%S')
47 | 
48 |                 # The timestamp inside the file are in UTC and are not linked to the current timezone:
49 |                 timestamp_tm = datetime.datetime.now(utc_timezone)
50 |                 timestamp_tm = timestamp_tm - datetime.timedelta(
51 |                     minutes=timestamp_tm.minute % frequency,
52 |                     seconds=timestamp_tm.second,
53 |                     microseconds=timestamp_tm.microsecond
54 |                 )
55 |                 timestamp_tm = timestamp_tm + datetime.timedelta(minutes=+frequency * (i))
56 | 
57 |                 # We need to reset the index to match the time 
58 |                 # at which the scheduler will run inference:
59 |                 new_index = pd.date_range(
60 |                     start=timestamp_tm,
61 |                     periods=inference_input.shape[0], 
62 |                     freq='1min'
63 |                 )
64 |                 inference_input.index = new_index
65 |                 inference_input.index.name = 'Timestamp'
66 |                 inference_input = inference_input.reset_index()
67 |                 inference_input['Timestamp'] = inference_input['Timestamp'].dt.strftime('%Y-%m-%dT%H:%M:%S.%f')
68 | 
69 |                 # Export this file in CSV format:
70 |                 scheduled_fname = os.path.join('inference-data', 'input', f'{component}_{current_timestamp}.csv')
71 |                 inference_input.to_csv(scheduled_fname, index=None)
72 |             
# Only generate the inference files when this module is run as a script
# (nothing happens when it is imported):
if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | # Time Series Analysis on AWS
 5 | 
 6 | <a href="https://www.packtpub.com/product/time-series-analysis-on-aws/9781801816847"><img src="https://static.packt-cdn.com/products/9781801816847/cover/smaller" alt="Time Series Analysis on AWS" height="256px" align="right"></a>
 7 | 
 8 | This is the code repository for [Time Series Analysis on AWS](https://www.packtpub.com/product/time-series-analysis-on-aws/9781801816847), published by Packt.
 9 | 
10 | **Learn how to build forecasting models and detect anomalies in your time series data**
11 | 
12 | ## What is this book about?
 13 | As a business analyst or data scientist, you have to use many algorithms and approaches to prepare, process, and build ML-based applications by leveraging time series data, but you face common problems, such as not knowing which algorithm to choose or how to combine and interpret them. Amazon Web Services (AWS) provides numerous services to help you build applications fueled by artificial intelligence (AI) capabilities. This book helps you get to grips with three AWS AI/ML-managed services to enable you to deliver your desired business outcomes.
14 | 
 15 | This book covers the following exciting features:
16 | * Understand how time series data differs from other types of data
17 | * Explore the key challenges that can be solved using time series data
18 | * Forecast future values of business metrics using Amazon Forecast
19 | * Detect anomalies and deliver forewarnings using Lookout for Equipment
20 | * Detect anomalies in business metrics using Amazon Lookout for Metrics
21 | * Visualize your predictions to reduce the time to extract insights
22 | 
23 | If you feel this book is for you, get your [copy](https://www.amazon.com/dp/1801816840) today!
24 | 
25 | <a href="https://www.packtpub.com/?utm_source=github&utm_medium=banner&utm_campaign=GitHubBanner"><img src="https://raw.githubusercontent.com/PacktPublishing/GitHub/master/GitHub.png" 
26 | alt="https://www.packtpub.com/" border="5" /></a>
27 | 
28 | ## Instructions and Navigations
29 | All of the code is organized into folders. For example, Chapter02.
30 | 
31 | The code will look like the following:
32 | ```
33 | START = '2013-06-01'
34 | END = '2013-07-31'
35 | DATASET = 'household_energy_consumption'
36 | FORECAST_PREFIX = 'export_energy_consumption_XXXX'
37 | ```
38 | 
39 | **Following is what you need for this book:**
40 | If you're a data analyst, business analyst, or data scientist looking to analyze time series data effectively for solving business problems, this is the book for you. Basic statistics knowledge is assumed, but no machine learning knowledge is necessary. Prior experience with time series data and how it relates to various business problems will help you get the most out of this book. This guide will also help machine learning practitioners find new ways to leverage their skills to build effective time series-based applications.
41 | 
42 | With the following software and hardware list you can run all code files present in the book (Chapter 1-15).
43 | 
44 | ### Software and Hardware List
45 | 
46 | | Chapter  |AWS services covered in the book                  | OS required                        |
47 | | -------- | ------------------------------------| -----------------------------------|
48 | | 1 - 15     | Amazon Forecast                    | Any browser (Chrome recommended) running on Windows, Mac OS X, and Linux (Any) |
49 | | 1 - 15       | Amazon Lookout for Equipment       | Any browser (Chrome recommended) running on Windows, Mac OS X, and Linux (Any) |
50 | | 1 - 15        | Amazon Lookout for Metrics         | Any browser (Chrome recommended) running on Windows, Mac OS X, and Linux (Any) |
51 | 
52 | 
 53 | We also provide a PDF file that has color images of the screenshots/diagrams used in this book. [Click here to download it](https://static.packt-cdn.com/downloads/9781801816847_ColorImages.pdf).
54 | 
55 | ### Related products 
56 | * Data Engineering with AWS [[Packt]](https://www.packtpub.com/product/data-engineering-with-aws/9781800560413) [[Amazon]](https://www.amazon.com/dp/1800560419)
57 | 
58 | * Serverless Analytics with Amazon Athena [[Packt]](https://www.packtpub.com/product/serverless-analytics-with-amazon-athena/9781800562349) [[Amazon]](https://www.amazon.com/dp/1800562349)
59 | 
60 | ## Get to Know the Author
61 | **Michaël Hoarau**
62 | is an AI/ML specialist solutions architect (SA) working at Amazon Web Services (AWS). He is an AWS Certified Associate SA. He previously worked as an AI/ML specialist SA at AWS and the EMEA head of data science at GE Digital. He has experience in building product quality prediction systems for multiple industries. He has used forecasting techniques to build virtual sensors for industrial production lines. He has also helped multiple customers build forecasting and anomaly detection systems to increase their business efficiency.
63 | ### Download a free PDF
64 | 
65 |  <i>If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.<br>Simply click on the link to claim your free PDF.</i>
66 | <p align="center"> <a href="https://packt.link/free-ebook/9781801816847">https://packt.link/free-ebook/9781801816847 </a> </p>


--------------------------------------------------------------------------------
/Chapter09/create_schema.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import json
  3 | import pandas as pd
  4 | import s3fs
  5 | 
  6 | from typing import List, Dict
  7 | 
  8 | def create_data_schema_from_s3_path(s3_path):
  9 |     """
 10 |     Generates a data schema compatible for Lookout for Equipment from an S3 
 11 |     directory
 12 |     
 13 |     Parameters:
 14 |         s3_path (string):
 15 |             a path pointing to the root directory on S3 where all the CSV files 
 16 |             are located
 17 |             
 18 |     Returns:
 19 |         string:
 20 |             a JSON-formatted string ready to be used as a schema for a Lookout
 21 |             for Equipment dataset
 22 |     """
 23 |     # We should have only directories at the first level of this S3 path:
 24 |     fs = s3fs.S3FileSystem()
 25 |     components = fs.ls(s3_path)
 26 |     
 27 |     # Loops through each subdirectory found in the root dir:
 28 |     DATASET_COMPONENT_FIELDS_MAP = dict()
 29 |     for subsystem in components:
 30 |         # The first tag should always be Timestamp
 31 |         subsystem_tags = ['timestamp']
 32 |         
 33 |         # Opens the first file (they have the same structure):
 34 |         files = fs.ls(subsystem)
 35 |         for file in files:
 36 |             if file[-1] != '/':
 37 |                 break
 38 | 
 39 |         current_subsystem_df = pd.read_csv(f's3://{file}', nrows=1)
 40 |         subsystem_tags = subsystem_tags + current_subsystem_df.columns.tolist()[1:]
 41 |         
 42 |         DATASET_COMPONENT_FIELDS_MAP.update({subsystem.split('/')[-1]: subsystem_tags})
 43 | 
 44 |     # Generate the associated JSON schema:
 45 |     schema = create_data_schema(DATASET_COMPONENT_FIELDS_MAP)
 46 |     
 47 |     return schema
 48 |     
 49 | def create_data_schema(component_fields_map: Dict):
 50 |     """
 51 |     Generates a JSON formatted string from a dictionary
 52 |     
 53 |     Parameters:
 54 |         component_fields_map (dict):
 55 |             a dictionary containing a field maps for the dataset schema
 56 |             
 57 |     Returns:
 58 |         string:
 59 |             a JSON-formatted string ready to be used as a schema for a dataset
 60 |     """
 61 |     schema = json.dumps(
 62 |         _create_data_schema_map(
 63 |             component_fields_map=component_fields_map
 64 |         )
 65 |     )
 66 |     
 67 |     return schema
 68 | 
 69 | def _create_data_schema_map(component_fields_map: Dict):
 70 |     """
 71 |     Generate a dictionary with the JSON format expected by Lookout for Equipment
 72 |     to be used as the schema for a dataset at ingestion, training time and
 73 |     inference time
 74 |     
 75 |     Parameters:
 76 |         component_fields_map (dict):
 77 |             a dictionary containing a field maps for the dataset schema
 78 | 
 79 |     Returns:
 80 |         dict:
 81 |             a dictionnary containing the detailed schema built from the original
 82 |             dictionary mapping
 83 |     """
 84 |     # Build the schema for the current component:
 85 |     component_schema_list = [_create_component_schema(
 86 |             component_name, 
 87 |             component_fields_map[component_name]
 88 |         ) for component_name in component_fields_map
 89 |     ]
 90 |     
 91 |     # The root of the schema is a "Components" tag:
 92 |     data_schema = dict()
 93 |     data_schema['Components'] = component_schema_list
 94 | 
 95 |     return data_schema
 96 | 
 97 | def _create_component_schema(component_name: str, field_names: List):
 98 |     """
 99 |     Build a schema for a given component and fieds list
100 |     
101 |     Parameters
102 |         component_name (string):
103 |             name of the component to build a schema for
104 |         
105 |         field_names (list of strings):
106 |             name of all the fields included in this component
107 |             
108 |     Returns:
109 |         dict:
110 |             A dictionnary containing the detailed schema for a given component
111 |     """
112 |     # Test if the field names is correct for this component:
113 |     if len(field_names) == 0:
114 |         raise Exception(f'Field names for component {component_name} should not be empty')
115 |     if len(field_names) == 1:
116 |         raise Exception(f'Component {component_name} must have at least one sensor beyond the timestamp')
117 |     
118 |     # The first field is a timestamp:
119 |     col_list  = [{'Name': field_names[0], 'Type': 'DATETIME'}]
120 |     
121 |     # All the others are float values:
122 |     col_list = col_list + [
123 |         {'Name': field_name, 'Type': 'DOUBLE'} 
124 |         for field_name in field_names[1:]
125 |     ]
126 |     
127 |     # Build the schema for this component:
128 |     component_schema = dict()
129 |     component_schema['ComponentName'] = component_name
130 |     component_schema['Columns'] = col_list
131 |             
132 |     return component_schema
133 | 
if __name__ == '__main__':
    # Command line usage: the only (positional) argument is the root S3
    # location; the generated JSON schema is printed on the standard output.
    cli = argparse.ArgumentParser(description="Generate a JSON schema from an S3 location")
    cli.add_argument("s3path", type=str, help="The root S3 location where the training data are")
    cli_args = cli.parse_args()

    print(create_data_schema_from_s3_path(cli_args.s3path))


--------------------------------------------------------------------------------
/Chapter09/chapter9-preparing-l4e-dataset.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "1baf74c4",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# Time series analysis on AWS\n",
  9 |     "*Chapter 9 - Creating a dataset and ingesting your data*"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "markdown",
 14 |    "id": "3ba931c3",
 15 |    "metadata": {},
 16 |    "source": [
 17 |     "## Initializations\n",
 18 |     "---"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": null,
 24 |    "id": "29ddeca5",
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "!pip install --quiet tqdm kaggle"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "markdown",
 33 |    "id": "d16b27fd",
 34 |    "metadata": {},
 35 |    "source": [
 36 |     "### Imports"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": null,
 42 |    "id": "44909eac",
 43 |    "metadata": {},
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "import matplotlib.colors as mpl_colors\n",
 47 |     "import matplotlib.dates as mdates\n",
 48 |     "import matplotlib.ticker as ticker\n",
 49 |     "import matplotlib.pyplot as plt\n",
 50 |     "import numpy as np\n",
 51 |     "import os\n",
 52 |     "import pandas as pd\n",
 53 |     "import sys\n",
 54 |     "import warnings\n",
 55 |     "import zipfile\n",
 56 |     "\n",
 57 |     "from matplotlib import gridspec\n",
 58 |     "from sklearn.preprocessing import normalize\n",
 59 |     "from tqdm import tqdm\n",
 60 |     "from urllib.request import urlretrieve"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "id": "45b8057e",
 66 |    "metadata": {},
 67 |    "source": [
 68 |     "### Parameters"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": null,
 74 |    "id": "615f66ee",
 75 |    "metadata": {},
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "RAW_DATA = os.path.join('..', 'Data', 'raw')\n",
 79 |     "DATA = os.path.join('..', 'Data')\n",
 80 |     "warnings.filterwarnings(\"ignore\")\n",
 81 |     "os.makedirs(RAW_DATA, exist_ok=True)\n",
 82 |     "\n",
 83 |     "%matplotlib inline\n",
 84 |     "plt.style.use('fivethirtyeight')\n",
 85 |     "prop_cycle = plt.rcParams['axes.prop_cycle']\n",
 86 |     "colors = prop_cycle.by_key()['color']\n",
 87 |     "\n",
 88 |     "plt.rcParams['figure.dpi'] = 300\n",
 89 |     "plt.rcParams['lines.linewidth'] = 0.3\n",
 90 |     "plt.rcParams['axes.titlesize'] = 6\n",
 91 |     "plt.rcParams['axes.labelsize'] = 6\n",
 92 |     "plt.rcParams['xtick.labelsize'] = 5\n",
 93 |     "plt.rcParams['ytick.labelsize'] = 5\n",
 94 |     "plt.rcParams['grid.linewidth'] = 0.2\n",
 95 |     "plt.rcParams['legend.fontsize'] = 5"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "markdown",
100 |    "id": "c104b772",
101 |    "metadata": {},
102 |    "source": [
103 |     "### Helper functions"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": null,
109 |    "id": "8e34ec73",
110 |    "metadata": {},
111 |    "outputs": [],
112 |    "source": [
113 |     "def progress_report_hook(count, block_size, total_size):\n",
114 |     "    mb = int(count * block_size // 1048576)\n",
115 |     "    if count % 500 == 0:\n",
116 |     "        sys.stdout.write(\"\\r{} MB downloaded\".format(mb))\n",
117 |     "        sys.stdout.flush()"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "markdown",
122 |    "id": "67742269",
123 |    "metadata": {},
124 |    "source": [
125 |     "### Downloading datasets"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "markdown",
130 |    "id": "ed48f20a",
131 |    "metadata": {},
132 |    "source": [
133 |     "#### **Dataset 4:** Industrial pump data\n",
134 |     "To download this dataset from Kaggle, you will need to have an account and create a token that you install on your machine. You can follow [**this link**](https://www.kaggle.com/docs/api) to get started with the Kaggle API. Once generated, make sure your Kaggle token is stored in the `~/.kaggle/kaggle.json` file, or the next cells will issue an error. To get a Kaggle token, go to kaggle.com and create an account. Then navigate to **My account** and scroll down to the API section. There, click the **Create new API token** button:\n",
135 |     "\n",
136 |     "<img src=\"../Assets/kaggle_api.png\" />\n"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "code",
141 |    "execution_count": null,
142 |    "id": "42ec6c50",
143 |    "metadata": {},
144 |    "outputs": [],
145 |    "source": [
146 |     "FILE_NAME    = 'pump-sensor-data.zip'\n",
147 |     "ARCHIVE_PATH = os.path.join(RAW_DATA, FILE_NAME)\n",
148 |     "FILE_PATH    = os.path.join(DATA, 'pump', 'sensor.csv')\n",
149 |     "FILE_DIR     = os.path.dirname(FILE_PATH)\n",
150 |     "\n",
151 |     "if not os.path.isfile(FILE_PATH):\n",
152 |     "    if not os.path.exists('/home/ec2-user/.kaggle/kaggle.json'):\n",
153 |     "        os.makedirs('/home/ec2-user/.kaggle/', exist_ok=True)\n",
154 |     "        raise Exception('The kaggle.json token was not found.\\nCreating the /home/ec2-user/.kaggle/ directory: put your kaggle.json file there once you have generated it from the Kaggle website')\n",
155 |     "    else:\n",
156 |     "        print('The kaggle.json token file was found: making sure it is not readable by other users on this system.')\n",
157 |     "        !chmod 600 /home/ec2-user/.kaggle/kaggle.json\n",
158 |     "\n",
159 |     "    os.makedirs(os.path.join(DATA, 'pump'), exist_ok=True)\n",
160 |     "    !kaggle datasets download -d nphantawee/pump-sensor-data -p $RAW_DATA\n",
161 |     "\n",
162 |     "    print(\"\\nExtracting data archive\")\n",
163 |     "    zip_ref = zipfile.ZipFile(ARCHIVE_PATH, 'r')\n",
164 |     "    zip_ref.extractall(FILE_DIR + '/')\n",
165 |     "    zip_ref.close()\n",
166 |     "    \n",
167 |     "else:\n",
168 |     "    print(\"File found, skipping download\")"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "markdown",
173 |    "id": "011c7f2c",
174 |    "metadata": {},
175 |    "source": [
176 |     "## Dataset visualization\n",
177 |     "---"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "markdown",
182 |    "id": "32e5cc41",
183 |    "metadata": {},
184 |    "source": [
185 |     "### **4.** Industrial pump data"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "code",
190 |    "execution_count": null,
191 |    "id": "03c07a8c",
192 |    "metadata": {},
193 |    "outputs": [],
194 |    "source": [
195 |     "FILE_PATH = os.path.join(DATA, 'pump', 'sensor.csv')\n",
196 |     "pump_df = pd.read_csv(FILE_PATH, sep=',')\n",
197 |     "pump_df.drop(columns={'Unnamed: 0'}, inplace=True)\n",
198 |     "pump_df['timestamp'] = pd.to_datetime(pump_df['timestamp'], format='%Y-%m-%d %H:%M:%S')\n",
199 |     "pump_df = pump_df.set_index('timestamp')\n",
200 |     "\n",
201 |     "pump_df['machine_status'].replace(to_replace='NORMAL', value=np.nan, inplace=True)\n",
202 |     "pump_df['machine_status'].replace(to_replace='BROKEN', value=1, inplace=True)\n",
203 |     "pump_df['machine_status'].replace(to_replace='RECOVERING', value=1, inplace=True)\n",
204 |     "\n",
205 |     "print('Shape:', pump_df.shape)\n",
206 |     "pump_df.head()"
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "code",
211 |    "execution_count": null,
212 |    "id": "ef2f9cff",
213 |    "metadata": {},
214 |    "outputs": [],
215 |    "source": [
216 |     "pump_df"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "code",
221 |    "execution_count": null,
222 |    "id": "dc6c4ae7",
223 |    "metadata": {},
224 |    "outputs": [],
225 |    "source": [
226 |     "file_structure_df = pump_df.iloc[:, 0:10].resample('5D').mean()"
227 |    ]
228 |   },
229 |   {
230 |    "cell_type": "code",
231 |    "execution_count": null,
232 |    "id": "96e5b0af",
233 |    "metadata": {},
234 |    "outputs": [],
235 |    "source": [
236 |     "plt.rcParams['hatch.linewidth'] = 0.5\n",
237 |     "plt.rcParams['lines.linewidth'] = 0.5\n",
238 |     "\n",
239 |     "fig = plt.figure(figsize=(5,1))\n",
240 |     "ax1 = fig.add_subplot(1,1,1)\n",
241 |     "plot1 = ax1.plot(pump_df['sensor_00'], label='Healthy pump')\n",
242 |     "\n",
243 |     "ax2 = ax1.twinx()\n",
244 |     "plot2 = ax2.fill_between(\n",
245 |     "    x=pump_df.index, \n",
246 |     "    y1=0.0, \n",
247 |     "    y2=pump_df['machine_status'], \n",
248 |     "    color=colors[1], \n",
249 |     "    linewidth=0.0,\n",
250 |     "    edgecolor='#000000',\n",
251 |     "    alpha=0.5, \n",
252 |     "    hatch=\"//////\", \n",
253 |     "    label='Broken pump'\n",
254 |     ")\n",
255 |     "ax2.grid(False)\n",
256 |     "ax2.set_yticks([])\n",
257 |     "\n",
258 |     "labels = [plot1[0].get_label(), plot2.get_label()]\n",
259 |     "\n",
260 |     "plt.legend(handles=[plot1[0], plot2], labels=labels, loc='lower center', ncol=2, bbox_to_anchor=(0.5, -.4))\n",
261 |     "plt.title('Industrial pump sensor data')\n",
262 |     "\n",
263 |     "plt.show()"
264 |    ]
265 |   },
266 |   {
267 |    "cell_type": "code",
268 |    "execution_count": null,
269 |    "id": "32caf6ba",
270 |    "metadata": {},
271 |    "outputs": [],
272 |    "source": [
273 |     "start_date = np.min(pump_df.index)\n",
274 |     "end_date = np.max(pump_df.index)\n",
275 |     "num_periods = pump_df.shape[0]\n",
276 |     "\n",
277 |     "new_index = pd.date_range(start=start_date, periods=num_periods, freq='5min')\n",
278 |     "pump_df.index = new_index\n",
279 |     "pump_df.index.name = 'Timestamp'"
280 |    ]
281 |   },
282 |   {
283 |    "cell_type": "code",
284 |    "execution_count": null,
285 |    "id": "908b6c98",
286 |    "metadata": {},
287 |    "outputs": [],
288 |    "source": [
289 |     "plt.rcParams['hatch.linewidth'] = 0.5\n",
290 |     "plt.rcParams['lines.linewidth'] = 0.5\n",
291 |     "\n",
292 |     "fig = plt.figure(figsize=(5,1))\n",
293 |     "ax1 = fig.add_subplot(1,1,1)\n",
294 |     "plot1 = ax1.plot(pump_df['sensor_00'], label='sensor_00')\n",
295 |     "# plot1 = ax1.plot(pump_df['sensor_34'], label='Healthy sensor_34')\n",
296 |     "\n",
297 |     "ax2 = ax1.twinx()\n",
298 |     "plot2 = ax2.fill_between(\n",
299 |     "    x=pump_df.index, \n",
300 |     "    y1=0.0, \n",
301 |     "    y2=pump_df['machine_status'], \n",
302 |     "    color=colors[1], \n",
303 |     "    linewidth=0.0,\n",
304 |     "    edgecolor='#000000',\n",
305 |     "    alpha=0.5, \n",
306 |     "    hatch=\"//////\", \n",
307 |     "    label='Broken pump'\n",
308 |     ")\n",
309 |     "ax2.grid(False)\n",
310 |     "ax2.set_yticks([])\n",
311 |     "\n",
312 |     "labels = [plot1[0].get_label(), plot2.get_label()]\n",
313 |     "\n",
314 |     "plt.legend(handles=[plot1[0], plot2], labels=labels, loc='lower center', ncol=2, bbox_to_anchor=(0.5, -.4))\n",
315 |     "plt.title('Industrial pump sensor data')\n",
316 |     "\n",
317 |     "# start = pd.to_datetime('2018-06-24 14:25')\n",
318 |     "# end = pd.to_datetime('2018-07-06 09:40')\n",
319 |     "# plt.xlim(start, end)\n",
320 |     "plt.show()"
321 |    ]
322 |   },
323 |   {
324 |    "cell_type": "code",
325 |    "execution_count": null,
326 |    "id": "0cdd9361",
327 |    "metadata": {},
328 |    "outputs": [],
329 |    "source": [
330 |     "plt.rcParams['axes.titlesize'] = 4\n",
331 |     "plt.rcParams['axes.labelsize'] = 4\n",
332 |     "plt.rcParams['xtick.labelsize'] = 3\n",
333 |     "plt.rcParams['ytick.labelsize'] = 3\n",
334 |     "\n",
335 |     "for f in list(pump_df.columns):\n",
336 |     "    fig = plt.figure(figsize=(2.5,0.5))\n",
337 |     "    ax1 = fig.add_subplot(1,1,1)\n",
338 |     "    plot1 = ax1.plot(pump_df[f])\n",
339 |     "    ax1.set_title(f)\n",
340 |     "    \n",
341 |     "plt.show()"
342 |    ]
343 |   },
344 |   {
345 |    "cell_type": "code",
346 |    "execution_count": null,
347 |    "id": "692ef77a",
348 |    "metadata": {},
349 |    "outputs": [],
350 |    "source": [
351 |     "pump_df = pump_df.drop(columns=['sensor_50', 'sensor_15'])"
352 |    ]
353 |   },
354 |   {
355 |    "cell_type": "code",
356 |    "execution_count": null,
357 |    "id": "b46e10dd",
358 |    "metadata": {},
359 |    "outputs": [],
360 |    "source": [
361 |     "pump_df"
362 |    ]
363 |   },
364 |   {
365 |    "cell_type": "markdown",
366 |    "id": "48650f84",
367 |    "metadata": {},
368 |    "source": [
369 |     "## Preparing the dataset for Lookout for Equipment\n",
370 |     "---\n",
371 |     "### Preparing time series data"
372 |    ]
373 |   },
374 |   {
375 |    "cell_type": "code",
376 |    "execution_count": null,
377 |    "id": "18768b76",
378 |    "metadata": {},
379 |    "outputs": [],
380 |    "source": [
381 |     "TRAIN_DATA = os.path.join('..', 'Data', 'pump', 'train-data')\n",
382 |     "LABEL_DATA = os.path.join('..', 'Data', 'pump', 'label-data')\n",
383 |     "\n",
384 |     "os.makedirs(TRAIN_DATA, exist_ok=True)\n",
385 |     "os.makedirs(LABEL_DATA, exist_ok=True)\n",
386 |     "\n",
387 |     "pump_df.index.name = 'Timestamp'"
388 |    ]
389 |   },
390 |   {
391 |    "cell_type": "code",
392 |    "execution_count": null,
393 |    "id": "cfc3af2f",
394 |    "metadata": {},
395 |    "outputs": [],
396 |    "source": [
397 |     "features = list(pump_df.columns)[:-1]\n",
398 |     "\n",
399 |     "for tag in tqdm(features):\n",
400 |     "    os.makedirs(os.path.join(TRAIN_DATA, tag), exist_ok=True)\n",
401 |     "    fname = os.path.join(TRAIN_DATA, tag, 'tag_data.csv')\n",
402 |     "    tag_df = pump_df[[tag]]\n",
403 |     "    tag_df.to_csv(fname)"
404 |    ]
405 |   },
406 |   {
407 |    "cell_type": "markdown",
408 |    "id": "7cec31d8",
409 |    "metadata": {},
410 |    "source": [
411 |     "### Preparing label data"
412 |    ]
413 |   },
414 |   {
415 |    "cell_type": "code",
416 |    "execution_count": null,
417 |    "id": "b4843119",
418 |    "metadata": {},
419 |    "outputs": [],
420 |    "source": [
421 |     "expanded_labels = pump_df[['machine_status']]\n",
422 |     "expanded_labels['machine_status'].unique()"
423 |    ]
424 |   },
425 |   {
426 |    "cell_type": "code",
427 |    "execution_count": null,
428 |    "id": "d210da63",
429 |    "metadata": {},
430 |    "outputs": [],
431 |    "source": [
432 |     "from dateutil.relativedelta import relativedelta\n",
433 |     "\n",
434 |     "range_df = expanded_labels.copy()\n",
435 |     "range_df['BROKEN'] = False\n",
436 |     "range_df.loc[range_df['machine_status'] == 1.0, 'BROKEN'] = True\n",
437 |     "\n",
438 |     "range_df['Next Status'] = range_df['BROKEN'].shift(-1)\n",
439 |     "range_df['Start Range'] = (range_df['BROKEN'] == False) & (range_df['Next Status'] == True)\n",
440 |     "range_df['End Range'] = (range_df['BROKEN'] == True) & (range_df['Next Status'] == False)\n",
441 |     "range_df.iloc[0,3] = range_df.iloc[0,1]\n",
442 |     "range_df = range_df[(range_df['Start Range'] == True) | (range_df['End Range'] == True)]\n",
443 |     "\n",
444 |     "labels_df = pd.DataFrame(columns=['start', 'end'])\n",
445 |     "for index, row in range_df.iterrows():\n",
446 |     "    if row['Start Range']:\n",
447 |     "        start = index\n",
448 |     "\n",
449 |     "    if row['End Range']:\n",
450 |     "        end = index\n",
451 |     "        labels_df = labels_df.append({\n",
452 |     "            'start': start + relativedelta(hours=-12),\n",
453 |     "            'end': end + relativedelta(hours=+12)\n",
454 |     "        }, ignore_index=True)\n",
455 |     "        \n",
456 |     "labels_df"
457 |    ]
458 |   },
459 |   {
460 |    "cell_type": "code",
461 |    "execution_count": null,
462 |    "id": "4a92c2ec",
463 |    "metadata": {},
464 |    "outputs": [],
465 |    "source": [
466 |     "from dateutil.relativedelta import relativedelta\n",
467 |     "\n",
468 |     "labels_fname = os.path.join(LABEL_DATA, 'labels.csv')\n",
469 |     "labels_df['start'] = pd.to_datetime(labels_df['start'])\n",
470 |     "labels_df['end'] = pd.to_datetime(labels_df['end'])\n",
471 |     "labels_df['start'] = labels_df['start'].dt.strftime('%Y-%m-%dT%H:%M:%S.%f')\n",
472 |     "labels_df['end'] = labels_df['end'].dt.strftime('%Y-%m-%dT%H:%M:%S.%f')\n",
473 |     "labels_df.to_csv(labels_fname, header=None, index=None)"
474 |    ]
475 |   },
476 |   {
477 |    "cell_type": "markdown",
478 |    "id": "8388aa1c",
479 |    "metadata": {},
480 |    "source": [
481 |     "## Creating schema\n",
482 |     "---"
483 |    ]
484 |   },
485 |   {
486 |    "cell_type": "code",
487 |    "execution_count": null,
488 |    "id": "162e991c",
489 |    "metadata": {},
490 |    "outputs": [],
491 |    "source": [
492 |     "!pip install --quiet markdown"
493 |    ]
494 |   },
495 |   {
496 |    "cell_type": "code",
497 |    "execution_count": null,
498 |    "id": "23965ffe",
499 |    "metadata": {},
500 |    "outputs": [],
501 |    "source": [
502 |     "# Helper functions for managing Lookout for Equipment API calls:\n",
503 |     "sys.path.append('../../amazon-lookout-for-equipment-python-sdk/src')\n",
504 |     "import lookoutequipment as lookout\n",
505 |     "import sagemaker"
506 |    ]
507 |   },
508 |   {
509 |    "cell_type": "code",
510 |    "execution_count": null,
511 |    "id": "31b51e73",
512 |    "metadata": {},
513 |    "outputs": [],
514 |    "source": [
515 |     "DATASET_NAME = 'pump'\n",
516 |     "BUCKET       = 'pump-anomaly-detection'\n",
517 |     "PREFIX       = 'train-data/'\n",
518 |     "ROLE_ARN     = sagemaker.get_execution_role()"
519 |    ]
520 |   },
521 |   {
522 |    "cell_type": "code",
523 |    "execution_count": null,
524 |    "id": "413ba6dc",
525 |    "metadata": {},
526 |    "outputs": [],
527 |    "source": [
528 |     "lookout_dataset = lookout.LookoutEquipmentDataset(\n",
529 |     "    dataset_name=DATASET_NAME,\n",
530 |     "    component_root_dir=TRAIN_DATA,\n",
531 |     "    access_role_arn=ROLE_ARN\n",
532 |     ")"
533 |    ]
534 |   },
535 |   {
536 |    "cell_type": "code",
537 |    "execution_count": null,
538 |    "id": "1463753f",
539 |    "metadata": {},
540 |    "outputs": [],
541 |    "source": [
542 |     "lookout_dataset.dataset_schema"
543 |    ]
544 |   }
545 |  ],
546 |  "metadata": {
547 |   "kernelspec": {
548 |    "display_name": "conda_python3",
549 |    "language": "python",
550 |    "name": "conda_python3"
551 |   },
552 |   "language_info": {
553 |    "codemirror_mode": {
554 |     "name": "ipython",
555 |     "version": 3
556 |    },
557 |    "file_extension": ".py",
558 |    "mimetype": "text/x-python",
559 |    "name": "python",
560 |    "nbconvert_exporter": "python",
561 |    "pygments_lexer": "ipython3",
562 |    "version": "3.6.13"
563 |   }
564 |  },
565 |  "nbformat": 4,
566 |  "nbformat_minor": 5
567 | }
568 | 


--------------------------------------------------------------------------------
/Chapter01/chapter1-time-series-analysis-overview.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "id": "0b7105c6",
   6 |    "metadata": {},
   7 |    "source": [
   8 |     "# Time series analysis on AWS\n",
   9 |     "*Chapter 1 - Time series analysis overview*"
  10 |    ]
  11 |   },
  12 |   {
  13 |    "cell_type": "markdown",
  14 |    "id": "b393e4ec",
  15 |    "metadata": {},
  16 |    "source": [
  17 |     "## Initializations\n",
  18 |     "---"
  19 |    ]
  20 |   },
  21 |   {
  22 |    "cell_type": "code",
  23 |    "execution_count": null,
  24 |    "id": "d4dd4b8b",
  25 |    "metadata": {},
  26 |    "outputs": [],
  27 |    "source": [
  28 |     "!pip install --quiet tqdm kaggle tsia ruptures"
  29 |    ]
  30 |   },
  31 |   {
  32 |    "cell_type": "markdown",
  33 |    "id": "f9997f46",
  34 |    "metadata": {},
  35 |    "source": [
  36 |     "### Imports"
  37 |    ]
  38 |   },
  39 |   {
  40 |    "cell_type": "code",
  41 |    "execution_count": null,
  42 |    "id": "fd65af91",
  43 |    "metadata": {},
  44 |    "outputs": [],
  45 |    "source": [
  46 |     "import matplotlib.colors as mpl_colors\n",
  47 |     "import matplotlib.dates as mdates\n",
  48 |     "import matplotlib.ticker as ticker\n",
  49 |     "import matplotlib.pyplot as plt\n",
  50 |     "import numpy as np\n",
  51 |     "import os\n",
  52 |     "import pandas as pd\n",
  53 |     "import ruptures as rpt\n",
  54 |     "import sys\n",
  55 |     "import tsia\n",
  56 |     "import warnings\n",
  57 |     "import zipfile\n",
  58 |     "\n",
  59 |     "from matplotlib import gridspec\n",
  60 |     "from sklearn.preprocessing import normalize\n",
  61 |     "from tqdm import tqdm\n",
  62 |     "from urllib.request import urlretrieve"
  63 |    ]
  64 |   },
  65 |   {
  66 |    "cell_type": "markdown",
  67 |    "id": "7d104af8",
  68 |    "metadata": {},
  69 |    "source": [
  70 |     "### Parameters"
  71 |    ]
  72 |   },
  73 |   {
  74 |    "cell_type": "code",
  75 |    "execution_count": null,
  76 |    "id": "13989034",
  77 |    "metadata": {},
  78 |    "outputs": [],
  79 |    "source": [
  80 |     "RAW_DATA = os.path.join('..', 'Data', 'raw')\n",
  81 |     "DATA = os.path.join('..', 'Data')\n",
  82 |     "warnings.filterwarnings(\"ignore\")\n",
  83 |     "os.makedirs(RAW_DATA, exist_ok=True)\n",
  84 |     "\n",
  85 |     "%matplotlib inline\n",
  86 |     "# plt.style.use('Solarize_Light2')\n",
  87 |     "plt.style.use('fivethirtyeight')\n",
  88 |     "prop_cycle = plt.rcParams['axes.prop_cycle']\n",
  89 |     "colors = prop_cycle.by_key()['color']\n",
  90 |     "\n",
  91 |     "plt.rcParams['figure.dpi'] = 300\n",
  92 |     "plt.rcParams['lines.linewidth'] = 0.3\n",
  93 |     "plt.rcParams['axes.titlesize'] = 6\n",
  94 |     "plt.rcParams['axes.labelsize'] = 6\n",
  95 |     "plt.rcParams['xtick.labelsize'] = 4.5\n",
  96 |     "plt.rcParams['ytick.labelsize'] = 4.5\n",
  97 |     "plt.rcParams['grid.linewidth'] = 0.2\n",
  98 |     "plt.rcParams['legend.fontsize'] = 5"
  99 |    ]
 100 |   },
 101 |   {
 102 |    "cell_type": "markdown",
 103 |    "id": "71228c3a",
 104 |    "metadata": {},
 105 |    "source": [
 106 |     "### Helper functions"
 107 |    ]
 108 |   },
 109 |   {
 110 |    "cell_type": "code",
 111 |    "execution_count": null,
 112 |    "id": "ead7b9b7",
 113 |    "metadata": {},
 114 |    "outputs": [],
 115 |    "source": [
 116 |     "def progress_report_hook(count, block_size, total_size):\n",
 117 |     "    mb = int(count * block_size // 1048576)\n",
 118 |     "    if count % 500 == 0:\n",
 119 |     "        sys.stdout.write(\"\\r{} MB downloaded\".format(mb))\n",
 120 |     "        sys.stdout.flush()"
 121 |    ]
 122 |   },
 123 |   {
 124 |    "cell_type": "markdown",
 125 |    "id": "590ace42",
 126 |    "metadata": {},
 127 |    "source": [
 128 |     "### Downloading datasets"
 129 |    ]
 130 |   },
 131 |   {
 132 |    "cell_type": "markdown",
 133 |    "id": "c1a10de9",
 134 |    "metadata": {},
 135 |    "source": [
 136 |     "#### **Dataset 1:** Household energy consumption"
 137 |    ]
 138 |   },
 139 |   {
 140 |    "cell_type": "code",
 141 |    "execution_count": null,
 142 |    "id": "7821f0b1",
 143 |    "metadata": {},
 144 |    "outputs": [],
 145 |    "source": [
 146 |     "ORIGINAL_DATA = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00321/LD2011_2014.txt.zip'\n",
 147 |     "ARCHIVE_PATH  = os.path.join(RAW_DATA, 'energy-consumption.zip')\n",
 148 |     "FILE_NAME     = 'energy-consumption.csv'\n",
 149 |     "FILE_PATH     = os.path.join(DATA, 'energy', FILE_NAME)\n",
 150 |     "FILE_DIR      = os.path.dirname(FILE_PATH)\n",
 151 |     "\n",
 152 |     "if not os.path.isfile(FILE_PATH):\n",
 153 |     "    print(\"Downloading dataset (258MB), can take a few minutes depending on your connection\")\n",
 154 |     "    urlretrieve(ORIGINAL_DATA, ARCHIVE_PATH, reporthook=progress_report_hook)\n",
 155 |     "    os.makedirs(os.path.join(DATA, 'energy'), exist_ok=True)\n",
 156 |     "\n",
 157 |     "    print(\"\\nExtracting data archive\")\n",
 158 |     "    zip_ref = zipfile.ZipFile(ARCHIVE_PATH, 'r')\n",
 159 |     "    zip_ref.extractall(FILE_DIR + '/')\n",
 160 |     "    zip_ref.close()\n",
 161 |     "    \n",
 162 |     "    !rm -Rf $FILE_DIR/__MACOSX\n",
 163 |     "    !mv $FILE_DIR/LD2011_2014.txt $FILE_PATH\n",
 164 |     "    \n",
 165 |     "else:\n",
 166 |     "    print(\"File found, skipping download\")"
 167 |    ]
 168 |   },
 169 |   {
 170 |    "cell_type": "markdown",
 171 |    "id": "171badbf",
 172 |    "metadata": {},
 173 |    "source": [
 174 |     "#### **Dataset 2:** NASA Turbofan remaining useful lifetime"
 175 |    ]
 176 |   },
 177 |   {
 178 |    "cell_type": "code",
 179 |    "execution_count": null,
 180 |    "id": "244b7160",
 181 |    "metadata": {},
 182 |    "outputs": [],
 183 |    "source": [
 184 |     "ok = True\n",
 185 |     "ok = ok and os.path.exists(os.path.join(DATA, 'turbofan', 'train_FD001.txt'))\n",
 186 |     "ok = ok and os.path.exists(os.path.join(DATA, 'turbofan', 'test_FD001.txt'))\n",
 187 |     "ok = ok and os.path.exists(os.path.join(DATA, 'turbofan', 'RUL_FD001.txt'))\n",
 188 |     "\n",
 189 |     "if (ok):\n",
 190 |     "    print(\"File found, skipping download\")\n",
 191 |     "\n",
 192 |     "else:\n",
 193 |     "    print('Some datasets are missing, create working directories and download original dataset from the NASA repository.')\n",
 194 |     "    \n",
 195 |     "    # Making sure the directory already exists:\n",
 196 |     "    os.makedirs(os.path.join(DATA, 'turbofan'), exist_ok=True)\n",
 197 |     "\n",
 198 |     "    # Download the dataset from the NASA repository, unzip it and set\n",
 199 |     "    # aside the first training file to work on:\n",
 200 |     "    !wget https://ti.arc.nasa.gov/c/6/ --output-document=$RAW_DATA/CMAPSSData.zip\n",
 201 |     "    !unzip $RAW_DATA/CMAPSSData.zip -d $RAW_DATA\n",
 202 |     "    !cp $RAW_DATA/train_FD001.txt $DATA/turbofan/train_FD001.txt\n",
 203 |     "    !cp $RAW_DATA/test_FD001.txt $DATA/turbofan/test_FD001.txt\n",
 204 |     "    !cp $RAW_DATA/RUL_FD001.txt $DATA/turbofan/RUL_FD001.txt"
 205 |    ]
 206 |   },
 207 |   {
 208 |    "cell_type": "markdown",
 209 |    "id": "7eedc9c2",
 210 |    "metadata": {},
 211 |    "source": [
 212 |     "#### **Dataset 3:** Human heartbeat"
 213 |    ]
 214 |   },
 215 |   {
 216 |    "cell_type": "code",
 217 |    "execution_count": null,
 218 |    "id": "a1314ead",
 219 |    "metadata": {},
 220 |    "outputs": [],
 221 |    "source": [
 222 |     "ECG_DATA_SOURCE = 'http://www.timeseriesclassification.com/Downloads/ECG200.zip'\n",
 223 |     "ARCHIVE_PATH  = os.path.join(RAW_DATA, 'ECG200.zip')\n",
 224 |     "FILE_NAME     = 'ecg.csv'\n",
 225 |     "FILE_PATH     = os.path.join(DATA, 'ecg', FILE_NAME)\n",
 226 |     "FILE_DIR      = os.path.dirname(FILE_PATH)\n",
 227 |     "\n",
 228 |     "if not os.path.isfile(FILE_PATH):\n",
 229 |     "    urlretrieve(ECG_DATA_SOURCE, ARCHIVE_PATH)\n",
 230 |     "    os.makedirs(os.path.join(DATA, 'ecg'), exist_ok=True)\n",
 231 |     "\n",
 232 |     "    print(\"\\nExtracting data archive\")\n",
 233 |     "    zip_ref = zipfile.ZipFile(ARCHIVE_PATH, 'r')\n",
 234 |     "    zip_ref.extractall(FILE_DIR + '/')\n",
 235 |     "    zip_ref.close()\n",
 236 |     "    \n",
 237 |     "    !mv $DATA/ecg/ECG200_TRAIN.txt $FILE_PATH\n",
 238 |     "    \n",
 239 |     "else:\n",
 240 |     "    print(\"File found, skipping download\")"
 241 |    ]
 242 |   },
 243 |   {
 244 |    "cell_type": "markdown",
 245 |    "id": "0a297084",
 246 |    "metadata": {},
 247 |    "source": [
 248 |     "#### **Dataset 4:** Industrial pump data\n",
 249 |     "To download this dataset from Kaggle, you will need to have an account and create a token that you install on your machine. You can follow [**this link**](https://www.kaggle.com/docs/api) to get started with the Kaggle API. Once generated, make sure your Kaggle token is stored in the `~/.kaggle/kaggle.json` file, or the next cells will issue an error. In some cases, you may still get an error while using this location. Try moving your token to this location instead: `~/kaggle/kaggle.json` (note the absence of the `.` in the folder name).\n",
 250 |     "\n",
 251 |     "To get a Kaggle token, go to kaggle.com and create an account. Then navigate to **My account** and scroll down to the API section. There, click the **Create new API token** button:\n",
 252 |     "\n",
 253 |     "<img src=\"../Assets/kaggle_api.png\" />\n"
 254 |    ]
 255 |   },
 256 |   {
 257 |    "cell_type": "code",
 258 |    "execution_count": null,
 259 |    "id": "3617c828",
 260 |    "metadata": {},
 261 |    "outputs": [],
 262 |    "source": [
 263 |     "FILE_NAME    = 'pump-sensor-data.zip'\n",
 264 |     "ARCHIVE_PATH = os.path.join(RAW_DATA, FILE_NAME)\n",
 265 |     "FILE_PATH    = os.path.join(DATA, 'pump', 'sensor.csv')\n",
 266 |     "FILE_DIR     = os.path.dirname(FILE_PATH)\n",
 267 |     "\n",
 268 |     "if not os.path.isfile(FILE_PATH):\n",
 269 |     "    if not os.path.exists('/home/ec2-user/.kaggle/kaggle.json'):\n",
 270 |     "        os.makedirs('/home/ec2-user/.kaggle/', exist_ok=True)\n",
 271 |     "        raise Exception('The kaggle.json token was not found.\\nCreating the /home/ec2-user/.kaggle/ directory: put your kaggle.json file there once you have generated it from the Kaggle website')\n",
 272 |     "    else:\n",
 273 |     "        print('The kaggle.json token file was found: making sure it is not readable by other users on this system.')\n",
 274 |     "        !chmod 600 /home/ec2-user/.kaggle/kaggle.json\n",
 275 |     "\n",
 276 |     "    os.makedirs(os.path.join(DATA, 'pump'), exist_ok=True)\n",
 277 |     "    !kaggle datasets download -d nphantawee/pump-sensor-data -p $RAW_DATA\n",
 278 |     "\n",
 279 |     "    print(\"\\nExtracting data archive\")\n",
 280 |     "    zip_ref = zipfile.ZipFile(ARCHIVE_PATH, 'r')\n",
 281 |     "    zip_ref.extractall(FILE_DIR + '/')\n",
 282 |     "    zip_ref.close()\n",
 283 |     "    \n",
 284 |     "else:\n",
 285 |     "    print(\"File found, skipping download\")"
 286 |    ]
 287 |   },
 288 |   {
 289 |    "cell_type": "markdown",
 290 |    "id": "99c3cc24",
 291 |    "metadata": {},
 292 |    "source": [
 293 |     "#### **Dataset 5:** London household energy consumption with weather data"
 294 |    ]
 295 |   },
 296 |   {
 297 |    "cell_type": "code",
 298 |    "execution_count": null,
 299 |    "id": "b5b137b9",
 300 |    "metadata": {},
 301 |    "outputs": [],
 302 |    "source": [
 303 |     "FILE_NAME    = 'smart-meters-in-london.zip'\n",
 304 |     "ARCHIVE_PATH = os.path.join(RAW_DATA, FILE_NAME)\n",
 305 |     "FILE_PATH    = os.path.join(DATA, 'energy-london', 'smart-meters-in-london.zip')\n",
 306 |     "FILE_DIR     = os.path.dirname(FILE_PATH)\n",
 307 |     "\n",
 308 |     "# Checks if the data were already downloaded:\n",
 309 |     "if os.path.exists(os.path.join(DATA, 'energy-london', 'acorn_details.csv')):\n",
 310 |     "    print(\"File found, skipping download\")\n",
 311 |     "    \n",
 312 |     "else:\n",
 313 |     "    # Downloading and unzipping datasets from Kaggle:\n",
 314 |     "    print(\"Downloading dataset (2.26G), can take a few minutes depending on your connection\")\n",
 315 |     "    os.makedirs(os.path.join(DATA, 'energy-london'), exist_ok=True)\n",
 316 |     "    !kaggle datasets download -d jeanmidev/smart-meters-in-london -p $RAW_DATA\n",
 317 |     "    \n",
 318 |     "    print('Unzipping files...')\n",
 319 |     "    zip_ref = zipfile.ZipFile(ARCHIVE_PATH, 'r')\n",
 320 |     "    zip_ref.extractall(FILE_DIR + '/')\n",
 321 |     "    zip_ref.close()\n",
 322 |     "    \n",
 323 |     "    !rm $DATA/energy-london/*zip\n",
 324 |     "    !rm $DATA/energy-london/*gz\n",
 325 |     "    !mv $DATA/energy-london/halfhourly_dataset/halfhourly_dataset/* $DATA/energy-london/halfhourly_dataset\n",
 326 |     "    !rm -Rf $DATA/energy-london/halfhourly_dataset/halfhourly_dataset\n",
 327 |     "    !mv $DATA/energy-london/daily_dataset/daily_dataset/* $DATA/energy-london/daily_dataset\n",
 328 |     "    !rm -Rf $DATA/energy-london/daily_dataset/daily_dataset"
 329 |    ]
 330 |   },
 331 |   {
 332 |    "cell_type": "markdown",
 333 |    "id": "f5bd6e2e",
 334 |    "metadata": {},
 335 |    "source": [
 336 |     "## Dataset visualization\n",
 337 |     "---"
 338 |    ]
 339 |   },
 340 |   {
 341 |    "cell_type": "markdown",
 342 |    "id": "bef12b32",
 343 |    "metadata": {},
 344 |    "source": [
 345 |     "### **1.** Household energy consumption"
 346 |    ]
 347 |   },
 348 |   {
 349 |    "cell_type": "code",
 350 |    "execution_count": null,
 351 |    "id": "9649de6c",
 352 |    "metadata": {},
 353 |    "outputs": [],
 354 |    "source": [
 355 |     "%%time\n",
 356 |     "\n",
 357 |     "FILE_PATH = os.path.join(DATA, 'energy', 'energy-consumption.csv')\n",
 358 |     "energy_df = pd.read_csv(FILE_PATH, sep=';', decimal=',')\n",
 359 |     "energy_df = energy_df.rename(columns={'Unnamed: 0': 'Timestamp'})\n",
 360 |     "energy_df['Timestamp'] = pd.to_datetime(energy_df['Timestamp'])\n",
 361 |     "energy_df = energy_df.set_index('Timestamp')\n",
 362 |     "energy_df.iloc[100000:, 1:5].head()"
 363 |    ]
 364 |   },
 365 |   {
 366 |    "cell_type": "code",
 367 |    "execution_count": null,
 368 |    "id": "acc364f3",
 369 |    "metadata": {},
 370 |    "outputs": [],
 371 |    "source": [
 372 |     "fig = plt.figure(figsize=(5, 1.876))\n",
 373 |     "plt.plot(energy_df['MT_002'])\n",
 374 |     "plt.title('Energy consumption for household MT_002')\n",
 375 |     "plt.show()"
 376 |    ]
 377 |   },
 378 |   {
 379 |    "cell_type": "markdown",
 380 |    "id": "f5ab3461",
 381 |    "metadata": {},
 382 |    "source": [
 383 |     "### **2.** NASA Turbofan data"
 384 |    ]
 385 |   },
 386 |   {
 387 |    "cell_type": "code",
 388 |    "execution_count": null,
 389 |    "id": "b2e44083",
 390 |    "metadata": {},
 391 |    "outputs": [],
 392 |    "source": [
 393 |     "FILE_PATH = os.path.join(DATA, 'turbofan', 'train_FD001.txt')\n",
 394 |     "turbofan_df = pd.read_csv(FILE_PATH, header=None, sep=' ')\n",
 395 |     "turbofan_df.dropna(axis='columns', how='all', inplace=True)\n",
 396 |     "print('Shape:', turbofan_df.shape)\n",
 397 |     "turbofan_df.head(5)"
 398 |    ]
 399 |   },
 400 |   {
 401 |    "cell_type": "code",
 402 |    "execution_count": null,
 403 |    "id": "d8342359",
 404 |    "metadata": {},
 405 |    "outputs": [],
 406 |    "source": [
 407 |     "columns = [\n",
 408 |     "    'unit_number',\n",
 409 |     "    'cycle',\n",
 410 |     "    'setting_1',\n",
 411 |     "    'setting_2',\n",
 412 |     "    'setting_3',\n",
 413 |     "] + ['sensor_{}'.format(s) for s in range(1,22)]\n",
 414 |     "turbofan_df.columns = columns\n",
 415 |     "turbofan_df.head()"
 416 |    ]
 417 |   },
 418 |   {
 419 |    "cell_type": "code",
 420 |    "execution_count": null,
 421 |    "id": "32788dc9",
 422 |    "metadata": {},
 423 |    "outputs": [],
 424 |    "source": [
 425 |     "# Add a RUL column and group the data by unit_number:\n",
 426 |     "turbofan_df['rul'] = 0\n",
 427 |     "grouped_data = turbofan_df.groupby(by='unit_number')\n",
 428 |     "\n",
 429 |     "# Loops through each unit number to get the lifecycle counts:\n",
 430 |     "for unit, rul in enumerate(grouped_data.count()['cycle']):\n",
 431 |     "    current_df = turbofan_df[turbofan_df['unit_number'] == (unit+1)].copy()\n",
 432 |     "    current_df['rul'] = rul - current_df['cycle']\n",
 433 |     "    turbofan_df[turbofan_df['unit_number'] == (unit+1)] = current_df"
 434 |    ]
 435 |   },
 436 |   {
 437 |    "cell_type": "code",
 438 |    "execution_count": null,
 439 |    "id": "3b1fe985",
 440 |    "metadata": {},
 441 |    "outputs": [],
 442 |    "source": [
 443 |     "df = turbofan_df.iloc[:, [0,1,2,3,4,5,6,25,26]].copy()\n",
 444 |     "df = df[df['unit_number'] == 1]\n",
 445 |     "\n",
 446 |     "def highlight_cols(s):\n",
 447 |     "    return f'background-color: rgba(0, 143, 213, 0.3)'\n",
 448 |     "\n",
 449 |     "df.head(10).style.applymap(highlight_cols, subset=['rul'])"
 450 |    ]
 451 |   },
 452 |   {
 453 |    "cell_type": "markdown",
 454 |    "id": "6f313864",
 455 |    "metadata": {},
 456 |    "source": [
 457 |     "### **3.** ECG Data"
 458 |    ]
 459 |   },
 460 |   {
 461 |    "cell_type": "code",
 462 |    "execution_count": null,
 463 |    "id": "d5fc7c8e",
 464 |    "metadata": {},
 465 |    "outputs": [],
 466 |    "source": [
 467 |     "FILE_PATH = os.path.join(DATA, 'ecg', 'ecg.csv')\n",
 468 |     "ecg_df = pd.read_csv(FILE_PATH, header=None, sep='  ')\n",
 469 |     "print('Shape:', ecg_df.shape)\n",
 470 |     "ecg_df.head()"
 471 |    ]
 472 |   },
 473 |   {
 474 |    "cell_type": "code",
 475 |    "execution_count": null,
 476 |    "id": "3b2664cf",
 477 |    "metadata": {},
 478 |    "outputs": [],
 479 |    "source": [
 480 |     "plt.rcParams['lines.linewidth'] = 0.7\n",
 481 |     "fig = plt.figure(figsize=(5,2))\n",
 482 |     "label_normal = False\n",
 483 |     "label_ischemia = False\n",
 484 |     "for i in range(0,100):\n",
 485 |     "    label = ecg_df.iloc[i, 0]\n",
 486 |     "    if (label == -1):\n",
 487 |     "        color = colors[1]\n",
 488 |     "        \n",
 489 |     "        if label_ischemia:\n",
 490 |     "            plt.plot(ecg_df.iloc[i,1:96], color=color, alpha=0.5, linestyle='--', linewidth=0.5)\n",
 491 |     "        else:\n",
 492 |     "            plt.plot(ecg_df.iloc[i,1:96], color=color, alpha=0.5, label='Ischemia', linestyle='--')\n",
 493 |     "            label_ischemia = True\n",
 494 |     "            \n",
 495 |     "    else:\n",
 496 |     "        color = colors[0]\n",
 497 |     "        \n",
 498 |     "        if label_normal:\n",
 499 |     "            plt.plot(ecg_df.iloc[i,1:96], color=color, alpha=0.5)\n",
 500 |     "        else:\n",
 501 |     "            plt.plot(ecg_df.iloc[i,1:96], color=color, alpha=0.5, label='Normal')\n",
 502 |     "            label_normal = True\n",
 503 |     "    \n",
 504 |     "plt.title('Human heartbeat activity')\n",
 505 |     "plt.legend(loc='upper right', ncol=2)\n",
 506 |     "plt.show()"
 507 |    ]
 508 |   },
 509 |   {
 510 |    "cell_type": "markdown",
 511 |    "id": "5b904dba",
 512 |    "metadata": {},
 513 |    "source": [
 514 |     "### **4.** Industrial pump data"
 515 |    ]
 516 |   },
 517 |   {
 518 |    "cell_type": "code",
 519 |    "execution_count": null,
 520 |    "id": "ce940bbb",
 521 |    "metadata": {},
 522 |    "outputs": [],
 523 |    "source": [
 524 |     "FILE_PATH = os.path.join(DATA, 'pump', 'sensor.csv')\n",
 525 |     "pump_df = pd.read_csv(FILE_PATH, sep=',')\n",
 526 |     "pump_df.drop(columns={'Unnamed: 0'}, inplace=True)\n",
 527 |     "pump_df['timestamp'] = pd.to_datetime(pump_df['timestamp'], format='%Y-%m-%d %H:%M:%S')\n",
 528 |     "pump_df = pump_df.set_index('timestamp')\n",
 529 |     "\n",
 530 |     "pump_df['machine_status'].replace(to_replace='NORMAL', value=np.nan, inplace=True)\n",
 531 |     "pump_df['machine_status'].replace(to_replace='BROKEN', value=1, inplace=True)\n",
 532 |     "pump_df['machine_status'].replace(to_replace='RECOVERING', value=1, inplace=True)\n",
 533 |     "\n",
 534 |     "print('Shape:', pump_df.shape)\n",
 535 |     "pump_df.head()"
 536 |    ]
 537 |   },
 538 |   {
 539 |    "cell_type": "code",
 540 |    "execution_count": null,
 541 |    "id": "7455aa11",
 542 |    "metadata": {},
 543 |    "outputs": [],
 544 |    "source": [
 545 |     "file_structure_df = pump_df.iloc[:, 0:10].resample('5D').mean()"
 546 |    ]
 547 |   },
 548 |   {
 549 |    "cell_type": "code",
 550 |    "execution_count": null,
 551 |    "id": "913b88c5",
 552 |    "metadata": {},
 553 |    "outputs": [],
 554 |    "source": [
 555 |     "plt.rcParams['hatch.linewidth'] = 0.5\n",
 556 |     "plt.rcParams['lines.linewidth'] = 0.5\n",
 557 |     "\n",
 558 |     "fig = plt.figure(figsize=(5,1))\n",
 559 |     "ax1 = fig.add_subplot(1,1,1)\n",
 560 |     "plot1 = ax1.plot(pump_df['sensor_00'], label='Healthy pump')\n",
 561 |     "\n",
 562 |     "ax2 = ax1.twinx()\n",
 563 |     "plot2 = ax2.fill_between(\n",
 564 |     "    x=pump_df.index, \n",
 565 |     "    y1=0.0, \n",
 566 |     "    y2=pump_df['machine_status'], \n",
 567 |     "    color=colors[1], \n",
 568 |     "    linewidth=0.0,\n",
 569 |     "    edgecolor='#000000',\n",
 570 |     "    alpha=0.5, \n",
 571 |     "    hatch=\"//////\", \n",
 572 |     "    label='Broken pump'\n",
 573 |     ")\n",
 574 |     "ax2.grid(False)\n",
 575 |     "ax2.set_yticks([])\n",
 576 |     "\n",
 577 |     "labels = [plot1[0].get_label(), plot2.get_label()]\n",
 578 |     "\n",
 579 |     "plt.legend(handles=[plot1[0], plot2], labels=labels, loc='lower center', ncol=2, bbox_to_anchor=(0.5, -.4))\n",
 580 |     "plt.title('Industrial pump sensor data')\n",
 581 |     "plt.show()"
 582 |    ]
 583 |   },
 584 |   {
 585 |    "cell_type": "markdown",
 586 |    "id": "e3f3743d",
 587 |    "metadata": {},
 588 |    "source": [
 589 |     "### **5.** London household energy consumption with weather data"
 590 |    ]
 591 |   },
 592 |   {
 593 |    "cell_type": "markdown",
 594 |    "id": "fcd0191e",
 595 |    "metadata": {},
 596 |    "source": [
 597 |     "We want to filter out households that are subject to the dToU tariff and keep only the ones with a known ACORN (i.e. not in the ACORN-U group): this will allow us to better model future analysis by adding the Acorn detail information (which, by definition, won't be available for the ACORN-U group)."
 598 |    ]
 599 |   },
 600 |   {
 601 |    "cell_type": "code",
 602 |    "execution_count": null,
 603 |    "id": "1f57ceb9",
 604 |    "metadata": {},
 605 |    "outputs": [],
 606 |    "source": [
 607 |     "household_filename = os.path.join(DATA, 'energy-london', 'informations_households.csv')\n",
 608 |     "household_df = pd.read_csv(household_filename)\n",
 609 |     "household_df = household_df[(household_df['stdorToU'] == 'Std') & (household_df['Acorn'] == 'ACORN-E')]\n",
 610 |     "print(household_df.shape)\n",
 611 |     "household_df.head()"
 612 |    ]
 613 |   },
 614 |   {
 615 |    "cell_type": "markdown",
 616 |    "id": "68add26c",
 617 |    "metadata": {},
 618 |    "source": [
 619 |     "#### Associating households with their energy consumption data\n",
 620 |     "Each household (with an ID starting with `MACxxxxx` in the table above) has its consumption data stored in a block file named `block_xx`. This block file name is also available from the `informations_households.csv` file extracted above. We have the association between `household_id` and `block_file`: we can open each of them and keep the consumption for the households of interest. All these data will be concatenated into an `energy_df` dataframe:"
 621 |    ]
 622 |   },
 623 |   {
 624 |    "cell_type": "code",
 625 |    "execution_count": null,
 626 |    "id": "4fd6fd80",
 627 |    "metadata": {},
 628 |    "outputs": [],
 629 |    "source": [
 630 |     "%%time\n",
 631 |     "\n",
 632 |     "household_ids = household_df['LCLid'].tolist()\n",
 633 |     "consumption_file = os.path.join(DATA, 'energy-london', 'hourly_consumption.csv')\n",
 634 |     "min_data_points = ((pd.to_datetime('2020-12-31') - pd.to_datetime('2020-01-01')).days + 1)*24*2\n",
 635 |     "\n",
 636 |     "if os.path.exists(consumption_file):\n",
 637 |     "    print('Half-hourly consumption file already exists, loading from disk...')\n",
 638 |     "    energy_df = pd.read_csv(consumption_file)\n",
 639 |     "    energy_df['timestamp'] = pd.to_datetime(energy_df['timestamp'], format='%Y-%m-%d %H:%M:%S.%f')\n",
 640 |     "    print('Done.')\n",
 641 |     "    \n",
 642 |     "else:\n",
 643 |     "    print('Half-hourly consumption file not found. We need to generate it.')\n",
 644 |     "    \n",
 645 |     "    # We now have the block number we can use to open the right file:\n",
 646 |     "    energy_df = pd.DataFrame()\n",
 647 |     "    target_block_files = household_df['file'].unique().tolist()\n",
 648 |     "    print('- {} block files to process: '.format(len(target_block_files)), end='')\n",
 649 |     "    df_list = []\n",
 650 |     "    for block_file in tqdm(target_block_files):\n",
 651 |     "        # Reads the current block file:\n",
 652 |     "        current_filename = os.path.join(DATA, 'energy-london', 'halfhourly_dataset', '{}.csv'.format(block_file))\n",
 653 |     "        df = pd.read_csv(current_filename)\n",
 654 |     "        \n",
 655 |     "        # Set readable column names and adjust data types:\n",
 656 |     "        df.columns = ['household_id', 'timestamp', 'energy']\n",
 657 |     "        df = df.replace(to_replace='Null', value=0.0)\n",
 658 |     "        df['energy'] = df['energy'].astype(np.float64)\n",
 659 |     "        df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%d %H:%M:%S.%f')\n",
 660 |     "        \n",
 661 |     "        # We filter on the households sampled earlier:\n",
 662 |     "        df_list.append(df[df['household_id'].isin(household_ids)].reset_index(drop=True))\n",
 663 |     "    \n",
 664 |     "    # Concatenate with the main dataframe:\n",
 665 |     "    energy_df = pd.concat(df_list, axis='index', ignore_index=True)\n",
 666 |     "    \n",
 667 |     "    datapoints = energy_df.groupby(by='household_id').count()\n",
 668 |     "    datapoints = datapoints[datapoints['timestamp'] < min_data_points]\n",
 669 |     "    hhid_to_remove = datapoints.index.tolist()\n",
 670 |     "    energy_df = energy_df[~energy_df['household_id'].isin(hhid_to_remove)]\n",
 671 |     "\n",
 672 |     "    # Let's save this dataset to disk, we will use it from now on:\n",
 673 |     "    print('Saving file to disk... ', end='')\n",
 674 |     "    energy_df.to_csv(consumption_file, index=False)\n",
 675 |     "    print('Done.')"
 676 |    ]
 677 |   },
 678 |   {
 679 |    "cell_type": "code",
 680 |    "execution_count": null,
 681 |    "id": "465cdc8f",
 682 |    "metadata": {},
 683 |    "outputs": [],
 684 |    "source": [
 685 |     "start = np.min(energy_df['timestamp'])\n",
 686 |     "end = np.max(energy_df['timestamp'])\n",
 687 |     "weather_filename = os.path.join(DATA, 'energy-london', 'weather_hourly_darksky.csv')\n",
 688 |     "\n",
 689 |     "weather_df = pd.read_csv(weather_filename)\n",
 690 |     "weather_df['time'] = pd.to_datetime(weather_df['time'], format='%Y-%m-%d %H:%M:%S')\n",
 691 |     "weather_df = weather_df.drop(columns=['precipType', 'icon', 'summary'])\n",
 692 |     "weather_df = weather_df.sort_values(by='time')\n",
 693 |     "weather_df = weather_df.set_index('time')\n",
 694 |     "weather_df = weather_df[start:end]\n",
 695 |     "\n",
 696 |     "# Let's make sure we have one datapoint per hour to match \n",
 697 |     "# the frequency used for the household energy consumption data:\n",
 698 |     "weather_df = weather_df.resample(rule='1H').mean()     # This will generate NaN values for timestamps with missing data\n",
 699 |     "weather_df = weather_df.interpolate(method='linear')   # This will fill the missing values by linear interpolation\n",
 700 |     "\n",
 701 |     "print(weather_df.shape)\n",
 702 |     "weather_df"
 703 |    ]
 704 |   },
 705 |   {
 706 |    "cell_type": "code",
 707 |    "execution_count": null,
 708 |    "id": "3427cb1a",
 709 |    "metadata": {},
 710 |    "outputs": [],
 711 |    "source": [
 712 |     "energy_df = energy_df.set_index(['household_id', 'timestamp'])\n",
 713 |     "energy_df"
 714 |    ]
 715 |   },
 716 |   {
 717 |    "cell_type": "code",
 718 |    "execution_count": null,
 719 |    "id": "40f173f5",
 720 |    "metadata": {},
 721 |    "outputs": [],
 722 |    "source": [
 723 |     "hhid = household_ids[2]\n",
 724 |     "hh_energy = energy_df.loc[hhid, :]\n",
 725 |     "start = '2012-07-01'\n",
 726 |     "end = '2012-07-15'\n",
 727 |     "\n",
 728 |     "fig = plt.figure(figsize=(5,1))\n",
 729 |     "ax1 = fig.add_subplot(1,1,1)\n",
 730 |     "plot2 = ax1.fill_between(\n",
 731 |     "    x=weather_df.loc[start:end, 'temperature'].index, \n",
 732 |     "    y1=0.0, \n",
 733 |     "    y2=weather_df.loc[start:end, 'temperature'], \n",
 734 |     "    color=colors[1], \n",
 735 |     "    linewidth=0.0,\n",
 736 |     "    edgecolor='#000000',\n",
 737 |     "    alpha=0.25, \n",
 738 |     "    hatch=\"//////\", \n",
 739 |     "    label='Temperature'\n",
 740 |     ")\n",
 741 |     "ax1.set_ylim((0,40))\n",
 742 |     "ax1.grid(False)\n",
 743 |     "\n",
 744 |     "ax2 = ax1.twinx()\n",
 745 |     "ax2.plot(hh_energy[start:end], label='Energy consumption', linewidth=2, color='#FFFFFF', alpha=0.5)\n",
 746 |     "plot1 = ax2.plot(hh_energy[start:end], label='Energy consumption', linewidth=0.7)\n",
 747 |     "ax2.set_title(f'Energy consumption for household {hhid}')\n",
 748 |     "\n",
 749 |     "labels = [plot1[0].get_label(), plot2.get_label()]\n",
 750 |     "plt.legend(handles=[plot1[0], plot2], labels=labels, loc='upper left', fontsize=3, ncol=2)\n",
 751 |     "\n",
 752 |     "plt.show()"
 753 |    ]
 754 |   },
 755 |   {
 756 |    "cell_type": "code",
 757 |    "execution_count": null,
 758 |    "id": "36684f21",
 759 |    "metadata": {},
 760 |    "outputs": [],
 761 |    "source": [
 762 |     "acorn_filename = os.path.join(DATA, 'energy-london', 'acorn_details.csv')\n",
 763 |     "acorn_df = pd.read_csv(acorn_filename, encoding='ISO-8859-1')\n",
 764 |     "acorn_df = acorn_df.sample(10).loc[:, ['MAIN CATEGORIES', 'CATEGORIES', 'REFERENCE', 'ACORN-A', 'ACORN-B', 'ACORN-E']]\n",
 765 |     "acorn_df"
 766 |    ]
 767 |   },
 768 |   {
 769 |    "cell_type": "markdown",
 770 |    "id": "f09fcaf8",
 771 |    "metadata": {},
 772 |    "source": [
 773 |     "## File structure exploration\n",
 774 |     "---"
 775 |    ]
 776 |   },
 777 |   {
 778 |    "cell_type": "code",
 779 |    "execution_count": null,
 780 |    "id": "d87ca63a",
 781 |    "metadata": {},
 782 |    "outputs": [],
 783 |    "source": [
 784 |     "from IPython.display import display_html\n",
 785 |     "\n",
 786 |     "def display_multiple_dataframe(*args, max_rows=None, max_cols=None):\n",
 787 |     "    \"\"\"Display the dataframes passed as positional arguments as inline HTML tables.\"\"\"\n",
 788 |     "    html_str = ''.join(df.to_html(max_cols=max_cols, max_rows=max_rows) for df in args)\n",
 789 |     "    \n",
 790 |     "    # Patch only the opening <table> tags: replacing a bare 'table' also rewrites closing tags and cell text:\n",
 791 |     "    display_html(html_str.replace('<table', '<table style=\"display:inline\"'), raw=True)"
 792 |    ]
 793 |   },
 794 |   {
 795 |    "cell_type": "code",
 796 |    "execution_count": null,
 797 |    "id": "c0fb229a",
 798 |    "metadata": {},
 799 |    "outputs": [],
 800 |    "source": [
 801 |     "display_multiple_dataframe(\n",
 802 |     "    file_structure_df[['sensor_00']],\n",
 803 |     "    file_structure_df[['sensor_01']],\n",
 804 |     "    file_structure_df[['sensor_03']],\n",
 805 |     "    max_rows=10, max_cols=None\n",
 806 |     ")"
 807 |    ]
 808 |   },
 809 |   {
 810 |    "cell_type": "code",
 811 |    "execution_count": null,
 812 |    "id": "7ca40f25",
 813 |    "metadata": {},
 814 |    "outputs": [],
 815 |    "source": [
 816 |     "display_multiple_dataframe(\n",
 817 |     "    file_structure_df.loc['2018-04', :].head(6),\n",
 818 |     "    file_structure_df.loc['2018-05', :].head(6),\n",
 819 |     "    file_structure_df.loc['2018-06', :].head(6),\n",
 820 |     "    max_rows=None, max_cols=2\n",
 821 |     ")"
 822 |    ]
 823 |   },
 824 |   {
 825 |    "cell_type": "code",
 826 |    "execution_count": null,
 827 |    "id": "6e87d94c",
 828 |    "metadata": {},
 829 |    "outputs": [],
 830 |    "source": [
 831 |     "display_multiple_dataframe(\n",
 832 |     "    file_structure_df.loc['2018-04', ['sensor_00']].head(6),\n",
 833 |     "    file_structure_df.loc['2018-05', ['sensor_00']].head(6),\n",
 834 |     "    file_structure_df.loc['2018-06', ['sensor_00']].head(6),\n",
 835 |     "    max_rows=10, max_cols=None\n",
 836 |     ")\n",
 837 |     "display_multiple_dataframe(\n",
 838 |     "    file_structure_df.loc['2018-04', ['sensor_01']].head(6),\n",
 839 |     "    file_structure_df.loc['2018-05', ['sensor_01']].head(6),\n",
 840 |     "    file_structure_df.loc['2018-06', ['sensor_01']].head(6),\n",
 841 |     "    max_rows=10, max_cols=None\n",
 842 |     ")\n",
 843 |     "print('.\\n.\\n.')\n",
 844 |     "display_multiple_dataframe(\n",
 845 |     "    file_structure_df.loc['2018-04', ['sensor_09']].head(6),\n",
 846 |     "    file_structure_df.loc['2018-05', ['sensor_09']].head(6),\n",
 847 |     "    file_structure_df.loc['2018-06', ['sensor_09']].head(6),\n",
 848 |     "    max_rows=10, max_cols=None\n",
 849 |     ")"
 850 |    ]
 851 |   },
 852 |   {
 853 |    "cell_type": "code",
 854 |    "execution_count": null,
 855 |    "id": "9c8d815f",
 856 |    "metadata": {},
 857 |    "outputs": [],
 858 |    "source": [
 859 |     "df1 = pump_df.iloc[:, [0]].resample('5D').mean()\n",
 860 |     "df2 = pump_df.iloc[:, [1]].resample('2D').mean()\n",
 861 |     "df3 = pump_df.iloc[:, [2]].resample('7D').mean()\n",
 862 |     "\n",
 863 |     "display_multiple_dataframe(\n",
 864 |     "    df1.head(10), df2.head(10), df3.head(10),\n",
 865 |     "    pd.merge(pd.merge(df1, df2, left_index=True, right_index=True, how='outer'), df3, left_index=True, right_index=True, how='outer').head(10),\n",
 866 |     "    max_rows=None, max_cols=None\n",
 867 |     ")"
 868 |    ]
 869 |   },
 870 |   {
 871 |    "cell_type": "code",
 872 |    "execution_count": null,
 873 |    "id": "7046cbb5",
 874 |    "metadata": {},
 875 |    "outputs": [],
 876 |    "source": [
 877 |     "pd.set_option('display.max_columns', None)\n",
 878 |     "pd.set_option('display.max_rows', 10)\n",
 879 |     "pd.merge(pd.merge(df1, df2, left_index=True, right_index=True, how='outer'), df3, left_index=True, right_index=True, how='outer').head(10)"
 880 |    ]
 881 |   },
 882 |   {
 883 |    "cell_type": "code",
 884 |    "execution_count": null,
 885 |    "id": "7ad04201",
 886 |    "metadata": {},
 887 |    "outputs": [],
 888 |    "source": [
 889 |     "plt.figure(figsize=(5,1))\n",
 890 |     "for i in range(len(colors)):\n",
 891 |     "    plt.plot(file_structure_df[f'sensor_0{i}'], linewidth=2, alpha=0.5, label=colors[i])\n",
 892 |     "\n",
 893 |     "plt.legend()\n",
 894 |     "plt.show()"
 895 |    ]
 896 |   },
 897 |   {
 898 |    "cell_type": "markdown",
 899 |    "id": "b7b85f1f",
 900 |    "metadata": {},
 901 |    "source": [
 902 |     "## Visualization\n",
 903 |     "---"
 904 |    ]
 905 |   },
 906 |   {
 907 |    "cell_type": "code",
 908 |    "execution_count": null,
 909 |    "id": "b4c78d4c",
 910 |    "metadata": {},
 911 |    "outputs": [],
 912 |    "source": [
 913 |     "fig = plt.figure(figsize=(5,1))\n",
 914 |     "ax1 = fig.add_subplot(1,1,1)\n",
 915 |     "ax2 = ax1.twinx()\n",
 916 |     "\n",
 917 |     "plot_sensor_0 = ax1.plot(pump_df['sensor_00'], label='Sensor 0', color=colors[0], linewidth=1, alpha=0.8)\n",
 918 |     "plot_sensor_1 = ax2.plot(pump_df['sensor_01'], label='Sensor 1', color=colors[1], linewidth=1, alpha=0.8)\n",
 919 |     "ax2.grid(False)\n",
 920 |     "plt.title('Pump sensor values (2 sensors)')\n",
 921 |     "plt.legend(handles=[plot_sensor_0[0], plot_sensor_1[0]], ncol=2, loc='lower right')\n",
 922 |     "plt.show()"
 923 |    ]
 924 |   },
 925 |   {
 926 |    "cell_type": "code",
 927 |    "execution_count": null,
 928 |    "id": "90daaba9",
 929 |    "metadata": {},
 930 |    "outputs": [],
 931 |    "source": [
 932 |     "reduced_pump_df = pump_df.loc[:, 'sensor_00':'sensor_14']\n",
 933 |     "reduced_pump_df = reduced_pump_df.replace([np.inf, -np.inf], np.nan)\n",
 934 |     "reduced_pump_df = reduced_pump_df.fillna(0.0)\n",
 935 |     "reduced_pump_df = reduced_pump_df.astype(np.float32)\n",
 936 |     "scaled_pump_df = pd.DataFrame(normalize(reduced_pump_df), index=reduced_pump_df.index, columns=reduced_pump_df.columns)\n",
 937 |     "scaled_pump_df"
 938 |    ]
 939 |   },
 940 |   {
 941 |    "cell_type": "code",
 942 |    "execution_count": null,
 943 |    "id": "cbc92f9d",
 944 |    "metadata": {},
 945 |    "outputs": [],
 946 |    "source": [
 947 |     "fig = plt.figure(figsize=(5,1))\n",
 948 |     "\n",
 949 |     "for i in range(0,15):\n",
 950 |     "    plt.plot(scaled_pump_df.iloc[:, i], alpha=0.6)\n",
 951 |     "\n",
 952 |     "plt.title('Pump sensor values (15 sensors)')\n",
 953 |     "plt.show()"
 954 |    ]
 955 |   },
 956 |   {
 957 |    "cell_type": "code",
 958 |    "execution_count": null,
 959 |    "id": "9739c28e",
 960 |    "metadata": {},
 961 |    "outputs": [],
 962 |    "source": [
 963 |     "pump_df2 = pump_df.copy()\n",
 964 |     "\n",
 965 |     "pump_df2 = pump_df2.replace([np.inf, -np.inf], np.nan)\n",
 966 |     "pump_df2 = pump_df2.fillna(0.0)\n",
 967 |     "pump_df2 = pump_df2.astype(np.float32)\n",
 968 |     "\n",
 969 |     "pump_description = pump_df2.describe().T\n",
 970 |     "constant_signals = pump_description[pump_description['min'] == pump_description['max']].index.tolist()\n",
 971 |     "pump_df2 = pump_df2.drop(columns=constant_signals)\n",
 972 |     "\n",
 973 |     "features = pump_df2.columns.tolist()"
 974 |    ]
 975 |   },
 976 |   {
 977 |    "cell_type": "code",
 978 |    "execution_count": null,
 979 |    "id": "dd1cf267",
 980 |    "metadata": {},
 981 |    "outputs": [],
 982 |    "source": [
 983 |     "def hex_to_rgb(hex_color):\n",
 984 |     "    \"\"\"\n",
 985 |     "    Converts a color string in hexadecimal format to RGB format.\n",
 986 |     "    \n",
 987 |     "    PARAMS\n",
 988 |     "    ======\n",
 989 |     "        hex_color: string\n",
 990 |     "            A string describing the color to convert from hexadecimal. It can\n",
 991 |     "            include the leading # character or not\n",
 992 |     "    \n",
 993 |     "    RETURNS\n",
 994 |     "    =======\n",
 995 |     "        rgb_color: tuple\n",
 996 |     "            Each color component of the returned tuple will be a float value\n",
 997 |     "            between 0.0 and 1.0\n",
 998 |     "    \"\"\"\n",
 999 |     "    hex_color = hex_color.lstrip('#')\n",
1000 |     "    rgb_color = tuple(int(hex_color[i:i+2], base=16) / 255.0 for i in [0, 2, 4])\n",
1001 |     "    return rgb_color\n",
1002 |     "\n",
1003 |     "def plot_timeseries_strip_chart(binned_timeseries, signal_list, fig_width=12, signal_height=0.15, dates=None, day_interval=7):\n",
1004 |     "    \"\"\"\n",
1005 |     "    Plots a matrix of discretized signals as a strip chart (one row per signal) and returns its axes.\n",
1006 |     "    When dates is provided it needs at least two entries; day_interval is currently unused.\n",
1007 |     "    \"\"\"\n",
1008 |     "    # Build a suitable colormap:\n",
1009 |     "    colors_list = [hex_to_rgb('#DC322F'), hex_to_rgb('#B58900'), hex_to_rgb('#2AA198')]\n",
1010 |     "    cm = mpl_colors.LinearSegmentedColormap.from_list('RdAmGr', colors_list, N=len(colors_list))\n",
1011 |     "    \n",
1012 |     "    fig = plt.figure(figsize=(fig_width, signal_height * binned_timeseries.shape[0]))\n",
1013 |     "    ax = fig.add_subplot(1,1,1)\n",
1014 |     "    \n",
1015 |     "    # Devising the extent of the actual plot:\n",
1016 |     "    if dates is not None:\n",
1017 |     "        dnum = mdates.date2num(dates)\n",
1018 |     "        start = dnum[0] - (dnum[1]-dnum[0])/2.\n",
1019 |     "        stop = dnum[-1] + (dnum[1]-dnum[0])/2.\n",
1020 |     "        extent = [start, stop, 0, signal_height * (binned_timeseries.shape[0])]\n",
1021 |     "        \n",
1022 |     "    else:\n",
1023 |     "        extent = None\n",
1024 |     "        \n",
1025 |     "    # Plot the matrix:\n",
1026 |     "    im = ax.imshow(binned_timeseries, \n",
1027 |     "                   extent=extent, \n",
1028 |     "                   aspect=\"auto\", \n",
1029 |     "                   cmap=cm, \n",
1030 |     "                   origin='lower')\n",
1031 |     "    \n",
1032 |     "    # Adjusting the x-axis if we provide dates:\n",
1033 |     "    if dates is not None:\n",
1034 |     "        ax.xaxis.set_major_locator(mdates.MonthLocator())\n",
1035 |     "        ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))\n",
1036 |     "        # Tick.label is gone from recent matplotlib releases: label1 is the supported accessor:\n",
1037 |     "        for tick in ax.xaxis.get_major_ticks():\n",
1038 |     "            tick.label1.set_fontsize(4)\n",
1039 |     "            tick.label1.set_rotation(60)\n",
1040 |     "            tick.label1.set_fontweight('bold')\n",
1041 |     "        ax.tick_params(axis='x', which='major', pad=7, labelcolor='#000000')\n",
1042 |     "        plt.xticks(ha='right')\n",
1043 |     "        \n",
1044 |     "    # Adjusting the y-axis: the tick positions are fixed first so that each label maps to one tick:\n",
1045 |     "    ax.set_yticks(np.arange(len(signal_list)) * signal_height)\n",
1046 |     "    ax.set_yticklabels(signal_list, verticalalignment='bottom', fontsize=4)\n",
1047 |     "    \n",
1048 |     "    \n",
1049 |     "    plt.grid()\n",
1050 |     "    return ax"
1051 |    ]
1052 |   },
1053 |   {
1054 |    "cell_type": "code",
1055 |    "execution_count": null,
1056 |    "id": "ef704a1a",
1057 |    "metadata": {},
1058 |    "outputs": [],
1059 |    "source": [
1060 |     "from IPython.display import display, Markdown, Latex\n",
1061 |     "\n",
1062 |     "# Build a list of dataframes, one per sensor:\n",
1063 |     "df_list = []\n",
1064 |     "for f in features[:1]:\n",
1065 |     "    df_list.append(pump_df2[[f]])\n",
1066 |     "\n",
1067 |     "# Discretize each signal in 3 bins:\n",
1068 |     "array = tsia.markov.discretize_multivariate(df_list)\n",
1069 |     "\n",
1070 |     "fig = plt.figure(figsize=(5.5, 0.6))\n",
1071 |     "plt.plot(pump_df2['sensor_00'], linewidth=0.7, alpha=0.6)\n",
1072 |     "plt.title('Line plot of the pump sensor 0')\n",
1073 |     "plt.show()\n",
1074 |     "\n",
1075 |     "display(Markdown('<img src=\"arrow.png\" align=\"left\" style=\"padding-left: 730px\"/>'))\n",
1076 |     "\n",
1077 |     "\n",
1078 |     "# Plot the strip chart:\n",
1079 |     "ax = plot_timeseries_strip_chart(\n",
1080 |     "    array, \n",
1081 |     "    signal_list=features[:1],\n",
1082 |     "    fig_width=5.21,\n",
1083 |     "    signal_height=0.2,\n",
1084 |     "    dates=df_list[0].index.to_pydatetime(),\n",
1085 |     "    day_interval=2\n",
1086 |     ")\n",
1087 |     "ax.set_title('Strip chart of the pump sensor 0');"
1088 |    ]
1089 |   },
1090 |   {
1091 |    "cell_type": "code",
1092 |    "execution_count": null,
1093 |    "id": "59c2ff60",
1094 |    "metadata": {},
1095 |    "outputs": [],
1096 |    "source": [
1097 |     "# Build a list of dataframes, one per sensor:\n",
1098 |     "df_list = []\n",
1099 |     "for f in features:\n",
1100 |     "    df_list.append(pump_df2[[f]])\n",
1101 |     "\n",
1102 |     "# Discretize each signal in 3 bins:\n",
1103 |     "array = tsia.markov.discretize_multivariate(df_list)\n",
1104 |     "\n",
1105 |     "# Plot the strip chart:\n",
1106 |     "fig = plot_timeseries_strip_chart(\n",
1107 |     "    array, \n",
1108 |     "    signal_list=features,\n",
1109 |     "    fig_width=5.5,\n",
1110 |     "    signal_height=0.1,\n",
1111 |     "    dates=df_list[0].index.to_pydatetime(),\n",
1112 |     "    day_interval=2\n",
1113 |     ")"
1114 |    ]
1115 |   },
1116 |   {
1117 |    "cell_type": "markdown",
1118 |    "id": "fa605700",
1119 |    "metadata": {},
1120 |    "source": [
1121 |     "### Recurrence plot"
1122 |    ]
1123 |   },
1124 |   {
1125 |    "cell_type": "code",
1126 |    "execution_count": null,
1127 |    "id": "f85db253",
1128 |    "metadata": {},
1129 |    "outputs": [],
1130 |    "source": [
1131 |     "from pyts.image import RecurrencePlot\n",
1132 |     "from pyts.image import GramianAngularField\n",
1133 |     "from pyts.image import MarkovTransitionField"
1134 |    ]
1135 |   },
1136 |   {
1137 |    "cell_type": "code",
1138 |    "execution_count": null,
1139 |    "id": "74fb9a2a",
1140 |    "metadata": {},
1141 |    "outputs": [],
1142 |    "source": [
1143 |     "hhid = household_ids[2]\n",
1144 |     "hh_energy = energy_df.loc[hhid, :]\n",
1145 |     "pump_extract_df = pump_df.iloc[:800, 0].copy()\n",
1146 |     "\n",
1147 |     "rp = RecurrencePlot(threshold='point', percentage=30)\n",
1148 |     "weather_rp = rp.fit_transform(weather_df.loc['2013-01-01':'2013-01-31']['temperature'].values.reshape(1, -1))\n",
1149 |     "energy_rp = rp.fit_transform(hh_energy['2012-07-01':'2012-07-15'].values.reshape(1, -1))\n",
1150 |     "pump_rp = rp.fit_transform(pump_extract_df.values.reshape(1, -1))\n",
1151 |     "\n",
1152 |     "\n",
1153 |     "fig = plt.figure(figsize=(5.5, 2.4))\n",
1154 |     "gs = gridspec.GridSpec(nrows=3, ncols=2, width_ratios=[3,1], hspace=0.8, wspace=0.0)\n",
1155 |     "\n",
1156 |     "# Pump sensor 0:\n",
1157 |     "ax = fig.add_subplot(gs[0])\n",
1158 |     "ax.plot(pump_extract_df, label='Pump sensor 0')\n",
1159 |     "ax.set_title(f'Pump sensor 0')\n",
1160 |     "\n",
1161 |     "ax = fig.add_subplot(gs[1])\n",
1162 |     "ax.imshow(pump_rp[0], cmap='binary', origin='lower')\n",
1163 |     "ax.axis('off')\n",
1164 |     "\n",
1165 |     "# Energy consumption line plot and recurrence plot:\n",
1166 |     "ax = fig.add_subplot(gs[2])\n",
1167 |     "plot1 = ax.plot(hh_energy['2012-07-01':'2012-07-15'], color=colors[1])\n",
1168 |     "ax.set_title(f'Energy consumption for household {hhid}')\n",
1169 |     "\n",
1170 |     "ax = fig.add_subplot(gs[3])\n",
1171 |     "ax.imshow(energy_rp[0], cmap='binary', origin='lower')\n",
1172 |     "ax.axis('off')\n",
1173 |     "\n",
1174 |     "# Daily temperature line plot and recurrence plot:\n",
1175 |     "ax = fig.add_subplot(gs[4])\n",
1176 |     "start = '2012-07-01'\n",
1177 |     "end = '2012-07-15'\n",
1178 |     "ax.plot(weather_df.loc['2013-01-01':'2013-01-31']['temperature'], color=colors[2])\n",
1179 |     "ax.set_title(f'Daily temperature')\n",
1180 |     "\n",
1181 |     "ax = fig.add_subplot(gs[5])\n",
1182 |     "ax.imshow(weather_rp[0], cmap='binary', origin='lower')\n",
1183 |     "ax.axis('off')\n",
1184 |     "\n",
1185 |     "plt.show()"
1186 |    ]
1187 |   },
1188 |   {
1189 |    "cell_type": "code",
1190 |    "execution_count": null,
1191 |    "id": "950062ef",
1192 |    "metadata": {},
1193 |    "outputs": [],
1194 |    "source": [
1195 |     "hhid = household_ids[2]\n",
1196 |     "hh_energy = energy_df.loc[hhid, :]\n",
1197 |     "pump_extract_df = pump_df.iloc[:800, 0].copy()\n",
1198 |     "\n",
1199 |     "gaf = GramianAngularField(image_size=48, method='summation')\n",
1200 |     "weather_gasf = gaf.fit_transform(weather_df.loc['2013-01-01':'2013-01-31']['temperature'].values.reshape(1, -1))\n",
1201 |     "energy_gasf = gaf.fit_transform(hh_energy['2012-07-01':'2012-07-15'].values.reshape(1, -1))\n",
1202 |     "pump_gasf = gaf.fit_transform(pump_extract_df.values.reshape(1, -1))\n",
1203 |     "\n",
1204 |     "fig = plt.figure(figsize=(5.5, 2.4))\n",
1205 |     "gs = gridspec.GridSpec(nrows=3, ncols=2, width_ratios=[3,1], hspace=0.8, wspace=0.0)\n",
1206 |     "\n",
1207 |     "# Pump sensor 0:\n",
1208 |     "ax = fig.add_subplot(gs[0])\n",
1209 |     "ax.plot(pump_extract_df, label='Pump sensor 0')\n",
1210 |     "ax.set_title(f'Pump sensor 0')\n",
1211 |     "\n",
1212 |     "ax = fig.add_subplot(gs[1])\n",
1213 |     "ax.imshow(pump_gasf[0], cmap='RdBu_r', origin='lower')\n",
1214 |     "ax.axis('off')\n",
1215 |     "\n",
1216 |     "# Energy consumption line plot and Gramian angular field:\n",
1217 |     "ax = fig.add_subplot(gs[2])\n",
1218 |     "plot1 = ax.plot(hh_energy['2012-07-01':'2012-07-15'], color=colors[1])\n",
1219 |     "ax.set_title(f'Energy consumption for household {hhid}')\n",
1220 |     "\n",
1221 |     "ax = fig.add_subplot(gs[3])\n",
1222 |     "ax.imshow(energy_gasf[0], cmap='RdBu_r', origin='lower')\n",
1223 |     "ax.axis('off')\n",
1224 |     "\n",
1225 |     "# Daily temperature line plot and Gramian angular field:\n",
1226 |     "ax = fig.add_subplot(gs[4])\n",
1227 |     "start = '2012-07-01'\n",
1228 |     "end = '2012-07-15'\n",
1229 |     "ax.plot(weather_df.loc['2013-01-01':'2013-01-31']['temperature'], color=colors[2])\n",
1230 |     "ax.set_title(f'Daily temperature')\n",
1231 |     "\n",
1232 |     "ax = fig.add_subplot(gs[5])\n",
1233 |     "ax.imshow(weather_gasf[0], cmap='RdBu_r', origin='lower')\n",
1234 |     "ax.axis('off')\n",
1235 |     "\n",
1236 |     "plt.show()"
1237 |    ]
1238 |   },
1239 |   {
1240 |    "cell_type": "code",
1241 |    "execution_count": null,
1242 |    "id": "123a169c",
1243 |    "metadata": {},
1244 |    "outputs": [],
1245 |    "source": [
1246 |     "mtf = MarkovTransitionField(image_size=48)\n",
1247 |     "\n",
1248 |     "weather_mtf = mtf.fit_transform(weather_df.loc['2013-01-01':'2013-01-31']['temperature'].values.reshape(1, -1))\n",
1249 |     "energy_mtf = mtf.fit_transform(hh_energy['2012-07-01':'2012-07-15'].values.reshape(1, -1))\n",
1250 |     "pump_mtf = mtf.fit_transform(pump_extract_df.values.reshape(1, -1))\n",
1251 |     "\n",
1252 |     "fig = plt.figure(figsize=(5.5, 2.4))\n",
1253 |     "gs = gridspec.GridSpec(nrows=3, ncols=2, width_ratios=[3,1], hspace=0.8, wspace=0.0)\n",
1254 |     "\n",
1255 |     "# Pump sensor 0:\n",
1256 |     "ax = fig.add_subplot(gs[0])\n",
1257 |     "ax.plot(pump_extract_df, label='Pump sensor 0')\n",
1258 |     "ax.set_title(f'Pump sensor 0')\n",
1259 |     "\n",
1260 |     "ax = fig.add_subplot(gs[1])\n",
1261 |     "ax.imshow(pump_mtf[0], cmap='RdBu_r', origin='lower')\n",
1262 |     "ax.axis('off')\n",
1263 |     "\n",
1264 |     "# Energy consumption line plot and Markov transition field:\n",
1265 |     "ax = fig.add_subplot(gs[2])\n",
1266 |     "plot1 = ax.plot(hh_energy['2012-07-01':'2012-07-15'], color=colors[1])\n",
1267 |     "ax.set_title(f'Energy consumption for household {hhid}')\n",
1268 |     "\n",
1269 |     "ax = fig.add_subplot(gs[3])\n",
1270 |     "ax.imshow(energy_mtf[0], cmap='RdBu_r', origin='lower')\n",
1271 |     "ax.axis('off')\n",
1272 |     "\n",
1273 |     "# Daily temperature line plot and Markov transition field:\n",
1274 |     "ax = fig.add_subplot(gs[4])\n",
1275 |     "start = '2012-07-01'\n",
1276 |     "end = '2012-07-15'\n",
1277 |     "ax.plot(weather_df.loc['2013-01-01':'2013-01-31']['temperature'], color=colors[2])\n",
1278 |     "ax.set_title(f'Daily temperature')\n",
1279 |     "\n",
1280 |     "ax = fig.add_subplot(gs[5])\n",
1281 |     "ax.imshow(weather_mtf[0], cmap='RdBu_r', origin='lower')\n",
1282 |     "ax.axis('off')\n",
1283 |     "\n",
1284 |     "plt.show()"
1285 |    ]
1286 |   },
1287 |   {
1288 |    "cell_type": "code",
1289 |    "execution_count": null,
1290 |    "id": "3f4e8ebe",
1291 |    "metadata": {},
1292 |    "outputs": [],
1293 |    "source": [
1294 |     "import matplotlib\n",
1295 |     "import matplotlib.cm as cm\n",
1296 |     "import networkx as nx\n",
1297 |     "import community\n",
1298 |     "\n",
1299 |     "def compute_network_graph(markov_field):\n",
1300 |     "    \"\"\"Builds the network graph of markov_field[0] and the options used to draw it with networkx.\"\"\"\n",
1301 |     "    # from_numpy_matrix was removed in networkx 3.0, from_numpy_array is its replacement:\n",
1302 |     "    G = nx.from_numpy_array(markov_field[0])\n",
1303 |     "\n",
1304 |     "    # Uncover the communities in the current graph:\n",
1305 |     "    communities = community.best_partition(G)\n",
1306 |     "    nb_communities = len(pd.Series(communities).unique())\n",
1307 |     "    cmap = 'autumn'\n",
1308 |     "\n",
1309 |     "    # Compute node and edge colors for the modularity encoding (a single community must not divide by zero):\n",
1310 |     "    colormap, scale = plt.get_cmap(cmap), max(nb_communities - 1, 1)\n",
1311 |     "    edge_colors = [matplotlib.colors.to_hex(colormap(communities.get(v) / scale)) for u, v in G.edges()]\n",
1312 |     "    node_colors = [communities.get(node) for node in G.nodes()]\n",
1313 |     "    options = {\n",
1314 |     "        'node_size': 10,\n",
1315 |     "        'edge_color': edge_colors,\n",
1316 |     "        'node_color': node_colors,\n",
1317 |     "        'linewidths': 0,\n",
1318 |     "        'width': 0.1,\n",
1319 |     "        'alpha': 0.6,\n",
1320 |     "        'with_labels': False,\n",
1321 |     "        'cmap': cmap\n",
1322 |     "    }\n",
1323 |     "    \n",
1324 |     "    return G, options"
1325 |    ]
1326 |   },
1327 |   {
1328 |    "cell_type": "code",
1329 |    "execution_count": null,
1330 |    "id": "8bfa993a",
1331 |    "metadata": {},
1332 |    "outputs": [],
1333 |    "source": [
1334 |     "fig = plt.figure(figsize=(5.5, 2.4))\n",
1335 |     "gs = gridspec.GridSpec(nrows=3, ncols=2, width_ratios=[3,1], hspace=0.8, wspace=0.0)\n",
1336 |     "\n",
1337 |     "# Pump sensor 0:\n",
1338 |     "ax = fig.add_subplot(gs[0])\n",
1339 |     "ax.plot(pump_extract_df, label='Pump sensor 0')\n",
1340 |     "ax.set_title(f'Pump sensor 0')\n",
1341 |     "\n",
1342 |     "ax = fig.add_subplot(gs[1])\n",
1343 |     "G, options = compute_network_graph(pump_mtf)\n",
1344 |     "nx.draw_networkx(G, **options, pos=nx.spring_layout(G), ax=ax)\n",
1345 |     "ax.axis('off')\n",
1346 |     "\n",
1347 |     "# Energy consumption line plot and network graph:\n",
1348 |     "ax = fig.add_subplot(gs[2])\n",
1349 |     "plot1 = ax.plot(hh_energy['2012-07-01':'2012-07-15'], color=colors[1])\n",
1350 |     "ax.set_title(f'Energy consumption for household {hhid}')\n",
1351 |     "\n",
1352 |     "ax = fig.add_subplot(gs[3])\n",
1353 |     "G, options = compute_network_graph(energy_mtf)\n",
1354 |     "nx.draw_networkx(G, **options, pos=nx.spring_layout(G), ax=ax)\n",
1355 |     "ax.axis('off')\n",
1356 |     "\n",
1357 |     "# Daily temperature line plot and network graph:\n",
1358 |     "ax = fig.add_subplot(gs[4])\n",
1359 |     "start = '2012-07-01'\n",
1360 |     "end = '2012-07-15'\n",
1361 |     "ax.plot(weather_df.loc['2013-01-01':'2013-01-31']['temperature'], color=colors[2])\n",
1362 |     "ax.set_title(f'Daily temperature')\n",
1363 |     "\n",
1364 |     "ax = fig.add_subplot(gs[5])\n",
1365 |     "G, options = compute_network_graph(weather_mtf)\n",
1366 |     "nx.draw_networkx(G, **options, pos=nx.spring_layout(G), ax=ax)\n",
1367 |     "ax.axis('off')\n",
1368 |     "\n",
1369 |     "plt.show()"
1370 |    ]
1371 |   },
1372 |   {
1373 |    "cell_type": "markdown",
1374 |    "id": "979ad8ea",
1375 |    "metadata": {},
1376 |    "source": [
1377 |     "## Symbolic representation\n",
1378 |     "---"
1379 |    ]
1380 |   },
1381 |   {
1382 |    "cell_type": "code",
1383 |    "execution_count": null,
1384 |    "id": "102d6a60",
1385 |    "metadata": {},
1386 |    "outputs": [],
1387 |    "source": [
1388 |     "from pyts.bag_of_words import BagOfWords\n",
1389 |     "\n",
1390 |     "window_size, word_size = 30, 5\n",
1391 |     "bow = BagOfWords(window_size=window_size, word_size=word_size, window_step=window_size, numerosity_reduction=False)\n",
1392 |     "X = weather_df.loc['2013-01-01':'2013-01-31']['temperature'].values.reshape(1, -1)\n",
1393 |     "X_bow = bow.transform(X)\n",
1394 |     "time_index = weather_df.loc['2013-01-01':'2013-01-31']['temperature'].index\n",
1395 |     "len(X_bow[0].replace(' ', ''))"
1396 |    ]
1397 |   },
1398 |   {
1399 |    "cell_type": "code",
1400 |    "execution_count": null,
1401 |    "id": "96878a1d",
1402 |    "metadata": {},
1403 |    "outputs": [],
1404 |    "source": [
1405 |     "# Plot the considered subseries\n",
1406 |     "plt.figure(figsize=(5, 2))\n",
1407 |     "splits_series = np.linspace(0, X.shape[1], 1 + X.shape[1] // window_size, dtype='int64')\n",
1408 |     "for start, end in zip(splits_series[:-1], np.clip(splits_series[1:] + 1, 0, X.shape[1])):\n",
1409 |     "    plt.plot(np.arange(start, end), X[0, start:end], 'o-', linewidth=0.5, ms=0.1)\n",
1410 |     "\n",
1411 |     "# Plot the corresponding letters\n",
1412 |     "splits_letters = np.linspace(0, X.shape[1], 1 + word_size * X.shape[1] // window_size)\n",
1413 |     "splits_letters = ((splits_letters[:-1] + splits_letters[1:]) / 2)\n",
1414 |     "splits_letters = splits_letters.astype('int64')\n",
1415 |     "\n",
1416 |     "for i, (x, text) in enumerate(zip(splits_letters, X_bow[0].replace(' ', ''))):\n",
1417 |     "    t = plt.text(x, X[0, x], text, color=\"C{}\".format(i // word_size), fontsize=3.5)\n",
1418 |     "    t.set_bbox(dict(facecolor='#FFFFFF', alpha=0.5, edgecolor=\"C{}\".format(i // word_size), boxstyle='round4'))\n",
1419 |     "\n",
1420 |     "plt.title('Bag-of-words representation for weather temperature')\n",
1421 |     "plt.tight_layout()\n",
1422 |     "plt.show()"
1423 |    ]
1424 |   },
1425 |   {
1426 |    "cell_type": "code",
1427 |    "execution_count": null,
1428 |    "id": "94bb8ed5",
1429 |    "metadata": {},
1430 |    "outputs": [],
1431 |    "source": [
1432 |     "from pyts.transformation import WEASEL\n",
1433 |     "from sklearn.preprocessing import LabelEncoder"
1434 |    ]
1435 |   },
1436 |   {
1437 |    "cell_type": "code",
1438 |    "execution_count": null,
1439 |    "id": "d501fe5f",
1440 |    "metadata": {},
1441 |    "outputs": [],
1442 |    "source": [
1443 |     "X_train = ecg_df.iloc[:, 1:].values\n",
1444 |     "y_train = ecg_df.iloc[:, 0]\n",
1445 |     "y_train = LabelEncoder().fit_transform(y_train)\n",
1446 |     "weasel = WEASEL(word_size=3, n_bins=3, window_sizes=[10, 25], sparse=False)\n",
1447 |     "X_weasel = weasel.fit_transform(X_train, y_train)\n",
1448 |     "vocabulary_length = len(weasel.vocabulary_)"
1449 |    ]
1450 |   },
1451 |   {
1452 |    "cell_type": "code",
1453 |    "execution_count": null,
1454 |    "id": "7eef27d7",
1455 |    "metadata": {},
1456 |    "outputs": [],
1457 |    "source": [
1458 |     "plt.figure(figsize=(5,1.5))\n",
1459 |     "width = 0.4\n",
1460 |     "x = np.arange(vocabulary_length) - width / 2\n",
1461 |     "for i in range(len(X_weasel[y_train == 0])):\n",
1462 |     "    if i == 0:\n",
1463 |     "        plt.bar(x, X_weasel[y_train == 0][i], width=width, alpha=0.25, color=colors[1], label='Time series for Ischemia')\n",
1464 |     "    else:\n",
1465 |     "        plt.bar(x, X_weasel[y_train == 0][i], width=width, alpha=0.25, color=colors[1])\n",
1466 |     "    \n",
1467 |     "for i in range(len(X_weasel[y_train == 1])):\n",
1468 |     "    if i == 0:\n",
1469 |     "        plt.bar(x+width, X_weasel[y_train == 1][i], width=width, alpha=0.25, color=colors[0], label='Time series for Normal heartbeat')\n",
1470 |     "    else:\n",
1471 |     "        plt.bar(x+width, X_weasel[y_train == 1][i], width=width, alpha=0.25, color=colors[0])\n",
1472 |     "        \n",
1473 |     "plt.xticks(\n",
1474 |     "    np.arange(vocabulary_length),\n",
1475 |     "    np.vectorize(weasel.vocabulary_.get)(np.arange(X_weasel[0].size)),\n",
1476 |     "    fontsize=2,\n",
1477 |     "    rotation=60\n",
1478 |     ")\n",
1479 |     "    \n",
1480 |     "plt.legend(loc='upper right')\n",
1481 |     "plt.show()"
1482 |    ]
1483 |   },
1484 |   {
1485 |    "cell_type": "markdown",
1486 |    "id": "6677dddd",
1487 |    "metadata": {},
1488 |    "source": [
1489 |     "## Statistics\n",
1490 |     "---"
1491 |    ]
1492 |   },
1493 |   {
1494 |    "cell_type": "code",
1495 |    "execution_count": null,
1496 |    "id": "5c2ba909",
1497 |    "metadata": {},
1498 |    "outputs": [],
1499 |    "source": [
1500 |     "plt.rcParams['xtick.labelsize'] = 3\n",
1501 |     "\n",
1502 |     "import statsmodels.api as sm\n",
1503 |     "\n",
1504 |     "fig = plt.figure(figsize=(5.5, 3))\n",
1505 |     "gs = gridspec.GridSpec(nrows=3, ncols=2, width_ratios=[1,1], hspace=0.8)\n",
1506 |     "\n",
1507 |     "# Pump\n",
1508 |     "ax = fig.add_subplot(gs[0])\n",
1509 |     "ax.plot(pump_extract_df, label='Pump sensor 0')\n",
1510 |     "ax.set_title(f'Pump sensor 0')\n",
1511 |     "ax.tick_params(axis='x', which='both', labelbottom=False)\n",
1512 |     "\n",
1513 |     "ax = fig.add_subplot(gs[1])\n",
1514 |     "sm.graphics.tsa.plot_acf(pump_extract_df.values.squeeze(), ax=ax, markersize=1, title='')\n",
1515 |     "ax.set_ylim(-1.2, 1.2)\n",
1516 |     "ax.tick_params(axis='x', which='major', labelsize=4)\n",
1517 |     "\n",
1518 |     "# Energy consumption\n",
1519 |     "ax = fig.add_subplot(gs[2])\n",
1520 |     "ax.plot(hh_energy['2012-07-01':'2012-07-15'], color=colors[1])\n",
1521 |     "ax.set_title(f'Energy consumption for household {hhid}')\n",
1522 |     "ax.tick_params(axis='x', which='both', labelbottom=False)\n",
1523 |     "\n",
1524 |     "ax = fig.add_subplot(gs[3])\n",
1525 |     "sm.graphics.tsa.plot_acf(hh_energy['2012-07-01':'2012-07-15'].values.squeeze(), ax=ax, markersize=1, title='')\n",
1526 |     "ax.set_ylim(-0.3, 0.3)\n",
1527 |     "ax.tick_params(axis='x', which='major', labelsize=4)\n",
1528 |     "\n",
1529 |     "# Daily temperature:\n",
1530 |     "ax = fig.add_subplot(gs[4])\n",
1531 |     "start = '2012-07-01'\n",
1532 |     "end = '2012-07-15'\n",
1533 |     "ax.plot(weather_df.loc['2013-01-01':'2013-01-31']['temperature'], color=colors[2])\n",
1534 |     "ax.set_title(f'Daily temperature')\n",
1535 |     "ax.tick_params(axis='x', which='both', labelbottom=False)\n",
1536 |     "\n",
1537 |     "ax = fig.add_subplot(gs[5])\n",
1538 |     "sm.graphics.tsa.plot_acf(weather_df.loc['2013-01-01':'2013-01-31']['temperature'].values.squeeze(), ax=ax, markersize=1, title='')\n",
1539 |     "ax.set_ylim(-1.2, 1.2)\n",
1540 |     "ax.tick_params(axis='x', which='major', labelsize=4)\n",
1541 |     "\n",
1542 |     "plt.show()"
1543 |    ]
1544 |   },
1545 |   {
1546 |    "cell_type": "code",
1547 |    "execution_count": null,
1548 |    "id": "0f9cd561",
1549 |    "metadata": {},
1550 |    "outputs": [],
1551 |    "source": [
1552 |     "from statsmodels.tsa.seasonal import STL\n",
1553 |     "\n",
1554 |     "endog = endog.resample('30T').mean()"
1555 |    ]
1556 |   },
1557 |   {
1558 |    "cell_type": "code",
1559 |    "execution_count": null,
1560 |    "id": "2af017b7",
1561 |    "metadata": {},
1562 |    "outputs": [],
1563 |    "source": [
1564 |     "plt.rcParams['lines.markersize'] = 1\n",
1565 |     "\n",
1566 |     "title = f'Energy consumption for household {hhid}'\n",
1567 |     "endog = hh_energy['2012-07-01':'2012-07-15']\n",
1568 |     "endog.columns = [title]\n",
1569 |     "endog = endog[title]\n",
1570 |     "stl = STL(endog, period=48)\n",
1571 |     "res = stl.fit()\n",
1572 |     "fig = res.plot()\n",
1573 |     "\n",
1574 |     "fig = plt.gcf()\n",
1575 |     "fig.set_size_inches(5.5, 4)\n",
1576 |     "\n",
1577 |     "plt.show()"
1578 |    ]
1579 |   },
1580 |   {
1581 |    "cell_type": "markdown",
1582 |    "id": "ebb389e7",
1583 |    "metadata": {},
1584 |    "source": [
1585 |     "## Binary segmentation\n",
1586 |     "---"
1587 |    ]
1588 |   },
1589 |   {
1590 |    "cell_type": "code",
1591 |    "execution_count": null,
1592 |    "id": "b4494d2e",
1593 |    "metadata": {},
1594 |    "outputs": [],
1595 |    "source": [
1596 |     "signal = weather_df.loc['2013-01-01':'2013-01-31']['temperature'].values.squeeze()\n",
1597 |     "algo = rpt.Binseg(model='l2').fit(signal)\n",
1598 |     "my_bkps = algo.predict(n_bkps=3)"
1599 |    ]
1600 |   },
1601 |   {
1602 |    "cell_type": "code",
1603 |    "execution_count": null,
1604 |    "id": "d13ccee7",
1605 |    "metadata": {},
1606 |    "outputs": [],
1607 |    "source": [
1608 |     "my_bkps = [0] + my_bkps\n",
1609 |     "my_bkps"
1610 |    ]
1611 |   },
1612 |   {
1613 |    "cell_type": "code",
1614 |    "execution_count": null,
1615 |    "id": "6379306a",
1616 |    "metadata": {},
1617 |    "outputs": [],
1618 |    "source": [
1619 |     "fig = plt.figure(figsize=(5.5,1))\n",
1620 |     "start = '2012-07-01'\n",
1621 |     "end = '2012-07-15'\n",
1622 |     "plt.plot(weather_df.loc['2013-01-01':'2013-01-31']['temperature'], color='#FFFFFF', linewidth=1.2, alpha=0.8)\n",
1623 |     "plt.plot(weather_df.loc['2013-01-01':'2013-01-31']['temperature'], color=colors[2], linewidth=0.7)\n",
1624 |     "\n",
1625 |     "plt.title(f'Daily temperature')\n",
1626 |     "plt.xticks(rotation=60, fontsize=4)\n",
1627 |     "\n",
1628 |     "weather_index = weather_df.loc['2013-01-01':'2013-01-31']['temperature'].index\n",
1629 |     "\n",
1630 |     "for index, bkps in enumerate(my_bkps[:-1]):\n",
1631 |     "    x1 = weather_index[my_bkps[index]]\n",
1632 |     "    x2 = weather_index[np.clip(my_bkps[index+1], 0, len(weather_index)-1)]\n",
1633 |     "    \n",
1634 |     "    plt.axvspan(x1, x2, color=colors[index % 5], alpha=0.2)\n",
1635 |     "\n",
1636 |     "plt.title('Daily temperature segmentation')\n",
1637 |     "plt.show()"
1638 |    ]
1639 |   }
1640 |  ],
1641 |  "metadata": {
1642 |   "kernelspec": {
1643 |    "display_name": "conda_python3",
1644 |    "language": "python",
1645 |    "name": "conda_python3"
1646 |   },
1647 |   "language_info": {
1648 |    "codemirror_mode": {
1649 |     "name": "ipython",
1650 |     "version": 3
1651 |    },
1652 |    "file_extension": ".py",
1653 |    "mimetype": "text/x-python",
1654 |    "name": "python",
1655 |    "nbconvert_exporter": "python",
1656 |    "pygments_lexer": "ipython3",
1657 |    "version": "3.6.13"
1658 |   }
1659 |  },
1660 |  "nbformat": 4,
1661 |  "nbformat_minor": 5
1662 | }
1663 | 


--------------------------------------------------------------------------------
/Chapter03/chapter3-dataset-preparation.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "id": "eabe8a1f",
   6 |    "metadata": {},
   7 |    "source": [
   8 |     "# Time series analysis on AWS\n",
   9 |     "*Chapter 3 - Creating a project and ingesting your data*"
  10 |    ]
  11 |   },
  12 |   {
  13 |    "cell_type": "markdown",
  14 |    "id": "bcb5ce53",
  15 |    "metadata": {},
  16 |    "source": [
  17 |     "## Initializations\n",
  18 |     "---"
  19 |    ]
  20 |   },
  21 |   {
  22 |    "cell_type": "code",
  23 |    "execution_count": 1,
  24 |    "id": "34609045",
  25 |    "metadata": {},
  26 |    "outputs": [],
  27 |    "source": [
  28 |     "!pip install --quiet tqdm kaggle"
  29 |    ]
  30 |   },
  31 |   {
  32 |    "cell_type": "markdown",
  33 |    "id": "4b2e4f0a",
  34 |    "metadata": {},
  35 |    "source": [
  36 |     "### Imports"
  37 |    ]
  38 |   },
  39 |   {
  40 |    "cell_type": "code",
  41 |    "execution_count": 1,
  42 |    "id": "e5ea312c",
  43 |    "metadata": {},
  44 |    "outputs": [],
  45 |    "source": [
  46 |     "import numpy as np\n",
  47 |     "import os\n",
  48 |     "import pandas as pd\n",
  49 |     "import warnings\n",
  50 |     "import zipfile\n",
  51 |     "\n",
  52 |     "from tqdm import tqdm"
  53 |    ]
  54 |   },
  55 |   {
  56 |    "cell_type": "markdown",
  57 |    "id": "069698f4",
  58 |    "metadata": {},
  59 |    "source": [
  60 |     "### Parameters"
  61 |    ]
  62 |   },
  63 |   {
  64 |    "cell_type": "code",
  65 |    "execution_count": 2,
  66 |    "id": "1f46465d",
  67 |    "metadata": {},
  68 |    "outputs": [],
  69 |    "source": [
  70 |     "RAW_DATA = os.path.join('..', 'Data', 'raw')\n",
  71 |     "DATA = os.path.join('..', 'Data')\n",
  72 |     "warnings.filterwarnings(\"ignore\")\n",
  73 |     "os.makedirs(RAW_DATA, exist_ok=True)"
  74 |    ]
  75 |   },
  76 |   {
  77 |    "cell_type": "markdown",
  78 |    "id": "7f5af546",
  79 |    "metadata": {},
  80 |    "source": [
  81 |     "### Helper functions"
  82 |    ]
  83 |   },
  84 |   {
  85 |    "cell_type": "code",
  86 |    "execution_count": 3,
  87 |    "id": "3ce3ebfe",
  88 |    "metadata": {},
  89 |    "outputs": [],
  90 |    "source": [
  91 |     "from IPython.display import display_html\n",
  92 |     "\n",
  93 |     "def display_multiple_dataframe(*args, max_rows=None, max_cols=None):\n",
  94 |     "    html_str = ''\n",
  95 |     "    for df in args:\n",
  96 |     "        html_str += df.to_html(max_cols=max_cols, max_rows=max_rows)\n",
  97 |     "        \n",
  98 |     "    display_html(html_str.replace('table','table style=\"display:inline\"'), raw=True)"
  99 |    ]
 100 |   },
 101 |   {
 102 |    "cell_type": "markdown",
 103 |    "id": "cfeca823",
 104 |    "metadata": {},
 105 |    "source": [
 106 |     "### Downloading datasets"
 107 |    ]
 108 |   },
 109 |   {
 110 |    "cell_type": "markdown",
 111 |    "id": "f75fde65",
 112 |    "metadata": {},
 113 |    "source": [
 114 |     "To download the London household energy consumption with weather data from Kaggle, you will need a Kaggle API token. To do so, you will need to have an account on Kaggle and create a token that you install on your machine. You can follow [**this link**](https://www.kaggle.com/docs/api) to get started with the Kaggle API. Once generated, make sure your Kaggle token is stored in the `~/.kaggle/kaggle.json` file, or the next cells will issue an error. To get a Kaggle token, go to kaggle.com and create an account. Then navigate to **My account** and scroll down to the API section. There, click the **Create new API token** button:\n",
 115 |     "\n",
 116 |     "<img src=\"../Assets/kaggle_api.png\" />"
 117 |    ]
 118 |   },
 119 |   {
 120 |    "cell_type": "code",
 121 |    "execution_count": 4,
 122 |    "id": "31f242da",
 123 |    "metadata": {},
 124 |    "outputs": [
 125 |     {
 126 |      "name": "stdout",
 127 |      "output_type": "stream",
 128 |      "text": [
 129 |       "File found, skipping download\n"
 130 |      ]
 131 |     }
 132 |    ],
 133 |    "source": [
 134 |     "FILE_NAME    = 'smart-meters-in-london.zip'\n",
 135 |     "ARCHIVE_PATH = os.path.join(RAW_DATA, FILE_NAME)\n",
 136 |     "FILE_PATH    = os.path.join(DATA, 'energy-london', 'smart-meters-in-london.zip')\n",
 137 |     "FILE_DIR     = os.path.dirname(FILE_PATH)\n",
 138 |     "\n",
 139 |     "# Checks if the data were already downloaded:\n",
 140 |     "if os.path.exists(os.path.join(DATA, 'energy-london', 'acorn_details.csv')):\n",
 141 |     "    print(\"File found, skipping download\")\n",
 142 |     "    \n",
 143 |     "else:\n",
 144 |     "    # Downloading and unzipping datasets from Kaggle:\n",
 145 |     "    print(\"Downloading dataset (2.26G), can take a few minutes depending on your connection\")\n",
 146 |     "    os.makedirs(os.path.join(DATA, 'energy-london'), exist_ok=True)\n",
 147 |     "    !kaggle datasets download -d jeanmidev/smart-meters-in-london -p $RAW_DATA\n",
 148 |     "    \n",
 149 |     "    print('Unzipping files...')\n",
 150 |     "    zip_ref = zipfile.ZipFile(ARCHIVE_PATH, 'r')\n",
 151 |     "    zip_ref.extractall(FILE_DIR + '/')\n",
 152 |     "    zip_ref.close()\n",
 153 |     "    \n",
 154 |     "    !rm $DATA/energy-london/*zip\n",
 155 |     "    !rm $DATA/energy-london/*gz\n",
 156 |     "    !mv $DATA/energy-london/halfhourly_dataset/halfhourly_dataset/* $DATA/energy-london/halfhourly_dataset\n",
 157 |     "    !rm -Rf $DATA/energy-london/halfhourly_dataset/halfhourly_dataset\n",
 158 |     "    !mv $DATA/energy-london/daily_dataset/daily_dataset/* $DATA/energy-london/daily_dataset\n",
 159 |     "    !rm -Rf $DATA/energy-london/daily_dataset/daily_dataset"
 160 |    ]
 161 |   },
 162 |   {
 163 |    "cell_type": "markdown",
 164 |    "id": "546a8212",
 165 |    "metadata": {},
 166 |    "source": [
 167 |     "## Dataset visualization\n",
 168 |     "---"
 169 |    ]
 170 |   },
 171 |   {
 172 |    "cell_type": "markdown",
 173 |    "id": "9d9797d3",
 174 |    "metadata": {},
 175 |    "source": [
 176 |     "We want to filter out households that are subject to the dToU tariff and keep only the ones with a known ACORN (i.e. not in the ACORN-U group): this will allow us to better model future analysis by adding the Acorn detail information (which, by definition, won't be available for the ACORN-U group)."
 177 |    ]
 178 |   },
 179 |   {
 180 |    "cell_type": "code",
 181 |    "execution_count": 5,
 182 |    "id": "9ae29c75",
 183 |    "metadata": {},
 184 |    "outputs": [
 185 |     {
 186 |      "name": "stdout",
 187 |      "output_type": "stream",
 188 |      "text": [
 189 |       "(4404, 5)\n"
 190 |      ]
 191 |     },
 192 |     {
 193 |      "data": {
 194 |       "text/html": [
 195 |        "<div>\n",
 196 |        "<style scoped>\n",
 197 |        "    .dataframe tbody tr th:only-of-type {\n",
 198 |        "        vertical-align: middle;\n",
 199 |        "    }\n",
 200 |        "\n",
 201 |        "    .dataframe tbody tr th {\n",
 202 |        "        vertical-align: top;\n",
 203 |        "    }\n",
 204 |        "\n",
 205 |        "    .dataframe thead th {\n",
 206 |        "        text-align: right;\n",
 207 |        "    }\n",
 208 |        "</style>\n",
 209 |        "<table border=\"1\" class=\"dataframe\">\n",
 210 |        "  <thead>\n",
 211 |        "    <tr style=\"text-align: right;\">\n",
 212 |        "      <th></th>\n",
 213 |        "      <th>LCLid</th>\n",
 214 |        "      <th>stdorToU</th>\n",
 215 |        "      <th>Acorn</th>\n",
 216 |        "      <th>Acorn_grouped</th>\n",
 217 |        "      <th>file</th>\n",
 218 |        "    </tr>\n",
 219 |        "  </thead>\n",
 220 |        "  <tbody>\n",
 221 |        "    <tr>\n",
 222 |        "      <th>2</th>\n",
 223 |        "      <td>MAC000002</td>\n",
 224 |        "      <td>Std</td>\n",
 225 |        "      <td>ACORN-A</td>\n",
 226 |        "      <td>Affluent</td>\n",
 227 |        "      <td>block_0</td>\n",
 228 |        "    </tr>\n",
 229 |        "    <tr>\n",
 230 |        "      <th>3</th>\n",
 231 |        "      <td>MAC003613</td>\n",
 232 |        "      <td>Std</td>\n",
 233 |        "      <td>ACORN-A</td>\n",
 234 |        "      <td>Affluent</td>\n",
 235 |        "      <td>block_0</td>\n",
 236 |        "    </tr>\n",
 237 |        "    <tr>\n",
 238 |        "      <th>4</th>\n",
 239 |        "      <td>MAC003597</td>\n",
 240 |        "      <td>Std</td>\n",
 241 |        "      <td>ACORN-A</td>\n",
 242 |        "      <td>Affluent</td>\n",
 243 |        "      <td>block_0</td>\n",
 244 |        "    </tr>\n",
 245 |        "    <tr>\n",
 246 |        "      <th>5</th>\n",
 247 |        "      <td>MAC003579</td>\n",
 248 |        "      <td>Std</td>\n",
 249 |        "      <td>ACORN-A</td>\n",
 250 |        "      <td>Affluent</td>\n",
 251 |        "      <td>block_0</td>\n",
 252 |        "    </tr>\n",
 253 |        "    <tr>\n",
 254 |        "      <th>6</th>\n",
 255 |        "      <td>MAC003566</td>\n",
 256 |        "      <td>Std</td>\n",
 257 |        "      <td>ACORN-A</td>\n",
 258 |        "      <td>Affluent</td>\n",
 259 |        "      <td>block_0</td>\n",
 260 |        "    </tr>\n",
 261 |        "  </tbody>\n",
 262 |        "</table>\n",
 263 |        "</div>"
 264 |       ],
 265 |       "text/plain": [
 266 |        "       LCLid stdorToU    Acorn Acorn_grouped     file\n",
 267 |        "2  MAC000002      Std  ACORN-A      Affluent  block_0\n",
 268 |        "3  MAC003613      Std  ACORN-A      Affluent  block_0\n",
 269 |        "4  MAC003597      Std  ACORN-A      Affluent  block_0\n",
 270 |        "5  MAC003579      Std  ACORN-A      Affluent  block_0\n",
 271 |        "6  MAC003566      Std  ACORN-A      Affluent  block_0"
 272 |       ]
 273 |      },
 274 |      "execution_count": 5,
 275 |      "metadata": {},
 276 |      "output_type": "execute_result"
 277 |     }
 278 |    ],
 279 |    "source": [
 280 |     "household_filename = os.path.join(DATA, 'energy-london', 'informations_households.csv')\n",
 281 |     "household_df = pd.read_csv(household_filename)\n",
 282 |     "household_df = household_df[(household_df['stdorToU'] == 'Std') & (household_df['Acorn'] != 'ACORN-U')]\n",
 283 |     "household_ids = household_df['LCLid'].tolist()\n",
 284 |     "print(household_df.shape)\n",
 285 |     "household_df.head()"
 286 |    ]
 287 |   },
 288 |   {
 289 |    "cell_type": "code",
 290 |    "execution_count": 6,
 291 |    "id": "cca71b9a",
 292 |    "metadata": {},
 293 |    "outputs": [],
 294 |    "source": [
 295 |     "bad_household_ids = [\n",
 296 |     "    'MAC002136', 'MAC002594', 'MAC000636', 'MAC001309', 'MAC001269', 'MAC000037', 'MAC002072', 'MAC000197', \n",
 297 |     "    'MAC001644', 'MAC005040', 'MAC000404', 'MAC004982', 'MAC001959', 'MAC002564', 'MAC001829', 'MAC000504', \n",
 298 |     "    'MAC001522', 'MAC001456', 'MAC004732', 'MAC000915', 'MAC005232', 'MAC003993', 'MAC000530', 'MAC000235', \n",
 299 |     "    'MAC001549', 'MAC000220', 'MAC005344', 'MAC000120', 'MAC000172', 'MAC002050', 'MAC000287', 'MAC005191', \n",
 300 |     "    'MAC000964'\n",
 301 |     "]\n",
 302 |     "household_df = household_df[~household_df['LCLid'].isin(bad_household_ids)]"
 303 |    ]
 304 |   },
 305 |   {
 306 |    "cell_type": "markdown",
 307 |    "id": "a4c97db3",
 308 |    "metadata": {},
 309 |    "source": [
 310 |     "#### Associating households with their energy consumption data\n",
 311 |     "Each household (with an ID starting by `MACxxxxx` in the table above) has its consumption data stored in a block file named `block_xx`. This file name is also available from the `informations_households.csv` file extracted above. We have the association between `household_id` and `block_file`: we can open each of them and keep the consumption for the households of interest. All these data will be concatenated into an `energy_df` dataframe. For the remainder of this analysis, we are going to extract 1 year of data between July 1st, 2012 and June 30th, 2013 and we are going to keep only the households with almost 100% complete data for this period:"
 312 |    ]
 313 |   },
 314 |   {
 315 |    "cell_type": "code",
 316 |    "execution_count": 7,
 317 |    "id": "91caae93",
 318 |    "metadata": {},
 319 |    "outputs": [],
 320 |    "source": [
 321 |     "start            = pd.to_datetime('2012-07-01 00:00:00')\n",
 322 |     "end              = pd.to_datetime('2013-06-30 23:59:00')\n",
 323 |     "threshold        = 0.95\n",
 324 |     "min_data_points  = ((end - start).days + 1)*24*2 * threshold"
 325 |    ]
 326 |   },
 327 |   {
 328 |    "cell_type": "code",
 329 |    "execution_count": 8,
 330 |    "id": "4971259e",
 331 |    "metadata": {},
 332 |    "outputs": [
 333 |     {
 334 |      "name": "stdout",
 335 |      "output_type": "stream",
 336 |      "text": [
 337 |       "Half-hourly consumption file already exists, loading from disk...\n",
 338 |       "Done.\n"
 339 |      ]
 340 |     }
 341 |    ],
 342 |    "source": [
 343 |     "consumption_file = os.path.join(DATA, 'energy-london', 'half_hourly_consumption-v2.csv')\n",
 344 |     "if os.path.exists(consumption_file):\n",
 345 |     "    print('Half-hourly consumption file already exists, loading from disk...')\n",
 346 |     "    energy_df = pd.read_csv(consumption_file)\n",
 347 |     "    energy_df['timestamp'] = pd.to_datetime(energy_df['timestamp'], format='%Y-%m-%d %H:%M:%S.%f')\n",
 348 |     "    print('Done.')\n",
 349 |     "    \n",
 350 |     "else:\n",
 351 |     "    print('Half-hourly consumption file not found. We need to generate it.')\n",
 352 |     "    \n",
 353 |     "    # We now have the block number we can use to open the right file:\n",
 354 |     "    energy_df = pd.DataFrame()\n",
 355 |     "    target_block_files = household_df['file'].unique().tolist()\n",
 356 |     "    print('- {} block files to process: '.format(len(target_block_files)), end='')\n",
 357 |     "    df_list = []\n",
 358 |     "    for block_file in tqdm(target_block_files):\n",
 359 |     "        # Reads the current block file:\n",
 360 |     "        current_filename = os.path.join(DATA, 'energy-london', 'halfhourly_dataset', '{}.csv'.format(block_file))\n",
 361 |     "        df = pd.read_csv(current_filename)\n",
 362 |     "        \n",
 363 |     "        # Set readable column names and adjust data types:\n",
 364 |     "        df.columns = ['household_id', 'timestamp', 'energy']\n",
 365 |     "        df = df.replace(to_replace='Null', value=0.0)\n",
 366 |     "        df['energy'] = df['energy'].astype(np.float64)\n",
 367 |     "        df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%d %H:%M:%S.%f')\n",
 368 |     "        df = df[(df['timestamp'] >= start) & (df['timestamp'] <= end)]\n",
 369 |     "        \n",
 370 |     "        # We filter on the households sampled earlier:\n",
 371 |     "        df_list.append(df[df['household_id'].isin(household_ids)].reset_index(drop=True))\n",
 372 |     "        del df\n",
 373 |     "    \n",
 374 |     "    # Concatenate with the main dataframe:\n",
 375 |     "    energy_df = pd.concat(df_list, axis='index', ignore_index=True)\n",
 376 |     "    \n",
 377 |     "    datapoints = energy_df.groupby(by='household_id').count()\n",
 378 |     "    datapoints = datapoints[datapoints['timestamp'] < min_data_points]\n",
 379 |     "    hhid_to_remove = datapoints.index.tolist()\n",
 380 |     "    energy_df = energy_df[~energy_df['household_id'].isin(hhid_to_remove)]\n",
 381 |     "\n",
 382 |     "    # Let's save this dataset to disk, we will use it from now on:\n",
 383 |     "    print('Saving file to disk... ', end='')\n",
 384 |     "    energy_df.to_csv(consumption_file, index=False)\n",
 385 |     "    print('Done.')"
 386 |    ]
 387 |   },
 388 |   {
 389 |    "cell_type": "markdown",
 390 |    "id": "02577d57",
 391 |    "metadata": {},
 392 |    "source": [
 393 |     "Here is an extract from one of the half-hourly block files:"
 394 |    ]
 395 |   },
 396 |   {
 397 |    "cell_type": "code",
 398 |    "execution_count": 9,
 399 |    "id": "5d888872",
 400 |    "metadata": {},
 401 |    "outputs": [
 402 |     {
 403 |      "data": {
 404 |       "text/html": [
 405 |        "<div>\n",
 406 |        "<style scoped>\n",
 407 |        "    .dataframe tbody tr th:only-of-type {\n",
 408 |        "        vertical-align: middle;\n",
 409 |        "    }\n",
 410 |        "\n",
 411 |        "    .dataframe tbody tr th {\n",
 412 |        "        vertical-align: top;\n",
 413 |        "    }\n",
 414 |        "\n",
 415 |        "    .dataframe thead th {\n",
 416 |        "        text-align: right;\n",
 417 |        "    }\n",
 418 |        "</style>\n",
 419 |        "<table border=\"1\" class=\"dataframe\">\n",
 420 |        "  <thead>\n",
 421 |        "    <tr style=\"text-align: right;\">\n",
 422 |        "      <th></th>\n",
 423 |        "      <th>LCLid</th>\n",
 424 |        "      <th>tstp</th>\n",
 425 |        "      <th>energy(kWh/hh)</th>\n",
 426 |        "    </tr>\n",
 427 |        "  </thead>\n",
 428 |        "  <tbody>\n",
 429 |        "    <tr>\n",
 430 |        "      <th>342953</th>\n",
 431 |        "      <td>MAC002543</td>\n",
 432 |        "      <td>2012-07-09 11:30:00.0000000</td>\n",
 433 |        "      <td>0.054</td>\n",
 434 |        "    </tr>\n",
 435 |        "    <tr>\n",
 436 |        "      <th>342954</th>\n",
 437 |        "      <td>MAC002543</td>\n",
 438 |        "      <td>2012-07-09 12:00:00.0000000</td>\n",
 439 |        "      <td>0.053</td>\n",
 440 |        "    </tr>\n",
 441 |        "    <tr>\n",
 442 |        "      <th>342955</th>\n",
 443 |        "      <td>MAC002543</td>\n",
 444 |        "      <td>2012-07-09 12:30:00.0000000</td>\n",
 445 |        "      <td>0.053</td>\n",
 446 |        "    </tr>\n",
 447 |        "    <tr>\n",
 448 |        "      <th>342956</th>\n",
 449 |        "      <td>MAC002543</td>\n",
 450 |        "      <td>2012-07-09 13:00:00.0000000</td>\n",
 451 |        "      <td>0.053</td>\n",
 452 |        "    </tr>\n",
 453 |        "    <tr>\n",
 454 |        "      <th>342957</th>\n",
 455 |        "      <td>MAC002543</td>\n",
 456 |        "      <td>2012-07-09 13:30:00.0000000</td>\n",
 457 |        "      <td>0.053</td>\n",
 458 |        "    </tr>\n",
 459 |        "  </tbody>\n",
 460 |        "</table>\n",
 461 |        "</div>"
 462 |       ],
 463 |       "text/plain": [
 464 |        "            LCLid                         tstp energy(kWh/hh)\n",
 465 |        "342953  MAC002543  2012-07-09 11:30:00.0000000         0.054 \n",
 466 |        "342954  MAC002543  2012-07-09 12:00:00.0000000         0.053 \n",
 467 |        "342955  MAC002543  2012-07-09 12:30:00.0000000         0.053 \n",
 468 |        "342956  MAC002543  2012-07-09 13:00:00.0000000         0.053 \n",
 469 |        "342957  MAC002543  2012-07-09 13:30:00.0000000         0.053 "
 470 |       ]
 471 |      },
 472 |      "execution_count": 9,
 473 |      "metadata": {},
 474 |      "output_type": "execute_result"
 475 |     }
 476 |    ],
 477 |    "source": [
 478 |     "block_filename = os.path.join(DATA, 'energy-london', 'halfhourly_dataset', 'block_12.csv')\n",
 479 |     "block_df = pd.read_csv(block_filename)\n",
 480 |     "block_df[342953:].head()"
 481 |    ]
 482 |   },
 483 |   {
 484 |    "cell_type": "code",
 485 |    "execution_count": 10,
 486 |    "id": "4c7b4cb7",
 487 |    "metadata": {},
 488 |    "outputs": [
 489 |     {
 490 |      "name": "stdout",
 491 |      "output_type": "stream",
 492 |      "text": [
 493 |       "(8760, 8)\n"
 494 |      ]
 495 |     },
 496 |     {
 497 |      "data": {
 498 |       "text/html": [
 499 |        "<div>\n",
 500 |        "<style scoped>\n",
 501 |        "    .dataframe tbody tr th:only-of-type {\n",
 502 |        "        vertical-align: middle;\n",
 503 |        "    }\n",
 504 |        "\n",
 505 |        "    .dataframe tbody tr th {\n",
 506 |        "        vertical-align: top;\n",
 507 |        "    }\n",
 508 |        "\n",
 509 |        "    .dataframe thead th {\n",
 510 |        "        text-align: right;\n",
 511 |        "    }\n",
 512 |        "</style>\n",
 513 |        "<table border=\"1\" class=\"dataframe\">\n",
 514 |        "  <thead>\n",
 515 |        "    <tr style=\"text-align: right;\">\n",
 516 |        "      <th></th>\n",
 517 |        "      <th>visibility</th>\n",
 518 |        "      <th>windBearing</th>\n",
 519 |        "      <th>temperature</th>\n",
 520 |        "      <th>dewPoint</th>\n",
 521 |        "      <th>pressure</th>\n",
 522 |        "      <th>apparentTemperature</th>\n",
 523 |        "      <th>windSpeed</th>\n",
 524 |        "      <th>humidity</th>\n",
 525 |        "    </tr>\n",
 526 |        "    <tr>\n",
 527 |        "      <th>time</th>\n",
 528 |        "      <th></th>\n",
 529 |        "      <th></th>\n",
 530 |        "      <th></th>\n",
 531 |        "      <th></th>\n",
 532 |        "      <th></th>\n",
 533 |        "      <th></th>\n",
 534 |        "      <th></th>\n",
 535 |        "      <th></th>\n",
 536 |        "    </tr>\n",
 537 |        "  </thead>\n",
 538 |        "  <tbody>\n",
 539 |        "    <tr>\n",
 540 |        "      <th>2012-07-01 00:00:00</th>\n",
 541 |        "      <td>13.24</td>\n",
 542 |        "      <td>231</td>\n",
 543 |        "      <td>13.44</td>\n",
 544 |        "      <td>10.25</td>\n",
 545 |        "      <td>1011.33</td>\n",
 546 |        "      <td>13.44</td>\n",
 547 |        "      <td>4.14</td>\n",
 548 |        "      <td>0.81</td>\n",
 549 |        "    </tr>\n",
 550 |        "    <tr>\n",
 551 |        "      <th>2012-07-01 01:00:00</th>\n",
 552 |        "      <td>13.37</td>\n",
 553 |        "      <td>232</td>\n",
 554 |        "      <td>13.25</td>\n",
 555 |        "      <td>10.20</td>\n",
 556 |        "      <td>1011.61</td>\n",
 557 |        "      <td>13.25</td>\n",
 558 |        "      <td>4.07</td>\n",
 559 |        "      <td>0.82</td>\n",
 560 |        "    </tr>\n",
 561 |        "    <tr>\n",
 562 |        "      <th>2012-07-01 02:00:00</th>\n",
 563 |        "      <td>13.08</td>\n",
 564 |        "      <td>229</td>\n",
 565 |        "      <td>12.28</td>\n",
 566 |        "      <td>9.62</td>\n",
 567 |        "      <td>1011.81</td>\n",
 568 |        "      <td>12.28</td>\n",
 569 |        "      <td>3.76</td>\n",
 570 |        "      <td>0.84</td>\n",
 571 |        "    </tr>\n",
 572 |        "    <tr>\n",
 573 |        "      <th>2012-07-01 03:00:00</th>\n",
 574 |        "      <td>13.79</td>\n",
 575 |        "      <td>229</td>\n",
 576 |        "      <td>11.77</td>\n",
 577 |        "      <td>9.23</td>\n",
 578 |        "      <td>1011.90</td>\n",
 579 |        "      <td>11.77</td>\n",
 580 |        "      <td>3.75</td>\n",
 581 |        "      <td>0.84</td>\n",
 582 |        "    </tr>\n",
 583 |        "    <tr>\n",
 584 |        "      <th>2012-07-01 04:00:00</th>\n",
 585 |        "      <td>13.68</td>\n",
 586 |        "      <td>223</td>\n",
 587 |        "      <td>11.26</td>\n",
 588 |        "      <td>9.06</td>\n",
 589 |        "      <td>1012.28</td>\n",
 590 |        "      <td>11.26</td>\n",
 591 |        "      <td>3.88</td>\n",
 592 |        "      <td>0.86</td>\n",
 593 |        "    </tr>\n",
 594 |        "    <tr>\n",
 595 |        "      <th>...</th>\n",
 596 |        "      <td>...</td>\n",
 597 |        "      <td>...</td>\n",
 598 |        "      <td>...</td>\n",
 599 |        "      <td>...</td>\n",
 600 |        "      <td>...</td>\n",
 601 |        "      <td>...</td>\n",
 602 |        "      <td>...</td>\n",
 603 |        "      <td>...</td>\n",
 604 |        "    </tr>\n",
 605 |        "    <tr>\n",
 606 |        "      <th>2013-06-30 19:00:00</th>\n",
 607 |        "      <td>15.50</td>\n",
 608 |        "      <td>245</td>\n",
 609 |        "      <td>21.03</td>\n",
 610 |        "      <td>12.39</td>\n",
 611 |        "      <td>1018.59</td>\n",
 612 |        "      <td>21.03</td>\n",
 613 |        "      <td>5.23</td>\n",
 614 |        "      <td>0.58</td>\n",
 615 |        "    </tr>\n",
 616 |        "    <tr>\n",
 617 |        "      <th>2013-06-30 20:00:00</th>\n",
 618 |        "      <td>16.09</td>\n",
 619 |        "      <td>248</td>\n",
 620 |        "      <td>19.67</td>\n",
 621 |        "      <td>12.34</td>\n",
 622 |        "      <td>1018.53</td>\n",
 623 |        "      <td>19.67</td>\n",
 624 |        "      <td>4.30</td>\n",
 625 |        "      <td>0.63</td>\n",
 626 |        "    </tr>\n",
 627 |        "    <tr>\n",
 628 |        "      <th>2013-06-30 21:00:00</th>\n",
 629 |        "      <td>14.31</td>\n",
 630 |        "      <td>254</td>\n",
 631 |        "      <td>18.46</td>\n",
 632 |        "      <td>11.99</td>\n",
 633 |        "      <td>1018.86</td>\n",
 634 |        "      <td>18.46</td>\n",
 635 |        "      <td>4.84</td>\n",
 636 |        "      <td>0.66</td>\n",
 637 |        "    </tr>\n",
 638 |        "    <tr>\n",
 639 |        "      <th>2013-06-30 22:00:00</th>\n",
 640 |        "      <td>13.58</td>\n",
 641 |        "      <td>267</td>\n",
 642 |        "      <td>16.25</td>\n",
 643 |        "      <td>11.40</td>\n",
 644 |        "      <td>1019.10</td>\n",
 645 |        "      <td>16.25</td>\n",
 646 |        "      <td>4.49</td>\n",
 647 |        "      <td>0.73</td>\n",
 648 |        "    </tr>\n",
 649 |        "    <tr>\n",
 650 |        "      <th>2013-06-30 23:00:00</th>\n",
 651 |        "      <td>14.08</td>\n",
 652 |        "      <td>265</td>\n",
 653 |        "      <td>15.10</td>\n",
 654 |        "      <td>10.67</td>\n",
 655 |        "      <td>1018.91</td>\n",
 656 |        "      <td>15.10</td>\n",
 657 |        "      <td>4.34</td>\n",
 658 |        "      <td>0.75</td>\n",
 659 |        "    </tr>\n",
 660 |        "  </tbody>\n",
 661 |        "</table>\n",
 662 |        "<p>8760 rows × 8 columns</p>\n",
 663 |        "</div>"
 664 |       ],
 665 |       "text/plain": [
 666 |        "                     visibility  windBearing  temperature  dewPoint  pressure  \\\n",
 667 |        "time                                                                            \n",
 668 |        "2012-07-01 00:00:00       13.24          231        13.44     10.25   1011.33   \n",
 669 |        "2012-07-01 01:00:00       13.37          232        13.25     10.20   1011.61   \n",
 670 |        "2012-07-01 02:00:00       13.08          229        12.28      9.62   1011.81   \n",
 671 |        "2012-07-01 03:00:00       13.79          229        11.77      9.23   1011.90   \n",
 672 |        "2012-07-01 04:00:00       13.68          223        11.26      9.06   1012.28   \n",
 673 |        "...                         ...          ...          ...       ...       ...   \n",
 674 |        "2013-06-30 19:00:00       15.50          245        21.03     12.39   1018.59   \n",
 675 |        "2013-06-30 20:00:00       16.09          248        19.67     12.34   1018.53   \n",
 676 |        "2013-06-30 21:00:00       14.31          254        18.46     11.99   1018.86   \n",
 677 |        "2013-06-30 22:00:00       13.58          267        16.25     11.40   1019.10   \n",
 678 |        "2013-06-30 23:00:00       14.08          265        15.10     10.67   1018.91   \n",
 679 |        "\n",
 680 |        "                     apparentTemperature  windSpeed  humidity  \n",
 681 |        "time                                                           \n",
 682 |        "2012-07-01 00:00:00                13.44       4.14      0.81  \n",
 683 |        "2012-07-01 01:00:00                13.25       4.07      0.82  \n",
 684 |        "2012-07-01 02:00:00                12.28       3.76      0.84  \n",
 685 |        "2012-07-01 03:00:00                11.77       3.75      0.84  \n",
 686 |        "2012-07-01 04:00:00                11.26       3.88      0.86  \n",
 687 |        "...                                  ...        ...       ...  \n",
 688 |        "2013-06-30 19:00:00                21.03       5.23      0.58  \n",
 689 |        "2013-06-30 20:00:00                19.67       4.30      0.63  \n",
 690 |        "2013-06-30 21:00:00                18.46       4.84      0.66  \n",
 691 |        "2013-06-30 22:00:00                16.25       4.49      0.73  \n",
 692 |        "2013-06-30 23:00:00                15.10       4.34      0.75  \n",
 693 |        "\n",
 694 |        "[8760 rows x 8 columns]"
 695 |       ]
 696 |      },
 697 |      "execution_count": 10,
 698 |      "metadata": {},
 699 |      "output_type": "execute_result"
 700 |     }
 701 |    ],
 702 |    "source": [
 703 |     "weather_filename = os.path.join(DATA, 'energy-london', 'weather_hourly_darksky.csv')\n",
 704 |     "\n",
 705 |     "weather_df = pd.read_csv(weather_filename)\n",
 706 |     "weather_df['time'] = pd.to_datetime(weather_df['time'], format='%Y-%m-%d %H:%M:%S')\n",
 707 |     "weather_df = weather_df.drop(columns=['precipType', 'icon', 'summary'])\n",
 708 |     "weather_df = weather_df.sort_values(by='time')\n",
 709 |     "weather_df = weather_df.set_index('time')\n",
 710 |     "weather_df = weather_df[start:end]\n",
 711 |     "\n",
 712 |     "# Let's make sure we have one datapoint per hour to match \n",
 713 |     "# the frequency used for the household energy consumption data:\n",
 714 |     "weather_df = weather_df.resample(rule='1H').mean()     # This will generate NaN values for timestamps with missing data\n",
 715 |     "weather_df = weather_df.interpolate(method='linear')   # This will fill the missing values by linear interpolation between neighboring datapoints\n",
 716 |     "\n",
 717 |     "print(weather_df.shape)\n",
 718 |     "weather_df"
 719 |    ]
 720 |   },
 721 |   {
 722 |    "cell_type": "code",
 723 |    "execution_count": 11,
 724 |    "id": "b1878483",
 725 |    "metadata": {},
 726 |    "outputs": [
 727 |     {
 728 |      "data": {
 729 |       "text/html": [
 730 |        "<div>\n",
 731 |        "<style scoped>\n",
 732 |        "    .dataframe tbody tr th:only-of-type {\n",
 733 |        "        vertical-align: middle;\n",
 734 |        "    }\n",
 735 |        "\n",
 736 |        "    .dataframe tbody tr th {\n",
 737 |        "        vertical-align: top;\n",
 738 |        "    }\n",
 739 |        "\n",
 740 |        "    .dataframe thead th {\n",
 741 |        "        text-align: right;\n",
 742 |        "    }\n",
 743 |        "</style>\n",
 744 |        "<table border=\"1\" class=\"dataframe\">\n",
 745 |        "  <thead>\n",
 746 |        "    <tr style=\"text-align: right;\">\n",
 747 |        "      <th></th>\n",
 748 |        "      <th>MAIN CATEGORIES</th>\n",
 749 |        "      <th>CATEGORIES</th>\n",
 750 |        "      <th>REFERENCE</th>\n",
 751 |        "      <th>ACORN-A</th>\n",
 752 |        "      <th>ACORN-B</th>\n",
 753 |        "      <th>ACORN-E</th>\n",
 754 |        "    </tr>\n",
 755 |        "  </thead>\n",
 756 |        "  <tbody>\n",
 757 |        "    <tr>\n",
 758 |        "      <th>696</th>\n",
 759 |        "      <td>CONTACT</td>\n",
 760 |        "      <td>Preferred Channel</td>\n",
 761 |        "      <td>Email</td>\n",
 762 |        "      <td>137.000000</td>\n",
 763 |        "      <td>159.000000</td>\n",
 764 |        "      <td>73.000000</td>\n",
 765 |        "    </tr>\n",
 766 |        "    <tr>\n",
 767 |        "      <th>545</th>\n",
 768 |        "      <td>DIGITAL</td>\n",
 769 |        "      <td>Purchased on the internet</td>\n",
 770 |        "      <td>Car Insurance</td>\n",
 771 |        "      <td>123.000000</td>\n",
 772 |        "      <td>127.000000</td>\n",
 773 |        "      <td>112.000000</td>\n",
 774 |        "    </tr>\n",
 775 |        "    <tr>\n",
 776 |        "      <th>271</th>\n",
 777 |        "      <td>FINANCE</td>\n",
 778 |        "      <td>Expenditure per person per week</td>\n",
 779 |        "      <td>Furnishings, household equipment and routine m...</td>\n",
 780 |        "      <td>144.000000</td>\n",
 781 |        "      <td>132.000000</td>\n",
 782 |        "      <td>102.000000</td>\n",
 783 |        "    </tr>\n",
 784 |        "    <tr>\n",
 785 |        "      <th>667</th>\n",
 786 |        "      <td>SHOPPING</td>\n",
 787 |        "      <td>Furniture &amp; Fittings Stores</td>\n",
 788 |        "      <td>Mass Market</td>\n",
 789 |        "      <td>104.000000</td>\n",
 790 |        "      <td>123.000000</td>\n",
 791 |        "      <td>116.000000</td>\n",
 792 |        "    </tr>\n",
 793 |        "    <tr>\n",
 794 |        "      <th>673</th>\n",
 795 |        "      <td>SHOPPING</td>\n",
 796 |        "      <td>High Street Retailers</td>\n",
 797 |        "      <td>Costa</td>\n",
 798 |        "      <td>107.617424</td>\n",
 799 |        "      <td>126.495528</td>\n",
 800 |        "      <td>119.279432</td>\n",
 801 |        "    </tr>\n",
 802 |        "    <tr>\n",
 803 |        "      <th>794</th>\n",
 804 |        "      <td>LEISURE TIME</td>\n",
 805 |        "      <td>Interests &amp; Hobbies</td>\n",
 806 |        "      <td>Reading Books</td>\n",
 807 |        "      <td>107.000000</td>\n",
 808 |        "      <td>107.000000</td>\n",
 809 |        "      <td>68.000000</td>\n",
 810 |        "    </tr>\n",
 811 |        "    <tr>\n",
 812 |        "      <th>269</th>\n",
 813 |        "      <td>FINANCE</td>\n",
 814 |        "      <td>Expenditure per person per week</td>\n",
 815 |        "      <td>Clothing and footwear</td>\n",
 816 |        "      <td>134.000000</td>\n",
 817 |        "      <td>127.000000</td>\n",
 818 |        "      <td>108.000000</td>\n",
 819 |        "    </tr>\n",
 820 |        "    <tr>\n",
 821 |        "      <th>289</th>\n",
 822 |        "      <td>DIGITAL</td>\n",
 823 |        "      <td>Digital Attitudes</td>\n",
 824 |        "      <td>I worry that any personal information entered ...</td>\n",
 825 |        "      <td>105.000000</td>\n",
 826 |        "      <td>105.000000</td>\n",
 827 |        "      <td>100.000000</td>\n",
 828 |        "    </tr>\n",
 829 |        "    <tr>\n",
 830 |        "      <th>448</th>\n",
 831 |        "      <td>DIGITAL</td>\n",
 832 |        "      <td>Types of internet usage : Tablet / iPad</td>\n",
 833 |        "      <td>Download content/information from an advertisi...</td>\n",
 834 |        "      <td>99.000000</td>\n",
 835 |        "      <td>107.000000</td>\n",
 836 |        "      <td>133.000000</td>\n",
 837 |        "    </tr>\n",
 838 |        "    <tr>\n",
 839 |        "      <th>733</th>\n",
 840 |        "      <td>COMMUNITY SAFETY</td>\n",
 841 |        "      <td>Crime Survey for England</td>\n",
 842 |        "      <td>Taking everything into account I have confiden...</td>\n",
 843 |        "      <td>105.000000</td>\n",
 844 |        "      <td>105.000000</td>\n",
 845 |        "      <td>104.000000</td>\n",
 846 |        "    </tr>\n",
 847 |        "  </tbody>\n",
 848 |        "</table>\n",
 849 |        "</div>"
 850 |       ],
 851 |       "text/plain": [
 852 |        "      MAIN CATEGORIES                               CATEGORIES  \\\n",
 853 |        "696           CONTACT                        Preferred Channel   \n",
 854 |        "545           DIGITAL                Purchased on the internet   \n",
 855 |        "271           FINANCE          Expenditure per person per week   \n",
 856 |        "667          SHOPPING              Furniture & Fittings Stores   \n",
 857 |        "673          SHOPPING                    High Street Retailers   \n",
 858 |        "794      LEISURE TIME                      Interests & Hobbies   \n",
 859 |        "269           FINANCE          Expenditure per person per week   \n",
 860 |        "289           DIGITAL                        Digital Attitudes   \n",
 861 |        "448           DIGITAL  Types of internet usage : Tablet / iPad   \n",
 862 |        "733  COMMUNITY SAFETY                 Crime Survey for England   \n",
 863 |        "\n",
 864 |        "                                             REFERENCE     ACORN-A  \\\n",
 865 |        "696                                              Email  137.000000   \n",
 866 |        "545                                      Car Insurance  123.000000   \n",
 867 |        "271  Furnishings, household equipment and routine m...  144.000000   \n",
 868 |        "667                                        Mass Market  104.000000   \n",
 869 |        "673                                              Costa  107.617424   \n",
 870 |        "794                                      Reading Books  107.000000   \n",
 871 |        "269                              Clothing and footwear  134.000000   \n",
 872 |        "289  I worry that any personal information entered ...  105.000000   \n",
 873 |        "448  Download content/information from an advertisi...   99.000000   \n",
 874 |        "733  Taking everything into account I have confiden...  105.000000   \n",
 875 |        "\n",
 876 |        "        ACORN-B     ACORN-E  \n",
 877 |        "696  159.000000   73.000000  \n",
 878 |        "545  127.000000  112.000000  \n",
 879 |        "271  132.000000  102.000000  \n",
 880 |        "667  123.000000  116.000000  \n",
 881 |        "673  126.495528  119.279432  \n",
 882 |        "794  107.000000   68.000000  \n",
 883 |        "269  127.000000  108.000000  \n",
 884 |        "289  105.000000  100.000000  \n",
 885 |        "448  107.000000  133.000000  \n",
 886 |        "733  105.000000  104.000000  "
 887 |       ]
 888 |      },
 889 |      "execution_count": 11,
 890 |      "metadata": {},
 891 |      "output_type": "execute_result"
 892 |     }
 893 |    ],
 894 |    "source": [
 895 |     "acorn_filename = os.path.join(DATA, 'energy-london', 'acorn_details.csv')\n",
 896 |     "acorn_df = pd.read_csv(acorn_filename, encoding='ISO-8859-1')\n",
 897 |     "acorn_sample = acorn_df.sample(10).loc[:, ['MAIN CATEGORIES', 'CATEGORIES', 'REFERENCE', 'ACORN-A', 'ACORN-B', 'ACORN-E']]\n",
 898 |     "acorn_sample"
 899 |    ]
 900 |   },
 901 |   {
 902 |    "cell_type": "markdown",
 903 |    "id": "e2c11f90",
 904 |    "metadata": {},
 905 |    "source": [
 906 |     "## Datasets preparation\n",
 907 |     "---\n",
 908 |     "### Target time series dataset\n",
 909 |     "Our `energy_df` dataframe already has the right information: we just need to rename its columns to be consistent with the schema expected by Amazon Forecast (`timestamp`, `target_value` and `item_id`). In addition, we are going to aggregate it at a daily level (the weather data is resampled to the same daily resolution further down):"
 910 |    ]
 911 |   },
 912 |   {
 913 |    "cell_type": "code",
 914 |    "execution_count": 12,
 915 |    "id": "4e8b4aee",
 916 |    "metadata": {},
 917 |    "outputs": [
 918 |     {
 919 |      "name": "stdout",
 920 |      "output_type": "stream",
 921 |      "text": [
 922 |       "Reading existing file\n",
 923 |       "CPU times: user 729 ms, sys: 15.5 ms, total: 745 ms\n",
 924 |       "Wall time: 781 ms\n"
 925 |      ]
 926 |     }
 927 |    ],
 928 |    "source": [
 929 |     "%%time\n",
 930 |     "\n",
 931 |     "hourly_consumption_file = os.path.join('..', 'Dataset', 'target_time_series.csv')\n",
 932 |     "if os.path.exists(hourly_consumption_file):\n",
 933 |     "    print('Reading existing file')\n",
 934 |     "    energy_df = pd.read_csv(hourly_consumption_file)\n",
 935 |     "    energy_df = energy_df.set_index(['item_id', 'timestamp'])\n",
 936 |     "    \n",
 937 |     "else:\n",
 938 |     "    print('Generating new target time series file')\n",
 939 |     "    os.makedirs(os.path.join('..', 'Dataset'), exist_ok=True)\n",
 940 |     "    energy_df.columns = ['item_id', 'timestamp', 'target_value']\n",
 941 |     "    energy_df = energy_df.groupby(by='item_id').resample(rule='24H', on='timestamp').sum()\n",
 942 |     "    energy_df.to_csv(hourly_consumption_file)\n",
 943 |     "    print(f'{os.stat(hourly_consumption_file).st_size/(1024*1024):.04} MB')"
 944 |    ]
 945 |   },
 946 |   {
 947 |    "cell_type": "code",
 948 |    "execution_count": 14,
 949 |    "id": "fe268f5c",
 950 |    "metadata": {},
 951 |    "outputs": [
 952 |     {
 953 |      "data": {
 954 |       "text/html": [
 955 |        "<div>\n",
 956 |        "<style scoped>\n",
 957 |        "    .dataframe tbody tr th:only-of-type {\n",
 958 |        "        vertical-align: middle;\n",
 959 |        "    }\n",
 960 |        "\n",
 961 |        "    .dataframe tbody tr th {\n",
 962 |        "        vertical-align: top;\n",
 963 |        "    }\n",
 964 |        "\n",
 965 |        "    .dataframe thead th {\n",
 966 |        "        text-align: right;\n",
 967 |        "    }\n",
 968 |        "</style>\n",
 969 |        "<table border=\"1\" class=\"dataframe\">\n",
 970 |        "  <thead>\n",
 971 |        "    <tr style=\"text-align: right;\">\n",
 972 |        "      <th></th>\n",
 973 |        "      <th></th>\n",
 974 |        "      <th>target_value</th>\n",
 975 |        "    </tr>\n",
 976 |        "    <tr>\n",
 977 |        "      <th>item_id</th>\n",
 978 |        "      <th>timestamp</th>\n",
 979 |        "      <th></th>\n",
 980 |        "    </tr>\n",
 981 |        "  </thead>\n",
 982 |        "  <tbody>\n",
 983 |        "    <tr>\n",
 984 |        "      <th rowspan=\"5\" valign=\"top\">MAC000003</th>\n",
 985 |        "      <th>2012-07-01</th>\n",
 986 |        "      <td>12.359</td>\n",
 987 |        "    </tr>\n",
 988 |        "    <tr>\n",
 989 |        "      <th>2012-07-02</th>\n",
 990 |        "      <td>12.775</td>\n",
 991 |        "    </tr>\n",
 992 |        "    <tr>\n",
 993 |        "      <th>2012-07-03</th>\n",
 994 |        "      <td>11.584</td>\n",
 995 |        "    </tr>\n",
 996 |        "    <tr>\n",
 997 |        "      <th>2012-07-04</th>\n",
 998 |        "      <td>12.700</td>\n",
 999 |        "    </tr>\n",
1000 |        "    <tr>\n",
1001 |        "      <th>2012-07-05</th>\n",
1002 |        "      <td>13.724</td>\n",
1003 |        "    </tr>\n",
1004 |        "    <tr>\n",
1005 |        "      <th>...</th>\n",
1006 |        "      <th>...</th>\n",
1007 |        "      <td>...</td>\n",
1008 |        "    </tr>\n",
1009 |        "    <tr>\n",
1010 |        "      <th rowspan=\"5\" valign=\"top\">MAC005562</th>\n",
1011 |        "      <th>2013-06-26</th>\n",
1012 |        "      <td>7.466</td>\n",
1013 |        "    </tr>\n",
1014 |        "    <tr>\n",
1015 |        "      <th>2013-06-27</th>\n",
1016 |        "      <td>10.738</td>\n",
1017 |        "    </tr>\n",
1018 |        "    <tr>\n",
1019 |        "      <th>2013-06-28</th>\n",
1020 |        "      <td>11.128</td>\n",
1021 |        "    </tr>\n",
1022 |        "    <tr>\n",
1023 |        "      <th>2013-06-29</th>\n",
1024 |        "      <td>9.363</td>\n",
1025 |        "    </tr>\n",
1026 |        "    <tr>\n",
1027 |        "      <th>2013-06-30</th>\n",
1028 |        "      <td>9.491</td>\n",
1029 |        "    </tr>\n",
1030 |        "  </tbody>\n",
1031 |        "</table>\n",
1032 |        "<p>1301437 rows × 1 columns</p>\n",
1033 |        "</div>"
1034 |       ],
1035 |       "text/plain": [
1036 |        "                      target_value\n",
1037 |        "item_id   timestamp               \n",
1038 |        "MAC000003 2012-07-01        12.359\n",
1039 |        "          2012-07-02        12.775\n",
1040 |        "          2012-07-03        11.584\n",
1041 |        "          2012-07-04        12.700\n",
1042 |        "          2012-07-05        13.724\n",
1043 |        "...                            ...\n",
1044 |        "MAC005562 2013-06-26         7.466\n",
1045 |        "          2013-06-27        10.738\n",
1046 |        "          2013-06-28        11.128\n",
1047 |        "          2013-06-29         9.363\n",
1048 |        "          2013-06-30         9.491\n",
1049 |        "\n",
1050 |        "[1301437 rows x 1 columns]"
1051 |       ]
1052 |      },
1053 |      "execution_count": 14,
1054 |      "metadata": {},
1055 |      "output_type": "execute_result"
1056 |     }
1057 |    ],
1058 |    "source": [
1059 |     "energy_df"
1060 |    ]
1061 |   },
1062 |   {
1063 |    "cell_type": "markdown",
1064 |    "id": "0546c697",
1065 |    "metadata": {},
1066 |    "source": [
1067 |     "### Related time series dataset"
1068 |    ]
1069 |   },
1070 |   {
1071 |    "cell_type": "code",
1072 |    "execution_count": 15,
1073 |    "id": "d256603b",
1074 |    "metadata": {},
1075 |    "outputs": [
1076 |     {
1077 |      "data": {
1078 |       "text/html": [
1079 |        "<div>\n",
1080 |        "<style scoped>\n",
1081 |        "    .dataframe tbody tr th:only-of-type {\n",
1082 |        "        vertical-align: middle;\n",
1083 |        "    }\n",
1084 |        "\n",
1085 |        "    .dataframe tbody tr th {\n",
1086 |        "        vertical-align: top;\n",
1087 |        "    }\n",
1088 |        "\n",
1089 |        "    .dataframe thead th {\n",
1090 |        "        text-align: right;\n",
1091 |        "    }\n",
1092 |        "</style>\n",
1093 |        "<table border=\"1\" class=\"dataframe\">\n",
1094 |        "  <thead>\n",
1095 |        "    <tr style=\"text-align: right;\">\n",
1096 |        "      <th></th>\n",
1097 |        "      <th>temperature</th>\n",
1098 |        "      <th>wind_speed</th>\n",
1099 |        "      <th>humidity</th>\n",
1100 |        "    </tr>\n",
1101 |        "    <tr>\n",
1102 |        "      <th>timestamp</th>\n",
1103 |        "      <th></th>\n",
1104 |        "      <th></th>\n",
1105 |        "      <th></th>\n",
1106 |        "    </tr>\n",
1107 |        "  </thead>\n",
1108 |        "  <tbody>\n",
1109 |        "    <tr>\n",
1110 |        "      <th>2012-07-01</th>\n",
1111 |        "      <td>14.862500</td>\n",
1112 |        "      <td>5.138750</td>\n",
1113 |        "      <td>0.703333</td>\n",
1114 |        "    </tr>\n",
1115 |        "    <tr>\n",
1116 |        "      <th>2012-07-02</th>\n",
1117 |        "      <td>15.229583</td>\n",
1118 |        "      <td>4.557500</td>\n",
1119 |        "      <td>0.832917</td>\n",
1120 |        "    </tr>\n",
1121 |        "    <tr>\n",
1122 |        "      <th>2012-07-03</th>\n",
1123 |        "      <td>16.927917</td>\n",
1124 |        "      <td>3.710833</td>\n",
1125 |        "      <td>0.895833</td>\n",
1126 |        "    </tr>\n",
1127 |        "    <tr>\n",
1128 |        "      <th>2012-07-04</th>\n",
1129 |        "      <td>18.426250</td>\n",
1130 |        "      <td>3.368750</td>\n",
1131 |        "      <td>0.853333</td>\n",
1132 |        "    </tr>\n",
1133 |        "    <tr>\n",
1134 |        "      <th>2012-07-05</th>\n",
1135 |        "      <td>18.496667</td>\n",
1136 |        "      <td>2.000833</td>\n",
1137 |        "      <td>0.707083</td>\n",
1138 |        "    </tr>\n",
1139 |        "    <tr>\n",
1140 |        "      <th>...</th>\n",
1141 |        "      <td>...</td>\n",
1142 |        "      <td>...</td>\n",
1143 |        "      <td>...</td>\n",
1144 |        "    </tr>\n",
1145 |        "    <tr>\n",
1146 |        "      <th>2013-06-26</th>\n",
1147 |        "      <td>16.252083</td>\n",
1148 |        "      <td>2.269583</td>\n",
1149 |        "      <td>0.632083</td>\n",
1150 |        "    </tr>\n",
1151 |        "    <tr>\n",
1152 |        "      <th>2013-06-27</th>\n",
1153 |        "      <td>15.173750</td>\n",
1154 |        "      <td>2.925833</td>\n",
1155 |        "      <td>0.700417</td>\n",
1156 |        "    </tr>\n",
1157 |        "    <tr>\n",
1158 |        "      <th>2013-06-28</th>\n",
1159 |        "      <td>16.288750</td>\n",
1160 |        "      <td>3.616667</td>\n",
1161 |        "      <td>0.867917</td>\n",
1162 |        "    </tr>\n",
1163 |        "    <tr>\n",
1164 |        "      <th>2013-06-29</th>\n",
1165 |        "      <td>17.214583</td>\n",
1166 |        "      <td>3.434583</td>\n",
1167 |        "      <td>0.603333</td>\n",
1168 |        "    </tr>\n",
1169 |        "    <tr>\n",
1170 |        "      <th>2013-06-30</th>\n",
1171 |        "      <td>19.741250</td>\n",
1172 |        "      <td>4.401667</td>\n",
1173 |        "      <td>0.676667</td>\n",
1174 |        "    </tr>\n",
1175 |        "  </tbody>\n",
1176 |        "</table>\n",
1177 |        "<p>365 rows × 3 columns</p>\n",
1178 |        "</div>"
1179 |       ],
1180 |       "text/plain": [
1181 |        "            temperature  wind_speed  humidity\n",
1182 |        "timestamp                                    \n",
1183 |        "2012-07-01    14.862500    5.138750  0.703333\n",
1184 |        "2012-07-02    15.229583    4.557500  0.832917\n",
1185 |        "2012-07-03    16.927917    3.710833  0.895833\n",
1186 |        "2012-07-04    18.426250    3.368750  0.853333\n",
1187 |        "2012-07-05    18.496667    2.000833  0.707083\n",
1188 |        "...                 ...         ...       ...\n",
1189 |        "2013-06-26    16.252083    2.269583  0.632083\n",
1190 |        "2013-06-27    15.173750    2.925833  0.700417\n",
1191 |        "2013-06-28    16.288750    3.616667  0.867917\n",
1192 |        "2013-06-29    17.214583    3.434583  0.603333\n",
1193 |        "2013-06-30    19.741250    4.401667  0.676667\n",
1194 |        "\n",
1195 |        "[365 rows x 3 columns]"
1196 |       ]
1197 |      },
1198 |      "execution_count": 15,
1199 |      "metadata": {},
1200 |      "output_type": "execute_result"
1201 |     }
1202 |    ],
1203 |    "source": [
1204 |     "weather_df = weather_df[['temperature', 'windSpeed', 'humidity']].reset_index()\n",
1205 |     "weather_df.columns = ['timestamp', 'temperature', 'wind_speed', 'humidity']\n",
1206 |     "weather_df['timestamp'] = pd.to_datetime(weather_df['timestamp'])\n",
1207 |     "weather_df = weather_df.set_index(['timestamp'])\n",
1208 |     "weather_df = weather_df.resample('24H').mean()\n",
1209 |     "weather_df"
1210 |    ]
1211 |   },
1212 |   {
1213 |    "cell_type": "markdown",
1214 |    "id": "b30b4399",
1215 |    "metadata": {},
1216 |    "source": [
1217 |     "The related time series dataset must conform to the following schema:\n",
1218 |     "\n",
1219 |     "```json\n",
1220 |     "{\n",
1221 |     "    'item_id': string,\n",
1222 |     "    'timestamp': timestamp,\n",
1223 |     "    'dimension_1': ...,\n",
1224 |     "    ...\n",
1225 |     "    'dimension_10': ...,\n",
1226 |     "    'related_field_1': double,\n",
1227 |     "    'related_field_2': double,\n",
1228 |     "    ...\n",
1229 |     "    'related_field_13': double\n",
1230 |     "}\n",
1231 |     "```\n",
1232 |     "\n",
1233 |     "You will note that each `item_id` must have its own related time series. For instance, the temperature for every timestamp must be provided for household `MAC002543` and another set of temperatures must be provided for household `MAC002556`. In a real situation, these temperatures might be slightly different as each household may be associated with a different weather station depending on its location. In this tutorial, we will duplicate the same data and simplify the problem by considering that only one weather station covers the whole London metropolitan area."
1234 |    ]
1235 |   },
1236 |   {
1237 |    "cell_type": "code",
1238 |    "execution_count": 16,
1239 |    "id": "e4fc98c6",
1240 |    "metadata": {},
1241 |    "outputs": [
1242 |     {
1243 |      "data": {
1244 |       "text/plain": [
1245 |        "3573"
1246 |       ]
1247 |      },
1248 |      "execution_count": 16,
1249 |      "metadata": {},
1250 |      "output_type": "execute_result"
1251 |     }
1252 |    ],
1253 |    "source": [
1254 |     "household_ids = list(energy_df.index.get_level_values('item_id').unique())\n",
1255 |     "len(household_ids)"
1256 |    ]
1257 |   },
1258 |   {
1259 |    "cell_type": "code",
1260 |    "execution_count": 17,
1261 |    "id": "a8838d64",
1262 |    "metadata": {},
1263 |    "outputs": [
1264 |     {
1265 |      "name": "stderr",
1266 |      "output_type": "stream",
1267 |      "text": [
1268 |       "100%|██████████| 3573/3573 [00:05<00:00, 649.25it/s]\n"
1269 |      ]
1270 |     }
1271 |    ],
1272 |    "source": [
1273 |     "df_list = []\n",
1274 |     "for hhid in tqdm(household_ids):\n",
1275 |     "    current_df = weather_df.reset_index().copy()\n",
1276 |     "    current_df['item_id'] = hhid\n",
1277 |     "    current_df = current_df[['item_id', 'timestamp', 'temperature', 'wind_speed', 'humidity']]\n",
1278 |     "    df_list.append(current_df)\n",
1279 |     "    del current_df"
1280 |    ]
1281 |   },
1282 |   {
1283 |    "cell_type": "code",
1284 |    "execution_count": 18,
1285 |    "id": "3ebd43ae",
1286 |    "metadata": {},
1287 |    "outputs": [
1288 |     {
1289 |      "data": {
1290 |       "text/plain": [
1291 |        "(1304145, 5)"
1292 |       ]
1293 |      },
1294 |      "execution_count": 18,
1295 |      "metadata": {},
1296 |      "output_type": "execute_result"
1297 |     }
1298 |    ],
1299 |    "source": [
1300 |     "related_time_series = pd.concat(df_list).reset_index(drop=True)\n",
1301 |     "related_time_series.shape"
1302 |    ]
1303 |   },
1304 |   {
1305 |    "cell_type": "code",
1306 |    "execution_count": 19,
1307 |    "id": "d39df1fe",
1308 |    "metadata": {},
1309 |    "outputs": [
1310 |     {
1311 |      "data": {
1312 |       "text/html": [
1313 |        "<table style=\"display:inline\" border=\"1\" class=\"dataframe\">\n",
1314 |        "  <thead>\n",
1315 |        "    <tr style=\"text-align: right;\">\n",
1316 |        "      <th></th>\n",
1317 |        "      <th>item_id</th>\n",
1318 |        "      <th>timestamp</th>\n",
1319 |        "      <th>temperature</th>\n",
1320 |        "      <th>wind_speed</th>\n",
1321 |        "      <th>humidity</th>\n",
1322 |        "    </tr>\n",
1323 |        "  </thead>\n",
1324 |        "  <tbody>\n",
1325 |        "    <tr>\n",
1326 |        "      <th>0</th>\n",
1327 |        "      <td>MAC000003</td>\n",
1328 |        "      <td>2012-07-01</td>\n",
1329 |        "      <td>14.862500</td>\n",
1330 |        "      <td>5.138750</td>\n",
1331 |        "      <td>0.703333</td>\n",
1332 |        "    </tr>\n",
1333 |        "    <tr>\n",
1334 |        "      <th>1</th>\n",
1335 |        "      <td>MAC000003</td>\n",
1336 |        "      <td>2012-07-02</td>\n",
1337 |        "      <td>15.229583</td>\n",
1338 |        "      <td>4.557500</td>\n",
1339 |        "      <td>0.832917</td>\n",
1340 |        "    </tr>\n",
1341 |        "    <tr>\n",
1342 |        "      <th>2</th>\n",
1343 |        "      <td>MAC000003</td>\n",
1344 |        "      <td>2012-07-03</td>\n",
1345 |        "      <td>16.927917</td>\n",
1346 |        "      <td>3.710833</td>\n",
1347 |        "      <td>0.895833</td>\n",
1348 |        "    </tr>\n",
1349 |        "    <tr>\n",
1350 |        "      <th>3</th>\n",
1351 |        "      <td>MAC000003</td>\n",
1352 |        "      <td>2012-07-04</td>\n",
1353 |        "      <td>18.426250</td>\n",
1354 |        "      <td>3.368750</td>\n",
1355 |        "      <td>0.853333</td>\n",
1356 |        "    </tr>\n",
1357 |        "    <tr>\n",
1358 |        "      <th>4</th>\n",
1359 |        "      <td>MAC000003</td>\n",
1360 |        "      <td>2012-07-05</td>\n",
1361 |        "      <td>18.496667</td>\n",
1362 |        "      <td>2.000833</td>\n",
1363 |        "      <td>0.707083</td>\n",
1364 |        "    </tr>\n",
1365 |        "    <tr>\n",
1366 |        "      <th>...</th>\n",
1367 |        "      <td>...</td>\n",
1368 |        "      <td>...</td>\n",
1369 |        "      <td>...</td>\n",
1370 |        "      <td>...</td>\n",
1371 |        "      <td>...</td>\n",
1372 |        "    </tr>\n",
1373 |        "    <tr>\n",
1374 |        "      <th>360</th>\n",
1375 |        "      <td>MAC000003</td>\n",
1376 |        "      <td>2013-06-26</td>\n",
1377 |        "      <td>16.252083</td>\n",
1378 |        "      <td>2.269583</td>\n",
1379 |        "      <td>0.632083</td>\n",
1380 |        "    </tr>\n",
1381 |        "    <tr>\n",
1382 |        "      <th>361</th>\n",
1383 |        "      <td>MAC000003</td>\n",
1384 |        "      <td>2013-06-27</td>\n",
1385 |        "      <td>15.173750</td>\n",
1386 |        "      <td>2.925833</td>\n",
1387 |        "      <td>0.700417</td>\n",
1388 |        "    </tr>\n",
1389 |        "    <tr>\n",
1390 |        "      <th>362</th>\n",
1391 |        "      <td>MAC000003</td>\n",
1392 |        "      <td>2013-06-28</td>\n",
1393 |        "      <td>16.288750</td>\n",
1394 |        "      <td>3.616667</td>\n",
1395 |        "      <td>0.867917</td>\n",
1396 |        "    </tr>\n",
1397 |        "    <tr>\n",
1398 |        "      <th>363</th>\n",
1399 |        "      <td>MAC000003</td>\n",
1400 |        "      <td>2013-06-29</td>\n",
1401 |        "      <td>17.214583</td>\n",
1402 |        "      <td>3.434583</td>\n",
1403 |        "      <td>0.603333</td>\n",
1404 |        "    </tr>\n",
1405 |        "    <tr>\n",
1406 |        "      <th>364</th>\n",
1407 |        "      <td>MAC000003</td>\n",
1408 |        "      <td>2013-06-30</td>\n",
1409 |        "      <td>19.741250</td>\n",
1410 |        "      <td>4.401667</td>\n",
1411 |        "      <td>0.676667</td>\n",
1412 |        "    </tr>\n",
1413 |        "  </tbody>\n",
1414 |        "</table style=\"display:inline\"><table style=\"display:inline\" border=\"1\" class=\"dataframe\">\n",
1415 |        "  <thead>\n",
1416 |        "    <tr style=\"text-align: right;\">\n",
1417 |        "      <th></th>\n",
1418 |        "      <th>item_id</th>\n",
1419 |        "      <th>temperature</th>\n",
1420 |        "      <th>wind_speed</th>\n",
1421 |        "      <th>humidity</th>\n",
1422 |        "    </tr>\n",
1423 |        "  </thead>\n",
1424 |        "  <tbody>\n",
1425 |        "    <tr>\n",
1426 |        "      <th>1303780</th>\n",
1427 |        "      <td>MAC005562</td>\n",
1428 |        "      <td>14.862500</td>\n",
1429 |        "      <td>5.138750</td>\n",
1430 |        "      <td>0.703333</td>\n",
1431 |        "    </tr>\n",
1432 |        "    <tr>\n",
1433 |        "      <th>1303781</th>\n",
1434 |        "      <td>MAC005562</td>\n",
1435 |        "      <td>15.229583</td>\n",
1436 |        "      <td>4.557500</td>\n",
1437 |        "      <td>0.832917</td>\n",
1438 |        "    </tr>\n",
1439 |        "    <tr>\n",
1440 |        "      <th>1303782</th>\n",
1441 |        "      <td>MAC005562</td>\n",
1442 |        "      <td>16.927917</td>\n",
1443 |        "      <td>3.710833</td>\n",
1444 |        "      <td>0.895833</td>\n",
1445 |        "    </tr>\n",
1446 |        "    <tr>\n",
1447 |        "      <th>1303783</th>\n",
1448 |        "      <td>MAC005562</td>\n",
1449 |        "      <td>18.426250</td>\n",
1450 |        "      <td>3.368750</td>\n",
1451 |        "      <td>0.853333</td>\n",
1452 |        "    </tr>\n",
1453 |        "    <tr>\n",
1454 |        "      <th>1303784</th>\n",
1455 |        "      <td>MAC005562</td>\n",
1456 |        "      <td>18.496667</td>\n",
1457 |        "      <td>2.000833</td>\n",
1458 |        "      <td>0.707083</td>\n",
1459 |        "    </tr>\n",
1460 |        "    <tr>\n",
1461 |        "      <th>...</th>\n",
1462 |        "      <td>...</td>\n",
1463 |        "      <td>...</td>\n",
1464 |        "      <td>...</td>\n",
1465 |        "      <td>...</td>\n",
1466 |        "    </tr>\n",
1467 |        "    <tr>\n",
1468 |        "      <th>1304140</th>\n",
1469 |        "      <td>MAC005562</td>\n",
1470 |        "      <td>16.252083</td>\n",
1471 |        "      <td>2.269583</td>\n",
1472 |        "      <td>0.632083</td>\n",
1473 |        "    </tr>\n",
1474 |        "    <tr>\n",
1475 |        "      <th>1304141</th>\n",
1476 |        "      <td>MAC005562</td>\n",
1477 |        "      <td>15.173750</td>\n",
1478 |        "      <td>2.925833</td>\n",
1479 |        "      <td>0.700417</td>\n",
1480 |        "    </tr>\n",
1481 |        "    <tr>\n",
1482 |        "      <th>1304142</th>\n",
1483 |        "      <td>MAC005562</td>\n",
1484 |        "      <td>16.288750</td>\n",
1485 |        "      <td>3.616667</td>\n",
1486 |        "      <td>0.867917</td>\n",
1487 |        "    </tr>\n",
1488 |        "    <tr>\n",
1489 |        "      <th>1304143</th>\n",
1490 |        "      <td>MAC005562</td>\n",
1491 |        "      <td>17.214583</td>\n",
1492 |        "      <td>3.434583</td>\n",
1493 |        "      <td>0.603333</td>\n",
1494 |        "    </tr>\n",
1495 |        "    <tr>\n",
1496 |        "      <th>1304144</th>\n",
1497 |        "      <td>MAC005562</td>\n",
1498 |        "      <td>19.741250</td>\n",
1499 |        "      <td>4.401667</td>\n",
1500 |        "      <td>0.676667</td>\n",
1501 |        "    </tr>\n",
1502 |        "  </tbody>\n",
1503 |        "</table style=\"display:inline\">"
1504 |       ]
1505 |      },
1506 |      "metadata": {},
1507 |      "output_type": "display_data"
1508 |     }
1509 |    ],
1510 |    "source": [
1511 |     "display_multiple_dataframe(\n",
1512 |     "    related_time_series[related_time_series['item_id'] == 'MAC000003'],\n",
1513 |     "    related_time_series[related_time_series['item_id'] == 'MAC005562'][['item_id', 'temperature', 'wind_speed', 'humidity']],\n",
1514 |     "    max_rows=10, max_cols=None\n",
1515 |     ")"
1516 |    ]
1517 |   },
1518 |   {
1519 |    "cell_type": "code",
1520 |    "execution_count": 20,
1521 |    "id": "8ccebfd9",
1522 |    "metadata": {},
1523 |    "outputs": [
1524 |     {
1525 |      "data": {
1526 |       "text/html": [
1527 |        "<div>\n",
1528 |        "<style scoped>\n",
1529 |        "    .dataframe tbody tr th:only-of-type {\n",
1530 |        "        vertical-align: middle;\n",
1531 |        "    }\n",
1532 |        "\n",
1533 |        "    .dataframe tbody tr th {\n",
1534 |        "        vertical-align: top;\n",
1535 |        "    }\n",
1536 |        "\n",
1537 |        "    .dataframe thead th {\n",
1538 |        "        text-align: right;\n",
1539 |        "    }\n",
1540 |        "</style>\n",
1541 |        "<table border=\"1\" class=\"dataframe\">\n",
1542 |        "  <thead>\n",
1543 |        "    <tr style=\"text-align: right;\">\n",
1544 |        "      <th></th>\n",
1545 |        "      <th>item_id</th>\n",
1546 |        "      <th>timestamp</th>\n",
1547 |        "      <th>temperature</th>\n",
1548 |        "      <th>wind_speed</th>\n",
1549 |        "      <th>humidity</th>\n",
1550 |        "    </tr>\n",
1551 |        "  </thead>\n",
1552 |        "  <tbody>\n",
1553 |        "    <tr>\n",
1554 |        "      <th>0</th>\n",
1555 |        "      <td>MAC000003</td>\n",
1556 |        "      <td>2012-07-01</td>\n",
1557 |        "      <td>14.862500</td>\n",
1558 |        "      <td>5.138750</td>\n",
1559 |        "      <td>0.703333</td>\n",
1560 |        "    </tr>\n",
1561 |        "    <tr>\n",
1562 |        "      <th>1</th>\n",
1563 |        "      <td>MAC000003</td>\n",
1564 |        "      <td>2012-07-02</td>\n",
1565 |        "      <td>15.229583</td>\n",
1566 |        "      <td>4.557500</td>\n",
1567 |        "      <td>0.832917</td>\n",
1568 |        "    </tr>\n",
1569 |        "    <tr>\n",
1570 |        "      <th>2</th>\n",
1571 |        "      <td>MAC000003</td>\n",
1572 |        "      <td>2012-07-03</td>\n",
1573 |        "      <td>16.927917</td>\n",
1574 |        "      <td>3.710833</td>\n",
1575 |        "      <td>0.895833</td>\n",
1576 |        "    </tr>\n",
1577 |        "    <tr>\n",
1578 |        "      <th>3</th>\n",
1579 |        "      <td>MAC000003</td>\n",
1580 |        "      <td>2012-07-04</td>\n",
1581 |        "      <td>18.426250</td>\n",
1582 |        "      <td>3.368750</td>\n",
1583 |        "      <td>0.853333</td>\n",
1584 |        "    </tr>\n",
1585 |        "    <tr>\n",
1586 |        "      <th>4</th>\n",
1587 |        "      <td>MAC000003</td>\n",
1588 |        "      <td>2012-07-05</td>\n",
1589 |        "      <td>18.496667</td>\n",
1590 |        "      <td>2.000833</td>\n",
1591 |        "      <td>0.707083</td>\n",
1592 |        "    </tr>\n",
1593 |        "    <tr>\n",
1594 |        "      <th>...</th>\n",
1595 |        "      <td>...</td>\n",
1596 |        "      <td>...</td>\n",
1597 |        "      <td>...</td>\n",
1598 |        "      <td>...</td>\n",
1599 |        "      <td>...</td>\n",
1600 |        "    </tr>\n",
1601 |        "    <tr>\n",
1602 |        "      <th>360</th>\n",
1603 |        "      <td>MAC000003</td>\n",
1604 |        "      <td>2013-06-26</td>\n",
1605 |        "      <td>16.252083</td>\n",
1606 |        "      <td>2.269583</td>\n",
1607 |        "      <td>0.632083</td>\n",
1608 |        "    </tr>\n",
1609 |        "    <tr>\n",
1610 |        "      <th>361</th>\n",
1611 |        "      <td>MAC000003</td>\n",
1612 |        "      <td>2013-06-27</td>\n",
1613 |        "      <td>15.173750</td>\n",
1614 |        "      <td>2.925833</td>\n",
1615 |        "      <td>0.700417</td>\n",
1616 |        "    </tr>\n",
1617 |        "    <tr>\n",
1618 |        "      <th>362</th>\n",
1619 |        "      <td>MAC000003</td>\n",
1620 |        "      <td>2013-06-28</td>\n",
1621 |        "      <td>16.288750</td>\n",
1622 |        "      <td>3.616667</td>\n",
1623 |        "      <td>0.867917</td>\n",
1624 |        "    </tr>\n",
1625 |        "    <tr>\n",
1626 |        "      <th>363</th>\n",
1627 |        "      <td>MAC000003</td>\n",
1628 |        "      <td>2013-06-29</td>\n",
1629 |        "      <td>17.214583</td>\n",
1630 |        "      <td>3.434583</td>\n",
1631 |        "      <td>0.603333</td>\n",
1632 |        "    </tr>\n",
1633 |        "    <tr>\n",
1634 |        "      <th>364</th>\n",
1635 |        "      <td>MAC000003</td>\n",
1636 |        "      <td>2013-06-30</td>\n",
1637 |        "      <td>19.741250</td>\n",
1638 |        "      <td>4.401667</td>\n",
1639 |        "      <td>0.676667</td>\n",
1640 |        "    </tr>\n",
1641 |        "  </tbody>\n",
1642 |        "</table>\n",
1643 |        "<p>365 rows × 5 columns</p>\n",
1644 |        "</div>"
1645 |       ],
1646 |       "text/plain": [
1647 |        "       item_id  timestamp  temperature  wind_speed  humidity\n",
1648 |        "0    MAC000003 2012-07-01    14.862500    5.138750  0.703333\n",
1649 |        "1    MAC000003 2012-07-02    15.229583    4.557500  0.832917\n",
1650 |        "2    MAC000003 2012-07-03    16.927917    3.710833  0.895833\n",
1651 |        "3    MAC000003 2012-07-04    18.426250    3.368750  0.853333\n",
1652 |        "4    MAC000003 2012-07-05    18.496667    2.000833  0.707083\n",
1653 |        "..         ...        ...          ...         ...       ...\n",
1654 |        "360  MAC000003 2013-06-26    16.252083    2.269583  0.632083\n",
1655 |        "361  MAC000003 2013-06-27    15.173750    2.925833  0.700417\n",
1656 |        "362  MAC000003 2013-06-28    16.288750    3.616667  0.867917\n",
1657 |        "363  MAC000003 2013-06-29    17.214583    3.434583  0.603333\n",
1658 |        "364  MAC000003 2013-06-30    19.741250    4.401667  0.676667\n",
1659 |        "\n",
1660 |        "[365 rows x 5 columns]"
1661 |       ]
1662 |      },
1663 |      "execution_count": 20,
1664 |      "metadata": {},
1665 |      "output_type": "execute_result"
1666 |     }
1667 |    ],
1668 |    "source": [
1669 |     "related_time_series[related_time_series['item_id'] == 'MAC000003']"
1670 |    ]
1671 |   },
1672 |   {
1673 |    "cell_type": "code",
1674 |    "execution_count": 21,
1675 |    "id": "0c93746a",
1676 |    "metadata": {},
1677 |    "outputs": [
1678 |     {
1679 |      "name": "stdout",
1680 |      "output_type": "stream",
1681 |      "text": [
1682 |       "89.33 MB\n",
1683 |       "CPU times: user 12 s, sys: 80.7 ms, total: 12.1 s\n",
1684 |       "Wall time: 20.1 s\n"
1685 |      ]
1686 |     }
1687 |    ],
1688 |    "source": [
1689 |     "%%time\n",
1690 |     "\n",
1691 |     "rts_fname = os.path.join('..', 'Dataset', 'related_time_series.csv')\n",
1692 |     "related_time_series.to_csv(rts_fname, index=None)\n",
1693 |     "print(f'{os.stat(rts_fname).st_size/(1024*1024):.04} MB')"
1694 |    ]
1695 |   },
1696 |   {
1697 |    "cell_type": "markdown",
1698 |    "id": "32f244ab",
1699 |    "metadata": {},
1700 |    "source": [
1701 |     "### Item metadata dataset"
1702 |    ]
1703 |   },
1704 |   {
1705 |    "cell_type": "markdown",
1706 |    "id": "9b0c6b28",
1707 |    "metadata": {},
1708 |    "source": [
1709 |     "* POPULATION > Geography (4 categories)\n",
1710 |     "* TRANSPORT > Travel To Work (1 category: work mainly at or from home)\n",
1711 |     "* FAMILY > Household Size (4 categories)"
1712 |    ]
1713 |   },
1714 |   {
1715 |    "cell_type": "code",
1716 |    "execution_count": 22,
1717 |    "id": "186a2c69",
1718 |    "metadata": {},
1719 |    "outputs": [
1720 |     {
1721 |      "data": {
1722 |       "text/plain": [
1723 |        "array(['POPULATION', 'HOUSING', 'FAMILY', 'ECONOMY', 'EDUCATION',\n",
1724 |        "       'HEALTH', 'TRANSPORT', 'MARKETING CHANNELS', 'FINANCE', 'DIGITAL',\n",
1725 |        "       'SHOPPING', 'CONTACT', 'ENVIRONMENT', 'COMMUNITY SAFETY',\n",
1726 |        "       'LEISURE TIME'], dtype=object)"
1727 |       ]
1728 |      },
1729 |      "execution_count": 22,
1730 |      "metadata": {},
1731 |      "output_type": "execute_result"
1732 |     }
1733 |    ],
1734 |    "source": [
1735 |     "acorn_df['MAIN CATEGORIES'].unique()"
1736 |    ]
1737 |   },
1738 |   {
1739 |    "cell_type": "code",
1740 |    "execution_count": 23,
1741 |    "id": "f8125168",
1742 |    "metadata": {},
1743 |    "outputs": [
1744 |     {
1745 |      "name": "stdout",
1746 |      "output_type": "stream",
1747 |      "text": [
1748 |       "(9, 20)\n"
1749 |      ]
1750 |     },
1751 |     {
1752 |      "data": {
1753 |       "text/html": [
1754 |        "<div>\n",
1755 |        "<style scoped>\n",
1756 |        "    .dataframe tbody tr th:only-of-type {\n",
1757 |        "        vertical-align: middle;\n",
1758 |        "    }\n",
1759 |        "\n",
1760 |        "    .dataframe tbody tr th {\n",
1761 |        "        vertical-align: top;\n",
1762 |        "    }\n",
1763 |        "\n",
1764 |        "    .dataframe thead th {\n",
1765 |        "        text-align: right;\n",
1766 |        "    }\n",
1767 |        "</style>\n",
1768 |        "<table border=\"1\" class=\"dataframe\">\n",
1769 |        "  <thead>\n",
1770 |        "    <tr style=\"text-align: right;\">\n",
1771 |        "      <th></th>\n",
1772 |        "      <th>MAIN CATEGORIES</th>\n",
1773 |        "      <th>CATEGORIES</th>\n",
1774 |        "      <th>REFERENCE</th>\n",
1775 |        "      <th>ACORN-A</th>\n",
1776 |        "      <th>ACORN-B</th>\n",
1777 |        "      <th>ACORN-C</th>\n",
1778 |        "      <th>ACORN-D</th>\n",
1779 |        "      <th>ACORN-E</th>\n",
1780 |        "      <th>ACORN-F</th>\n",
1781 |        "      <th>ACORN-G</th>\n",
1782 |        "      <th>ACORN-H</th>\n",
1783 |        "      <th>ACORN-I</th>\n",
1784 |        "      <th>ACORN-J</th>\n",
1785 |        "      <th>ACORN-K</th>\n",
1786 |        "      <th>ACORN-L</th>\n",
1787 |        "      <th>ACORN-M</th>\n",
1788 |        "      <th>ACORN-N</th>\n",
1789 |        "      <th>ACORN-O</th>\n",
1790 |        "      <th>ACORN-P</th>\n",
1791 |        "      <th>ACORN-Q</th>\n",
1792 |        "    </tr>\n",
1793 |        "  </thead>\n",
1794 |        "  <tbody>\n",
1795 |        "    <tr>\n",
1796 |        "      <th>8</th>\n",
1797 |        "      <td>POPULATION</td>\n",
1798 |        "      <td>Geography</td>\n",
1799 |        "      <td>England</td>\n",
1800 |        "      <td>107.0</td>\n",
1801 |        "      <td>101.0</td>\n",
1802 |        "      <td>103.0</td>\n",
1803 |        "      <td>114.0</td>\n",
1804 |        "      <td>106.0</td>\n",
1805 |        "      <td>75.0</td>\n",
1806 |        "      <td>107.0</td>\n",
1807 |        "      <td>106.0</td>\n",
1808 |        "      <td>102.0</td>\n",
1809 |        "      <td>106.0</td>\n",
1810 |        "      <td>95.0</td>\n",
1811 |        "      <td>93.0</td>\n",
1812 |        "      <td>97.0</td>\n",
1813 |        "      <td>89.0</td>\n",
1814 |        "      <td>97.0</td>\n",
1815 |        "      <td>110.0</td>\n",
1816 |        "      <td>97.0</td>\n",
1817 |        "    </tr>\n",
1818 |        "    <tr>\n",
1819 |        "      <th>9</th>\n",
1820 |        "      <td>POPULATION</td>\n",
1821 |        "      <td>Geography</td>\n",
1822 |        "      <td>Northern Ireland</td>\n",
1823 |        "      <td>30.0</td>\n",
1824 |        "      <td>95.0</td>\n",
1825 |        "      <td>45.0</td>\n",
1826 |        "      <td>2.0</td>\n",
1827 |        "      <td>49.0</td>\n",
1828 |        "      <td>462.0</td>\n",
1829 |        "      <td>53.0</td>\n",
1830 |        "      <td>104.0</td>\n",
1831 |        "      <td>30.0</td>\n",
1832 |        "      <td>91.0</td>\n",
1833 |        "      <td>56.0</td>\n",
1834 |        "      <td>87.0</td>\n",
1835 |        "      <td>131.0</td>\n",
1836 |        "      <td>67.0</td>\n",
1837 |        "      <td>95.0</td>\n",
1838 |        "      <td>75.0</td>\n",
1839 |        "      <td>43.0</td>\n",
1840 |        "    </tr>\n",
1841 |        "    <tr>\n",
1842 |        "      <th>10</th>\n",
1843 |        "      <td>POPULATION</td>\n",
1844 |        "      <td>Geography</td>\n",
1845 |        "      <td>Scotland</td>\n",
1846 |        "      <td>93.0</td>\n",
1847 |        "      <td>105.0</td>\n",
1848 |        "      <td>87.0</td>\n",
1849 |        "      <td>47.0</td>\n",
1850 |        "      <td>93.0</td>\n",
1851 |        "      <td>144.0</td>\n",
1852 |        "      <td>54.0</td>\n",
1853 |        "      <td>46.0</td>\n",
1854 |        "      <td>97.0</td>\n",
1855 |        "      <td>53.0</td>\n",
1856 |        "      <td>167.0</td>\n",
1857 |        "      <td>114.0</td>\n",
1858 |        "      <td>121.0</td>\n",
1859 |        "      <td>194.0</td>\n",
1860 |        "      <td>139.0</td>\n",
1861 |        "      <td>31.0</td>\n",
1862 |        "      <td>183.0</td>\n",
1863 |        "    </tr>\n",
1864 |        "    <tr>\n",
1865 |        "      <th>11</th>\n",
1866 |        "      <td>POPULATION</td>\n",
1867 |        "      <td>Geography</td>\n",
1868 |        "      <td>Wales</td>\n",
1869 |        "      <td>22.0</td>\n",
1870 |        "      <td>73.0</td>\n",
1871 |        "      <td>99.0</td>\n",
1872 |        "      <td>10.0</td>\n",
1873 |        "      <td>46.0</td>\n",
1874 |        "      <td>249.0</td>\n",
1875 |        "      <td>77.0</td>\n",
1876 |        "      <td>84.0</td>\n",
1877 |        "      <td>113.0</td>\n",
1878 |        "      <td>73.0</td>\n",
1879 |        "      <td>98.0</td>\n",
1880 |        "      <td>211.0</td>\n",
1881 |        "      <td>104.0</td>\n",
1882 |        "      <td>150.0</td>\n",
1883 |        "      <td>88.0</td>\n",
1884 |        "      <td>54.0</td>\n",
1885 |        "      <td>45.0</td>\n",
1886 |        "    </tr>\n",
1887 |        "    <tr>\n",
1888 |        "      <th>70</th>\n",
1889 |        "      <td>FAMILY</td>\n",
1890 |        "      <td>Household Size</td>\n",
1891 |        "      <td>Household size : 1 person</td>\n",
1892 |        "      <td>48.0</td>\n",
1893 |        "      <td>51.0</td>\n",
1894 |        "      <td>93.0</td>\n",
1895 |        "      <td>86.0</td>\n",
1896 |        "      <td>85.0</td>\n",
1897 |        "      <td>83.0</td>\n",
1898 |        "      <td>73.0</td>\n",
1899 |        "      <td>65.0</td>\n",
1900 |        "      <td>151.0</td>\n",
1901 |        "      <td>85.0</td>\n",
1902 |        "      <td>112.0</td>\n",
1903 |        "      <td>98.0</td>\n",
1904 |        "      <td>91.0</td>\n",
1905 |        "      <td>193.0</td>\n",
1906 |        "      <td>131.0</td>\n",
1907 |        "      <td>72.0</td>\n",
1908 |        "      <td>160.0</td>\n",
1909 |        "    </tr>\n",
1910 |        "    <tr>\n",
1911 |        "      <th>71</th>\n",
1912 |        "      <td>FAMILY</td>\n",
1913 |        "      <td>Household Size</td>\n",
1914 |        "      <td>Household size : 2 persons</td>\n",
1915 |        "      <td>107.0</td>\n",
1916 |        "      <td>107.0</td>\n",
1917 |        "      <td>123.0</td>\n",
1918 |        "      <td>102.0</td>\n",
1919 |        "      <td>99.0</td>\n",
1920 |        "      <td>127.0</td>\n",
1921 |        "      <td>108.0</td>\n",
1922 |        "      <td>102.0</td>\n",
1923 |        "      <td>126.0</td>\n",
1924 |        "      <td>110.0</td>\n",
1925 |        "      <td>80.0</td>\n",
1926 |        "      <td>99.0</td>\n",
1927 |        "      <td>94.0</td>\n",
1928 |        "      <td>99.0</td>\n",
1929 |        "      <td>101.0</td>\n",
1930 |        "      <td>75.0</td>\n",
1931 |        "      <td>88.0</td>\n",
1932 |        "    </tr>\n",
1933 |        "    <tr>\n",
1934 |        "      <th>72</th>\n",
1935 |        "      <td>FAMILY</td>\n",
1936 |        "      <td>Household Size</td>\n",
1937 |        "      <td>Household size : 3-4 persons</td>\n",
1938 |        "      <td>114.0</td>\n",
1939 |        "      <td>119.0</td>\n",
1940 |        "      <td>92.0</td>\n",
1941 |        "      <td>107.0</td>\n",
1942 |        "      <td>109.0</td>\n",
1943 |        "      <td>91.0</td>\n",
1944 |        "      <td>106.0</td>\n",
1945 |        "      <td>116.0</td>\n",
1946 |        "      <td>64.0</td>\n",
1947 |        "      <td>105.0</td>\n",
1948 |        "      <td>93.0</td>\n",
1949 |        "      <td>98.0</td>\n",
1950 |        "      <td>108.0</td>\n",
1951 |        "      <td>63.0</td>\n",
1952 |        "      <td>88.0</td>\n",
1953 |        "      <td>119.0</td>\n",
1954 |        "      <td>85.0</td>\n",
1955 |        "    </tr>\n",
1956 |        "    <tr>\n",
1957 |        "      <th>73</th>\n",
1958 |        "      <td>FAMILY</td>\n",
1959 |        "      <td>Household Size</td>\n",
1960 |        "      <td>Household size : 5+ persons</td>\n",
1961 |        "      <td>128.0</td>\n",
1962 |        "      <td>104.0</td>\n",
1963 |        "      <td>61.0</td>\n",
1964 |        "      <td>95.0</td>\n",
1965 |        "      <td>100.0</td>\n",
1966 |        "      <td>69.0</td>\n",
1967 |        "      <td>106.0</td>\n",
1968 |        "      <td>106.0</td>\n",
1969 |        "      <td>33.0</td>\n",
1970 |        "      <td>78.0</td>\n",
1971 |        "      <td>175.0</td>\n",
1972 |        "      <td>114.0</td>\n",
1973 |        "      <td>112.0</td>\n",
1974 |        "      <td>50.0</td>\n",
1975 |        "      <td>74.0</td>\n",
1976 |        "      <td>179.0</td>\n",
1977 |        "      <td>76.0</td>\n",
1978 |        "    </tr>\n",
1979 |        "    <tr>\n",
1980 |        "      <th>134</th>\n",
1981 |        "      <td>TRANSPORT</td>\n",
1982 |        "      <td>Travel To Work</td>\n",
1983 |        "      <td>Work mainly at or from home</td>\n",
1984 |        "      <td>230.0</td>\n",
1985 |        "      <td>156.0</td>\n",
1986 |        "      <td>133.0</td>\n",
1987 |        "      <td>123.0</td>\n",
1988 |        "      <td>91.0</td>\n",
1989 |        "      <td>202.0</td>\n",
1990 |        "      <td>89.0</td>\n",
1991 |        "      <td>66.0</td>\n",
1992 |        "      <td>97.0</td>\n",
1993 |        "      <td>68.0</td>\n",
1994 |        "      <td>87.0</td>\n",
1995 |        "      <td>65.0</td>\n",
1996 |        "      <td>69.0</td>\n",
1997 |        "      <td>85.0</td>\n",
1998 |        "      <td>67.0</td>\n",
1999 |        "      <td>50.0</td>\n",
2000 |        "      <td>62.0</td>\n",
2001 |        "    </tr>\n",
2002 |        "  </tbody>\n",
2003 |        "</table>\n",
2004 |        "</div>"
2005 |       ],
2006 |       "text/plain": [
2007 |        "    MAIN CATEGORIES      CATEGORIES                     REFERENCE  ACORN-A  \\\n",
2008 |        "8        POPULATION       Geography                       England    107.0   \n",
2009 |        "9        POPULATION       Geography              Northern Ireland     30.0   \n",
2010 |        "10       POPULATION       Geography                      Scotland     93.0   \n",
2011 |        "11       POPULATION       Geography                         Wales     22.0   \n",
2012 |        "70           FAMILY  Household Size     Household size : 1 person     48.0   \n",
2013 |        "71           FAMILY  Household Size    Household size : 2 persons    107.0   \n",
2014 |        "72           FAMILY  Household Size  Household size : 3-4 persons    114.0   \n",
2015 |        "73           FAMILY  Household Size   Household size : 5+ persons    128.0   \n",
2016 |        "134       TRANSPORT  Travel To Work   Work mainly at or from home    230.0   \n",
2017 |        "\n",
2018 |        "     ACORN-B  ACORN-C  ACORN-D  ACORN-E  ACORN-F  ACORN-G  ACORN-H  ACORN-I  \\\n",
2019 |        "8      101.0    103.0    114.0    106.0     75.0    107.0    106.0    102.0   \n",
2020 |        "9       95.0     45.0      2.0     49.0    462.0     53.0    104.0     30.0   \n",
2021 |        "10     105.0     87.0     47.0     93.0    144.0     54.0     46.0     97.0   \n",
2022 |        "11      73.0     99.0     10.0     46.0    249.0     77.0     84.0    113.0   \n",
2023 |        "70      51.0     93.0     86.0     85.0     83.0     73.0     65.0    151.0   \n",
2024 |        "71     107.0    123.0    102.0     99.0    127.0    108.0    102.0    126.0   \n",
2025 |        "72     119.0     92.0    107.0    109.0     91.0    106.0    116.0     64.0   \n",
2026 |        "73     104.0     61.0     95.0    100.0     69.0    106.0    106.0     33.0   \n",
2027 |        "134    156.0    133.0    123.0     91.0    202.0     89.0     66.0     97.0   \n",
2028 |        "\n",
2029 |        "     ACORN-J  ACORN-K  ACORN-L  ACORN-M  ACORN-N  ACORN-O  ACORN-P  ACORN-Q  \n",
2030 |        "8      106.0     95.0     93.0     97.0     89.0     97.0    110.0     97.0  \n",
2031 |        "9       91.0     56.0     87.0    131.0     67.0     95.0     75.0     43.0  \n",
2032 |        "10      53.0    167.0    114.0    121.0    194.0    139.0     31.0    183.0  \n",
2033 |        "11      73.0     98.0    211.0    104.0    150.0     88.0     54.0     45.0  \n",
2034 |        "70      85.0    112.0     98.0     91.0    193.0    131.0     72.0    160.0  \n",
2035 |        "71     110.0     80.0     99.0     94.0     99.0    101.0     75.0     88.0  \n",
2036 |        "72     105.0     93.0     98.0    108.0     63.0     88.0    119.0     85.0  \n",
2037 |        "73      78.0    175.0    114.0    112.0     50.0     74.0    179.0     76.0  \n",
2038 |        "134     68.0     87.0     65.0     69.0     85.0     67.0     50.0     62.0  "
2039 |       ]
2040 |      },
2041 |      "execution_count": 23,
2042 |      "metadata": {},
2043 |      "output_type": "execute_result"
2044 |     }
2045 |    ],
2046 |    "source": [
2047 |     "mask = (\n",
2048 |     "    (acorn_df['MAIN CATEGORIES'] == 'FAMILY') & (acorn_df['CATEGORIES'] == 'Household Size') |\n",
2049 |     "    (acorn_df['MAIN CATEGORIES'] == 'POPULATION') & (acorn_df['CATEGORIES'] == 'Geography') |\n",
2050 |     "    (acorn_df['REFERENCE'] == 'Work mainly at or from home')\n",
2051 |     ")\n",
2052 |     "print(acorn_df[mask].shape)\n",
2053 |     "acorn_df[mask]"
2054 |    ]
2055 |   },
2056 |   {
2057 |    "cell_type": "code",
2058 |    "execution_count": 24,
2059 |    "id": "fbe77c71",
2060 |    "metadata": {},
2061 |    "outputs": [],
2062 |    "source": [
2063 |     "# num_cols = pd.get_option('display.max_columns')"
2064 |    ]
2065 |   },
2066 |   {
2067 |    "cell_type": "code",
2068 |    "execution_count": 31,
2069 |    "id": "935f069f",
2070 |    "metadata": {},
2071 |    "outputs": [
2072 |     {
2073 |      "data": {
2074 |       "text/html": [
2075 |        "<div>\n",
2076 |        "<style scoped>\n",
2077 |        "    .dataframe tbody tr th:only-of-type {\n",
2078 |        "        vertical-align: middle;\n",
2079 |        "    }\n",
2080 |        "\n",
2081 |        "    .dataframe tbody tr th {\n",
2082 |        "        vertical-align: top;\n",
2083 |        "    }\n",
2084 |        "\n",
2085 |        "    .dataframe thead th {\n",
2086 |        "        text-align: right;\n",
2087 |        "    }\n",
2088 |        "</style>\n",
2089 |        "<table border=\"1\" class=\"dataframe\">\n",
2090 |        "  <thead>\n",
2091 |        "    <tr style=\"text-align: right;\">\n",
2092 |        "      <th></th>\n",
2093 |        "      <th>item_id</th>\n",
2094 |        "      <th>geography_england</th>\n",
2095 |        "      <th>geography_northern_ireland</th>\n",
2096 |        "      <th>geography_scotland</th>\n",
2097 |        "      <th>geography_wales</th>\n",
2098 |        "      <th>family_1_person</th>\n",
2099 |        "      <th>family_2_persons</th>\n",
2100 |        "      <th>family_3_4_persons</th>\n",
2101 |        "      <th>family_5_more_persons</th>\n",
2102 |        "      <th>transport_work_from_home</th>\n",
2103 |        "    </tr>\n",
2104 |        "  </thead>\n",
2105 |        "  <tbody>\n",
2106 |        "    <tr>\n",
2107 |        "      <th>0</th>\n",
2108 |        "      <td>MAC000246</td>\n",
2109 |        "      <td>107.0</td>\n",
2110 |        "      <td>30.0</td>\n",
2111 |        "      <td>93.0</td>\n",
2112 |        "      <td>22.0</td>\n",
2113 |        "      <td>48.0</td>\n",
2114 |        "      <td>107.0</td>\n",
2115 |        "      <td>114.0</td>\n",
2116 |        "      <td>128.0</td>\n",
2117 |        "      <td>230.0</td>\n",
2118 |        "    </tr>\n",
2119 |        "    <tr>\n",
2120 |        "      <th>1</th>\n",
2121 |        "      <td>MAC004431</td>\n",
2122 |        "      <td>107.0</td>\n",
2123 |        "      <td>30.0</td>\n",
2124 |        "      <td>93.0</td>\n",
2125 |        "      <td>22.0</td>\n",
2126 |        "      <td>48.0</td>\n",
2127 |        "      <td>107.0</td>\n",
2128 |        "      <td>114.0</td>\n",
2129 |        "      <td>128.0</td>\n",
2130 |        "      <td>230.0</td>\n",
2131 |        "    </tr>\n",
2132 |        "    <tr>\n",
2133 |        "      <th>2</th>\n",
2134 |        "      <td>MAC004387</td>\n",
2135 |        "      <td>107.0</td>\n",
2136 |        "      <td>30.0</td>\n",
2137 |        "      <td>93.0</td>\n",
2138 |        "      <td>22.0</td>\n",
2139 |        "      <td>48.0</td>\n",
2140 |        "      <td>107.0</td>\n",
2141 |        "      <td>114.0</td>\n",
2142 |        "      <td>128.0</td>\n",
2143 |        "      <td>230.0</td>\n",
2144 |        "    </tr>\n",
2145 |        "    <tr>\n",
2146 |        "      <th>3</th>\n",
2147 |        "      <td>MAC004319</td>\n",
2148 |        "      <td>107.0</td>\n",
2149 |        "      <td>30.0</td>\n",
2150 |        "      <td>93.0</td>\n",
2151 |        "      <td>22.0</td>\n",
2152 |        "      <td>48.0</td>\n",
2153 |        "      <td>107.0</td>\n",
2154 |        "      <td>114.0</td>\n",
2155 |        "      <td>128.0</td>\n",
2156 |        "      <td>230.0</td>\n",
2157 |        "    </tr>\n",
2158 |        "    <tr>\n",
2159 |        "      <th>4</th>\n",
2160 |        "      <td>MAC004247</td>\n",
2161 |        "      <td>107.0</td>\n",
2162 |        "      <td>30.0</td>\n",
2163 |        "      <td>93.0</td>\n",
2164 |        "      <td>22.0</td>\n",
2165 |        "      <td>48.0</td>\n",
2166 |        "      <td>107.0</td>\n",
2167 |        "      <td>114.0</td>\n",
2168 |        "      <td>128.0</td>\n",
2169 |        "      <td>230.0</td>\n",
2170 |        "    </tr>\n",
2171 |        "    <tr>\n",
2172 |        "      <th>...</th>\n",
2173 |        "      <td>...</td>\n",
2174 |        "      <td>...</td>\n",
2175 |        "      <td>...</td>\n",
2176 |        "      <td>...</td>\n",
2177 |        "      <td>...</td>\n",
2178 |        "      <td>...</td>\n",
2179 |        "      <td>...</td>\n",
2180 |        "      <td>...</td>\n",
2181 |        "      <td>...</td>\n",
2182 |        "      <td>...</td>\n",
2183 |        "    </tr>\n",
2184 |        "    <tr>\n",
2185 |        "      <th>3535</th>\n",
2186 |        "      <td>MAC002345</td>\n",
2187 |        "      <td>97.0</td>\n",
2188 |        "      <td>43.0</td>\n",
2189 |        "      <td>183.0</td>\n",
2190 |        "      <td>45.0</td>\n",
2191 |        "      <td>160.0</td>\n",
2192 |        "      <td>88.0</td>\n",
2193 |        "      <td>85.0</td>\n",
2194 |        "      <td>76.0</td>\n",
2195 |        "      <td>62.0</td>\n",
2196 |        "    </tr>\n",
2197 |        "    <tr>\n",
2198 |        "      <th>3536</th>\n",
2199 |        "      <td>MAC002185</td>\n",
2200 |        "      <td>97.0</td>\n",
2201 |        "      <td>43.0</td>\n",
2202 |        "      <td>183.0</td>\n",
2203 |        "      <td>45.0</td>\n",
2204 |        "      <td>160.0</td>\n",
2205 |        "      <td>88.0</td>\n",
2206 |        "      <td>85.0</td>\n",
2207 |        "      <td>76.0</td>\n",
2208 |        "      <td>62.0</td>\n",
2209 |        "    </tr>\n",
2210 |        "    <tr>\n",
2211 |        "      <th>3537</th>\n",
2212 |        "      <td>MAC002347</td>\n",
2213 |        "      <td>97.0</td>\n",
2214 |        "      <td>43.0</td>\n",
2215 |        "      <td>183.0</td>\n",
2216 |        "      <td>45.0</td>\n",
2217 |        "      <td>160.0</td>\n",
2218 |        "      <td>88.0</td>\n",
2219 |        "      <td>85.0</td>\n",
2220 |        "      <td>76.0</td>\n",
2221 |        "      <td>62.0</td>\n",
2222 |        "    </tr>\n",
2223 |        "    <tr>\n",
2224 |        "      <th>3538</th>\n",
2225 |        "      <td>MAC002331</td>\n",
2226 |        "      <td>97.0</td>\n",
2227 |        "      <td>43.0</td>\n",
2228 |        "      <td>183.0</td>\n",
2229 |        "      <td>45.0</td>\n",
2230 |        "      <td>160.0</td>\n",
2231 |        "      <td>88.0</td>\n",
2232 |        "      <td>85.0</td>\n",
2233 |        "      <td>76.0</td>\n",
2234 |        "      <td>62.0</td>\n",
2235 |        "    </tr>\n",
2236 |        "    <tr>\n",
2237 |        "      <th>3539</th>\n",
2238 |        "      <td>MAC000318</td>\n",
2239 |        "      <td>97.0</td>\n",
2240 |        "      <td>43.0</td>\n",
2241 |        "      <td>183.0</td>\n",
2242 |        "      <td>45.0</td>\n",
2243 |        "      <td>160.0</td>\n",
2244 |        "      <td>88.0</td>\n",
2245 |        "      <td>85.0</td>\n",
2246 |        "      <td>76.0</td>\n",
2247 |        "      <td>62.0</td>\n",
2248 |        "    </tr>\n",
2249 |        "  </tbody>\n",
2250 |        "</table>\n",
2251 |        "<p>3540 rows × 10 columns</p>\n",
2252 |        "</div>"
2253 |       ],
2254 |       "text/plain": [
2255 |        "        item_id  geography_england  geography_northern_ireland  \\\n",
2256 |        "0     MAC000246              107.0                        30.0   \n",
2257 |        "1     MAC004431              107.0                        30.0   \n",
2258 |        "2     MAC004387              107.0                        30.0   \n",
2259 |        "3     MAC004319              107.0                        30.0   \n",
2260 |        "4     MAC004247              107.0                        30.0   \n",
2261 |        "...         ...                ...                         ...   \n",
2262 |        "3535  MAC002345               97.0                        43.0   \n",
2263 |        "3536  MAC002185               97.0                        43.0   \n",
2264 |        "3537  MAC002347               97.0                        43.0   \n",
2265 |        "3538  MAC002331               97.0                        43.0   \n",
2266 |        "3539  MAC000318               97.0                        43.0   \n",
2267 |        "\n",
2268 |        "      geography_scotland  geography_wales  family_1_person  family_2_persons  \\\n",
2269 |        "0                   93.0             22.0             48.0             107.0   \n",
2270 |        "1                   93.0             22.0             48.0             107.0   \n",
2271 |        "2                   93.0             22.0             48.0             107.0   \n",
2272 |        "3                   93.0             22.0             48.0             107.0   \n",
2273 |        "4                   93.0             22.0             48.0             107.0   \n",
2274 |        "...                  ...              ...              ...               ...   \n",
2275 |        "3535               183.0             45.0            160.0              88.0   \n",
2276 |        "3536               183.0             45.0            160.0              88.0   \n",
2277 |        "3537               183.0             45.0            160.0              88.0   \n",
2278 |        "3538               183.0             45.0            160.0              88.0   \n",
2279 |        "3539               183.0             45.0            160.0              88.0   \n",
2280 |        "\n",
2281 |        "      family_3_4_persons  family_5_more_persons  transport_work_from_home  \n",
2282 |        "0                  114.0                  128.0                     230.0  \n",
2283 |        "1                  114.0                  128.0                     230.0  \n",
2284 |        "2                  114.0                  128.0                     230.0  \n",
2285 |        "3                  114.0                  128.0                     230.0  \n",
2286 |        "4                  114.0                  128.0                     230.0  \n",
2287 |        "...                  ...                    ...                       ...  \n",
2288 |        "3535                85.0                   76.0                      62.0  \n",
2289 |        "3536                85.0                   76.0                      62.0  \n",
2290 |        "3537                85.0                   76.0                      62.0  \n",
2291 |        "3538                85.0                   76.0                      62.0  \n",
2292 |        "3539                85.0                   76.0                      62.0  \n",
2293 |        "\n",
2294 |        "[3540 rows x 10 columns]"
2295 |       ]
2296 |      },
2297 |      "execution_count": 31,
2298 |      "metadata": {},
2299 |      "output_type": "execute_result"
2300 |     }
2301 |    ],
2302 |    "source": [
2303 |     "metadata_df = acorn_df[mask].iloc[:, 2:].set_index('REFERENCE').T\n",
2304 |     "metadata_df.columns = [\n",
2305 |     "    'geography_england',\n",
2306 |     "    'geography_northern_ireland',\n",
2307 |     "    'geography_scotland',\n",
2308 |     "    'geography_wales',\n",
2309 |     "    'family_1_person',\n",
2310 |     "    'family_2_persons',\n",
2311 |     "    'family_3_4_persons',\n",
2312 |     "    'family_5_more_persons',\n",
2313 |     "    'transport_work_from_home'\n",
2314 |     "]\n",
2315 |     "metadata_df.index.name = 'segment'\n",
2316 |     "\n",
2317 |     "metadata_df = pd.merge(household_df[['LCLid', 'Acorn']], metadata_df, how='left', left_on='Acorn', right_index=True)\n",
2318 |     "metadata_df = metadata_df.drop(columns='Acorn')\n",
2319 |     "metadata_df = metadata_df.rename(columns={'LCLid': 'item_id'})\n",
2320 |     "metadata_df = metadata_df[metadata_df['item_id'].isin(household_ids)]\n",
2321 |     "metadata_df = metadata_df.reset_index(drop=True)\n",
2322 |     "# pd.set_option('display.max_columns', 6)\n",
2323 |     "metadata_df"
2324 |    ]
2325 |   },
2326 |   {
2327 |    "cell_type": "code",
2328 |    "execution_count": null,
2329 |    "id": "f28d1da0",
2330 |    "metadata": {},
2331 |    "outputs": [],
2332 |    "source": [
2333 |     "# pd.set_option('display.max_columns', num_cols)"
2334 |    ]
2335 |   },
2336 |   {
2337 |    "cell_type": "code",
2338 |    "execution_count": 32,
2339 |    "id": "a7dbb6a5",
2340 |    "metadata": {},
2341 |    "outputs": [
2342 |     {
2343 |      "name": "stdout",
2344 |      "output_type": "stream",
2345 |      "text": [
2346 |       "CPU times: user 27.2 ms, sys: 0 ns, total: 27.2 ms\n",
2347 |       "Wall time: 63.7 ms\n"
2348 |      ]
2349 |     },
2350 |     {
2351 |      "data": {
2352 |       "text/plain": [
2353 |        "0.1971435546875"
2354 |       ]
2355 |      },
2356 |      "execution_count": 32,
2357 |      "metadata": {},
2358 |      "output_type": "execute_result"
2359 |     }
2360 |    ],
2361 |    "source": [
2362 |     "%%time\n",
2363 |     "\n",
2364 |     "metadata_fname = os.path.join('..', 'Dataset', 'item_metadata.csv')\n",
2365 |     "metadata_df.to_csv(metadata_fname, index=None)\n",
2366 |     "os.stat(metadata_fname).st_size/(1024*1024)"
2367 |    ]
2368 |   },
2369 |   {
2370 |    "cell_type": "markdown",
2371 |    "id": "1923bfb0",
2372 |    "metadata": {},
2373 |    "source": [
2374 |     "## Visualization\n",
2375 |     "---"
2376 |    ]
2377 |   },
2378 |   {
2379 |    "cell_type": "code",
2380 |    "execution_count": 33,
2381 |    "id": "0a6880e8",
2382 |    "metadata": {},
2383 |    "outputs": [],
2384 |    "source": [
2385 |     "import matplotlib.pyplot as plt\n",
2386 |     "\n",
2387 |     "%matplotlib inline\n",
2388 |     "plt.style.use('fivethirtyeight')\n",
2389 |     "prop_cycle = plt.rcParams['axes.prop_cycle']\n",
2390 |     "colors = prop_cycle.by_key()['color']"
2391 |    ]
2392 |   },
2393 |   {
2394 |    "cell_type": "code",
2395 |    "execution_count": 34,
2396 |    "id": "17c6e9e9",
2397 |    "metadata": {},
2398 |    "outputs": [
2399 |     {
2400 |      "data": {
2401 |       "text/html": [
2402 |        "<div>\n",
2403 |        "<style scoped>\n",
2404 |        "    .dataframe tbody tr th:only-of-type {\n",
2405 |        "        vertical-align: middle;\n",
2406 |        "    }\n",
2407 |        "\n",
2408 |        "    .dataframe tbody tr th {\n",
2409 |        "        vertical-align: top;\n",
2410 |        "    }\n",
2411 |        "\n",
2412 |        "    .dataframe thead th {\n",
2413 |        "        text-align: right;\n",
2414 |        "    }\n",
2415 |        "</style>\n",
2416 |        "<table border=\"1\" class=\"dataframe\">\n",
2417 |        "  <thead>\n",
2418 |        "    <tr style=\"text-align: right;\">\n",
2419 |        "      <th></th>\n",
2420 |        "      <th>item_id</th>\n",
2421 |        "      <th>timestamp</th>\n",
2422 |        "      <th>target_value</th>\n",
2423 |        "    </tr>\n",
2424 |        "  </thead>\n",
2425 |        "  <tbody>\n",
2426 |        "    <tr>\n",
2427 |        "      <th>0</th>\n",
2428 |        "      <td>MAC000003</td>\n",
2429 |        "      <td>2012-07-01</td>\n",
2430 |        "      <td>12.359</td>\n",
2431 |        "    </tr>\n",
2432 |        "    <tr>\n",
2433 |        "      <th>1</th>\n",
2434 |        "      <td>MAC000003</td>\n",
2435 |        "      <td>2012-07-02</td>\n",
2436 |        "      <td>12.775</td>\n",
2437 |        "    </tr>\n",
2438 |        "    <tr>\n",
2439 |        "      <th>2</th>\n",
2440 |        "      <td>MAC000003</td>\n",
2441 |        "      <td>2012-07-03</td>\n",
2442 |        "      <td>11.584</td>\n",
2443 |        "    </tr>\n",
2444 |        "    <tr>\n",
2445 |        "      <th>3</th>\n",
2446 |        "      <td>MAC000003</td>\n",
2447 |        "      <td>2012-07-04</td>\n",
2448 |        "      <td>12.700</td>\n",
2449 |        "    </tr>\n",
2450 |        "    <tr>\n",
2451 |        "      <th>4</th>\n",
2452 |        "      <td>MAC000003</td>\n",
2453 |        "      <td>2012-07-05</td>\n",
2454 |        "      <td>13.724</td>\n",
2455 |        "    </tr>\n",
2456 |        "    <tr>\n",
2457 |        "      <th>...</th>\n",
2458 |        "      <td>...</td>\n",
2459 |        "      <td>...</td>\n",
2460 |        "      <td>...</td>\n",
2461 |        "    </tr>\n",
2462 |        "    <tr>\n",
2463 |        "      <th>1301432</th>\n",
2464 |        "      <td>MAC005562</td>\n",
2465 |        "      <td>2013-06-26</td>\n",
2466 |        "      <td>7.466</td>\n",
2467 |        "    </tr>\n",
2468 |        "    <tr>\n",
2469 |        "      <th>1301433</th>\n",
2470 |        "      <td>MAC005562</td>\n",
2471 |        "      <td>2013-06-27</td>\n",
2472 |        "      <td>10.738</td>\n",
2473 |        "    </tr>\n",
2474 |        "    <tr>\n",
2475 |        "      <th>1301434</th>\n",
2476 |        "      <td>MAC005562</td>\n",
2477 |        "      <td>2013-06-28</td>\n",
2478 |        "      <td>11.128</td>\n",
2479 |        "    </tr>\n",
2480 |        "    <tr>\n",
2481 |        "      <th>1301435</th>\n",
2482 |        "      <td>MAC005562</td>\n",
2483 |        "      <td>2013-06-29</td>\n",
2484 |        "      <td>9.363</td>\n",
2485 |        "    </tr>\n",
2486 |        "    <tr>\n",
2487 |        "      <th>1301436</th>\n",
2488 |        "      <td>MAC005562</td>\n",
2489 |        "      <td>2013-06-30</td>\n",
2490 |        "      <td>9.491</td>\n",
2491 |        "    </tr>\n",
2492 |        "  </tbody>\n",
2493 |        "</table>\n",
2494 |        "<p>1301437 rows × 3 columns</p>\n",
2495 |        "</div>"
2496 |       ],
2497 |       "text/plain": [
2498 |        "           item_id  timestamp  target_value\n",
2499 |        "0        MAC000003 2012-07-01        12.359\n",
2500 |        "1        MAC000003 2012-07-02        12.775\n",
2501 |        "2        MAC000003 2012-07-03        11.584\n",
2502 |        "3        MAC000003 2012-07-04        12.700\n",
2503 |        "4        MAC000003 2012-07-05        13.724\n",
2504 |        "...            ...        ...           ...\n",
2505 |        "1301432  MAC005562 2013-06-26         7.466\n",
2506 |        "1301433  MAC005562 2013-06-27        10.738\n",
2507 |        "1301434  MAC005562 2013-06-28        11.128\n",
2508 |        "1301435  MAC005562 2013-06-29         9.363\n",
2509 |        "1301436  MAC005562 2013-06-30         9.491\n",
2510 |        "\n",
2511 |        "[1301437 rows x 3 columns]"
2512 |       ]
2513 |      },
2514 |      "execution_count": 34,
2515 |      "metadata": {},
2516 |      "output_type": "execute_result"
2517 |     }
2518 |    ],
2519 |    "source": [
2520 |     "df = energy_df.reset_index()\n",
2521 |     "df['timestamp'] = pd.to_datetime(df['timestamp'])\n",
2522 |     "df"
2523 |    ]
2524 |   },
2525 |   {
2526 |    "cell_type": "code",
2527 |    "execution_count": 35,
2528 |    "id": "2a57a3b9",
2529 |    "metadata": {},
2530 |    "outputs": [
2531 |     {
2532 |      "data": {
2533 |       "text/plain": [
2534 |        "item_id                 object\n",
2535 |        "timestamp       datetime64[ns]\n",
2536 |        "target_value           float64\n",
2537 |        "dtype: object"
2538 |       ]
2539 |      },
2540 |      "execution_count": 35,
2541 |      "metadata": {},
2542 |      "output_type": "execute_result"
2543 |     }
2544 |    ],
2545 |    "source": [
2546 |     "df.dtypes"
2547 |    ]
2548 |   },
2549 |   {
2550 |    "cell_type": "code",
2551 |    "execution_count": 36,
2552 |    "id": "705be0b1",
2553 |    "metadata": {},
2554 |    "outputs": [
2555 |     {
2556 |      "data": {
2557 |       "text/plain": [
2558 |        "array(['MAC000003', 'MAC000004', 'MAC000006', 'MAC000008', 'MAC000013',\n",
2559 |        "       'MAC000018', 'MAC000019', 'MAC000020', 'MAC000021', 'MAC000022'],\n",
2560 |        "      dtype=object)"
2561 |       ]
2562 |      },
2563 |      "execution_count": 36,
2564 |      "metadata": {},
2565 |      "output_type": "execute_result"
2566 |     }
2567 |    ],
2568 |    "source": [
2569 |     "hhids = df['item_id'].unique()\n",
2570 |     "hhids[:10]"
2571 |    ]
2572 |   },
2573 |   {
2574 |    "cell_type": "code",
2575 |    "execution_count": 37,
2576 |    "id": "23a2161c",
2577 |    "metadata": {},
2578 |    "outputs": [
2579 |     {
2580 |      "data": {
2581 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAABfoAAAEJCAYAAADMypZ8AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAB2VUlEQVR4nO3dd3hkZfn/8c+ZPpO+vbB9s33ZvhT50pRFBQWRr6KgfhFEQSw/BWkivaqI0kQRqRYEURQVC0jTZRvbW7b3bEsm09s5vz8mm02ZTGayaZO8X9fFdbHJJHkyOc8p93M/923U1tZaAgAAAAAAAAAABcnW3QMAAAAAAAAAAADtR6AfAAAAAAAAAIACRqAfAAAAAAAAAIACRqAfAAAAAAAAAIACRqAfAAAAAAAAAIACRqAfAAAAAAAAAIACRqAfAAAAAAAAAIACRqAfAAAAAAAAAIACRqC/k1VVVXX3EABkwRwFehbmJNCzMCeBrsWcA3oW5iTQszAnsyPQDwAAAAAAAABAASPQDwAAAAAAAABAASPQDwAAAAAAAABAASPQDwAAAAAAAABAASPQDwAAAAAAAABAASPQDwAAAAAAAABAASPQDwAAAAAAAABAASPQDwBAM8sOxFUXN7t7GAAAAAAAADkh0A8AQDPLDyV0IEKgHwAAAAAAFAYC/QAANJMwLYVTVncPAwAAAAAAICcE+gEAaCZhWoomCfQDAAAAAIDCQKAfAIBmkqYUIaMfAAAAAAAUCAL9AAA0kzAtRcjoBwAAAAAABYJAPwAAzSQsKUpGPwAAAAAAKBAE+gEAaCZJRj8AAAAAACggBPoBAGgmYYpAPwAAAAAAKBhtBvqnT5+u8vLyFv996lOf6orxAQDQ5VKWRTNeAAAAAABQMBxtveCNN95QKpVq+Pe+fft0+umn6/zzz+/McQEA0G3cNoOMfgAAAAAAUDDaDPQPGDCgyb+fffZZlZSUEOgHAPRaTruhuEmgHwAAAAAAFIa8avRblqVnn31Wn/70p+Xz+TprTAAAAAAAAAAAIEdGbW1tzimLr7/+ui644AK99dZbOv7447O+tqqq6pgHBwBAd3h8u1OGpCtGJbp7KAAAAAAAAKqsrMz6+TZL9zT29NNPa/bs2W0G+XP5wX1FVVUV7wXQgzFHkUm/YJ0MSZWVpd09lD6HOQn0LMxJoGsx54CehTkJ9CzMyexyLt1z4MAB/eUvf9EXvvCFzhwPAAAAAAAAAADIQ86B/l/96ldyu9264IILOnM8AAAAAAAAAAAgDzkF+i3L0jPPPKMLLrhAJSUlnT0mAAC6ldHdAwAAAAAAAMhDToH+t99+W5s3b6ZsDwAAAAAAAAAAPUxOzXhPPfVU1dbWdvJQAADoOazuHgAAAAAAAECOcq7RDwAAAAAAAAAAeh4C/QAAZECdfgAAAAAAUCgI9AMAAAAAAAAAUMAI9AMAAAAAAAAAUMAI9AMA0ArLoiUvAAAAAADo+Qj0AwCQgctmKGF29ygAAAAAAADaRqAfAIAMPA5D4SQZ/QAAAAAAoOcj0A8AQAY+u6FoikA/AAAAAADo+Qj0AwCQgcdhKEJGPwAAAAAAKAAE+gEAyMBrNxQhox8AAAAAABQAAv0AADRjSfI6DEXJ6AcAAAAAAAWAQD8AABl47IbCZPQDAAAAAIACQKAfAIBGTMuSIclHRj8AAAAAACgQBPoBAGgkaUpOmyGPw1CYQD8AAAAAACgABPoBAGgkYVpy2iSvXYpSugcAAAAAABQAAv0AADSSMCWHzZDXYSPQDwAAAAAACgKBfgAAGklalpxGOqOf0j0AAAAAAKAQEOgHAKCRRKMa/TTjBQAAAAAAhYBAPwAAjSRMSw6b5LEbilC6BwAAAAAAFAAC/QAANJKsz+i3GUZ3DwUAAAAAACAnBPoBAGgkYVpycnUEAAAAAAAFhFAGAACNJEzJYSObHwAAAAAAFA4C/QAANJIkox8AA
AAAABSYnEIZ+/bt01e+8hWNGzdOgwcP1gknnKB33nmns8cGAECXS9TX6AcAAAAAACgUjrZeUFtbq7PPPlsnnniiXnjhBfXv31/bt2/XwIEDu2J8AAB0KWr0AwAAAACAQtNmoP8nP/mJhgwZoscff7zhY6NHj+7MMQEA0G2o0Q8AAAAAAApNmzmLr776qubMmaNLL71U48eP1ymnnKKf/exnsiyrK8YHAECXSlqWnMT5AQAAAABAATFqa2uzRuwHDx4sSbrqqqt0/vnna9WqVbruuut0yy236Iorrmj166qqqjp2pAAAdIG3D9vUzylNLTH1s+1OXTEq0d1DAgAAAAAAfVxlZWXWz7dZusc0Tc2aNUu33HKLJGnGjBnasmWLnnjiiayB/rZ+cF9RVVXFewH0YMxRNLd+e0Sjiu2q7O9Sv2CdKitLu3tIfQpzEuhZmJNA12LOAT0LcxLoWZiT2bVZumfw4MGaOHFik49NmDBBu3bt6rRBAQDQXVLU6AcA9EE/WRXo7iEAAADgGLQZ6D/xxBO1adOmJh/btGmTRowY0WmDAgCguyRMS876qyPdaAAAfcVGf1IpkysfAABAoWoz0H/VVVdp8eLF+sEPfqAtW7boD3/4g372s5/p8ssv74rxAQDQpdKBfjL6AQB9SzRlqS5BoB8AAKBQtRnonz17tp5//nm9/PLLOumkk3THHXfoxhtvJNAPAOiVkpbkqI/zOwwpSXYjAKAPiCYt1cbM7h4GAAAA2qnNZrySdPbZZ+vss8/u7LEAANDtGmf0ex2GIilLJWT4AwB6ubhpqTZOoB8AAKBQtZnRDwBAX5Iw1VCj32s3FEmS0Q8A6P2SpsjoBwAAKGAE+gEAaCRhWnLUZ/B7HAT6AQB9Q4nLIKMfAACggBHoBwCgkaSphtI9Pnu6dA8AAL1dhcummhjXPAAAgEJFoB8AgEbSNfrT/+9xGIqS0Q8A6APK3TYy+gEAAAoYgX4AABpJWZK9vveuz0FGPwCgb3DZDcW45gEAABQsAv0AADRjGPU1+mnGCwAAAAAACgCBfgAAWkGgHwAAAAAAFAIC/QAAtMLnMBSljAEAAAAAAOjhCPQDANAKj8NQmIx+AAAAAADQwxHoBwCgFV47Gf0AgN7PsrjWAQAAFDoC/QAAtMLroEY/AKD3S5iSy2aIKx4AAEDhItAPAEArvHZDETL6AQC9XMy05LZJDkNKmlz3AAAAChGBfgAAWmG3GSLeAQDo7WIpS267oTKXTf642d3DAQAAQDsQ6AcAAACAPiyWktx2Q+Vum2pjrHADAAAUIgL9AABkQbgDANDbxVOWXHZD5S6basnoBwAAKEgE+gEAAACgD4umLHnsUrnLINAPAABQoAj0AwCQhdHdAwAAoJPFUpZcNkMVbptqYgT6AQAAChGBfgAAAADow440403X6CfQDwAAUIgI9AMAAABAHxYz65vxumyqjdOdBgAAoBAR6AcAAACAPiyesuS2Sy67oYRJoB8AAKAQEegHAAAAgD4sWl+6R5II8wMAABQmAv0AAAAA0IfFGwX6AQAAUJgI9AMAAABAHxYzJbctHegn3A8AAFCYCPQDAAAAQB8WI6MfAACg4LUZ6L/nnntUXl7e5L8JEyZ0xdgAAAAAAJ0sVt+MV6JGPwAAQKFy5PKiyspK/fnPf274t91u77QBAQDQkxiSTMuSzSDTEQDQO8VTllxk9AMAABS0nAL9DodDgwcP7uyxAADQ43gdhiJJS0VOAiAAgN4pmrLkqQ/0Ow0pYVpy2rjuAQAAFJKcavRv27ZNkydP1vHHH68vfvGL2rZtWycPCwCAnsFjNxRNUcgAANB7JSzJUR/XL3fbVBszu3dAAAAAyJtRW1ubNXrxj3/8Q8FgUJWVlTp48KC+//3vq6qqSgsXLlS/fv1a/bqqqqoOHywAAJ3tZ9udumJUouHff9xn1wnlpoZ4CPYDAHqnx7c79eX6a9/f9ts1qdjUaB/XPQAAgJ6ksrIy6
+fbLN1z1llnNfn33LlzNXPmTP3qV7/S1Vdf3e4f3FdUVVXxXgA9GHMUzfUL1qmysrTh36NsYQ0Z4FRlmbMbR9V3MCeBnoU52Tc0vvZt80ZV7rKpcpCrm0fVNzHngJ6FOQn0LMzJ7HIq3dNYcXGxJk2apC1btnTGeAAA6DbpprtNP3akRj8AAH1BhdumGkr3AAAAFJy8A/3RaFRVVVU05wUA9DoJU3IYTSP9BPoBFLI4PUaQp3KXodo4gX4AAIBC02ag/7vf/a7eeecdbdu2TUuWLNEXvvAFhcNhfeYzn+mK8QEA0GUSpiVnsysjzXgBFLL7lwcU4xyGNjRe4qYZLwAAQGFqs0b/nj17dPnll+vQoUMaMGCA5s6dq3/84x8aOXJkV4wPAIAukzQlR7PaPT6HoX1hAh4ACpM/bqo2Zmqwz97dQ0EP1ngpqMxlI6MfAACgALUZ6H/yySe7YhwAAHQ7MvoB9DaBhKnaOIF+5M5pM0TFOgAAgMKTd41+AAB6q4SZDnA0Ro1+AIUsmLBorIo2GW2/BAAAAD0cgX4AAOolTEuOZldGr91QhIx+AAUqYYlAP9rU/CpH4B8AAKDwEOgHAKBeMkNGv4eMfgAFrMxlqDbOOQwAAADo7Qj0AwBQL2FZcjZLY/Q5DEVT3TMeADhWFS4bGf3IG0tDAAAAhYdAPwAA9RKm5GiW0e+0GUqYhDwAFKZSl03+OIF+AAAAoLcj0A8AQL2kacmZ4cpImB9AobIZnMMAAACAvoBAPwAA9RKm1aJGPwAUKssixI/2cdsMxWhEDwAAUFAI9AMAUC9hKmNGP6F/AIUolpLcds5gyF+521AtvR0AAAAKCoF+AADqJU2rRY1+AChUoaSpIgfnNLSt+VFS7rKplt4OAAAABYVAPwAA9VrL6AeAQhRIWCp2EuhHdplKPJW7baohox8AAKCgEM4AAKBeazX6qVIMoBAFE5aKWb1EGzKVeKogox8AAKDgcOcPAEC9pCVK9wDoNYIJUyVOQ4Ykk8a8aEXMtORq9lRY7rapNsYxAwAAUEgI9AMAUC9pWjTjBdBrBOtL95S6bKqLE7RFZvGU1SKjv9xlkNEPAABQYAj0AwBQL2FKToOwPoDe4UjpngqCtsgimiHQX+ayqZYa/QAAAAWFQD8AAPUSpiV7hisjebAAClEwaarIYdSXYSFoi8ziGWr0222GOGIAAAAKC4F+AADqJU1lbMYLAIUomLBU4jRU4baphkA/WhFNWXJz7QMAACh4BPoBAKiXaKVGPwAUoobSPQT6kUXctOS2d/coAAAAcKwIZwAAUC/RSka/w0gvAgBAIUmYllx2Q+Uum2ppxotWxDLU6JdoRA8AAFBoCPQDAFAvaVpyZLgy+hyGQgmCZAAKU7mLjH60rrVAP1c9AACAwkKgHwCAeglLchotgx1FDpvCSUIeAAqTx2Eoxq4ktCKWoRkvAAAACg+BfgAA6rVWo9/nNBROkg0LoLAQ2kcuYqYlV4ZrH6F/AACAwkKgHwCAeqYl2TPU6Pc5DIXI6AcA9EKxlCUPpXsAAAAKHoF+AADaUOQwKN0DoOCQkY1cxFLpps0AAAAobHkH+n/4wx+qvLxc1157bWeMBwCAHsdHoB8A0EvFW6nRT+gfAACgsOQV6F+8eLGefvppTZ06tbPGAwBAj0OgHwDQW0VbKd1jM6QUTZwBAAAKRs6Bfr/fry996Ut66KGHVF5e3olDAgCgZyly2Aj0AwB6pXgrzXh9dkPhFNc+AACAQpFzoP+b3/ymzjvvPJ122mmdOR4AAHocn9NQOEGwA0DhsCyrSTNVj91QhAVLZBBNWRlL9/icHDMAAACFxJHLi55++mlt2bJFjz/+eM7fuKqqqt2D6m14L4CejTmKIw4fcqqqqrrFx4NJaXu1Q1X2ZDeMqu9hTgLHLpqSgjWOhnNaotah99dXa6A7/8Atc7J3O3DQqa2bW1776g7atSZhyu8h2N/VmHNAz8KcBHqWvjwnKysrs
36+zUB/VVWVbr/9dv31r3+Vy+XqsB/cV1RVVfFeAD0YcxSN9QvWqbKytMXHk6alf8QCGT+HjsWcBDrGwWhKo82IKiuLJUkTHRGVlztUWeHM6/swJ3u/1q59YxwRDW7HMYNjw5wDehbmJNCzMCezazPQv2jRIh06dEgnnXRSw8dSqZT+85//6Mknn9SePXvkdrs7dZAAAHSFloUL0hw2Q5QpBlBIgglLxc6jZ7Vyt6HamNmNI0Kh8Tko3QMAAFBI2gz0n3POOZo1a1aTj331q1/VuHHj9K1vfSuvLH8AAAAAnS+QsFTsPNqOq9xl0+5QqhtHhJ6qtUVur8NQiEA/AABAwWgz0F9eXq7y8vImH/P5fKqoqNCUKVM6a1wAAHQ5whkAeotgwlRJo4z+CrdNq2sS3Tgi9FStXft8DkMHo+wCAQAAKBS2tl8CAAAAoJAEm2f0u22qoXQP8kDpHgAAgMLSZkZ/Jq+++mpHjwMAAABABwkmTA0vsjf8u9RpKJggaIuWspXuCSdZHAIAACgUZPQDAAAAvUzzZryGYVCeDBllK90TJqMfAACgYBDoBwAAAHqZYMJSiZNbfbQfpXsAAAAKC3f/AAAAQC8TTJhNMvqBfHnthsIpAv0AAACFgkA/AAAA0MskLclpI9CP9rPbDBHnBwAAKBwE+gEAAACgj2I5CACA3umpDSHtDqW6exjoQgT6AQAAgF6GRGzkKtuxwiIAAACFa5M/qT9tj3T3MNCFCPQDAJADQ5JpETpD50mYHF/oXDZJKY4z5IGjBQCAwlXkNFQdJqO/LyHQDwBADnwOQ+EkIQ90nsv+fZittehUZS6b6hKcx9AUWfsAAPReQ3x27SXY32cQ6AcAIAcE+tHZRpU49LO1we4eBnqJTMHbCrdNNTGzy8eCno3SPQAA9E6WpHNHefVnyvf0GQT6AQDIAYF+dDavw9CsAS69vTfW3UNBL1XuNgj0Iy9c9QAAKEzRpCW3zdDwIrv2sGu4zyDQDwBADoqcNoUoeYFOYlqWDEnnjfbo1R0RJamjjk5Q4bKpNk6gH0eZlsUDIQAAvdC+SEpDfemr/ECvnVr9fQT3dQAA5ICMfnSmmpipfm6bDMPQFyYU6emNoe4eEgpcprMVpXvQXCwlue2tF+ihdA8AAIVpdyil4UV2SdK5ozx6dUe0m0eErkCgHwCAetkCGulAPwEydI79EVODvOnbsskVTh2ImDoQIesG7WNZmRclCfSjuVjKyhroZ3kbAIDCtDec0rD6QP/IYod2BpPdPCJ0BQL9AABISpmWbFki/UUOQyEy+tFJ9kdMDfTaG/79lSnFenpjuBtHhEIWSVnyZgjelhPoRzPpQH93jwIAAHS0PaGUhvqOXuT7e2wkEvUBBPoBAJCUMCVnlkg/pXvQmQ5EUxroOXpbVu62KUGdfrRTKGGp2NnyfOa0GeI0hsaibWT0U7oHAIDCFExaKnYefb44d5SX8j19AIF+AAAkJSxLjixXRQL96EwHIqYGeUmrRccIJpo+2AGtiZuW3FkWuR2GWHQEAKAXGF3i0A7K9/R6PAEAACApaUoOo/VgR5HTplCCYAc6R23cVLmL3Fl0jEDCzJjRDzQXS0muLBn9Xha5AQDoNbLtYEfvQKAfAAClMxazJcCS0Y/OZEkysiw0AfkIJiyVtBLoJzkbjcVSljxZAv1FDpsiXPsAAAAKAoF+AADUdo1+ly1d4gAAerpspXtcNilK4Bb12mrGS0Y/AACFJ2layrKOj16MQD8AAEpn9Ger0U+2NToTRxc6UjBhqsiR+ag6vr9Tqw4nunhE6KnipiVXlkVuAv0AABSe6oipIRn6fzlthuIpruu9GYF+AACUznqgZiG6S6bbbafNoAkm2iWYtFqt0T9ngEtLD8a7eEToqaJtlu4xKN0DAECB2RNKaVhRy0B/qdNQXcLshhGhqxDoBwBAR0r3dPco0BdZVuYgWpnLkD/OjTjyl67Rn/mENtBr18EIxxXScmvGy
/ECAEAh2RNOaZivZaC/zG1TXZwF/N6MkAYAADpSuoeMfnS9QCuNU8tcNvlj3Igjf8GE2WpGP9BYvI0a/TSiBwCg8OzOltFPIlGvRqAfAABJSTL60U0OREwNylBDs9xlUy034miHlKWsC5flbkM1MY4tpEv3uLNk9Pso3QMAQMGpjZsqd7W8vpe6bOwY7uXaDGn8/Oc/18knn6wRI0ZoxIgROuuss/Taa691xdgAAOgyCWr0o5vsj6Y00NPylozSPWivtsKycwa6tPQAdfqRbsbrbqMZb4hAPwAABccwWtkxTOmeXq3NQP+wYcN022236c0339Qbb7yhU089VRdffLFWr17dFeMDAKBLJCzJkeFmCOhsByKmBmbK6HfbVEvWNTrBjP5OrTiU6O5hoAeIpURGPwAAfUQpiUS9XpuB/nPOOUdnnXWWxo4dq/Hjx+vmm29WcXGxFi9e3BXjAwCgSyRNi9I96BYHoikNypjRT8YNOofPYVMkxbEFKZZL6R6OFXSRbYGkXtwS7u5hAECvVeayqS7Bdb03yyukkUql9NJLLykUCmn+/PmdNSYAALpcwhSle9AtDkZN9c8Q6KdGPzqVJVkWD3p9XayNZrweO8140XW2B5JadpCyYgBwLMws93clTkMBni96NUcuL1qzZo0WLFigaDSqoqIiPffcc5o6dWrWr6mqquqQAfYGvBdAz8YchSTtOGRXxG2qyt/6jdHhQ05VVVV34aj6pr42Jw8cdGrr5szH1Z79TlVV7eviEaHQ1eRwrvKE7Xpz9X4N97QdxO1rc7IvOXjIqc2bsh8rh7j2dbm+OueW77drS41dVVUHunsoQBN9dU6iMB2KS1ado9Vrd2+4rvflOVlZWZn18zkF+isrK/X222/L7/frlVde0ZVXXqk///nPmjJlSrt/cF9RVVXFewH0YMxRHLHOGdHYUocq+zlbfU2/YJ3Gjy/J2NgIHaMvzsn+wTpVVpZm/Fy/LJ8DWpPLcfPRAQmtr0no9LG+rK/ri3OyL8l2/snnNeg4fXnOmZGAKl2mKivLunsoQIO+PCdRmEIH45pZnFLlKG/Gzxf68wVzMrucSve4XC6NHTtWs2bN0i233KLp06fr0Ucf7eyxAQDQZXKp0e+xG4qmumY86DsoioGOlssxNbHMofX+ZKePBT0b5x/0JJGUpQq3jQbQAHAM9oRTGl6UpS4ferV2tR00TVPxOLXzAAC9R8Jqu0a/z2EonKSmIYDCZ7cZMomlIQccJuhK40od2lTHIiQAtNeeUErDCPT3WW2W7rn11lu1YMECDR8+XMFgUC+++KLeeecdvfDCC10xPgAAukTCtORoY/nb5zAUSlrq3zVDAoC85dNg12Gkz300Iu+7+Mujp5lQ5tC6moSmZymlCABo3YGoqQGeduV1oxdoM9BfXV2tK664Qvv371dpaammTp2qF198UR/84Ae7YnwAAHSJpNl2Rn+Rw1CY7eToQNGkJY+dUBs6TjhpyZfjMTW5wql1NQkd39/VyaNCT5XLFY0zFLrS2FKHXt0R7e5hAEDBMi3JRk+5PqvNQP9jjz3WFeMAAKBbJUxLjjbuh3xOQ+EEgX50nP3RlAaScYMOFEpaKnbm9nA3Z4BT/9gVI9CPrLjqoSuYliVDkttuKJbiqAMAoD14sgQAQFIih4x+n8OmEBn96EAHI6YGelu/HTOUXykWIJiwVNxWZ/F6w4vs2huhw3hfRr4feoqDlJoAgA6RSzI/zxe9F1dSAAAkJXOo0U/pHnS0/dGUBnlab5ZV5DQU5JhDHgIJM+eMfoNt3cgBRwm6wt5wSkN96euhzUhn+AMA8pNLAJ9n2t6NQD8AAMo1o99QOGl20YjQF+xvI6O/zGWTP8Yxh9ylM/oJzaLjEApAV2gc6B9RbNfOILuNACBf/rilMlf2UG+pyyZ/nKt7b0WgHwAApWv0t1XtwucwKN2DDnUwampgloz+cpdNtdyIIw8HIqYGZDmmgMY4u6Cn2Bc2G
wL9E8oc2uhPdvOIAKDw7A6lNMyX/aG2zGWoLkEiUW9FoB8AAKWDHbY2ylgU0YwXHSyWsuTJ0gW6zGWTP86NOHK3O5zScUW5B/oNUSID2dErBF2hOpLSoPodbpVlTgL9ANAOu0MpDW/jPrCUHcO9GoF+AABy5KOeIbpYudtQLTfiyIM/bqrMlXvpnv5umw5FOcb6qlyOFJfNEOuN6GwpS3LUl1CscNu49gFAO6QD/Y6srylz2VRH8lqvRaAfAIAcee2GIiluitBx2jqaylw21RJhQ57yabI7tMiuPWFqYfdVuVzRvA5DERa50cXoFQ4A+auOpDQkS/8vSSp1GuwY7sUI9AMAkKN8gmdARyinWRY62XCfXXsJ9COLInazoRtQLQoA8mdKstuyP7OWuWyq4/mi1yLQDwAA0E3aWjoqdRmqI+MGnWhokV17QxxjfVUuy9deh6FwkmMEXavCbdPhaO6LkNsCSf1+S7gTRwQAvUOpy8bzRS9GoB8AAKAbJE1L9jaibDbDyKm0BiBJiRyOqeYGeWzan0cwDb1LrqV7yOhHZ4qlLLmaZaBOKHOoKo+GvG/tjWnh/nhHDw0ACkouu6E8dlGOthcj0A8AANANDkVN9fdwK4aOszec0lCfPa+vsdsMmTzr9UlJ05Ijh4WhImr0o5PtC6c0xNf0ejih3KGNeQT6dwRSKnVxTQXQd1k51jyjHG3vxpUQAADlVr4A6Ej7o6YGevILygLZ7AmlNLwo/2OKEG7fFEtZcuewBYSMfnS2fRkWKY8rsmtXKL/dRmU0mATQhx2OmernJszb13EEAAAAdIMDkZQGebkVQ8fZ3c5AP/qmuKncAv12Av3oXHvDZotAv83IfbfRjmBSI4rtmjnApeUHE50wQgDo+bgPhESgHwAAoFvsj5ga5OVmHB2HBzzkI5qy5La1HegvchrU8kWn2tNK2bFcq0v8Z19cJw9xaeYAp5Yfylynf3sg9zJAAFCIdodSOo77wD6PQD8AAEA3qCajHx0smLRU4sz/mEpnbFPuoq+JpSy5cogHkNGPzlYbN1XuahnVd9kMxXJYZNrkT2p8qUMlTpsCicyvv+zNwwomOM8B6L1I+IBEoB8AAEnUqEbXCyYsFbcjKAt0tKE+u/aGCID1NbGUJU8OpXt81OhHF8jUHPL0YW69vDWS19dnOqJ3BZMa4rVrGWV9APRiB6KmBpJE1OdxBAAAAPRgTpuhRK6FitGnWe08TIb67NoTzq/pJQpfOqM/l0C/TREC/egGcwe6tCWQVHWW81N1ONUksDXMZ9fuZk18/7Yzqu/OKdWyA5nL+mytS+qZjaGOGTQAdBPTSvc3Qd9GoB8AAKCLxVOWXDnehZW5DNXGyLZG5xlWZNNeAv0F6c09Me0Jte9vFzeVU41+t13U6Ee3+dq0Yj20Otjq5/9bHdfJQ9wN/5490KmlzQL6e8IpTSp3KtjKgtUbe2JacYhsfwB9h92QkiQS9UoE+gEAyIPTZihOwAPHaIM/qQnlzpxeW+6yyR8n0I/sYnksHjU31Gcn0F+gXt4a1qL9mbOU2xJNWXLnkNGfqaQK0FGsNrYilThtOnO4W3/clrmEz5qahKZWOBr+PaXCqbU1R4P2/rjZpHdJpp+3J5xSRXtPoABQgLL1NEFh42oGAEAeqFWMjrD6cELTKnIL9Je5bKqNc8whu73hlIa1swFbsdOmEOe1gmNZlkpdNm3wty8TOZ6y5M7xkCHUj85Sl7BU4sx+hJ053KPlB+OqybC7zWpWqsJpM9Q4H+Ofu6L60HEeSdKYEru2Bpouapr1gf8hPnY2AShcpmUpn3X5MpdBIlEvRaAfAIA8FDkMAmI4ZtsCSY0pzS3CVu7mRhxt2xVK6bh2BvpRmDbXJTWp3KFkO08PsRwz+iUa1qPz7AunNMzX9rnrG9NL9MDKQJOM/JqYqTJ3y2PYMKRUfUmKNTUJTavP+J830KXFzcr6rKlJalqFU3MHurSklRr+ANDTHYyaG
ujJPcRbyo7hXotAPwAAefA6DIXbG1UB6uXTLKvMZaNGP9q0O5TScAL9fcqbe2M6bZin3UH4XEv3AJ1pbzilITkE+svdNn1yjFc3LvI3BKf+Wx3TyYPdLV47scyhjf6k4ilLdsNoKD81vsyhTf5kk9e+vTemU4a6NLWfU2sOU6cfQGHK9z6wzGVTHTuGeyUC/QAA5MHnMBSiniG6ULpGf+875v7ff2oaSibg2O0Jtb90DwrTnvqH+kEem6rbUXJkoz+pcaWOtl8oSveg8+wNmzll9EvSzAEuXTujRHcvq9OqwwmtOJTQ8f1blsGbM9ClpQfjendfTKcMcTV83GYYLRbGDkdNDfDY5bQZYsMmgEK1K89Af6nLUB0Z/b1Sm4H+Bx54QGeccYZGjBihcePG6dOf/rTWrl3bFWMDAKDHKaJGP45RdTilwd7ccy3SNfp71414OGnqvf1xbahNtv1i5CSSsuRztD+Hx5BYeCkgKdNqyFI+vr9Tq9qRiZxIiYx+dLtcM/qP6Oex654TyvTvPVHtDqXktLU8hkcV27UtkNK71XGdPKRpxn86YSN9TU2aluyNTpt2I/0xACg0u/Ms4VhG6Z5eq82ngXfeeUeXXXaZXnvtNb3yyityOBw6//zzVVNT0xXjAwCgR6EZL47V6pqEpvbLrRGvJHkchmKp3nXMvb03rv83vaRFrWS037HG6Ad6bDoQ4YGvUKw6nNDx9eeRqf2cWlOTX6B/fySlAXksOPauMxB6kmjKkteR34KTzTD0tWkleuCk8oyfNwxDstILYs0XAmb1d+r9Q+n58v7BhGYNOHo9nlKR/1wCgJ7gUNRUP3d+iUR17FLvldo8Cn7/+9/rkksu0ZQpUzR16lQ9/vjjOnjwoBYuXNgV4wMAoEfxOW004+0gyw/GtbG27z1Qrz2c0NSK3AP9vdHiA3FdMMarHcH8y42gcwz12bW3HeVfkNnuUOe+l2/vjenUoelM5RKnTcE8H9b/vSemM4a1rG0OFBJXlh0pDptaLeuzrH6R+T/NavzPoyEvgAJm5Nj/S5JKnJTu6a3y3t8bDAZlmqbKy8s7YTgAAPRslO7pOC9vjejvu6LdPYwuV5ewVOrqu22SLMuSaVmy2wzShDtQHs92GQ0vsmsPgf4OEUqYuuC1g4p04rWiNm6qPI/MveY21iY1oSy3+vwSNfpReP5vYpE+PMLb4uOljbJYA/Gm1+NhRXbtDRP4AlB48r0PdNgM9bINw6iX+91dveuvv17Tp0/X/Pnzs76uqqqq3YPqbXgvgJ6NOQpJOnzIqaqq6jZfdzAubTvkUJVFbfFjkTSlgN+pgF+qcu9r8rnePicPH87tWGvyNTken4WgKmSof8ymqqr9SgYcWrK2WmV9e4PDMYumpFCN45iOkUhcWnHIocpYy3Nbb5+THe3Xux26cKCl37+/RfPLOz5oGDOlQK1DVVX7Gz4W8Tu0Yn21cil1blnS4RqnNm3K/Xg51IvOQYWgr8y5aKpzj61AKx8/fNip1Ruq6+dRdYvPcayjub4yJ1G42vOsUMjPF315TlZWVmb9fF6B/htvvFELFy7U3/72N9nt2e8i2/rBfUVVVRXvBdCDMUdxRP9gnSorS9t83eC4qSVWSJWVJV0wqt7rn7ui+sRUQ2/ujWn8+JKGraa9fU7GUpaGhAI5HWuN9cvx+CwEf1kV0BfmFKncbdNHSmM6HLc0d4Snu4cly7K00Z+U225odEneuTDdapM/oeOdCVWO87X7e4yzLP17ectjs9Dn5L48G30eq0DClK0uqKtnlOih1fnP9Vy8tTemj062VDns6Lw53RtVxGloxuC2y/GsOZzQKY6EKsfmfrz0pnNQT1focy4fD60O6GsneLv8nDvLCul9SR+ZbG8yjyRpbCygASOLVHEMO2bQu/SlOYnClDItDQj2necL5mR2OV+9brjhBr300kt65ZVXNHr06E4cEgAAXcvKo4slpXs6xn+rYzp5sEsTyhyq8ved3
REbahOaUN6309f9jUqOzOjv0opD3VsPedmBuO5bXqc7l9Vp+aGEntoQ6tbxtMfuUErDi44tmG0zjDYrKeVzruwJNtYmdOm/D2tLXdedY57eENIXJvrk7cQm2u9Vx3TCoKYB/en9nFp1OLeeJ//eG9NpedbnN1R4f//uYlpWlx5zhWpPKKWEqW5ZWJ0/yKXH1gRbzCMpXcN/KXX6ARSQ6oipwd6uS2pAz5ZToP+6667Tiy++qFdeeUUTJkzo7DEBvY5pWd3W2OkPWyNKmjyYAdmkLClLP7cm7DZDPWFKJU1L1QVaTzuWStdnt9sMnTjYpf9WH/v5MdUT/ig5WFOT1PR+vSfQH81z0etgNKV+jbIkPQ5DsQyVTWpipt7ZFzvW4WUVSJi6falfm+uS+vq0Et08p0yfHudTkcNQINH1NZqPJYi6K5TScccY6G9LbczUea8d6pb3pj2iSUs/WxfS787qr6fbuXiz2Z/UX3dE9OzGkB5cGdAft0Wyvr4ubsoftzSiOB249DlsCmZ4v3YEk/rNpnC7xiRJ0ZQlr6PpRWuwz679kdz+NjVRU/09+R0vHruhaGFecjrEK9si+k+O56RH1gR17/t1rfZo6KwFoELzs3VBXTG5qFt+dmWZQ+PLHC3mkSTN6O/U8kO5LZoBQE/QEQkf6D3aDPRfc801+tWvfqUnnnhC5eXlqq6uVnV1tYLBYFeMD+gV/rA1oodXd/2c2RFMavGBuG5Y5NfKbs6YBHqyhCk5be1rNWhZVrsDdKZlKdTOoNk/d0d1z/t17fra7vb67qjOrM8mHVFk187gsUePvvRWjdbkmM3anbYGkhpd0r4b8Z6YTfvVd2r0xLpgzmP7x66YFhzXtEyCzVCLBelnNob02s5ozhnK+frPvpjuXlanr0wp1v+O8zUJ9pw7yqtXt3ddk2jLsvSTVQHduiS9q+Ce9+v0x20R1WZaAWnFnlBKQzu5PM2DqwK6e36Zfr6ufUHzrj5+H1gV0NenF6vYadO4Mkdex5JlWXp6Q0h/2xXVYK9dpw1z64opRdpSl9SeUOvnq6c2hHTpxKOByxMGu/Te/pb3Xy9tiWh/JKU39+R/nB2Opo6pmXckacndjkPF6zAUSRbGIk+ublrk11t72w7e/3tPVPsjKb26o+2/15t7Yurntum7c0r1+NqW9/7xlKWr36k5poWe3mDR/pimVDhV7Oye8jg2w9CTp/fL+Dmfw6YoizEACsieMIF+HNXmlfWJJ55QIBDQeeedp4kTJzb899BDD3XF+ICClzAtLT2Y0PxBLu2PdG0q1L92xfTlKUW6d36ZFh+I6wcrAq1mFwF9WcK05MjjWbMuburu99MBuXuWB/TjVe1byHtodVA/XBnQve/XaWcwv23+71XHNcRnb/dCQXdatD+u+YNcktRQm/9YrD6c0ClD3Pr91p4fOLGsdIAhX8VOQ4FE156/TcvSZn+y1YDz6sMJLRjh0aQKp+56P5DT7rEqf0KVZU3LNEytcGptzdEgbDRpqS5u6ra5pfr1ppD25rBzJdcM2aRp6YGVAW0LJHX3/DINyrDNeXKFU+tr2w4K18ZMff3dmpwCha0JJ019d3GdTh3q1m3zyvTd2aW6dkaJxpTY9fi6dFbw7maB5WDC1LZA0/NF3JTcuW5Laoe/7Yxo3kCXpvVzyiZpV57nK9OydOE/DineRcGzv+yIaFqFUyPrM+s/O96n3+YYWI2lLN2xrE5jSh366tRizR7o0shih3wOmy6fXKQn1mc+39fGTIWSloY1etCeN9Clxc0C/ZZlyR839fXpJXq3Oq4qf6LF93lgZaDV9+rn60P67PjMtfUdtvT1LJuF1TGdNCS/sj2S5OvAsnU1MVOrDidaHWs0aemtvTF9f3md7lxap/uWd/yi9j93RXXyYJf+Wx3LWj5s2YG4lh5I6PLJxZo9wJm1pMvecEpv7Inqs+N9GlnskN2QtjYr4fOjVQHdOKtU+8KpJue9QnMsuxJMy9JLW
yK6cKy3A0eUP0eWBA9D6XH2ZDUxU69uz77LCEB+IklL/9rddckeHWUXGf1opM2CeLW1tV0wDKD3+vWmsD4z3ie3Pf1Q8dnKrtuiujOU1Mji9M+7bFKxttQl9Yv1QV09jSaiKAwbahMaVeyQJ8PW6o6UNK28MvrvOaG8yb/vXJbeop9pC3hrXtkW0egSh84b7VVd3NRzVWHtj6T0zeklDfXLWxNImPI5DH10pFd/3hHVp4+hAeexOpKlm2vAPpq05LIbTYLdQ3w27QmlmgTIpHQQ1JfDCszvNod10+xSPb4u2OWNN/NxLBnN5S6b/HHzmDJ5cxFOmnp4dVAJUzIMaXSxXZvqkvrenLIWr31le0TfnF4sn8OmIV6bbljk182zS1sdYzxlyWEzWhwr8wa69NedER3fP73488KWsD41ziebYei7s0v1vcV1um1uqYoyZH5alqVfrA9pgz+pUcV2fXlKcatz+UAkpftXBPSlSUVt9kno77HpQCSlga3UO7UsS/evqNOtc0r11t64bl7s11emFOf1kLUnlNIDKwO6ZkZJk2PWYTN0fH+Xju/vUm3M1HNVIe2PmHLVB/KLHYbsNqnIYdMXJ6Wv8R2wXiYp3YMkmDCbZNnWJaX/7Ivr9nnpY+BLk4v0/RUB3Tq35THRmtd2RvXB4R79YVtEn+rg85VpWToYNWVakmlJh2Omlh1I6LtzjjaXc9gMzR7g1MLqmE7M0qy2OpzS91cE9LVpxRqVoW54idOmkcUOrTmc0NRGJbgsK72A9NWpxU1e77YbLYLZC/fHdUL9Qud3ZpToxkV+3TCrVBVum/68PaLF++M6d5RXP10b1NenN71f21qXVLHT1upxObHMqQ21SU3LUh7svf1xfev4/O8Dmwf6X9kW0arDCdmM+qCo0ju0PjDErTEl9qzXhO+vqNP8gW79ZUckfa7R0WPYUHqH3byBTl09rUReh6En14e0riahyRUtf6+D0ZT2hU3ZjfTuoHKXTYPbuAaEk6Ze3xPV3fPL9ZGRHt26pE5lLluLWvFV/oT+vCOim2enj6XzRnt1+9I6zRnoavE9k6alB1YEdNu80obf/UuTi3X7Mr/unl8uKT0PJpY5NabUoa9NK9b1i/y6ZU5pXlntSdPKGqDubKGEqZ+tC6WbTtf/nhVumxYc51ZlWW5l6Z6rOnqO76mO7+/U33dF9eER+S9GrDwU14GoqdOGurP+rYIJUw+uDOpQLKXrZpbmfe/yi/Uh7Q4ldcZwd073SutrExpf6ujW4wfo6d7eG9MT64P64HBP2y/uQfxxU2Wu9u9O74jEK/QcXd/5BuhDIsl0NuTnJxTJsqwu3aZ7JKDS2NhSh2pjPTs7BWjswVVBfXC4WxeO7dxAdsJUXhn9zf3vWK9+tyWsz0/IbSFv9eGENtcl9f/qgy2lLpuumlqsXcGkfrs5rC9PKc769X/eHtW5o7yaXOHUC5vD0rj2j/1Y3bs8oJQpyZD6uW06ebBLMwe0DIIc8Y/dUZ3VrHTLSYPdem9/TJ8Yc/TvXBszdcHfD+o3H+qfMev6iE3+hEaW2OWyG7qkskhPrg/p2zM6bzHzp2uDctrSi6f5SjfKat+BVuayqTZuaUS7vjo30aSlW5bU6dvHNw08P7omqJ3BZEPdcSmdzWlaaggujC9z6vqZJbp9aZ3uPaEsYyDh3X0xnTSo5bExrMiuveH0rgHTslRVf92U0t//2hklunNZnW6fV9YkiB9PWbpveZ3OHO7R5ZOLVeVP6KZFfn1qnE9zmwXiFu+P6w/bIjkH1c4b7dUftkX0pcmZ/87PVoX1sVFe9fPYdf4Yrz50nFuPrw1pdIldn8zhfLXsQFwvb4vojnllWRcIy922Vhfn39ob093v1+k7HXi8D/XZtTecUmXZ0ffo6Z1O3Xba0Z9R5LSpssyh9w/GNSvLXG/snX1x3TmvVDcvrtP/ju24B8pF+2N6aUtE48scchiGbIZkt0nfmtHy73b+GK9uWuTXC
YNcGX/+P3dF9c6+WKuLSkdcXOnTLUuOBm8l6cergvrEGG/GAHOx06a6Rot0/9wV1fWz0oFjh83QjbNKdeeyOrnthk4flt7ZIUlLDsS1oTahiY0WpX6xPqTvzi5t8TOOmN7PqUUH4lkD/UlTDYtG+fDajwb6w0lTSw4cXfyR0oGCbYGU/lMd06+qUnLapWuOL5G92bng1e0RnTHMU38dyC2I+rkJLd9zKb277t73Azp1qFuW0r1aFu6P62vTipucr5p7dE1IV9VfZ22GoZvnlOqmRX5dM6NEJU6blh2Ma8mBuGpipr47+2jg3mEzNKbEUb8zqel7/MOVAX1pclGTgKvHYej0oR79bWdE0/u5tGh/TDfXL5rabYaunVGi+5cHdNvc0pzmxKvbI3pzb0wum6FvHV+sfnn2WchHbczUv3ZHdVyRXSOKHRrktemlrRGtPZzQ5ZOLmry/NTFT9y2v021zy7LuLFpfm9DzVWHN7O/MuFjSk3xslFc/WxvUn7ZH9LFRuQf739oba9i1eO/7ARmGdOZwt+YMcDWZd+9Vx/Tytoi+Ob1EpS5Ddyyt0yWVRU0WELM5UL9L/BvTS/T0hrCunJr9nuSdfTG9uSemlGXJazf0iTFejc+yMLM/klIkaWl4kb3TFgbWHE7o2aqQxpY4dEUb973I395wx5T0+8++mNbWpHc19QWLD8R1wRifVh6KNySgFIr23Fsd6b/jJTLcq/DnBDrRUxtC+r+JR7Lt0hmsiTwzh9tr4f54xoCKvX5bd1eMAZDSGQalzpZZvG3ZFkhq7kCnVh1K6MKxnTOusvrAy7HOiYnlTj1XFc4pI+JQNKVnN4Z0zwkts2GPK3ZodyjV5vfZUJvQZ+pLNwypD8wdy818LGXJYahFQKYtv6oK6YRBLp1Zn/VyMJrScxvDSpjSvAznHykd4PzenKaBqskVDv1hW0SfGHP0Y7/cENLPTq3QI6uDDYGvjGPYFNa1M9Lfr8KdrqsbSphZA3Xt9cS6oMaVOrS1LqnlB+NZFzQy+fP2SMN7la9ydzqjf08opb/vimp7IKkSl00XjfO12AnRHvGUpVuW+PWNacUtsgovqfTpkTVB3TDr6N/tbzuj+uiIpr9Lf49dX5pcpEfXtMxE9sdN/WVnVPfOz54F/trOqM5u9n0H++y6bFKR7l8eUMqyNK3CqdkDXXpkdVBfnVbckIVbWebUvSeU6fmqsP62Iyp7/SFgWtJAj013zsstmCZJI+vnYiarDyfkj5s6qVFmeLHTpm/PKNEv14f09t6Y/mdo61njL28Na3copdtzDO615tShbg3x2nTjIn+H7fSYXOHUz9eFVOG2yWakj4vjS01VNNtldNE4n25c5NfM/s6G3+FwNCWvw9Zi4WL5wbhm1L9u3iCXFh+Ia/6glu+PaVk5Z/juCaX0s3VBTalw6p4TynL6Opth6IxhHv1tZ1QfHuFpGHcsZenBVQFVljpy2qXgtBk6ebBbb+6J6rRhHj2zMaRp/ZytLnqcNNilhdVxLRjhUSSZbkTe+HpT7rbpupkl8jgMlTQ6b31pcpFuXOTXvfW/33+rY5o1wJl1h9uYUrt+u+VoqZhtgaT+tD3SUPbLkDSpon2Pf0VOQ5H6ci1PrAvp8mZNVA3D0JhSh8aUpr//upqEfrAyoOtmHj1vBBKmFu6P644s5/RMnDZDJw5y6809MZ1W39/Fsix9f0VAN8wqadJY+KMjvfrekqPvW3NrDidU7jJ0XKNAtdNm6JY5pfr+ioCKnTbNHuDUpROLMs6ri8b79IMVdQ0Be8uy9NDqoE4c5Mq4U2jBCI9uXFSrf+6Ktfi9B3ntOus4jx5dG9K4Urs2+ZPyxy0dOuzUTCuk04e5NbLYob3hlB5eHdRpQ926/8Ry1cZM/WhlQCcNcbUr4zwXT6xP//zamKm/74pqbzilDw33ZNxBWOG26bJJRXp2YyhjMHBPKKWfrwtqZLFDN88ubddCU3e4Ykqxnq8K6Tebwrqol
XJZjf1tZ0Rb6lK6pn7x9dShbsVTlv69J6YHVwUUr6+CF09Zqixz6J75ZQ3nobvml+mHKwLaE061SIbI5MkNIV0xuVgVbpsOxUwFEmaT80djh6Mp/W1HVHfUXwPr4qZe3hrRi1siDYuOjQUSpu5fHtDMAU7tCaWUtKREytJNs0s7JOhfHU6fv0cUO3TnvDI9tiaoTf5E1oUH5Of3W8L64/aIvje7TOPK2h/yS5qW/rAtIl8n76zuKSzLkmlZOm+0Vz9aFSi4QH97lLnSyQheR8/cDY32IdAPdBJ/3NShmNnwwCNJJ9Y3ZTulHbVR8/Xf6pi+Ob1llt+sAU69fzDzQ3ZvdyCSktdhdFvjr0KSbzmWg9GUBmTILAskTH1nYa3mDnS1mhnbmt9tDuuKKcX6zaZwxrIux+JgNKXr3/NrdP2Dvs9paEg7M62POGOYW//eE9MZWQK5lmXp/uUB3TS7tNXA1JHzRGvlJarDTcuJXDDGqxe3RlqUjMjHD1cGdFyRvdUdCc9uDOljo7xNSgot3h9XIGHps41+3wEeu74+vVg3vOfX3IHOFsfPoWhKRU5bi4/bDEON9xodjqaUMC2NL3Pq+P7OVgOnO4JJDfDYmwQWPzPep99sDjfJuK8Op/Sn7RGdPMStyeWOdgVXn9kY0hBfOihjWpauf8+vyjJHkwWFaNLSi1vDuni8r8XPiKUs7QqlNLa0fbdew312/Xh1QNMqnFowwqPRJUU6HE3pV5vCOhAx9eGRniaB53wkTUu3LvXryqnFTYJfR5S6bHLbjSbzfNmBuD4+qmWAYGK5U/+tjmvx/njDYo9pWbrn/TrdMLNldu8RA+pL5by7L6475rX8vuPLnLpptlOWZWl1TVJ/3RHVd+eUNizWHWEzDH0ux501bRlZ7NC2QLJJOY9QwtTTG0OtLlj830Sf7lqWnk9jmv2tTcvSj1cFNb7M0WEl9CaUO3XdzBJtD3RMD6Dp/Zy6/8RySenjoi5u6tDO/S1eZ7elS4fdsqROrvraLRVum3YGk7prXlmTv/MftkV0Y30w6ZyRHt25rK7FPciDKwPyx00ZRvoYOnOYWweipv69J6ZD0ZQMw9CR2KAlqcRh6JoZJXlfz886zq1nq8K6+/1Aw8dqY2aTBaNcnDPSo+vf86suYcllM/ShLIG52QNcemBlQAtGpDO7my+QScpYisdhM/TZ8T49szGsz0/w6Q9bI7o3wwJxYzbDUNK09Or2iBbuj2tksV0Xj/d1SOa3124omLRUHU4pbqqh/0FrJlc4tS+c0tMbQvpCfdLLI6uD7b5WfWxU+j3/n6Eu2QxDz28K68MjPE2C/FI6i/6z43365YZQi51XpmXpqVbmb5HTltNCj9dhqNxtq2+AbdODq4KaO9CVdXHvq1NLFEtlLu/3P0PdctqOLmaUu22qqqqWc4hbb+yOaWcwLJddumHW0eO93G3TbfPK9OftEd2+1K9vH1+S8+J2PGXpnX0x/bc6rupISrfPLWtRLvBgNH0+ab4zKpvKMqd+uynSZPeKlL4uPrAyoNvnleZUXqanubiySH/cFtEv1gez7uR7cUtY4aSlq5od3y67oQUjPFrQaN7HU1aLxQ6bYejamaV6viqk25f6dfZxHs1vZffR3nBKHrvRsAB76cQiPbU+pK9leO47siB2/ayjC8ulLpu+MLFIb+6J6ldVoRZlZX+4IqDrZpY0OS+tq0no8XWhY7rXlNI7C97YHdXXp5c0XL+/PKVY313c+uJcoTkcTXXqbpu2rDqc0M5QSk+c1k83vOdvc5daNs9uDOtzE4q0ZH+8xf1QR9geSKqfx9bqItWxaE+Zs/W1SU0sd8rrMGSamedqb1PqMlSXMDVYBPp7EwL93agvnDgK3TffrdG3ZpS0+TCTyS/Xh3TZpKY3Th8Y7NaPVwe6JNAfT1kZt8+eNNitX64P9clA/33LA5pc4WhXyY2+xLQs3bG0TsOL7Dlt0/z3nqh+sT6kb0wvafJQaFqW7
l5Wp7vml+mP2yJNAn9tiacsRVOWylw2XTDGq99sDutrHdhb4qUtEd06p1THFTsayg2UtLOu4RFnDHPre0vqsgb6/7ozqo+O9GTNvl1wnEd3v1/XaqD/5W0RfWL00Qy+wT57wxbu9thX/8C4sTaZcSfBov0xxU1LD68OamRJOmi0P2rqle0R3T63ZUDWZhj66EiP/rIjqnMabXe3LEs/qH/YzKTUachfn+725IZwQ+3xC8Z4df176XIbza+Zz24M6+vTmx6jY0sdempDSikznTn7+u6o3tob0/9NLNLSA3H9fktEliyNKHZo9gCnplQ423wQ+O3msIodhs6t/31shqH/d3yJHlgZaMjq3BZI6uHVQc0Z6KrfndA0++9Iv5b2Glfm0E8+UNHkY/08dl09rUQp09KrO6K64b1anTfa2+LYsSxLiWblOhKmpTWHE1p2MKE1NQl9ZUpR1ge4z09IBxy/dXyJdodSGlrUeg3uL0zw6YZFfk2qcKjEadMja4L6TBvBxvkDXfrp2qBmD2i5QNSYYRia3s+p6TmWNzgW54326PmqcMPuhE3+hB5dE9J3sixYGIah62aV6Mb3/Lp5TrpfgWVZentfXH/eHtHFlT7N6OAMsf4ee4tgZ0dw2Az189h1qJXPnzbMrYnlDg32Hl28W3M4oQdXBRvKZ+0OpTTAY2s49hw2Q4O8du1u1DTuT9sjGlOa7ldiWZY2+JP63ZaIBntt+t+x3lbr0beHYRhNFjQty5Kl/BtkG4ahT43z6b398RaBveZcdkPJ+kzeFYcSOn907hnYMwe49PddUT28OqhPjvXmtEg5Z4BL/Ty2Y94x0pzXYWh/1NTP14X0tem53UedMdyj56pC+seuqAZ6bBrqs7e7h4phGPrkWK9e2hLR1H5O1URNXVKZ+To5c4BLb++LNSl9tDOY1E/XhvT5CUV5715r7nOVRXp8XVB2w9DJg106uY37+rZ6d2S63o8ucWj0xOzPH+eO8mreQJduXVqnL04sytjD4AjLsvTY2pD8cVP/M9St62aWyB9P92T5brNddr9cH2q4Bufjssnp8nnfbNQD4gcrAg29XArVeaO9enNPTLcu8evaGSUtFvgfWh1QZZlTn5+Q2zU+2/P/xZVFSpiWXtsZ1a1L6jTEZ9clE3xNAqFPrg/p6mlH5+DwIruCSUu1MbPFos1TG8K6YIyvxaK4JJ02zKOHVwe04lC84br08tawThnibnHenVzh1Ft7Y1p1ONHu6+/C6pjeq443KYclpd+Pz4z36dmN4YZFwY6wN5zSEG/L5JLWxjbUZ8/YmyUfoYSpS14/rDvmlXVLeaqamKnn6ncN24z0gvi9ywPtuh4cjqa0O5zSpf2cGuy16XdbMicV/WhlQGcOd+d9b/PHbRFtrE3Ibkv3BhrkteujIz3tirs098LmsP6xK6ofnVyeV0LAP3dHdXH9vfrZIzx6bVc0r9Jd3WVLXVJD2nmvVOayyR+ntHNvQ6C/mxyOpnTx64f1l48MoPFFD7W2JqFZA1x6aFVQ180qyZit3Jp4ylIgYbYopeFxGIqlOv9EujvUehmPMpdNgYTZ6WPoaf66I6KzR3j07r5Ydw+lRzMtS3ctq9MFY3363eZwm6UUkqalv+6I6pkz+ukHKwKKp6yGB96frArqs5VFGuCx64sTi3TTYr/GlzlalIDI5NUdEZ0zMn1jNdBr18GI2WY5m3jKUnUk1SLDO5M9oVRD5vKRcgPHyjAMjS91aGNtIuP2/ZRp6e29sRaNfJtz2NK7TjI9sEnpwHzz3Q3T+jmzPnxtCyT19t5YxkznX24I6copxVp+KK5/7Y61yEx9ZVtUt80tld1maPnBuG5c5FcsZenuE8pa/XucNsyjG96r1dkjPA1B9Oeqwvr4aG/Gh01Jmj/IpcX74yqJp/995JxrGIa+NLlIT6wPNQmora9NqNhpZMwC+uhIj17Znt5CP7LY3pCleSSQbVmWdodSWnowob/siCqQsPTFi
UUZtzf/c1dUsZSlTzd774b67Joz0KU/bY/IbTO05GBcd88vk8tu6JbFfp05/GhpqHjK0pa6ZEMpt45mtxn6+Givzh3l0R+3RXTjolrNHuDS1rqkYvWNLhs3BbUk2Q1pSoVTHx7hySmgM8BjVyyVzvB+aUtYn61sPaBhGIa+dXyJHlgR0EmD3Rritbf5ADitn1NXv1vTpDxQd+vvsetQ1NT+SEo/XxfSQI9N95yQvQa1lM7MvWFWie5aVqe5A11aeTihDwxx9ZpsxcaaB22n9nNqayCp328J64KxPv2qKtRiN9dnx/v0xPqQrplRonU1CW2sTTYsDBiGoUnlTk1qo1lyRzEMQ+39i8wZ6Mo5kFPqMlTlT2iAJ7egU2NXTyvWT9fmnqBxTicFJYocNi07ENHIYker5/FMLqks0g9WBLQjmNSDJ5cf0xjmD3LrT9v9WnwgrnvaKAN25ZRi3bDIr+/NKdVT60NKWdJNs0s6JOBc7k5noM4Z4NQJ7dxJ1VEG++y6d36ZHl4T1IpDiVZLzPxkdXrnwQcaLUr099g1vszRpEn1gUhKNsNo1+LhUJ9dKStd332Q166Xt4Y1b5Ar406xQnPaMLcmlTt069I6/d+EdC395Qfj+tWmsK6eVtwhwckjnLZ0YsG5o7zaFUzqwZUBDS9y6HMTfNobTqnMZbSYg1+cWKRfbgg19H6SpKUH4oqkrKxJNldNTe/CHFXsUF3C1PraZKvX4csnFen6RX7dOa/t62BzSw/E9e89MV03syTjOXDWAJf+uSuqXcGkjit2KJZKl4x5/2BcV0wuzns3ZDRp6ZbFflW4bbpovK/V0mqxlKUfrwpoRLFDSw8mVBszdfmkojYberfmyQ0hPfY/FXp0TVBTK7KXWutoKTO9e/LGWUd3DQ/x2fWRER49vTGc9z3oo2tDumpK+msGee06mCGpaE8oJYch/XtPTNsCKZ2Xw0K2ZVl6dG1Iw312XduotNu+cEo/XRvUp8f5si5atmVdTUJ7QindPb9Mty9NJ5zlWp61NmY2JKXMHejUrUvy69HRmWpipspdmUvhPlcVaigZlq9SZ7qcF3qXwr/qFqjnqtKZfe9Wd00Zl1zlW66jN/vDtoi+Ob1YF4z16tYldbp1bmnO28rSWbuZLwojihzaHkgec8ZANv/aHc26jdxlTy845HuTVqjCSVPv7Ivrrvll2hVMdcrWw87y4pawnDYjpxunY2VZlu5eFtB5o72a3s+pg5F0KZpsNcWfq0pv6TTqs0Z+sjqohGmpNm5pbKmjIfBsGIa+M6NU9y6va1KTtDXvH2ya9Th3kEtLDyZabCO3LEsrDyf06o6oTEsaWZwOzEVS6cymSyp9Leorrq1JHNMNZDafGufTAysDuml2y+//4tZIzk2FPznGqxe3hFvsqKjyJzQuw8POR0d69JNVwRaB/gORlH6+PqQyl01jSuwttmjvDqVUVF+G4LShbt28uK7JuWPlobimVDgash9nDnBpRn9nfdOm7H/DiyuL9KtN6QbFm/wJHYya+tyE1q93swa49JNVAR085NB1H2j6Po0vc+rP26NaWB3TsoMJHYikNKbUoS9NzvzQcsIgl25eXKdLWwneG0a6PvNxxeks4oRp6buL/Pr2jJImjX831Ca09GC8SY3pxj460pt+QCyyN5QmkdKBuUfWBBs+9tvN4Yx1jTuazTD0iTE+fWyUV+tqkzp3pLdDHzIvqfTp2aqwDkXNNhe/B3ntOmmwW8sOxlvdxdGYy27olQ8P6LSmf+110hCXntqQLleQaeGtNf08dl05tVi1MVP/2wV/+57k3FFePbQ6oHf2xZSw1OJ9K6/vpbE3nNKTG1ovg9SbnDzErWsX+vXoKRVtv7gZn8Ombx3feQ3Gc+V1GPrn7pje+Fj+i3HfOr5Y/njuPRiyuXJKsZy2tnvKOGyGvjy5SLcvrdNXp+ZXmikXbe3k6Ep2m6FvTC/Rv/dEddMivy6pbBooe3h1QDP7Nw3yH
/Hpceldc7Prm8X+ckNIl7cjm/+IyyYV6ZE1QX16nFeb61K6ZkbvOf8dWVR5dE1QT20MaXK5s9MXcI8rdujmOWVaV5PQrUvqVB1JZVwwG+yzK2FaemhVoCE7d0SxXV+Zkv1vaTMM3TCrVPe8XyfTUtb+GXaboa9OTd/f5HNOWn4wrr/ujOqmWZmD/Ed8dVqxbllSp6E+u0IJUx8f7dUFY7x6YGVAx/dz6iOtPFtn8vK2iL4+vUSTyx367eaw/rA1os9U+jSmxNHw/LuxNqGfrw81OT/Uxkz9Yn1IDpv0tWnFef1ta2KmQglLo0oc+mr9feC32xl8zZdpWbpneUCXTixqcc09eYhba2oS+vWmsM4d5ckpnrHiUFzHFdmb7MQ8rtiuncFkk0bcz1Wle0WUu23647aIfrIqoKuzvG/xlKW736/TR0d6WixeD/HZdfPsUt202K9vHd/0fjyaTC9i2Ix00uKHjvNoakXLUpzBhNlwX2G3GfrKlGLdtaxOt8xpe0dDbaxp2THDMDTAa2tYuGxNOGm2uoBcEzP1zr5YQ3k2t93QSYNcGa9fz1eFtLA6ri9OKmqxMPXq9oj+sSuqDwxxt7in3BlMqp/b1u5F7FKXTXvCiXZ9LXquwoh09TKRpCV/3NRXpxbrzmV1HR7o//WmsIZ4bVnLR7Tm+ysCGl3i0Kf62ENpc5GkJctSwwnzxlklun1Jne6YV5ZT0KS12sVSuj7sa7uinVo+ZnsgqdFZahTPG+jSkgPxjDf8Pd0PVgQ0rtTeoixGNo+vDenL9Te654/x6ukNTbcVZxJImIomrXaVDAgnTa06lDjmLK/N/qS2B1IKJEydNtSdV5CpMdOytPJQQm/siems4zyaliHrO2Faum95QOeO8jQExk8b5tatS+paDfTXxEztCqX0fxOPBvO/Mb1EP10blGVJVzZ7CC532/SpsemmnldOKW71Ib3Kn9D4sqY3b2cf59H9KwJNAv1V/oSeWBfS/EEu/b/pJS0Cz0nT0ncX+zW9X9NSIH/aHtHVnfSA7nUY8tiNFot58ZSlVYcSOQd7R5U4tDPYsinvK9uiLRogSulzVcJMZ6lvDSS1pS6p7YGkPHZDV005GqB8dE2wSRPZX24I6ev1278Nw9DkCodWH040HCMvbYm02NJvGIa8Odw9TOvn1EtbwqqNmXp8bUh3t1Fb2m03tC9iymMoY4mXL08p1ktbw7pwrDfrDfeRMd6ZR/DQaTP0vTmluqV+UbfYadPhaEq/WB9qM2v0Gxlq4g702nVckV3vH4xrWj+nNtQmO6xufC4cNqNTStuMKE43Is6lUaCkFnWJ25LPzrmu8uER3nY3uxxd4pC6Pz7bLa6eWqxv/bdWX2+l5Nr/jvXq8jcP67kz+x9zGZVCMLO/UwM9tg7tNdPVBnhsevR/Ktq1GGczDFW4O+bvnM97OL7Mqe/X95zoC04flu7V8ptNYf1qU1gXjPFqYXVcUyucDU2MmzMMQ1dMLtbj69KZtEdKdrVXudumfm6bfrI6qB+eVN7u79NT2W2Gvja9pMsTpiZXOHXX/DLVxc1WS5F8Y3qJQgkz779fudumz08oksuuNp9zR5c4NMBj07MbQ3LYDO2PpBRKWhpT4tD/jvU2OT+kzHRPjEDc0o1tBPml9L3s1VOLVeqyNdn9e93MUr28NawfrQzo69Naf344wrIsra1JNJRL/GxlkYIJU3/aHtWft0cVr9/dWOIwdM/8siZjLnfb9O0ZJVp9OKG73w9kXJyoiZmKJK0W56In1gUbyvaOLHZooNemJQfiWXtd/GdfTCOL7Rl3vcRTlp7ckE7WGeaza3iRTSOLHS1KP9XGTN27vE6XVLZevuvySUVafiihn60NKZRMVx2wG4b2hFMNu9oa/5rV4ZQeaDZ/zx3p1e+3Rhqe7+riplKNFvPPG+3V8oNx3bTIr+tnteyhVBszddeyuqw9cew2QzfPTt+P3zGvLF0yLpLS/cvTJcCOK3aoNmbqn
7ujenFLWP09Nl1SWaQKd7pM4n3LA7p2xtHyimNLHfr4KK8eXBVsstslk9d3R1s881441qeXtkRaPNM2/p0u+PtBvXBW/4z3sD9eFdDH63cEJC1L/ril6xelFzIaV194Yl1Qg312/fgD5frlhpD+Wx3XV6YUybSkB1cFVVnm0IMfqNAdS/2qiZlN5sczG8P62rT2P9emm/HmVnHCsizdtNgvn8Mmty19XjphkKtDSixuCyTlshkFfZ/UkxDo7wZHMvvsNkP9PDZVh1N5bw9rrfFldTil7YGkVhwydcpQd87blKR0cHqw166VhxL637GZS2S8si2i/xnqblF6w7LS9YEHemzdvoW1I7yyPdIkg7q/x65vTC/WrUv9umNe9u1fu4JJDctSu/i4Yof2hDqmaV4mifq61NmcMMiln64N9chAv2VZ+tP2qD6eIYN9yYG4Bnltev9gQmcd1/qNbmOb/Uk5bUebxlW4baqNZy4DY1qW3t0X1xt7ovLYDR2MmrovS3mS1vxwRUCxlFTmtrW7/EA8Zemna4O6+4T0Tf3Da4L67uzcsuiSpqWN/qRWHkpokz8pSZre36lLJxbp91sj+vP2iK6qv5E2LUsvbYloxaGEPjfB11DLVko/mI8otre6A+Kna4MZM4W+MqX1m43ZA12KpCzduzwgy0pnh5wxzN0kKP7SlkiT2qNSOtvXZqQzOjwOQ4v2x/TazqjuanaD3pjDZuj0YW69viemD9bfuCVNS7GU1e6mVLn46tRi3bu8Tmcd52mYY89VhXVJjrVbj5gz0KUlBxKaN8ilhJn+O1lSqyUTLhrv02s7oxpXateZw9waVuRrkVFz5ZQi3bjIr5HFdoWSlspcRpPslQvH+nTv+3Wa1q9MVf6ERpU48rqONHfZpCJd9M9DeviU8py+z7yBLlUmkhk/53UYuqSy84LlRU6brptZotuW1um2uaW65/2AvjuntN1ByEsqfbruPb9m9HfqwrE9Y9tvR7h1bql8XbgVHYXJMAz96OTWs9cnljv1u7P6F3Td7nw4bIZ+dmr+2fw9icveOQuI6Fhuu6EvTCxS0rT08taIplQ4dNqw7Auu48oc0k7p9qV1WTO6c3XZpCKFktYx3T/0dN21Kzpbjye33ZDb3r4g2dQ85vbnKn16/2BCFW6bBnptKnbatPxgXLcsqdPsAU59YoxXqw4n9FxVeldnPueN1na8f2KMT+tqErpzWZ1cdkOWpH5um84b7W1RrnZxhuB6sdOWV5+kaf2ciqfSjYyvbZSVv7A6ple2R+Q00uWVjpRF2htOyWEzmgQ8P1fp0/Xv+TWtlRI+CTNdnsiypO81qxxgWekM9ovG++S2G9oVSmnZwYR+sykip13637E+jS11aM3hhJ7eGNKNs0qzJoQZhqFZA1wNmeK7Q+kA/1Bf7uXkBvvs2t+ofM/zVWFd3KyM48wBLo0otuuuZXVNehJtrUvq0TVB3TQ7+zil9P34t2eU6I5lfl08vkjPVoUaknCk9MLChWN9unBs+n1/cn1IkZQlj93Qh0d4WiQDzR7oUm3c1PcW+3X6MLdOH+bOuONgTU1CnxjT9H59qM+ufeGWiVdH/LK+VNNja4INPbuOWHogrgllDs1udix+YIhL318e0ClD3TpzmFsPrw5qSj9nw7PqZZOKtepwQte955ckXTG5SJVl6Tl09bQSPbz6aH+w/ZGUfA4j63mhLaWuoz3S2vKPXTF9aLhHZw73KJq0tL42oac2hFThtunSifn3v7EsS2/tjemfu2MaVWzXBn+yYTcGjo1RW1tL54VOVFVVpcrKyoZ/p0xLtza6iToQSek3m8L6WoaMwNb8Z19MP1kd1HUzS1ps6/neYr+unZlulPf23liLuqitiaUs3Vzf7f5P26Ma5rO3qOe3/GBc7+yLKZxMdzD/vwk+Vbht+uvOdKPDc0Z6tTWQ1KGoqaunFWe9uQsnTS3eH2/zxrO7fG+xX7dnuNHdUpfUk+tDDfWqM3lwZ
UCfn5C96eD9y+v0tWktM5CPWLQ/pnkDXe0qofTuvpgiSStr6R5JumOpv8UFqSO9uSemUSV2jSpufdEjk3/sSpfnGOKzNzl+o0lLNy/x6/4TyrQ3bOpXm8ItatFtqE3okTVBDfLY5banL+x/3xnVHfPKmmQ//GFrRKNK7Jo1wNUwR2tjpm5b6tdHR3p1xjC3HDZDf90Rkddh6PQ8jtM/bovIbZfOGu7RDYv8bd54JUxLP1kV1PmjvU1KjHx/eZ0uHOtrqB3/q6qQxpc5Wq3ReyCS0u+2RHQ4asphkyaUOXR8f6fGljpa3MxUh1N6dE1QFW6b9kdT+sRoX6v1O+vipn66NqjvNCtdsr42kdc5pjXbA0n9e09MO4MpyZCO7+fUysOJjIsayw/GtTWQlNtuaHNdUldPLW7z2LIsSzcs8uuueembhn/sisptN3Tq0M5d5LIsS09tCMsw0pmrP1gR0C1z85tvCdPS7UvrNK7Uoc11SX1yjLchE/9Y+OPpjJoih6FvzyhpsWD2k1UBfWqcTz9bF9S1M0rbLNHTlsPRVF7ZZc2vm11ta11S171Xq3tPKM+7Jmxzqw8n9JPVAf3s1H4dNDqg63X3nAT6mu6Yc/GUpX/tjuZVHgVobtH+mP6wLaLK0nRz4s4K2FmWpQNRU/cvD+iu+U17Bty6xK/vzi7tkFKA/9kX09IDcS1w79O/EkPlcxj6Qn3izqNrQzquyK7zRnt157I6fW1acYtknB3BpH6zKdziOUqSnt4Q0tyBLg322fT9+t/jyJgfXh3QCYPcGZ/P0r2SItpcl9QAj01fy2GXQ0f5+bqgPjrSq0Fem+5aVtfQA6s507L007UhFTsNTShz6LWdUd04uzSvxb9VhxP6x66ovpHD7xdOmtpQm2y1F8ORMb25J6Z/74lpkC/dP+7IM066v0GgxS5mKf1s389t0/80e3asiZl6fG1Q188q1fNVIU0qdzb07TEtS9e/589a2ut3m8P6x+6ovjChKGPyZTRpyVLLUqm/2xzW8CK7Th7i1veX1+mySUXHtAvLsizdtSzz7978dTcu8uvuDCV4Vx1O6JmNoSZN4U3LUk3MVNJML0K67JJNhjb4E1pxKKGdwZQSpqUPDHHrg8PTiy8rD8W1aH+8RenaTLg3zY6M/i725x1RnTPyaNBwoNeuQzFTqRyysKX0CeXPOyL69Qf76XtL6jTQY2vY6vX67qjmDnSpxGnTpHKbfr810iLzP2mmM+/PPs7TZGX50TVBXTU1XU/tnJEe3bmsrsWF5XdbIrq9PsB9IJLSE+tDqo2bOvs4b0PN7f8Z6tYmf0I3vufXV6YUZ6yNLKWbdCZMSz6HLWuDoM6WMC0lzaYn0A21CVW2Mu6xpelGSHcsq9P35pS2OHGblqXaeNtbJk8f5tbfd0Uz1l3fEUzqmY1hvbsv3uYWs0ze2hvTN3NYOPLYjaw15Y7F67uj2lyX1PZgUjsC6ZX/WQOcbTaIM630Q8Y988v0yvb0lrwjNc0fWh1oCOwOK7LL5zC0yZ/Q+PoV7v2RlH65IaQfnVQuu81QJGnp/YPpOnfNtziePcKjH60KNNwMWJal+1fU6XtzyprsVvnwCI+uf8+v04a6W1zQMm3Z3RFMam1NoqGJ1Q2zSnX3++ma9Jnmt2VZunNpnT5T6dO/dkf1683pBlDra9OZ1I0bxH5mfDo7eNYAV8NNUsq09PqemN7ZG9MAj00XjvO12oS5scE+u26bV6Y9oZZNXZsrddkUN5vWH9wdSunxtUHd3wHb4UeVOPSFiY6G32fFoYROaOWcMKO/Uz9cGdBHRnj0tVbKQTRnGIYuGufTbzeH9dnKIr27L6bvtXEj0xEMw9Clk4r01t6YPv/GYf2oHQ0InTZDpw11a0SxvUObuJa50lkXyw8lMu6KubjSp7uWBVRZ5jjmIL+UuQxPTzam1KHffqh/h/SqmdbPqcf/p7CzeAEAvZ/LbhDkxzGbP8idc+PwY2EYhgZ57frat
GL9YEVAN9UnCO0Np9TfY+uwfj8nD3ErlrJ0+3KXvnOiqyFDXUrv4P3D1ohuX+rXIK89447bkcUOjSt16J+7mvbPCydNbQ0k9YX6+/vLJhXp+ysCumFWqV7aEtaIYkerMZJSl02XHkMfjWNx7iivXtkWUbnb1qSXWnM2w9BVU4v1n30xra1J6ns51Mhvbno/Z867QXwOW9Yg/5ExnTHcozOGe7QjmNStS/z61Dif5gx0aenBuGYPzPyzPj7Ko+8urtPkCkeTuNovN4T0xfq/w2fGp3dvzOzvlN1m6MUtEV041pu1x8P/jvNl7eHUWhmtC8d6dcMiv8aVOtI7W47xOcswDI0sseuWxX59boKvIbbS3N92RvXhEZ6Mf8fp/Zy6Z36ZfrE+pN9vjcgwJEPpXTcOmxRLpReTTUmVZQ6dNtStkRmSQY/v79K/dscKqp9iT0VGfydrvNJ0pKbV3fPLm7zm3X0x1cXNNm+ujqyiXTcznSEcTabrT98yt1QOw9DtS5uusNXETD3SqNxHbczU3e/X6cMjPHpjT0wz+zt1wRivVh5OaMmBeJOa8Q+tDujCsUeDhv/ZF9OBqJlzQ9CEmd7qdvpQt05utkL57r6YdgRTumicV7csqdOXJhc1aepypJ7fWcd5GsqtdAbLsnTLkjpFkpYurvQ1ZMre836dvj6tOGtpj9WHE/rD1ohumt20bt8bu6OSlFN/hJsW+XXDrJbZtN9b7Nd1M0v07r64qiOpnOs6W5alx9aGNMBjy6nHwlt7YzItq0m2+rqahCaWt8wAz0c0aem2pS1Xe3+/JazDMTPrCu3vNoc1uuTojc2T60MaXWJXqcumNYcTDTdDUvpi8b0lft0zv0yRlKXvLU6X28i1JMsdS/26YVaptm7epLdTwzStX+Zs+X/viSqctJo0V94XTumuZXUqdho66ziPzhjmVsqSbljk1x1zm/ZxWHM4ob/vimZctHlwZUCnDnU3bOnzx009uT6kaMpqWCxobENtQq/vjumjIz16aWtEdXFTZw736JQhrk5tBLa+NqHF++O6pNKnF7ZEtKUuqW9ML+6WkgvtrYl665L04uNzVS13gnS2pGn1uAajbXlodUD/N7Eo5wbkHYkMDaBnYU4CXYs5B+TutZ1RBROmPjnWpx/V76zv38FJJtnm5KrDCVWWOrL2N7h9qV9fmlzcEFt5ZE1Q54z0NAlmvrknqr/siGpsqUNfzlIGtbvdtsSvuCndlUcvrJ7Isiw9szEdn4ikLH0jS/ynLm7qzmVHk/cOR9NJr413aiw/GNfKwwldMMar7y8P6LYOKIPWmi11SV3x1mE9f2b/vEuAtyaatPRcVUi7QildPqmoSd+II3HMu+blX9I4X0cqjbRVPpnrZHZ9ozBmD/Gf6rhOzlC//uTBLv23Ot7m1z+1IawLxvgayoB4HIaun1WiO5bW6eHVAV3ZrIxFhTvduGX14YS21CV117I63TCrVGcO9+iOeWUaXmTX9e/59fSGsC5tli362fE+/XpTWNKRmukRfWxU7uVLnDZDN8ws0Tv1292OCCZM/Xl7RBeN88owDN04q1Q/XhVUKJGuC7YtkNT17/k1Z4BLj68NadXhzusA/rN1IZ07yqP7TizTwv1xPbEuqGjSUspSm8Hiaf2cOmeURzcu8mtb4Gg96Tf3xlptdtXc1dOK9fDqYJOPLd4f15QKp4qcNi0Y4ZHNkP62M9Lm9wonTd28uE4nDHLl3Eh5/kCXFu1P/21SpqUfrQzoX7ujuuE9vw5F299D4PF1QX15SsuSKheM9amyzKn7ltfJtFquL8ZT6WzuxtkLX5xUpPcPJvR8VUifb1bf3GVPB9lf3RHVncvqdM2Mkrzqrp861KM398a0JmBT0rRazT45bai7YVFESi+Y/WBFQPedWKa755cpZUk3LfbrOwv9unxSUYubvKn9nKosc+iOpX69Vx2TVf99nt0Y0uQKZ5O6fWUum/7f8SUZg/xSuqZxicvQ33dFd
enEIt06t0ynDs1cZ7AjTSp3ak1NQjct9muoz64bZpV2W13l9tZEvXRikT73+iGdP7rry4UVWpBfkr42raRbgvwAAABArs4e4dH2YEorD8UVSJgdHuRvy/R+mWvwN3bNjBI9sDKglGmpNmaqLm62yFg+bZhHn5tQpCsmd0+2fq6OK7brg8N7Xp+/fBlGup/JeaO98tmNrHGEUpdNX5hQpIfXpGM3T24IN2TzHzFzgEs7AunGwVd08kLN2FKHnjq9X4cF+aV0bPHyycW6bmapntoY1t93Rhs+9+qOqD4ywtvpQX4p/ax/4Vivnq+PRR5hZYgfoXVk9HeyqqoqlR03Vr/ZFJY/buqm2S3LvUjpTtvzB7lUE7O0/FBcwUT6z+KySRPKnfLW16TO1PF7fW1Cyw7E9dkMTQqTpqWvvlOjEUUOXTerpEVdNNOyFEu1rP0lpVeer5tZqnf2xWRa0llt1HzPxLIs3fV+QOeP9mpaP6fuWlanyycVNTkpVYdTenBVQNP7ObU7lNLXp5fIbTdkWuldAScOch1TLf8XNod18mBXk1XJf+yK6lDU1EWNmvIsORBPb5mbWZJzHexI0tIT64JKWuk63C9uieibeZTbORLsnTvQJau+lts9zWq5/bi+xEymrG3TsrTyUELPV4X17RklGpLnyf7OpXW6amqR7l2ezt6dUuFUXTxd7/DcUR6d2GxhqrVGNEds9if1150RXZ2lrMrqwwn9ZlNYN8xqGph/cn1IJw9xtWhea1mWIimr1cDydQtr9YX6secjZVq67j2/gnW1emzBqKy/18LqmPaEUvrISK9uXuzXTbNLm5T4OVInsnnzn8biKUv/2BXV4gNxJUxpcoWjUxuLdrTN/qQG+WwFHfx9e2+sRX1F9DxkaAA9C3MS6FrMOSA/SdPS/71xWNfOLGlSXqejdMScXFeTrjkfS1n6v4lFTRr3ojC8sDksS9KOQFLXZui7cDia0t93xZrEmArVi1vC2hFM6WvTinXLkpZVSTrbj1cFFEqmY6KG0iV8G1dY4DqZHYH+ThRImLrpjR2qHNpfnx7vyxoErIubenpjSDP6uzRrgLMhmBZNWtroT2hbIKVzR3nalblbGzNV5jLyXoFbeSiutTVJrTgUz9h0I1emZenWJXUaWWxXP7dNF4xteeJbV5NQTcxsUeZHSjd9Ma30YsTuUEqWJMuSvj69uM2g45a6pP60PSKbIYWTli6bVKTDUVMvb4vougwn5/aW2NgZTOqBlQF9+/iSJgsKbbGsdLD5znll+vP2iAb77C2asViWpZe3RrSuNilLktOW3jERTlqyGdKkMofOGeVtV6bzgysDqo6kdP2s0iZ1BY9sZavyJ+VzHv2+0aSl2+ZmrrF3ZKGieePbTPaEUnpyQ0g+h6HPVfrkdRj6YTualR6r324Oa1xst+ZOafsiceOiWllWOtO5rbr2bYkmrTYzP4C+ihs3oGdhTgJdizkH5K+thLRj0VFz8tebwjoYSelrOfTTQ890z/t1umJyUZfvHOkOG2oTunNZna6cUpwxTteduE5mR6C/E1mWpfVVmzR5QuEegJ/91yFdNbVYpxzjxE6all7YHNZnxvvadQFeX5tQkcPQMJ+9oRnw91cEdGmjzt6Z3LmsTt86Pl1LvCZm6hfrQ6qJmbptbmmPKaexpS6pl7aE5Y9bujOHWnfxlKWEaeVVpibb93LY1OoCUvMbpkX7Y1p5KJGxzv6LW8Ia7LXnlTF9IJLSc1VhrT6c0K1zS5v0augquV4kqvwJ2WS02mAaQMfgxg3oWZiTQNdizgE9C3MSfZVpWZ1eJrg9mJPZEbHqRIZhqNCTdh85paJJiZL2ctiMjKWFctW8nMtAr133zC/Tw2uCWn04kbFj+b5wSiVOo6HkS4Xb1uVNOHMxttShwT67LhybW4DcZTfazJjPVVvfp/mizPxBbq2rSeqdfbGGxR/LsvRsVVgpUzn/DkcM9NozNqntiSpb6UAPAAAAAADQm
/TEID/aVrjFltElOiLI31nsNkPfmF6iCrdNj64Jtvj8sxtD+vyEwqiB/vkJRRpTWhjrbp+f4NM/d0W1O5RSLGXpjmV1Gl3i0KWTCuO9BgAAAAAAAHqbwogsAll86DiPQsmIXtoS1ifr6//746ZM9eyFikJlGIaum1mq7y72y5D0jenF3VJyBwAAAAAAAEAaUVD0CueN9upA1NR/9sUkpbP5LzmGUkHIzuswdPPsUt0xr4wgPwAAAAAAANDNCPSj1/jy5CK9vjum1YcTqomZGl7U+zuhd6dyt03eQm9CAQAAAAAAAPQCBPrRaxiGoetnlegnqwP6dIbmvAAAAAAAAADQG1FzA72Kw2boZ6f26+5hAAAAAAAAAECXIaMfAAAAAAAAAIACRqAfAAAAAAAAAIACllOg/91339VFF12kyZMnq7y8XM8//3xnjwsAAAAAAAAAAOQgp0B/KBTSlClTdO+998rr9Xb2mAAAAAAAAAAAQI5yasa7YMECLViwQJJ01VVXdeqAAAAAAAAAAABA7qjRDwAAAAAAAABAATNqa2utfL5g+PDhuv/++3XxxRdnfV1VVdUxDQwAAAAAAAAAAEiVlZVZP59T6Z7O+MF9RVVVFe8F0IMxR4GehTkJ9CzMSaBrMeeAnoU5CfQszMnsOi2jHwAAAAAAAAAAdD5q9AMAAAAAAAAAUMByKt0TDAa1ZcsWSZJpmtq1a5dWrlypiooKjRgxolMHCAAAAAAAAAAAWpdT6Z63335bH/vYx1p8/DOf+Ywee+yxThkYAAAAAAAAAABoW941+gEAAAAAAAAAQM9BjX4AAAAAAAAAAApYnwv0P/DAAzrjjDM0YsQIjRs3Tp/+9Ke1du3aJq+xLEv33HOPJk2apCFDhuicc87RunXrmrzmqaee0rnnnquRI0eqvLxc27dvb/L57du36+qrr9aMGTM0ZMgQzZgxQ7fddpsikUibY1yzZo0++tGPasiQIZo8ebLuu+8+WdbRjRdXXnmlysvLW/w3bNiwY3hngJ6hN8xRSfr5z3+u+fPna8iQIZo7d65+/etft/MdAbpXT5+T0WhUV155pU4++WQNGDBA55xzTovX7Nu3T5dffrnmzZunfv366corr2znuwF0v66ak6Zp6qKLLtK0adM0ePBgTZw4UVdccYX27NnT5hjbuk4yJ1FIesOce+edd7RgwQKNGTNGQ4YM0bx58/TQQw8dw7sCdJ/eMCfffvvtjDGdjRs3HsM7A3SP3jAne1Octc8F+t955x1ddtlleu211/TKK6/I4XDo/PPPV01NTcNrfvzjH+uRRx7Rfffdp9dff10DBw7UJz7xCQUCgYbXhMNhnXnmmbr++usz/pyqqiqlUik98MADWrhwoe6//3795je/afX1R9TV1ekTn/iEBg0apNdff1333nuvHnroIT388MMNr7n33nu1YcOGJv+NHj1a559//rG9OUAP0Bvm6C9+8Qvdeuut+s53vqOFCxfqhhtu0LXXXqu//vWvx/juAF2vp8/JVColj8ejK664QgsWLMj4mlgspn79+umb3/ym5s6d2453Aeg5umpOStKpp56qX/7yl1q8eLGeeeYZbdu2TZdccknW8eVynWROopD0hjlXXFysL3/5y/rLX/6ihQsX6pprrtE999yjJ5544hjeGaB79IY5ecTChQubxHXGjRvXjncE6F69YU72pjhrn6/RHwwGNXLkSD3//PP6yEc+IsuyNGnSJH3pS1/SNddcI0mKRCKqrKzUHXfcoUsvvbTJ17///vs644wztGLFCo0aNSrrz3riiSd01113aevWra2+5kiAcOPGjfJ6vZKk73//+3ryySe1du1aGYbR4msWLlyoD3/4w3rttdd0wgkn5PsWAD1aIc7RBQsWaM6cObrnnnsavu6mm27S0qVL9be//a29bwXQI/S0OdnYtddeq7Vr1+rVV19t9TWf/vSn1a9fPz322GM5fU+gp+vKOfmXv/xFn/3sZ7Vv3
z55PJ6Mr8n3XpY5iUJT6HPuiEsuuURut1u/+MUv8n0LgB6lEOfk22+/rY997GPavHmz+vfv3wHvAtBzFOKcbK6Q46x9LqO/uWAwKNM0VV5eLildOqC6ulpnnnlmw2u8Xq9OPvlkvffee8f0swKBQMPPac2iRYt00kknNRx8kvTBD35Qe/fubbFt5Yinn35akydPLriDD8hFIc7RWCzW4iLj9Xq1dOlSJRKJYxoj0N162pwE+rqumpM1NTX63e9+p7lz57b6ICW1714WKCS9Yc6tWLFCixYt0gc+8IF2jw/oKQp5Tp5++umaOHGiPv7xj+utt95q99iAnqSQ5+QRhRxn7fOB/uuvv17Tp0/X/PnzJUnV1dWSpIEDBzZ53cCBA7V///52/5ydO3fqoYce0mWXXZb1dfv378/4s498rjm/368//vGP+vznP9/usQE9WSHO0Q9+8IN67rnntGzZMlmWpffff1/PPPOMEomEDh061O4xAj1BT5uTQF/X2XPylltu0bBhwzRmzBjt2rVLv/3tb7O+Pt97WaDQFPKcmzJligYNGqQzzjhDl112mb74xS/mPT6gpynEOTlkyBA98MADevbZZ/Xss8+qsrJS5513nt599928xwf0NIU4Jxsr9Dhrnw7033jjjVq4cKGeffZZ2e32Jp9rvnXDsqxWtz22Zf/+/frkJz+pM844Q1/96lcbPn7iiSdq+PDhGj58uC688MKsPzvTxyXphRdeUCqV0kUXXdSusQE9WaHO0WuvvVYLFizQggULNGDAAH32s5/VZz7zGUlq8XsAhaSnzkmgr+qKOfn1r39db731ll5++WXZ7XZdccUVDde9jriXBQpJoc+5v/zlL3rjjTf0ox/9SI899ph+85vf5D0+oCcp1DlZWVmpL37xi5o5c6bmz5+vH/7wh/rQhz5Ek2wUvEKdk40VepzV0d0D6C433HCDfv/73+tPf/qTRo8e3fDxwYMHS0oHGY477riGjx88eLDFClAuqqur9fGPf1yTJ0/W448/3uQgeuGFF5RMJiWpYZvJoEGDWqwoHTx4UFLL1S8pvZ3k4x//uCoqKvIeG9CTFfIc9Xq9euSRR/Tggw9q//79GjJkiJ566imVlJRQgxEFq6fOSaCv6qo52b9/f/Xv31/jx4/XhAkTNHXqVP33v//VySef3CH3skCh6A1z7si4p06dqv379+vee+8t2EAG0BvmZGNz5szR73//+7zHB/QUvWVOFnqctU9m9F933XV68cUX9corr2jChAlNPjdq1CgNHjxYb7zxRsPHotGo/vvf/+Zdm2nfvn0699xzNWHCBP3iF7+Qw9F0XWXkyJEaO3asxo4dq2HDhkmS5s+fr//+97+KRqMNr3vjjTc0dOjQFk0oli5dqtWrVxfsdhKgNb1ljjqdTg0fPlx2u10vvfSSzj77bNlsffK0iwLXk+ck0Bd11ZxszjRNSVI8Hpd07NdJoFD0xjlnmmbD9wUKTW+ck6tWrWoIiAKFprfMyd4QZ+1zGf3XXHONfvvb3+q5555TeXl5Q62ooqIiFRcXyzAMXXnllfrhD3+oyspKjR8/Xj/4wQ9UVFTUZNtHdXW1qqurtWnTJknShg0b5Pf7NWLECFVUVGjv3r0699xzNWTIEN1zzz1N6nIPGDCg1fIdF154oe677z5dddVVuuaaa7Rp0yY9+OCD+s53vtNiS8lTTz2lcePG6ZRTTunotwnoNr1hjm7atElLlizRvHnzVFtbq0ceeUTr1q3TY4891llvG9BpevqclKT169crHo/r0KFDCoVCWrlypSTp+OOPb3jNkY/V1dXJMAytXLlSLpdLkyZN6rg3C+gCXTUnFy1apBUrVujEE09UWVmZtm7dqrvvvlsjR47UiSee2Or4cr2XZU6iUPSGOff4449r1KhRqqyslCS9++67evjhh+mDg4LUG+bko48+qpEjR2ry5MmKx+N64YUX9Oqrr+qZZ57pxHcO6By9YU4e0Rvir
EZtba3V3YPoSke6Pjd33XXX6YYbbpCUrtV077336qmnnlJtba3mzJmjH/zgB5oyZUrD6++55x7dd999Lb7PI488oosvvljPP/98k7rCja1YsSLrSu6aNWt0zTXXaNmyZSovL9ell16q6667rskBGAgENGnSJH3nO9/RN77xjVx+daAg9IY5umHDBl1++eXatGmTnE6nTjnlFN12220ND1dAISmEOTl9+nTt3Lmzxcdra2uz/h4jRozQqlWrWv2+QE/UVXNy5cqVuvHGG7VmzRqFQiENGTJEH/rQh/Ttb39bw4cPzzrGXO5lmZMoFL1hzj366KN65plntGPHDjkcDo0ePVqf//zn9cUvfpHdpig4vWFO/vjHP9ZTTz2lvXv3yuPxaPLkyfp//+//acGCBe18V4Du0xvmpNR74qx9LtAPAAAAAAAAAEBvwvI9AAAAAAAAAAAFjEA/AAAAAAAAAAAFjEA/AAAAAAAAAAAFjEA/AAAAAAAAAAAFjEA/AAAAAAAAAAAFjEA/AAAAAAAAAAAFjEA/AAAAAAAAAAAFjEA/AAAAAAAAAAAFjEA/AAAAAAAAAAAF7P8DA4xmp85YHhUAAAAASUVORK5CYII=\n",
2582 |       "text/plain": [
2583 |        "<Figure size 1728x288 with 1 Axes>"
2584 |       ]
2585 |      },
2586 |      "metadata": {},
2587 |      "output_type": "display_data"
2588 |     }
2589 |    ],
2590 |    "source": [
2591 |     "hhid = 'MAC000004'\n",
2592 |     "hh_df = df.loc[df['item_id'] == hhid, ['timestamp', 'target_value']]\n",
2593 |     "hh_df = hh_df.set_index('timestamp')\n",
2594 |     "\n",
2595 |     "fig = plt.figure(figsize=(24,4))\n",
2596 |     "plt.plot(hh_df, linewidth=0.5)\n",
2597 |     "plt.show()"
2598 |    ]
2599 |   }
2600 |  ],
2601 |  "metadata": {
2602 |   "kernelspec": {
2603 |    "display_name": "conda_python3",
2604 |    "language": "python",
2605 |    "name": "conda_python3"
2606 |   },
2607 |   "language_info": {
2608 |    "codemirror_mode": {
2609 |     "name": "ipython",
2610 |     "version": 3
2611 |    },
2612 |    "file_extension": ".py",
2613 |    "mimetype": "text/x-python",
2614 |    "name": "python",
2615 |    "nbconvert_exporter": "python",
2616 |    "pygments_lexer": "ipython3",
2617 |    "version": "3.6.13"
2618 |   }
2619 |  },
2620 |  "nbformat": 4,
2621 |  "nbformat_minor": 5
2622 | }
2623 | 


--------------------------------------------------------------------------------