├── .gitignore ├── Chapter01 ├── arrow.png └── chapter1-time-series-analysis-overview.ipynb ├── Chapter03 ├── arrow.png └── chapter3-dataset-preparation.ipynb ├── Assets └── kaggle_api.png ├── LICENSE ├── Chapter11 └── generate_inference_data.py ├── README.md └── Chapter09 ├── create_schema.py └── chapter9-preparing-l4e-dataset.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | Data/ 3 | -------------------------------------------------------------------------------- /Chapter01/arrow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Time-Series-Analysis-on-AWS/HEAD/Chapter01/arrow.png -------------------------------------------------------------------------------- /Chapter03/arrow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Time-Series-Analysis-on-AWS/HEAD/Chapter03/arrow.png -------------------------------------------------------------------------------- /Assets/kaggle_api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Time-Series-Analysis-on-AWS/HEAD/Assets/kaggle_api.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Chapter11/generate_inference_data.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import datetime 3 | import os 4 | import pandas as pd 5 | import pytz 6 | 7 | def main(): 8 | os.makedirs('inference-data/input', exist_ok=True) 9 | 10 | # How many sequences of data we want to extract: 11 | num_sequences = 3 12 | 13 | # The scheduling frequency in minutes: this **MUST** match the 14 | # resampling rate used to train the model: 15 | frequency = 5 16 | 17 | # Set current timezone to UTC: 18 | utc_timezone = pytz.timezone("UTC") 19 | 20 | for root, dirs, files in os.walk('train-data'): 21 | for f in files: 22 | component = root.split('/')[-1] 23 | print(f'Creating inference data from component {component}') 24 | 25 | component_fname = os.path.join(root, f) 26 | inference_df = pd.read_csv(component_fname) 27 | inference_df['Timestamp'] = pd.to_datetime(inference_df['Timestamp']) 28 | inference_df = inference_df.set_index('Timestamp') 29 | 30 | # We know that some events of interest are happening after this date: 31 | start = pd.to_datetime('2018-12-27 02:05:00') 32 | for i in range(num_sequences): 33 | end = start + datetime.timedelta(minutes=+frequency - 1) 34 | inference_input = inference_df.loc[start:end, :] 35 | start = start + datetime.timedelta(minutes=+frequency) 36 | 37 | # Rounding time to the previous X minutes 38 | # where X is the selected frequency: 39 | filename_tm = datetime.datetime.now(utc_timezone) 40 | filename_tm = filename_tm - datetime.timedelta( 41 | minutes=filename_tm.minute % frequency, 42 | seconds=filename_tm.second, 43 | microseconds=filename_tm.microsecond 44 | ) 45 | filename_tm = filename_tm + datetime.timedelta(minutes=+frequency * (i)) 46 | current_timestamp = (filename_tm).strftime(format='%Y%m%d%H%M%S') 47 | 48 | # The timestamp inside the file are in UTC and are not linked to the current timezone: 49 | timestamp_tm = datetime.datetime.now(utc_timezone) 50 | timestamp_tm = timestamp_tm - datetime.timedelta( 51 | minutes=timestamp_tm.minute % frequency, 52 | seconds=timestamp_tm.second, 53 | microseconds=timestamp_tm.microsecond 54 | ) 55 | timestamp_tm = timestamp_tm + datetime.timedelta(minutes=+frequency * (i)) 56 | 57 | # We need to reset the index to match the time 58 | # at which the scheduler will run inference: 59 | new_index = pd.date_range( 60 | start=timestamp_tm, 61 | periods=inference_input.shape[0], 62 | freq='1min' 63 | ) 64 | inference_input.index = new_index 65 | inference_input.index.name = 'Timestamp' 66 | inference_input = inference_input.reset_index() 67 | inference_input['Timestamp'] = inference_input['Timestamp'].dt.strftime('%Y-%m-%dT%H:%M:%S.%f') 68 | 69 | # Export this file in CSV format: 70 | scheduled_fname = os.path.join('inference-data', 'input', f'{component}_{current_timestamp}.csv') 71 | inference_input.to_csv(scheduled_fname, index=None) 72 | 73 | if __name__ == '__main__': 74 | main() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # Time Series Analysis on AWS 5 | 6 | Time Series Analysis on AWS 7 | 8 | This is the code repository for [Time Series Analysis on AWS](https://www.packtpub.com/product/time-series-analysis-on-aws/9781801816847), published by Packt. 9 | 10 | **Learn how to build forecasting models and detect anomalies in your time series data** 11 | 12 | ## What is this book about? 13 | Being a business analyst and data scientist, you have to use many algorithms and approaches to prepare, process, and build ML-based applications by leveraging time series data, but you face common problems, such as not knowing which algorithm to choose or how to combine and interpret them. Amazon Web Services (AWS) provides numerous services to help you build applications fueled by artificial intelligence (AI) capabilities. This book helps you get to grips with three AWS AI/ML-managed services to enable you to deliver your desired business outcomes. 14 | 15 | This book covers the following exciting features: 16 | * Understand how time series data differs from other types of data 17 | * Explore the key challenges that can be solved using time series data 18 | * Forecast future values of business metrics using Amazon Forecast 19 | * Detect anomalies and deliver forewarnings using Lookout for Equipment 20 | * Detect anomalies in business metrics using Amazon Lookout for Metrics 21 | * Visualize your predictions to reduce the time to extract insights 22 | 23 | If you feel this book is for you, get your [copy](https://www.amazon.com/dp/1801816840) today! 24 | 25 | https://www.packtpub.com/ 27 | 28 | ## Instructions and Navigations 29 | All of the code is organized into folders. For example, Chapter02. 30 | 31 | The code will look like the following: 32 | ``` 33 | START = '2013-06-01' 34 | END = '2013-07-31' 35 | DATASET = 'household_energy_consumption' 36 | FORECAST_PREFIX = 'export_energy_consumption_XXXX' 37 | ``` 38 | 39 | **Following is what you need for this book:** 40 | If you're a data analyst, business analyst, or data scientist looking to analyze time series data effectively for solving business problems, this is the book for you. Basic statistics knowledge is assumed, but no machine learning knowledge is necessary. Prior experience with time series data and how it relates to various business problems will help you get the most out of this book. This guide will also help machine learning practitioners find new ways to leverage their skills to build effective time series-based applications. 41 | 42 | With the following software and hardware list you can run all code files present in the book (Chapter 1-15). 43 | 44 | ### Software and Hardware List 45 | 46 | | Chapter |AWS services covered in the book | OS required | 47 | | -------- | ------------------------------------| -----------------------------------| 48 | | 1 - 15 | Amazon Forecast | Any browser (Chrome recommended) running on Windows, Mac OS X, and Linux (Any) | 49 | | 1 - 15 | Amazon Lookout for Equipment | Any browser (Chrome recommended) running on Windows, Mac OS X, and Linux (Any) | 50 | | 1 - 15 | Amazon Lookout for Metrics | Any browser (Chrome recommended) running on Windows, Mac OS X, and Linux (Any) | 51 | 52 | 53 | We also provide a PDF file that has color images of the screenshots/diagrams used in this book. [Click here to download it]( https://static.packt-cdn.com/downloads/9781801816847_ColorImages.pdf). 54 | 55 | ### Related products 56 | * Data Engineering with AWS [[Packt]](https://www.packtpub.com/product/data-engineering-with-aws/9781800560413) [[Amazon]](https://www.amazon.com/dp/1800560419) 57 | 58 | * Serverless Analytics with Amazon Athena [[Packt]](https://www.packtpub.com/product/serverless-analytics-with-amazon-athena/9781800562349) [[Amazon]](https://www.amazon.com/dp/1800562349) 59 | 60 | ## Get to Know the Author 61 | **Michaël Hoarau** 62 | is an AI/ML specialist solutions architect (SA) working at Amazon Web Services (AWS). He is an AWS Certified Associate SA. He previously worked as an AI/ML specialist SA at AWS and the EMEA head of data science at GE Digital. He has experience in building product quality prediction systems for multiple industries. He has used forecasting techniques to build virtual sensors for industrial production lines. He has also helped multiple customers build forecasting and anomaly detection systems to increase their business efficiency. 63 | ### Download a free PDF 64 | 65 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.
Simply click on the link to claim your free PDF.
66 |

https://packt.link/free-ebook/9781801816847

-------------------------------------------------------------------------------- /Chapter09/create_schema.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import pandas as pd 4 | import s3fs 5 | 6 | from typing import List, Dict 7 | 8 | def create_data_schema_from_s3_path(s3_path): 9 | """ 10 | Generates a data schema compatible for Lookout for Equipment from an S3 11 | directory 12 | 13 | Parameters: 14 | s3_path (string): 15 | a path pointing to the root directory on S3 where all the CSV files 16 | are located 17 | 18 | Returns: 19 | string: 20 | a JSON-formatted string ready to be used as a schema for a Lookout 21 | for Equipment dataset 22 | """ 23 | # We should have only directories at the first level of this S3 path: 24 | fs = s3fs.S3FileSystem() 25 | components = fs.ls(s3_path) 26 | 27 | # Loops through each subdirectory found in the root dir: 28 | DATASET_COMPONENT_FIELDS_MAP = dict() 29 | for subsystem in components: 30 | # The first tag should always be Timestamp 31 | subsystem_tags = ['timestamp'] 32 | 33 | # Opens the first file (they have the same structure): 34 | files = fs.ls(subsystem) 35 | for file in files: 36 | if file[-1] != '/': 37 | break 38 | 39 | current_subsystem_df = pd.read_csv(f's3://{file}', nrows=1) 40 | subsystem_tags = subsystem_tags + current_subsystem_df.columns.tolist()[1:] 41 | 42 | DATASET_COMPONENT_FIELDS_MAP.update({subsystem.split('/')[-1]: subsystem_tags}) 43 | 44 | # Generate the associated JSON schema: 45 | schema = create_data_schema(DATASET_COMPONENT_FIELDS_MAP) 46 | 47 | return schema 48 | 49 | def create_data_schema(component_fields_map: Dict): 50 | """ 51 | Generates a JSON formatted string from a dictionary 52 | 53 | Parameters: 54 | component_fields_map (dict): 55 | a dictionary containing a field maps for the dataset schema 56 | 57 | Returns: 58 | string: 59 | a JSON-formatted string ready to be used as a schema for a dataset 60 | """ 61 | schema = json.dumps( 62 | _create_data_schema_map( 63 | component_fields_map=component_fields_map 64 | ) 65 | ) 66 | 67 | return schema 68 | 69 | def _create_data_schema_map(component_fields_map: Dict): 70 | """ 71 | Generate a dictionary with the JSON format expected by Lookout for Equipment 72 | to be used as the schema for a dataset at ingestion, training time and 73 | inference time 74 | 75 | Parameters: 76 | component_fields_map (dict): 77 | a dictionary containing a field maps for the dataset schema 78 | 79 | Returns: 80 | dict: 81 | a dictionnary containing the detailed schema built from the original 82 | dictionary mapping 83 | """ 84 | # Build the schema for the current component: 85 | component_schema_list = [_create_component_schema( 86 | component_name, 87 | component_fields_map[component_name] 88 | ) for component_name in component_fields_map 89 | ] 90 | 91 | # The root of the schema is a "Components" tag: 92 | data_schema = dict() 93 | data_schema['Components'] = component_schema_list 94 | 95 | return data_schema 96 | 97 | def _create_component_schema(component_name: str, field_names: List): 98 | """ 99 | Build a schema for a given component and fieds list 100 | 101 | Parameters 102 | component_name (string): 103 | name of the component to build a schema for 104 | 105 | field_names (list of strings): 106 | name of all the fields included in this component 107 | 108 | Returns: 109 | dict: 110 | A dictionnary containing the detailed schema for a given component 111 | """ 112 | # Test if the field names is correct for this component: 113 | if len(field_names) == 0: 114 | raise Exception(f'Field names for component {component_name} should not be empty') 115 | if len(field_names) == 1: 116 | raise Exception(f'Component {component_name} must have at least one sensor beyond the timestamp') 117 | 118 | # The first field is a timestamp: 119 | col_list = [{'Name': field_names[0], 'Type': 'DATETIME'}] 120 | 121 | # All the others are float values: 122 | col_list = col_list + [ 123 | {'Name': field_name, 'Type': 'DOUBLE'} 124 | for field_name in field_names[1:] 125 | ] 126 | 127 | # Build the schema for this component: 128 | component_schema = dict() 129 | component_schema['ComponentName'] = component_name 130 | component_schema['Columns'] = col_list 131 | 132 | return component_schema 133 | 134 | if __name__ == '__main__': 135 | parser = argparse.ArgumentParser(description="Generate a JSON schema from an S3 location") 136 | parser.add_argument("s3path", type=str, help="The root S3 location where the training data are") 137 | args = parser.parse_args() 138 | s3_path = args.s3path 139 | 140 | schema = create_data_schema_from_s3_path(s3_path) 141 | 142 | print(schema) -------------------------------------------------------------------------------- /Chapter09/chapter9-preparing-l4e-dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "1baf74c4", 6 | "metadata": {}, 7 | "source": [ 8 | "# Time series analysis on AWS\n", 9 | "*Chapter 9 - Creating a dataset and ingesting your data*" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "id": "3ba931c3", 15 | "metadata": {}, 16 | "source": [ 17 | "## Initializations\n", 18 | "---" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "29ddeca5", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "!pip install --quiet tqdm kaggle" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "id": "d16b27fd", 34 | "metadata": {}, 35 | "source": [ 36 | "### Imports" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "id": "44909eac", 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "import matplotlib.colors as mpl_colors\n", 47 | "import matplotlib.dates as mdates\n", 48 | "import matplotlib.ticker as ticker\n", 49 | "import matplotlib.pyplot as plt\n", 50 | "import numpy as np\n", 51 | "import os\n", 52 | "import pandas as pd\n", 53 | "import sys\n", 54 | "import warnings\n", 55 | "import zipfile\n", 56 | "\n", 57 | "from matplotlib import gridspec\n", 58 | "from sklearn.preprocessing import normalize\n", 59 | "from tqdm import tqdm\n", 60 | "from urllib.request import urlretrieve" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "id": "45b8057e", 66 | "metadata": {}, 67 | "source": [ 68 | "### Parameters" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "id": "615f66ee", 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "RAW_DATA = os.path.join('..', 'Data', 'raw')\n", 79 | "DATA = os.path.join('..', 'Data')\n", 80 | "warnings.filterwarnings(\"ignore\")\n", 81 | "os.makedirs(RAW_DATA, exist_ok=True)\n", 82 | "\n", 83 | "%matplotlib inline\n", 84 | "plt.style.use('fivethirtyeight')\n", 85 | "prop_cycle = plt.rcParams['axes.prop_cycle']\n", 86 | "colors = prop_cycle.by_key()['color']\n", 87 | "\n", 88 | "plt.rcParams['figure.dpi'] = 300\n", 89 | "plt.rcParams['lines.linewidth'] = 0.3\n", 90 | "plt.rcParams['axes.titlesize'] = 6\n", 91 | "plt.rcParams['axes.labelsize'] = 6\n", 92 | "plt.rcParams['xtick.labelsize'] = 5\n", 93 | "plt.rcParams['ytick.labelsize'] = 5\n", 94 | "plt.rcParams['grid.linewidth'] = 0.2\n", 95 | "plt.rcParams['legend.fontsize'] = 5" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "id": "c104b772", 101 | "metadata": {}, 102 | "source": [ 103 | "### Helper functions" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "id": "8e34ec73", 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "def progress_report_hook(count, block_size, total_size):\n", 114 | " mb = int(count * block_size // 1048576)\n", 115 | " if count % 500 == 0:\n", 116 | " sys.stdout.write(\"\\r{} MB downloaded\".format(mb))\n", 117 | " sys.stdout.flush()" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "id": "67742269", 123 | "metadata": {}, 124 | "source": [ 125 | "### Downloading datasets" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "id": "ed48f20a", 131 | "metadata": {}, 132 | "source": [ 133 | "#### **Dataset 4:** Industrial pump data\n", 134 | "To download this dataset from Kaggle, you will need to have an account and create a token that you install on your machine. You can follow [**this link**](https://www.kaggle.com/docs/api) to get started with the Kaggle API. Once generated, make sure your Kaggle token is stored in the `~/.kaggle/kaggle.json` file, or the next cells will issue an error. To get a Kaggle token, go to kaggle.com and create an account. Then navigate to **My account** and scroll down to the API section. There, click the **Create new API token** button:\n", 135 | "\n", 136 | "\n" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "id": "42ec6c50", 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "FILE_NAME = 'pump-sensor-data.zip'\n", 147 | "ARCHIVE_PATH = os.path.join(RAW_DATA, FILE_NAME)\n", 148 | "FILE_PATH = os.path.join(DATA, 'pump', 'sensor.csv')\n", 149 | "FILE_DIR = os.path.dirname(FILE_PATH)\n", 150 | "\n", 151 | "if not os.path.isfile(FILE_PATH):\n", 152 | " if not os.path.exists('/home/ec2-user/.kaggle/kaggle.json'):\n", 153 | " os.makedirs('/home/ec2-user/.kaggle/', exist_ok=True)\n", 154 | " raise Exception('The kaggle.json token was not found.\\nCreating the /home/ec2-user/.kaggle/ directory: put your kaggle.json file there once you have generated it from the Kaggle website')\n", 155 | " else:\n", 156 | " print('The kaggle.json token file was found: making sure it is not readable by other users on this system.')\n", 157 | " !chmod 600 /home/ec2-user/.kaggle/kaggle.json\n", 158 | "\n", 159 | " os.makedirs(os.path.join(DATA, 'pump'), exist_ok=True)\n", 160 | " !kaggle datasets download -d nphantawee/pump-sensor-data -p $RAW_DATA\n", 161 | "\n", 162 | " print(\"\\nExtracting data archive\")\n", 163 | " zip_ref = zipfile.ZipFile(ARCHIVE_PATH, 'r')\n", 164 | " zip_ref.extractall(FILE_DIR + '/')\n", 165 | " zip_ref.close()\n", 166 | " \n", 167 | "else:\n", 168 | " print(\"File found, skipping download\")" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "id": "011c7f2c", 174 | "metadata": {}, 175 | "source": [ 176 | "## Dataset visualization\n", 177 | "---" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "id": "32e5cc41", 183 | "metadata": {}, 184 | "source": [ 185 | "### **4.** Industrial pump data" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "id": "03c07a8c", 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "FILE_PATH = os.path.join(DATA, 'pump', 'sensor.csv')\n", 196 | "pump_df = pd.read_csv(FILE_PATH, sep=',')\n", 197 | "pump_df.drop(columns={'Unnamed: 0'}, inplace=True)\n", 198 | "pump_df['timestamp'] = pd.to_datetime(pump_df['timestamp'], format='%Y-%m-%d %H:%M:%S')\n", 199 | "pump_df = pump_df.set_index('timestamp')\n", 200 | "\n", 201 | "pump_df['machine_status'].replace(to_replace='NORMAL', value=np.nan, inplace=True)\n", 202 | "pump_df['machine_status'].replace(to_replace='BROKEN', value=1, inplace=True)\n", 203 | "pump_df['machine_status'].replace(to_replace='RECOVERING', value=1, inplace=True)\n", 204 | "\n", 205 | "print('Shape:', pump_df.shape)\n", 206 | "pump_df.head()" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "id": "ef2f9cff", 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "pump_df" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "id": "dc6c4ae7", 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "file_structure_df = pump_df.iloc[:, 0:10].resample('5D').mean()" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "id": "96e5b0af", 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "plt.rcParams['hatch.linewidth'] = 0.5\n", 237 | "plt.rcParams['lines.linewidth'] = 0.5\n", 238 | "\n", 239 | "fig = plt.figure(figsize=(5,1))\n", 240 | "ax1 = fig.add_subplot(1,1,1)\n", 241 | "plot1 = ax1.plot(pump_df['sensor_00'], label='Healthy pump')\n", 242 | "\n", 243 | "ax2 = ax1.twinx()\n", 244 | "plot2 = ax2.fill_between(\n", 245 | " x=pump_df.index, \n", 246 | " y1=0.0, \n", 247 | " y2=pump_df['machine_status'], \n", 248 | " color=colors[1], \n", 249 | " linewidth=0.0,\n", 250 | " edgecolor='#000000',\n", 251 | " alpha=0.5, \n", 252 | " hatch=\"//////\", \n", 253 | " label='Broken pump'\n", 254 | ")\n", 255 | "ax2.grid(False)\n", 256 | "ax2.set_yticks([])\n", 257 | "\n", 258 | "labels = [plot1[0].get_label(), plot2.get_label()]\n", 259 | "\n", 260 | "plt.legend(handles=[plot1[0], plot2], labels=labels, loc='lower center', ncol=2, bbox_to_anchor=(0.5, -.4))\n", 261 | "plt.title('Industrial pump sensor data')\n", 262 | "\n", 263 | "plt.show()" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "id": "32caf6ba", 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "start_date = np.min(pump_df.index)\n", 274 | "end_date = np.max(pump_df.index)\n", 275 | "num_periods = pump_df.shape[0]\n", 276 | "\n", 277 | "new_index = pd.date_range(start=start_date, periods=num_periods, freq='5min')\n", 278 | "pump_df.index = new_index\n", 279 | "pump_df.index.name = 'Timestamp'" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "id": "908b6c98", 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [ 289 | "plt.rcParams['hatch.linewidth'] = 0.5\n", 290 | "plt.rcParams['lines.linewidth'] = 0.5\n", 291 | "\n", 292 | "fig = plt.figure(figsize=(5,1))\n", 293 | "ax1 = fig.add_subplot(1,1,1)\n", 294 | "plot1 = ax1.plot(pump_df['sensor_00'], label='sensor_00')\n", 295 | "# plot1 = ax1.plot(pump_df['sensor_34'], label='Healthy sensor_34')\n", 296 | "\n", 297 | "ax2 = ax1.twinx()\n", 298 | "plot2 = ax2.fill_between(\n", 299 | " x=pump_df.index, \n", 300 | " y1=0.0, \n", 301 | " y2=pump_df['machine_status'], \n", 302 | " color=colors[1], \n", 303 | " linewidth=0.0,\n", 304 | " edgecolor='#000000',\n", 305 | " alpha=0.5, \n", 306 | " hatch=\"//////\", \n", 307 | " label='Broken pump'\n", 308 | ")\n", 309 | "ax2.grid(False)\n", 310 | "ax2.set_yticks([])\n", 311 | "\n", 312 | "labels = [plot1[0].get_label(), plot2.get_label()]\n", 313 | "\n", 314 | "plt.legend(handles=[plot1[0], plot2], labels=labels, loc='lower center', ncol=2, bbox_to_anchor=(0.5, -.4))\n", 315 | "plt.title('Industrial pump sensor data')\n", 316 | "\n", 317 | "# start = pd.to_datetime('2018-06-24 14:25')\n", 318 | "# end = pd.to_datetime('2018-07-06 09:40')\n", 319 | "# plt.xlim(start, end)\n", 320 | "plt.show()" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "id": "0cdd9361", 327 | "metadata": {}, 328 | "outputs": [], 329 | "source": [ 330 | "plt.rcParams['axes.titlesize'] = 4\n", 331 | "plt.rcParams['axes.labelsize'] = 4\n", 332 | "plt.rcParams['xtick.labelsize'] = 3\n", 333 | "plt.rcParams['ytick.labelsize'] = 3\n", 334 | "\n", 335 | "for f in list(pump_df.columns):\n", 336 | " fig = plt.figure(figsize=(2.5,0.5))\n", 337 | " ax1 = fig.add_subplot(1,1,1)\n", 338 | " plot1 = ax1.plot(pump_df[f])\n", 339 | " ax1.set_title(f)\n", 340 | " \n", 341 | "plt.show()" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "id": "692ef77a", 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "pump_df = pump_df.drop(columns=['sensor_50', 'sensor_15'])" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "id": "b46e10dd", 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "pump_df" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "id": "48650f84", 367 | "metadata": {}, 368 | "source": [ 369 | "## Preparing the dataset for Lookout for Equipment\n", 370 | "---\n", 371 | "### Preparing time series data" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "id": "18768b76", 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "TRAIN_DATA = os.path.join('..', 'Data', 'pump', 'train-data')\n", 382 | "LABEL_DATA = os.path.join('..', 'Data', 'pump', 'label-data')\n", 383 | "\n", 384 | "os.makedirs(TRAIN_DATA, exist_ok=True)\n", 385 | "os.makedirs(LABEL_DATA, exist_ok=True)\n", 386 | "\n", 387 | "pump_df.index.name = 'Timestamp'" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": null, 393 | "id": "cfc3af2f", 394 | "metadata": {}, 395 | "outputs": [], 396 | "source": [ 397 | "features = list(pump_df.columns)[:-1]\n", 398 | "\n", 399 | "for tag in tqdm(features):\n", 400 | " os.makedirs(os.path.join(TRAIN_DATA, tag), exist_ok=True)\n", 401 | " fname = os.path.join(TRAIN_DATA, tag, 'tag_data.csv')\n", 402 | " tag_df = pump_df[[tag]]\n", 403 | " tag_df.to_csv(fname)" 404 | ] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "id": "7cec31d8", 409 | "metadata": {}, 410 | "source": [ 411 | "### Preparing label data" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": null, 417 | "id": "b4843119", 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [ 421 | "expanded_labels = pump_df[['machine_status']]\n", 422 | "expanded_labels['machine_status'].unique()" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": null, 428 | "id": "d210da63", 429 | "metadata": {}, 430 | "outputs": [], 431 | "source": [ 432 | "from dateutil.relativedelta import relativedelta\n", 433 | "\n", 434 | "range_df = expanded_labels.copy()\n", 435 | "range_df['BROKEN'] = False\n", 436 | "range_df.loc[range_df['machine_status'] == 1.0, 'BROKEN'] = True\n", 437 | "\n", 438 | "range_df['Next Status'] = range_df['BROKEN'].shift(-1)\n", 439 | "range_df['Start Range'] = (range_df['BROKEN'] == False) & (range_df['Next Status'] == True)\n", 440 | "range_df['End Range'] = (range_df['BROKEN'] == True) & (range_df['Next Status'] == False)\n", 441 | "range_df.iloc[0,3] = range_df.iloc[0,1]\n", 442 | "range_df = range_df[(range_df['Start Range'] == True) | (range_df['End Range'] == True)]\n", 443 | "\n", 444 | "labels_df = pd.DataFrame(columns=['start', 'end'])\n", 445 | "for index, row in range_df.iterrows():\n", 446 | " if row['Start Range']:\n", 447 | " start = index\n", 448 | "\n", 449 | " if row['End Range']:\n", 450 | " end = index\n", 451 | " labels_df = labels_df.append({\n", 452 | " 'start': start + relativedelta(hours=-12),\n", 453 | " 'end': end + relativedelta(hours=+12)\n", 454 | " }, ignore_index=True)\n", 455 | " \n", 456 | "labels_df" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "id": "4a92c2ec", 463 | "metadata": {}, 464 | "outputs": [], 465 | "source": [ 466 | "from dateutil.relativedelta import relativedelta\n", 467 | "\n", 468 | "labels_fname = os.path.join(LABEL_DATA, 'labels.csv')\n", 469 | "labels_df['start'] = pd.to_datetime(labels_df['start'])\n", 470 | "labels_df['end'] = pd.to_datetime(labels_df['end'])\n", 471 | "labels_df['start'] = labels_df['start'].dt.strftime('%Y-%m-%dT%H:%M:%S.%f')\n", 472 | "labels_df['end'] = labels_df['end'].dt.strftime('%Y-%m-%dT%H:%M:%S.%f')\n", 473 | "labels_df.to_csv(labels_fname, header=None, index=None)" 474 | ] 475 | }, 476 | { 477 | "cell_type": "markdown", 478 | "id": "8388aa1c", 479 | "metadata": {}, 480 | "source": [ 481 | "## Creating schema\n", 482 | "---" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": null, 488 | "id": "162e991c", 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": [ 492 | "!pip install --quiet markdown" 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "execution_count": null, 498 | "id": "23965ffe", 499 | "metadata": {}, 500 | "outputs": [], 501 | "source": [ 502 | "# Helper functions for managing Lookout for Equipment API calls:\n", 503 | "sys.path.append('../../amazon-lookout-for-equipment-python-sdk/src')\n", 504 | "import lookoutequipment as lookout\n", 505 | "import sagemaker" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": null, 511 | "id": "31b51e73", 512 | "metadata": {}, 513 | "outputs": [], 514 | "source": [ 515 | "DATASET_NAME = 'pump'\n", 516 | "BUCKET = 'pump-anomaly-detection'\n", 517 | "PREFIX = 'train-data/'\n", 518 | "ROLE_ARN = sagemaker.get_execution_role()" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": null, 524 | "id": "413ba6dc", 525 | "metadata": {}, 526 | "outputs": [], 527 | "source": [ 528 | "lookout_dataset = lookout.LookoutEquipmentDataset(\n", 529 | " dataset_name=DATASET_NAME,\n", 530 | " component_root_dir=TRAIN_DATA,\n", 531 | " access_role_arn=ROLE_ARN\n", 532 | ")" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": null, 538 | "id": "1463753f", 539 | "metadata": {}, 540 | "outputs": [], 541 | "source": [ 542 | "lookout_dataset.dataset_schema" 543 | ] 544 | } 545 | ], 546 | "metadata": { 547 | "kernelspec": { 548 | "display_name": "conda_python3", 549 | "language": "python", 550 | "name": "conda_python3" 551 | }, 552 | "language_info": { 553 | "codemirror_mode": { 554 | "name": "ipython", 555 | "version": 3 556 | }, 557 | "file_extension": ".py", 558 | "mimetype": "text/x-python", 559 | "name": "python", 560 | "nbconvert_exporter": "python", 561 | "pygments_lexer": "ipython3", 562 | "version": "3.6.13" 563 | } 564 | }, 565 | "nbformat": 4, 566 | "nbformat_minor": 5 567 | } 568 | -------------------------------------------------------------------------------- /Chapter01/chapter1-time-series-analysis-overview.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0b7105c6", 6 | "metadata": {}, 7 | "source": [ 8 | "# Time series analysis on AWS\n", 9 | "*Chapter 1 - Time series analysis overview*" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "id": "b393e4ec", 15 | "metadata": {}, 16 | "source": [ 17 | "## Initializations\n", 18 | "---" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "d4dd4b8b", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "!pip install --quiet tqdm kaggle tsia ruptures" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "id": "f9997f46", 34 | "metadata": {}, 35 | "source": [ 36 | "### Imports" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "id": "fd65af91", 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "import matplotlib.colors as mpl_colors\n", 47 | "import matplotlib.dates as mdates\n", 48 | "import matplotlib.ticker as ticker\n", 49 | "import matplotlib.pyplot as plt\n", 50 | "import numpy as np\n", 51 | "import os\n", 52 | "import pandas as pd\n", 53 | "import ruptures as rpt\n", 54 | "import sys\n", 55 | "import tsia\n", 56 | "import warnings\n", 57 | "import zipfile\n", 58 | "\n", 59 | "from matplotlib import gridspec\n", 60 | "from sklearn.preprocessing import normalize\n", 61 | "from tqdm import tqdm\n", 62 | "from urllib.request import urlretrieve" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "id": "7d104af8", 68 | "metadata": {}, 69 | "source": [ 70 | "### Parameters" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "id": "13989034", 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "RAW_DATA = os.path.join('..', 'Data', 'raw')\n", 81 | "DATA = os.path.join('..', 'Data')\n", 82 | "warnings.filterwarnings(\"ignore\")\n", 83 | "os.makedirs(RAW_DATA, exist_ok=True)\n", 84 | "\n", 85 | "%matplotlib inline\n", 86 | "# plt.style.use('Solarize_Light2')\n", 87 | "plt.style.use('fivethirtyeight')\n", 88 | "prop_cycle = plt.rcParams['axes.prop_cycle']\n", 89 | "colors = prop_cycle.by_key()['color']\n", 90 | "\n", 91 | "plt.rcParams['figure.dpi'] = 300\n", 92 | "plt.rcParams['lines.linewidth'] = 0.3\n", 93 | "plt.rcParams['axes.titlesize'] = 6\n", 94 | "plt.rcParams['axes.labelsize'] = 6\n", 95 | "plt.rcParams['xtick.labelsize'] = 4.5\n", 96 | "plt.rcParams['ytick.labelsize'] = 4.5\n", 97 | "plt.rcParams['grid.linewidth'] = 0.2\n", 98 | "plt.rcParams['legend.fontsize'] = 5" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "id": "71228c3a", 104 | "metadata": {}, 105 | "source": [ 106 | "### Helper functions" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "id": "ead7b9b7", 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "def progress_report_hook(count, block_size, total_size):\n", 117 | " mb = int(count * block_size // 1048576)\n", 118 | " if count % 500 == 0:\n", 119 | " sys.stdout.write(\"\\r{} MB downloaded\".format(mb))\n", 120 | " sys.stdout.flush()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "id": "590ace42", 126 | "metadata": {}, 127 | "source": [ 128 | "### Downloading datasets" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "id": "c1a10de9", 134 | "metadata": {}, 135 | "source": [ 136 | "#### **Dataset 1:** Household energy consumption" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "id": "7821f0b1", 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "ORIGINAL_DATA = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00321/LD2011_2014.txt.zip'\n", 147 | "ARCHIVE_PATH = os.path.join(RAW_DATA, 'energy-consumption.zip')\n", 148 | "FILE_NAME = 'energy-consumption.csv'\n", 149 | "FILE_PATH = os.path.join(DATA, 'energy', FILE_NAME)\n", 150 | "FILE_DIR = os.path.dirname(FILE_PATH)\n", 151 | "\n", 152 | "if not os.path.isfile(FILE_PATH):\n", 153 | " print(\"Downloading dataset (258MB), can take a few minutes depending on your connection\")\n", 154 | " urlretrieve(ORIGINAL_DATA, ARCHIVE_PATH, reporthook=progress_report_hook)\n", 155 | " os.makedirs(os.path.join(DATA, 'energy'), exist_ok=True)\n", 156 | "\n", 157 | " print(\"\\nExtracting data archive\")\n", 158 | " zip_ref = zipfile.ZipFile(ARCHIVE_PATH, 'r')\n", 159 | " zip_ref.extractall(FILE_DIR + '/')\n", 160 | " zip_ref.close()\n", 161 | " \n", 162 | " !rm -Rf $FILE_DIR/__MACOSX\n", 163 | " !mv $FILE_DIR/LD2011_2014.txt $FILE_PATH\n", 164 | " \n", 165 | "else:\n", 166 | " print(\"File found, skipping download\")" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "id": "171badbf", 172 | "metadata": {}, 173 | "source": [ 174 | "#### **Dataset 2:** Nasa Turbofan remaining useful lifetime" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "id": "244b7160", 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "ok = True\n", 185 | "ok = ok and os.path.exists(os.path.join(DATA, 'turbofan', 'train_FD001.txt'))\n", 186 | "ok = ok and os.path.exists(os.path.join(DATA, 'turbofan', 'test_FD001.txt'))\n", 187 | "ok = ok and os.path.exists(os.path.join(DATA, 'turbofan', 'RUL_FD001.txt'))\n", 188 | "\n", 189 | "if (ok):\n", 190 | " print(\"File found, skipping download\")\n", 191 | "\n", 192 | "else:\n", 193 | " print('Some datasets are missing, create working directories and download original dataset from the NASA repository.')\n", 194 | " \n", 195 | " # Making sure the directory already exists:\n", 196 | " os.makedirs(os.path.join(DATA, 'turbofan'), exist_ok=True)\n", 197 | "\n", 198 | " # Download the dataset from the NASA repository, unzip it and set\n", 199 | " # aside the first training file to work on:\n", 200 | " !wget https://ti.arc.nasa.gov/c/6/ --output-document=$RAW_DATA/CMAPSSData.zip\n", 201 | " !unzip $RAW_DATA/CMAPSSData.zip -d $RAW_DATA\n", 202 | " !cp $RAW_DATA/train_FD001.txt $DATA/turbofan/train_FD001.txt\n", 203 | " !cp $RAW_DATA/test_FD001.txt $DATA/turbofan/test_FD001.txt\n", 204 | " !cp $RAW_DATA/RUL_FD001.txt $DATA/turbofan/RUL_FD001.txt" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "id": "7eedc9c2", 210 | "metadata": {}, 211 | "source": [ 212 | "#### **Dataset 3:** Human heartbeat" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "id": "a1314ead", 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [ 222 | "ECG_DATA_SOURCE = 'http://www.timeseriesclassification.com/Downloads/ECG200.zip'\n", 223 | "ARCHIVE_PATH = os.path.join(RAW_DATA, 'ECG200.zip')\n", 224 | "FILE_NAME = 'ecg.csv'\n", 225 | "FILE_PATH = os.path.join(DATA, 'ecg', FILE_NAME)\n", 226 | "FILE_DIR = os.path.dirname(FILE_PATH)\n", 227 | "\n", 228 | "if not os.path.isfile(FILE_PATH):\n", 229 | " urlretrieve(ECG_DATA_SOURCE, ARCHIVE_PATH)\n", 230 | " os.makedirs(os.path.join(DATA, 'ecg'), exist_ok=True)\n", 231 | "\n", 232 | " print(\"\\nExtracting data archive\")\n", 233 | " zip_ref = zipfile.ZipFile(ARCHIVE_PATH, 'r')\n", 234 | " zip_ref.extractall(FILE_DIR + '/')\n", 235 | " zip_ref.close()\n", 236 | " \n", 237 | " !mv $DATA/ecg/ECG200_TRAIN.txt $FILE_PATH\n", 238 | " \n", 239 | "else:\n", 240 | " print(\"File found, skipping download\")" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "id": "0a297084", 246 | "metadata": {}, 247 | "source": [ 248 | "#### **Dataset 4:** Industrial pump data\n", 249 | "To download this dataset from Kaggle, you will need to have an account and create a token that you install on your machine. You can follow [**this link**](https://www.kaggle.com/docs/api) to get started with the Kaggle API. Once generated, make sure your Kaggle token is stored in the `~/.kaggle/kaggle.json` file, or the next cells will issue an error. In some cases, you may still have an error while using this location. Try moving your token in this location instead: `~/kaggle/kaggle.json` (not the absence of the `.` in the folder name).\n", 250 | "\n", 251 | "To get a Kaggle token, go to kaggle.com and create an account. Then navigate to **My account** and scroll down to the API section. There, click the **Create new API token** button:\n", 252 | "\n", 253 | "\n" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "id": "3617c828", 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "FILE_NAME = 'pump-sensor-data.zip'\n", 264 | "ARCHIVE_PATH = os.path.join(RAW_DATA, FILE_NAME)\n", 265 | "FILE_PATH = os.path.join(DATA, 'pump', 'sensor.csv')\n", 266 | "FILE_DIR = os.path.dirname(FILE_PATH)\n", 267 | "\n", 268 | "if not os.path.isfile(FILE_PATH):\n", 269 | " if not os.path.exists('/home/ec2-user/.kaggle/kaggle.json'):\n", 270 | " os.makedirs('/home/ec2-user/.kaggle/', exist_ok=True)\n", 271 | " raise Exception('The kaggle.json token was not found.\\nCreating the /home/ec2-user/.kaggle/ directory: put your kaggle.json file there once you have generated it from the Kaggle website')\n", 272 | " else:\n", 273 | " print('The kaggle.json token file was found: making sure it is not readable by other users on this system.')\n", 274 | " !chmod 600 /home/ec2-user/.kaggle/kaggle.json\n", 275 | "\n", 276 | " os.makedirs(os.path.join(DATA, 'pump'), exist_ok=True)\n", 277 | " !kaggle datasets download -d nphantawee/pump-sensor-data -p $RAW_DATA\n", 278 | "\n", 279 | " print(\"\\nExtracting data archive\")\n", 280 | " zip_ref = zipfile.ZipFile(ARCHIVE_PATH, 'r')\n", 281 | " zip_ref.extractall(FILE_DIR + '/')\n", 282 | " zip_ref.close()\n", 283 | " \n", 284 | "else:\n", 285 | " print(\"File found, skipping download\")" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "id": "99c3cc24", 291 | "metadata": {}, 292 | "source": [ 293 | "#### **Dataset 5:** London household energy consumption with weather data" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "id": "b5b137b9", 300 | "metadata": {}, 301 | "outputs": [], 302 | "source": [ 303 | "FILE_NAME = 'smart-meters-in-london.zip'\n", 304 | "ARCHIVE_PATH = os.path.join(RAW_DATA, FILE_NAME)\n", 305 | "FILE_PATH = os.path.join(DATA, 'energy-london', 'smart-meters-in-london.zip')\n", 306 | "FILE_DIR = os.path.dirname(FILE_PATH)\n", 307 | "\n", 308 | "# Checks if the data were already downloaded:\n", 309 | "if os.path.exists(os.path.join(DATA, 'energy-london', 'acorn_details.csv')):\n", 310 | " print(\"File found, skipping download\")\n", 311 | " \n", 312 | "else:\n", 313 | " # Downloading and unzipping datasets from Kaggle:\n", 314 | " print(\"Downloading dataset (2.26G), can take a few minutes depending on your connection\")\n", 315 | " os.makedirs(os.path.join(DATA, 'energy-london'), exist_ok=True)\n", 316 | " !kaggle datasets download -d jeanmidev/smart-meters-in-london -p $RAW_DATA\n", 317 | " \n", 318 | " print('Unzipping files...')\n", 319 | " zip_ref = zipfile.ZipFile(ARCHIVE_PATH, 'r')\n", 320 | " zip_ref.extractall(FILE_DIR + '/')\n", 321 | " zip_ref.close()\n", 322 | " \n", 323 | " !rm $DATA/energy-london/*zip\n", 324 | " !rm $DATA/energy-london/*gz\n", 325 | " !mv $DATA/energy-london/halfhourly_dataset/halfhourly_dataset/* $DATA/energy-london/halfhourly_dataset\n", 326 | " !rm -Rf $DATA/energy-london/halfhourly_dataset/halfhourly_dataset\n", 327 | " !mv $DATA/energy-london/daily_dataset/daily_dataset/* $DATA/energy-london/daily_dataset\n", 328 | " !rm -Rf $DATA/energy-london/daily_dataset/daily_dataset" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "id": "f5bd6e2e", 334 | "metadata": {}, 335 | "source": [ 336 | "## Dataset visualization\n", 337 | "---" 338 | ] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "id": "bef12b32", 343 | "metadata": {}, 344 | "source": [ 345 | "### **1.** Household energy consumption" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "id": "9649de6c", 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "%%time\n", 356 | "\n", 357 | "FILE_PATH = os.path.join(DATA, 'energy', 'energy-consumption.csv')\n", 358 | "energy_df = pd.read_csv(FILE_PATH, sep=';', decimal=',')\n", 359 | "energy_df = energy_df.rename(columns={'Unnamed: 0': 'Timestamp'})\n", 360 | "energy_df['Timestamp'] = pd.to_datetime(energy_df['Timestamp'])\n", 361 | "energy_df = energy_df.set_index('Timestamp')\n", 362 | "energy_df.iloc[100000:, 1:5].head()" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "id": "acc364f3", 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [ 372 | "fig = plt.figure(figsize=(5, 1.876))\n", 373 | "plt.plot(energy_df['MT_002'])\n", 374 | "plt.title('Energy consumption for household MT_002')\n", 375 | "plt.show()" 376 | ] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "id": "f5ab3461", 381 | "metadata": {}, 382 | "source": [ 383 | "### **2.** NASA Turbofan data" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "id": "b2e44083", 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "FILE_PATH = os.path.join(DATA, 'turbofan', 'train_FD001.txt')\n", 394 | "turbofan_df = pd.read_csv(FILE_PATH, header=None, sep=' ')\n", 395 | "turbofan_df.dropna(axis='columns', how='all', inplace=True)\n", 396 | "print('Shape:', turbofan_df.shape)\n", 397 | "turbofan_df.head(5)" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "id": "d8342359", 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [ 407 | "columns = [\n", 408 | " 'unit_number',\n", 409 | " 'cycle',\n", 410 | " 'setting_1',\n", 411 | " 'setting_2',\n", 412 | " 'setting_3',\n", 413 | "] + ['sensor_{}'.format(s) for s in range(1,22)]\n", 414 | "turbofan_df.columns = columns\n", 415 | "turbofan_df.head()" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "id": "32788dc9", 422 | "metadata": {}, 423 | "outputs": [], 424 | "source": [ 425 | "# Add a RUL column and group the data by unit_number:\n", 426 | "turbofan_df['rul'] = 0\n", 427 | "grouped_data = turbofan_df.groupby(by='unit_number')\n", 428 | "\n", 429 | "# Loops through each unit number to get the lifecycle counts:\n", 430 | "for unit, rul in enumerate(grouped_data.count()['cycle']):\n", 431 | " current_df = turbofan_df[turbofan_df['unit_number'] == (unit+1)].copy()\n", 432 | " current_df['rul'] = rul - current_df['cycle']\n", 433 | " turbofan_df[turbofan_df['unit_number'] == (unit+1)] = current_df" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": null, 439 | "id": "3b1fe985", 440 | "metadata": {}, 441 | "outputs": [], 442 | "source": [ 443 | "df = turbofan_df.iloc[:, [0,1,2,3,4,5,6,25,26]].copy()\n", 444 | "df = df[df['unit_number'] == 1]\n", 445 | "\n", 446 | "def highlight_cols(s):\n", 447 | " return f'background-color: rgba(0, 143, 213, 0.3)'\n", 448 | "\n", 449 | "df.head(10).style.applymap(highlight_cols, subset=['rul'])" 450 | ] 451 | }, 452 | { 453 | "cell_type": "markdown", 454 | "id": "6f313864", 455 | "metadata": {}, 456 | "source": [ 457 | "### **3.** ECG Data" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": null, 463 | "id": "d5fc7c8e", 464 | "metadata": {}, 465 | "outputs": [], 466 | "source": [ 467 | "FILE_PATH = os.path.join(DATA, 'ecg', 'ecg.csv')\n", 468 | "ecg_df = pd.read_csv(FILE_PATH, header=None, sep=' ')\n", 469 | "print('Shape:', ecg_df.shape)\n", 470 | "ecg_df.head()" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": null, 476 | "id": "3b2664cf", 477 | "metadata": {}, 478 | "outputs": [], 479 | "source": [ 480 | "plt.rcParams['lines.linewidth'] = 0.7\n", 481 | "fig = plt.figure(figsize=(5,2))\n", 482 | "label_normal = False\n", 483 | "label_ischemia = False\n", 484 | "for i in range(0,100):\n", 485 | " label = ecg_df.iloc[i, 0]\n", 486 | " if (label == -1):\n", 487 | " color = colors[1]\n", 488 | " \n", 489 | " if label_ischemia:\n", 490 | " plt.plot(ecg_df.iloc[i,1:96], color=color, alpha=0.5, linestyle='--', linewidth=0.5)\n", 491 | " else:\n", 492 | " plt.plot(ecg_df.iloc[i,1:96], color=color, alpha=0.5, label='Ischemia', linestyle='--')\n", 493 | " label_ischemia = True\n", 494 | " \n", 495 | " else:\n", 496 | " color = colors[0]\n", 497 | " \n", 498 | " if label_normal:\n", 499 | " plt.plot(ecg_df.iloc[i,1:96], color=color, alpha=0.5)\n", 500 | " else:\n", 501 | " plt.plot(ecg_df.iloc[i,1:96], color=color, alpha=0.5, label='Normal')\n", 502 | " label_normal = True\n", 503 | " \n", 504 | "plt.title('Human heartbeat activity')\n", 505 | "plt.legend(loc='upper right', ncol=2)\n", 506 | "plt.show()" 507 | ] 508 | }, 509 | { 510 | "cell_type": "markdown", 511 | "id": "5b904dba", 512 | "metadata": {}, 513 | "source": [ 514 | "### **4.** Industrial pump data" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": null, 520 | "id": "ce940bbb", 521 | "metadata": {}, 522 | "outputs": [], 523 | "source": [ 524 | "FILE_PATH = os.path.join(DATA, 'pump', 'sensor.csv')\n", 525 | "pump_df = pd.read_csv(FILE_PATH, sep=',')\n", 526 | "pump_df.drop(columns={'Unnamed: 0'}, inplace=True)\n", 527 | "pump_df['timestamp'] = pd.to_datetime(pump_df['timestamp'], format='%Y-%m-%d %H:%M:%S')\n", 528 | "pump_df = pump_df.set_index('timestamp')\n", 529 | "\n", 530 | "pump_df['machine_status'].replace(to_replace='NORMAL', value=np.nan, inplace=True)\n", 531 | "pump_df['machine_status'].replace(to_replace='BROKEN', value=1, inplace=True)\n", 532 | "pump_df['machine_status'].replace(to_replace='RECOVERING', value=1, inplace=True)\n", 533 | "\n", 534 | "print('Shape:', pump_df.shape)\n", 535 | "pump_df.head()" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": null, 541 | "id": "7455aa11", 542 | "metadata": {}, 543 | "outputs": [], 544 | "source": [ 545 | "file_structure_df = pump_df.iloc[:, 0:10].resample('5D').mean()" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": null, 551 | "id": "913b88c5", 552 | "metadata": {}, 553 | "outputs": [], 554 | "source": [ 555 | "plt.rcParams['hatch.linewidth'] = 0.5\n", 556 | "plt.rcParams['lines.linewidth'] = 0.5\n", 557 | "\n", 558 | "fig = plt.figure(figsize=(5,1))\n", 559 | "ax1 = fig.add_subplot(1,1,1)\n", 560 | "plot1 = ax1.plot(pump_df['sensor_00'], label='Healthy pump')\n", 561 | "\n", 562 | "ax2 = ax1.twinx()\n", 563 | "plot2 = ax2.fill_between(\n", 564 | " x=pump_df.index, \n", 565 | " y1=0.0, \n", 566 | " y2=pump_df['machine_status'], \n", 567 | " color=colors[1], \n", 568 | " linewidth=0.0,\n", 569 | " edgecolor='#000000',\n", 570 | " alpha=0.5, \n", 571 | " hatch=\"//////\", \n", 572 | " label='Broken pump'\n", 573 | ")\n", 574 | "ax2.grid(False)\n", 575 | "ax2.set_yticks([])\n", 576 | "\n", 577 | "labels = [plot1[0].get_label(), plot2.get_label()]\n", 578 | "\n", 579 | "plt.legend(handles=[plot1[0], plot2], labels=labels, loc='lower center', ncol=2, bbox_to_anchor=(0.5, -.4))\n", 580 | "plt.title('Industrial pump sensor data')\n", 581 | "plt.show()" 582 | ] 583 | }, 584 | { 585 | "cell_type": "markdown", 586 | "id": "e3f3743d", 587 | "metadata": {}, 588 | "source": [ 589 | "### **5.** London household energy consumption with weather data" 590 | ] 591 | }, 592 | { 593 | "cell_type": "markdown", 594 | "id": "fcd0191e", 595 | "metadata": {}, 596 | "source": [ 597 | "We want to filter out households that are are subject to the dToU tariff and keep only the ones with a known ACORN (i.e. not in the ACORN-U group): this will allow us to better model future analysis by adding the Acorn detail informations (which by definitions, won't be available for the ACORN-U group)." 598 | ] 599 | }, 600 | { 601 | "cell_type": "code", 602 | "execution_count": null, 603 | "id": "1f57ceb9", 604 | "metadata": {}, 605 | "outputs": [], 606 | "source": [ 607 | "household_filename = os.path.join(DATA, 'energy-london', 'informations_households.csv')\n", 608 | "household_df = pd.read_csv(household_filename)\n", 609 | "household_df = household_df[(household_df['stdorToU'] == 'Std') & (household_df['Acorn'] == 'ACORN-E')]\n", 610 | "print(household_df.shape)\n", 611 | "household_df.head()" 612 | ] 613 | }, 614 | { 615 | "cell_type": "markdown", 616 | "id": "68add26c", 617 | "metadata": {}, 618 | "source": [ 619 | "#### Associating households with they energy consumption data\n", 620 | "Each household (with an ID starting by `MACxxxxx` in the table above) has its consumption data stored in a block file name `block_xx`. This file is also available from the `informations_household.csv` file extracted above. We have the association between `household_id` and `block_file`: we can open each of them and keep the consumption for the households of interest. All these data will be concatenated into an `energy_df` dataframe:" 621 | ] 622 | }, 623 | { 624 | "cell_type": "code", 625 | "execution_count": null, 626 | "id": "4fd6fd80", 627 | "metadata": {}, 628 | "outputs": [], 629 | "source": [ 630 | "%%time\n", 631 | "\n", 632 | "household_ids = household_df['LCLid'].tolist()\n", 633 | "consumption_file = os.path.join(DATA, 'energy-london', 'hourly_consumption.csv')\n", 634 | "min_data_points = ((pd.to_datetime('2020-12-31') - pd.to_datetime('2020-01-01')).days + 1)*24*2\n", 635 | "\n", 636 | "if os.path.exists(consumption_file):\n", 637 | " print('Half-hourly consumption file already exists, loading from disk...')\n", 638 | " energy_df = pd.read_csv(consumption_file)\n", 639 | " energy_df['timestamp'] = pd.to_datetime(energy_df['timestamp'], format='%Y-%m-%d %H:%M:%S.%f')\n", 640 | " print('Done.')\n", 641 | " \n", 642 | "else:\n", 643 | " print('Half-hourly consumption file not found. We need to generate it.')\n", 644 | " \n", 645 | " # We know have the block number we can use to open the right file:\n", 646 | " energy_df = pd.DataFrame()\n", 647 | " target_block_files = household_df['file'].unique().tolist()\n", 648 | " print('- {} block files to process: '.format(len(target_block_files)), end='')\n", 649 | " df_list = []\n", 650 | " for block_file in tqdm(target_block_files):\n", 651 | " # Reads the current block file:\n", 652 | " current_filename = os.path.join(DATA, 'energy-london', 'halfhourly_dataset', '{}.csv'.format(block_file))\n", 653 | " df = pd.read_csv(current_filename)\n", 654 | " \n", 655 | " # Set readable column names and adjust data types:\n", 656 | " df.columns = ['household_id', 'timestamp', 'energy']\n", 657 | " df = df.replace(to_replace='Null', value=0.0)\n", 658 | " df['energy'] = df['energy'].astype(np.float64)\n", 659 | " df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%d %H:%M:%S.%f')\n", 660 | " \n", 661 | " # We filter on the households sampled earlier:\n", 662 | " df_list.append(df[df['household_id'].isin(household_ids)].reset_index(drop=True))\n", 663 | " \n", 664 | " # Concatenate with the main dataframe:\n", 665 | " energy_df = pd.concat(df_list, axis='index', ignore_index=True)\n", 666 | " \n", 667 | " datapoints = energy_df.groupby(by='household_id').count()\n", 668 | " datapoints = datapoints[datapoints['timestamp'] < min_data_points]\n", 669 | " hhid_to_remove = datapoints.index.tolist()\n", 670 | " energy_df = energy_df[~energy_df['household_id'].isin(hhid_to_remove)]\n", 671 | "\n", 672 | " # Let's save this dataset to disk, we will use it from now on:\n", 673 | " print('Saving file to disk... ', end='')\n", 674 | " energy_df.to_csv(consumption_file, index=False)\n", 675 | " print('Done.')" 676 | ] 677 | }, 678 | { 679 | "cell_type": "code", 680 | "execution_count": null, 681 | "id": "465cdc8f", 682 | "metadata": {}, 683 | "outputs": [], 684 | "source": [ 685 | "start = np.min(energy_df['timestamp'])\n", 686 | "end = np.max(energy_df['timestamp'])\n", 687 | "weather_filename = os.path.join(DATA, 'energy-london', 'weather_hourly_darksky.csv')\n", 688 | "\n", 689 | "weather_df = pd.read_csv(weather_filename)\n", 690 | "weather_df['time'] = pd.to_datetime(weather_df['time'], format='%Y-%m-%d %H:%M:%S')\n", 691 | "weather_df = weather_df.drop(columns=['precipType', 'icon', 'summary'])\n", 692 | "weather_df = weather_df.sort_values(by='time')\n", 693 | "weather_df = weather_df.set_index('time')\n", 694 | "weather_df = weather_df[start:end]\n", 695 | "\n", 696 | "# Let's make sure we have one datapoint per hour to match \n", 697 | "# the frequency used for the household energy consumption data:\n", 698 | "weather_df = weather_df.resample(rule='1H').mean() # This will generate NaN values timestamp missing data\n", 699 | "weather_df = weather_df.interpolate(method='linear') # This will fill the missing values with the average \n", 700 | "\n", 701 | "print(weather_df.shape)\n", 702 | "weather_df" 703 | ] 704 | }, 705 | { 706 | "cell_type": "code", 707 | "execution_count": null, 708 | "id": "3427cb1a", 709 | "metadata": {}, 710 | "outputs": [], 711 | "source": [ 712 | "energy_df = energy_df.set_index(['household_id', 'timestamp'])\n", 713 | "energy_df" 714 | ] 715 | }, 716 | { 717 | "cell_type": "code", 718 | "execution_count": null, 719 | "id": "40f173f5", 720 | "metadata": {}, 721 | "outputs": [], 722 | "source": [ 723 | "hhid = household_ids[2]\n", 724 | "hh_energy = energy_df.loc[hhid, :]\n", 725 | "start = '2012-07-01'\n", 726 | "end = '2012-07-15'\n", 727 | "\n", 728 | "fig = plt.figure(figsize=(5,1))\n", 729 | "ax1 = fig.add_subplot(1,1,1)\n", 730 | "plot2 = ax1.fill_between(\n", 731 | " x=weather_df.loc[start:end, 'temperature'].index, \n", 732 | " y1=0.0, \n", 733 | " y2=weather_df.loc[start:end, 'temperature'], \n", 734 | " color=colors[1], \n", 735 | " linewidth=0.0,\n", 736 | " edgecolor='#000000',\n", 737 | " alpha=0.25, \n", 738 | " hatch=\"//////\", \n", 739 | " label='Temperature'\n", 740 | ")\n", 741 | "ax1.set_ylim((0,40))\n", 742 | "ax1.grid(False)\n", 743 | "\n", 744 | "ax2 = ax1.twinx()\n", 745 | "ax2.plot(hh_energy[start:end], label='Energy consumption', linewidth=2, color='#FFFFFF', alpha=0.5)\n", 746 | "plot1 = ax2.plot(hh_energy[start:end], label='Energy consumption', linewidth=0.7)\n", 747 | "ax2.set_title(f'Energy consumption for household {hhid}')\n", 748 | "\n", 749 | "labels = [plot1[0].get_label(), plot2.get_label()]\n", 750 | "plt.legend(handles=[plot1[0], plot2], labels=labels, loc='upper left', fontsize=3, ncol=2)\n", 751 | "\n", 752 | "plt.show()" 753 | ] 754 | }, 755 | { 756 | "cell_type": "code", 757 | "execution_count": null, 758 | "id": "36684f21", 759 | "metadata": {}, 760 | "outputs": [], 761 | "source": [ 762 | "acorn_filename = os.path.join(DATA, 'energy-london', 'acorn_details.csv')\n", 763 | "acorn_df = pd.read_csv(acorn_filename, encoding='ISO-8859-1')\n", 764 | "acorn_df = acorn_df.sample(10).loc[:, ['MAIN CATEGORIES', 'CATEGORIES', 'REFERENCE', 'ACORN-A', 'ACORN-B', 'ACORN-E']]\n", 765 | "acorn_df" 766 | ] 767 | }, 768 | { 769 | "cell_type": "markdown", 770 | "id": "f09fcaf8", 771 | "metadata": {}, 772 | "source": [ 773 | "## File structure exploration\n", 774 | "---" 775 | ] 776 | }, 777 | { 778 | "cell_type": "code", 779 | "execution_count": null, 780 | "id": "d87ca63a", 781 | "metadata": {}, 782 | "outputs": [], 783 | "source": [ 784 | "from IPython.display import display_html\n", 785 | "\n", 786 | "def display_multiple_dataframe(*args, max_rows=None, max_cols=None):\n", 787 | " html_str = ''\n", 788 | " for df in args:\n", 789 | " html_str += df.to_html(max_cols=max_cols, max_rows=max_rows)\n", 790 | " \n", 791 | " display_html(html_str.replace('table','table style=\"display:inline\"'), raw=True)" 792 | ] 793 | }, 794 | { 795 | "cell_type": "code", 796 | "execution_count": null, 797 | "id": "c0fb229a", 798 | "metadata": {}, 799 | "outputs": [], 800 | "source": [ 801 | "display_multiple_dataframe(\n", 802 | " file_structure_df[['sensor_00']],\n", 803 | " file_structure_df[['sensor_01']],\n", 804 | " file_structure_df[['sensor_03']],\n", 805 | " max_rows=10, max_cols=None\n", 806 | ")" 807 | ] 808 | }, 809 | { 810 | "cell_type": "code", 811 | "execution_count": null, 812 | "id": "7ca40f25", 813 | "metadata": {}, 814 | "outputs": [], 815 | "source": [ 816 | "display_multiple_dataframe(\n", 817 | " file_structure_df.loc['2018-04', :].head(6),\n", 818 | " file_structure_df.loc['2018-05', :].head(6),\n", 819 | " file_structure_df.loc['2018-06', :].head(6),\n", 820 | " max_rows=None, max_cols=2\n", 821 | ")" 822 | ] 823 | }, 824 | { 825 | "cell_type": "code", 826 | "execution_count": null, 827 | "id": "6e87d94c", 828 | "metadata": {}, 829 | "outputs": [], 830 | "source": [ 831 | "display_multiple_dataframe(\n", 832 | " file_structure_df.loc['2018-04', ['sensor_00']].head(6),\n", 833 | " file_structure_df.loc['2018-05', ['sensor_00']].head(6),\n", 834 | " file_structure_df.loc['2018-06', ['sensor_00']].head(6),\n", 835 | " max_rows=10, max_cols=None\n", 836 | ")\n", 837 | "display_multiple_dataframe(\n", 838 | " file_structure_df.loc['2018-04', ['sensor_01']].head(6),\n", 839 | " file_structure_df.loc['2018-05', ['sensor_01']].head(6),\n", 840 | " file_structure_df.loc['2018-06', ['sensor_01']].head(6),\n", 841 | " max_rows=10, max_cols=None\n", 842 | ")\n", 843 | "print('.\\n.\\n.')\n", 844 | "display_multiple_dataframe(\n", 845 | " file_structure_df.loc['2018-04', ['sensor_09']].head(6),\n", 846 | " file_structure_df.loc['2018-05', ['sensor_09']].head(6),\n", 847 | " file_structure_df.loc['2018-06', ['sensor_09']].head(6),\n", 848 | " max_rows=10, max_cols=None\n", 849 | ")" 850 | ] 851 | }, 852 | { 853 | "cell_type": "code", 854 | "execution_count": null, 855 | "id": "9c8d815f", 856 | "metadata": {}, 857 | "outputs": [], 858 | "source": [ 859 | "df1 = pump_df.iloc[:, [0]].resample('5D').mean()\n", 860 | "df2 = pump_df.iloc[:, [1]].resample('2D').mean()\n", 861 | "df3 = pump_df.iloc[:, [2]].resample('7D').mean()\n", 862 | "\n", 863 | "display_multiple_dataframe(\n", 864 | " df1.head(10), df2.head(10), df3.head(10),\n", 865 | " pd.merge(pd.merge(df1, df2, left_index=True, right_index=True, how='outer'), df3, left_index=True, right_index=True, how='outer').head(10),\n", 866 | " max_rows=None, max_cols=None\n", 867 | ")" 868 | ] 869 | }, 870 | { 871 | "cell_type": "code", 872 | "execution_count": null, 873 | "id": "7046cbb5", 874 | "metadata": {}, 875 | "outputs": [], 876 | "source": [ 877 | "pd.set_option('display.max_columns', None)\n", 878 | "pd.set_option('display.max_rows', 10)\n", 879 | "pd.merge(pd.merge(df1, df2, left_index=True, right_index=True, how='outer'), df3, left_index=True, right_index=True, how='outer').head(10)" 880 | ] 881 | }, 882 | { 883 | "cell_type": "code", 884 | "execution_count": null, 885 | "id": "7ad04201", 886 | "metadata": {}, 887 | "outputs": [], 888 | "source": [ 889 | "plt.figure(figsize=(5,1))\n", 890 | "for i in range(len(colors)):\n", 891 | " plt.plot(file_structure_df[f'sensor_0{i}'], linewidth=2, alpha=0.5, label=colors[i])\n", 892 | "\n", 893 | "plt.legend()\n", 894 | "plt.show()" 895 | ] 896 | }, 897 | { 898 | "cell_type": "markdown", 899 | "id": "b7b85f1f", 900 | "metadata": {}, 901 | "source": [ 902 | "## Visualization\n", 903 | "---" 904 | ] 905 | }, 906 | { 907 | "cell_type": "code", 908 | "execution_count": null, 909 | "id": "b4c78d4c", 910 | "metadata": {}, 911 | "outputs": [], 912 | "source": [ 913 | "fig = plt.figure(figsize=(5,1))\n", 914 | "ax1 = fig.add_subplot(1,1,1)\n", 915 | "ax2 = ax1.twinx()\n", 916 | "\n", 917 | "plot_sensor_0 = ax1.plot(pump_df['sensor_00'], label='Sensor 0', color=colors[0], linewidth=1, alpha=0.8)\n", 918 | "plot_sensor_1 = ax2.plot(pump_df['sensor_01'], label='Sensor 1', color=colors[1], linewidth=1, alpha=0.8)\n", 919 | "ax2.grid(False)\n", 920 | "plt.title('Pump sensor values (2 sensors)')\n", 921 | "plt.legend(handles=[plot_sensor_0[0], plot_sensor_1[0]], ncol=2, loc='lower right')\n", 922 | "plt.show()" 923 | ] 924 | }, 925 | { 926 | "cell_type": "code", 927 | "execution_count": null, 928 | "id": "90daaba9", 929 | "metadata": {}, 930 | "outputs": [], 931 | "source": [ 932 | "reduced_pump_df = pump_df.loc[:, 'sensor_00':'sensor_14']\n", 933 | "reduced_pump_df = reduced_pump_df.replace([np.inf, -np.inf], np.nan)\n", 934 | "reduced_pump_df = reduced_pump_df.fillna(0.0)\n", 935 | "reduced_pump_df = reduced_pump_df.astype(np.float32)\n", 936 | "scaled_pump_df = pd.DataFrame(normalize(reduced_pump_df), index=reduced_pump_df.index, columns=reduced_pump_df.columns)\n", 937 | "scaled_pump_df" 938 | ] 939 | }, 940 | { 941 | "cell_type": "code", 942 | "execution_count": null, 943 | "id": "cbc92f9d", 944 | "metadata": {}, 945 | "outputs": [], 946 | "source": [ 947 | "fig = plt.figure(figsize=(5,1))\n", 948 | "\n", 949 | "for i in range(0,15):\n", 950 | " plt.plot(scaled_pump_df.iloc[:, i], alpha=0.6)\n", 951 | "\n", 952 | "plt.title('Pump sensor values (15 sensors)')\n", 953 | "plt.show()" 954 | ] 955 | }, 956 | { 957 | "cell_type": "code", 958 | "execution_count": null, 959 | "id": "9739c28e", 960 | "metadata": {}, 961 | "outputs": [], 962 | "source": [ 963 | "pump_df2 = pump_df.copy()\n", 964 | "\n", 965 | "pump_df2 = pump_df2.replace([np.inf, -np.inf], np.nan)\n", 966 | "pump_df2 = pump_df2.fillna(0.0)\n", 967 | "pump_df2 = pump_df2.astype(np.float32)\n", 968 | "\n", 969 | "pump_description = pump_df2.describe().T\n", 970 | "constant_signals = pump_description[pump_description['min'] == pump_description['max']].index.tolist()\n", 971 | "pump_df2 = pump_df2.drop(columns=constant_signals)\n", 972 | "\n", 973 | "features = pump_df2.columns.tolist()" 974 | ] 975 | }, 976 | { 977 | "cell_type": "code", 978 | "execution_count": null, 979 | "id": "dd1cf267", 980 | "metadata": {}, 981 | "outputs": [], 982 | "source": [ 983 | "def hex_to_rgb(hex_color):\n", 984 | " \"\"\"\n", 985 | " Converts a color string in hexadecimal format to RGB format.\n", 986 | " \n", 987 | " PARAMS\n", 988 | " ======\n", 989 | " hex_color: string\n", 990 | " A string describing the color to convert from hexadecimal. It can\n", 991 | " include the leading # character or not\n", 992 | " \n", 993 | " RETURNS\n", 994 | " =======\n", 995 | " rgb_color: tuple\n", 996 | " Each color component of the returned tuple will be a float value\n", 997 | " between 0.0 and 1.0\n", 998 | " \"\"\"\n", 999 | " hex_color = hex_color.lstrip('#')\n", 1000 | " rgb_color = tuple(int(hex_color[i:i+2], base=16) / 255.0 for i in [0, 2, 4])\n", 1001 | " return rgb_color\n", 1002 | "\n", 1003 | "def plot_timeseries_strip_chart(binned_timeseries, signal_list, fig_width=12, signal_height=0.15, dates=None, day_interval=7):\n", 1004 | " # Build a suitable colormap:\n", 1005 | " colors_list = [\n", 1006 | " hex_to_rgb('#DC322F'), \n", 1007 | " hex_to_rgb('#B58900'), \n", 1008 | " hex_to_rgb('#2AA198')\n", 1009 | " ]\n", 1010 | " cm = mpl_colors.LinearSegmentedColormap.from_list('RdAmGr', colors_list, N=len(colors_list))\n", 1011 | " \n", 1012 | " fig = plt.figure(figsize=(fig_width, signal_height * binned_timeseries.shape[0]))\n", 1013 | " ax = fig.add_subplot(1,1,1)\n", 1014 | " \n", 1015 | " # Devising the extent of the actual plot:\n", 1016 | " if dates is not None:\n", 1017 | " dnum = mdates.date2num(dates)\n", 1018 | " start = dnum[0] - (dnum[1]-dnum[0])/2.\n", 1019 | " stop = dnum[-1] + (dnum[1]-dnum[0])/2.\n", 1020 | " extent = [start, stop, 0, signal_height * (binned_timeseries.shape[0])]\n", 1021 | " \n", 1022 | " else:\n", 1023 | " extent = None\n", 1024 | " \n", 1025 | " # Plot the matrix:\n", 1026 | " im = ax.imshow(binned_timeseries, \n", 1027 | " extent=extent, \n", 1028 | " aspect=\"auto\", \n", 1029 | " cmap=cm, \n", 1030 | " origin='lower')\n", 1031 | " \n", 1032 | " # Adjusting the x-axis if we provide dates:\n", 1033 | " if dates is not None:\n", 1034 | " ax.xaxis.set_major_locator(mdates.MonthLocator())\n", 1035 | " ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))\n", 1036 | " for tick in ax.xaxis.get_major_ticks():\n", 1037 | " tick.label.set_fontsize(4)\n", 1038 | " tick.label.set_rotation(60)\n", 1039 | " tick.label.set_fontweight('bold')\n", 1040 | "\n", 1041 | " ax.tick_params(axis='x', which='major', pad=7, labelcolor='#000000')\n", 1042 | " plt.xticks(ha='right')\n", 1043 | " \n", 1044 | " # Adjusting the y-axis:\n", 1045 | " ax.yaxis.set_major_locator(ticker.MultipleLocator(signal_height))\n", 1046 | " ax.set_yticklabels(signal_list, verticalalignment='bottom', fontsize=4)\n", 1047 | " ax.set_yticks(np.arange(len(signal_list)) * signal_height)\n", 1048 | "\n", 1049 | " plt.grid()\n", 1050 | " return ax" 1051 | ] 1052 | }, 1053 | { 1054 | "cell_type": "code", 1055 | "execution_count": null, 1056 | "id": "ef704a1a", 1057 | "metadata": {}, 1058 | "outputs": [], 1059 | "source": [ 1060 | "from IPython.display import display, Markdown, Latex\n", 1061 | "\n", 1062 | "# Build a list of dataframes, one per sensor:\n", 1063 | "df_list = []\n", 1064 | "for f in features[:1]:\n", 1065 | " df_list.append(pump_df2[[f]])\n", 1066 | "\n", 1067 | "# Discretize each signal in 3 bins:\n", 1068 | "array = tsia.markov.discretize_multivariate(df_list)\n", 1069 | "\n", 1070 | "fig = plt.figure(figsize=(5.5, 0.6))\n", 1071 | "plt.plot(pump_df2['sensor_00'], linewidth=0.7, alpha=0.6)\n", 1072 | "plt.title('Line plot of the pump sensor 0')\n", 1073 | "plt.show()\n", 1074 | "\n", 1075 | "display(Markdown(''))\n", 1076 | "\n", 1077 | "\n", 1078 | "# Plot the strip chart:\n", 1079 | "ax = plot_timeseries_strip_chart(\n", 1080 | " array, \n", 1081 | " signal_list=features[:1],\n", 1082 | " fig_width=5.21,\n", 1083 | " signal_height=0.2,\n", 1084 | " dates=df_list[0].index.to_pydatetime(),\n", 1085 | " day_interval=2\n", 1086 | ")\n", 1087 | "ax.set_title('Strip chart of the pump sensor 0');" 1088 | ] 1089 | }, 1090 | { 1091 | "cell_type": "code", 1092 | "execution_count": null, 1093 | "id": "59c2ff60", 1094 | "metadata": {}, 1095 | "outputs": [], 1096 | "source": [ 1097 | "# Build a list of dataframes, one per sensor:\n", 1098 | "df_list = []\n", 1099 | "for f in features:\n", 1100 | " df_list.append(pump_df2[[f]])\n", 1101 | "\n", 1102 | "# Discretize each signal in 3 bins:\n", 1103 | "array = tsia.markov.discretize_multivariate(df_list)\n", 1104 | "\n", 1105 | "# Plot the strip chart:\n", 1106 | "fig = plot_timeseries_strip_chart(\n", 1107 | " array, \n", 1108 | " signal_list=features,\n", 1109 | " fig_width=5.5,\n", 1110 | " signal_height=0.1,\n", 1111 | " dates=df_list[0].index.to_pydatetime(),\n", 1112 | " day_interval=2\n", 1113 | ")" 1114 | ] 1115 | }, 1116 | { 1117 | "cell_type": "markdown", 1118 | "id": "fa605700", 1119 | "metadata": {}, 1120 | "source": [ 1121 | "### Recurrence plot" 1122 | ] 1123 | }, 1124 | { 1125 | "cell_type": "code", 1126 | "execution_count": null, 1127 | "id": "f85db253", 1128 | "metadata": {}, 1129 | "outputs": [], 1130 | "source": [ 1131 | "from pyts.image import RecurrencePlot\n", 1132 | "from pyts.image import GramianAngularField\n", 1133 | "from pyts.image import MarkovTransitionField" 1134 | ] 1135 | }, 1136 | { 1137 | "cell_type": "code", 1138 | "execution_count": null, 1139 | "id": "74fb9a2a", 1140 | "metadata": {}, 1141 | "outputs": [], 1142 | "source": [ 1143 | "hhid = household_ids[2]\n", 1144 | "hh_energy = energy_df.loc[hhid, :]\n", 1145 | "pump_extract_df = pump_df.iloc[:800, 0].copy()\n", 1146 | "\n", 1147 | "rp = RecurrencePlot(threshold='point', percentage=30)\n", 1148 | "weather_rp = rp.fit_transform(weather_df.loc['2013-01-01':'2013-01-31']['temperature'].values.reshape(1, -1))\n", 1149 | "energy_rp = rp.fit_transform(hh_energy['2012-07-01':'2012-07-15'].values.reshape(1, -1))\n", 1150 | "pump_rp = rp.fit_transform(pump_extract_df.values.reshape(1, -1))\n", 1151 | "\n", 1152 | "\n", 1153 | "fig = plt.figure(figsize=(5.5, 2.4))\n", 1154 | "gs = gridspec.GridSpec(nrows=3, ncols=2, width_ratios=[3,1], hspace=0.8, wspace=0.0)\n", 1155 | "\n", 1156 | "# Pump sensor 0:\n", 1157 | "ax = fig.add_subplot(gs[0])\n", 1158 | "ax.plot(pump_extract_df, label='Pump sensor 0')\n", 1159 | "ax.set_title(f'Pump sensor 0')\n", 1160 | "\n", 1161 | "ax = fig.add_subplot(gs[1])\n", 1162 | "ax.imshow(pump_rp[0], cmap='binary', origin='lower')\n", 1163 | "ax.axis('off')\n", 1164 | "\n", 1165 | "# Energy consumption line plot and recurrence plot:\n", 1166 | "ax = fig.add_subplot(gs[2])\n", 1167 | "plot1 = ax.plot(hh_energy['2012-07-01':'2012-07-15'], color=colors[1])\n", 1168 | "ax.set_title(f'Energy consumption for household {hhid}')\n", 1169 | "\n", 1170 | "ax = fig.add_subplot(gs[3])\n", 1171 | "ax.imshow(energy_rp[0], cmap='binary', origin='lower')\n", 1172 | "ax.axis('off')\n", 1173 | "\n", 1174 | "# Daily temperature line plot and recurrence plot:\n", 1175 | "ax = fig.add_subplot(gs[4])\n", 1176 | "start = '2012-07-01'\n", 1177 | "end = '2012-07-15'\n", 1178 | "ax.plot(weather_df.loc['2013-01-01':'2013-01-31']['temperature'], color=colors[2])\n", 1179 | "ax.set_title(f'Daily temperature')\n", 1180 | "\n", 1181 | "ax = fig.add_subplot(gs[5])\n", 1182 | "ax.imshow(weather_rp[0], cmap='binary', origin='lower')\n", 1183 | "ax.axis('off')\n", 1184 | "\n", 1185 | "plt.show()" 1186 | ] 1187 | }, 1188 | { 1189 | "cell_type": "code", 1190 | "execution_count": null, 1191 | "id": "950062ef", 1192 | "metadata": {}, 1193 | "outputs": [], 1194 | "source": [ 1195 | "hhid = household_ids[2]\n", 1196 | "hh_energy = energy_df.loc[hhid, :]\n", 1197 | "pump_extract_df = pump_df.iloc[:800, 0].copy()\n", 1198 | "\n", 1199 | "gaf = GramianAngularField(image_size=48, method='summation')\n", 1200 | "weather_gasf = gaf.fit_transform(weather_df.loc['2013-01-01':'2013-01-31']['temperature'].values.reshape(1, -1))\n", 1201 | "energy_gasf = gaf.fit_transform(hh_energy['2012-07-01':'2012-07-15'].values.reshape(1, -1))\n", 1202 | "pump_gasf = gaf.fit_transform(pump_extract_df.values.reshape(1, -1))\n", 1203 | "\n", 1204 | "fig = plt.figure(figsize=(5.5, 2.4))\n", 1205 | "gs = gridspec.GridSpec(nrows=3, ncols=2, width_ratios=[3,1], hspace=0.8, wspace=0.0)\n", 1206 | "\n", 1207 | "# Pump sensor 0:\n", 1208 | "ax = fig.add_subplot(gs[0])\n", 1209 | "ax.plot(pump_extract_df, label='Pump sensor 0')\n", 1210 | "ax.set_title(f'Pump sensor 0')\n", 1211 | "\n", 1212 | "ax = fig.add_subplot(gs[1])\n", 1213 | "ax.imshow(pump_gasf[0], cmap='RdBu_r', origin='lower')\n", 1214 | "ax.axis('off')\n", 1215 | "\n", 1216 | "# Energy consumption line plot and recurrence plot:\n", 1217 | "ax = fig.add_subplot(gs[2])\n", 1218 | "plot1 = ax.plot(hh_energy['2012-07-01':'2012-07-15'], color=colors[1])\n", 1219 | "ax.set_title(f'Energy consumption for household {hhid}')\n", 1220 | "\n", 1221 | "ax = fig.add_subplot(gs[3])\n", 1222 | "ax.imshow(energy_gasf[0], cmap='RdBu_r', origin='lower')\n", 1223 | "ax.axis('off')\n", 1224 | "\n", 1225 | "# Daily temperature line plot and recurrence plot:\n", 1226 | "ax = fig.add_subplot(gs[4])\n", 1227 | "start = '2012-07-01'\n", 1228 | "end = '2012-07-15'\n", 1229 | "ax.plot(weather_df.loc['2013-01-01':'2013-01-31']['temperature'], color=colors[2])\n", 1230 | "ax.set_title(f'Daily temperature')\n", 1231 | "\n", 1232 | "ax = fig.add_subplot(gs[5])\n", 1233 | "ax.imshow(weather_gasf[0], cmap='RdBu_r', origin='lower')\n", 1234 | "ax.axis('off')\n", 1235 | "\n", 1236 | "plt.show()" 1237 | ] 1238 | }, 1239 | { 1240 | "cell_type": "code", 1241 | "execution_count": null, 1242 | "id": "123a169c", 1243 | "metadata": {}, 1244 | "outputs": [], 1245 | "source": [ 1246 | "mtf = MarkovTransitionField(image_size=48)\n", 1247 | "\n", 1248 | "weather_mtf = mtf.fit_transform(weather_df.loc['2013-01-01':'2013-01-31']['temperature'].values.reshape(1, -1))\n", 1249 | "energy_mtf = mtf.fit_transform(hh_energy['2012-07-01':'2012-07-15'].values.reshape(1, -1))\n", 1250 | "pump_mtf = mtf.fit_transform(pump_extract_df.values.reshape(1, -1))\n", 1251 | "\n", 1252 | "fig = plt.figure(figsize=(5.5, 2.4))\n", 1253 | "gs = gridspec.GridSpec(nrows=3, ncols=2, width_ratios=[3,1], hspace=0.8, wspace=0.0)\n", 1254 | "\n", 1255 | "# Pump sensor 0:\n", 1256 | "ax = fig.add_subplot(gs[0])\n", 1257 | "ax.plot(pump_extract_df, label='Pump sensor 0')\n", 1258 | "ax.set_title(f'Pump sensor 0')\n", 1259 | "\n", 1260 | "ax = fig.add_subplot(gs[1])\n", 1261 | "ax.imshow(pump_mtf[0], cmap='RdBu_r', origin='lower')\n", 1262 | "ax.axis('off')\n", 1263 | "\n", 1264 | "# Energy consumption line plot and recurrence plot:\n", 1265 | "ax = fig.add_subplot(gs[2])\n", 1266 | "plot1 = ax.plot(hh_energy['2012-07-01':'2012-07-15'], color=colors[1])\n", 1267 | "ax.set_title(f'Energy consumption for household {hhid}')\n", 1268 | "\n", 1269 | "ax = fig.add_subplot(gs[3])\n", 1270 | "ax.imshow(energy_mtf[0], cmap='RdBu_r', origin='lower')\n", 1271 | "ax.axis('off')\n", 1272 | "\n", 1273 | "# Daily temperature line plot and recurrence plot:\n", 1274 | "ax = fig.add_subplot(gs[4])\n", 1275 | "start = '2012-07-01'\n", 1276 | "end = '2012-07-15'\n", 1277 | "ax.plot(weather_df.loc['2013-01-01':'2013-01-31']['temperature'], color=colors[2])\n", 1278 | "ax.set_title(f'Daily temperature')\n", 1279 | "\n", 1280 | "ax = fig.add_subplot(gs[5])\n", 1281 | "ax.imshow(weather_mtf[0], cmap='RdBu_r', origin='lower')\n", 1282 | "ax.axis('off')\n", 1283 | "\n", 1284 | "plt.show()" 1285 | ] 1286 | }, 1287 | { 1288 | "cell_type": "code", 1289 | "execution_count": null, 1290 | "id": "3f4e8ebe", 1291 | "metadata": {}, 1292 | "outputs": [], 1293 | "source": [ 1294 | "import matplotlib\n", 1295 | "import matplotlib.cm as cm\n", 1296 | "import networkx as nx\n", 1297 | "import community\n", 1298 | "\n", 1299 | "def compute_network_graph(markov_field):\n", 1300 | " G = nx.from_numpy_matrix(markov_field[0])\n", 1301 | "\n", 1302 | " # Uncover the communities in the current graph:\n", 1303 | " communities = community.best_partition(G)\n", 1304 | " nb_communities = len(pd.Series(communities).unique())\n", 1305 | " cmap = 'autumn'\n", 1306 | "\n", 1307 | " # Compute node colors and edges colors for the modularity encoding:\n", 1308 | " edge_colors = [matplotlib.colors.to_hex(cm.get_cmap(cmap)(communities.get(v)/(nb_communities - 1))) for u,v in G.edges()]\n", 1309 | " node_colors = [communities.get(node) for node in G.nodes()]\n", 1310 | " node_size = [nx.average_clustering(G, [node])*90 for node in G.nodes()]\n", 1311 | "\n", 1312 | " # Builds the options set to draw the network graph in the \"modularity\" configuration:\n", 1313 | " options = {\n", 1314 | " 'node_size': 10,\n", 1315 | " 'edge_color': edge_colors,\n", 1316 | " 'node_color': node_colors,\n", 1317 | " 'linewidths': 0,\n", 1318 | " 'width': 0.1,\n", 1319 | " 'alpha': 0.6,\n", 1320 | " 'with_labels': False,\n", 1321 | " 'cmap': cmap\n", 1322 | " }\n", 1323 | " \n", 1324 | " return G, options" 1325 | ] 1326 | }, 1327 | { 1328 | "cell_type": "code", 1329 | "execution_count": null, 1330 | "id": "8bfa993a", 1331 | "metadata": {}, 1332 | "outputs": [], 1333 | "source": [ 1334 | "fig = plt.figure(figsize=(5.5, 2.4))\n", 1335 | "gs = gridspec.GridSpec(nrows=3, ncols=2, width_ratios=[3,1], hspace=0.8, wspace=0.0)\n", 1336 | "\n", 1337 | "# Pump sensor 0:\n", 1338 | "ax = fig.add_subplot(gs[0])\n", 1339 | "ax.plot(pump_extract_df, label='Pump sensor 0')\n", 1340 | "ax.set_title(f'Pump sensor 0')\n", 1341 | "\n", 1342 | "ax = fig.add_subplot(gs[1])\n", 1343 | "G, options = compute_network_graph(weather_mtf)\n", 1344 | "nx.draw_networkx(G, **options, pos=nx.spring_layout(G), ax=ax)\n", 1345 | "ax.axis('off')\n", 1346 | "\n", 1347 | "# Energy consumption line plot and recurrence plot:\n", 1348 | "ax = fig.add_subplot(gs[2])\n", 1349 | "plot1 = ax.plot(hh_energy['2012-07-01':'2012-07-15'], color=colors[1])\n", 1350 | "ax.set_title(f'Energy consumption for household {hhid}')\n", 1351 | "\n", 1352 | "ax = fig.add_subplot(gs[3])\n", 1353 | "G, options = compute_network_graph(energy_mtf)\n", 1354 | "nx.draw_networkx(G, **options, pos=nx.spring_layout(G), ax=ax)\n", 1355 | "ax.axis('off')\n", 1356 | "\n", 1357 | "# Daily temperature line plot and recurrence plot:\n", 1358 | "ax = fig.add_subplot(gs[4])\n", 1359 | "start = '2012-07-01'\n", 1360 | "end = '2012-07-15'\n", 1361 | "ax.plot(weather_df.loc['2013-01-01':'2013-01-31']['temperature'], color=colors[2])\n", 1362 | "ax.set_title(f'Daily temperature')\n", 1363 | "\n", 1364 | "ax = fig.add_subplot(gs[5])\n", 1365 | "G, options = compute_network_graph(weather_mtf)\n", 1366 | "nx.draw_networkx(G, **options, pos=nx.spring_layout(G), ax=ax)\n", 1367 | "ax.axis('off')\n", 1368 | "\n", 1369 | "plt.show()" 1370 | ] 1371 | }, 1372 | { 1373 | "cell_type": "markdown", 1374 | "id": "979ad8ea", 1375 | "metadata": {}, 1376 | "source": [ 1377 | "## Symbolic representation\n", 1378 | "---" 1379 | ] 1380 | }, 1381 | { 1382 | "cell_type": "code", 1383 | "execution_count": null, 1384 | "id": "102d6a60", 1385 | "metadata": {}, 1386 | "outputs": [], 1387 | "source": [ 1388 | "from pyts.bag_of_words import BagOfWords\n", 1389 | "\n", 1390 | "window_size, word_size = 30, 5\n", 1391 | "bow = BagOfWords(window_size=window_size, word_size=word_size, window_step=window_size, numerosity_reduction=False)\n", 1392 | "X = weather_df.loc['2013-01-01':'2013-01-31']['temperature'].values.reshape(1, -1)\n", 1393 | "X_bow = bow.transform(X)\n", 1394 | "time_index = weather_df.loc['2013-01-01':'2013-01-31']['temperature'].index\n", 1395 | "len(X_bow[0].replace(' ', ''))" 1396 | ] 1397 | }, 1398 | { 1399 | "cell_type": "code", 1400 | "execution_count": null, 1401 | "id": "96878a1d", 1402 | "metadata": {}, 1403 | "outputs": [], 1404 | "source": [ 1405 | "# Plot the considered subseries\n", 1406 | "plt.figure(figsize=(5, 2))\n", 1407 | "splits_series = np.linspace(0, X.shape[1], 1 + X.shape[1] // window_size, dtype='int64')\n", 1408 | "for start, end in zip(splits_series[:-1], np.clip(splits_series[1:] + 1, 0, X.shape[1])):\n", 1409 | " plt.plot(np.arange(start, end), X[0, start:end], 'o-', linewidth=0.5, ms=0.1)\n", 1410 | "\n", 1411 | "# Plot the corresponding letters\n", 1412 | "splits_letters = np.linspace(0, X.shape[1], 1 + word_size * X.shape[1] // window_size)\n", 1413 | "splits_letters = ((splits_letters[:-1] + splits_letters[1:]) / 2)\n", 1414 | "splits_letters = splits_letters.astype('int64')\n", 1415 | "\n", 1416 | "for i, (x, text) in enumerate(zip(splits_letters, X_bow[0].replace(' ', ''))):\n", 1417 | " t = plt.text(x, X[0, x], text, color=\"C{}\".format(i // 5), fontsize=3.5)\n", 1418 | " t.set_bbox(dict(facecolor='#FFFFFF', alpha=0.5, edgecolor=\"C{}\".format(i // 5), boxstyle='round4'))\n", 1419 | "\n", 1420 | "plt.title('Bag-of-words representation for weather temperature')\n", 1421 | "plt.tight_layout()\n", 1422 | "plt.show()" 1423 | ] 1424 | }, 1425 | { 1426 | "cell_type": "code", 1427 | "execution_count": null, 1428 | "id": "94bb8ed5", 1429 | "metadata": {}, 1430 | "outputs": [], 1431 | "source": [ 1432 | "from pyts.transformation import WEASEL\n", 1433 | "from sklearn.preprocessing import LabelEncoder" 1434 | ] 1435 | }, 1436 | { 1437 | "cell_type": "code", 1438 | "execution_count": null, 1439 | "id": "d501fe5f", 1440 | "metadata": {}, 1441 | "outputs": [], 1442 | "source": [ 1443 | "X_train = ecg_df.iloc[:, 1:].values\n", 1444 | "y_train = ecg_df.iloc[:, 0]\n", 1445 | "y_train = LabelEncoder().fit_transform(y_train)\n", 1446 | "weasel = WEASEL(word_size=3, n_bins=3, window_sizes=[10, 25], sparse=False)\n", 1447 | "X_weasel = weasel.fit_transform(X_train, y_train)\n", 1448 | "vocabulary_length = len(weasel.vocabulary_)" 1449 | ] 1450 | }, 1451 | { 1452 | "cell_type": "code", 1453 | "execution_count": null, 1454 | "id": "7eef27d7", 1455 | "metadata": {}, 1456 | "outputs": [], 1457 | "source": [ 1458 | "plt.figure(figsize=(5,1.5))\n", 1459 | "width = 0.4\n", 1460 | "x = np.arange(vocabulary_length) - width / 2\n", 1461 | "for i in range(len(X_weasel[y_train == 0])):\n", 1462 | " if i == 0:\n", 1463 | " plt.bar(x, X_weasel[y_train == 0][i], width=width, alpha=0.25, color=colors[1], label='Time series for Ischemia')\n", 1464 | " else:\n", 1465 | " plt.bar(x, X_weasel[y_train == 0][i], width=width, alpha=0.25, color=colors[1])\n", 1466 | " \n", 1467 | "for i in range(len(X_weasel[y_train == 1])):\n", 1468 | " if i == 0:\n", 1469 | " plt.bar(x+width, X_weasel[y_train == 1][i], width=width, alpha=0.25, color=colors[0], label='Time series for Normal heartbeat')\n", 1470 | " else:\n", 1471 | " plt.bar(x+width, X_weasel[y_train == 1][i], width=width, alpha=0.25, color=colors[0])\n", 1472 | " \n", 1473 | "plt.xticks(\n", 1474 | " np.arange(vocabulary_length),\n", 1475 | " np.vectorize(weasel.vocabulary_.get)(np.arange(X_weasel[0].size)),\n", 1476 | " fontsize=2,\n", 1477 | " rotation=60\n", 1478 | ")\n", 1479 | " \n", 1480 | "plt.legend(loc='upper right')\n", 1481 | "plt.show()" 1482 | ] 1483 | }, 1484 | { 1485 | "cell_type": "markdown", 1486 | "id": "6677dddd", 1487 | "metadata": {}, 1488 | "source": [ 1489 | "## Statistics\n", 1490 | "---" 1491 | ] 1492 | }, 1493 | { 1494 | "cell_type": "code", 1495 | "execution_count": null, 1496 | "id": "5c2ba909", 1497 | "metadata": {}, 1498 | "outputs": [], 1499 | "source": [ 1500 | "plt.rcParams['xtick.labelsize'] = 3\n", 1501 | "\n", 1502 | "import statsmodels.api as sm\n", 1503 | "\n", 1504 | "fig = plt.figure(figsize=(5.5, 3))\n", 1505 | "gs = gridspec.GridSpec(nrows=3, ncols=2, width_ratios=[1,1], hspace=0.8)\n", 1506 | "\n", 1507 | "# Pump\n", 1508 | "ax = fig.add_subplot(gs[0])\n", 1509 | "ax.plot(pump_extract_df, label='Pump sensor 0')\n", 1510 | "ax.set_title(f'Pump sensor 0')\n", 1511 | "ax.tick_params(axis='x', which='both', labelbottom=False)\n", 1512 | "\n", 1513 | "ax = fig.add_subplot(gs[1])\n", 1514 | "sm.graphics.tsa.plot_acf(pump_extract_df.values.squeeze(), ax=ax, markersize=1, title='')\n", 1515 | "ax.set_ylim(-1.2, 1.2)\n", 1516 | "ax.tick_params(axis='x', which='major', labelsize=4)\n", 1517 | "\n", 1518 | "# Energy consumption\n", 1519 | "ax = fig.add_subplot(gs[2])\n", 1520 | "ax.plot(hh_energy['2012-07-01':'2012-07-15'], color=colors[1])\n", 1521 | "ax.set_title(f'Energy consumption for household {hhid}')\n", 1522 | "ax.tick_params(axis='x', which='both', labelbottom=False)\n", 1523 | "\n", 1524 | "ax = fig.add_subplot(gs[3])\n", 1525 | "sm.graphics.tsa.plot_acf(hh_energy['2012-07-01':'2012-07-15'].values.squeeze(), ax=ax, markersize=1, title='')\n", 1526 | "ax.set_ylim(-0.3, 0.3)\n", 1527 | "ax.tick_params(axis='x', which='major', labelsize=4)\n", 1528 | "\n", 1529 | "# Daily temperature:\n", 1530 | "ax = fig.add_subplot(gs[4])\n", 1531 | "start = '2012-07-01'\n", 1532 | "end = '2012-07-15'\n", 1533 | "ax.plot(weather_df.loc['2013-01-01':'2013-01-31']['temperature'], color=colors[2])\n", 1534 | "ax.set_title(f'Daily temperature')\n", 1535 | "ax.tick_params(axis='x', which='both', labelbottom=False)\n", 1536 | "\n", 1537 | "ax = fig.add_subplot(gs[5])\n", 1538 | "sm.graphics.tsa.plot_acf(weather_df.loc['2013-01-01':'2013-01-31']['temperature'].values.squeeze(), ax=ax, markersize=1, title='')\n", 1539 | "ax.set_ylim(-1.2, 1.2)\n", 1540 | "ax.tick_params(axis='x', which='major', labelsize=4)\n", 1541 | "\n", 1542 | "plt.show()" 1543 | ] 1544 | }, 1545 | { 1546 | "cell_type": "code", 1547 | "execution_count": null, 1548 | "id": "0f9cd561", 1549 | "metadata": {}, 1550 | "outputs": [], 1551 | "source": [ 1552 | "from statsmodels.tsa.seasonal import STL\n", 1553 | "\n", 1554 | "endog = endog.resample('30T').mean()" 1555 | ] 1556 | }, 1557 | { 1558 | "cell_type": "code", 1559 | "execution_count": null, 1560 | "id": "2af017b7", 1561 | "metadata": {}, 1562 | "outputs": [], 1563 | "source": [ 1564 | "plt.rcParams['lines.markersize'] = 1\n", 1565 | "\n", 1566 | "title = f'Energy consumption for household {hhid}'\n", 1567 | "endog = hh_energy['2012-07-01':'2012-07-15']\n", 1568 | "endog.columns = [title]\n", 1569 | "endog = endog[title]\n", 1570 | "stl = STL(endog, period=48)\n", 1571 | "res = stl.fit()\n", 1572 | "fig = res.plot()\n", 1573 | "\n", 1574 | "fig = plt.gcf()\n", 1575 | "fig.set_size_inches(5.5, 4)\n", 1576 | "\n", 1577 | "plt.show()" 1578 | ] 1579 | }, 1580 | { 1581 | "cell_type": "markdown", 1582 | "id": "ebb389e7", 1583 | "metadata": {}, 1584 | "source": [ 1585 | "## Binary segmentation\n", 1586 | "---" 1587 | ] 1588 | }, 1589 | { 1590 | "cell_type": "code", 1591 | "execution_count": null, 1592 | "id": "b4494d2e", 1593 | "metadata": {}, 1594 | "outputs": [], 1595 | "source": [ 1596 | "signal = weather_df.loc['2013-01-01':'2013-01-31']['temperature'].values.squeeze()\n", 1597 | "algo = rpt.Binseg(model='l2').fit(signal)\n", 1598 | "my_bkps = algo.predict(n_bkps=3)" 1599 | ] 1600 | }, 1601 | { 1602 | "cell_type": "code", 1603 | "execution_count": null, 1604 | "id": "d13ccee7", 1605 | "metadata": {}, 1606 | "outputs": [], 1607 | "source": [ 1608 | "my_bkps = [0] + my_bkps\n", 1609 | "my_bkps" 1610 | ] 1611 | }, 1612 | { 1613 | "cell_type": "code", 1614 | "execution_count": null, 1615 | "id": "6379306a", 1616 | "metadata": {}, 1617 | "outputs": [], 1618 | "source": [ 1619 | "fig = plt.figure(figsize=(5.5,1))\n", 1620 | "start = '2012-07-01'\n", 1621 | "end = '2012-07-15'\n", 1622 | "plt.plot(weather_df.loc['2013-01-01':'2013-01-31']['temperature'], color='#FFFFFF', linewidth=1.2, alpha=0.8)\n", 1623 | "plt.plot(weather_df.loc['2013-01-01':'2013-01-31']['temperature'], color=colors[2], linewidth=0.7)\n", 1624 | "\n", 1625 | "plt.title(f'Daily temperature')\n", 1626 | "plt.xticks(rotation=60, fontsize=4)\n", 1627 | "\n", 1628 | "weather_index = weather_df.loc['2013-01-01':'2013-01-31']['temperature'].index\n", 1629 | "\n", 1630 | "for index, bkps in enumerate(my_bkps[:-1]):\n", 1631 | " x1 = weather_index[my_bkps[index]]\n", 1632 | " x2 = weather_index[np.clip(my_bkps[index+1], 0, len(weather_index)-1)]\n", 1633 | " \n", 1634 | " plt.axvspan(x1, x2, color=colors[index % 5], alpha=0.2)\n", 1635 | "\n", 1636 | "plt.title('Daily temperature segmentation')\n", 1637 | "plt.show()" 1638 | ] 1639 | } 1640 | ], 1641 | "metadata": { 1642 | "kernelspec": { 1643 | "display_name": "conda_python3", 1644 | "language": "python", 1645 | "name": "conda_python3" 1646 | }, 1647 | "language_info": { 1648 | "codemirror_mode": { 1649 | "name": "ipython", 1650 | "version": 3 1651 | }, 1652 | "file_extension": ".py", 1653 | "mimetype": "text/x-python", 1654 | "name": "python", 1655 | "nbconvert_exporter": "python", 1656 | "pygments_lexer": "ipython3", 1657 | "version": "3.6.13" 1658 | } 1659 | }, 1660 | "nbformat": 4, 1661 | "nbformat_minor": 5 1662 | } 1663 | -------------------------------------------------------------------------------- /Chapter03/chapter3-dataset-preparation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "eabe8a1f", 6 | "metadata": {}, 7 | "source": [ 8 | "# Time series analysis on AWS\n", 9 | "*Chapter 3 - Creating a project and ingesting your data*" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "id": "bcb5ce53", 15 | "metadata": {}, 16 | "source": [ 17 | "## Initializations\n", 18 | "---" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "id": "34609045", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "!pip install --quiet tqdm kaggle" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "id": "4b2e4f0a", 34 | "metadata": {}, 35 | "source": [ 36 | "### Imports" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 1, 42 | "id": "e5ea312c", 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "import numpy as np\n", 47 | "import os\n", 48 | "import pandas as pd\n", 49 | "import warnings\n", 50 | "import zipfile\n", 51 | "\n", 52 | "from tqdm import tqdm" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "id": "069698f4", 58 | "metadata": {}, 59 | "source": [ 60 | "### Parameters" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 2, 66 | "id": "1f46465d", 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "RAW_DATA = os.path.join('..', 'Data', 'raw')\n", 71 | "DATA = os.path.join('..', 'Data')\n", 72 | "warnings.filterwarnings(\"ignore\")\n", 73 | "os.makedirs(RAW_DATA, exist_ok=True)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "id": "7f5af546", 79 | "metadata": {}, 80 | "source": [ 81 | "### Helper functions" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 3, 87 | "id": "3ce3ebfe", 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "from IPython.display import display_html\n", 92 | "\n", 93 | "def display_multiple_dataframe(*args, max_rows=None, max_cols=None):\n", 94 | " html_str = ''\n", 95 | " for df in args:\n", 96 | " html_str += df.to_html(max_cols=max_cols, max_rows=max_rows)\n", 97 | " \n", 98 | " display_html(html_str.replace('table','table style=\"display:inline\"'), raw=True)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "id": "cfeca823", 104 | "metadata": {}, 105 | "source": [ 106 | "### Downloading datasets" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "id": "f75fde65", 112 | "metadata": {}, 113 | "source": [ 114 | "To download the London household energy consumption with weather data from Kaggle, you will need a Kaggle API token. To do so, you will need to have an account on Kaggle and create a token that you install on your machine. You can follow [**this link**](https://www.kaggle.com/docs/api) to get started with the Kaggle API. Once generated, make sure your Kaggle token is stored in the `~/.kaggle/kaggle.json` file, or the next cells will issue an error. To get a Kaggle token, go to kaggle.com and create an account. Then navigate to **My account** and scroll down to the API section. There, click the **Create new API token** button:\n", 115 | "\n", 116 | "" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 4, 122 | "id": "31f242da", 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "File found, skipping download\n" 130 | ] 131 | } 132 | ], 133 | "source": [ 134 | "FILE_NAME = 'smart-meters-in-london.zip'\n", 135 | "ARCHIVE_PATH = os.path.join(RAW_DATA, FILE_NAME)\n", 136 | "FILE_PATH = os.path.join(DATA, 'energy-london', 'smart-meters-in-london.zip')\n", 137 | "FILE_DIR = os.path.dirname(FILE_PATH)\n", 138 | "\n", 139 | "# Checks if the data were already downloaded:\n", 140 | "if os.path.exists(os.path.join(DATA, 'energy-london', 'acorn_details.csv')):\n", 141 | " print(\"File found, skipping download\")\n", 142 | " \n", 143 | "else:\n", 144 | " # Downloading and unzipping datasets from Kaggle:\n", 145 | " print(\"Downloading dataset (2.26G), can take a few minutes depending on your connection\")\n", 146 | " os.makedirs(os.path.join(DATA, 'energy-london'), exist_ok=True)\n", 147 | " !kaggle datasets download -d jeanmidev/smart-meters-in-london -p $RAW_DATA\n", 148 | " \n", 149 | " print('Unzipping files...')\n", 150 | " zip_ref = zipfile.ZipFile(ARCHIVE_PATH, 'r')\n", 151 | " zip_ref.extractall(FILE_DIR + '/')\n", 152 | " zip_ref.close()\n", 153 | " \n", 154 | " !rm $DATA/energy-london/*zip\n", 155 | " !rm $DATA/energy-london/*gz\n", 156 | " !mv $DATA/energy-london/halfhourly_dataset/halfhourly_dataset/* $DATA/energy-london/halfhourly_dataset\n", 157 | " !rm -Rf $DATA/energy-london/halfhourly_dataset/halfhourly_dataset\n", 158 | " !mv $DATA/energy-london/daily_dataset/daily_dataset/* $DATA/energy-london/daily_dataset\n", 159 | " !rm -Rf $DATA/energy-london/daily_dataset/daily_dataset" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "id": "546a8212", 165 | "metadata": {}, 166 | "source": [ 167 | "## Dataset visualization\n", 168 | "---" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "id": "9d9797d3", 174 | "metadata": {}, 175 | "source": [ 176 | "We want to filter out households that are are subject to the dToU tariff and keep only the ones with a known ACORN (i.e. not in the ACORN-U group): this will allow us to better model future analysis by adding the Acorn detail informations (which by definitions, won't be available for the ACORN-U group)." 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 5, 182 | "id": "9ae29c75", 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "name": "stdout", 187 | "output_type": "stream", 188 | "text": [ 189 | "(4404, 5)\n" 190 | ] 191 | }, 192 | { 193 | "data": { 194 | "text/html": [ 195 | "
\n", 196 | "\n", 209 | "\n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | "
LCLidstdorToUAcornAcorn_groupedfile
2MAC000002StdACORN-AAffluentblock_0
3MAC003613StdACORN-AAffluentblock_0
4MAC003597StdACORN-AAffluentblock_0
5MAC003579StdACORN-AAffluentblock_0
6MAC003566StdACORN-AAffluentblock_0
\n", 263 | "
" 264 | ], 265 | "text/plain": [ 266 | " LCLid stdorToU Acorn Acorn_grouped file\n", 267 | "2 MAC000002 Std ACORN-A Affluent block_0\n", 268 | "3 MAC003613 Std ACORN-A Affluent block_0\n", 269 | "4 MAC003597 Std ACORN-A Affluent block_0\n", 270 | "5 MAC003579 Std ACORN-A Affluent block_0\n", 271 | "6 MAC003566 Std ACORN-A Affluent block_0" 272 | ] 273 | }, 274 | "execution_count": 5, 275 | "metadata": {}, 276 | "output_type": "execute_result" 277 | } 278 | ], 279 | "source": [ 280 | "household_filename = os.path.join(DATA, 'energy-london', 'informations_households.csv')\n", 281 | "household_df = pd.read_csv(household_filename)\n", 282 | "household_df = household_df[(household_df['stdorToU'] == 'Std') & (household_df['Acorn'] != 'ACORN-U')]\n", 283 | "household_ids = household_df['LCLid'].tolist()\n", 284 | "print(household_df.shape)\n", 285 | "household_df.head()" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 6, 291 | "id": "cca71b9a", 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "bad_household_ids = [\n", 296 | " 'MAC002136', 'MAC002594', 'MAC000636', 'MAC001309', 'MAC001269', 'MAC000037', 'MAC002072', 'MAC000197', \n", 297 | " 'MAC001644', 'MAC005040', 'MAC000404', 'MAC004982', 'MAC001959', 'MAC002564', 'MAC001829', 'MAC000504', \n", 298 | " 'MAC001522', 'MAC001456', 'MAC004732', 'MAC000915', 'MAC005232', 'MAC003993', 'MAC000530', 'MAC000235', \n", 299 | " 'MAC001549', 'MAC000220', 'MAC005344', 'MAC000120', 'MAC000172', 'MAC002050', 'MAC000287', 'MAC005191', \n", 300 | " 'MAC000964'\n", 301 | "]\n", 302 | "household_df = household_df[~household_df['LCLid'].isin(bad_household_ids)]" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "id": "a4c97db3", 308 | "metadata": {}, 309 | "source": [ 310 | "#### Associating households with they energy consumption data\n", 311 | "Each household (with an ID starting by `MACxxxxx` in the table above) has its consumption data stored in a block file name `block_xx`. This file is also available from the `informations_household.csv` file extracted above. We have the association between `household_id` and `block_file`: we can open each of them and keep the consumption for the households of interest. All these data will be concatenated into an `energy_df` dataframe. For the remaining of this analysis, we are going to extract 1 year of data between July 1st, 2012 and June 30th, 2013 and we are going to keep only the households with almost 100% complete data for this period:" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 7, 317 | "id": "91caae93", 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [ 321 | "start = pd.to_datetime('2012-07-01 00:00:00')\n", 322 | "end = pd.to_datetime('2013-06-30 23:59:00')\n", 323 | "threshold = 0.95\n", 324 | "min_data_points = ((end - start).days + 1)*24*2 * threshold" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 8, 330 | "id": "4971259e", 331 | "metadata": {}, 332 | "outputs": [ 333 | { 334 | "name": "stdout", 335 | "output_type": "stream", 336 | "text": [ 337 | "Half-hourly consumption file already exists, loading from disk...\n", 338 | "Done.\n" 339 | ] 340 | } 341 | ], 342 | "source": [ 343 | "consumption_file = os.path.join(DATA, 'energy-london', 'half_hourly_consumption-v2.csv')\n", 344 | "if os.path.exists(consumption_file):\n", 345 | " print('Half-hourly consumption file already exists, loading from disk...')\n", 346 | " energy_df = pd.read_csv(consumption_file)\n", 347 | " energy_df['timestamp'] = pd.to_datetime(energy_df['timestamp'], format='%Y-%m-%d %H:%M:%S.%f')\n", 348 | " print('Done.')\n", 349 | " \n", 350 | "else:\n", 351 | " print('Half-hourly consumption file not found. We need to generate it.')\n", 352 | " \n", 353 | " # We know have the block number we can use to open the right file:\n", 354 | " energy_df = pd.DataFrame()\n", 355 | " target_block_files = household_df['file'].unique().tolist()\n", 356 | " print('- {} block files to process: '.format(len(target_block_files)), end='')\n", 357 | " df_list = []\n", 358 | " for block_file in tqdm(target_block_files):\n", 359 | " # Reads the current block file:\n", 360 | " current_filename = os.path.join(DATA, 'energy-london', 'halfhourly_dataset', '{}.csv'.format(block_file))\n", 361 | " df = pd.read_csv(current_filename)\n", 362 | " \n", 363 | " # Set readable column names and adjust data types:\n", 364 | " df.columns = ['household_id', 'timestamp', 'energy']\n", 365 | " df = df.replace(to_replace='Null', value=0.0)\n", 366 | " df['energy'] = df['energy'].astype(np.float64)\n", 367 | " df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%d %H:%M:%S.%f')\n", 368 | " df = df[(df['timestamp'] >= start) & (df['timestamp'] <= end)]\n", 369 | " \n", 370 | " # We filter on the households sampled earlier:\n", 371 | " df_list.append(df[df['household_id'].isin(household_ids)].reset_index(drop=True))\n", 372 | " del df\n", 373 | " \n", 374 | " # Concatenate with the main dataframe:\n", 375 | " energy_df = pd.concat(df_list, axis='index', ignore_index=True)\n", 376 | " \n", 377 | " datapoints = energy_df.groupby(by='household_id').count()\n", 378 | " datapoints = datapoints[datapoints['timestamp'] < min_data_points]\n", 379 | " hhid_to_remove = datapoints.index.tolist()\n", 380 | " energy_df = energy_df[~energy_df['household_id'].isin(hhid_to_remove)]\n", 381 | "\n", 382 | " # Let's save this dataset to disk, we will use it from now on:\n", 383 | " print('Saving file to disk... ', end='')\n", 384 | " energy_df.to_csv(consumption_file, index=False)\n", 385 | " print('Done.')" 386 | ] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "id": "02577d57", 391 | "metadata": {}, 392 | "source": [ 393 | "Here is an extract from one of the half hourly block file:" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 9, 399 | "id": "5d888872", 400 | "metadata": {}, 401 | "outputs": [ 402 | { 403 | "data": { 404 | "text/html": [ 405 | "
\n", 406 | "\n", 419 | "\n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | "
LCLidtstpenergy(kWh/hh)
342953MAC0025432012-07-09 11:30:00.00000000.054
342954MAC0025432012-07-09 12:00:00.00000000.053
342955MAC0025432012-07-09 12:30:00.00000000.053
342956MAC0025432012-07-09 13:00:00.00000000.053
342957MAC0025432012-07-09 13:30:00.00000000.053
\n", 461 | "
" 462 | ], 463 | "text/plain": [ 464 | " LCLid tstp energy(kWh/hh)\n", 465 | "342953 MAC002543 2012-07-09 11:30:00.0000000 0.054 \n", 466 | "342954 MAC002543 2012-07-09 12:00:00.0000000 0.053 \n", 467 | "342955 MAC002543 2012-07-09 12:30:00.0000000 0.053 \n", 468 | "342956 MAC002543 2012-07-09 13:00:00.0000000 0.053 \n", 469 | "342957 MAC002543 2012-07-09 13:30:00.0000000 0.053 " 470 | ] 471 | }, 472 | "execution_count": 9, 473 | "metadata": {}, 474 | "output_type": "execute_result" 475 | } 476 | ], 477 | "source": [ 478 | "block_filename = os.path.join(DATA, 'energy-london', 'halfhourly_dataset', 'block_12.csv')\n", 479 | "block_df = pd.read_csv(block_filename)\n", 480 | "block_df[342953:].head()" 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": 10, 486 | "id": "4c7b4cb7", 487 | "metadata": {}, 488 | "outputs": [ 489 | { 490 | "name": "stdout", 491 | "output_type": "stream", 492 | "text": [ 493 | "(8760, 8)\n" 494 | ] 495 | }, 496 | { 497 | "data": { 498 | "text/html": [ 499 | "
\n", 500 | "\n", 513 | "\n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | "
visibilitywindBearingtemperaturedewPointpressureapparentTemperaturewindSpeedhumidity
time
2012-07-01 00:00:0013.2423113.4410.251011.3313.444.140.81
2012-07-01 01:00:0013.3723213.2510.201011.6113.254.070.82
2012-07-01 02:00:0013.0822912.289.621011.8112.283.760.84
2012-07-01 03:00:0013.7922911.779.231011.9011.773.750.84
2012-07-01 04:00:0013.6822311.269.061012.2811.263.880.86
...........................
2013-06-30 19:00:0015.5024521.0312.391018.5921.035.230.58
2013-06-30 20:00:0016.0924819.6712.341018.5319.674.300.63
2013-06-30 21:00:0014.3125418.4611.991018.8618.464.840.66
2013-06-30 22:00:0013.5826716.2511.401019.1016.254.490.73
2013-06-30 23:00:0014.0826515.1010.671018.9115.104.340.75
\n", 662 | "

8760 rows × 8 columns

\n", 663 | "
" 664 | ], 665 | "text/plain": [ 666 | " visibility windBearing temperature dewPoint pressure \\\n", 667 | "time \n", 668 | "2012-07-01 00:00:00 13.24 231 13.44 10.25 1011.33 \n", 669 | "2012-07-01 01:00:00 13.37 232 13.25 10.20 1011.61 \n", 670 | "2012-07-01 02:00:00 13.08 229 12.28 9.62 1011.81 \n", 671 | "2012-07-01 03:00:00 13.79 229 11.77 9.23 1011.90 \n", 672 | "2012-07-01 04:00:00 13.68 223 11.26 9.06 1012.28 \n", 673 | "... ... ... ... ... ... \n", 674 | "2013-06-30 19:00:00 15.50 245 21.03 12.39 1018.59 \n", 675 | "2013-06-30 20:00:00 16.09 248 19.67 12.34 1018.53 \n", 676 | "2013-06-30 21:00:00 14.31 254 18.46 11.99 1018.86 \n", 677 | "2013-06-30 22:00:00 13.58 267 16.25 11.40 1019.10 \n", 678 | "2013-06-30 23:00:00 14.08 265 15.10 10.67 1018.91 \n", 679 | "\n", 680 | " apparentTemperature windSpeed humidity \n", 681 | "time \n", 682 | "2012-07-01 00:00:00 13.44 4.14 0.81 \n", 683 | "2012-07-01 01:00:00 13.25 4.07 0.82 \n", 684 | "2012-07-01 02:00:00 12.28 3.76 0.84 \n", 685 | "2012-07-01 03:00:00 11.77 3.75 0.84 \n", 686 | "2012-07-01 04:00:00 11.26 3.88 0.86 \n", 687 | "... ... ... ... \n", 688 | "2013-06-30 19:00:00 21.03 5.23 0.58 \n", 689 | "2013-06-30 20:00:00 19.67 4.30 0.63 \n", 690 | "2013-06-30 21:00:00 18.46 4.84 0.66 \n", 691 | "2013-06-30 22:00:00 16.25 4.49 0.73 \n", 692 | "2013-06-30 23:00:00 15.10 4.34 0.75 \n", 693 | "\n", 694 | "[8760 rows x 8 columns]" 695 | ] 696 | }, 697 | "execution_count": 10, 698 | "metadata": {}, 699 | "output_type": "execute_result" 700 | } 701 | ], 702 | "source": [ 703 | "weather_filename = os.path.join(DATA, 'energy-london', 'weather_hourly_darksky.csv')\n", 704 | "\n", 705 | "weather_df = pd.read_csv(weather_filename)\n", 706 | "weather_df['time'] = pd.to_datetime(weather_df['time'], format='%Y-%m-%d %H:%M:%S')\n", 707 | "weather_df = weather_df.drop(columns=['precipType', 'icon', 'summary'])\n", 708 | "weather_df = weather_df.sort_values(by='time')\n", 709 | "weather_df = weather_df.set_index('time')\n", 710 | "weather_df = weather_df[start:end]\n", 711 | "\n", 712 | "# Let's make sure we have one datapoint per hour to match \n", 713 | "# the frequency used for the household energy consumption data:\n", 714 | "weather_df = weather_df.resample(rule='1H').mean() # This will generate NaN values timestamp missing data\n", 715 | "weather_df = weather_df.interpolate(method='linear') # This will fill the missing values with the average \n", 716 | "\n", 717 | "print(weather_df.shape)\n", 718 | "weather_df" 719 | ] 720 | }, 721 | { 722 | "cell_type": "code", 723 | "execution_count": 11, 724 | "id": "b1878483", 725 | "metadata": {}, 726 | "outputs": [ 727 | { 728 | "data": { 729 | "text/html": [ 730 | "
\n", 731 | "\n", 744 | "\n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | "
MAIN CATEGORIESCATEGORIESREFERENCEACORN-AACORN-BACORN-E
696CONTACTPreferred ChannelEmail137.000000159.00000073.000000
545DIGITALPurchased on the internetCar Insurance123.000000127.000000112.000000
271FINANCEExpenditure per person per weekFurnishings, household equipment and routine m...144.000000132.000000102.000000
667SHOPPINGFurniture & Fittings StoresMass Market104.000000123.000000116.000000
673SHOPPINGHigh Street RetailersCosta107.617424126.495528119.279432
794LEISURE TIMEInterests & HobbiesReading Books107.000000107.00000068.000000
269FINANCEExpenditure per person per weekClothing and footwear134.000000127.000000108.000000
289DIGITALDigital AttitudesI worry that any personal information entered ...105.000000105.000000100.000000
448DIGITALTypes of internet usage : Tablet / iPadDownload content/information from an advertisi...99.000000107.000000133.000000
733COMMUNITY SAFETYCrime Survey for EnglandTaking everything into account I have confiden...105.000000105.000000104.000000
\n", 849 | "
" 850 | ], 851 | "text/plain": [ 852 | " MAIN CATEGORIES CATEGORIES \\\n", 853 | "696 CONTACT Preferred Channel \n", 854 | "545 DIGITAL Purchased on the internet \n", 855 | "271 FINANCE Expenditure per person per week \n", 856 | "667 SHOPPING Furniture & Fittings Stores \n", 857 | "673 SHOPPING High Street Retailers \n", 858 | "794 LEISURE TIME Interests & Hobbies \n", 859 | "269 FINANCE Expenditure per person per week \n", 860 | "289 DIGITAL Digital Attitudes \n", 861 | "448 DIGITAL Types of internet usage : Tablet / iPad \n", 862 | "733 COMMUNITY SAFETY Crime Survey for England \n", 863 | "\n", 864 | " REFERENCE ACORN-A \\\n", 865 | "696 Email 137.000000 \n", 866 | "545 Car Insurance 123.000000 \n", 867 | "271 Furnishings, household equipment and routine m... 144.000000 \n", 868 | "667 Mass Market 104.000000 \n", 869 | "673 Costa 107.617424 \n", 870 | "794 Reading Books 107.000000 \n", 871 | "269 Clothing and footwear 134.000000 \n", 872 | "289 I worry that any personal information entered ... 105.000000 \n", 873 | "448 Download content/information from an advertisi... 99.000000 \n", 874 | "733 Taking everything into account I have confiden... 105.000000 \n", 875 | "\n", 876 | " ACORN-B ACORN-E \n", 877 | "696 159.000000 73.000000 \n", 878 | "545 127.000000 112.000000 \n", 879 | "271 132.000000 102.000000 \n", 880 | "667 123.000000 116.000000 \n", 881 | "673 126.495528 119.279432 \n", 882 | "794 107.000000 68.000000 \n", 883 | "269 127.000000 108.000000 \n", 884 | "289 105.000000 100.000000 \n", 885 | "448 107.000000 133.000000 \n", 886 | "733 105.000000 104.000000 " 887 | ] 888 | }, 889 | "execution_count": 11, 890 | "metadata": {}, 891 | "output_type": "execute_result" 892 | } 893 | ], 894 | "source": [ 895 | "acorn_filename = os.path.join(DATA, 'energy-london', 'acorn_details.csv')\n", 896 | "acorn_df = pd.read_csv(acorn_filename, encoding='ISO-8859-1')\n", 897 | "acorn_sample = acorn_df.sample(10).loc[:, ['MAIN CATEGORIES', 'CATEGORIES', 'REFERENCE', 'ACORN-A', 'ACORN-B', 'ACORN-E']]\n", 898 | "acorn_sample" 899 | ] 900 | }, 901 | { 902 | "cell_type": "markdown", 903 | "id": "e2c11f90", 904 | "metadata": {}, 905 | "source": [ 906 | "## Datasets preparation\n", 907 | "---\n", 908 | "### Target time series dataset\n", 909 | "Our `energy_df` dataframe already has the right information, we just need to give them a name consistant with the schema expected by Amazon Forecas (`timestamp`, `target_value` and `item_id`): in addition, we are going to keep it at an hourly level as this will be consistent with the resolution that the weather data comes with:" 910 | ] 911 | }, 912 | { 913 | "cell_type": "code", 914 | "execution_count": 12, 915 | "id": "4e8b4aee", 916 | "metadata": {}, 917 | "outputs": [ 918 | { 919 | "name": "stdout", 920 | "output_type": "stream", 921 | "text": [ 922 | "Reading existing file\n", 923 | "CPU times: user 729 ms, sys: 15.5 ms, total: 745 ms\n", 924 | "Wall time: 781 ms\n" 925 | ] 926 | } 927 | ], 928 | "source": [ 929 | "%%time\n", 930 | "\n", 931 | "hourly_consumption_file = os.path.join('..', 'Dataset', 'target_time_series.csv')\n", 932 | "if os.path.exists(hourly_consumption_file):\n", 933 | " print('Reading existing file')\n", 934 | " energy_df = pd.read_csv(hourly_consumption_file)\n", 935 | " energy_df = energy_df.set_index(['item_id', 'timestamp'])\n", 936 | " \n", 937 | "else:\n", 938 | " print('Generating new target time series file')\n", 939 | " os.makedirs(os.path.join('..', 'Dataset'), exist_ok=True)\n", 940 | " energy_df.columns = ['item_id', 'timestamp', 'target_value']\n", 941 | " energy_df = energy_df.groupby(by='item_id').resample(rule='24H', on='timestamp').sum()\n", 942 | " energy_df.to_csv(hourly_consumption_file)\n", 943 | " print(f'{os.stat(hourly_consumption_file).st_size/(1024*1024):.04} MB')" 944 | ] 945 | }, 946 | { 947 | "cell_type": "code", 948 | "execution_count": 14, 949 | "id": "fe268f5c", 950 | "metadata": {}, 951 | "outputs": [ 952 | { 953 | "data": { 954 | "text/html": [ 955 | "
\n", 956 | "\n", 969 | "\n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | "
target_value
item_idtimestamp
MAC0000032012-07-0112.359
2012-07-0212.775
2012-07-0311.584
2012-07-0412.700
2012-07-0513.724
.........
MAC0055622013-06-267.466
2013-06-2710.738
2013-06-2811.128
2013-06-299.363
2013-06-309.491
\n", 1032 | "

1301437 rows × 1 columns

\n", 1033 | "
" 1034 | ], 1035 | "text/plain": [ 1036 | " target_value\n", 1037 | "item_id timestamp \n", 1038 | "MAC000003 2012-07-01 12.359\n", 1039 | " 2012-07-02 12.775\n", 1040 | " 2012-07-03 11.584\n", 1041 | " 2012-07-04 12.700\n", 1042 | " 2012-07-05 13.724\n", 1043 | "... ...\n", 1044 | "MAC005562 2013-06-26 7.466\n", 1045 | " 2013-06-27 10.738\n", 1046 | " 2013-06-28 11.128\n", 1047 | " 2013-06-29 9.363\n", 1048 | " 2013-06-30 9.491\n", 1049 | "\n", 1050 | "[1301437 rows x 1 columns]" 1051 | ] 1052 | }, 1053 | "execution_count": 14, 1054 | "metadata": {}, 1055 | "output_type": "execute_result" 1056 | } 1057 | ], 1058 | "source": [ 1059 | "energy_df" 1060 | ] 1061 | }, 1062 | { 1063 | "cell_type": "markdown", 1064 | "id": "0546c697", 1065 | "metadata": {}, 1066 | "source": [ 1067 | "### Related time series dataset" 1068 | ] 1069 | }, 1070 | { 1071 | "cell_type": "code", 1072 | "execution_count": 15, 1073 | "id": "d256603b", 1074 | "metadata": {}, 1075 | "outputs": [ 1076 | { 1077 | "data": { 1078 | "text/html": [ 1079 | "
\n", 1080 | "\n", 1093 | "\n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | "
temperaturewind_speedhumidity
timestamp
2012-07-0114.8625005.1387500.703333
2012-07-0215.2295834.5575000.832917
2012-07-0316.9279173.7108330.895833
2012-07-0418.4262503.3687500.853333
2012-07-0518.4966672.0008330.707083
............
2013-06-2616.2520832.2695830.632083
2013-06-2715.1737502.9258330.700417
2013-06-2816.2887503.6166670.867917
2013-06-2917.2145833.4345830.603333
2013-06-3019.7412504.4016670.676667
\n", 1177 | "

365 rows × 3 columns

\n", 1178 | "
" 1179 | ], 1180 | "text/plain": [ 1181 | " temperature wind_speed humidity\n", 1182 | "timestamp \n", 1183 | "2012-07-01 14.862500 5.138750 0.703333\n", 1184 | "2012-07-02 15.229583 4.557500 0.832917\n", 1185 | "2012-07-03 16.927917 3.710833 0.895833\n", 1186 | "2012-07-04 18.426250 3.368750 0.853333\n", 1187 | "2012-07-05 18.496667 2.000833 0.707083\n", 1188 | "... ... ... ...\n", 1189 | "2013-06-26 16.252083 2.269583 0.632083\n", 1190 | "2013-06-27 15.173750 2.925833 0.700417\n", 1191 | "2013-06-28 16.288750 3.616667 0.867917\n", 1192 | "2013-06-29 17.214583 3.434583 0.603333\n", 1193 | "2013-06-30 19.741250 4.401667 0.676667\n", 1194 | "\n", 1195 | "[365 rows x 3 columns]" 1196 | ] 1197 | }, 1198 | "execution_count": 15, 1199 | "metadata": {}, 1200 | "output_type": "execute_result" 1201 | } 1202 | ], 1203 | "source": [ 1204 | "weather_df = weather_df[['temperature', 'windSpeed', 'humidity']].reset_index()\n", 1205 | "weather_df.columns = ['timestamp', 'temperature', 'wind_speed', 'humidity']\n", 1206 | "weather_df['timestamp'] = pd.to_datetime(weather_df['timestamp'])\n", 1207 | "weather_df = weather_df.set_index(['timestamp'])\n", 1208 | "weather_df = weather_df.resample('24H').mean()\n", 1209 | "weather_df" 1210 | ] 1211 | }, 1212 | { 1213 | "cell_type": "markdown", 1214 | "id": "b30b4399", 1215 | "metadata": {}, 1216 | "source": [ 1217 | "The related time series dataset must conform to the following schema:\n", 1218 | "\n", 1219 | "```json\n", 1220 | "{\n", 1221 | " 'item_id': string,\n", 1222 | " 'timestamp': timestamp,\n", 1223 | " 'dimension_1': ...,\n", 1224 | " ...\n", 1225 | " 'dimension_10': ...,\n", 1226 | " 'related_field_1': double,\n", 1227 | " 'related_field_2': double,\n", 1228 | " ...\n", 1229 | " 'related_field_13': double\n", 1230 | "}\n", 1231 | "```\n", 1232 | "\n", 1233 | "You will note that each `item_id` must have its own related timeseries. For instance, temperature for every timestamp must be provided for household `MAC002543` and another set of temperature must be provided by household `MAC002556`. In a real situation, these temperatures might be slightly different as each household may be associated to a different weather station depending on their location. In this tutorial, we will duplicate the same data and simplify the problem by considering that only one weather station covers the whole London metropolitan area." 1234 | ] 1235 | }, 1236 | { 1237 | "cell_type": "code", 1238 | "execution_count": 16, 1239 | "id": "e4fc98c6", 1240 | "metadata": {}, 1241 | "outputs": [ 1242 | { 1243 | "data": { 1244 | "text/plain": [ 1245 | "3573" 1246 | ] 1247 | }, 1248 | "execution_count": 16, 1249 | "metadata": {}, 1250 | "output_type": "execute_result" 1251 | } 1252 | ], 1253 | "source": [ 1254 | "household_ids = list(energy_df.index.get_level_values('item_id').unique())\n", 1255 | "len(household_ids)" 1256 | ] 1257 | }, 1258 | { 1259 | "cell_type": "code", 1260 | "execution_count": 17, 1261 | "id": "a8838d64", 1262 | "metadata": {}, 1263 | "outputs": [ 1264 | { 1265 | "name": "stderr", 1266 | "output_type": "stream", 1267 | "text": [ 1268 | "100%|██████████| 3573/3573 [00:05<00:00, 649.25it/s]\n" 1269 | ] 1270 | } 1271 | ], 1272 | "source": [ 1273 | "df_list = []\n", 1274 | "for hhid in tqdm(household_ids):\n", 1275 | " current_df = weather_df.reset_index().copy()\n", 1276 | " current_df['item_id'] = hhid\n", 1277 | " current_df = current_df[['item_id', 'timestamp', 'temperature', 'wind_speed', 'humidity']]\n", 1278 | " df_list.append(current_df)\n", 1279 | " del current_df" 1280 | ] 1281 | }, 1282 | { 1283 | "cell_type": "code", 1284 | "execution_count": 18, 1285 | "id": "3ebd43ae", 1286 | "metadata": {}, 1287 | "outputs": [ 1288 | { 1289 | "data": { 1290 | "text/plain": [ 1291 | "(1304145, 5)" 1292 | ] 1293 | }, 1294 | "execution_count": 18, 1295 | "metadata": {}, 1296 | "output_type": "execute_result" 1297 | } 1298 | ], 1299 | "source": [ 1300 | "related_time_series = pd.concat(df_list).reset_index(drop=True)\n", 1301 | "related_time_series.shape" 1302 | ] 1303 | }, 1304 | { 1305 | "cell_type": "code", 1306 | "execution_count": 19, 1307 | "id": "d39df1fe", 1308 | "metadata": {}, 1309 | "outputs": [ 1310 | { 1311 | "data": { 1312 | "text/html": [ 1313 | "\n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | " \n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | "
item_idtimestamptemperaturewind_speedhumidity
0MAC0000032012-07-0114.8625005.1387500.703333
1MAC0000032012-07-0215.2295834.5575000.832917
2MAC0000032012-07-0316.9279173.7108330.895833
3MAC0000032012-07-0418.4262503.3687500.853333
4MAC0000032012-07-0518.4966672.0008330.707083
..................
360MAC0000032013-06-2616.2520832.2695830.632083
361MAC0000032013-06-2715.1737502.9258330.700417
362MAC0000032013-06-2816.2887503.6166670.867917
363MAC0000032013-06-2917.2145833.4345830.603333
364MAC0000032013-06-3019.7412504.4016670.676667
\n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | "
item_idtemperaturewind_speedhumidity
1303780MAC00556214.8625005.1387500.703333
1303781MAC00556215.2295834.5575000.832917
1303782MAC00556216.9279173.7108330.895833
1303783MAC00556218.4262503.3687500.853333
1303784MAC00556218.4966672.0008330.707083
...............
1304140MAC00556216.2520832.2695830.632083
1304141MAC00556215.1737502.9258330.700417
1304142MAC00556216.2887503.6166670.867917
1304143MAC00556217.2145833.4345830.603333
1304144MAC00556219.7412504.4016670.676667
" 1504 | ] 1505 | }, 1506 | "metadata": {}, 1507 | "output_type": "display_data" 1508 | } 1509 | ], 1510 | "source": [ 1511 | "display_multiple_dataframe(\n", 1512 | " related_time_series[related_time_series['item_id'] == 'MAC000003'],\n", 1513 | " related_time_series[related_time_series['item_id'] == 'MAC005562'][['item_id', 'temperature', 'wind_speed', 'humidity']],\n", 1514 | " max_rows=10, max_cols=None\n", 1515 | ")" 1516 | ] 1517 | }, 1518 | { 1519 | "cell_type": "code", 1520 | "execution_count": 20, 1521 | "id": "8ccebfd9", 1522 | "metadata": {}, 1523 | "outputs": [ 1524 | { 1525 | "data": { 1526 | "text/html": [ 1527 | "
\n", 1528 | "\n", 1541 | "\n", 1542 | " \n", 1543 | " \n", 1544 | " \n", 1545 | " \n", 1546 | " \n", 1547 | " \n", 1548 | " \n", 1549 | " \n", 1550 | " \n", 1551 | " \n", 1552 | " \n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | " \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | "
item_idtimestamptemperaturewind_speedhumidity
0MAC0000032012-07-0114.8625005.1387500.703333
1MAC0000032012-07-0215.2295834.5575000.832917
2MAC0000032012-07-0316.9279173.7108330.895833
3MAC0000032012-07-0418.4262503.3687500.853333
4MAC0000032012-07-0518.4966672.0008330.707083
..................
360MAC0000032013-06-2616.2520832.2695830.632083
361MAC0000032013-06-2715.1737502.9258330.700417
362MAC0000032013-06-2816.2887503.6166670.867917
363MAC0000032013-06-2917.2145833.4345830.603333
364MAC0000032013-06-3019.7412504.4016670.676667
\n", 1643 | "

365 rows × 5 columns

\n", 1644 | "
" 1645 | ], 1646 | "text/plain": [ 1647 | " item_id timestamp temperature wind_speed humidity\n", 1648 | "0 MAC000003 2012-07-01 14.862500 5.138750 0.703333\n", 1649 | "1 MAC000003 2012-07-02 15.229583 4.557500 0.832917\n", 1650 | "2 MAC000003 2012-07-03 16.927917 3.710833 0.895833\n", 1651 | "3 MAC000003 2012-07-04 18.426250 3.368750 0.853333\n", 1652 | "4 MAC000003 2012-07-05 18.496667 2.000833 0.707083\n", 1653 | ".. ... ... ... ... ...\n", 1654 | "360 MAC000003 2013-06-26 16.252083 2.269583 0.632083\n", 1655 | "361 MAC000003 2013-06-27 15.173750 2.925833 0.700417\n", 1656 | "362 MAC000003 2013-06-28 16.288750 3.616667 0.867917\n", 1657 | "363 MAC000003 2013-06-29 17.214583 3.434583 0.603333\n", 1658 | "364 MAC000003 2013-06-30 19.741250 4.401667 0.676667\n", 1659 | "\n", 1660 | "[365 rows x 5 columns]" 1661 | ] 1662 | }, 1663 | "execution_count": 20, 1664 | "metadata": {}, 1665 | "output_type": "execute_result" 1666 | } 1667 | ], 1668 | "source": [ 1669 | "related_time_series[related_time_series['item_id'] == 'MAC000003']" 1670 | ] 1671 | }, 1672 | { 1673 | "cell_type": "code", 1674 | "execution_count": 21, 1675 | "id": "0c93746a", 1676 | "metadata": {}, 1677 | "outputs": [ 1678 | { 1679 | "name": "stdout", 1680 | "output_type": "stream", 1681 | "text": [ 1682 | "89.33 MB\n", 1683 | "CPU times: user 12 s, sys: 80.7 ms, total: 12.1 s\n", 1684 | "Wall time: 20.1 s\n" 1685 | ] 1686 | } 1687 | ], 1688 | "source": [ 1689 | "%%time\n", 1690 | "\n", 1691 | "rts_fname = os.path.join('..', 'Dataset', 'related_time_series.csv')\n", 1692 | "related_time_series.to_csv(rts_fname, index=None)\n", 1693 | "print(f'{os.stat(rts_fname).st_size/(1024*1024):.04} MB')" 1694 | ] 1695 | }, 1696 | { 1697 | "cell_type": "markdown", 1698 | "id": "32f244ab", 1699 | "metadata": {}, 1700 | "source": [ 1701 | "### Item metadata dataset" 1702 | ] 1703 | }, 1704 | { 1705 | "cell_type": "markdown", 1706 | "id": "9b0c6b28", 1707 | "metadata": {}, 1708 | "source": [ 1709 | "* POPULATION > Geography (4 categories)\n", 1710 | "* HOUSING > House Size (5 categories)\n", 1711 | "* FAMILY > Household Size (4 categories)" 1712 | ] 1713 | }, 1714 | { 1715 | "cell_type": "code", 1716 | "execution_count": 22, 1717 | "id": "186a2c69", 1718 | "metadata": {}, 1719 | "outputs": [ 1720 | { 1721 | "data": { 1722 | "text/plain": [ 1723 | "array(['POPULATION', 'HOUSING', 'FAMILY', 'ECONOMY', 'EDUCATION',\n", 1724 | " 'HEALTH', 'TRANSPORT', 'MARKETING CHANNELS', 'FINANCE', 'DIGITAL',\n", 1725 | " 'SHOPPING', 'CONTACT', 'ENVIRONMENT', 'COMMUNITY SAFETY',\n", 1726 | " 'LEISURE TIME'], dtype=object)" 1727 | ] 1728 | }, 1729 | "execution_count": 22, 1730 | "metadata": {}, 1731 | "output_type": "execute_result" 1732 | } 1733 | ], 1734 | "source": [ 1735 | "acorn_df['MAIN CATEGORIES'].unique()" 1736 | ] 1737 | }, 1738 | { 1739 | "cell_type": "code", 1740 | "execution_count": 23, 1741 | "id": "f8125168", 1742 | "metadata": {}, 1743 | "outputs": [ 1744 | { 1745 | "name": "stdout", 1746 | "output_type": "stream", 1747 | "text": [ 1748 | "(9, 20)\n" 1749 | ] 1750 | }, 1751 | { 1752 | "data": { 1753 | "text/html": [ 1754 | "
\n", 1755 | "\n", 1768 | "\n", 1769 | " \n", 1770 | " \n", 1771 | " \n", 1772 | " \n", 1773 | " \n", 1774 | " \n", 1775 | " \n", 1776 | " \n", 1777 | " \n", 1778 | " \n", 1779 | " \n", 1780 | " \n", 1781 | " \n", 1782 | " \n", 1783 | " \n", 1784 | " \n", 1785 | " \n", 1786 | " \n", 1787 | " \n", 1788 | " \n", 1789 | " \n", 1790 | " \n", 1791 | " \n", 1792 | " \n", 1793 | " \n", 1794 | " \n", 1795 | " \n", 1796 | " \n", 1797 | " \n", 1798 | " \n", 1799 | " \n", 1800 | " \n", 1801 | " \n", 1802 | " \n", 1803 | " \n", 1804 | " \n", 1805 | " \n", 1806 | " \n", 1807 | " \n", 1808 | " \n", 1809 | " \n", 1810 | " \n", 1811 | " \n", 1812 | " \n", 1813 | " \n", 1814 | " \n", 1815 | " \n", 1816 | " \n", 1817 | " \n", 1818 | " \n", 1819 | " \n", 1820 | " \n", 1821 | " \n", 1822 | " \n", 1823 | " \n", 1824 | " \n", 1825 | " \n", 1826 | " \n", 1827 | " \n", 1828 | " \n", 1829 | " \n", 1830 | " \n", 1831 | " \n", 1832 | " \n", 1833 | " \n", 1834 | " \n", 1835 | " \n", 1836 | " \n", 1837 | " \n", 1838 | " \n", 1839 | " \n", 1840 | " \n", 1841 | " \n", 1842 | " \n", 1843 | " \n", 1844 | " \n", 1845 | " \n", 1846 | " \n", 1847 | " \n", 1848 | " \n", 1849 | " \n", 1850 | " \n", 1851 | " \n", 1852 | " \n", 1853 | " \n", 1854 | " \n", 1855 | " \n", 1856 | " \n", 1857 | " \n", 1858 | " \n", 1859 | " \n", 1860 | " \n", 1861 | " \n", 1862 | " \n", 1863 | " \n", 1864 | " \n", 1865 | " \n", 1866 | " \n", 1867 | " \n", 1868 | " \n", 1869 | " \n", 1870 | " \n", 1871 | " \n", 1872 | " \n", 1873 | " \n", 1874 | " \n", 1875 | " \n", 1876 | " \n", 1877 | " \n", 1878 | " \n", 1879 | " \n", 1880 | " \n", 1881 | " \n", 1882 | " \n", 1883 | " \n", 1884 | " \n", 1885 | " \n", 1886 | " \n", 1887 | " \n", 1888 | " \n", 1889 | " \n", 1890 | " \n", 1891 | " \n", 1892 | " \n", 1893 | " \n", 1894 | " \n", 1895 | " \n", 1896 | " \n", 1897 | " \n", 1898 | " \n", 1899 | " \n", 1900 | " \n", 1901 | " \n", 1902 | " \n", 1903 | " \n", 1904 | " \n", 1905 | " \n", 1906 | " \n", 1907 | " \n", 1908 | " \n", 1909 | " \n", 1910 | " \n", 1911 | " \n", 1912 | " \n", 1913 | " \n", 1914 | " \n", 1915 | " \n", 1916 | " \n", 1917 | " \n", 1918 | " \n", 1919 | " \n", 1920 | " \n", 1921 | " \n", 1922 | " \n", 1923 | " \n", 1924 | " \n", 1925 | " \n", 1926 | " \n", 1927 | " \n", 1928 | " \n", 1929 | " \n", 1930 | " \n", 1931 | " \n", 1932 | " \n", 1933 | " \n", 1934 | " \n", 1935 | " \n", 1936 | " \n", 1937 | " \n", 1938 | " \n", 1939 | " \n", 1940 | " \n", 1941 | " \n", 1942 | " \n", 1943 | " \n", 1944 | " \n", 1945 | " \n", 1946 | " \n", 1947 | " \n", 1948 | " \n", 1949 | " \n", 1950 | " \n", 1951 | " \n", 1952 | " \n", 1953 | " \n", 1954 | " \n", 1955 | " \n", 1956 | " \n", 1957 | " \n", 1958 | " \n", 1959 | " \n", 1960 | " \n", 1961 | " \n", 1962 | " \n", 1963 | " \n", 1964 | " \n", 1965 | " \n", 1966 | " \n", 1967 | " \n", 1968 | " \n", 1969 | " \n", 1970 | " \n", 1971 | " \n", 1972 | " \n", 1973 | " \n", 1974 | " \n", 1975 | " \n", 1976 | " \n", 1977 | " \n", 1978 | " \n", 1979 | " \n", 1980 | " \n", 1981 | " \n", 1982 | " \n", 1983 | " \n", 1984 | " \n", 1985 | " \n", 1986 | " \n", 1987 | " \n", 1988 | " \n", 1989 | " \n", 1990 | " \n", 1991 | " \n", 1992 | " \n", 1993 | " \n", 1994 | " \n", 1995 | " \n", 1996 | " \n", 1997 | " \n", 1998 | " \n", 1999 | " \n", 2000 | " \n", 2001 | " \n", 2002 | " \n", 2003 | "
MAIN CATEGORIESCATEGORIESREFERENCEACORN-AACORN-BACORN-CACORN-DACORN-EACORN-FACORN-GACORN-HACORN-IACORN-JACORN-KACORN-LACORN-MACORN-NACORN-OACORN-PACORN-Q
8POPULATIONGeographyEngland107.0101.0103.0114.0106.075.0107.0106.0102.0106.095.093.097.089.097.0110.097.0
9POPULATIONGeographyNorthern Ireland30.095.045.02.049.0462.053.0104.030.091.056.087.0131.067.095.075.043.0
10POPULATIONGeographyScotland93.0105.087.047.093.0144.054.046.097.053.0167.0114.0121.0194.0139.031.0183.0
11POPULATIONGeographyWales22.073.099.010.046.0249.077.084.0113.073.098.0211.0104.0150.088.054.045.0
70FAMILYHousehold SizeHousehold size : 1 person48.051.093.086.085.083.073.065.0151.085.0112.098.091.0193.0131.072.0160.0
71FAMILYHousehold SizeHousehold size : 2 persons107.0107.0123.0102.099.0127.0108.0102.0126.0110.080.099.094.099.0101.075.088.0
72FAMILYHousehold SizeHousehold size : 3-4 persons114.0119.092.0107.0109.091.0106.0116.064.0105.093.098.0108.063.088.0119.085.0
73FAMILYHousehold SizeHousehold size : 5+ persons128.0104.061.095.0100.069.0106.0106.033.078.0175.0114.0112.050.074.0179.076.0
134TRANSPORTTravel To WorkWork mainly at or from home230.0156.0133.0123.091.0202.089.066.097.068.087.065.069.085.067.050.062.0
\n", 2004 | "
" 2005 | ], 2006 | "text/plain": [ 2007 | " MAIN CATEGORIES CATEGORIES REFERENCE ACORN-A \\\n", 2008 | "8 POPULATION Geography England 107.0 \n", 2009 | "9 POPULATION Geography Northern Ireland 30.0 \n", 2010 | "10 POPULATION Geography Scotland 93.0 \n", 2011 | "11 POPULATION Geography Wales 22.0 \n", 2012 | "70 FAMILY Household Size Household size : 1 person 48.0 \n", 2013 | "71 FAMILY Household Size Household size : 2 persons 107.0 \n", 2014 | "72 FAMILY Household Size Household size : 3-4 persons 114.0 \n", 2015 | "73 FAMILY Household Size Household size : 5+ persons 128.0 \n", 2016 | "134 TRANSPORT Travel To Work Work mainly at or from home 230.0 \n", 2017 | "\n", 2018 | " ACORN-B ACORN-C ACORN-D ACORN-E ACORN-F ACORN-G ACORN-H ACORN-I \\\n", 2019 | "8 101.0 103.0 114.0 106.0 75.0 107.0 106.0 102.0 \n", 2020 | "9 95.0 45.0 2.0 49.0 462.0 53.0 104.0 30.0 \n", 2021 | "10 105.0 87.0 47.0 93.0 144.0 54.0 46.0 97.0 \n", 2022 | "11 73.0 99.0 10.0 46.0 249.0 77.0 84.0 113.0 \n", 2023 | "70 51.0 93.0 86.0 85.0 83.0 73.0 65.0 151.0 \n", 2024 | "71 107.0 123.0 102.0 99.0 127.0 108.0 102.0 126.0 \n", 2025 | "72 119.0 92.0 107.0 109.0 91.0 106.0 116.0 64.0 \n", 2026 | "73 104.0 61.0 95.0 100.0 69.0 106.0 106.0 33.0 \n", 2027 | "134 156.0 133.0 123.0 91.0 202.0 89.0 66.0 97.0 \n", 2028 | "\n", 2029 | " ACORN-J ACORN-K ACORN-L ACORN-M ACORN-N ACORN-O ACORN-P ACORN-Q \n", 2030 | "8 106.0 95.0 93.0 97.0 89.0 97.0 110.0 97.0 \n", 2031 | "9 91.0 56.0 87.0 131.0 67.0 95.0 75.0 43.0 \n", 2032 | "10 53.0 167.0 114.0 121.0 194.0 139.0 31.0 183.0 \n", 2033 | "11 73.0 98.0 211.0 104.0 150.0 88.0 54.0 45.0 \n", 2034 | "70 85.0 112.0 98.0 91.0 193.0 131.0 72.0 160.0 \n", 2035 | "71 110.0 80.0 99.0 94.0 99.0 101.0 75.0 88.0 \n", 2036 | "72 105.0 93.0 98.0 108.0 63.0 88.0 119.0 85.0 \n", 2037 | "73 78.0 175.0 114.0 112.0 50.0 74.0 179.0 76.0 \n", 2038 | "134 68.0 87.0 65.0 69.0 85.0 67.0 50.0 62.0 " 2039 | ] 2040 | }, 2041 | "execution_count": 23, 2042 | "metadata": {}, 2043 | "output_type": "execute_result" 2044 | } 2045 | ], 2046 | "source": [ 2047 | "mask = (\n", 2048 | " (acorn_df['MAIN CATEGORIES'] == 'FAMILY') & (acorn_df['CATEGORIES'] == 'Household Size') |\n", 2049 | " (acorn_df['MAIN CATEGORIES'] == 'POPULATION') & (acorn_df['CATEGORIES'] == 'Geography') |\n", 2050 | " (acorn_df['REFERENCE'] == 'Work mainly at or from home')\n", 2051 | ")\n", 2052 | "print(acorn_df[mask].shape)\n", 2053 | "acorn_df[mask]" 2054 | ] 2055 | }, 2056 | { 2057 | "cell_type": "code", 2058 | "execution_count": 24, 2059 | "id": "fbe77c71", 2060 | "metadata": {}, 2061 | "outputs": [], 2062 | "source": [ 2063 | "# num_cols = pd.get_option('display.max_columns')" 2064 | ] 2065 | }, 2066 | { 2067 | "cell_type": "code", 2068 | "execution_count": 31, 2069 | "id": "935f069f", 2070 | "metadata": {}, 2071 | "outputs": [ 2072 | { 2073 | "data": { 2074 | "text/html": [ 2075 | "
\n", 2076 | "\n", 2089 | "\n", 2090 | " \n", 2091 | " \n", 2092 | " \n", 2093 | " \n", 2094 | " \n", 2095 | " \n", 2096 | " \n", 2097 | " \n", 2098 | " \n", 2099 | " \n", 2100 | " \n", 2101 | " \n", 2102 | " \n", 2103 | " \n", 2104 | " \n", 2105 | " \n", 2106 | " \n", 2107 | " \n", 2108 | " \n", 2109 | " \n", 2110 | " \n", 2111 | " \n", 2112 | " \n", 2113 | " \n", 2114 | " \n", 2115 | " \n", 2116 | " \n", 2117 | " \n", 2118 | " \n", 2119 | " \n", 2120 | " \n", 2121 | " \n", 2122 | " \n", 2123 | " \n", 2124 | " \n", 2125 | " \n", 2126 | " \n", 2127 | " \n", 2128 | " \n", 2129 | " \n", 2130 | " \n", 2131 | " \n", 2132 | " \n", 2133 | " \n", 2134 | " \n", 2135 | " \n", 2136 | " \n", 2137 | " \n", 2138 | " \n", 2139 | " \n", 2140 | " \n", 2141 | " \n", 2142 | " \n", 2143 | " \n", 2144 | " \n", 2145 | " \n", 2146 | " \n", 2147 | " \n", 2148 | " \n", 2149 | " \n", 2150 | " \n", 2151 | " \n", 2152 | " \n", 2153 | " \n", 2154 | " \n", 2155 | " \n", 2156 | " \n", 2157 | " \n", 2158 | " \n", 2159 | " \n", 2160 | " \n", 2161 | " \n", 2162 | " \n", 2163 | " \n", 2164 | " \n", 2165 | " \n", 2166 | " \n", 2167 | " \n", 2168 | " \n", 2169 | " \n", 2170 | " \n", 2171 | " \n", 2172 | " \n", 2173 | " \n", 2174 | " \n", 2175 | " \n", 2176 | " \n", 2177 | " \n", 2178 | " \n", 2179 | " \n", 2180 | " \n", 2181 | " \n", 2182 | " \n", 2183 | " \n", 2184 | " \n", 2185 | " \n", 2186 | " \n", 2187 | " \n", 2188 | " \n", 2189 | " \n", 2190 | " \n", 2191 | " \n", 2192 | " \n", 2193 | " \n", 2194 | " \n", 2195 | " \n", 2196 | " \n", 2197 | " \n", 2198 | " \n", 2199 | " \n", 2200 | " \n", 2201 | " \n", 2202 | " \n", 2203 | " \n", 2204 | " \n", 2205 | " \n", 2206 | " \n", 2207 | " \n", 2208 | " \n", 2209 | " \n", 2210 | " \n", 2211 | " \n", 2212 | " \n", 2213 | " \n", 2214 | " \n", 2215 | " \n", 2216 | " \n", 2217 | " \n", 2218 | " \n", 2219 | " \n", 2220 | " \n", 2221 | " \n", 2222 | " \n", 2223 | " \n", 2224 | " \n", 2225 | " \n", 2226 | " \n", 2227 | " \n", 2228 | " \n", 2229 | " \n", 2230 | " \n", 2231 | " \n", 2232 | " \n", 2233 | " \n", 2234 | " \n", 2235 | " \n", 2236 | " \n", 2237 | " \n", 2238 | " \n", 2239 | " \n", 2240 | " \n", 2241 | " \n", 2242 | " \n", 2243 | " \n", 2244 | " \n", 2245 | " \n", 2246 | " \n", 2247 | " \n", 2248 | " \n", 2249 | " \n", 2250 | "
item_idgeography_englandgeography_northern_irelandgeography_scotlandgeography_walesfamily_1_personfamily_2_personsfamily_3_4_personsfamily_5_more_personstransport_work_from_home
0MAC000246107.030.093.022.048.0107.0114.0128.0230.0
1MAC004431107.030.093.022.048.0107.0114.0128.0230.0
2MAC004387107.030.093.022.048.0107.0114.0128.0230.0
3MAC004319107.030.093.022.048.0107.0114.0128.0230.0
4MAC004247107.030.093.022.048.0107.0114.0128.0230.0
.................................
3535MAC00234597.043.0183.045.0160.088.085.076.062.0
3536MAC00218597.043.0183.045.0160.088.085.076.062.0
3537MAC00234797.043.0183.045.0160.088.085.076.062.0
3538MAC00233197.043.0183.045.0160.088.085.076.062.0
3539MAC00031897.043.0183.045.0160.088.085.076.062.0
\n", 2251 | "

3540 rows × 10 columns

\n", 2252 | "
" 2253 | ], 2254 | "text/plain": [ 2255 | " item_id geography_england geography_northern_ireland \\\n", 2256 | "0 MAC000246 107.0 30.0 \n", 2257 | "1 MAC004431 107.0 30.0 \n", 2258 | "2 MAC004387 107.0 30.0 \n", 2259 | "3 MAC004319 107.0 30.0 \n", 2260 | "4 MAC004247 107.0 30.0 \n", 2261 | "... ... ... ... \n", 2262 | "3535 MAC002345 97.0 43.0 \n", 2263 | "3536 MAC002185 97.0 43.0 \n", 2264 | "3537 MAC002347 97.0 43.0 \n", 2265 | "3538 MAC002331 97.0 43.0 \n", 2266 | "3539 MAC000318 97.0 43.0 \n", 2267 | "\n", 2268 | " geography_scotland geography_wales family_1_person family_2_persons \\\n", 2269 | "0 93.0 22.0 48.0 107.0 \n", 2270 | "1 93.0 22.0 48.0 107.0 \n", 2271 | "2 93.0 22.0 48.0 107.0 \n", 2272 | "3 93.0 22.0 48.0 107.0 \n", 2273 | "4 93.0 22.0 48.0 107.0 \n", 2274 | "... ... ... ... ... \n", 2275 | "3535 183.0 45.0 160.0 88.0 \n", 2276 | "3536 183.0 45.0 160.0 88.0 \n", 2277 | "3537 183.0 45.0 160.0 88.0 \n", 2278 | "3538 183.0 45.0 160.0 88.0 \n", 2279 | "3539 183.0 45.0 160.0 88.0 \n", 2280 | "\n", 2281 | " family_3_4_persons family_5_more_persons transport_work_from_home \n", 2282 | "0 114.0 128.0 230.0 \n", 2283 | "1 114.0 128.0 230.0 \n", 2284 | "2 114.0 128.0 230.0 \n", 2285 | "3 114.0 128.0 230.0 \n", 2286 | "4 114.0 128.0 230.0 \n", 2287 | "... ... ... ... \n", 2288 | "3535 85.0 76.0 62.0 \n", 2289 | "3536 85.0 76.0 62.0 \n", 2290 | "3537 85.0 76.0 62.0 \n", 2291 | "3538 85.0 76.0 62.0 \n", 2292 | "3539 85.0 76.0 62.0 \n", 2293 | "\n", 2294 | "[3540 rows x 10 columns]" 2295 | ] 2296 | }, 2297 | "execution_count": 31, 2298 | "metadata": {}, 2299 | "output_type": "execute_result" 2300 | } 2301 | ], 2302 | "source": [ 2303 | "metadata_df = acorn_df[mask].iloc[:, 2:].set_index('REFERENCE').T\n", 2304 | "metadata_df.columns = [\n", 2305 | " 'geography_england',\n", 2306 | " 'geography_northern_ireland',\n", 2307 | " 'geography_scotland',\n", 2308 | " 'geography_wales',\n", 2309 | " 'family_1_person',\n", 2310 | " 'family_2_persons',\n", 2311 | " 'family_3_4_persons',\n", 2312 | " 'family_5_more_persons',\n", 2313 | " 'transport_work_from_home'\n", 2314 | "]\n", 2315 | "metadata_df.index.name = 'segment'\n", 2316 | "\n", 2317 | "metadata_df = pd.merge(household_df[['LCLid', 'Acorn']], metadata_df, how='left', left_on='Acorn', right_index=True)\n", 2318 | "metadata_df = metadata_df.drop(columns='Acorn')\n", 2319 | "metadata_df = metadata_df.rename(columns={'LCLid': 'item_id'})\n", 2320 | "metadata_df = metadata_df[metadata_df['item_id'].isin(household_ids)]\n", 2321 | "metadata_df = metadata_df.reset_index(drop=True)\n", 2322 | "# pd.set_option('display.max_columns', 6)\n", 2323 | "metadata_df" 2324 | ] 2325 | }, 2326 | { 2327 | "cell_type": "code", 2328 | "execution_count": null, 2329 | "id": "f28d1da0", 2330 | "metadata": {}, 2331 | "outputs": [], 2332 | "source": [ 2333 | "# pd.set_option('display.max_columns', num_cols)" 2334 | ] 2335 | }, 2336 | { 2337 | "cell_type": "code", 2338 | "execution_count": 32, 2339 | "id": "a7dbb6a5", 2340 | "metadata": {}, 2341 | "outputs": [ 2342 | { 2343 | "name": "stdout", 2344 | "output_type": "stream", 2345 | "text": [ 2346 | "CPU times: user 27.2 ms, sys: 0 ns, total: 27.2 ms\n", 2347 | "Wall time: 63.7 ms\n" 2348 | ] 2349 | }, 2350 | { 2351 | "data": { 2352 | "text/plain": [ 2353 | "0.1971435546875" 2354 | ] 2355 | }, 2356 | "execution_count": 32, 2357 | "metadata": {}, 2358 | "output_type": "execute_result" 2359 | } 2360 | ], 2361 | "source": [ 2362 | "%%time\n", 2363 | "\n", 2364 | "metadata_fname = os.path.join('..', 'Dataset', 'item_metadata.csv')\n", 2365 | "metadata_df.to_csv(metadata_fname, index=None)\n", 2366 | "os.stat(metadata_fname).st_size/(1024*1024)" 2367 | ] 2368 | }, 2369 | { 2370 | "cell_type": "markdown", 2371 | "id": "1923bfb0", 2372 | "metadata": {}, 2373 | "source": [ 2374 | "## Visualization\n", 2375 | "---" 2376 | ] 2377 | }, 2378 | { 2379 | "cell_type": "code", 2380 | "execution_count": 33, 2381 | "id": "0a6880e8", 2382 | "metadata": {}, 2383 | "outputs": [], 2384 | "source": [ 2385 | "import matplotlib.pyplot as plt\n", 2386 | "\n", 2387 | "%matplotlib inline\n", 2388 | "plt.style.use('fivethirtyeight')\n", 2389 | "prop_cycle = plt.rcParams['axes.prop_cycle']\n", 2390 | "colors = prop_cycle.by_key()['color']" 2391 | ] 2392 | }, 2393 | { 2394 | "cell_type": "code", 2395 | "execution_count": 34, 2396 | "id": "17c6e9e9", 2397 | "metadata": {}, 2398 | "outputs": [ 2399 | { 2400 | "data": { 2401 | "text/html": [ 2402 | "
\n", 2403 | "\n", 2416 | "\n", 2417 | " \n", 2418 | " \n", 2419 | " \n", 2420 | " \n", 2421 | " \n", 2422 | " \n", 2423 | " \n", 2424 | " \n", 2425 | " \n", 2426 | " \n", 2427 | " \n", 2428 | " \n", 2429 | " \n", 2430 | " \n", 2431 | " \n", 2432 | " \n", 2433 | " \n", 2434 | " \n", 2435 | " \n", 2436 | " \n", 2437 | " \n", 2438 | " \n", 2439 | " \n", 2440 | " \n", 2441 | " \n", 2442 | " \n", 2443 | " \n", 2444 | " \n", 2445 | " \n", 2446 | " \n", 2447 | " \n", 2448 | " \n", 2449 | " \n", 2450 | " \n", 2451 | " \n", 2452 | " \n", 2453 | " \n", 2454 | " \n", 2455 | " \n", 2456 | " \n", 2457 | " \n", 2458 | " \n", 2459 | " \n", 2460 | " \n", 2461 | " \n", 2462 | " \n", 2463 | " \n", 2464 | " \n", 2465 | " \n", 2466 | " \n", 2467 | " \n", 2468 | " \n", 2469 | " \n", 2470 | " \n", 2471 | " \n", 2472 | " \n", 2473 | " \n", 2474 | " \n", 2475 | " \n", 2476 | " \n", 2477 | " \n", 2478 | " \n", 2479 | " \n", 2480 | " \n", 2481 | " \n", 2482 | " \n", 2483 | " \n", 2484 | " \n", 2485 | " \n", 2486 | " \n", 2487 | " \n", 2488 | " \n", 2489 | " \n", 2490 | " \n", 2491 | " \n", 2492 | " \n", 2493 | "
item_idtimestamptarget_value
0MAC0000032012-07-0112.359
1MAC0000032012-07-0212.775
2MAC0000032012-07-0311.584
3MAC0000032012-07-0412.700
4MAC0000032012-07-0513.724
............
1301432MAC0055622013-06-267.466
1301433MAC0055622013-06-2710.738
1301434MAC0055622013-06-2811.128
1301435MAC0055622013-06-299.363
1301436MAC0055622013-06-309.491
\n", 2494 | "

1301437 rows × 3 columns

\n", 2495 | "
" 2496 | ], 2497 | "text/plain": [ 2498 | " item_id timestamp target_value\n", 2499 | "0 MAC000003 2012-07-01 12.359\n", 2500 | "1 MAC000003 2012-07-02 12.775\n", 2501 | "2 MAC000003 2012-07-03 11.584\n", 2502 | "3 MAC000003 2012-07-04 12.700\n", 2503 | "4 MAC000003 2012-07-05 13.724\n", 2504 | "... ... ... ...\n", 2505 | "1301432 MAC005562 2013-06-26 7.466\n", 2506 | "1301433 MAC005562 2013-06-27 10.738\n", 2507 | "1301434 MAC005562 2013-06-28 11.128\n", 2508 | "1301435 MAC005562 2013-06-29 9.363\n", 2509 | "1301436 MAC005562 2013-06-30 9.491\n", 2510 | "\n", 2511 | "[1301437 rows x 3 columns]" 2512 | ] 2513 | }, 2514 | "execution_count": 34, 2515 | "metadata": {}, 2516 | "output_type": "execute_result" 2517 | } 2518 | ], 2519 | "source": [ 2520 | "df = energy_df.reset_index()\n", 2521 | "df['timestamp'] = pd.to_datetime(df['timestamp'])\n", 2522 | "df" 2523 | ] 2524 | }, 2525 | { 2526 | "cell_type": "code", 2527 | "execution_count": 35, 2528 | "id": "2a57a3b9", 2529 | "metadata": {}, 2530 | "outputs": [ 2531 | { 2532 | "data": { 2533 | "text/plain": [ 2534 | "item_id object\n", 2535 | "timestamp datetime64[ns]\n", 2536 | "target_value float64\n", 2537 | "dtype: object" 2538 | ] 2539 | }, 2540 | "execution_count": 35, 2541 | "metadata": {}, 2542 | "output_type": "execute_result" 2543 | } 2544 | ], 2545 | "source": [ 2546 | "df.dtypes" 2547 | ] 2548 | }, 2549 | { 2550 | "cell_type": "code", 2551 | "execution_count": 36, 2552 | "id": "705be0b1", 2553 | "metadata": {}, 2554 | "outputs": [ 2555 | { 2556 | "data": { 2557 | "text/plain": [ 2558 | "array(['MAC000003', 'MAC000004', 'MAC000006', 'MAC000008', 'MAC000013',\n", 2559 | " 'MAC000018', 'MAC000019', 'MAC000020', 'MAC000021', 'MAC000022'],\n", 2560 | " dtype=object)" 2561 | ] 2562 | }, 2563 | "execution_count": 36, 2564 | "metadata": {}, 2565 | "output_type": "execute_result" 2566 | } 2567 | ], 2568 | "source": [ 2569 | "hhids = df['item_id'].unique()\n", 2570 | "hhids[:10]" 2571 | ] 2572 | }, 2573 | { 2574 | "cell_type": "code", 2575 | "execution_count": 37, 2576 | "id": "23a2161c", 2577 | "metadata": {}, 2578 | "outputs": [ 2579 | { 2580 | "data": { 2581 | "image/png": "iVBORw0KGgoAAAANSUhEUgAABfoAAAEJCAYAAADMypZ8AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAB2VUlEQVR4nO3dd3hkZfn/8c+ZPpO+vbB9s33ZvhT50pRFBQWRr6KgfhFEQSw/BWkivaqI0kQRqRYEURQVC0jTZRvbW7b3bEsm09s5vz8mm02ZTGayaZO8X9fFdbHJJHkyOc8p93M/923U1tZaAgAAAAAAAAAABcnW3QMAAAAAAAAAAADtR6AfAAAAAAAAAIACRqAfAAAAAAAAAIACRqAfAAAAAAAAAIACRqAfAAAAAAAAAIACRqAfAAAAAAAAAIACRqAfAAAAAAAAAIACRqAfAAAAAAAAAIACRqC/k1VVVXX3EABkwRwFehbmJNCzMCeBrsWcA3oW5iTQszAnsyPQDwAAAAAAAABAASPQDwAAAAAAAABAASPQDwAAAAAAAABAASPQDwAAAAAAAABAASPQDwAAAAAAAABAASPQDwAAAAAAAABAASPQDwAAAAAAAABAASPQDwBAM8sOxFUXN7t7GAAAAAAAADkh0A8AQDPLDyV0IEKgHwAAAAAAFAYC/QAANJMwLYVTVncPAwAAAAAAICcE+gEAaCZhWoomCfQDAAAAAIDCQKAfAIBmkqYUIaMfAAAAAAAUCAL9AAA0kzAtRcjoBwAAAAAABYJAPwAAzSQsKUpGPwAAAAAAKBAE+gEAaCZJRj8AAAAAACggBPoBAGgmYYpAPwAAAAAAKBhtBvqnT5+u8vLyFv996lOf6orxAQDQ5VKWRTNeAAAAAABQMBxtveCNN95QKpVq+Pe+fft0+umn6/zzz+/McQEA0G3cNoOMfgAAAAAAUDDaDPQPGDCgyb+fffZZlZSUEOgHAPRaTruhuEmgHwAAAAAAFIa8avRblqVnn31Wn/70p+Xz+TprTAAAAAAAAAAAIEdGbW1tzimLr7/+ui644AK99dZbOv7447O+tqqq6pgHBwBAd3h8u1OGpCtGJbp7KAAAAAAAAKqsrMz6+TZL9zT29NNPa/bs2W0G+XP5wX1FVVUV7wXQgzFHkUm/YJ0MSZWVpd09lD6HOQn0LMxJoGsx54CehTkJ9CzMyexyLt1z4MAB/eUvf9EXvvCFzhwPAAAAAAAAAADIQ86B/l/96ldyu9264IILOnM8AAAAAAAAAAAgDzkF+i3L0jPPPKMLLrhAJSUlnT0mAAC6ldHdAwAAAAAAAMhDToH+t99+W5s3b6ZsDwAAAAAAAAAAPUxOzXhPPfVU1dbWdvJQAADoOazuHgAAAAAAAECOcq7RDwAAAAAAAAAAeh4C/QAAZECdfgAAAAAAUCgI9AMAAAAAAAAAUMAI9AMAAAAAAAAAUMAI9AMA0ArLoiUvAAAAAADo+Qj0AwCQgctmKGF29ygAAAAAAADaRqAfAIAMPA5D4SQZ/QAAAAAAoOcj0A8AQAY+u6FoikA/AAAAAADo+Qj0AwCQgcdhKEJGPwAAAAAAKAAE+gEAyMBrNxQhox8AAAAAABQAAv0AADRjSfI6DEXJ6AcAAAAAAAWAQD8AABl47IbCZPQDAAAAAIACQKAfAIBGTMuSIclHRj8AAAAAACgQBPoBAGgkaUpOmyGPw1CYQD8AAAAAACgABPoBAGgkYVpy2iSvXYpSugcAAAAAABQAAv0AADSSMCWHzZDXYSPQDwAAAAAACgKBfgAAGklalpxGOqOf0j0AAAAAAKAQEOgHAKCRRKMa/TTjBQAAAAAAhYBAPwAAjSRMSw6b5LEbilC6BwAAAAAAFAAC/QAANJKsz+i3GUZ3DwUAAAAAACAnBPoBAGgkYVpycnUEAAAAAAAFhFAGAACNJEzJYSObHwAAAAAAFA4C/QAANJIkox8AAAAAABSYnEIZ+/bt01e+8hWNGzdOgwcP1gknnKB33nmns8cGAECXS9TX6AcAAAAAACgUjrZeUFtbq7PPPlsnnniiXnjhBfXv31/bt2/XwIEDu2J8AAB0KWr0AwAAAACAQtNmoP8nP/mJhgwZoscff7zhY6NHj+7MMQEA0G2o0Q8AAAAAAApNmzmLr776qubMmaNLL71U48eP1ymnnKKf/exnsiyrK8YHAECXSlqWnMT5AQAAAABAATFqa2uzRuwHDx4sSbrqqqt0/vnna9WqVbruuut0yy236Iorrmj166qqqjp2pAAAdIG3D9vUzylNLTH1s+1OXTEq0d1DAgAAAAAAfVxlZWXWz7dZusc0Tc2aNUu33HKLJGnGjBnasmWLnnjiiayB/rZ+cF9RVVXFewH0YMxRNLd+e0Sjiu2q7O9Sv2CdKitLu3tIfQpzEuhZmJNA12LOAT0LcxLoWZiT2bVZumfw4MGaOHFik49NmDBBu3bt6rRBAQDQXVLU6AcA9EE/WRXo7iEAAADgGLQZ6D/xxBO1adOmJh/btGmTRowY0WmDAgCguyRMS876qyPdaAAAfcVGf1IpkysfAABAoWoz0H/VVVdp8eLF+sEPfqAtW7boD3/4g372s5/p8ssv74rxAQDQpdKBfjL6AQB9SzRlqS5BoB8AAKBQtRnonz17tp5//nm9/PLLOumkk3THHXfoxhtvJNAPAOiVkpbkqI/zOwwpSXYjAKAPiCYt1cbM7h4GAAAA2qnNZrySdPbZZ+vss8/u7LEAANDtGmf0ex2GIilLJWT4AwB6ubhpqTZOoB8AAKBQtZnRDwBAX5Iw1VCj32s3FEmS0Q8A6P2SpsjoBwAAKGAE+gEAaCRhWnLUZ/B7HAT6AQB9Q4nLIKMfAACggBHoBwCgkaSphtI9Pnu6dA8AAL1dhcummhjXPAAAgEJFoB8AgEbSNfrT/+9xGIqS0Q8A6APK3TYy+gEAAAoYgX4AABpJWZK9vveuz0FGPwCgb3DZDcW45gEAABQsAv0AADRjGPU1+mnGCwAAAAAACgCBfgAAWkGgHwAAAAAAFAIC/QAAtMLnMBSljAEAAAAAAOjhCPQDANAKj8NQmIx+AAAAAADQwxHoBwCgFV47Gf0AgN7PsrjWAQAAFDoC/QAAtMLroEY/AKD3S5iSy2aIKx4AAEDhItAPAEArvHZDETL6AQC9XMy05LZJDkNKmlz3AAAAChGBfgAAWmG3GSLeAQDo7WIpS267oTKXTf642d3DAQAAQDsQ6AcAAACAPiyWktx2Q+Vum2pjrHADAAAUIgL9AABkQbgDANDbxVOWXHZD5S6basnoBwAAKEgE+gEAAACgD4umLHnsUrnLINAPAABQoAj0AwCQhdHdAwAAoJPFUpZcNkMVbptqYgT6AQAAChGBfgAAAADow440403X6CfQDwAAUIgI9AMAAABAHxYz65vxumyqjdOdBgAAoBAR6AcAAACAPiyesuS2Sy67oYRJoB8AAKAQEegHAAAAgD4sWl+6R5II8wMAABQmAv0AAAAA0IfFGwX6AQAAUJgI9AMAAABAHxYzJbctHegn3A8AAFCYCPQDAAAAQB8WI6MfAACg4LUZ6L/nnntUXl7e5L8JEyZ0xdgAAAAAAJ0sVt+MV6JGPwAAQKFy5PKiyspK/fnPf274t91u77QBAQDQkxiSTMuSzSDTEQDQO8VTllxk9AMAABS0nAL9DodDgwcP7uyxAADQ43gdhiJJS0VOAiAAgN4pmrLkqQ/0Ow0pYVpy2rjuAQAAFJKcavRv27ZNkydP1vHHH68vfvGL2rZtWycPCwCAnsFjNxRNUcgAANB7JSzJUR/XL3fbVBszu3dAAAAAyJtRW1ubNXrxj3/8Q8FgUJWVlTp48KC+//3vq6qqSgsXLlS/fv1a/bqqqqoOHywAAJ3tZ9udumJUouHff9xn1wnlpoZ4CPYDAHqnx7c79eX6a9/f9ts1qdjUaB/XPQAAgJ6ksrIy6+fbLN1z1llnNfn33LlzNXPmTP3qV7/S1Vdf3e4f3FdUVVXxXgA9GHMUzfUL1qmysrTh36NsYQ0Z4FRlmbMbR9V3MCeBnoU52Tc0vvZt80ZV7rKpcpCrm0fVNzHngJ6FOQn0LMzJ7HIq3dNYcXGxJk2apC1btnTGeAAA6DbpprtNP3akRj8AAH1BhdumGkr3AAAAFJy8A/3RaFRVVVU05wUA9DoJU3IYTSP9BPoBFLI4PUaQp3KXodo4gX4AAIBC02ag/7vf/a7eeecdbdu2TUuWLNEXvvAFhcNhfeYzn+mK8QEA0GUSpiVnsysjzXgBFLL7lwcU4xyGNjRe4qYZLwAAQGFqs0b/nj17dPnll+vQoUMaMGCA5s6dq3/84x8aOXJkV4wPAIAukzQlR7PaPT6HoX1hAh4ACpM/bqo2Zmqwz97dQ0EP1ngpqMxlI6MfAACgALUZ6H/yySe7YhwAAHQ7MvoB9DaBhKnaOIF+5M5pM0TFOgAAgMKTd41+AAB6q4SZDnA0Ro1+AIUsmLBorIo2GW2/BAAAAD0cgX4AAOolTEuOZldGr91QhIx+AAUqYYlAP9rU/CpH4B8AAKDwEOgHAKBeMkNGv4eMfgAFrMxlqDbOOQwAAADo7Qj0AwBQL2FZcjZLY/Q5DEVT3TMeADhWFS4bGf3IG0tDAAAAhYdAPwAA9RKm5GiW0e+0GUqYhDwAFKZSl03+OIF+AAAAoLcj0A8AQL2kacmZ4cpImB9AobIZnMMAAACAvoBAPwAA9RKm1aJGPwAUKssixI/2cdsMxWhEDwAAUFAI9AMAUC9hKmNGP6F/AIUolpLcds5gyF+521AtvR0AAAAKCoF+AADqJU2rRY1+AChUoaSpIgfnNLSt+VFS7rKplt4OAAAABYVAPwAA9VrL6AeAQhRIWCp2EuhHdplKPJW7baohox8AAKCgEM4AAKBeazX6qVIMoBAFE5aKWb1EGzKVeKogox8AAKDgcOcPAEC9pCVK9wDoNYIJUyVOQ4Ykk8a8aEXMtORq9lRY7rapNsYxAwAAUEgI9AMAUC9pWjTjBdBrBOtL95S6bKqLE7RFZvGU1SKjv9xlkNEPAABQYAj0AwBQL2FKToOwPoDe4UjpngqCtsgimiHQX+ayqZYa/QAAAAWFQD8AAPUSpiV7hisjebAAClEwaarIYdSXYSFoi8ziGWr0222GOGIAAAAKC4F+AADqJU1lbMYLAIUomLBU4jRU4baphkA/WhFNWXJz7QMAACh4BPoBAKiXaKVGPwAUoobSPQT6kUXctOS2d/coAAAAcKwIZwAAUC/RSka/w0gvAgBAIUmYllx2Q+Uum2ppxotWxDLU6JdoRA8AAFBoCPQDAFAvaVpyZLgy+hyGQgmCZAAKU7mLjH60rrVAP1c9AACAwkKgHwCAeglLchotgx1FDpvCSUIeAAqTx2Eoxq4ktCKWoRkvAAAACg+BfgAA6rVWo9/nNBROkg0LoLAQ2kcuYqYlV4ZrH6F/AACAwkKgHwCAeqYl2TPU6Pc5DIXI6AcA9EKxlCUPpXsAAAAKHoF+AADaUOQwKN0DoOCQkY1cxFLpps0AAAAobHkH+n/4wx+qvLxc1157bWeMBwCAHsdHoB8A0EvFW6nRT+gfAACgsOQV6F+8eLGefvppTZ06tbPGAwBAj0OgHwDQW0VbKd1jM6QUTZwBAAAKRs6Bfr/fry996Ut66KGHVF5e3olDAgCgZyly2Aj0AwB6pXgrzXh9dkPhFNc+AACAQpFzoP+b3/ymzjvvPJ122mmdOR4AAHocn9NQOEGwA0DhsCyrSTNVj91QhAVLZBBNWRlL9/icHDMAAACFxJHLi55++mlt2bJFjz/+eM7fuKqqqt2D6m14L4CejTmKIw4fcqqqqrrFx4NJaXu1Q1X2ZDeMqu9hTgLHLpqSgjWOhnNaotah99dXa6A7/8Atc7J3O3DQqa2bW1776g7atSZhyu8h2N/VmHNAz8KcBHqWvjwnKysrs36+zUB/VVWVbr/9dv31r3+Vy+XqsB/cV1RVVfFeAD0YcxSN9QvWqbKytMXHk6alf8QCGT+HjsWcBDrGwWhKo82IKiuLJUkTHRGVlztUWeHM6/swJ3u/1q59YxwRDW7HMYNjw5wDehbmJNCzMCezazPQv2jRIh06dEgnnXRSw8dSqZT+85//6Mknn9SePXvkdrs7dZAAAHSFloUL0hw2Q5QpBlBIgglLxc6jZ7Vyt6HamNmNI0Kh8Tko3QMAAFBI2gz0n3POOZo1a1aTj331q1/VuHHj9K1vfSuvLH8AAAAAnS+QsFTsPNqOq9xl0+5QqhtHhJ6qtUVur8NQiEA/AABAwWgz0F9eXq7y8vImH/P5fKqoqNCUKVM6a1wAAHQ5whkAeotgwlRJo4z+CrdNq2sS3Tgi9FStXft8DkMHo+wCAQAAKBS2tl8CAAAAoJAEm2f0u22qoXQP8kDpHgAAgMLSZkZ/Jq+++mpHjwMAAABABwkmTA0vsjf8u9RpKJggaIuWspXuCSdZHAIAACgUZPQDAAAAvUzzZryGYVCeDBllK90TJqMfAACgYBDoBwAAAHqZYMJSiZNbfbQfpXsAAAAKC3f/AAAAQC8TTJhNMvqBfHnthsIpAv0AAACFgkA/AAAA0MskLclpI9CP9rPbDBHnBwAAKBwE+gEAAACgj2I5CACA3umpDSHtDqW6exjoQgT6AQAAgF6GRGzkKtuxwiIAAACFa5M/qT9tj3T3MNCFCPQDAJADQ5JpETpD50mYHF/oXDZJKY4z5IGjBQCAwlXkNFQdJqO/LyHQDwBADnwOQ+EkIQ90nsv+fZittehUZS6b6hKcx9AUWfsAAPReQ3x27SXY32cQ6AcAIAcE+tHZRpU49LO1we4eBnqJTMHbCrdNNTGzy8eCno3SPQAA9E6WpHNHefVnyvf0GQT6AQDIAYF+dDavw9CsAS69vTfW3UNBL1XuNgj0Iy9c9QAAKEzRpCW3zdDwIrv2sGu4zyDQDwBADoqcNoUoeYFOYlqWDEnnjfbo1R0RJamjjk5Q4bKpNk6gH0eZlsUDIQAAvdC+SEpDfemr/ECvnVr9fQT3dQAA5ICMfnSmmpipfm6bDMPQFyYU6emNoe4eEgpcprMVpXvQXCwlue2tF+ihdA8AAIVpdyil4UV2SdK5ozx6dUe0m0eErkCgHwCAetkCGulAPwEydI79EVODvOnbsskVTh2ImDoQIesG7WNZmRclCfSjuVjKyhroZ3kbAIDCtDec0rD6QP/IYod2BpPdPCJ0BQL9AABISpmWbFki/UUOQyEy+tFJ9kdMDfTaG/79lSnFenpjuBtHhEIWSVnyZgjelhPoRzPpQH93jwIAAHS0PaGUhvqOXuT7e2wkEvUBBPoBAJCUMCVnlkg/pXvQmQ5EUxroOXpbVu62KUGdfrRTKGGp2NnyfOa0GeI0hsaibWT0U7oHAIDCFExaKnYefb44d5SX8j19AIF+AAAkJSxLjixXRQL96EwHIqYGeUmrRccIJpo+2AGtiZuW3FkWuR2GWHQEAKAXGF3i0A7K9/R6PAEAACApaUoOo/VgR5HTplCCYAc6R23cVLmL3Fl0jEDCzJjRDzQXS0muLBn9Xha5AQDoNbLtYEfvQKAfAAClMxazJcCS0Y/OZEkysiw0AfkIJiyVtBLoJzkbjcVSljxZAv1FDpsiXPsAAAAKAoF+AADUdo1+ly1d4gAAerpspXtcNilK4Bb12mrGS0Y/AACFJ2layrKOj16MQD8AAEpn9Ger0U+2NToTRxc6UjBhqsiR+ag6vr9Tqw4nunhE6KnipiVXlkVuAv0AABSe6oipIRn6fzlthuIpruu9GYF+AACUznqgZiG6S6bbbafNoAkm2iWYtFqt0T9ngEtLD8a7eEToqaJtlu4xKN0DAECB2RNKaVhRy0B/qdNQXcLshhGhqxDoBwBAR0r3dPco0BdZVuYgWpnLkD/OjTjyl67Rn/mENtBr18EIxxXScmvGy/ECAEAh2RNOaZivZaC/zG1TXZwF/N6MkAYAADpSuoeMfnS9QCuNU8tcNvlj3Igjf8GE2WpGP9BYvI0a/TSiBwCg8OzOltFPIlGvRqAfAABJSTL60U0OREwNylBDs9xlUy034miHlKWsC5flbkM1MY4tpEv3uLNk9Pso3QMAQMGpjZsqd7W8vpe6bOwY7uXaDGn8/Oc/18knn6wRI0ZoxIgROuuss/Taa691xdgAAOgyCWr0o5vsj6Y00NPylozSPWivtsKycwa6tPQAdfqRbsbrbqMZb4hAPwAABccwWtkxTOmeXq3NQP+wYcN022236c0339Qbb7yhU089VRdffLFWr17dFeMDAKBLJCzJkeFmCOhsByKmBmbK6HfbVEvWNTrBjP5OrTiU6O5hoAeIpURGPwAAfUQpiUS9XpuB/nPOOUdnnXWWxo4dq/Hjx+vmm29WcXGxFi9e3BXjAwCgSyRNi9I96BYHoikNypjRT8YNOofPYVMkxbEFKZZL6R6OFXSRbYGkXtwS7u5hAECvVeayqS7Bdb03yyukkUql9NJLLykUCmn+/PmdNSYAALpcwhSle9AtDkZN9c8Q6KdGPzqVJVkWD3p9XayNZrweO8140XW2B5JadpCyYgBwLMws93clTkMBni96NUcuL1qzZo0WLFigaDSqoqIiPffcc5o6dWrWr6mqquqQAfYGvBdAz8YchSTtOGRXxG2qyt/6jdHhQ05VVVV34aj6pr42Jw8cdGrr5szH1Z79TlVV7eviEaHQ1eRwrvKE7Xpz9X4N97QdxO1rc7IvOXjIqc2bsh8rh7j2dbm+OueW77drS41dVVUHunsoQBN9dU6iMB2KS1ado9Vrd2+4rvflOVlZWZn18zkF+isrK/X222/L7/frlVde0ZVXXqk///nPmjJlSrt/cF9RVVXFewH0YMxRHLHOGdHYUocq+zlbfU2/YJ3Gjy/J2NgIHaMvzsn+wTpVVpZm/Fy/LJ8DWpPLcfPRAQmtr0no9LG+rK/ri3OyL8l2/snnNeg4fXnOmZGAKl2mKivLunsoQIO+PCdRmEIH45pZnFLlKG/Gzxf68wVzMrucSve4XC6NHTtWs2bN0i233KLp06fr0Ucf7eyxAQDQZXKp0e+xG4qmumY86DsoioGOlssxNbHMofX+ZKePBT0b5x/0JJGUpQq3jQbQAHAM9oRTGl6UpS4ferV2tR00TVPxOLXzAAC9R8Jqu0a/z2EonKSmIYDCZ7cZMomlIQccJuhK40od2lTHIiQAtNeeUErDCPT3WW2W7rn11lu1YMECDR8+XMFgUC+++KLeeecdvfDCC10xPgAAukTCtORoY/nb5zAUSlrq3zVDAoC85dNg12Gkz300Iu+7+Mujp5lQ5tC6moSmZymlCABo3YGoqQGeduV1oxdoM9BfXV2tK664Qvv371dpaammTp2qF198UR/84Ae7YnwAAHSJpNl2Rn+Rw1CY7eToQNGkJY+dUBs6TjhpyZfjMTW5wql1NQkd39/VyaNCT5XLFY0zFLrS2FKHXt0R7e5hAEDBMi3JRk+5PqvNQP9jjz3WFeMAAKBbJUxLjjbuh3xOQ+EEgX50nP3RlAaScYMOFEpaKnbm9nA3Z4BT/9gVI9CPrLjqoSuYliVDkttuKJbiqAMAoD14sgQAQFIih4x+n8OmEBn96EAHI6YGelu/HTOUXykWIJiwVNxWZ/F6w4vs2huhw3hfRr4feoqDlJoAgA6RSzI/zxe9F1dSAAAkJXOo0U/pHnS0/dGUBnlab5ZV5DQU5JhDHgIJM+eMfoNt3cgBRwm6wt5wSkN96euhzUhn+AMA8pNLAJ9n2t6NQD8AAMo1o99QOGl20YjQF+xvI6O/zGWTP8Yxh9ylM/oJzaLjEApAV2gc6B9RbNfOILuNACBf/rilMlf2UG+pyyZ/nKt7b0WgHwAApWv0t1XtwucwKN2DDnUwampgloz+cpdNtdyIIw8HIqYGZDmmgMY4u6Cn2Bc2GwL9E8oc2uhPdvOIAKDw7A6lNMyX/aG2zGWoLkEiUW9FoB8AAKWDHbY2ylgU0YwXHSyWsuTJ0gW6zGWTP86NOHK3O5zScUW5B/oNUSID2dErBF2hOpLSoPodbpVlTgL9ANAOu0MpDW/jPrCUHcO9GoF+AABy5KOeIbpYudtQLTfiyIM/bqrMlXvpnv5umw5FOcb6qlyOFJfNEOuN6GwpS3LUl1CscNu49gFAO6QD/Y6srylz2VRH8lqvRaAfAIAcee2GIiluitBx2jqaylw21RJhQ57yabI7tMiuPWFqYfdVuVzRvA5DERa50cXoFQ4A+auOpDQkS/8vSSp1GuwY7sUI9AMAkKN8gmdARyinWRY62XCfXXsJ9COLInazoRtQLQoA8mdKstuyP7OWuWyq4/mi1yLQDwAA0E3aWjoqdRmqI+MGnWhokV17QxxjfVUuy9deh6FwkmMEXavCbdPhaO6LkNsCSf1+S7gTRwQAvUOpy8bzRS9GoB8AAKAbJE1L9jaibDbDyKm0BiBJiRyOqeYGeWzan0cwDb1LrqV7yOhHZ4qlLLmaZaBOKHOoKo+GvG/tjWnh/nhHDw0ACkouu6E8dlGOthcj0A8AANANDkVN9fdwK4aOszec0lCfPa+vsdsMmTzr9UlJ05Ijh4WhImr0o5PtC6c0xNf0ejih3KGNeQT6dwRSKnVxTQXQd1k51jyjHG3vxpUQAADlVr4A6Ej7o6YGevILygLZ7AmlNLwo/2OKEG7fFEtZcuewBYSMfnS2fRkWKY8rsmtXKL/dRmU0mATQhx2OmernJszb13EEAAAAdIMDkZQGebkVQ8fZ3c5AP/qmuKncAv12Av3oXHvDZotAv83IfbfRjmBSI4rtmjnApeUHE50wQgDo+bgPhESgHwAAoFvsj5ga5OVmHB2HBzzkI5qy5La1HegvchrU8kWn2tNK2bFcq0v8Z19cJw9xaeYAp5Yfylynf3sg9zJAAFCIdodSOo77wD6PQD8AAEA3qCajHx0smLRU4sz/mEpnbFPuoq+JpSy5cogHkNGPzlYbN1XuahnVd9kMxXJYZNrkT2p8qUMlTpsCicyvv+zNwwomOM8B6L1I+IBEoB8AAEnUqEbXCyYsFbcjKAt0tKE+u/aGCID1NbGUJU8OpXt81OhHF8jUHPL0YW69vDWS19dnOqJ3BZMa4rVrGWV9APRiB6KmBpJE1OdxBAAAAPRgTpuhRK6FitGnWe08TIb67NoTzq/pJQpfOqM/l0C/TREC/egGcwe6tCWQVHWW81N1ONUksDXMZ9fuZk18/7Yzqu/OKdWyA5nL+mytS+qZjaGOGTQAdBPTSvc3Qd9GoB8AAKCLxVOWXDnehZW5DNXGyLZG5xlWZNNeAv0F6c09Me0Jte9vFzeVU41+t13U6Ee3+dq0Yj20Otjq5/9bHdfJQ9wN/5490KmlzQL6e8IpTSp3KtjKgtUbe2JacYhsfwB9h92QkiQS9UoE+gEAyIPTZihOwAPHaIM/qQnlzpxeW+6yyR8n0I/sYnksHjU31Gcn0F+gXt4a1qL9mbOU2xJNWXLnkNGfqaQK0FGsNrYilThtOnO4W3/clrmEz5qahKZWOBr+PaXCqbU1R4P2/rjZpHdJpp+3J5xSRXtPoABQgLL1NEFh42oGAEAeqFWMjrD6cELTKnIL9Je5bKqNc8whu73hlIa1swFbsdOmEOe1gmNZlkpdNm3wty8TOZ6y5M7xkCHUj85Sl7BU4sx+hJ053KPlB+OqybC7zWpWqsJpM9Q4H+Ofu6L60HEeSdKYEru2Bpouapr1gf8hPnY2AShcpmUpn3X5MpdBIlEvRaAfAIA8FDkMAmI4ZtsCSY0pzS3CVu7mRhxt2xVK6bh2BvpRmDbXJTWp3KFkO08PsRwz+iUa1qPz7AunNMzX9rnrG9NL9MDKQJOM/JqYqTJ3y2PYMKRUfUmKNTUJTavP+J830KXFzcr6rKlJalqFU3MHurSklRr+ANDTHYyaGujJPcRbyo7hXotAPwAAefA6DIXbG1UB6uXTLKvMZaNGP9q0O5TScAL9fcqbe2M6bZin3UH4XEv3AJ1pbzilITkE+svdNn1yjFc3LvI3BKf+Wx3TyYPdLV47scyhjf6k4ilLdsNoKD81vsyhTf5kk9e+vTemU4a6NLWfU2sOU6cfQGHK9z6wzGVTHTuGeyUC/QAA5MHnMBSiniG6ULpGf+875v7ff2oaSibg2O0Jtb90DwrTnvqH+kEem6rbUXJkoz+pcaWOtl8oSveg8+wNmzll9EvSzAEuXTujRHcvq9OqwwmtOJTQ8f1blsGbM9ClpQfjendfTKcMcTV83GYYLRbGDkdNDfDY5bQZYsMmgEK1K89Af6nLUB0Z/b1Sm4H+Bx54QGeccYZGjBihcePG6dOf/rTWrl3bFWMDAKDHKaJGP45RdTilwd7ccy3SNfp71414OGnqvf1xbahNtv1i5CSSsuRztD+Hx5BYeCkgKdNqyFI+vr9Tq9qRiZxIiYx+dLtcM/qP6Oex654TyvTvPVHtDqXktLU8hkcV27UtkNK71XGdPKRpxn86YSN9TU2aluyNTpt2I/0xACg0u/Ms4VhG6Z5eq82ngXfeeUeXXXaZXnvtNb3yyityOBw6//zzVVNT0xXjAwCgR6EZL47V6pqEpvbLrRGvJHkchmKp3nXMvb03rv83vaRFrWS037HG6Ad6bDoQ4YGvUKw6nNDx9eeRqf2cWlOTX6B/fySlAXksOPauMxB6kmjKkteR34KTzTD0tWkleuCk8oyfNwxDstILYs0XAmb1d+r9Q+n58v7BhGYNOHo9nlKR/1wCgJ7gUNRUP3d+iUR17FLvldo8Cn7/+9/rkksu0ZQpUzR16lQ9/vjjOnjwoBYuXNgV4wMAoEfxOW004+0gyw/GtbG27z1Qrz2c0NSK3AP9vdHiA3FdMMarHcH8y42gcwz12bW3HeVfkNnuUOe+l2/vjenUoelM5RKnTcE8H9b/vSemM4a1rG0OFBJXlh0pDptaLeuzrH6R+T/NavzPoyEvgAJm5Nj/S5JKnJTu6a3y3t8bDAZlmqbKy8s7YTgAAPRslO7pOC9vjejvu6LdPYwuV5ewVOrqu22SLMuSaVmy2wzShDtQHs92GQ0vsmsPgf4OEUqYuuC1g4p04rWiNm6qPI/MveY21iY1oSy3+vwSNfpReP5vYpE+PMLb4uOljbJYA/Gm1+NhRXbtDRP4AlB48r0PdNgM9bINw6iX+91dveuvv17Tp0/X/Pnzs76uqqqq3YPqbXgvgJ6NOQpJOnzIqaqq6jZfdzAubTvkUJVFbfFjkTSlgN+pgF+qcu9r8rnePicPH87tWGvyNTken4WgKmSof8ymqqr9SgYcWrK2WmV9e4PDMYumpFCN45iOkUhcWnHIocpYy3Nbb5+THe3Xux26cKCl37+/RfPLOz5oGDOlQK1DVVX7Gz4W8Tu0Yn21cil1blnS4RqnNm3K/Xg51IvOQYWgr8y5aKpzj61AKx8/fNip1Ruq6+dRdYvPcayjub4yJ1G42vOsUMjPF315TlZWVmb9fF6B/htvvFELFy7U3/72N9nt2e8i2/rBfUVVVRXvBdCDMUdxRP9gnSorS9t83eC4qSVWSJWVJV0wqt7rn7ui+sRUQ2/ujWn8+JKGraa9fU7GUpaGhAI5HWuN9cvx+CwEf1kV0BfmFKncbdNHSmM6HLc0d4Snu4cly7K00Z+U225odEneuTDdapM/oeOdCVWO87X7e4yzLP17ectjs9Dn5L48G30eq0DClK0uqKtnlOih1fnP9Vy8tTemj062VDns6Lw53RtVxGloxuC2y/GsOZzQKY6EKsfmfrz0pnNQT1focy4fD60O6GsneLv8nDvLCul9SR+ZbG8yjyRpbCygASOLVHEMO2bQu/SlOYnClDItDQj2necL5mR2OV+9brjhBr300kt65ZVXNHr06E4cEgAAXcvKo4slpXs6xn+rYzp5sEsTyhyq8ved3REbahOaUN6309f9jUqOzOjv0opD3VsPedmBuO5bXqc7l9Vp+aGEntoQ6tbxtMfuUErDi44tmG0zjDYrKeVzruwJNtYmdOm/D2tLXdedY57eENIXJvrk7cQm2u9Vx3TCoKYB/en9nFp1OLeeJ//eG9NpedbnN1R4f//uYlpWlx5zhWpPKKWEqW5ZWJ0/yKXH1gRbzCMpXcN/KXX6ARSQ6oipwd6uS2pAz5ZToP+6667Tiy++qFdeeUUTJkzo7DEBvY5pWd3W2OkPWyNKmjyYAdmkLClLP7cm7DZDPWFKJU1L1QVaTzuWStdnt9sMnTjYpf9WH/v5MdUT/ig5WFOT1PR+vSfQH81z0etgNKV+jbIkPQ5DsQyVTWpipt7ZFzvW4WUVSJi6falfm+uS+vq0Et08p0yfHudTkcNQINH1NZqPJYi6K5TScccY6G9LbczUea8d6pb3pj2iSUs/WxfS787qr6fbuXiz2Z/UX3dE9OzGkB5cGdAft0Wyvr4ubsoftzSiOB249DlsCmZ4v3YEk/rNpnC7xiRJ0ZQlr6PpRWuwz679kdz+NjVRU/09+R0vHruhaGFecjrEK9si+k+O56RH1gR17/t1rfZo6KwFoELzs3VBXTG5qFt+dmWZQ+PLHC3mkSTN6O/U8kO5LZoBQE/QEQkf6D3aDPRfc801+tWvfqUnnnhC5eXlqq6uVnV1tYLBYFeMD+gV/rA1oodXd/2c2RFMavGBuG5Y5NfKbs6YBHqyhCk5be1rNWhZVrsDdKZlKdTOoNk/d0d1z/t17fra7vb67qjOrM8mHVFk187gsUePvvRWjdbkmM3anbYGkhpd0r4b8Z6YTfvVd2r0xLpgzmP7x66YFhzXtEyCzVCLBelnNob02s5ozhnK+frPvpjuXlanr0wp1v+O8zUJ9pw7yqtXt3ddk2jLsvSTVQHduiS9q+Ce9+v0x20R1WZaAWnFnlBKQzu5PM2DqwK6e36Zfr6ufUHzrj5+H1gV0NenF6vYadO4Mkdex5JlWXp6Q0h/2xXVYK9dpw1z64opRdpSl9SeUOvnq6c2hHTpxKOByxMGu/Te/pb3Xy9tiWh/JKU39+R/nB2Opo6pmXckacndjkPF6zAUSRbGIk+ublrk11t72w7e/3tPVPsjKb26o+2/15t7Yurntum7c0r1+NqW9/7xlKWr36k5poWe3mDR/pimVDhV7Oye8jg2w9CTp/fL+Dmfw6YoizEACsieMIF+HNXmlfWJJ55QIBDQeeedp4kTJzb899BDD3XF+ICClzAtLT2Y0PxBLu2PdG0q1L92xfTlKUW6d36ZFh+I6wcrAq1mFwF9WcK05MjjWbMuburu99MBuXuWB/TjVe1byHtodVA/XBnQve/XaWcwv23+71XHNcRnb/dCQXdatD+u+YNcktRQm/9YrD6c0ClD3Pr91p4fOLGsdIAhX8VOQ4FE156/TcvSZn+y1YDz6sMJLRjh0aQKp+56P5DT7rEqf0KVZU3LNEytcGptzdEgbDRpqS5u6ra5pfr1ppD25rBzJdcM2aRp6YGVAW0LJHX3/DINyrDNeXKFU+tr2w4K18ZMff3dmpwCha0JJ019d3GdTh3q1m3zyvTd2aW6dkaJxpTY9fi6dFbw7maB5WDC1LZA0/NF3JTcuW5Laoe/7Yxo3kCXpvVzyiZpV57nK9OydOE/DineRcGzv+yIaFqFUyPrM+s/O96n3+YYWI2lLN2xrE5jSh366tRizR7o0shih3wOmy6fXKQn1mc+39fGTIWSloY1etCeN9Clxc0C/ZZlyR839fXpJXq3Oq4qf6LF93lgZaDV9+rn60P67PjMtfUdtvT1LJuF1TGdNCS/sj2S5OvAsnU1MVOrDidaHWs0aemtvTF9f3md7lxap/uWd/yi9j93RXXyYJf+Wx3LWj5s2YG4lh5I6PLJxZo9wJm1pMvecEpv7Inqs+N9GlnskN2QtjYr4fOjVQHdOKtU+8KpJue9QnMsuxJMy9JLWyK6cKy3A0eUP0eWBA9D6XH2ZDUxU69uz77LCEB+IklL/9rddckeHWUXGf1opM2CeLW1tV0wDKD3+vWmsD4z3ie3Pf1Q8dnKrtuiujOU1Mji9M+7bFKxttQl9Yv1QV09jSaiKAwbahMaVeyQJ8PW6o6UNK28MvrvOaG8yb/vXJbeop9pC3hrXtkW0egSh84b7VVd3NRzVWHtj6T0zeklDfXLWxNImPI5DH10pFd/3hHVp4+hAeexOpKlm2vAPpq05LIbTYLdQ3w27QmlmgTIpHQQ1JfDCszvNod10+xSPb4u2OWNN/NxLBnN5S6b/HHzmDJ5cxFOmnp4dVAJUzIMaXSxXZvqkvrenLIWr31le0TfnF4sn8OmIV6bbljk182zS1sdYzxlyWEzWhwr8wa69NedER3fP73488KWsD41ziebYei7s0v1vcV1um1uqYoyZH5alqVfrA9pgz+pUcV2fXlKcatz+UAkpftXBPSlSUVt9kno77HpQCSlga3UO7UsS/evqNOtc0r11t64bl7s11emFOf1kLUnlNIDKwO6ZkZJk2PWYTN0fH+Xju/vUm3M1HNVIe2PmHLVB/KLHYbsNqnIYdMXJ6Wv8R2wXiYp3YMkmDCbZNnWJaX/7Ivr9nnpY+BLk4v0/RUB3Tq35THRmtd2RvXB4R79YVtEn+rg85VpWToYNWVakmlJh2Omlh1I6LtzjjaXc9gMzR7g1MLqmE7M0qy2OpzS91cE9LVpxRqVoW54idOmkcUOrTmc0NRGJbgsK72A9NWpxU1e77YbLYLZC/fHdUL9Qud3ZpToxkV+3TCrVBVum/68PaLF++M6d5RXP10b1NenN71f21qXVLHT1upxObHMqQ21SU3LUh7svf1xfev4/O8Dmwf6X9kW0arDCdmM+qCo0ju0PjDErTEl9qzXhO+vqNP8gW79ZUckfa7R0WPYUHqH3byBTl09rUReh6En14e0riahyRUtf6+D0ZT2hU3ZjfTuoHKXTYPbuAaEk6Ze3xPV3fPL9ZGRHt26pE5lLluLWvFV/oT+vCOim2enj6XzRnt1+9I6zRnoavE9k6alB1YEdNu80obf/UuTi3X7Mr/unl8uKT0PJpY5NabUoa9NK9b1i/y6ZU5pXlntSdPKGqDubKGEqZ+tC6WbTtf/nhVumxYc51ZlWW5l6Z6rOnqO76mO7+/U33dF9eER+S9GrDwU14GoqdOGurP+rYIJUw+uDOpQLKXrZpbmfe/yi/Uh7Q4ldcZwd073SutrExpf6ujW4wfo6d7eG9MT64P64HBP2y/uQfxxU2Wu9u9O74jEK/QcXd/5BuhDIsl0NuTnJxTJsqwu3aZ7JKDS2NhSh2pjPTs7BWjswVVBfXC4WxeO7dxAdsJUXhn9zf3vWK9+tyWsz0/IbSFv9eGENtcl9f/qgy2lLpuumlqsXcGkfrs5rC9PKc769X/eHtW5o7yaXOHUC5vD0rj2j/1Y3bs8oJQpyZD6uW06ebBLMwe0DIIc8Y/dUZ3VrHTLSYPdem9/TJ8Yc/TvXBszdcHfD+o3H+qfMev6iE3+hEaW2OWyG7qkskhPrg/p2zM6bzHzp2uDctrSi6f5SjfKat+BVuayqTZuaUS7vjo30aSlW5bU6dvHNw08P7omqJ3BZEPdcSmdzWlaaggujC9z6vqZJbp9aZ3uPaEsYyDh3X0xnTSo5bExrMiuveH0rgHTslRVf92U0t//2hklunNZnW6fV9YkiB9PWbpveZ3OHO7R5ZOLVeVP6KZFfn1qnE9zmwXiFu+P6w/bIjkH1c4b7dUftkX0pcmZ/87PVoX1sVFe9fPYdf4Yrz50nFuPrw1pdIldn8zhfLXsQFwvb4vojnllWRcIy922Vhfn39ob093v1+k7HXi8D/XZtTecUmXZ0ffo6Z1O3Xba0Z9R5LSpssyh9w/GNSvLXG/snX1x3TmvVDcvrtP/ju24B8pF+2N6aUtE48scchiGbIZkt0nfmtHy73b+GK9uWuTXCYNcGX/+P3dF9c6+WKuLSkdcXOnTLUuOBm8l6cergvrEGG/GAHOx06a6Rot0/9wV1fWz0oFjh83QjbNKdeeyOrnthk4flt7ZIUlLDsS1oTahiY0WpX6xPqTvzi5t8TOOmN7PqUUH4lkD/UlTDYtG+fDajwb6w0lTSw4cXfyR0oGCbYGU/lMd06+qUnLapWuOL5G92bng1e0RnTHMU38dyC2I+rkJLd9zKb277t73Azp1qFuW0r1aFu6P62vTipucr5p7dE1IV9VfZ22GoZvnlOqmRX5dM6NEJU6blh2Ma8mBuGpipr47+2jg3mEzNKbEUb8zqel7/MOVAX1pclGTgKvHYej0oR79bWdE0/u5tGh/TDfXL5rabYaunVGi+5cHdNvc0pzmxKvbI3pzb0wum6FvHV+sfnn2WchHbczUv3ZHdVyRXSOKHRrktemlrRGtPZzQ5ZOLmry/NTFT9y2v021zy7LuLFpfm9DzVWHN7O/MuFjSk3xslFc/WxvUn7ZH9LFRuQf739oba9i1eO/7ARmGdOZwt+YMcDWZd+9Vx/Tytoi+Ob1EpS5Ddyyt0yWVRU0WELM5UL9L/BvTS/T0hrCunJr9nuSdfTG9uSemlGXJazf0iTFejc+yMLM/klIkaWl4kb3TFgbWHE7o2aqQxpY4dEUb973I395wx5T0+8++mNbWpHc19QWLD8R1wRifVh6KNySgFIr23Fsd6b/jJTLcq/DnBDrRUxtC+r+JR7Lt0hmsiTwzh9tr4f54xoCKvX5bd1eMAZDSGQalzpZZvG3ZFkhq7kCnVh1K6MKxnTOusvrAy7HOiYnlTj1XFc4pI+JQNKVnN4Z0zwkts2GPK3ZodyjV5vfZUJvQZ+pLNwypD8wdy818LGXJYahFQKYtv6oK6YRBLp1Zn/VyMJrScxvDSpjSvAznHykd4PzenKaBqskVDv1hW0SfGHP0Y7/cENLPTq3QI6uDDYGvjGPYFNa1M9Lfr8KdrqsbSphZA3Xt9cS6oMaVOrS1LqnlB+NZFzQy+fP2SMN7la9ydzqjf08opb/vimp7IKkSl00XjfO12AnRHvGUpVuW+PWNacUtsgovqfTpkTVB3TDr6N/tbzuj+uiIpr9Lf49dX5pcpEfXtMxE9sdN/WVnVPfOz54F/trOqM5u9n0H++y6bFKR7l8eUMqyNK3CqdkDXXpkdVBfnVbckIVbWebUvSeU6fmqsP62Iyp7/SFgWtJAj013zsstmCZJI+vnYiarDyfkj5s6qVFmeLHTpm/PKNEv14f09t6Y/mdo61njL28Na3copdtzDO615tShbg3x2nTjIn+H7fSYXOHUz9eFVOG2yWakj4vjS01VNNtldNE4n25c5NfM/s6G3+FwNCWvw9Zi4WL5wbhm1L9u3iCXFh+Ia/6glu+PaVk5Z/juCaX0s3VBTalw6p4TynL6Opth6IxhHv1tZ1QfHuFpGHcsZenBVQFVljpy2qXgtBk6ebBbb+6J6rRhHj2zMaRp/ZytLnqcNNilhdVxLRjhUSSZbkTe+HpT7rbpupkl8jgMlTQ6b31pcpFuXOTXvfW/33+rY5o1wJl1h9uYUrt+u+VoqZhtgaT+tD3SUPbLkDSpon2Pf0VOQ5H6ci1PrAvp8mZNVA3D0JhSh8aUpr//upqEfrAyoOtmHj1vBBKmFu6P644s5/RMnDZDJw5y6809MZ1W39/Fsix9f0VAN8wqadJY+KMjvfrekqPvW3NrDidU7jJ0XKNAtdNm6JY5pfr+ioCKnTbNHuDUpROLMs6ri8b79IMVdQ0Be8uy9NDqoE4c5Mq4U2jBCI9uXFSrf+6Ktfi9B3ntOus4jx5dG9K4Urs2+ZPyxy0dOuzUTCuk04e5NbLYob3hlB5eHdRpQ926/8Ry1cZM/WhlQCcNcbUr4zwXT6xP//zamKm/74pqbzilDw33ZNxBWOG26bJJRXp2YyhjMHBPKKWfrwtqZLFDN88ubddCU3e4Ykqxnq8K6TebwrqolXJZjf1tZ0Rb6lK6pn7x9dShbsVTlv69J6YHVwUUr6+CF09Zqixz6J75ZQ3nobvml+mHKwLaE061SIbI5MkNIV0xuVgVbpsOxUwFEmaT80djh6Mp/W1HVHfUXwPr4qZe3hrRi1siDYuOjQUSpu5fHtDMAU7tCaWUtKREytJNs0s7JOhfHU6fv0cUO3TnvDI9tiaoTf5E1oUH5Of3W8L64/aIvje7TOPK2h/yS5qW/rAtIl8n76zuKSzLkmlZOm+0Vz9aFSi4QH97lLnSyQheR8/cDY32IdAPdBJ/3NShmNnwwCNJJ9Y3ZTulHbVR8/Xf6pi+Ob1llt+sAU69fzDzQ3ZvdyCSktdhdFvjr0KSbzmWg9GUBmTILAskTH1nYa3mDnS1mhnbmt9tDuuKKcX6zaZwxrIux+JgNKXr3/NrdP2Dvs9paEg7M62POGOYW//eE9MZWQK5lmXp/uUB3TS7tNXA1JHzRGvlJarDTcuJXDDGqxe3RlqUjMjHD1cGdFyRvdUdCc9uDOljo7xNSgot3h9XIGHps41+3wEeu74+vVg3vOfX3IHOFsfPoWhKRU5bi4/bDEON9xodjqaUMC2NL3Pq+P7OVgOnO4JJDfDYmwQWPzPep99sDjfJuK8Op/Sn7RGdPMStyeWOdgVXn9kY0hBfOihjWpauf8+vyjJHkwWFaNLSi1vDuni8r8XPiKUs7QqlNLa0fbdew312/Xh1QNMqnFowwqPRJUU6HE3pV5vCOhAx9eGRniaB53wkTUu3LvXryqnFTYJfR5S6bHLbjSbzfNmBuD4+qmWAYGK5U/+tjmvx/njDYo9pWbrn/TrdMLNldu8RA+pL5by7L6475rX8vuPLnLpptlOWZWl1TVJ/3RHVd+eUNizWHWEzDH0ux501bRlZ7NC2QLJJOY9QwtTTG0OtLlj830Sf7lqWnk9jmv2tTcvSj1cFNb7M0WEl9CaUO3XdzBJtD3RMD6Dp/Zy6/8RySenjoi5u6tDO/S1eZ7elS4fdsqROrvraLRVum3YGk7prXlmTv/MftkV0Y30w6ZyRHt25rK7FPciDKwPyx00ZRvoYOnOYWweipv69J6ZD0ZQMw9CR2KAlqcRh6JoZJXlfz886zq1nq8K6+/1Aw8dqY2aTBaNcnDPSo+vf86suYcllM/ShLIG52QNcemBlQAtGpDO7my+QScpYisdhM/TZ8T49szGsz0/w6Q9bI7o3wwJxYzbDUNK09Or2iBbuj2tksV0Xj/d1SOa3124omLRUHU4pbqqh/0FrJlc4tS+c0tMbQvpCfdLLI6uD7b5WfWxU+j3/n6Eu2QxDz28K68MjPE2C/FI6i/6z43365YZQi51XpmXpqVbmb5HTltNCj9dhqNxtq2+AbdODq4KaO9CVdXHvq1NLFEtlLu/3P0PdctqOLmaUu22qqqqWc4hbb+yOaWcwLJddumHW0eO93G3TbfPK9OftEd2+1K9vH1+S8+J2PGXpnX0x/bc6rupISrfPLWtRLvBgNH0+ab4zKpvKMqd+uynSZPeKlL4uPrAyoNvnleZUXqanubiySH/cFtEv1gez7uR7cUtY4aSlq5od3y67oQUjPFrQaN7HU1aLxQ6bYejamaV6viqk25f6dfZxHs1vZffR3nBKHrvRsAB76cQiPbU+pK9leO47siB2/ayjC8ulLpu+MLFIb+6J6ldVoRZlZX+4IqDrZpY0OS+tq0no8XWhY7rXlNI7C97YHdXXp5c0XL+/PKVY313c+uJcoTkcTXXqbpu2rDqc0M5QSk+c1k83vOdvc5daNs9uDOtzE4q0ZH+8xf1QR9geSKqfx9bqItWxaE+Zs/W1SU0sd8rrMGSamedqb1PqMlSXMDVYBPp7EwL93agvnDgK3TffrdG3ZpS0+TCTyS/Xh3TZpKY3Th8Y7NaPVwe6JNAfT1kZt8+eNNitX64P9clA/33LA5pc4WhXyY2+xLQs3bG0TsOL7Dlt0/z3nqh+sT6kb0wvafJQaFqW7l5Wp7vml+mP2yJNAn9tiacsRVOWylw2XTDGq99sDutrHdhb4qUtEd06p1THFTsayg2UtLOu4RFnDHPre0vqsgb6/7ozqo+O9GTNvl1wnEd3v1/XaqD/5W0RfWL00Qy+wT57wxbu9thX/8C4sTaZcSfBov0xxU1LD68OamRJOmi0P2rqle0R3T63ZUDWZhj66EiP/rIjqnMabXe3LEs/qH/YzKTUachfn+725IZwQ+3xC8Z4df176XIbza+Zz24M6+vTmx6jY0sdempDSikznTn7+u6o3tob0/9NLNLSA3H9fktEliyNKHZo9gCnplQ423wQ+O3msIodhs6t/31shqH/d3yJHlgZaMjq3BZI6uHVQc0Z6KrfndA0++9Iv5b2Glfm0E8+UNHkY/08dl09rUQp09KrO6K64b1anTfa2+LYsSxLiWblOhKmpTWHE1p2MKE1NQl9ZUpR1ge4z09IBxy/dXyJdodSGlrUeg3uL0zw6YZFfk2qcKjEadMja4L6TBvBxvkDXfrp2qBmD2i5QNSYYRia3s+p6TmWNzgW54326PmqcMPuhE3+hB5dE9J3sixYGIah62aV6Mb3/Lp5TrpfgWVZentfXH/eHtHFlT7N6OAMsf4ee4tgZ0dw2Az189h1qJXPnzbMrYnlDg32Hl28W3M4oQdXBRvKZ+0OpTTAY2s49hw2Q4O8du1u1DTuT9sjGlOa7ldiWZY2+JP63ZaIBntt+t+x3lbr0beHYRhNFjQty5Kl/BtkG4ahT43z6b398RaBveZcdkPJ+kzeFYcSOn907hnYMwe49PddUT28OqhPjvXmtEg5Z4BL/Ty2Y94x0pzXYWh/1NTP14X0tem53UedMdyj56pC+seuqAZ6bBrqs7e7h4phGPrkWK9e2hLR1H5O1URNXVKZ+To5c4BLb++LNSl9tDOY1E/XhvT5CUV5715r7nOVRXp8XVB2w9DJg106uY37+rZ6d2S63o8ucWj0xOzPH+eO8mreQJduXVqnL04sytjD4AjLsvTY2pD8cVP/M9St62aWyB9P92T5brNddr9cH2q4Bufjssnp8nnfbNQD4gcrAg29XArVeaO9enNPTLcu8evaGSUtFvgfWh1QZZlTn5+Q2zU+2/P/xZVFSpiWXtsZ1a1L6jTEZ9clE3xNAqFPrg/p6mlH5+DwIruCSUu1MbPFos1TG8K6YIyvxaK4JJ02zKOHVwe04lC84br08tawThnibnHenVzh1Ft7Y1p1ONHu6+/C6pjeq443KYclpd+Pz4z36dmN4YZFwY6wN5zSEG/L5JLWxjbUZ8/YmyUfoYSpS14/rDvmlXVLeaqamKnn6ncN24z0gvi9ywPtuh4cjqa0O5zSpf2cGuy16XdbMicV/WhlQGcOd+d9b/PHbRFtrE3Ibkv3BhrkteujIz3tirs098LmsP6xK6ofnVyeV0LAP3dHdXH9vfrZIzx6bVc0r9Jd3WVLXVJD2nmvVOayyR+ntHNvQ6C/mxyOpnTx64f1l48MoPFFD7W2JqFZA1x6aFVQ180qyZit3Jp4ylIgYbYopeFxGIqlOv9EujvUehmPMpdNgYTZ6WPoaf66I6KzR3j07r5Ydw+lRzMtS3ctq9MFY3363eZwm6UUkqalv+6I6pkz+ukHKwKKp6yGB96frArqs5VFGuCx64sTi3TTYr/GlzlalIDI5NUdEZ0zMn1jNdBr18GI2WY5m3jKUnUk1SLDO5M9oVRD5vKRcgPHyjAMjS91aGNtIuP2/ZRp6e29sRaNfJtz2NK7TjI9sEnpwHzz3Q3T+jmzPnxtCyT19t5YxkznX24I6copxVp+KK5/7Y61yEx9ZVtUt80tld1maPnBuG5c5FcsZenuE8pa/XucNsyjG96r1dkjPA1B9Oeqwvr4aG/Gh01Jmj/IpcX74yqJp/995JxrGIa+NLlIT6wPNQmora9NqNhpZMwC+uhIj17Znt5CP7LY3pCleSSQbVmWdodSWnowob/siCqQsPTFiUUZtzf/c1dUsZSlTzd774b67Joz0KU/bY/IbTO05GBcd88vk8tu6JbFfp05/GhpqHjK0pa6ZEMpt45mtxn6+Givzh3l0R+3RXTjolrNHuDS1rqkYvWNLhs3BbUk2Q1pSoVTHx7hySmgM8BjVyyVzvB+aUtYn61sPaBhGIa+dXyJHlgR0EmD3Rritbf5ADitn1NXv1vTpDxQd+vvsetQ1NT+SEo/XxfSQI9N95yQvQa1lM7MvWFWie5aVqe5A11aeTihDwxx9ZpsxcaaB22n9nNqayCp328J64KxPv2qKtRiN9dnx/v0xPqQrplRonU1CW2sTTYsDBiGoUnlTk1qo1lyRzEMQ+39i8wZ6Mo5kFPqMlTlT2iAJ7egU2NXTyvWT9fmnqBxTicFJYocNi07ENHIYker5/FMLqks0g9WBLQjmNSDJ5cf0xjmD3LrT9v9WnwgrnvaKAN25ZRi3bDIr+/NKdVT60NKWdJNs0s6JOBc7k5noM4Z4NQJ7dxJ1VEG++y6d36ZHl4T1IpDiVZLzPxkdXrnwQcaLUr099g1vszRpEn1gUhKNsNo1+LhUJ9dKStd332Q166Xt4Y1b5Ar406xQnPaMLcmlTt069I6/d+EdC395Qfj+tWmsK6eVtwhwckjnLZ0YsG5o7zaFUzqwZUBDS9y6HMTfNobTqnMZbSYg1+cWKRfbgg19H6SpKUH4oqkrKxJNldNTe/CHFXsUF3C1PraZKvX4csnFen6RX7dOa/t62BzSw/E9e89MV03syTjOXDWAJf+uSuqXcGkjit2KJZKl4x5/2BcV0wuzns3ZDRp6ZbFflW4bbpovK/V0mqxlKUfrwpoRLFDSw8mVBszdfmkojYberfmyQ0hPfY/FXp0TVBTK7KXWutoKTO9e/LGWUd3DQ/x2fWRER49vTGc9z3oo2tDumpK+msGee06mCGpaE8oJYch/XtPTNsCKZ2Xw0K2ZVl6dG1Iw312XduotNu+cEo/XRvUp8f5si5atmVdTUJ7QindPb9Mty9NJ5zlWp61NmY2JKXMHejUrUvy69HRmWpipspdmUvhPlcVaigZlq9SZ7qcF3qXwr/qFqjnqtKZfe9Wd00Zl1zlW66jN/vDtoi+Ob1YF4z16tYldbp1bmnO28rSWbuZLwojihzaHkgec8ZANv/aHc26jdxlTy845HuTVqjCSVPv7Ivrrvll2hVMdcrWw87y4pawnDYjpxunY2VZlu5eFtB5o72a3s+pg5F0KZpsNcWfq0pv6TTqs0Z+sjqohGmpNm5pbKmjIfBsGIa+M6NU9y6va1KTtDXvH2ya9Th3kEtLDyZabCO3LEsrDyf06o6oTEsaWZwOzEVS6cymSyp9Leorrq1JHNMNZDafGufTAysDuml2y+//4tZIzk2FPznGqxe3hFvsqKjyJzQuw8POR0d69JNVwRaB/gORlH6+PqQyl01jSuwttmjvDqVUVF+G4LShbt28uK7JuWPlobimVDgash9nDnBpRn9nfdOm7H/DiyuL9KtN6QbFm/wJHYya+tyE1q93swa49JNVAR085NB1H2j6Po0vc+rP26NaWB3TsoMJHYikNKbUoS9NzvzQcsIgl25eXKdLWwneG0a6PvNxxeks4oRp6buL/Pr2jJImjX831Ca09GC8SY3pxj460pt+QCyyN5QmkdKBuUfWBBs+9tvN4Yx1jTuazTD0iTE+fWyUV+tqkzp3pLdDHzIvqfTp2aqwDkXNNhe/B3ntOmmwW8sOxlvdxdGYy27olQ8P6LSmf+110hCXntqQLleQaeGtNf08dl05tVi1MVP/2wV/+57k3FFePbQ6oHf2xZSw1OJ9K6/vpbE3nNKTG1ovg9SbnDzErWsX+vXoKRVtv7gZn8Ombx3feQ3Gc+V1GPrn7pje+Fj+i3HfOr5Y/njuPRiyuXJKsZy2tnvKOGyGvjy5SLcvrdNXp+ZXmikXbe3k6Ep2m6FvTC/Rv/dEddMivy6pbBooe3h1QDP7Nw3yH/Hpceldc7Prm8X+ckNIl7cjm/+IyyYV6ZE1QX16nFeb61K6ZkbvOf8dWVR5dE1QT20MaXK5s9MXcI8rdujmOWVaV5PQrUvqVB1JZVwwG+yzK2FaemhVoCE7d0SxXV+Zkv1vaTMM3TCrVPe8XyfTUtb+GXaboa9OTd/f5HNOWn4wrr/ujOqmWZmD/Ed8dVqxbllSp6E+u0IJUx8f7dUFY7x6YGVAx/dz6iOtPFtn8vK2iL4+vUSTyx367eaw/rA1os9U+jSmxNHw/LuxNqGfrw81OT/Uxkz9Yn1IDpv0tWnFef1ta2KmQglLo0oc+mr9feC32xl8zZdpWbpneUCXTixqcc09eYhba2oS+vWmsM4d5ckpnrHiUFzHFdmb7MQ8rtiuncFkk0bcz1Wle0WUu23647aIfrIqoKuzvG/xlKW736/TR0d6WixeD/HZdfPsUt202K9vHd/0fjyaTC9i2Ix00uKHjvNoakXLUpzBhNlwX2G3GfrKlGLdtaxOt8xpe0dDbaxp2THDMDTAa2tYuGxNOGm2uoBcEzP1zr5YQ3k2t93QSYNcGa9fz1eFtLA6ri9OKmqxMPXq9oj+sSuqDwxxt7in3BlMqp/b1u5F7FKXTXvCiXZ9LXquwoh09TKRpCV/3NRXpxbrzmV1HR7o//WmsIZ4bVnLR7Tm+ysCGl3i0Kf62ENpc5GkJctSwwnzxlklun1Jne6YV5ZT0KS12sVSuj7sa7uinVo+ZnsgqdFZahTPG+jSkgPxjDf8Pd0PVgQ0rtTeoixGNo+vDenL9Te654/x6ukNTbcVZxJImIomrXaVDAgnTa06lDjmLK/N/qS2B1IKJEydNtSdV5CpMdOytPJQQm/siems4zyaliHrO2Faum95QOeO8jQExk8b5tatS+paDfTXxEztCqX0fxOPBvO/Mb1EP10blGVJVzZ7CC532/SpsemmnldOKW71Ib3Kn9D4sqY3b2cf59H9KwJNAv1V/oSeWBfS/EEu/b/pJS0Cz0nT0ncX+zW9X9NSIH/aHtHVnfSA7nUY8tiNFot58ZSlVYcSOQd7R5U4tDPYsinvK9uiLRogSulzVcJMZ6lvDSS1pS6p7YGkPHZDV005GqB8dE2wSRPZX24I6ev1278Nw9DkCodWH040HCMvbYm02NJvGIa8Odw9TOvn1EtbwqqNmXp8bUh3t1Fb2m03tC9iymMoY4mXL08p1ktbw7pwrDfrDfeRMd6ZR/DQaTP0vTmluqV+UbfYadPhaEq/WB9qM2v0Gxlq4g702nVckV3vH4xrWj+nNtQmO6xufC4cNqNTStuMKE43Is6lUaCkFnWJ25LPzrmu8uER3nY3uxxd4pC6Pz7bLa6eWqxv/bdWX2+l5Nr/jvXq8jcP67kz+x9zGZVCMLO/UwM9tg7tNdPVBnhsevR/Ktq1GGczDFW4O+bvnM97OL7Mqe/X95zoC04flu7V8ptNYf1qU1gXjPFqYXVcUyucDU2MmzMMQ1dMLtbj69KZtEdKdrVXudumfm6bfrI6qB+eVN7u79NT2W2Gvja9pMsTpiZXOHXX/DLVxc1WS5F8Y3qJQgkz779fudumz08oksuuNp9zR5c4NMBj07MbQ3LYDO2PpBRKWhpT4tD/jvU2OT+kzHRPjEDc0o1tBPml9L3s1VOLVeqyNdn9e93MUr28NawfrQzo69Naf344wrIsra1JNJRL/GxlkYIJU3/aHtWft0cVr9/dWOIwdM/8siZjLnfb9O0ZJVp9OKG73w9kXJyoiZmKJK0W56In1gUbyvaOLHZooNemJQfiWXtd/GdfTCOL7Rl3vcRTlp7ckE7WGeaza3iRTSOLHS1KP9XGTN27vE6XVLZevuvySUVafiihn60NKZRMVx2wG4b2hFMNu9oa/5rV4ZQeaDZ/zx3p1e+3Rhqe7+riplKNFvPPG+3V8oNx3bTIr+tnteyhVBszddeyuqw9cew2QzfPTt+P3zGvLF0yLpLS/cvTJcCOK3aoNmbqn7ujenFLWP09Nl1SWaQKd7pM4n3LA7p2xtHyimNLHfr4KK8eXBVsstslk9d3R1s881441qeXtkRaPNM2/p0u+PtBvXBW/4z3sD9eFdDH63cEJC1L/ril6xelFzIaV194Yl1Qg312/fgD5frlhpD+Wx3XV6YUybSkB1cFVVnm0IMfqNAdS/2qiZlN5sczG8P62rT2P9emm/HmVnHCsizdtNgvn8Mmty19XjphkKtDSixuCyTlshkFfZ/UkxDo7wZHMvvsNkP9PDZVh1N5bw9rrfFldTil7YGkVhwydcpQd87blKR0cHqw166VhxL637GZS2S8si2i/xnqblF6w7LS9YEHemzdvoW1I7yyPdIkg7q/x65vTC/WrUv9umNe9u1fu4JJDctSu/i4Yof2hDqmaV4mifq61NmcMMiln64N9chAv2VZ+tP2qD6eIYN9yYG4Bnltev9gQmcd1/qNbmOb/Uk5bUebxlW4baqNZy4DY1qW3t0X1xt7ovLYDR2MmrovS3mS1vxwRUCxlFTmtrW7/EA8Zemna4O6+4T0Tf3Da4L67uzcsuiSpqWN/qRWHkpokz8pSZre36lLJxbp91sj+vP2iK6qv5E2LUsvbYloxaGEPjfB11DLVko/mI8otre6A+Kna4MZM4W+MqX1m43ZA12KpCzduzwgy0pnh5wxzN0kKP7SlkiT2qNSOtvXZqQzOjwOQ4v2x/TazqjuanaD3pjDZuj0YW69viemD9bfuCVNS7GU1e6mVLn46tRi3bu8Tmcd52mYY89VhXVJjrVbj5gz0KUlBxKaN8ilhJn+O1lSqyUTLhrv02s7oxpXateZw9waVuRrkVFz5ZQi3bjIr5HFdoWSlspcRpPslQvH+nTv+3Wa1q9MVf6ERpU48rqONHfZpCJd9M9DeviU8py+z7yBLlUmkhk/53UYuqSy84LlRU6brptZotuW1um2uaW65/2AvjuntN1ByEsqfbruPb9m9HfqwrE9Y9tvR7h1bql8XbgVHYXJMAz96OTWs9cnljv1u7P6F3Td7nw4bIZ+dmr+2fw9icveOQuI6Fhuu6EvTCxS0rT08taIplQ4dNqw7Auu48oc0k7p9qV1WTO6c3XZpCKFktYx3T/0dN21Kzpbjye33ZDb3r4g2dQ85vbnKn16/2BCFW6bBnptKnbatPxgXLcsqdPsAU59YoxXqw4n9FxVeldnPueN1na8f2KMT+tqErpzWZ1cdkOWpH5um84b7W1RrnZxhuB6sdOWV5+kaf2ciqfSjYyvbZSVv7A6ple2R+Q00uWVjpRF2htOyWEzmgQ8P1fp0/Xv+TWtlRI+CTNdnsiypO81qxxgWekM9ovG++S2G9oVSmnZwYR+sykip13637E+jS11aM3hhJ7eGNKNs0qzJoQZhqFZA1wNmeK7Q+kA/1Bf7uXkBvvs2t+ofM/zVWFd3KyM48wBLo0otuuuZXVNehJtrUvq0TVB3TQ7+zil9P34t2eU6I5lfl08vkjPVoUaknCk9MLChWN9unBs+n1/cn1IkZQlj93Qh0d4WiQDzR7oUm3c1PcW+3X6MLdOH+bOuONgTU1CnxjT9H59qM+ufeGWiVdH/LK+VNNja4INPbuOWHogrgllDs1udix+YIhL318e0ClD3TpzmFsPrw5qSj9nw7PqZZOKtepwQte955ckXTG5SJVl6Tl09bQSPbz6aH+w/ZGUfA4j63mhLaWuoz3S2vKPXTF9aLhHZw73KJq0tL42oac2hFThtunSifn3v7EsS2/tjemfu2MaVWzXBn+yYTcGjo1RW1tL54VOVFVVpcrKyoZ/p0xLtza6iToQSek3m8L6WoaMwNb8Z19MP1kd1HUzS1ps6/neYr+unZlulPf23liLuqitiaUs3Vzf7f5P26Ma5rO3qOe3/GBc7+yLKZxMdzD/vwk+Vbht+uvOdKPDc0Z6tTWQ1KGoqaunFWe9uQsnTS3eH2/zxrO7fG+xX7dnuNHdUpfUk+tDDfWqM3lwZUCfn5C96eD9y+v0tWktM5CPWLQ/pnkDXe0qofTuvpgiSStr6R5JumOpv8UFqSO9uSemUSV2jSpufdEjk3/sSpfnGOKzNzl+o0lLNy/x6/4TyrQ3bOpXm8ItatFtqE3okTVBDfLY5banL+x/3xnVHfPKmmQ//GFrRKNK7Jo1wNUwR2tjpm5b6tdHR3p1xjC3HDZDf90Rkddh6PQ8jtM/bovIbZfOGu7RDYv8bd54JUxLP1kV1PmjvU1KjHx/eZ0uHOtrqB3/q6qQxpc5Wq3ReyCS0u+2RHQ4asphkyaUOXR8f6fGljpa3MxUh1N6dE1QFW6b9kdT+sRoX6v1O+vipn66NqjvNCtdsr42kdc5pjXbA0n9e09MO4MpyZCO7+fUysOJjIsayw/GtTWQlNtuaHNdUldPLW7z2LIsSzcs8uuueembhn/sisptN3Tq0M5d5LIsS09tCMsw0pmrP1gR0C1z85tvCdPS7UvrNK7Uoc11SX1yjLchE/9Y+OPpjJoih6FvzyhpsWD2k1UBfWqcTz9bF9S1M0rbLNHTlsPRVF7ZZc2vm11ta11S171Xq3tPKM+7Jmxzqw8n9JPVAf3s1H4dNDqg63X3nAT6mu6Yc/GUpX/tjuZVHgVobtH+mP6wLaLK0nRz4s4K2FmWpQNRU/cvD+iu+U17Bty6xK/vzi7tkFKA/9kX09IDcS1w79O/EkPlcxj6Qn3izqNrQzquyK7zRnt157I6fW1acYtknB3BpH6zKdziOUqSnt4Q0tyBLg322fT9+t/jyJgfXh3QCYPcGZ/P0r2SItpcl9QAj01fy2GXQ0f5+bqgPjrSq0Fem+5aVtfQA6s507L007UhFTsNTShz6LWdUd04uzSvxb9VhxP6x66ovpHD7xdOmtpQm2y1F8ORMb25J6Z/74lpkC/dP+7IM066v0GgxS5mKf1s389t0/80e3asiZl6fG1Q188q1fNVIU0qdzb07TEtS9e/589a2ut3m8P6x+6ovjChKGPyZTRpyVLLUqm/2xzW8CK7Th7i1veX1+mySUXHtAvLsizdtSzz7978dTcu8uvuDCV4Vx1O6JmNoSZN4U3LUk3MVNJML0K67JJNhjb4E1pxKKGdwZQSpqUPDHHrg8PTiy8rD8W1aH+8RenaTLg3zY6M/i725x1RnTPyaNBwoNeuQzFTqRyysKX0CeXPOyL69Qf76XtL6jTQY2vY6vX67qjmDnSpxGnTpHKbfr810iLzP2mmM+/PPs7TZGX50TVBXTU1XU/tnJEe3bmsrsWF5XdbIrq9PsB9IJLSE+tDqo2bOvs4b0PN7f8Z6tYmf0I3vufXV6YUZ6yNLKWbdCZMSz6HLWuDoM6WMC0lzaYn0A21CVW2Mu6xpelGSHcsq9P35pS2OHGblqXaeNtbJk8f5tbfd0Uz1l3fEUzqmY1hvbsv3uYWs0ze2hvTN3NYOPLYjaw15Y7F67uj2lyX1PZgUjsC6ZX/WQOcbTaIM630Q8Y988v0yvb0lrwjNc0fWh1oCOwOK7LL5zC0yZ/Q+PoV7v2RlH65IaQfnVQuu81QJGnp/YPpOnfNtziePcKjH60KNNwMWJal+1fU6XtzyprsVvnwCI+uf8+v04a6W1zQMm3Z3RFMam1NoqGJ1Q2zSnX3++ma9Jnmt2VZunNpnT5T6dO/dkf1683pBlDra9OZ1I0bxH5mfDo7eNYAV8NNUsq09PqemN7ZG9MAj00XjvO12oS5scE+u26bV6Y9oZZNXZsrddkUN5vWH9wdSunxtUHd3wHb4UeVOPSFiY6G32fFoYROaOWcMKO/Uz9cGdBHRnj0tVbKQTRnGIYuGufTbzeH9dnKIr27L6bvtXEj0xEMw9Clk4r01t6YPv/GYf2oHQ0InTZDpw11a0SxvUObuJa50lkXyw8lMu6KubjSp7uWBVRZ5jjmIL+UuQxPTzam1KHffqh/h/SqmdbPqcf/p7CzeAEAvZ/LbhDkxzGbP8idc+PwY2EYhgZ57fratGL9YEVAN9UnCO0Np9TfY+uwfj8nD3ErlrJ0+3KXvnOiqyFDXUrv4P3D1ohuX+rXIK89447bkcUOjSt16J+7mvbPCydNbQ0k9YX6+/vLJhXp+ysCumFWqV7aEtaIYkerMZJSl02XHkMfjWNx7iivXtkWUbnb1qSXWnM2w9BVU4v1n30xra1J6ns51Mhvbno/Z867QXwOW9Yg/5ExnTHcozOGe7QjmNStS/z61Dif5gx0aenBuGYPzPyzPj7Ko+8urtPkCkeTuNovN4T0xfq/w2fGp3dvzOzvlN1m6MUtEV041pu1x8P/jvNl7eHUWhmtC8d6dcMiv8aVOtI7W47xOcswDI0sseuWxX59boKvIbbS3N92RvXhEZ6Mf8fp/Zy6Z36ZfrE+pN9vjcgwJEPpXTcOmxRLpReTTUmVZQ6dNtStkRmSQY/v79K/dscKqp9iT0VGfydrvNJ0pKbV3fPLm7zm3X0x1cXNNm+ujqyiXTcznSEcTabrT98yt1QOw9DtS5uusNXETD3SqNxHbczU3e/X6cMjPHpjT0wz+zt1wRivVh5OaMmBeJOa8Q+tDujCsUeDhv/ZF9OBqJlzQ9CEmd7qdvpQt05utkL57r6YdgRTumicV7csqdOXJhc1aepypJ7fWcd5GsqtdAbLsnTLkjpFkpYurvQ1ZMre836dvj6tOGtpj9WHE/rD1ohumt20bt8bu6OSlFN/hJsW+XXDrJbZtN9b7Nd1M0v07r64qiOpnOs6W5alx9aGNMBjy6nHwlt7YzItq0m2+rqahCaWt8wAz0c0aem2pS1Xe3+/JazDMTPrCu3vNoc1uuTojc2T60MaXWJXqcumNYcTDTdDUvpi8b0lft0zv0yRlKXvLU6X28i1JMsdS/26YVaptm7epLdTwzStX+Zs+X/viSqctJo0V94XTumuZXUqdho66ziPzhjmVsqSbljk1x1zm/ZxWHM4ob/vimZctHlwZUCnDnU3bOnzx009uT6kaMpqWCxobENtQq/vjumjIz16aWtEdXFTZw736JQhrk5tBLa+NqHF++O6pNKnF7ZEtKUuqW9ML+6WkgvtrYl665L04uNzVS13gnS2pGn1uAajbXlodUD/N7Eo5wbkHYkMDaBnYU4CXYs5B+TutZ1RBROmPjnWpx/V76zv38FJJtnm5KrDCVWWOrL2N7h9qV9fmlzcEFt5ZE1Q54z0NAlmvrknqr/siGpsqUNfzlIGtbvdtsSvuCndlUcvrJ7Isiw9szEdn4ikLH0jS/ynLm7qzmVHk/cOR9NJr413aiw/GNfKwwldMMar7y8P6LYOKIPWmi11SV3x1mE9f2b/vEuAtyaatPRcVUi7QildPqmoSd+II3HMu+blX9I4X0cqjbRVPpnrZHZ9ozBmD/Gf6rhOzlC//uTBLv23Ot7m1z+1IawLxvgayoB4HIaun1WiO5bW6eHVAV3ZrIxFhTvduGX14YS21CV117I63TCrVGcO9+iOeWUaXmTX9e/59fSGsC5tli362fE+/XpTWNKRmukRfWxU7uVLnDZDN8ws0Tv1292OCCZM/Xl7RBeN88owDN04q1Q/XhVUKJGuC7YtkNT17/k1Z4BLj68NadXhzusA/rN1IZ07yqP7TizTwv1xPbEuqGjSUspSm8Hiaf2cOmeURzcu8mtb4Gg96Tf3xlptdtXc1dOK9fDqYJOPLd4f15QKp4qcNi0Y4ZHNkP62M9Lm9wonTd28uE4nDHLl3Eh5/kCXFu1P/21SpqUfrQzoX7ujuuE9vw5F299D4PF1QX15SsuSKheM9amyzKn7ltfJtFquL8ZT6WzuxtkLX5xUpPcPJvR8VUifb1bf3GVPB9lf3RHVncvqdM2Mkrzqrp861KM398a0JmBT0rRazT45bai7YVFESi+Y/WBFQPedWKa755cpZUk3LfbrOwv9unxSUYubvKn9nKosc+iOpX69Vx2TVf99nt0Y0uQKZ5O6fWUum/7f8SUZg/xSuqZxicvQ33dFdenEIt06t0ynDs1cZ7AjTSp3ak1NQjct9muoz64bZpV2W13l9tZEvXRikT73+iGdP7rry4UVWpBfkr42raRbgvwAAABArs4e4dH2YEorD8UVSJgdHuRvy/R+mWvwN3bNjBI9sDKglGmpNmaqLm62yFg+bZhHn5tQpCsmd0+2fq6OK7brg8N7Xp+/fBlGup/JeaO98tmNrHGEUpdNX5hQpIfXpGM3T24IN2TzHzFzgEs7AunGwVd08kLN2FKHnjq9X4cF+aV0bPHyycW6bmapntoY1t93Rhs+9+qOqD4ywtvpQX4p/ax/4Vivnq+PRR5hZYgfoXVk9HeyqqoqlR03Vr/ZFJY/buqm2S3LvUjpTtvzB7lUE7O0/FBcwUT6z+KySRPKnfLW16TO1PF7fW1Cyw7E9dkMTQqTpqWvvlOjEUUOXTerpEVdNNOyFEu1rP0lpVeer5tZqnf2xWRa0llt1HzPxLIs3fV+QOeP9mpaP6fuWlanyycVNTkpVYdTenBVQNP7ObU7lNLXp5fIbTdkWuldAScOch1TLf8XNod18mBXk1XJf+yK6lDU1EWNmvIsORBPb5mbWZJzHexI0tIT64JKWuk63C9uieibeZTbORLsnTvQJau+lts9zWq5/bi+xEymrG3TsrTyUELPV4X17RklGpLnyf7OpXW6amqR7l2ezt6dUuFUXTxd7/DcUR6d2GxhqrVGNEds9if1150RXZ2lrMrqwwn9ZlNYN8xqGph/cn1IJw9xtWhea1mWIimr1cDydQtr9YX6secjZVq67j2/gnW1emzBqKy/18LqmPaEUvrISK9uXuzXTbNLm5T4OVInsnnzn8biKUv/2BXV4gNxJUxpcoWjUxuLdrTN/qQG+WwFHfx9e2+sRX1F9DxkaAA9C3MS6FrMOSA/SdPS/71xWNfOLGlSXqejdMScXFeTrjkfS1n6v4lFTRr3ojC8sDksS9KOQFLXZui7cDia0t93xZrEmArVi1vC2hFM6WvTinXLkpZVSTrbj1cFFEqmY6KG0iV8G1dY4DqZHYH+ThRImLrpjR2qHNpfnx7vyxoErIubenpjSDP6uzRrgLMhmBZNWtroT2hbIKVzR3nalblbGzNV5jLyXoFbeSiutTVJrTgUz9h0I1emZenWJXUaWWxXP7dNF4xteeJbV5NQTcxsUeZHSjd9Ma30YsTuUEqWJMuSvj69uM2g45a6pP60PSKbIYWTli6bVKTDUVMvb4vougwn5/aW2NgZTOqBlQF9+/iSJgsKbbGsdLD5znll+vP2iAb77C2asViWpZe3RrSuNilLktOW3jERTlqyGdKkMofOGeVtV6bzgysDqo6kdP2s0iZ1BY9sZavyJ+VzHv2+0aSl2+ZmrrF3ZKGieePbTPaEUnpyQ0g+h6HPVfrkdRj6YTualR6r324Oa1xst+ZOafsiceOiWllWOtO5rbr2bYkmrTYzP4C+ihs3oGdhTgJdizkH5K+thLRj0VFz8tebwjoYSelrOfTTQ890z/t1umJyUZfvHOkOG2oTunNZna6cUpwxTteduE5mR6C/E1mWpfVVmzR5QuEegJ/91yFdNbVYpxzjxE6all7YHNZnxvvadQFeX5tQkcPQMJ+9oRnw91cEdGmjzt6Z3LmsTt86Pl1LvCZm6hfrQ6qJmbptbmmPKaexpS6pl7aE5Y9bujOHWnfxlKWEaeVVpibb93LY1OoCUvMbpkX7Y1p5KJGxzv6LW8Ia7LXnlTF9IJLSc1VhrT6c0K1zS5v0augquV4kqvwJ2WS02mAaQMfgxg3oWZiTQNdizgE9C3MSfZVpWZ1eJrg9mJPZEbHqRIZhqNCTdh85paJJiZL2ctiMjKWFctW8nMtAr133zC/Tw2uCWn04kbFj+b5wSiVOo6HkS4Xb1uVNOHMxttShwT67LhybW4DcZTfazJjPVVvfp/mizPxBbq2rSeqdfbGGxR/LsvRsVVgpUzn/DkcM9NozNqntiSpb6UAPAAAAAADQm/TEID/aVrjFltElOiLI31nsNkPfmF6iCrdNj64Jtvj8sxtD+vyEwqiB/vkJRRpTWhjrbp+f4NM/d0W1O5RSLGXpjmV1Gl3i0KWTCuO9BgAAAAAAAHqbwogsAll86DiPQsmIXtoS1ifr6//746ZM9eyFikJlGIaum1mq7y72y5D0jenF3VJyBwAAAAAAAEAaUVD0CueN9upA1NR/9sUkpbP5LzmGUkHIzuswdPPsUt0xr4wgPwAAAAAAANDNCPSj1/jy5CK9vjum1YcTqomZGl7U+zuhd6dyt03eQm9CAQAAAAAAAPQCBPrRaxiGoetnlegnqwP6dIbmvAAAAAAAAADQG1FzA72Kw2boZ6f26+5hAAAAAAAAAECXIaMfAAAAAAAAAIACRqAfAAAAAAAAAIACllOg/91339VFF12kyZMnq7y8XM8//3xnjwsAAAAAAAAAAOQgp0B/KBTSlClTdO+998rr9Xb2mAAAAAAAAAAAQI5yasa7YMECLViwQJJ01VVXdeqAAAAAAAAAAABA7qjRDwAAAAAAAABAATNqa2utfL5g+PDhuv/++3XxxRdnfV1VVdUxDQwAAAAAAAAAAEiVlZVZP59T6Z7O+MF9RVVVFe8F0IMxR4GehTkJ9CzMSaBrMeeAnoU5CfQszMnsOi2jHwAAAAAAAAAAdD5q9AMAAAAAAAAAUMByKt0TDAa1ZcsWSZJpmtq1a5dWrlypiooKjRgxolMHCAAAAAAAAAAAWpdT6Z63335bH/vYx1p8/DOf+Ywee+yxThkYAAAAAAAAAABoW941+gEAAAAAAAAAQM9BjX4AAAAAAAAAAApYnwv0P/DAAzrjjDM0YsQIjRs3Tp/+9Ke1du3aJq+xLEv33HOPJk2apCFDhuicc87RunXrmrzmqaee0rnnnquRI0eqvLxc27dvb/L57du36+qrr9aMGTM0ZMgQzZgxQ7fddpsikUibY1yzZo0++tGPasiQIZo8ebLuu+8+WdbRjRdXXnmlysvLW/w3bNiwY3hngJ6hN8xRSfr5z3+u+fPna8iQIZo7d65+/etft/MdAbpXT5+T0WhUV155pU4++WQNGDBA55xzTovX7Nu3T5dffrnmzZunfv366corr2znuwF0v66ak6Zp6qKLLtK0adM0ePBgTZw4UVdccYX27NnT5hjbuk4yJ1FIesOce+edd7RgwQKNGTNGQ4YM0bx58/TQQw8dw7sCdJ/eMCfffvvtjDGdjRs3HsM7A3SP3jAne1Octc8F+t955x1ddtlleu211/TKK6/I4XDo/PPPV01NTcNrfvzjH+uRRx7Rfffdp9dff10DBw7UJz7xCQUCgYbXhMNhnXnmmbr++usz/pyqqiqlUik98MADWrhwoe6//3795je/afX1R9TV1ekTn/iEBg0apNdff1333nuvHnroIT388MMNr7n33nu1YcOGJv+NHj1a559//rG9OUAP0Bvm6C9+8Qvdeuut+s53vqOFCxfqhhtu0LXXXqu//vWvx/juAF2vp8/JVColj8ejK664QgsWLMj4mlgspn79+umb3/ym5s6d2453Aeg5umpOStKpp56qX/7yl1q8eLGeeeYZbdu2TZdccknW8eVynWROopD0hjlXXFysL3/5y/rLX/6ihQsX6pprrtE999yjJ5544hjeGaB79IY5ecTChQubxHXGjRvXjncE6F69YU72pjhrn6/RHwwGNXLkSD3//PP6yEc+IsuyNGnSJH3pS1/SNddcI0mKRCKqrKzUHXfcoUsvvbTJ17///vs644wztGLFCo0aNSrrz3riiSd01113aevWra2+5kiAcOPGjfJ6vZKk73//+3ryySe1du1aGYbR4msWLlyoD3/4w3rttdd0wgkn5PsWAD1aIc7RBQsWaM6cObrnnnsavu6mm27S0qVL9be//a29bwXQI/S0OdnYtddeq7Vr1+rVV19t9TWf/vSn1a9fPz322GM5fU+gp+vKOfmXv/xFn/3sZ7Vv3z55PJ6Mr8n3XpY5iUJT6HPuiEsuuURut1u/+MUv8n0LgB6lEOfk22+/rY997GPavHmz+vfv3wHvAtBzFOKcbK6Q46x9LqO/uWAwKNM0VV5eLildOqC6ulpnnnlmw2u8Xq9OPvlkvffee8f0swKBQMPPac2iRYt00kknNRx8kvTBD35Qe/fubbFt5Yinn35akydPLriDD8hFIc7RWCzW4iLj9Xq1dOlSJRKJYxoj0N162pwE+rqumpM1NTX63e9+p7lz57b6ICW1714WKCS9Yc6tWLFCixYt0gc+8IF2jw/oKQp5Tp5++umaOHGiPv7xj+utt95q99iAnqSQ5+QRhRxn7fOB/uuvv17Tp0/X/PnzJUnV1dWSpIEDBzZ53cCBA7V///52/5ydO3fqoYce0mWXXZb1dfv378/4s498rjm/368//vGP+vznP9/usQE9WSHO0Q9+8IN67rnntGzZMlmWpffff1/PPPOMEomEDh061O4xAj1BT5uTQF/X2XPylltu0bBhwzRmzBjt2rVLv/3tb7O+Pt97WaDQFPKcmzJligYNGqQzzjhDl112mb74xS/mPT6gpynEOTlkyBA98MADevbZZ/Xss8+qsrJS5513nt599928xwf0NIU4Jxsr9Dhrnw7033jjjVq4cKGeffZZ2e32Jp9rvnXDsqxWtz22Zf/+/frkJz+pM844Q1/96lcbPn7iiSdq+PDhGj58uC688MKsPzvTxyXphRdeUCqV0kUXXdSusQE9WaHO0WuvvVYLFizQggULNGDAAH32s5/VZz7zGUlq8XsAhaSnzkmgr+qKOfn1r39db731ll5++WXZ7XZdccUVDde9jriXBQpJoc+5v/zlL3rjjTf0ox/9SI899ph+85vf5D0+oCcp1DlZWVmpL37xi5o5c6bmz5+vH/7wh/rQhz5Ek2wUvEKdk40VepzV0d0D6C433HCDfv/73+tPf/qTRo8e3fDxwYMHS0oHGY477riGjx88eLDFClAuqqur9fGPf1yTJ0/W448/3uQgeuGFF5RMJiWpYZvJoEGDWqwoHTx4UFLL1S8pvZ3k4x//uCoqKvIeG9CTFfIc9Xq9euSRR/Tggw9q//79GjJkiJ566imVlJRQgxEFq6fOSaCv6qo52b9/f/Xv31/jx4/XhAkTNHXqVP33v//VySef3CH3skCh6A1z7si4p06dqv379+vee+8t2EAG0BvmZGNz5szR73//+7zHB/QUvWVOFnqctU9m9F933XV68cUX9corr2jChAlNPjdq1CgNHjxYb7zxRsPHotGo/vvf/+Zdm2nfvn0699xzNWHCBP3iF7+Qw9F0XWXkyJEaO3asxo4dq2HDhkmS5s+fr//+97+KRqMNr3vjjTc0dOjQFk0oli5dqtWrVxfsdhKgNb1ljjqdTg0fPlx2u10vvfSSzj77bNlsffK0iwLXk+ck0Bd11ZxszjRNSVI8Hpd07NdJoFD0xjlnmmbD9wUKTW+ck6tWrWoIiAKFprfMyd4QZ+1zGf3XXHONfvvb3+q5555TeXl5Q62ooqIiFRcXyzAMXXnllfrhD3+oyspKjR8/Xj/4wQ9UVFTUZNtHdXW1qqurtWnTJknShg0b5Pf7NWLECFVUVGjv3r0699xzNWTIEN1zzz1N6nIPGDCg1fIdF154oe677z5dddVVuuaaa7Rp0yY9+OCD+s53vtNiS8lTTz2lcePG6ZRTTunotwnoNr1hjm7atElLlizRvHnzVFtbq0ceeUTr1q3TY4891llvG9BpevqclKT169crHo/r0KFDCoVCWrlypSTp+OOPb3jNkY/V1dXJMAytXLlSLpdLkyZN6rg3C+gCXTUnFy1apBUrVujEE09UWVmZtm7dqrvvvlsjR47UiSee2Or4cr2XZU6iUPSGOff4449r1KhRqqyslCS9++67evjhh+mDg4LUG+bko48+qpEjR2ry5MmKx+N64YUX9Oqrr+qZZ57pxHcO6By9YU4e0RvirEZtba3V3YPoSke6Pjd33XXX6YYbbpCUrtV077336qmnnlJtba3mzJmjH/zgB5oyZUrD6++55x7dd999Lb7PI488oosvvljPP/98k7rCja1YsSLrSu6aNWt0zTXXaNmyZSovL9ell16q6667rskBGAgENGnSJH3nO9/RN77xjVx+daAg9IY5umHDBl1++eXatGmTnE6nTjnlFN12220ND1dAISmEOTl9+nTt3Lmzxcdra2uz/h4jRozQqlWrWv2+QE/UVXNy5cqVuvHGG7VmzRqFQiENGTJEH/rQh/Ttb39bw4cPzzrGXO5lmZMoFL1hzj366KN65plntGPHDjkcDo0ePVqf//zn9cUvfpHdpig4vWFO/vjHP9ZTTz2lvXv3yuPxaPLkyfp//+//acGCBe18V4Du0xvmpNR74qx9LtAPAAAAAAAAAEBvwvI9AAAAAAAAAAAFjEA/AAAAAAAAAAAFjEA/AAAAAAAAAAAFjEA/AAAAAAAAAAAFjEA/AAAAAAAAAAAFjEA/AAAAAAAAAAAFjEA/AAAAAAAAAAAFjEA/AAAAAAAAAAAFjEA/AAAAAAAAAAAF7P8DA4xmp85YHhUAAAAASUVORK5CYII=\n", 2582 | "text/plain": [ 2583 | "
" 2584 | ] 2585 | }, 2586 | "metadata": {}, 2587 | "output_type": "display_data" 2588 | } 2589 | ], 2590 | "source": [ 2591 | "hhid = 'MAC000004'\n", 2592 | "hh_df = df.loc[df['item_id'] == hhid, ['timestamp', 'target_value']]\n", 2593 | "hh_df = hh_df.set_index('timestamp')\n", 2594 | "\n", 2595 | "fig = plt.figure(figsize=(24,4))\n", 2596 | "plt.plot(hh_df, linewidth=0.5)\n", 2597 | "plt.show()" 2598 | ] 2599 | } 2600 | ], 2601 | "metadata": { 2602 | "kernelspec": { 2603 | "display_name": "conda_python3", 2604 | "language": "python", 2605 | "name": "conda_python3" 2606 | }, 2607 | "language_info": { 2608 | "codemirror_mode": { 2609 | "name": "ipython", 2610 | "version": 3 2611 | }, 2612 | "file_extension": ".py", 2613 | "mimetype": "text/x-python", 2614 | "name": "python", 2615 | "nbconvert_exporter": "python", 2616 | "pygments_lexer": "ipython3", 2617 | "version": "3.6.13" 2618 | } 2619 | }, 2620 | "nbformat": 4, 2621 | "nbformat_minor": 5 2622 | } 2623 | --------------------------------------------------------------------------------