├── README.md ├── common ├── util │ ├── __init__.py │ ├── notebook_utils.py │ └── fcst_utils.py └── images │ ├── amazon_forecast.png │ └── forecast_workflow.png ├── images ├── backtest.png ├── predictorARN.png ├── export_backtest_folders.png ├── export_backtest_results.png └── forecast_steps_overview.png ├── .gitignore ├── kaggle-data-analysis-and-preparation.ipynb ├── 3.Evaluating_Your_Predictor.ipynb ├── 2.Building_Your_Predictor.ipynb ├── 1.Importing_Your_Data.ipynb └── 0.Data_Explore.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # kaggle-m5-amazon-forecast-example -------------------------------------------------------------------------------- /common/util/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .fcst_utils import * 3 | from .notebook_utils import * 4 | 5 | -------------------------------------------------------------------------------- /images/backtest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietroppeter/kaggle-m5-amazon-forecast-example/main/images/backtest.png -------------------------------------------------------------------------------- /images/predictorARN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietroppeter/kaggle-m5-amazon-forecast-example/main/images/predictorARN.png -------------------------------------------------------------------------------- /common/images/amazon_forecast.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pietroppeter/kaggle-m5-amazon-forecast-example/main/common/images/amazon_forecast.png -------------------------------------------------------------------------------- /common/images/forecast_workflow.png: 
import sys
import io
import ipywidgets


# Cache of text widgets already created in this kernel session, keyed by the
# widget's description/name, so re-running a notebook cell reuses the widget
# (and whatever the user typed into it) instead of creating a fresh one.
widget_table = {}


def create_text_widget(name, placeholder, default_value=""):
    """Return the text widget registered under *name*, creating it if needed.

    The widget is displayed on every call, whether it was just created or
    fetched from ``widget_table``.

    :param name: Description shown next to the widget; also the cache key.
    :param placeholder: Placeholder text for a newly created widget.
    :param default_value: Initial value for a newly created widget. Ignored
        when a widget named *name* already exists.
    :return: The ``ipywidgets.Text`` instance for *name*.
    """
    widget = widget_table.get(name)
    if widget is None:
        widget = ipywidgets.Text(
            description=name,
            placeholder=placeholder,
            value=default_value,
        )
        widget_table[name] = widget

    # NOTE(review): `display` is not imported in this module; presumably it is
    # the name IPython injects into builtins when running inside a notebook
    # kernel -- confirm before using this module outside Jupyter.
    display(widget)

    return widget


class StatusIndicator:
    """Print a compact progress trace of a polled status to stdout.

    Each new status is written once at the start of its own line; every
    repeated observation of the same status appends a single ``.``.
    """

    def __init__(self):
        # Last status passed to update(); None until the first update.
        self.previous_status = None
        # True while the current output line has not been terminated.
        self.need_newline = False

    def update(self, status):
        """Record one observation of *status* and refresh the output.

        :param status: Status string to report.
        """
        if self.previous_status != status:
            # Status changed: finish the previous line, start a new one.
            if self.need_newline:
                sys.stdout.write("\n")
            sys.stdout.write(status + " ")
            self.need_newline = True
            self.previous_status = status
        else:
            # Same status as last time: just show that we are still polling.
            sys.stdout.write(".")
            self.need_newline = True
        sys.stdout.flush()

    def end(self):
        """Terminate the current output line, if any, and reset the indicator.

        Resetting the state makes ``end()`` idempotent (a second call prints
        nothing) and lets the same instance be reused for another polling
        run without emitting a stray blank line or an unlabeled ``.``.
        """
        if self.need_newline:
            sys.stdout.write("\n")
            sys.stdout.flush()
        self.need_newline = False
        self.previous_status = None
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
import time
import json
import gzip

import boto3
import botocore.exceptions

import pandas as pd
import matplotlib.pyplot as plt

import util.notebook_utils


def wait_till_delete(callback, check_time = 5, timeout = None):
    """Poll *callback* until it reports that the resource no longer exists.

    *callback* is invoked repeatedly; deletion is considered complete when it
    raises a ``botocore`` ``ClientError`` whose error code is
    ``ResourceNotFoundException``.

    :param callback: Zero-argument callable that queries the resource.
    :param check_time: Seconds to sleep between polls.
    :param timeout: Maximum total sleep time in seconds, or ``None`` to poll
        indefinitely.
    :raises botocore.exceptions.ClientError: For any client error other than
        ``ResourceNotFoundException``.
    :raises TimeoutError: If *timeout* elapses before the resource is gone.
    """
    elapsed_time = 0
    while timeout is None or elapsed_time < timeout:
        try:
            callback()
        except botocore.exceptions.ClientError as e:
            # When given the resource not found exception, deletion has occurred
            if e.response['Error']['Code'] == 'ResourceNotFoundException':
                print('Successful delete')
                return
            raise
        time.sleep(check_time)  # units of seconds
        elapsed_time += check_time

    raise TimeoutError( "Forecast resource deletion timed-out." )


def wait(callback, time_interval = 10):
    """Poll *callback* until the resource reaches a terminal status.

    Progress is echoed to stdout through
    ``util.notebook_utils.StatusIndicator``.

    :param callback: Zero-argument callable returning a mapping with a
        ``'Status'`` key.
    :param time_interval: Seconds to sleep between polls.
    :return: ``True`` if the final status is ``ACTIVE``, otherwise ``False``.
    """
    status_indicator = util.notebook_utils.StatusIndicator()

    try:
        while True:
            status = callback()['Status']
            status_indicator.update(status)
            # Besides ACTIVE, every *_FAILED / *_STOPPED status is terminal
            # (e.g. CREATE_FAILED, UPDATE_FAILED, CREATE_STOPPED); polling
            # only for CREATE_FAILED would spin forever on the others.
            if status == 'ACTIVE' or status.endswith(('_FAILED', '_STOPPED')):
                break
            time.sleep(time_interval)
    finally:
        # Always terminate the progress line, even if the callback raised.
        status_indicator.end()

    return (status=="ACTIVE")


def load_exact_sol(fname, item_id, is_schema_perm=False):
    """Load a header-less three-column CSV and keep the rows for one item.

    :param fname: Path (or buffer) accepted by ``pandas.read_csv``.
    :param item_id: Value of the ``item_id`` column to select.
    :param is_schema_perm: If true, the file's column order is
        ``timestamp, target, item_id`` instead of
        ``item_id, timestamp, target``.
    :return: DataFrame containing only the rows whose ``item_id`` matches.
    """
    exact = pd.read_csv(fname, header = None)
    if is_schema_perm:
        exact.columns = ['timestamp', 'target', 'item_id']
    else:
        exact.columns = ['item_id', 'timestamp', 'target']
    return exact.loc[exact['item_id'] == item_id]


def get_or_create_iam_role( role_name ):
    """Create an IAM role assumable by Amazon Forecast, or reuse an existing one.

    A newly created role trusts ``forecast.amazonaws.com`` and gets the
    ``AmazonForecastFullAccess`` and ``AmazonS3FullAccess`` managed policies
    attached; the function then sleeps 60 seconds so the attachment can
    propagate. If the role already exists, only its ARN is looked up.

    :param role_name: Name of the IAM role.
    :return: ARN of the created or pre-existing role.
    """
    iam = boto3.client("iam")

    assume_role_policy_document = {
        "Version": "2012-10-17",
        "Statement": [
            {
              "Effect": "Allow",
              "Principal": {
                "Service": "forecast.amazonaws.com"
              },
              "Action": "sts:AssumeRole"
            }
        ]
    }

    try:
        create_role_response = iam.create_role(
            RoleName = role_name,
            AssumeRolePolicyDocument = json.dumps(assume_role_policy_document)
        )
        role_arn = create_role_response["Role"]["Arn"]
        print("Created", role_arn)

        print("Attaching policies...")
        iam.attach_role_policy(
            RoleName = role_name,
            PolicyArn = "arn:aws:iam::aws:policy/AmazonForecastFullAccess"
        )

        iam.attach_role_policy(
            RoleName=role_name,
            PolicyArn='arn:aws:iam::aws:policy/AmazonS3FullAccess',
        )

        print("Waiting for a minute to allow IAM role policy attachment to propagate")
        time.sleep(60)
    except iam.exceptions.EntityAlreadyExistsException:
        print("The role " + role_name + " exists, ignore to create it")
        role_arn = boto3.resource('iam').Role(role_name).arn

    print("Done.")
    return role_arn


def delete_iam_role( role_name ):
    """Detach the S3 and Forecast managed policies from a role, then delete it.

    :param role_name: Name of the IAM role to remove.
    """
    iam = boto3.client("iam")
    iam.detach_role_policy( PolicyArn = "arn:aws:iam::aws:policy/AmazonS3FullAccess", RoleName = role_name )
    iam.detach_role_policy( PolicyArn = "arn:aws:iam::aws:policy/AmazonForecastFullAccess", RoleName = role_name )
    iam.delete_role(RoleName=role_name)


def create_bucket(bucket_name, region=None):
    """Create an S3 bucket in a specified region

    If a region is not specified, the bucket is created through the default
    client configuration without a location constraint.

    :param bucket_name: Bucket to create
    :param region: String region to create bucket in, e.g., 'us-west-2'
    :return: True if bucket created, else False
    """
    try:
        # region_name=None falls back to the session's configured region,
        # matching a plain boto3.client('s3') call.
        s3_client = boto3.client('s3', region_name=region)
        if region is None or region == "us-east-1":
            # us-east-1 must be requested *without* a LocationConstraint.
            s3_client.create_bucket(Bucket=bucket_name)
        else:
            location = {'LocationConstraint': region}
            s3_client.create_bucket(Bucket=bucket_name,
                                    CreateBucketConfiguration=location)
    except (botocore.exceptions.BotoCoreError,
            botocore.exceptions.ClientError) as e:
        # Best-effort contract: report AWS/SDK failures and signal them via
        # the return value; unrelated programming errors still propagate.
        print(e)
        return False
    return True


def plot_forecasts(fcsts, exact, freq = '1H', forecastHorizon=24, time_back = 80):
    """Plot recent actuals against the p10/p50/p90 forecast quantiles.

    The last *time_back* actual observations are drawn in red, the p50
    forecast in black, the p10-p90 band as a shaded area, and dashed green
    vertical lines mark the first and last forecast timestamps. Drawing is
    done on the current matplotlib axes.

    :param fcsts: Mapping with ``['Forecast']['Predictions']`` holding
        ``'p10'``, ``'p50'`` and ``'p90'`` lists of records that each have
        ``'Timestamp'`` and ``'Value'`` entries.
    :param exact: DataFrame with ``'timestamp'`` and ``'target'`` columns.
    :param freq: Unused; kept for interface compatibility.
    :param forecastHorizon: Unused; kept for interface compatibility.
    :param time_back: Number of trailing actual observations to plot.
    """
    predictions = fcsts['Forecast']['Predictions']
    p10 = pd.DataFrame(predictions['p10'])
    p50 = pd.DataFrame(predictions['p50'])
    p90 = pd.DataFrame(predictions['p90'])

    pred_int = p50['Timestamp'].apply(pd.Timestamp)
    fcst_start_date = pred_int.iloc[0]
    fcst_end_date = pred_int.iloc[-1]
    time_int = exact['timestamp'].apply(pd.Timestamp)

    plt.plot(time_int[-time_back:], exact['target'].values[-time_back:], color = 'r')
    plt.plot(pred_int, p50['Value'].values, color = 'k')
    plt.fill_between(pred_int,
                     p10['Value'].values,
                     p90['Value'].values,
                     color='b', alpha=0.3)
    # Mark the forecast window boundaries.
    plt.axvline(x=fcst_start_date, linewidth=3, color='g', ls='dashed')
    plt.axvline(x=fcst_end_date, linewidth=3, color='g', ls='dashed')
    plt.xticks(rotation=30)
    plt.legend(['Target', 'Forecast'], loc = 'lower left')


def extract_gz( src, dst ):
    """Decompress the gzip file *src* into the file *dst*.

    :param src: Path of the gzip-compressed input file.
    :param dst: Path of the output file; overwritten if it exists.
    """
    import shutil

    print( f"Extracting {src} to {dst}" )

    # Open the source first so a missing/unreadable archive does not leave an
    # empty (or truncated) destination behind, and stream the data in chunks
    # rather than holding the whole decompressed payload in memory.
    with gzip.open(src, 'rb') as fd_src:
        with open(dst, 'wb') as fd_dst:
            shutil.copyfileobj(fd_src, fd_dst)

    print("Done.")
"metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "def reduce_mem_usage(df, verbose=True):\n", 62 | " numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']\n", 63 | " start_mem = df.memory_usage().sum() / 1024**2 \n", 64 | " for col in df.columns:\n", 65 | " col_type = df[col].dtypes\n", 66 | " if col_type in numerics:\n", 67 | " c_min = df[col].min()\n", 68 | " c_max = df[col].max()\n", 69 | " if str(col_type)[:3] == 'int':\n", 70 | " if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:\n", 71 | " df[col] = df[col].astype(np.int8)\n", 72 | " elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:\n", 73 | " df[col] = df[col].astype(np.int16)\n", 74 | " elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:\n", 75 | " df[col] = df[col].astype(np.int32)\n", 76 | " elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:\n", 77 | " df[col] = df[col].astype(np.int64) \n", 78 | " else:\n", 79 | " if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:\n", 80 | " df[col] = df[col].astype(np.float16)\n", 81 | " elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:\n", 82 | " df[col] = df[col].astype(np.float32)\n", 83 | " else:\n", 84 | " df[col] = df[col].astype(np.float64) \n", 85 | " end_mem = df.memory_usage().sum() / 1024**2\n", 86 | " if verbose: print('Mem. 
usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))\n", 87 | " return df" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "def read_data():\n", 97 | " print('Reading files...')\n", 98 | " calendar = pd.read_csv('calendar.csv')\n", 99 | " calendar = reduce_mem_usage(calendar)\n", 100 | " print('Calendar has {} rows and {} columns'.format(calendar.shape[0], calendar.shape[1]))\n", 101 | " sell_prices = pd.read_csv('sell_prices.csv')\n", 102 | " sell_prices = reduce_mem_usage(sell_prices)\n", 103 | " print('Sell prices has {} rows and {} columns'.format(sell_prices.shape[0], sell_prices.shape[1]))\n", 104 | " item_demands = pd.read_csv('sales_train_validation.csv')\n", 105 | " print('Sales train validation has {} rows and {} columns'.format(item_demands.shape[0], item_demands.shape[1]))\n", 106 | " return calendar, sell_prices, item_demands" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "### Show 3 Tables \n", 114 | "* calendar \n", 115 | "* sell prices \n", 116 | "* item demands per day " 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "calendar, sell_prices, item_demands = read_data()" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "calendar.head()" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "sell_prices.head()" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "item_demands.head()" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 
null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "foodproduct = item_demands[item_demands['cat_id']=='FOODS']\n", 162 | "target_product = foodproduct['item_id'].unique()\n", 163 | "\n", 164 | "len(target_product)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "foodproduct = item_demands[item_demands['cat_id']=='FOODS'][:300]\n", 174 | "target_product = foodproduct['item_id'].unique()\n", 175 | "item_demands = item_demands[(item_demands['item_id'].isin(target_product))]\n", 176 | "sell_prices = sell_prices[(sell_prices['item_id'].isin(target_product))]" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "def melt_item_demands(item_demands):\n", 186 | " id_vars = ['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']\n", 187 | " value_vars = item_demands.columns[6:]\n", 188 | " item_demands = pd.melt(item_demands, id_vars = ['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id'], value_vars=value_vars, var_name = 'day', value_name = 'demand')\n", 189 | " item_demands = reduce_mem_usage(item_demands)\n", 190 | " return item_demands" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "item_demands_melt = melt_item_demands(item_demands)\n", 200 | "item_demands_melt.head()" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "data = pd.merge(item_demands_melt, calendar, how = 'left', left_on = ['day'], right_on = ['d'])\n", 210 | "data.drop(['d', 'day'], inplace = True, axis = 1)\n", 211 | "data.head()" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "metadata": {}, 218 | "outputs": [], 219 | 
"source": [ 220 | "data = data.merge(sell_prices, on = ['store_id', 'item_id', 'wm_yr_wk'], how = 'left')\n", 221 | "data.head()" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "from sklearn.preprocessing import OrdinalEncoder\n", 231 | "encoder = OrdinalEncoder()\n", 232 | "def transform(data):\n", 233 | " nan_features = ['event_name_1', 'event_type_1', 'event_name_2', 'event_type_2']\n", 234 | " \n", 235 | " for feature in nan_features: \n", 236 | " data[feature].fillna('unknown', inplace = True)\n", 237 | " values = [[v] for v in data[feature].unique()]\n", 238 | " transformed = encoder.fit_transform(values)\n", 239 | " trans_dict = {} \n", 240 | " for v,t in zip(values, transformed): \n", 241 | " trans_dict[v[0]] = t[0]\n", 242 | " data[feature] = data[feature].apply(lambda x: int(trans_dict[x])) \n", 243 | " \n", 244 | " return data\n", 245 | "\n" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [ 254 | "data['event_name_1'].unique()" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "transformed = transform(data)\n", 264 | "transformed.head()\n", 265 | "transformed.date.max()" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "item_cols = ['item_id', 'dept_id', 'cat_id']\n", 275 | "item_meta = transformed[item_cols].drop_duplicates()\n", 276 | "item_meta.to_csv('item_meta.csv', index=False)" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "demands_cols = ['item_id', 'date', 'demand', 'store_id', 'state_id']\n", 286 | "\n", 287 | "demands = transformed[demands_cols]\n", 
288 | "demands = demands.rename(columns={\"date\":\"timestamp\"})" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "demands.to_csv('demands.csv', index=False)" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "related_time_series_cols = ['item_id','date','store_id','state_id','event_type_1','event_type_2', 'snap_CA', 'snap_TX', 'snap_WI', 'sell_price']\n", 307 | "\n", 308 | "related_ts = transformed[related_time_series_cols]\n", 309 | "related_ts = related_ts.rename(columns={\"date\":\"timestamp\"})\n" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "related_ts.to_csv('related_ts.csv', index=False)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [] 327 | } 328 | ], 329 | "metadata": { 330 | "kernelspec": { 331 | "display_name": "conda_python3", 332 | "language": "python", 333 | "name": "conda_python3" 334 | }, 335 | "language_info": { 336 | "codemirror_mode": { 337 | "name": "ipython", 338 | "version": 3 339 | }, 340 | "file_extension": ".py", 341 | "mimetype": "text/x-python", 342 | "name": "python", 343 | "nbconvert_exporter": "python", 344 | "pygments_lexer": "ipython3", 345 | "version": "3.6.13" 346 | } 347 | }, 348 | "nbformat": 4, 349 | "nbformat_minor": 4 350 | } 351 | -------------------------------------------------------------------------------- /3.Evaluating_Your_Predictor.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Evaluating Your Forecast\n", 8 | "\n", 9 | "So far you have prepared your data, and generated your first 
Forecast. Now is the time to pull down the predictions from this Predictor, and compare them to the actual observed values. This will let us know the impact of accuracy based on the Forecast.\n", 10 | "\n", 11 | "You can extend the approaches here to compare multiple models or predictors and to determine the impact of improved accuracy on your use case.\n", 12 | "\n", 13 | "Overview:\n", 14 | "\n", 15 | "* Setup\n", 16 | "* Obtaining a Prediction\n", 17 | "* Plotting the Actual Results\n", 18 | "* Plotting the Prediction\n", 19 | "* Comparing the Prediction to Actual Results" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## Setup" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "Import the standard Python Libraries that are used in this lesson." 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 1, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "import json\n", 43 | "import time\n", 44 | "import dateutil.parser\n", 45 | "\n", 46 | "import boto3\n", 47 | "import pandas as pd" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "The line below will retrieve your shared variables from the earlier notebooks." 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 2, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "%store -r" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "Once again connect to the Forecast APIs via the SDK." 
71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "session = boto3.Session(region_name=region) \n", 80 | "forecast = session.client(service_name='forecast') \n", 81 | "forecastquery = session.client(service_name='forecastquery')" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## Obtaining a Prediction:\n", 89 | "\n", 90 | "Now that your predictor is active we will query it to get a prediction that will be plotted later." 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 4, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "# worst 10 \n", 100 | "# 971 TWS0850902\n", 101 | "# 659 TWS0926200\n", 102 | "# 178 TWS0926300\n", 103 | "# 170 TWS0855800\n", 104 | "# 126 TWS1126600\n", 105 | "# 701 TWS4416500\n", 106 | "# 302 TWS3602700\n", 107 | "# 794 TWS0548200\n", 108 | "# 758 TWS1243100\n", 109 | "# 1110 TWS4670200\n", 110 | "# best 10 \n", 111 | "\n", 112 | "\n", 113 | "item_id = \"TWS3802300\"" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 5, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "\n", 123 | "\n", 124 | "forecastResponse1 = forecastquery.query_forecast(\n", 125 | " ForecastArn=forecast_arn,\n", 126 | " Filters={\"item_id\":str(item_id)},\n", 127 | ")" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 6, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "{'Forecast': {'Predictions': {'p10': [{'Timestamp': '2022-02-01T00:00:00',\n", 139 | " 'Value': 0.0},\n", 140 | " {'Timestamp': '2022-03-01T00:00:00', 'Value': 0.0},\n", 141 | " {'Timestamp': '2022-04-01T00:00:00', 'Value': 0.0}],\n", 142 | " 'p50': [{'Timestamp': '2022-02-01T00:00:00', 'Value': 0.0},\n", 143 | " {'Timestamp': '2022-03-01T00:00:00', 'Value': 45.0},\n", 144 | " {'Timestamp': '2022-04-01T00:00:00', 'Value': 
45.0}],\n", 145 | " 'p90': [{'Timestamp': '2022-02-01T00:00:00', 'Value': 59.0},\n", 146 | " {'Timestamp': '2022-03-01T00:00:00', 'Value': 78.0},\n", 147 | " {'Timestamp': '2022-04-01T00:00:00', 'Value': 78.0}]}},\n", 148 | " 'ResponseMetadata': {'RequestId': 'a1553081-cb60-4304-b9cb-b29daa46e8ac',\n", 149 | " 'HTTPStatusCode': 200,\n", 150 | " 'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1',\n", 151 | " 'date': 'Wed, 12 Jan 2022 02:36:22 GMT',\n", 152 | " 'x-amzn-requestid': 'a1553081-cb60-4304-b9cb-b29daa46e8ac',\n", 153 | " 'content-length': '491',\n", 154 | " 'connection': 'keep-alive'},\n", 155 | " 'RetryAttempts': 0}}" 156 | ] 157 | }, 158 | "execution_count": 6, 159 | "metadata": {}, 160 | "output_type": "execute_result" 161 | } 162 | ], 163 | "source": [ 164 | "forecastResponse1" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "## Plotting the Prediction:\n", 172 | "\n", 173 | "Next we need to convert the JSON response from the Predictor to a dataframe that we can plot." 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 7, 179 | "metadata": {}, 180 | "outputs": [ 181 | { 182 | "data": { 183 | "text/html": [ 184 | "
\n", 185 | "\n", 198 | "\n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | "
TimestampValue
02022-02-01T00:00:000.0
12022-03-01T00:00:000.0
22022-04-01T00:00:000.0
\n", 224 | "
" 225 | ], 226 | "text/plain": [ 227 | " Timestamp Value\n", 228 | "0 2022-02-01T00:00:00 0.0\n", 229 | "1 2022-03-01T00:00:00 0.0\n", 230 | "2 2022-04-01T00:00:00 0.0" 231 | ] 232 | }, 233 | "execution_count": 7, 234 | "metadata": {}, 235 | "output_type": "execute_result" 236 | } 237 | ], 238 | "source": [ 239 | "# Generate DF \n", 240 | "prediction_df_p10 = pd.DataFrame.from_dict(forecastResponse1['Forecast']['Predictions']['p10'])\n", 241 | "prediction_df_p10.head()" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 8, 247 | "metadata": {}, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/plain": [ 252 | "" 253 | ] 254 | }, 255 | "execution_count": 8, 256 | "metadata": {}, 257 | "output_type": "execute_result" 258 | }, 259 | { 260 | "data": { 261 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAD4CAYAAADhNOGaAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAARiElEQVR4nO3db4xd9X3n8fenxsgpuMEYQhzsdNytH8ReEYEGiy1RRUOT2JTErBIpHm0bCCtZaRaJJtvtQiMV9RlSoxah0CCrixS0SS2UltZFJg6klSolIctAwYnt0kwRhVm74LqrmJYQ4vDdB3NNh+kdzx3fe+fi+b1f0mju+f055ztHP/sz59x756aqkCS166dGXYAkabQMAklqnEEgSY0zCCSpcQaBJDXunFEXcCYuuuiiGhsbG3UZknRWeeKJJ/6pqi6e235WBsHY2BiTk5OjLkOSzipJ/qFbu7eGJKlxBoEkNc4gkKTGnZXPEUjSQn784x8zPT3Nq6++OupSltyqVatYv349K1eu7Gm8QSBpWZqenmb16tWMjY2RZNTlLJmq4vjx40xPT7Nx48ae5nhrSNKy9Oqrr7J27dqmQgAgCWvXrl3UlZBBIGnZai0ETlnsz20QSFLjDAJJGoJrrrmG/fv3v6ntrrvu4tOf/vS840f1RlmDQJKGYGJigj179rypbc+ePUxMTIyoovkZBJI0BB/72Md46KGH+NGPfgTAc889x5EjR/jKV77C+Pg4W7Zs4Y477ug69/zzz3/j8Ve/+lVuuukmAI4dO8ZHP/pRrrzySq688kq++c1vDqRWXz4qadn73b84yKEjJwa6z83v+hnu+PCWefvXrl3L1q1b+drXvsaOHTvYs2cPH//4x7n99tu58MIL+clPfsK1117LgQMHuOyyy3o65q233spnPvMZ3ve+9/H888/zoQ99iMOHD/f9sxgEkjQkp24PnQqC++67jwceeIDdu3dz8uRJjh49yqFDh3oOgkcffZRDhw69sX3ixAlefvllVq9e3VedBoGkZe90v7kP0w033MBnP/tZnnzySX74wx+yZs0aPv/5z/P444+zZs0abrrppq6v95/98s/Z/a+//jrf/va3edvb3jbQOn2OQJKG5Pzzz+eaa67h5ptvZmJighMnTnDeeefx9re/nRdffJGHH36467xLLrmEw4cP8/rrr/P
ggw++0f7BD36QL3zhC29sP/XUUwOp0yCQpCGamJjg6aefZufOnbz3ve/l8ssvZ8uWLdx8881cffXVXefceeedXH/99bz//e9n3bp1b7TffffdTE5Octlll7F582buvffegdSYqhrIjpbS+Ph4+cE0kk7n8OHDvOc97xl1GSPT7edP8kRVjc8d6xWBJDXOIJCkxhkEkpats/HW9yAs9uc2CCQtS6tWreL48ePNhcGpzyNYtWpVz3N8H4GkZWn9+vVMT09z7NixUZey5E59QlmvDAJJy9LKlSt7/oSu1nlrSJIaZxBIUuMGEgRJtiV5JslUktu69CfJ3Z3+A0mumNO/IsnfJHloEPVIknrXdxAkWQHcA2wHNgMTSTbPGbYd2NT52gV8cU7/rUD/f0tVkrRog7gi2ApMVdWzVfUasAfYMWfMDuD+mvEYcEGSdQBJ1gO/AvzRAGqRJC3SIILgUuCFWdvTnbZex9wF/Bbw+ukOkmRXkskkky2+HEyShmUQQZAubXPfwdF1TJLrgZeq6omFDlJVu6tqvKrGL7744jOpU5LUxSCCYBrYMGt7PXCkxzFXAx9J8hwzt5Ten+R/D6AmSVKPBhEEjwObkmxMci6wE9g7Z8xe4BOdVw9dBfygqo5W1e1Vtb6qxjrz/rKqfnUANUmSetT3O4ur6mSSW4D9wArgvqo6mORTnf57gX3AdcAU8ArwyX6PK0kaDD+YRpIa4QfTSJK6MggkqXEGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaZxBIUuMMAklqnEEgSY0zCCSpcQaBJDXOIJCkxhkEktQ4g0CSGmcQSFLjDAJJapxBIEmNMwgkqXEGgSQ1ziCQpMYZBJLUOINAkho3kCBIsi3JM0mmktzWpT9J7u70H0hyRad9Q5K/SnI4ycEktw6iHklS7/oOgiQrgHuA7cBmYCLJ5jnDtgObOl+7gC922k8C/72q3gNcBfy3LnMlSUM0iCuCrcBUVT1bVa8Be4Adc8bsAO6vGY8BFyRZV1VHq+pJgKp6GTgMXDqAmiRJPRpEEFwKvDBre5p//5/5gmOSjAGXA98ZQE2SpB4NIgjSpa0WMybJ+cCfAL9RVSe6HiTZlWQyyeSxY8fOuFhJ0psNIgimgQ2zttcDR3odk2QlMyHw5ar60/kOUlW7q2q8qsYvvvjiAZQtSYLBBMHjwKYkG5OcC+wE9s4Zsxf4ROfVQ1cBP6iqo0kC/C/gcFX9/gBqkSQt0jn97qCqTia5BdgPrADuq6qDST7V6b8X2AdcB0wBrwCf7Ey/Gvg14LtJnuq0/XZV7eu3LklSb1I193b+W9/4+HhNTk6OugxJOqskeaKqxue2+85iSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaZxBIUuMMAklqnEEgSY0zCCSpcQaBJDXOIJCkxhkEktQ4g0CSGmcQSFLjDAJJapxBIEmNMwgkqXEGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaN5AgSLItyTNJppLc1qU/Se7u9B9IckWvcyVJw9V3ECRZAdwDbAc2AxNJNs8Zth3Y1PnaBXxxEXMlSUN0zgD2sRWYqqpnAZLsAXYAh2aN2QHcX1UFPJbkgiTrgLEe5g7M7/7FQQ4dOTGMXUvSktj8rp/hjg9vGeg+B3Fr6FLghVnb0522Xsb0MheAJLuSTCaZPHbsWN9FS5JmDOKKIF3aqscxvcydaazaDewGGB8f7zpmIYNOUUlaDgYRBNPAhlnb64EjPY45t4e5kqQhGsStoceBTUk2JjkX2AnsnTNmL/CJzquHrgJ+UFVHe5wrSRqivq8IqupkkluA/cAK4L6qOpjkU53+e4F9wHXAFPAK8MnTze23JklS7zLzQp6zy/j4eE1OTo66DEk6qyR5oqrG57b7zmJJapx
BIEmNMwgkqXEGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaZxBIUuMMAklqnEEgSY0zCCSpcQaBJDXOIJCkxhkEktQ4g0CSGmcQSFLjDAJJapxBIEmNMwgkqXEGgSQ1ziCQpMYZBJLUuL6CIMmFSR5J8v3O9zXzjNuW5JkkU0lum9X+e0n+NsmBJA8muaCfeiRJi9fvFcFtwDeqahPwjc72myRZAdwDbAc2AxNJNne6HwH+Y1VdBvwdcHuf9UiSFqnfINgBfKnz+EvADV3GbAWmqurZqnoN2NOZR1V9vapOdsY9Bqzvsx5J0iL1GwSXVNVRgM73d3QZcynwwqzt6U7bXDcDD/dZjyRpkc5ZaECSR4F3dun6XI/HSJe2mnOMzwEngS+fpo5dwC6Ad7/73T0eWpK0kAWDoKp+eb6+JC8mWVdVR5OsA17qMmwa2DBrez1wZNY+bgSuB66tqmIeVbUb2A0wPj4+7zhJ0uL0e2toL3Bj5/GNwJ93GfM4sCnJxiTnAjs780iyDfifwEeq6pU+a5EknYF+g+BO4ANJvg98oLNNkncl2QfQeTL4FmA/cBh4oKoOduZ/AVgNPJLkqST39lmPJGmRFrw1dDpVdRy4tkv7EeC6Wdv7gH1dxv18P8eXJPXPdxZLUuMMAklqnEEgSY0zCCSpcQaBJDXOIJCkxhkEktQ4g0CSGmcQSFLjDAJJapxBIEmNMwgkqXEGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaZxBIUuMMAklqnEEgSY0zCCSpcQaBJDXOIJCkxhkEktS4voIgyYVJHkny/c73NfOM25bkmSRTSW7r0v+bSSrJRf3UI0lavH6vCG4DvlFVm4BvdLbfJMkK4B5gO7AZmEiyeVb/BuADwPN91iJJOgP9BsEO4Eudx18CbugyZiswVVXPVtVrwJ7OvFP+APgtoPqsRZJ0BvoNgkuq6ihA5/s7uoy5FHhh1vZ0p40kHwH+b1U9vdCBkuxKMplk8tixY32WLUk65ZyFBiR5FHhnl67P9XiMdGmrJD/d2ccHe9lJVe0GdgOMj4979SBJA7JgEFTVL8/Xl+TFJOuq6miSdcBLXYZNAxtmba8HjgD/AdgIPJ3kVPuTSbZW1T8u4meQJPWh31tDe4EbO49vBP68y5jHgU1JNiY5F9gJ7K2q71bVO6pqrKrGmAmMKwwBSVpa/QbBncAHknyfmVf+3AmQ5F1J9gFU1UngFmA/cBh4oKoO9nlcSdKALHhr6HSq6jhwbZf2I8B1s7b3AfsW2NdYP7VIks6M7yyWpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaZxBIUuMMAklqnEEgSY0zCCSpcQaBJDXOIJCkxhkEktQ4g0CSGmcQSFLjDAJJapxBIEmNMwgkqXEGgSQ1LlU16hoWLckx4B/OcPpFwD8NsJxBsa7Fsa7Fsa7FeavWBf3V9rNVdfHcxrMyCPqRZLKqxkddx1zWtTjWtTjWtThv1bpgOLV5a0iSGmcQSFLjWgyC3aMuYB7WtTjWtTjWtThv1bpgCLU19xyBJOnNWrwikCTNYhBIUuOWVRAk2ZbkmSRTSW7r0p8kd3f6DyS5ote5Q67rv3TqOZDkW0neO6vvuSTfTfJUksklruuaJD/oHPupJL/T69wh1/U/ZtX0vSQ/SXJhp28o5yvJfUleSvK9efpHtbYWqmtUa2uhuka1thaqa8nXVmffG5L8VZLDSQ4mubXLmOGtsapaFl/ACuDvgZ8DzgWeBjbPGXMd8DAQ4CrgO73OHXJdvwCs6TzefqquzvZzwEUjOl/XAA+dydxh1jVn/IeBv1yC8/WLwBXA9+bpX/K11WNdS762eqxryddWL3WNYm119r0OuKLzeDXwd0v5/9dyuiL
YCkxV1bNV9RqwB9gxZ8wO4P6a8RhwQZJ1Pc4dWl1V9a2q+n+dzceA9QM6dl91DWnuoPc9AfzxgI49r6r6a+CfTzNkFGtrwbpGtLZ6OV/zGen5mmNJ1hZAVR2tqic7j18GDgOXzhk2tDW2nILgUuCFWdvT/PsTOd+YXuYOs67Z/iszqX9KAV9P8kSSXQOqaTF1/ackTyd5OMmWRc4dZl0k+WlgG/Ans5qHdb4WMoq1tVhLtbZ6tdRrq2ejXFtJxoDLge/M6RraGjtn0VW+daVL29zXxs43ppe5Z6rnfSf5JWb+sb5vVvPVVXUkyTuAR5L8bee3mqWo60lm/jbJvyS5DvgzYFOPc4dZ1ykfBr5ZVbN/wxvW+VrIKNZWz5Z4bfViFGtrMUaytpKcz0z4/EZVnZjb3WXKQNbYcroimAY2zNpeDxzpcUwvc4dZF0kuA/4I2FFVx0+1V9WRzveXgAeZuQxckrqq6kRV/Uvn8T5gZZKLepk7zLpm2cmcS/chnq+FjGJt9WQEa2tBI1pbi7HkayvJSmZC4MtV9addhgxvjQ3jiY9RfDFzdfMssJF/e8Jky5wxv8Kbn2z5P73OHXJd7wamgF+Y034esHrW428B25awrnfyb2863Ao83zl3Iz1fnXFvZ+Ze73lLcb46+xxj/ic/l3xt9VjXkq+tHuta8rXVS10jXFsB7gfuOs2Yoa2xZXNrqKpOJrkF2M/Ms+j3VdXBJJ/q9N8L7GPmmfcp4BXgk6ebu4R1/Q6wFvjDJAAna+avC14CPNhpOwf4SlV9bQnr+hjw60lOAj8EdtbMyhv1+QL4z8DXq+pfZ00f2vlK8sfMvNLloiTTwB3Aylk1Lfna6rGuJV9bPda15Gurx7pgiddWx9XArwHfTfJUp+23mQnyoa8x/8SEJDVuOT1HIEk6AwaBJDXOIJCkxhkEktQ4g0CSGmcQSFLjDAJJatz/B2QD02wvTl1LAAAAAElFTkSuQmCC\n", 262 | "text/plain": [ 263 | "
" 264 | ] 265 | }, 266 | "metadata": { 267 | "needs_background": "light" 268 | }, 269 | "output_type": "display_data" 270 | } 271 | ], 272 | "source": [ 273 | "# Plot\n", 274 | "prediction_df_p10.plot()" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "The above merely did the p10 values, now do the same for p50 and p90." 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 9, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "prediction_df_p50 = pd.DataFrame.from_dict(forecastResponse1['Forecast']['Predictions']['p50'])\n", 291 | "prediction_df_p90 = pd.DataFrame.from_dict(forecastResponse1['Forecast']['Predictions']['p90'])" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": {}, 297 | "source": [ 298 | "## Comparing the Prediction to Actual Results\n", 299 | "\n", 300 | "After obtaining the dataframes the next task is to plot them together to determine the best fit." 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 10, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [ 309 | "# We start by creating a dataframe to house our content, here source will be which dataframe it came from\n", 310 | "results_df = pd.DataFrame(columns=['timestamp', 'value', 'source'])" 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 | "Import the observed values into the dataframe:" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 11, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "# for index, row in gt_df.iterrows():\n", 327 | "# clean_timestamp = dateutil.parser.parse(row['timestamp'])\n", 328 | "# results_df = results_df.append({'timestamp' : clean_timestamp , 'value' : row['demand'], 'source': 'actual'} , ignore_index=True)" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 12, 334 | "metadata": {}, 335 | 
"outputs": [], 336 | "source": [ 337 | "# To show the new dataframe\n", 338 | "# results_df.head()" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 13, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "# Now add the P10, P50, and P90 Values\n", 348 | "for index, row in prediction_df_p10.iterrows():\n", 349 | " clean_timestamp = dateutil.parser.parse(row['Timestamp'])\n", 350 | " results_df = results_df.append({'timestamp' : clean_timestamp , 'value' : row['Value'], 'source': 'p10'} , ignore_index=True)\n", 351 | "for index, row in prediction_df_p50.iterrows():\n", 352 | " clean_timestamp = dateutil.parser.parse(row['Timestamp'])\n", 353 | " results_df = results_df.append({'timestamp' : clean_timestamp , 'value' : row['Value'], 'source': 'p50'} , ignore_index=True)\n", 354 | "for index, row in prediction_df_p90.iterrows():\n", 355 | " clean_timestamp = dateutil.parser.parse(row['Timestamp'])\n", 356 | " results_df = results_df.append({'timestamp' : clean_timestamp , 'value' : row['Value'], 'source': 'p90'} , ignore_index=True)" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 14, 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [ 365 | "results_df = results_df.fillna(0)" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 15, 371 | "metadata": {}, 372 | "outputs": [], 373 | "source": [ 374 | "results_df = results_df.drop_duplicates()" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 16, 380 | "metadata": {}, 381 | "outputs": [ 382 | { 383 | "data": { 384 | "image/png": "\n", 385 | "text/plain": [ 386 | "
" 387 | ] 388 | }, 389 | "metadata": { 390 | "needs_background": "light" 391 | }, 392 | "output_type": "display_data" 393 | } 394 | ], 395 | "source": [ 396 | "import matplotlib.pyplot as plt\n", 397 | "for t, c in [('p10', 'orange'), ('p50', 'red'), ('p90', 'blue')]:\n", 398 | " c_df = results_df[results_df['source']==t]\n", 399 | " c_df = c_df.sort_values(by='timestamp')\n", 400 | " plt.plot(c_df['timestamp'], c_df['value'], 'o-', color= c, label=t)" 401 | ] 402 | }, 403 | { 404 | "cell_type": "markdown", 405 | "metadata": {}, 406 | "source": [ 407 | "Once you are done exploring this Forecast you can cleanup all the work that was done by executing the cells inside `Cleanup.ipynb` within this folder." 408 | ] 409 | } 410 | ], 411 | "metadata": { 412 | "kernelspec": { 413 | "display_name": "conda_python3", 414 | "language": "python", 415 | "name": "conda_python3" 416 | }, 417 | "language_info": { 418 | "codemirror_mode": { 419 | "name": "ipython", 420 | "version": 3 421 | }, 422 | "file_extension": ".py", 423 | "mimetype": "text/x-python", 424 | "name": "python", 425 | "nbconvert_exporter": "python", 426 | "pygments_lexer": "ipython3", 427 | "version": "3.6.13" 428 | } 429 | }, 430 | "nbformat": 4, 431 | "nbformat_minor": 4 432 | } 433 | -------------------------------------------------------------------------------- /2.Building_Your_Predictor.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Building Your Predictor\n", 8 | "\n", 9 | "Forecasting is used in a variety of applications and business use cases: For example, retailers need to forecast the sales of their products to decide how much stock they need by location, Manufacturers need to estimate the number of parts required at their factories to optimize their supply chain, Businesses need to estimate their flexible workforce needs, Utilities need to forecast electricity 
consumption needs in order to attain an efficient energy network, and enterprises need to estimate their cloud infrastructure needs.\n", 10 | "\n", 11 | "\n", 12 | "\n", 13 | "In this notebook we will be walking through the steps outlined in the 2nd through 4th boxes above to build and query your first forecast.\n", 14 | "\n", 15 | "\n", 16 | "## Table Of Contents\n", 17 | "* Step 1: [Setup Amazon Forecast](#setup)\n", 18 | "* Step 2: [Create a Predictor](#createPredictor)\n", 19 | "* Step 3: [Get Predictor Error Metrics from Backtesting](#predictorErrors)\n", 20 | "* Step 4: [Create a Forecast](#createForecast)\n", 21 | "* Step 5: [Query the Forecast](#queryForecast)\n", 22 | "* [Next Steps](#nextSteps)\n", 23 | "\n", 24 | "For more information about the APIs, please check the [documentation](https://docs.aws.amazon.com/forecast/latest/dg/what-is-forecast.html)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "## Step 1: Setup Amazon Forecast\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "This section sets up the permissions and relevant endpoints." 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "import sys\n", 48 | "import os\n", 49 | "import time\n", 50 | "import pandas as pd\n", 51 | "\n", 52 | "# importing forecast notebook utility from notebooks/common directory\n", 53 | "sys.path.insert( 0, os.path.abspath(\"./common\") )\n", 54 | "import util\n", 55 | "\n", 56 | "%reload_ext autoreload\n", 57 | "import boto3\n", 58 | "import s3fs" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "The line below will retrieve your stored variables from the first notebook." 
66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "%store -r\n", 75 | "\n", 76 | "# Print your choices from first notebook\n", 77 | "# print(f\"item_id = {item_id}\")\n", 78 | "print(f\"project = {PROJECT}\")\n", 79 | "print(f\"data_version = {DATA_VERSION}\")\n", 80 | "print(f\"Forecast length = {FORECAST_LENGTH}\")\n", 81 | "print(f\"Dataset frequency = {DATASET_FREQUENCY}\")\n", 82 | "print(f\"Timestamp format = {TIMESTAMP_FORMAT}\")\n", 83 | "print(f\"dataset_group_arn = {dataset_group_arn}\")\n", 84 | "print(f\"role_arn = {role_arn}\")\n", 85 | "%store -r bucket_name\n", 86 | "print(f\"bucket_name = {bucket_name}\")\n", 87 | "%store -r region\n", 88 | "print(f\"region = {region}\")" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "The last part of the setup process is to validate that your account can communicate with Amazon Forecast, the cell below does just that." 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "# Connect API session\n", 105 | "session = boto3.Session(region_name=region) \n", 106 | "forecast = session.client(service_name='forecast') \n", 107 | "forecastquery = session.client(service_name='forecastquery')" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "## Step 2: Create a Predictor \n", 115 | "\n", 116 | "Once the datasets are specified with the corresponding schema, Amazon Forecast will automatically aggregate, at the specified time granularity, all the relevant pieces of information for each item, such as sales, price, promotions, as well as categorical attributes, and generate the desired dataset. Next, one can choose an algorithm (forecasting model) and evaluate how well this particular algorithm works on this dataset. 
The following graph gives a high-level overview of the forecasting models.\n", 117 | "\n", 118 | "\n", 119 | "\n", 120 | "\n", 121 | "\n", 122 | "Amazon Forecast provides several state-of-the-art forecasting algorithms including classic forecasting methods such as ETS, ARIMA, Prophet and deep learning approaches such as DeepAR+. Classical forecasting methods, such as Autoregressive Integrated Moving Average (ARIMA) or Exponential Smoothing (ETS), fit a single model to each individual time series, and then use that model to extrapolate the time series into the future. Amazon's Non-Parametric Time Series (NPTS) forecaster also fits a single model to each individual time series. Unlike the naive or seasonal naive forecasters that use a fixed time index (the previous index $T-1$ or the past season $T - \\tau$) as the prediction for time step $T$, NPTS randomly samples a time index $t \\in \\{0, \\dots T-1\\}$ in the past to generate a sample for the current time step $T$.\n", 123 | "\n", 124 | "In many applications, you may encounter many similar time series across a set of cross-sectional units. Examples of such time series groupings are demand for different products, server loads, and requests for web pages. In this case, it can be beneficial to train a single model jointly over all of these time series. CNN-QR and DeepAR+ take this approach, outperforming the standard ARIMA and ETS methods when your dataset contains hundreds of related time series. The trained model can also be used for generating forecasts for new time series that are similar to the ones it has been trained on. \n", 125 | "\n", 126 | "While deep learning approaches can outperform standard methods, this is only possible when there is sufficient data available for training. It is not true for example when one trains a neural network with a time-series containing only a few dozen observations. 
Amazon Forecast provides the best of both worlds, allowing users to either choose a specific algorithm or let Amazon Forecast automatically perform model selection. \n", 127 | "\n", 128 | "\n", 129 | "## How to evaluate a forecasting model?\n", 130 | "\n", 131 | "Before moving forward, let's first introduce the notion of *backtest* when evaluating forecasting models. The key difference between evaluating forecasting algorithms and standard ML applications is that we need to make sure that no future information gets used in the past. In other words, the procedure needs to be causal. \n", 132 | "\n", 133 | "\n", 134 | "\n" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "# Which algorithm do you want to use? Choices are:\n", 144 | "# 1. Choose PerformAutoML=True if you want to let Amazon Forecast choose a recipe automatically. \n", 145 | "# 2. If you know which recipe you want, the next level of automation is PerformHPO=True.\n", 146 | "# 3. 
Finally, you can specify exactly which recipe and enter your own hyperparameter values\n", 147 | "# https://docs.aws.amazon.com/forecast/latest/dg/aws-forecast-choosing-recipes.html\n", 148 | "\n", 149 | "algorithm_arn_prefix = 'arn:aws:forecast:::algorithm/'\n", 150 | "algorithm = 'CNN-QR'\n", 151 | "algorithm_arn = algorithm_arn_prefix + algorithm\n", 152 | "predictor = f\"{PROJECT}_{DATA_VERSION}\"\n", 153 | "print(f\"Predictor Name = {predictor}\")" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "create_predictor_response = \\\n", 163 | " forecast.create_auto_predictor(PredictorName=predictor, \n", 164 | " ForecastHorizon=FORECAST_LENGTH,\n", 165 | " ForecastFrequency=DATASET_FREQUENCY, \n", 166 | " OptimizationMetric='RMSE',\n", 167 | " ExplainPredictor=True, \n", 168 | " ForecastDimensions=['store_id', 'state_id'],\n", 169 | " DataConfig={\n", 170 | " 'DatasetGroupArn': dataset_group_arn,\n", 171 | " 'AttributeConfigs': [\n", 172 | " {\n", 173 | " 'AttributeName': \"demand\",\n", 174 | " 'Transformations': {\n", 175 | " \"aggregation\": \"sum\",\n", 176 | " \"backfill\": \"zero\",\n", 177 | " \"frontfill\": \"none\",\n", 178 | " \"middlefill\": \"zero\"\n", 179 | " }\n", 180 | " },\n", 181 | " \n", 182 | "# {\n", 183 | "# 'AttributeName': \"rolling_mean_t4\",\n", 184 | "# 'Transformations': {\n", 185 | "# \"backfill\": \"mean\",\n", 186 | "# \"futurefill\": \"mean\",\n", 187 | "# \"middlefill\": \"mean\"\n", 188 | "# }\n", 189 | "# },\n", 190 | "# {\n", 191 | "# 'AttributeName': \"rolling_std_t4\",\n", 192 | "# 'Transformations': {\n", 193 | "# \"backfill\": \"mean\",\n", 194 | "# \"futurefill\": \"mean\",\n", 195 | "# \"middlefill\": \"mean\"\n", 196 | "# }\n", 197 | "# },\n", 198 | "# {\n", 199 | "# 'AttributeName': \"rolling_mean_t12\",\n", 200 | "# 'Transformations': {\n", 201 | "# \"backfill\": \"mean\",\n", 202 | "# \"futurefill\": \"mean\",\n", 203 | 
"# \"middlefill\": \"mean\"\n", 204 | "# }\n", 205 | "# },\n", 206 | "# {\n", 207 | "# 'AttributeName': \"rolling_mean_t24\",\n", 208 | "# 'Transformations': {\n", 209 | "# \"backfill\": \"mean\",\n", 210 | "# \"futurefill\": \"mean\",\n", 211 | "# \"middlefill\": \"mean\"\n", 212 | "# }\n", 213 | "# },\n", 214 | " ]\n", 215 | " \n", 216 | " }\n", 217 | " ) \n", 218 | "\n", 219 | " \n" 220 | ] 221 | }, 222 | { 223 | "cell_type": "raw", 224 | "metadata": {}, 225 | "source": [ 226 | "create_predictor_response = \\\n", 227 | "\n", 228 | " forecast.create_predictor(PredictorName=predictor,\n", 229 | "# AlgorithmArn=algorithm,\n", 230 | " ForecastHorizon=FORECAST_LENGTH,\n", 231 | " PerformAutoML=True,\n", 232 | " OptimizationMetric='RMSE',\n", 233 | "\n", 234 | "# PerformHPO=True,\n", 235 | " InputDataConfig= {\"DatasetGroupArn\": dataset_group_arn},\n", 236 | " EvaluationParameters = { \n", 237 | " \"NumberOfBacktestWindows\": 3\n", 238 | " },\n", 239 | " FeaturizationConfig= {\"ForecastFrequency\": DATASET_FREQUENCY, \n", 240 | " 'Featurizations': \n", 241 | " [\n", 242 | " {\n", 243 | " \"AttributeName\": \"demand\",\n", 244 | " \"FeaturizationPipeline\": [\n", 245 | " {\n", 246 | " \"FeaturizationMethodName\": \"filling\",\n", 247 | " \"FeaturizationMethodParameters\": {\n", 248 | " \"aggregation\": \"sum\",\n", 249 | " \"backfill\": \"zero\",\n", 250 | " \"frontfill\": \"none\",\n", 251 | " \"middlefill\": \"zero\"\n", 252 | " }\n", 253 | " }\n", 254 | " ]\n", 255 | " },\n", 256 | " \n", 257 | " \n", 258 | " {\n", 259 | " \"AttributeName\": \"rolling_mean_t4\",\n", 260 | " \"FeaturizationPipeline\": [\n", 261 | " {\n", 262 | " \"FeaturizationMethodName\": \"filling\",\n", 263 | " \"FeaturizationMethodParameters\": {\n", 264 | " \"backfill\": \"mean\",\n", 265 | " \"futurefill\": \"mean\",\n", 266 | " \"middlefill\": \"mean\"\n", 267 | " }\n", 268 | " }\n", 269 | " ]\n", 270 | " }, \n", 271 | " {\n", 272 | " \"AttributeName\": \"rolling_std_t4\",\n", 273 | " 
\"FeaturizationPipeline\": [\n", 274 | " {\n", 275 | " \"FeaturizationMethodName\": \"filling\",\n", 276 | " \"FeaturizationMethodParameters\": {\n", 277 | " \"backfill\": \"mean\",\n", 278 | " \"futurefill\": \"mean\",\n", 279 | " \"middlefill\": \"mean\"\n", 280 | " }\n", 281 | " }\n", 282 | " ]\n", 283 | " },\n", 284 | " {\n", 285 | " \"AttributeName\": \"rolling_mean_t12\",\n", 286 | " \"FeaturizationPipeline\": [\n", 287 | " {\n", 288 | " \"FeaturizationMethodName\": \"filling\",\n", 289 | " \"FeaturizationMethodParameters\": {\n", 290 | " \"backfill\": \"mean\",\n", 291 | " \"futurefill\": \"mean\",\n", 292 | " \"middlefill\": \"mean\"\n", 293 | " }\n", 294 | " }\n", 295 | " ]\n", 296 | " }, \n", 297 | " {\n", 298 | " \"AttributeName\": \"rolling_mean_t24\",\n", 299 | " \"FeaturizationPipeline\": [\n", 300 | " {\n", 301 | " \"FeaturizationMethodName\": \"filling\",\n", 302 | " \"FeaturizationMethodParameters\": {\n", 303 | " \"backfill\": \"mean\",\n", 304 | " \"futurefill\": \"mean\",\n", 305 | " \"middlefill\": \"mean\"\n", 306 | " }\n", 307 | " }\n", 308 | " ]\n", 309 | " }\n", 310 | " \n", 311 | " \n", 312 | " ]\n", 313 | "\n", 314 | " }\n", 315 | " )" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "predictor_arn = create_predictor_response['PredictorArn']" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "Check the status of the predictor. When the status change from **CREATE_IN_PROGRESS** to **ACTIVE**, we can continue to next steps. Depending on data size, model selection and choice of hyper parameters tuning,it can take several hours to be **ACTIVE**." 
332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": { 338 | "scrolled": true 339 | }, 340 | "outputs": [], 341 | "source": [ 342 | "# status = util.wait(lambda: forecast.describe_predictor(PredictorArn=predictor_arn))\n", 343 | "status = util.wait(lambda: forecast.describe_auto_predictor(PredictorArn=predictor_arn))\n", 344 | "assert status" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": null, 350 | "metadata": {}, 351 | "outputs": [], 352 | "source": [ 353 | "# forecast.describe_predictor(PredictorArn=predictor_arn)\n", 354 | "forecast.describe_auto_predictor(PredictorArn=predictor_arn)" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": null, 360 | "metadata": {}, 361 | "outputs": [], 362 | "source": [ 363 | "%store predictor_arn\n", 364 | "%store bucket_name\n", 365 | "%store region " 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "## Step 3. Get Predictor Error Metrics from Backtesting \n", 373 | "\n", 374 | "After creating the predictors, we can query the forecast accuracy given by the backtest scenario and have a quantitative understanding of the performance of the algorithm. Such a process is iterative in nature during model development. When an algorithm with satisfying performance is found, the customer can deploy the predictor into a production environment, and query the forecasts for a particular item to make business decisions. The figure below shows a sample plot of different quantile forecasts of a predictor." 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "metadata": {}, 381 | "outputs": [], 382 | "source": [ 383 | "error_metrics = forecast.get_accuracy_metrics(PredictorArn=predictor_arn)\n", 384 | "error_metrics" 385 | ] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "metadata": {}, 390 | "source": [ 391 | "## Step 4. 
Create a Forecast " 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": null, 397 | "metadata": {}, 398 | "outputs": [], 399 | "source": [ 400 | "\n", 401 | "forecast_name = predictor_arn.split('/')[-1]+\"_forecast\"" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": null, 407 | "metadata": {}, 408 | "outputs": [], 409 | "source": [ 410 | "forecast_name" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": null, 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [ 419 | "\n", 420 | "\n", 421 | "create_forecast_response = \\\n", 422 | " forecast.create_forecast(ForecastName=forecast_name[0:63],\n", 423 | " PredictorArn=predictor_arn)\n", 424 | "\n", 425 | "forecast_arn = create_forecast_response['ForecastArn']" 426 | ] 427 | }, 428 | { 429 | "cell_type": "markdown", 430 | "metadata": {}, 431 | "source": [ 432 | "Check the status of the forecast process. When the status changes from **CREATE_IN_PROGRESS** to **ACTIVE**, we can continue to the next steps. Depending on data size, model selection, and choice of hyperparameter tuning, it can take several hours to become **ACTIVE**." 
433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": {}, 439 | "outputs": [], 440 | "source": [ 441 | "status = util.wait(lambda: forecast.describe_forecast(ForecastArn=forecast_arn))\n", 442 | "assert status" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": null, 448 | "metadata": { 449 | "scrolled": true 450 | }, 451 | "outputs": [], 452 | "source": [ 453 | "forecast.describe_forecast(ForecastArn=forecast_arn)" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [ 462 | "forecast_arn" 463 | ] 464 | }, 465 | { 466 | "cell_type": "markdown", 467 | "metadata": {}, 468 | "source": [ 469 | "## Step 5: Query the Forecast " 470 | ] 471 | }, 472 | { 473 | "cell_type": "markdown", 474 | "metadata": {}, 475 | "source": [ 476 | "Once created, the forecast results are ready and you can view them. 
" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": null, 482 | "metadata": {}, 483 | "outputs": [], 484 | "source": [ 485 | "# 47\tFOODS_1_218\tWI_3\n", 486 | "# 5662\tFOODS_1_200\tTX_2\n", 487 | "# 916\tFOODS_1_096\tWI_3\n", 488 | "# 4\tFOODS_1_200\tCA_1\n", 489 | "# 5115\tFOODS_2_021\tCA_3\n", 490 | "# 3950\tFOODS_1_085\tWI_1\n", 491 | "# 1704\tFOODS_1_218\tCA_3\n", 492 | "# 3388\tFOODS_2_021\tCA_1\n", 493 | "# 4616\tFOODS_2_050\tTX_2\n", 494 | "# 578\tFOODS_1_004\tCA_1\n", 495 | "\n", 496 | "\n", 497 | "item_id = 'FOODS_1_218'\n", 498 | "store_id = 'WI_3'" 499 | ] 500 | }, 501 | { 502 | "cell_type": "code", 503 | "execution_count": null, 504 | "metadata": {}, 505 | "outputs": [], 506 | "source": [ 507 | "forecast_response_deep = forecastquery.query_forecast(\n", 508 | " ForecastArn=forecast_arn,\n", 509 | " Filters={\"item_id\": item_id, \"store_id\":store_id})\n", 510 | "# Filters={\"item_id\": item_id})\n", 511 | "\n", 512 | "forecast_response_deep" 513 | ] 514 | }, 515 | { 516 | 
"cell_type": "code", 517 | "execution_count": null, 518 | "metadata": {}, 519 | "outputs": [], 520 | "source": [ 521 | "%store forecast_arn\n", 522 | "%store predictor_arn" 523 | ] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": {}, 528 | "source": [ 529 | "## Next Steps\n", 530 | "\n", 531 | "Congratulations!! You've trained your first Amazon Forecast model and generated your first forecast!!\n", 532 | "\n", 533 | "To dive deeper, here are a couple options for further evaluation:\n", 534 | "
    \n", 535 | "
  • To see an example of single item evaluation in a notebook, see `3.Evaluating_Your_Predictor.ipynb`.
  • \n", 536 | "
  • For an example how to use a notebook and Predictor Backtest Forecasts to evaluate all items at once using custom metrics, see `../advanced/Item_Level_Accuracy/Item_Level_Accuracy_Using_Bike_Example.ipynb`.
  • \n", 537 | "
  • Finally, for a production-level example, how to use Amazon QuickSight to visualize either Predictor Backtest Forecasts and/or Forecasts so you can share and socialize the results with others, see our automation solution Improving Forecast Accuracy
  • \n", 538 | "
  • Quick launch link for above automation
  • \n", 539 | "
\n", 540 | " \n", 541 | "For other advanced topics, see the `advanced` section of our notebooks. Several you may want to check out next:\n", 542 | "" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": null, 551 | "metadata": {}, 552 | "outputs": [], 553 | "source": [] 554 | } 555 | ], 556 | "metadata": { 557 | "kernelspec": { 558 | "display_name": "conda_python3", 559 | "language": "python", 560 | "name": "conda_python3" 561 | }, 562 | "language_info": { 563 | "codemirror_mode": { 564 | "name": "ipython", 565 | "version": 3 566 | }, 567 | "file_extension": ".py", 568 | "mimetype": "text/x-python", 569 | "name": "python", 570 | "nbconvert_exporter": "python", 571 | "pygments_lexer": "ipython3", 572 | "version": "3.6.13" 573 | } 574 | }, 575 | "nbformat": 4, 576 | "nbformat_minor": 4 577 | } 578 | -------------------------------------------------------------------------------- /1.Importing_Your_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Getting Data Ready\n", 8 | "\n", 9 | "Forecasting is used in a variety of applications and business use cases: For example, retailers need to forecast the sales of their products to decide how much stock they need by location, Manufacturers need to estimate the number of parts required at their factories to optimize their supply chain, Businesses need to estimate their flexible workforce needs, Utilities need to forecast electricity consumption needs in order to attain an efficient energy network, and enterprises need to estimate their cloud infrastructure needs.\n", 10 | "\n", 11 | "\n", 12 | "\n", 13 | "In this notebook we will be walking through the first steps outlined in left-box above.\n", 14 | "\n", 15 | "\n", 16 | "## Table Of Contents\n", 17 | "* Step 1: [Setup Amazon Forecast](#setup)\n", 18 | "* Step 2: [Prepare the Datasets](#DataPrep)\n", 19 | "* 
Step 3: [Create the Dataset Group and Dataset](#DataSet)\n", 20 | "* Step 4: [Create the Target Time Series Data Import Job](#DataImport)\n", 21 | "* [Next Steps](#nextSteps)\n", 22 | "\n", 23 | "For more informations about APIs, please check the [documentation](https://docs.aws.amazon.com/forecast/latest/dg/what-is-forecast.html)" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Step 1: Setup Amazon Forecast" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "This section sets up the permissions and relevant endpoints." 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "!pip install boto3 --upgrade" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "import sys\n", 56 | "import os\n", 57 | "import pandas as pd\n", 58 | "\n", 59 | "# importing forecast notebook utility from notebooks/common directory\n", 60 | "sys.path.insert( 0, os.path.abspath(\"./common\") )\n", 61 | "import util\n", 62 | "\n", 63 | "%reload_ext autoreload\n", 64 | "import boto3\n", 65 | "import s3fs" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "# what is your forecast horizon in number time units you've selected?\n", 75 | "# e.g. if you're forecasting in months, how many months out do you want a forecast?\n", 76 | "FORECAST_LENGTH = 8\n", 77 | "\n", 78 | "# What is your forecast time unit granularity?\n", 79 | "# Choices are: ^Y|M|W|D|H|30min|15min|10min|5min|1min$ \n", 80 | "DATASET_FREQUENCY = \"W\"\n", 81 | "TIMESTAMP_FORMAT = \"yyyy-MM-dd\"\n", 82 | "\n", 83 | "# What name do you want to give this project? 
\n", 84 | "# We will use this same name for your Forecast Dataset Group name.\n", 85 | "PROJECT = 'm5_sku_prediction_2m'\n", 86 | "DATA_VERSION = 6" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "Configure the S3 bucket name and region name for this lesson.\n", 94 | "\n", 95 | "- If you don't have an S3 bucket, create it first on S3. \n", 96 | "- Although we have set the region to us-west-2 as a default value below, you can choose any of the regions that the service is available in." 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "import boto3 \n", 106 | "import sagemaker \n", 107 | "\n", 108 | "session = boto3.session.Session()\n", 109 | "region = session.region_name\n", 110 | "client = boto3.client(\"sts\")\n", 111 | "account_id = client.get_caller_identity()[\"Account\"]\n", 112 | "\n", 113 | "bucket_name = sagemaker.session.Session().default_bucket()" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "# Connect API session\n", 123 | "session = boto3.Session(region_name=region) \n", 124 | "forecast = session.client(service_name='forecast') \n", 125 | "forecastquery = session.client(service_name='forecastquery')" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "Create IAM Role for Forecast
\n", 133 | "Like many AWS services, Forecast will need to assume an IAM role in order to interact with your S3 resources securely. In the sample notebooks, we use the get_or_create_iam_role() utility function to create an IAM role. Please refer to \"notebooks/common/util/fcst_utils.py\" for implementation." 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "# Create the role to provide to Amazon Forecast.\n", 143 | "role_name = \"ForecastNotebookRole\"\n", 144 | "print(f\"Creating Role {role_name} ...\")\n", 145 | "role_arn = util.get_or_create_iam_role( role_name = role_name )\n", 146 | "\n", 147 | "# echo user inputs without account\n", 148 | "print(f\"Success! Created role arn = {role_arn.split('/')[1]}\")" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "## Step 2: Prepare the Datasets\n", 156 | "\n", 157 | "For this exercise, we use the individual household electric power consumption dataset. (Dua, D. and Karra Taniskidou, E. (2017). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.) We aggregate the usage data hourly. \n", 158 | "\n", 159 | "To begin, use Pandas to read the CSV and to show a sample of the data." 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "\n", 169 | "tts_file = \"./processed_demands.csv\"\n", 170 | "df = pd.read_csv(tts_file, header=0)\n", 171 | "df" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "Notice in the output above there are 3 columns of data:\n", 179 | "\n", 180 | "1. The Timestamp\n", 181 | "1. A Value\n", 182 | "1. 
An Item ID\n", 183 | "\n", 184 | "These are the 3 key required pieces of information to generate a forecast with Amazon Forecast. More can be added but these 3 must always remain present.\n", 185 | "\n", 186 | "The dataset happens to span January 01, 2014 to December 31, 2014. We are only going to use January to October to train Amazon Forecast.\n", 187 | "\n", 188 | "You may notice a variable named `df`. This is a popular convention when using Pandas: it refers to the library's DataFrame object, which is similar to a table in a database. You can learn more here: https://pandas.pydata.org/pandas-docs/stable/getting_started/10min.html\n" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "At this time the data is ready to be sent to S3 where Forecast will use it later. The following cells will upload the data to S3." 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "tt_key=f\"m5/{PROJECT}_{DATA_VERSION}/m5-demand-time-train.csv\"\n", 205 | "\n", 206 | "boto3.Session().resource('s3').Bucket(bucket_name).Object(tt_key).upload_file(tts_file)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "### Prepare Meta Data " 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [ 222 | "meta_file = './item_meta.csv'\n", 223 | "\n", 224 | "meta_df = pd.read_csv(meta_file, header=0)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "meta_df.head()" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "meta_key=f\"m5/{PROJECT}_{DATA_VERSION}/m5-item-meta.csv\"\n", 243 | 
"boto3.Session().resource('s3').Bucket(bucket_name).Object(meta_key).upload_file(meta_file)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "### Prepare Related Time Series " 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "# rts_file = './related_ts.csv'\n", 260 | "rts_file = './related_ts.csv'\n", 261 | "rts_df = pd.read_csv(rts_file, header=0)\n" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "rts_df" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "rts_key=f\"m5/{PROJECT}_{DATA_VERSION}/m5-rts.csv\"\n", 280 | "boto3.Session().resource('s3').Bucket(bucket_name).Object(rts_key).upload_file(rts_file)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": {}, 286 | "source": [ 287 | "## Step 3: Create the Dataset Group and Dataset \n", 288 | "\n", 289 | "In Amazon Forecast , a dataset is a collection of file(s) which contain data that is relevant for a forecasting task. A dataset must conform to a schema provided by Amazon Forecast. Since data files are imported headerless, it is important to define a schema for your data.\n", 290 | "\n", 291 | "More details about `Domain` and dataset type can be found on the [documentation](https://docs.aws.amazon.com/forecast/latest/dg/howitworks-domains-ds-types.html) . For this example, we are using [CUSTOM](https://docs.aws.amazon.com/forecast/latest/dg/custom-domain.html) domain with 3 required attributes `timestamp`, `target_value` and `item_id`.\n", 292 | "\n", 293 | "\n", 294 | "Next, you need to make some choices. \n", 295 | "
    \n", 296 | "
  1. How many time units do you want to forecast?. For example, if your time unit is Hour, then if you want to forecast out 1 week, that would be 24*7 = 168 hours, so answer = 168.
  2. \n", 297 | "
  3. What is the time granularity for your data?. For example, if your time unit is Hour, answer = \"H\".
  4. \n", 298 | "
  5. Think of a name you want to give this project (Dataset Group name), so all files will have the same names. You should also use this same name for your Forecast DatasetGroup name, to set yourself up for reproducibility.
  6. \n", 299 | "
" 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": {}, 305 | "source": [ 306 | "### Create the Dataset Group\n", 307 | "\n", 308 | "In this task, we define a container name or Dataset Group name, which will be used to keep track of Dataset import files, schema, and all Forecast results which go together.\n" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": { 315 | "scrolled": true 316 | }, 317 | "outputs": [], 318 | "source": [ 319 | "dataset_group = f\"{PROJECT}_{DATA_VERSION}\"\n", 320 | "print(f\"Dataset Group Name = {dataset_group}\")" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "dataset_arns = []\n", 330 | "create_dataset_group_response = \\\n", 331 | " forecast.create_dataset_group(Domain=\"RETAIL\",\n", 332 | " DatasetGroupName=dataset_group,\n", 333 | " DatasetArns=dataset_arns)" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "metadata": {}, 340 | "outputs": [], 341 | "source": [ 342 | "dataset_group_arn = create_dataset_group_response['DatasetGroupArn']" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "dataset_group_arn" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "metadata": {}, 358 | "outputs": [], 359 | "source": [ 360 | "forecast.describe_dataset_group(DatasetGroupArn=dataset_group_arn)" 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": {}, 366 | "source": [ 367 | "### Create the Schema" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": {}, 374 | "outputs": [], 375 | "source": [ 376 | "# store_id\tstate_id\n", 377 | "# item_id\ttimestamp\tdemand\tlocation\n", 378 | "ts_schema ={\n", 379 | " \"Attributes\":[\n", 380 
| " {\n", 381 | " \"AttributeName\":\"item_id\",\n", 382 | " \"AttributeType\":\"string\"\n", 383 | " },\n", 384 | " {\n", 385 | " \"AttributeName\":\"timestamp\",\n", 386 | " \"AttributeType\":\"timestamp\"\n", 387 | " },\n", 388 | " {\n", 389 | " \"AttributeName\":\"demand\",\n", 390 | " \"AttributeType\":\"float\"\n", 391 | " },\n", 392 | " {\n", 393 | " \"AttributeName\":\"store_id\",\n", 394 | " \"AttributeType\":\"string\"\n", 395 | " },\n", 396 | " {\n", 397 | " \"AttributeName\":\"state_id\",\n", 398 | " \"AttributeType\":\"string\"\n", 399 | " }\n", 400 | "\n", 401 | " ]\n", 402 | "}" 403 | ] 404 | }, 405 | { 406 | "cell_type": "markdown", 407 | "metadata": {}, 408 | "source": [ 409 | "### Create the Dataset" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": null, 415 | "metadata": {}, 416 | "outputs": [], 417 | "source": [ 418 | "ts_dataset_name = f\"{PROJECT}_{DATA_VERSION}_tts\"\n", 419 | "print(ts_dataset_name)" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "metadata": {}, 426 | "outputs": [], 427 | "source": [ 428 | "response = \\\n", 429 | "forecast.create_dataset(Domain=\"RETAIL\",\n", 430 | " DatasetType='TARGET_TIME_SERIES',\n", 431 | " DatasetName=ts_dataset_name,\n", 432 | " DataFrequency=DATASET_FREQUENCY,\n", 433 | " Schema=ts_schema\n", 434 | " )" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": null, 440 | "metadata": {}, 441 | "outputs": [], 442 | "source": [ 443 | "ts_dataset_arn = response['DatasetArn']" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": null, 449 | "metadata": {}, 450 | "outputs": [], 451 | "source": [ 452 | "forecast.describe_dataset(DatasetArn=ts_dataset_arn)" 453 | ] 454 | }, 455 | { 456 | "cell_type": "markdown", 457 | "metadata": {}, 458 | "source": [ 459 | "### Create Meta Schema " 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": null, 465 | "metadata": {}, 466 | 
"outputs": [], 467 | "source": [ 468 | "# item_id\tproduct_type\tsegment\n", 469 | "# product_type\tsegmentation\tstyle_code\tcolor_code\titem_style\n", 470 | "# ['item_id', 'ListingPrice', \n", 471 | "# 'SAPLevel1Code', 'SAPLevel2Code', 'SAPLevel3Code', 'SAPLevel4Code',\n", 472 | "# 'SAPLevel6Code', 'SAPLevel7Code', 'SAPLevel8Code', 'SAPLevel9Code',\n", 473 | "# 'MaterialType']\n", 474 | "# dept_id\tcat_id\n", 475 | "meta_schema ={\n", 476 | " \"Attributes\":[\n", 477 | " {\n", 478 | " \"AttributeName\":\"item_id\",\n", 479 | " \"AttributeType\":\"string\"\n", 480 | " }, \n", 481 | " \n", 482 | " {\n", 483 | " \"AttributeName\":\"dept_id\",\n", 484 | " \"AttributeType\":\"string\"\n", 485 | " },\n", 486 | " {\n", 487 | " \"AttributeName\":\"cat_id\",\n", 488 | " \"AttributeType\":\"string\"\n", 489 | " }\n", 490 | " ]\n", 491 | "}" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": null, 497 | "metadata": {}, 498 | "outputs": [], 499 | "source": [ 500 | "meta_dataset_name = f\"{PROJECT}_{DATA_VERSION}_mt\"\n", 501 | "print(meta_dataset_name)" 502 | ] 503 | }, 504 | { 505 | "cell_type": "code", 506 | "execution_count": null, 507 | "metadata": {}, 508 | "outputs": [], 509 | "source": [ 510 | "response = \\\n", 511 | "forecast.create_dataset(Domain=\"RETAIL\",\n", 512 | " DatasetType='ITEM_METADATA',\n", 513 | " DatasetName=meta_dataset_name,\n", 514 | " Schema=meta_schema\n", 515 | " )" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": {}, 522 | "outputs": [], 523 | "source": [ 524 | "meta_dataset_arn = response['DatasetArn']" 525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": null, 530 | "metadata": { 531 | "scrolled": true 532 | }, 533 | "outputs": [], 534 | "source": [ 535 | "forecast.describe_dataset(DatasetArn = meta_dataset_arn)" 536 | ] 537 | }, 538 | { 539 | "cell_type": "markdown", 540 | "metadata": {}, 541 | "source": [ 542 | "### Create Related Time 
Series Schema " 543 | ] 544 | }, 545 | { 546 | "cell_type": "code", 547 | "execution_count": null, 548 | "metadata": {}, 549 | "outputs": [], 550 | "source": [ 551 | "\n", 552 | "# rolling_mean_t4\trolling_std_t4\trolling_mean_t12\trolling_mean_t24\n", 553 | "# store_id,state_id,event_type_1,event_type_2,snap_CA,snap_TX,snap_WI,sell_price\n", 554 | "\n", 555 | "rts_schema ={\n", 556 | " \"Attributes\": [\n", 557 | " {\n", 558 | " \"AttributeName\":\"item_id\",\n", 559 | " \"AttributeType\":\"string\"\n", 560 | " },\n", 561 | " {\n", 562 | " \"AttributeName\":\"timestamp\",\n", 563 | " \"AttributeType\":\"timestamp\"\n", 564 | " },\n", 565 | " {\n", 566 | " \"AttributeName\": \"store_id\",\n", 567 | " \"AttributeType\": \"string\"\n", 568 | " },\n", 569 | " {\n", 570 | " \"AttributeName\": \"state_id\",\n", 571 | " \"AttributeType\": \"string\"\n", 572 | " },\n", 573 | " \n", 574 | " {\n", 575 | " \"AttributeName\": \"event_type_1\",\n", 576 | " \"AttributeType\": \"integer\"\n", 577 | " },\n", 578 | " {\n", 579 | " \"AttributeName\": \"event_type_2\",\n", 580 | " \"AttributeType\": \"integer\"\n", 581 | " },\n", 582 | " {\n", 583 | " \"AttributeName\": \"snap_CA\",\n", 584 | " \"AttributeType\": \"integer\"\n", 585 | " },\n", 586 | " {\n", 587 | " \"AttributeName\": \"snap_TX\",\n", 588 | " \"AttributeType\": \"integer\"\n", 589 | " },\n", 590 | " {\n", 591 | " \"AttributeName\": \"snap_WI\",\n", 592 | " \"AttributeType\": \"integer\"\n", 593 | " },\n", 594 | " {\n", 595 | " \"AttributeName\": \"sell_price\",\n", 596 | " \"AttributeType\": \"float\"\n", 597 | " },\n", 598 | "# {\n", 599 | "# \"AttributeName\": \"rolling_mean_t1\",\n", 600 | "# \"AttributeType\": \"float\"\n", 601 | "# },\n", 602 | "# {\n", 603 | "# \"AttributeName\": \"rolling_mean_t2\",\n", 604 | "# \"AttributeType\": \"float\"\n", 605 | "# },\n", 606 | "# {\n", 607 | "# \"AttributeName\": \"rolling_mean_t4\",\n", 608 | "# \"AttributeType\": \"float\"\n", 609 | "# },\n", 610 | "# {\n", 611 | "# 
\"AttributeName\": \"rolling_mean_t12\",\n", 612 | "# \"AttributeType\": \"float\"\n", 613 | "# },\n", 614 | "# {\n", 615 | "# \"AttributeName\": \"rolling_mean_t24\",\n", 616 | "# \"AttributeType\": \"float\"\n", 617 | "# }\n", 618 | " ]\n", 619 | "}\n" 620 | ] 621 | }, 622 | { 623 | "cell_type": "code", 624 | "execution_count": null, 625 | "metadata": {}, 626 | "outputs": [], 627 | "source": [ 628 | "rts_dataset_name = f\"{PROJECT}_{DATA_VERSION}_rts\"\n", 629 | "print(rts_dataset_name)" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": null, 635 | "metadata": {}, 636 | "outputs": [], 637 | "source": [ 638 | "response = \\\n", 639 | "forecast.create_dataset(Domain=\"RETAIL\",\n", 640 | " DatasetType='RELATED_TIME_SERIES',\n", 641 | " DatasetName=rts_dataset_name,\n", 642 | " DataFrequency=DATASET_FREQUENCY,\n", 643 | " Schema=rts_schema\n", 644 | " )" 645 | ] 646 | }, 647 | { 648 | "cell_type": "code", 649 | "execution_count": null, 650 | "metadata": {}, 651 | "outputs": [], 652 | "source": [ 653 | "rts_dataset_arn = response['DatasetArn']" 654 | ] 655 | }, 656 | { 657 | "cell_type": "code", 658 | "execution_count": null, 659 | "metadata": { 660 | "scrolled": true 661 | }, 662 | "outputs": [], 663 | "source": [ 664 | "forecast.describe_dataset(DatasetArn = rts_dataset_arn)" 665 | ] 666 | }, 667 | { 668 | "cell_type": "markdown", 669 | "metadata": {}, 670 | "source": [ 671 | "### Update the dataset group with the datasets we created\n", 672 | "You can have multiple datasets under the same dataset group. Update it with the datasets we created before." 
 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": null, 678 | "metadata": {}, 679 | "outputs": [], 680 | "source": [ 681 | "dataset_arns = []\n", 682 | "dataset_arns.append(ts_dataset_arn)\n", 683 | "dataset_arns.append(rts_dataset_arn)\n", 684 | "dataset_arns.append(meta_dataset_arn)\n", 685 | "forecast.update_dataset_group(DatasetGroupArn=dataset_group_arn, DatasetArns=dataset_arns)" 686 | ] 687 | }, 688 | { 689 | "cell_type": "markdown", 690 | "metadata": {}, 691 | "source": [ 692 | "### Step 4: Create a Target Time Series Dataset Import Job \n", 693 | "\n", 694 | "\n", 695 | "Now that Forecast knows how to understand the CSV we are providing, the next step is to import the data from S3 into Amazon Forecast." 696 | ] 697 | }, 698 | { 699 | "cell_type": "code", 700 | "execution_count": null, 701 | "metadata": {}, 702 | "outputs": [], 703 | "source": [ 704 | "# Recall path to your data\n", 705 | "ts_s3_data_path = \"s3://\"+bucket_name+\"/\"+tt_key\n", 706 | "print(f\"S3 URI for your data file = {ts_s3_data_path}\")" 707 | ] 708 | }, 709 | { 710 | "cell_type": "code", 711 | "execution_count": null, 712 | "metadata": {}, 713 | "outputs": [], 714 | "source": [ 715 | "ts_dataset_import_job_response = \\\n", 716 | " forecast.create_dataset_import_job(DatasetImportJobName=dataset_group,\n", 717 | " DatasetArn=ts_dataset_arn,\n", 718 | " DataSource= {\n", 719 | " \"S3Config\" : {\n", 720 | " \"Path\": ts_s3_data_path,\n", 721 | " \"RoleArn\": role_arn\n", 722 | " } \n", 723 | " },\n", 724 | " TimestampFormat=TIMESTAMP_FORMAT)" 725 | ] 726 | }, 727 | { 728 | "cell_type": "code", 729 | "execution_count": null, 730 | "metadata": {}, 731 | "outputs": [], 732 | "source": [ 733 | "ts_dataset_import_job_arn=ts_dataset_import_job_response['DatasetImportJobArn']\n", 734 | "ts_dataset_import_job_arn" 735 | ] 736 | }, 737 | { 738 | "cell_type": "markdown", 739 | "metadata": {}, 740 | "source": [ 741 | "Check the status of the dataset import job. When the status changes 
from **CREATE_IN_PROGRESS** to **ACTIVE**, we can continue to the next steps. Depending on the data size, this process can take 5 to 10 minutes." 742 | ] 743 | }, 744 | { 745 | "cell_type": "code", 746 | "execution_count": null, 747 | "metadata": {}, 748 | "outputs": [], 749 | "source": [ 750 | "status = util.wait(lambda: forecast.describe_dataset_import_job(DatasetImportJobArn=ts_dataset_import_job_arn))\n", 751 | "assert status" 752 | ] 753 | }, 754 | { 755 | "cell_type": "code", 756 | "execution_count": null, 757 | "metadata": { 758 | "scrolled": false 759 | }, 760 | "outputs": [], 761 | "source": [ 762 | "forecast.describe_dataset_import_job(DatasetImportJobArn=ts_dataset_import_job_arn)" 763 | ] 764 | }, 765 | { 766 | "cell_type": "markdown", 767 | "metadata": {}, 768 | "source": [ 769 | "### Step 5: Create an Item Meta Data Dataset Import Job \n", 770 | "\n", 771 | "\n", 772 | "Now that Forecast knows how to understand the CSV we are providing, the next step is to import the data from S3 into Amazon Forecast." 
773 | ] 774 | }, 775 | { 776 | "cell_type": "code", 777 | "execution_count": null, 778 | "metadata": {}, 779 | "outputs": [], 780 | "source": [ 781 | "# Recall path to your data\n", 782 | "meta_s3_data_path = \"s3://\"+bucket_name+\"/\"+meta_key\n", 783 | "print(f\"S3 URI for your data file = {meta_s3_data_path}\")" 784 | ] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "execution_count": null, 789 | "metadata": {}, 790 | "outputs": [], 791 | "source": [ 792 | "meta_dataset_import_job_response = \\\n", 793 | " forecast.create_dataset_import_job(DatasetImportJobName=dataset_group,\n", 794 | " DatasetArn=meta_dataset_arn,\n", 795 | " DataSource= {\n", 796 | " \"S3Config\" : {\n", 797 | " \"Path\": meta_s3_data_path,\n", 798 | " \"RoleArn\": role_arn\n", 799 | " }, \n", 800 | " })" 801 | ] 802 | }, 803 | { 804 | "cell_type": "code", 805 | "execution_count": null, 806 | "metadata": {}, 807 | "outputs": [], 808 | "source": [ 809 | "meta_dataset_arn" 810 | ] 811 | }, 812 | { 813 | "cell_type": "code", 814 | "execution_count": null, 815 | "metadata": {}, 816 | "outputs": [], 817 | "source": [ 818 | "meta_dataset_import_job_arn=meta_dataset_import_job_response['DatasetImportJobArn']\n", 819 | "meta_dataset_import_job_arn" 820 | ] 821 | }, 822 | { 823 | "cell_type": "code", 824 | "execution_count": null, 825 | "metadata": {}, 826 | "outputs": [], 827 | "source": [ 828 | "dataset_group" 829 | ] 830 | }, 831 | { 832 | "cell_type": "code", 833 | "execution_count": null, 834 | "metadata": { 835 | "scrolled": true 836 | }, 837 | "outputs": [], 838 | "source": [ 839 | "status = util.wait(lambda: forecast.describe_dataset_import_job(DatasetImportJobArn=meta_dataset_import_job_arn))\n", 840 | "assert status" 841 | ] 842 | }, 843 | { 844 | "cell_type": "markdown", 845 | "metadata": {}, 846 | "source": [ 847 | "### Step 6: Related Time Series Dataset Import Job \n", 848 | "\n", 849 | "\n", 850 | "Now that Forecast knows how to understand the CSV we are providing, the next step is 
to import the data from S3 into Amazon Forecast." 851 | ] 852 | }, 853 | { 854 | "cell_type": "code", 855 | "execution_count": null, 856 | "metadata": {}, 857 | "outputs": [], 858 | "source": [ 859 | "# Recall path to your data\n", 860 | "rts_s3_data_path = \"s3://\"+bucket_name+\"/\"+rts_key\n", 861 | "print(f\"S3 URI for your data file = {rts_s3_data_path}\")" 862 | ] 863 | }, 864 | { 865 | "cell_type": "code", 866 | "execution_count": null, 867 | "metadata": {}, 868 | "outputs": [], 869 | "source": [ 870 | "rts_dataset_import_job_response = \\\n", 871 | " forecast.create_dataset_import_job(DatasetImportJobName=dataset_group,\n", 872 | " DatasetArn=rts_dataset_arn,\n", 873 | " DataSource= {\n", 874 | " \"S3Config\" : {\n", 875 | " \"Path\": rts_s3_data_path,\n", 876 | " \"RoleArn\": role_arn\n", 877 | " } \n", 878 | " }, \n", 879 | " TimestampFormat=TIMESTAMP_FORMAT)" 880 | ] 881 | }, 882 | { 883 | "cell_type": "code", 884 | "execution_count": null, 885 | "metadata": {}, 886 | "outputs": [], 887 | "source": [ 888 | "rts_s3_data_path" 889 | ] 890 | }, 891 | { 892 | "cell_type": "code", 893 | "execution_count": null, 894 | "metadata": {}, 895 | "outputs": [], 896 | "source": [ 897 | "rts_dataset_import_job_arn=rts_dataset_import_job_response['DatasetImportJobArn']\n", 898 | "rts_dataset_import_job_arn" 899 | ] 900 | }, 901 | { 902 | "cell_type": "code", 903 | "execution_count": null, 904 | "metadata": { 905 | "scrolled": true 906 | }, 907 | "outputs": [], 908 | "source": [ 909 | "status = util.wait(lambda: forecast.describe_dataset_import_job(DatasetImportJobArn=rts_dataset_import_job_arn))\n", 910 | "assert status" 911 | ] 912 | }, 913 | { 914 | "cell_type": "markdown", 915 | "metadata": {}, 916 | "source": [ 917 | "## Next Steps\n", 918 | "\n", 919 | "At this point you have successfully imported your data into Amazon Forecast and now it is time to get started in the next notebook to build your first model. 
To continue, execute the cell below to store important variables where they can be used in the next notebook, then open `2.Building_Your_Predictor.ipynb`." 920 | ] 921 | }, 922 | { 923 | "cell_type": "code", 924 | "execution_count": null, 925 | "metadata": {}, 926 | "outputs": [], 927 | "source": [ 928 | "# Now save your choices for the next notebook \n", 929 | "# %store item_id\n", 930 | "%store PROJECT\n", 931 | "%store DATA_VERSION\n", 932 | "%store FORECAST_LENGTH\n", 933 | "%store DATASET_FREQUENCY\n", 934 | "%store TIMESTAMP_FORMAT\n", 935 | "%store ts_dataset_import_job_arn\n", 936 | "%store ts_dataset_arn\n", 937 | "%store dataset_group_arn\n", 938 | "%store role_arn\n", 939 | "%store bucket_name\n", 940 | "%store region\n", 941 | "%store tt_key" 942 | ] 943 | }, 944 | { 945 | "cell_type": "code", 946 | "execution_count": null, 947 | "metadata": {}, 948 | "outputs": [], 949 | "source": [] 950 | }, 951 | { 952 | "cell_type": "code", 953 | "execution_count": null, 954 | "metadata": {}, 955 | "outputs": [], 956 | "source": [] 957 | }, 958 | { 959 | "cell_type": "code", 960 | "execution_count": null, 961 | "metadata": {}, 962 | "outputs": [], 963 | "source": [] 964 | } 965 | ], 966 | "metadata": { 967 | "kernelspec": { 968 | "display_name": "conda_python3", 969 | "language": "python", 970 | "name": "conda_python3" 971 | }, 972 | "language_info": { 973 | "codemirror_mode": { 974 | "name": "ipython", 975 | "version": 3 976 | }, 977 | "file_extension": ".py", 978 | "mimetype": "text/x-python", 979 | "name": "python", 980 | "nbconvert_exporter": "python", 981 | "pygments_lexer": "ipython3", 982 | "version": "3.6.13" 983 | } 984 | }, 985 | "nbformat": 4, 986 | "nbformat_minor": 4 987 | } 988 | -------------------------------------------------------------------------------- /0.Data_Explore.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8384e06d", 6 | 
"metadata": {}, 7 | "source": [ 8 | "## Explore data and perform data transformation\n", 9 | "* remove size information \n", 10 | "* aggregate stock and promo_counts by mean function \n", 11 | "* cast type " 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "id": "97e05739", 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "text/html": [ 23 | "
\n", 24 | "\n", 37 | "\n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | "
item_idtimestampdemandstore_idstate_id
0FOODS_1_0012011-01-293CA_1CA
1FOODS_1_0022011-01-290CA_1CA
2FOODS_1_0032011-01-290CA_1CA
3FOODS_1_0042011-01-290CA_1CA
4FOODS_1_0052011-01-293CA_1CA
..................
5738995FOODS_2_0802016-04-240WI_3WI
5738996FOODS_2_0812016-04-243WI_3WI
5738997FOODS_2_0822016-04-241WI_3WI
5738998FOODS_2_0832016-04-241WI_3WI
5738999FOODS_2_0842016-04-241WI_3WI
\n", 139 | "

5739000 rows × 5 columns

\n", 140 | "
" 141 | ], 142 | "text/plain": [ 143 | " item_id timestamp demand store_id state_id\n", 144 | "0 FOODS_1_001 2011-01-29 3 CA_1 CA\n", 145 | "1 FOODS_1_002 2011-01-29 0 CA_1 CA\n", 146 | "2 FOODS_1_003 2011-01-29 0 CA_1 CA\n", 147 | "3 FOODS_1_004 2011-01-29 0 CA_1 CA\n", 148 | "4 FOODS_1_005 2011-01-29 3 CA_1 CA\n", 149 | "... ... ... ... ... ...\n", 150 | "5738995 FOODS_2_080 2016-04-24 0 WI_3 WI\n", 151 | "5738996 FOODS_2_081 2016-04-24 3 WI_3 WI\n", 152 | "5738997 FOODS_2_082 2016-04-24 1 WI_3 WI\n", 153 | "5738998 FOODS_2_083 2016-04-24 1 WI_3 WI\n", 154 | "5738999 FOODS_2_084 2016-04-24 1 WI_3 WI\n", 155 | "\n", 156 | "[5739000 rows x 5 columns]" 157 | ] 158 | }, 159 | "execution_count": 1, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "import pandas as pd \n", 166 | "import os \n", 167 | "\n", 168 | "\n", 169 | "root_dir = './'\n", 170 | "sales_file = 'demands.csv'\n", 171 | "item_file = 'item_meta.csv'\n", 172 | "rts_file = 'related_ts.csv'\n", 173 | "\n", 174 | "\n", 175 | "\n", 176 | "sales = pd.read_csv(os.path.join(root_dir, sales_file))\n", 177 | "sales" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "id": "e4098365", 183 | "metadata": {}, 184 | "source": [ 185 | "#### Alternative: replace 0 demands with null \n", 186 | "[best practice of preparing data](https://aws.amazon.com/blogs/machine-learning/tailor-and-prepare-your-data-for-amazon-forecast/)\n", 187 | "\n", 188 | "```\n", 189 | "processed_sales = sales.copy()\n", 190 | "processed_sales['demand'] = sales['demand'].apply(lambda x: None if x==0 else x)\n", 191 | "processed_sales.to_csv('processed_demands.csv', index=False)\n", 192 | "processed_sales\n", 193 | "```" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 2, 199 | "id": "c5f7f224", 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/html": [ 205 | "
\n", 206 | "\n", 219 | "\n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | "
item_idtimestampdemandstore_idstate_id
0FOODS_1_0012011-01-293CA_1CA
4FOODS_1_0052011-01-293CA_1CA
9FOODS_1_0112011-01-292CA_1CA
11FOODS_1_0132011-01-292CA_1CA
14FOODS_1_0162011-01-294CA_1CA
..................
5738993FOODS_2_0782016-04-243WI_3WI
5738996FOODS_2_0812016-04-243WI_3WI
5738997FOODS_2_0822016-04-241WI_3WI
5738998FOODS_2_0832016-04-241WI_3WI
5738999FOODS_2_0842016-04-241WI_3WI
\n", 321 | "

2066737 rows × 5 columns

\n", 322 | "
" 323 | ], 324 | "text/plain": [ 325 | " item_id timestamp demand store_id state_id\n", 326 | "0 FOODS_1_001 2011-01-29 3 CA_1 CA\n", 327 | "4 FOODS_1_005 2011-01-29 3 CA_1 CA\n", 328 | "9 FOODS_1_011 2011-01-29 2 CA_1 CA\n", 329 | "11 FOODS_1_013 2011-01-29 2 CA_1 CA\n", 330 | "14 FOODS_1_016 2011-01-29 4 CA_1 CA\n", 331 | "... ... ... ... ... ...\n", 332 | "5738993 FOODS_2_078 2016-04-24 3 WI_3 WI\n", 333 | "5738996 FOODS_2_081 2016-04-24 3 WI_3 WI\n", 334 | "5738997 FOODS_2_082 2016-04-24 1 WI_3 WI\n", 335 | "5738998 FOODS_2_083 2016-04-24 1 WI_3 WI\n", 336 | "5738999 FOODS_2_084 2016-04-24 1 WI_3 WI\n", 337 | "\n", 338 | "[2066737 rows x 5 columns]" 339 | ] 340 | }, 341 | "execution_count": 2, 342 | "metadata": {}, 343 | "output_type": "execute_result" 344 | } 345 | ], 346 | "source": [ 347 | "processed_sales = sales.copy()\n", 348 | "processed_sales = processed_sales[processed_sales['demand']>0]\n", 349 | "processed_sales.to_csv('processed_demands.csv', index=False)\n", 350 | "processed_sales" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 3, 356 | "id": "3506d0cc", 357 | "metadata": {}, 358 | "outputs": [ 359 | { 360 | "data": { 361 | "text/html": [ 362 | "
\n", 363 | "\n", 376 | "\n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | "
item_iddept_idcat_id
0FOODS_1_001FOODS_1FOODS
1FOODS_1_002FOODS_1FOODS
2FOODS_1_003FOODS_1FOODS
3FOODS_1_004FOODS_1FOODS
4FOODS_1_005FOODS_1FOODS
............
295FOODS_2_080FOODS_2FOODS
296FOODS_2_081FOODS_2FOODS
297FOODS_2_082FOODS_2FOODS
298FOODS_2_083FOODS_2FOODS
299FOODS_2_084FOODS_2FOODS
\n", 454 | "

300 rows × 3 columns

\n", 455 | "
" 456 | ], 457 | "text/plain": [ 458 | " item_id dept_id cat_id\n", 459 | "0 FOODS_1_001 FOODS_1 FOODS\n", 460 | "1 FOODS_1_002 FOODS_1 FOODS\n", 461 | "2 FOODS_1_003 FOODS_1 FOODS\n", 462 | "3 FOODS_1_004 FOODS_1 FOODS\n", 463 | "4 FOODS_1_005 FOODS_1 FOODS\n", 464 | ".. ... ... ...\n", 465 | "295 FOODS_2_080 FOODS_2 FOODS\n", 466 | "296 FOODS_2_081 FOODS_2 FOODS\n", 467 | "297 FOODS_2_082 FOODS_2 FOODS\n", 468 | "298 FOODS_2_083 FOODS_2 FOODS\n", 469 | "299 FOODS_2_084 FOODS_2 FOODS\n", 470 | "\n", 471 | "[300 rows x 3 columns]" 472 | ] 473 | }, 474 | "execution_count": 3, 475 | "metadata": {}, 476 | "output_type": "execute_result" 477 | } 478 | ], 479 | "source": [ 480 | "meta_df = pd.read_csv(os.path.join(root_dir, item_file))\n", 481 | "\n", 482 | "meta_df" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": 4, 488 | "id": "f528b7e0", 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": [ 492 | "related_ts = pd.read_csv(os.path.join(root_dir, rts_file))" 493 | ] 494 | }, 495 | { 496 | "cell_type": "markdown", 497 | "id": "b2af5c75", 498 | "metadata": {}, 499 | "source": [ 500 | "### Observe data distribution \n", 501 | "* Data is highly skewed \n" 502 | ] 503 | }, 504 | { 505 | "cell_type": "code", 506 | "execution_count": 5, 507 | "id": "51307f02", 508 | "metadata": {}, 509 | "outputs": [], 510 | "source": [ 511 | "demand_by_item = sales.groupby(['item_id', 'timestamp']).sum().reset_index()" 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": 6, 517 | "id": "513edbf5", 518 | "metadata": {}, 519 | "outputs": [ 520 | { 521 | "data": { 522 | "text/html": [ 523 | "
\n", 524 | "\n", 537 | "\n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | "
item_iddemanddept_idcat_id
214FOODS_1_218234594FOODS_1FOODS
83FOODS_1_085148487FOODS_1FOODS
16FOODS_1_018133105FOODS_1FOODS
3FOODS_1_004129481FOODS_1FOODS
41FOODS_1_043128933FOODS_1FOODS
...............
82FOODS_1_0842047FOODS_1FOODS
157FOODS_1_1601998FOODS_1FOODS
77FOODS_1_0791681FOODS_1FOODS
286FOODS_2_0711123FOODS_2FOODS
288FOODS_2_0731094FOODS_2FOODS
\n", 627 | "

300 rows × 4 columns

\n", 628 | "
" 629 | ], 630 | "text/plain": [ 631 | " item_id demand dept_id cat_id\n", 632 | "214 FOODS_1_218 234594 FOODS_1 FOODS\n", 633 | "83 FOODS_1_085 148487 FOODS_1 FOODS\n", 634 | "16 FOODS_1_018 133105 FOODS_1 FOODS\n", 635 | "3 FOODS_1_004 129481 FOODS_1 FOODS\n", 636 | "41 FOODS_1_043 128933 FOODS_1 FOODS\n", 637 | ".. ... ... ... ...\n", 638 | "82 FOODS_1_084 2047 FOODS_1 FOODS\n", 639 | "157 FOODS_1_160 1998 FOODS_1 FOODS\n", 640 | "77 FOODS_1_079 1681 FOODS_1 FOODS\n", 641 | "286 FOODS_2_071 1123 FOODS_2 FOODS\n", 642 | "288 FOODS_2_073 1094 FOODS_2 FOODS\n", 643 | "\n", 644 | "[300 rows x 4 columns]" 645 | ] 646 | }, 647 | "execution_count": 6, 648 | "metadata": {}, 649 | "output_type": "execute_result" 650 | } 651 | ], 652 | "source": [ 653 | "\n", 654 | "\n", 655 | "demand_by_item.groupby(['item_id']).sum().reset_index().merge(meta_df, left_on=['item_id'], right_on=['item_id']).sort_values(by=['demand'], ascending=False)\n" 656 | ] 657 | }, 658 | { 659 | "cell_type": "code", 660 | "execution_count": 7, 661 | "id": "8e905f97", 662 | "metadata": {}, 663 | "outputs": [ 664 | { 665 | "data": { 666 | "text/plain": [ 667 | "" 668 | ] 669 | }, 670 | "execution_count": 7, 671 | "metadata": {}, 672 | "output_type": "execute_result" 673 | }, 674 | { 675 | "data": { 676 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAATj0lEQVR4nO3dfbAddX3H8feXEIkBKoEEmiHATZwMQhrIw5Xa4UFRCwjDky0aptMSoYYGnFFbZgzqNLFjOtiJ0qr1IY6OaFEICoqjVBEdsGNrcgMhEEJKkAjXZJIY0yZaEgh8+8fZ+/MSb5Jzk3vO3of3a+bO2f2d3T3f/c0hH/a3e3YjM5EkCeCwuguQJA0ehoIkqTAUJEmFoSBJKgwFSVJxeN0FHIrx48dnR0dH3WVI0pCycuXKX2XmhL7eG9Kh0NHRQVdXV91lSNKQEhG/2Nd7Dh9JkgpDQZJUGAqSpGJIn1OQNLK8+OKLdHd3s2vXrrpLGRLGjBnDpEmTGD16dNPrGAqShozu7m6OPvpoOjo6iIi6yxnUMpNt27bR3d3N5MmTm17P4SNJQ8auXbs47rjjDIQmRATHHXdcv4+qDAVJQ4qB0LyD6StDQZJUeE5B0pDVseC7A7q9Dbdc0u91Fi1axFFHHcVNN900oLX0V8+PecePH39I2xnRoTDQX6hmHcwXT5LaweEjSeqnxYsXc+qpp/LWt76VdevWAfD0009z0UUXMXv2bM4991yefPJJAObOncv8+fM5//zzmTJlCg8++CDXXnstp512GnPnzi3bnD9/Pp2dnUybNo2FCxeW9o6ODhYuXMisWbOYPn162e62bdu44IILmDlzJtdffz0D9RRNQ0GS+mHlypXccccdPPLII9x9992sWLECgHnz5vGpT32KlStXsmTJEm644Yayzvbt2/nRj37ErbfeyqWXXsr73/9+1qxZw2OPPcaqVauARtB0dXWxevVqHnzwQVavXl3WHz9+PA8//DDz589nyZIlAHzkIx/hnHPO4ZFHHuGyyy7j2WefHZD9G9HDR5LUXz/5yU+48sorGTt2LACXXXYZu3bt4qc//SlXXXVVWW737t1l+tJLLyUimD59OieccALTp08HYNq0aWzYsIEZM2awbNkyli5dyp49e9i0aRNPPPEEZ5xxBgBvf/vbAZg9ezZ33303AA899FCZvuSSSxg3btyA7J+hIEn9tPelni+//DLHHHNM+b/+vR1xxBEAHHbYYWW6Z37Pnj0888wzLFmyhBUrVjBu3Djmzp37it8X9KwzatQo9uzZs886BoLDR5LUD+eddx733HMPzz//PDt37uQ73/kOY8eOZfLkydx1111A49fEjz76aNPb3LFjB0ceeSSvec1r2Lx5M/fdd19Tddx+++0A3HfffWzfvv3gdmgvHilIGrLquJJv1qxZvPOd72TGjBmccsopnHvuuQDcfvvtzJ8/n49+9KO8+OKLzJkzhzPPPLOpbZ555pnMnDmTadOmMWXKFM4+++wDrrNw4UKuvvpqZs2axRvf+EZOPvnkQ9qvHjFQZ6zr0NnZmYfykB0vSZWGlrVr13LaaafVXcaQ0lefRcTKzOzsa3mHjyRJhaEgSSoMBUlDylAe8m63g+krQ0HSkDFmzBi2bdtmMDSh53kKY8aM6dd6Xn0kaciYNGkS3d3dbN26te5ShoSeJ6/1h6EgacgYPXp0v54ipv5z+EiSVLQsFCLipIj4cUSsjYg1EfHeqv3YiLg/Ip6qXsf1WufmiFgfEesi4sJW1SZJ6lsrjxT2AH+XmacBbwBujIjTgQXAA5k5FXigmqd6bw4wDbgI+ExEjGphfZKkvbQsFDJzU2Y+XE3vBNYCJwKXA7dVi90GXFFNXw7ckZm7M/MZYD1wVqvqkyT9vracU4iIDmAm8DPghMzcBI3gAI6vFjsReK7Xat1VmySpTVoeChFxFPBN4H2ZuWN/i/bR9nsXI0fEvIjoioguL0uTpIHV0lCIiNE0AuH
2zLy7at4cEROr9ycCW6r2buCkXqtPAjbuvc3MXJqZnZnZOWHChNYVL0kjUCuvPgrgi8DazPxEr7fuBa6ppq8Bvt2rfU5EHBERk4GpwPJW1SdJ+n2t/PHa2cBfAo9FxKqq7YPALcCyiLgOeBa4CiAz10TEMuAJGlcu3ZiZL7WwPknSXloWCpn5H/R9ngDgLftYZzGwuFU1SZL2z180S5IKQ0GSVBgKkqTCUJAkFYaCJKkwFCRJhaEgSSoMBUlSYShIkgpDQZJUGAqSpMJQkCQVhoIkqTAUJEmFoSBJKgwFSVJhKEiSCkNBklQYCpKkwlCQJBWGgiSpMBQkSYWhIEkqDAVJUmEoSJIKQ0GSVBgKkqTCUJAkFYaCJKkwFCRJhaEgSSoMBUlSYShIkgpDQZJUGAqSpMJQkCQVhoIkqTAUJElFy0IhIr4UEVsi4vFebYsi4pcRsar6u7jXezdHxPqIWBcRF7aqLknSvrXySOHLwEV9tN+amTOqv+8BRMTpwBxgWrXOZyJiVAtrkyT1oWWhkJkPAb9ucvHLgTsyc3dmPgOsB85qVW2SpL7VcU7hPRGxuhpeGle1nQg812uZ7qpNktRG7Q6FzwKvBWYAm4CPV+3Rx7LZ1wYiYl5EdEVE19atW1tSpCSNVG0NhczcnJkvZebLwBf43RBRN3BSr0UnARv3sY2lmdmZmZ0TJkxobcGSNMK0NRQiYmKv2SuBniuT7gXmRMQRETEZmAosb2dtkiQ4vFUbjoivA28CxkdEN7AQeFNEzKAxNLQBuB4gM9dExDLgCWAPcGNmvtSq2iRJfWtZKGTm1X00f3E/yy8GFreqHknSgfmLZklSYShIkgpDQZJUGAqSpKKpUIiIP2p1IZKk+jV7pPC5iFgeETdExDGtLEiSVJ+mQiEzzwH+gsavjrsi4msR8actrUyS1HZNn1PIzKeADwMfAN4IfDIinoyIt7eqOElSezV7TuGMiLgVWAu8Gbg0M0+rpm9tYX2SpDZq9hfNn6ZxA7sPZubzPY2ZuTEiPtySyiRJbddsKFwMPN9zP6KIOAwYk5n/l5lfbVl1kqS2avacwg+BV/eaH1u1SZKGkWZDYUxm/qZnppoe25qSJEl1aTYUfhsRs3pmImI28Px+lpckDUHNnlN4H3BXRPQ8DW0i8M6WVCRJqk1ToZCZKyLidcCpNJ6n/GRmvtjSyiRJbdefh+y8Huio1pkZEWTmV1pSlSSpFk2FQkR8FXgtsAroeUxmAoaCJA0jzR4pdAKnZ2a2shhJUr2avfroceAPW1mIJKl+zR4pjAeeiIjlwO6exsy8rCVVSZJq0WwoLGplEZKkwaHZS1IfjIhTgKmZ+cOIGAuMam1pkqR2a/bW2e8GvgF8vmo6EfhWi2qSJNWk2RPNNwJnAzugPHDn+FYVJUmqR7OhsDszX+iZiYjDafxOQZI0jDQbCg9GxAeBV1fPZr4L+E7rypIk1aHZUFgAbAUeA64Hvkfjec2SpGGk2auPXqbxOM4vtLYcSVKdmr330TP0cQ4hM6cMeEWSpNr0595HPcYAVwHHDnw5kqQ6NXVOITO39fr7ZWb+M/Dm1pYmSWq3ZoePZvWaPYzGkcPRLalIklSbZoePPt5reg+wAXjHgFcjSapVs1cfnd/qQiRJ9Wt2+Ohv9/d+Zn5iYMqRJNWpP1cfvR64t5q/FHgIeK4VRUmS6tGfh+zMysydABGxCLgrM/+6VYVJktqv2dtcnAy80Gv+BaBjwKuRJNWq2VD4KrA8IhZFxELgZ8BX9rdCRHwpIrZExOO92o6NiPsj4qnqdVyv926OiPURsS4iLjyYnZEkHZpmf7y2GHgXsB34H+BdmfmPB1jty8BFe7UtAB7IzKnAA9U8EXE6MAeYVq3zmYjwyW6S1GbNHikAjAV2ZOa/AN0RMXl/C2fmQ8Cv92q+HLitmr4NuKJX+x2ZuTsznwHWA2f1ozZJ0gBo9nGcC4EPADdXTaOBfzuIzzshMzcBVK89T28
7kVdeydRdtfVVy7yI6IqIrq1btx5ECZKkfWn2SOFK4DLgtwCZuZGBvc1F9NHW55PdMnNpZnZmZueECRMGsARJUrOh8EJmJtU/1BFx5EF+3uaImFhtYyKwpWrvBk7qtdwkYONBfoYk6SA1GwrLIuLzwDER8W7ghxzcA3fuBa6ppq8Bvt2rfU5EHFGdq5gKLD+I7UuSDsEBf7wWEQHcCbwO2AGcCvx9Zt5/gPW+DrwJGB8R3cBC4BYaAXMd8CyN5zKQmWsiYhnwBI0b7t2YmS8d7E5Jkg7OAUMhMzMivpWZs4H9BsFe6129j7feso/lFwOLm92+JGngNTt89F8R8fqWViJJql2z9z46H/ibiNhA4wqkoHEQcUarCpMktd9+QyEiTs7MZ4G3takeSVKNDnSk8C0ad0f9RUR8MzP/rA01SZJqcqBzCr1/VDallYVIkup3oFDIfUxLkoahAw0fnRkRO2gcMby6mobfnWj+g5ZWJ0lqq/2GQmZ6+2pJGkH6c+tsSdIwZyhIkgpDQZJUGAqSpMJQkCQVhoIkqTAUJEmFoSBJKgwFSVJhKEiSCkNBklQYCpKkwlCQJBWGgiSpMBQkSYWhIEkqDAVJUmEoSJIKQ0GSVBgKkqTCUJAkFYaCJKkwFCRJhaEgSSoMBUlSYShIkgpDQZJUGAqSpMJQkCQVhoIkqTi8jg+NiA3ATuAlYE9mdkbEscCdQAewAXhHZm6voz5JGqnqPFI4PzNnZGZnNb8AeCAzpwIPVPOSpDYaTMNHlwO3VdO3AVfUV4okjUx1hUICP4iIlRExr2o7ITM3AVSvx/e1YkTMi4iuiOjaunVrm8qVpJGhlnMKwNmZuTEijgfuj4gnm10xM5cCSwE6OzuzVQVK0khUy5FCZm6sXrcA9wBnAZsjYiJA9bqljtokaSRr+5FCRBwJHJaZO6vpC4B/AO4FrgFuqV6/3e7a2qVjwXdr+dwNt1xSy+dKGjrqGD46AbgnIno+/2uZ+e8RsQJYFhHXAc8CV9VQmySNaG0Phcz8OXBmH+3bgLe0ux5J0u8MpktSJUk1MxQkSYWhIEkqDAVJUmEoSJIKQ0GSVBgKkqTCUJAkFYaCJKkwFCRJhaEgSSoMBUlSYShIkgpDQZJUGAqSpMJQkCQVhoIkqTAUJEmFoSBJKgwFSVJhKEiSCkNBklQYCpKkwlCQJBWH112A2qdjwXdr++wNt1xS22dLap5HCpKkwlCQJBWGgiSpMBQkSYWhIEkqDAVJUmEoSJIKQ0GSVBgKkqTCUJAkFd7mQm1R1y026ry9xkjcZw19HilIkgpDQZJUDLpQiIiLImJdRKyPiAV11yNJI8mgCoWIGAX8K/A24HTg6og4vd6qJGnkGGwnms8C1mfmzwEi4g7gcuCJWqvSkFXnMyTq4nMz2mc49vVgC4UTged6zXcDf9x7gYiYB8yrZn8TEesO4nPGA786qAqHD/vAPugxYP0QHxuIrdRiyH0XDrGvT9nXG4MtFKKPtnzFTOZSYOkhfUhEV2Z2Hso2hjr7wD7oYT/YB70NqnMKNI4MTuo1PwnYWFMtkjTiDLZQWAFMjYjJEfEqYA5wb801SdKIMaiGjzJzT0S8B/g+MAr4UmauacFHHdLw0zBhH9gHPewH+6CIzDzwUpKkEWGwDR9JkmpkKEiSihEXCsPtNhoRsSEiHouIVRHRVbUdGxH3R8RT1eu4XsvfXO37uoi4sFf77Go76yPikxERVfsREXFn1f6ziOho+07uJSK+FBFbIuLxXm1t2eeIuKb6jKci4po27XKf9tEPiyLil9X3YVVEXNzrvWHVDxFxUkT8OCLWRsSaiHhv1T7ivgsDKjNHzB+Nk9dPA1OAVwGPAqfXXdch7tMGYPxebf8ELKimFwAfq6ZPr/b5CGBy1RejqveWA39C47ci9wFvq9pvAD5XTc8B7hwE+3weMAt4vJ37DBwL/Lx6HVdNjxtk/bAIuKmPZYddPwATgVnV9NHAf1f7OeK+CwP
5N9KOFMptNDLzBaDnNhrDzeXAbdX0bcAVvdrvyMzdmfkMsB44KyImAn+Qmf+ZjW/8V/Zap2db3wDe0vN/UXXJzIeAX+/V3I59vhC4PzN/nZnbgfuBiwZ6/5q1j37Yl2HXD5m5KTMfrqZ3Amtp3BVhxH0XBtJIC4W+bqNxYk21DJQEfhARK6NxCxCAEzJzEzT+wwGOr9r3tf8nVtN7t79inczcA/wvcFwL9uNQtWOfh8r35z0RsboaXuoZOhnW/VAN68wEfobfhUMy0kLhgLfRGILOzsxZNO4se2NEnLefZfe1//vrl6HeZwO5z0OhLz4LvBaYAWwCPl61D9t+iIijgG8C78vMHftbtI+2YdEHA2mkhcKwu41GZm6sXrcA99AYIttcHRJTvW6pFt/X/ndX03u3v2KdiDgceA3ND1m0Uzv2edB/fzJzc2a+lJkvA1+g8X2AYdoPETGaRiDcnpl3V81+Fw7BSAuFYXUbjYg4MiKO7pkGLgAep7FPPVdDXAN8u5q+F5hTXVExGZgKLK8OsXdGxBuq8dK/2mudnm39OfCjatx1sGnHPn8fuCAixlXDMhdUbYNGzz+GlStpfB9gGPZDVe8XgbWZ+Yleb/ldOBR1n+lu9x9wMY2rFJ4GPlR3PYe4L1NoXE3xKLCmZ39ojHk+ADxVvR7ba50PVfu+juoKi6q9k8Y/IE8Dn+Z3v3YfA9xF46TccmDKINjvr9MYGnmRxv+xXdeufQaurdrXA+8ahP3wVeAxYDWNf9AmDtd+AM6hMWSzGlhV/V08Er8LA/nnbS4kScVIGz6SJO2HoSBJKgwFSVJhKEiSCkNBklQYCpKkwlCQJBX/D3JA1dkNCAreAAAAAElFTkSuQmCC\n", 677 | "text/plain": [ 678 | "
" 679 | ] 680 | }, 681 | "metadata": { 682 | "needs_background": "light" 683 | }, 684 | "output_type": "display_data" 685 | } 686 | ], 687 | "source": [ 688 | "demand_by_item.groupby(['item_id']).sum().plot.hist(bins=10)" 689 | ] 690 | }, 691 | { 692 | "cell_type": "code", 693 | "execution_count": 8, 694 | "id": "4f2df4a6", 695 | "metadata": {}, 696 | "outputs": [ 697 | { 698 | "data": { 699 | "text/plain": [ 700 | "" 701 | ] 702 | }, 703 | "execution_count": 8, 704 | "metadata": {}, 705 | "output_type": "execute_result" 706 | }, 707 | { 708 | "data": { 709 | "image/png": "iVBORw0KGgoAAAANSUhEUgAABJcAAAKKCAYAAACAkbqXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAABAL0lEQVR4nO3de5ztd13f+/eHbCRcQxICIoHuKIgQYwJE5BRRKBaiKRcvPER7JFRqaMR7e2pse05QH/HElhbFI7Q55WpRBMSCJ0bgARqwArlDCJcSJMIuCCFJgSpBE77nj/klmWwn+/Jd890z3z3P5+Mxjz37N7Ne893fdc0na62p1loAAAAAoMddtnoBAAAAAMzLcAkAAACAboZLAAAAAHQzXAIAAACgm+ESAAAAAN0MlwAAAADotmurF7DZ7ne/+7Xdu3dv9TIAAAAADhuXXXbZ51trx230tcNuuLR79+5ceumlW70MAAAAgMNGVf3FnX3Ny+IAAAAA6Ga4BAAAAEA3wyUAAAAAuh1277kEAAAAcKu//du/zZ49e3LTTTdt9VKmcOSRR+b444/PXe961wM+jeESAAAAcNjas2dP7n3ve2f37t2pqq1ezrbWWsv111+fPXv25IQTTjjg03lZHAAAAHDYuummm3LssccaLB2Aqsqxxx570M/yMlwCAAAADmsGSweuZ68MlwAAAAAOoRe+8IV50YtetNXLyO7du/P5z39+5Y73XAIAAAB2jN1nX7CpvWvPO31TezPyzCUAAACAwc4999w8/OEPz3d913flox/9aJLk4x//eE477bQ85jGPyROe8IR85CMfSZI897nPzVlnnZUnPelJ+fqv//pcdNFF+dEf/dE84hGPyHOf+9zbmmeddVZOPfXUnHjiiTnnnHNuO7579+6cc845efSjH52TTjrptu7111+fpzzlKXnUox6V5z//+Wmtbcq/zXAJAAAAYKDLLrssr3vd63LFFVfkTW96Uy655JIkyZlnnpnf+I3fyGWXXZYXvehF+fEf//HbTnPjjTfmne98Z1784hfnaU97Wn72Z382V199da666qpceeWVSdYGVpdeemk+8IEP5KKLLsoHPvCB205/v/vdL5dffnnOOuus216C94u/+Iv59m//9lxxxRV5+tOfnk9+8pOb8u/zsjgAAACAgd797nfne7/3e3OPe9wjSfL0pz89N910U/7sz/4sz3rWs277vq985Su3ff60pz0tVZWTTjopD3jAA3LSSSclSU488cRce+21OeWUU/L6178+559/fm6++eZ85jOfyYc+9KF8y7d8S5Lk+77v+5Ikj3nMY/KmN70pSfKud73rts9PP/30HH300Zvy7zNcAgAAABhs79/C9tWvfjX3ve99b3sW0t7udre7JUnucpe73Pb5rX+/+eab84lP
fCIvetGLcskll+Too4/Oc5/73Nx0001/5/RHHHFEbr755jtdx2bwsjgAAACAgb7jO74jv//7v58vf/nL+dKXvpQ/+IM/yD3ucY+ccMIJecMb3pAkaa3l/e9//wE3v/jFL+ae97xnjjrqqHz2s5/NhRdeeEDreO1rX5skufDCC3PjjTf2/YP2YrgEAAAAMNCjH/3o/OAP/mBOOeWUfP/3f3+e8IQnJEle+9rX5uUvf3lOPvnknHjiiXnzm998wM2TTz45j3rUo3LiiSfmR3/0R/P4xz9+v6c555xz8q53vSuPfvSj87a3vS0PechDuv9N69VmvTP4dnHqqae2Sy+9dKuXAQAAAGwDH/7wh/OIRzxiq5cxlY32rKoua62dutH3e+YSAAAAAN0MlwAAAADoZrgEAAAAQDfDJQAAAOCwdri93/RIPXtluAQAAAActo488shcf/31BkwHoLWW66+/PkceeeRBnW7XoPUAAAAAbLnjjz8+e/bsyXXXXbfVS5nCkUcemeOPP/6gTmO4BAAAABy27nrXu+aEE07Y6mUc1nbMcGn32Rcc8Pdee97pA1cCAAAAcPjwnksAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOhmuAQAAABAN8MlAAAAALoZLgEAAADQzXAJAAAAgG6GSwAAAAB0M1wCAAAAoJvhEgAAAADdDJcAAAAA6Ga4BAAAAEA3wyUAAAAAuhkuAQAAANDNcAkAAACAboZLAAAAAHQzXAIAAACgm+ESAAAAAN0MlwAAAADoZrgEAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBuhksAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOhmuAQAAABAN8MlAAAAALoZLgEAAADQzXAJAAAAgG6GSwAAAAB0M1wCAAAAoJvhEgAAAADdDJcAAAAA6Ga4BAAAAEA3wyUAAAAAuhkuAQAAANDNcAkAAACAboZLAAAAAHQzXAIAAACgm+ESAAAAAN0MlwAAAADoZrgEAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBuhksAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOhmuAQAAABAN8MlAAAAALoZLgEAAADQzXAJAAAAgG6GSwAAAAB0M1wCAAAAoJvhEgAAAADdDJcAAAAA6Ga4BAAAAEA3wyUAAAAAuhkuAQAAANDNcAkAAACAboZLAAAAAHQzXAIAAACgm+ESAAAAAN0MlwAAAADoZrgEAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBu+x0uVdWDq+qPq+rDVXV1Vf30cvyYqnp7VX1s+fPodaf5haq6pqo+WlVPXXf8MVV11fK1l1RVLcfvVlW/uxx/X1XtXneaM5af8bGqOmNT//UAAAAArORAnrl0c5J/3lp7RJLHJXlBVT0yydlJ3tFae1iSdyx/z/K1Zyc5MclpSV5aVUcsrZclOTPJw5aP05bjz0tyY2vtoUlenORXl9YxSc5J8m1JHpvknPVDLAAAAAC21n6HS621z7TWLl8+/1KSDyd5UJJnJHn18m2vTvLM5fNnJHlda+0rrbVPJLkmyWOr6oFJ7tNae09rrSV5zV6nubX1xiRPXp7V9NQkb2+t3dBauzHJ23P7QAoAAACALXZQ77m0vFztUUnel+QBrbXPJGsDqCT3X77tQUk+te5ke5ZjD1o+3/v4HU7TWrs5yReSHLuPFgAAAADbwAEPl6rqXkl+L8nPtNa+uK9v3eBY28fx3tOsX9uZVXVpVV163XXX7WNpAAAAAGymAxouVdVdszZYem1r7U3L4c8uL3XL8ufnluN7kjx43cmPT/Lp5fjxGxy/w2mqaleSo5LcsI/WHbTWzm+tndpaO/W44447kH8SAAAAAJvgQH5bXCV5eZIPt9b+w7ovvSXJrb+97Ywkb153/NnLb4A7IWtv3H3x8tK5L1XV45bmc/Y6za2tH0jyzuV9md6a5ClVdfTyRt5PWY4B
AAAAsA3sOoDveXySH0lyVVVduRz7V0nOS/L6qnpekk8meVaStNaurqrXJ/lQ1n7T3Ataa7cspzsryauS3D3JhctHsja8+q2quiZrz1h69tK6oap+Ockly/f9Umvthr5/KgAAAACbbb/Dpdban2bj9z5KkiffyWnOTXLuBscvTfLNGxy/KctwaoOvvSLJK/a3TgAAAAAOvYP6bXEAAAAAsJ7hEgAAAADdDJcAAAAA6Ga4BAAAAEA3wyUAAAAAuhkuAQAAANDNcAkAAACAboZLAAAAAHQzXAIAAACgm+ESAAAAAN0MlwAAAADoZrgEAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBuhksAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOhmuAQAAABAN8MlAAAAALoZLgEAAADQzXAJAAAAgG6GSwAAAAB0M1wCAAAAoJvhEgAAAADdDJcAAAAA6Ga4BAAAAEA3wyUAAAAAuhkuAQAAANDNcAkAAACAboZLAAAAAHQzXAIAAACgm+ESAAAAAN0MlwAAAADoZrgEAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBuhksAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOhmuAQAAABAN8MlAAAAALoZLgEAAADQzXAJAAAAgG6GSwAAAAB0M1wCAAAAoJvhEgAAAADdDJcAAAAA6Ga4BAAAAEA3wyUAAAAAuhkuAQAAANDNcAkAAACAboZLAAAAAHQzXAIAAACgm+ESAAAAAN0MlwAAAADoZrgEAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBuhksAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOhmuAQAAABAN8MlAAAAALoZLgEAAADQzXAJAAAAgG6GSwAAAAB0M1wCAAAAoJvhEgAAAADdDJcAAAAA6Ga4BAAAAEA3wyUAAAAAuhkuAQAAANDNcAkAAACAboZLAAAAAHQzXAIAAACgm+ESAAAAAN0MlwAAAADoZrgEAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBuhksAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOhmuAQAAABAN8MlAAAAALoZLgEAAADQzXAJAAAAgG6GSwAAAAB0M1wCAAAAoJvhEgAAAADdDJcAAAAA6Ga4BAAAAEA3wyUAAAAAuhkuAQAAANBtv8OlqnpFVX2uqj647tgLq+p/VNWVy8f3rPvaL1TVNVX10ap66rrjj6mqq5avvaSqajl+t6r63eX4+6pq97rTnFFVH1s+zti0fzUAAAAAm+JAnrn0qiSnbXD8xa21U5aPP0ySqnpkkmcnOXE5zUur6ojl+1+W5MwkD1s+bm0+L8mNrbWHJnlxkl9dWsckOSfJtyV5bJJzqurog/4XAgAAADDMfodLrbV3JbnhAHvPSPK61tpXWmufSHJNksdW1QOT3Ke19p7WWkvymiTPXHeaVy+fvzHJk5dnNT01ydtbaze01m5M8vZsPOQCAAAAYIus8p5LP1FVH1heNnfrM4oelORT675nz3LsQcvnex+/w2laazcn+UKSY/fR+juq6syqurSqLr3uuutW+CcBAAAAcDB6h0svS/INSU5J8pkk/345Xht8b9vH8d7T3PFga+e31k5trZ163HHH7WPZAAAAAGymruFSa+2zrbVbWmtfTfL/Zu09kZK1Zxc9eN23Hp/k08vx4zc4fofTVNWuJEdl7WV4d9YCAAAAYJvY1XOiqnpga+0zy1+/N8mtv0nuLUl+u6r+Q5Kvy9obd1/cWrulqr5UVY9L8r4kz0nyG+tOc0aS9yT5gSTvbK21qnprkl9Z95K7pyT5hZ71jrT77AsO6vuvPe/0QSsBAAAAOPT2O1yqqt9J8sQk96uqPVn7DW5PrKpTsvYytWuTPD9JWmtXV9Xrk3woyc1JXtBau2VJnZW13zx39yQXLh9J8vIkv1VV12TtGUvPXlo3VNUvJ7lk+b5f
aq0d6BuLAwAAAHAI7He41Fr7oQ0Ov3wf339uknM3OH5pkm/e4PhNSZ51J61XJHnF/tYIAAAAwNZY5bfFAQAAALDDGS4BAAAA0M1wCQAAAIBuhksAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOhmuAQAAABAN8MlAAAAALoZLgEAAADQzXAJAAAAgG6GSwAAAAB0M1wCAAAAoJvhEgAAAADdDJcAAAAA6Ga4BAAAAEA3wyUAAAAAuhkuAQAAANDNcAkAAACAboZLAAAAAHQzXAIAAACgm+ESAAAAAN0MlwAAAADoZrgEAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBuhksAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOi2a6sXwJ3bffYFB/y91553+sCVAAAAAGzMM5cAAAAA6Ga4BAAAAEA3wyUAAAAAuhkuAQAAANDNcAkAAACAboZLAAAAAHQzXAIAAACg266tXgCH3u6zLzjg7732vNMHrgQAAACYnWcuAQAAANDNM5fYNJ4RBQAAADuPZy4BAAAA0M1wCQAAAIBuhksAAAAAdDNcAgAAAKCb4RIAAAAA3fy2OKYw6jfR+Q13AAAAsBrPXAIAAACgm+ESAAAAAN0MlwAAAADoZrgEAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBuhksAAAAAdDNcAgAAAKDbrq1eAByOdp99wQF/77XnnT5wJQAAADCWZy4BAAAA0M1wCQAAAIBuhksAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOhmuAQAAABAN8MlAAAAALoZLgEAAADQzXAJAAAAgG6GSwAAAAB0M1wCAAAAoJvhEgAAAADdDJcAAAAA6Ga4BAAAAEA3wyUAAAAAuhkuAQAAANDNcAkAAACAboZLAAAAAHQzXAIAAACgm+ESAAAAAN0MlwAAAADoZrgEAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBuhksAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOhmuAQAAABAN8MlAAAAALoZLgEAAADQzXAJAAAAgG6GSwAAAAB0M1wCAAAAoJvhEgAAAADdDJcAAAAA6Ga4BAAAAEA3wyUAAAAAuhkuAQAAANDNcAkAAACAboZLAAAAAHTb73Cpql5RVZ+rqg+uO3ZMVb29qj62/Hn0uq/9QlVdU1Ufraqnrjv+mKq6avnaS6qqluN3q6rfXY6/r6p2rzvNGcvP+FhVnbFp/2oAAAAANsWBPHPpVUlO2+vY2Une0Vp7WJJ3LH9PVT0yybOTnLic5qVVdcRympclOTPJw5aPW5vPS3Jja+2hSV6c5FeX1jFJzknybUkem+Sc9UMsAAAAALbefodLrbV3Jblhr8PPSPLq5fNXJ3nmuuOva619pbX2iSTXJHlsVT0wyX1aa+9prbUkr9nrNLe23pjkycuzmp6a5O2ttRtaazcmeXv+7pALAAAAgC3U+55LD2itfSZJlj/vvxx/UJJPrfu+PcuxBy2f7338Dqdprd2c5AtJjt1H6++oqjOr6tKquvS6667r/CcBAAAAcLA2+w29a4NjbR/He09zx4Otnd9aO7W1dupxxx13QAsFAAAAYHW9w6XPLi91y/Ln55bje5I8eN33HZ/k08vx4zc4fofTVNWuJEdl7WV4d9YCAAAAYJvY1Xm6tyQ5I8l5y59vXnf8t6vqPyT5uqy9cffFrbVbqupLVfW4JO9L8pwkv7FX6z1JfiDJO1trraremuRX1r2J91OS/ELneuGwsfvsCw74e6897/SBKwEAAIADGC5V1e8keWKS+1XVnqz9Brfzkry+qp6X5JNJnpUkrbWrq+r1ST6U5OYkL2it3bKkzsrab567e5ILl48keXmS36qqa7L2jKVnL60bquqXk1yyfN8vtdb2fmNxAAAAALbQfodLrbUfupMvPflOvv/cJOducPzSJN+8wfGbsgynNvjaK5K8Yn9rBAAAAGBrbPYb
egMAAACwgxguAQAAANDNcAkAAACAboZLAAAAAHQzXAIAAACgm+ESAAAAAN0MlwAAAADoZrgEAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBuu7Z6AcD2sPvsCw74e6897/SBKwEAAGAmnrkEAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBuhksAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOhmuAQAAABAt11bvQDg8Lb77AsO+HuvPe/0gSsBAABgBM9cAgAAAKCb4RIAAAAA3QyXAAAAAOhmuAQAAABAN8MlAAAAALr5bXHAtPwmOgAAgK3nmUsAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOhmuAQAAABAN8MlAAAAALrt2uoFAGw3u8++4IC/99rzTh+4EgAAgO3PM5cAAAAA6Ga4BAAAAEA3L4sDOES83A4AADgceeYSAAAAAN0MlwAAAADoZrgEAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBuhksAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOi2a6sXAMDqdp99wQF/77XnnT5wJQAAwE7jmUsAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOhmuAQAAABAN8MlAAAAALoZLgEAAADQzXAJAAAAgG6GSwAAAAB0M1wCAAAAoJvhEgAAAADdDJcAAAAA6Ga4BAAAAEA3wyUAAAAAuhkuAQAAANDNcAkAAACAbru2egEAbF+7z77ggL/32vNOH7gSAABguzJcAuCQM7QCAIDDh5fFAQAAANDNM5cAOGwczDOiEs+KAgCAzeCZSwAAAAB0M1wCAAAAoJuXxQHAAfAm5AAAsDHPXAIAAACgm+ESAAAAAN28LA4AtpCX2wEAMDvDJQA4DB3M0Co5uMHVqIGYQRsAwJy8LA4AAACAboZLAAAAAHQzXAIAAACgm+ESAAAAAN0MlwAAAADoZrgEAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBuhksAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOi2a6sXAAAw0u6zLzio77/2vNMHrQQA4PDkmUsAAAAAdDNcAgAAAKCbl8UBAHQ6mJfcebkdAHC48swlAAAAALoZLgEAAADQzXAJAAAAgG7ecwkAYJvxXk4AwEw8cwkAAACAbp65BACwQxzMM6ISz4oCAA6M4RIAACsb9VI+LxEEgO3Py+IAAAAA6LbScKmqrq2qq6rqyqq6dDl2TFW9vao+tvx59Lrv/4WquqaqPlpVT113/DFL55qqeklV1XL8blX1u8vx91XV7lXWCwAAAMDm2oxnLj2ptXZKa+3U5e9nJ3lHa+1hSd6x/D1V9cgkz05yYpLTkry0qo5YTvOyJGcmedjycdpy/HlJbmytPTTJi5P86iasFwAAAIBNMuJlcc9I8url81cneea6469rrX2ltfaJJNckeWxVPTDJfVpr72mttSSv2es0t7bemOTJtz6rCQAAAICtt+pwqSV5W1VdVlVnLsce0Fr7TJIsf95/Of6gJJ9ad9o9y7EHLZ/vffwOp2mt3ZzkC0mOXXHNAAAAAGySVX9b3ONba5+uqvsneXtVfWQf37vRM47aPo7v6zR3DK8Nts5Mkoc85CH7XjEAAAAAm2al4VJr7dPLn5+rqt9P8tgkn62qB7bWPrO85O1zy7fvSfLgdSc/Psmnl+PHb3B8/Wn2VNWuJEcluWGDdZyf5PwkOfXUU//O8AkAANbbffYFB/y91553+rZpA8B21P2yuKq6Z1Xd+9bPkzwlyQeTvCXJGcu3nZHkzcvnb0ny7OU3wJ2QtTfuvnh56dyXqupxy/spPWev09za+oEk71zelwkAAACAbWCVZy49IMnvL++vvSvJb7fW/qiqLkny+qp6XpJPJnlWkrTWrq6q1yf5UJKbk7ygtXbL0joryauS3D3JhctHkrw8yW9V1TVZe8bSs1dYLwAAAACbrHu41Fr78yQn
b3D8+iRPvpPTnJvk3A2OX5rkmzc4flOW4RQAAOxko15u52V8AKxq1d8WBwAAAMAOZrgEAAAAQDfDJQAAAAC6rfKG3gAAAHfK+zkB7AyeuQQAAABAN8MlAAAAALoZLgEAAADQzXAJAAAAgG7e0BsAAJiKNwoH2F48cwkAAACAboZLAAAAAHQzXAIAAACgm+ESAAAAAN0MlwAAAADo5rfFAQAAxG+hA+jlmUsAAAAAdPPMJQAAgME8Kwo4nHnmEgAAAADdDJcAAAAA6OZlcQAAAJMa9XI7L+MDDobhEgAAAIeMwRUcfrwsDgAAAIBunrkEAADA9DwjCraO4RIAAADcie3wvlYH24ZDzXAJAAAADiOexcWhZrgEAAAA7JehFXfGG3oDAAAA0M0zlwAAAIAt4/2n5me4BAAAAByWvJTv0PCyOAAAAAC6eeYSAAAAwEEY9YyokS8RHPksLs9cAgAAAKCb4RIAAAAA3QyXAAAAAOhmuAQAAABAN8MlAAAAALoZLgEAAADQzXAJAAAAgG6GSwAAAAB0M1wCAAAAoJvhEgAAAADdDJcAAAAA6Ga4BAAAAEA3wyUAAAAAuhkuAQAAANDNcAkAAACAboZLAAAAAHQzXAIAAACgm+ESAAAAAN0MlwAAAADoZrgEAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBuhksAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOhmuAQAAABAN8MlAAAAALoZLgEAAADQzXAJAAAAgG6GSwAAAAB0M1wCAAAAoJvhEgAAAADdDJcAAAAA6Ga4BAAAAEA3wyUAAAAAuhkuAQAAANDNcAkAAACAboZLAAAAAHQzXAIAAACgm+ESAAAAAN0MlwAAAADoZrgEAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBuhksAAAAAdDNcAgAAAKCb4RIAAAAA3QyXAAAAAOhmuAQAAABAN8MlAAAAALoZLgEAAADQzXAJAAAAgG6GSwAAAAB0M1wCAAAAoJvhEgAAAADdDJcAAAAA6Ga4BAAAAEA3wyUAAAAAuhkuAQAAANDNcAkAAACAboZLAAAAAHQzXAIAAACgm+ESAAAAAN0MlwAAAADoZrgEAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBuUwyXquq0qvpoVV1TVWdv9XoAAAAAWLPth0tVdUSS30zy3UkemeSHquqRW7sqAAAAAJIJhktJHpvkmtban7fW/ibJ65I8Y4vXBAAAAECSaq1t9Rr2qap+IMlprbV/uvz9R5J8W2vtJ9Z9z5lJzlz++vAkHz2IH3G/JJ/fpOXO3B3Znq07sq07vj1bd2R7tu7I9mzdke3ZuiPbs3VHtmfrjmzP1h3Z1h3fnq07sj1bd2R7tu7I9mzdke3t0P17rbXjNvrCrs1bzzC1wbE7TMRaa+cnOb8rXnVpa+3UntMeTt2R7dm6I9u649uzdUe2Z+uObM/WHdmerTuyPVt3ZHu27sj2bN2Rbd3x7dm6I9uzdUe2Z+uObM/WHdne7t0ZXha3J8mD1/39+CSf3qK1AAAAALDODMOlS5I8rKpOqKqvSfLsJG/Z4jUBAAAAkAleFtdau7mqfiLJW5MckeQVrbWrN/FHdL2c7jDsjmzP1h3Z1h3fnq07sj1bd2R7tu7I9mzdke3ZuiPbs3VHtmfrjmzrjm/P1h3Znq07sj1bd2R7tu7I9rbubvs39AYAAABg+5rhZXEAAAAAbFOGSwAAAAB0M1wCAAAAoJvhEgAAAADdDJe4TVXdf6vXcLCq6titXsPhzh6P57rH4WLGy/Io9uJ2bi/Gs8ccSi5vh8ao+xH3T+Pt1OvIjhouVdWpVfXHVfVfqurBVfX2qvpCVV1SVY9aoXvaus+PqqqXV9UHquq3q+oB23TNx+z1cWySi6vq6Ko6ZoXu11bVy6rqN6vq2Kp6YVVdVVWvr6oH9naX9nlVdb/l81Or6s+TvK+q/qKqvnOF7n2q6v+u
qt+qqh/e62svXWXN+/iZF654+ntV1S9V1dXL5eG6qnpvVT13xe6QPd7Pz1x1L4acf6Muy6Oue0v7qOU8/EhVXb98fHg5dt8VutNd9ya8XIw674bcViztUfcjo27fRt5Xj9qLUY8BZryvvryq/k1VfcMq6ztU3ZHtgXs88joy223ysNvO/fzc7sdEs13elt6Q+779/MxV9njG+9RR3VGXt2GXiYG3F6Nuk0feVw+5LO+o4VKSlyb5t0kuSPJnSf5Ta+2oJGcvX+v1K+s+//dJPpPkaUkuSfKfVugm49b8+SSXrfu4NMmDkly+fN7rVUk+lORTSf44yZeTnJ7k3Un+4wrdJDm9tfb55fN/l+QHW2sPTfIPs7bvvV6ZpJL8XpJnV9XvVdXdlq89rjdaVY++k4/HJDllhfUmyWuT/HmSpyb5xSQvSfIjSZ5UVb+yrxPux5A9HrwXQ86/jLssj7ruJcnrk9yY5ImttWNba8cmedJy7A0rdKe67g1uvypjLhejzrtRtxXJuMvyqDWPvK8etRejHgO8KvPdVx+d5L5J/riqLq6qn62qr1ttqUO7I9uj9njkdWS22+Rht50DHxPNdnlLBt33DdzjGe9TR3VHXd5GPR5Kxt1ejLqOvCrj7qvHXJZbazvmI8kV6z7/5J19raN7+brPr9zra1f2dgev+V8k+aMkJ6079onBe7zqXnwkya7l8/fu9bWrVujufZ796yT/Lcmx68/bju4tSd6ZtRuDvT++vOJevH+vv1+y/HmXJB/Zhns8ci9GnX9XrPt80y7Lo657S+ejPV/bwsvFkPNu0svFqPNuyG3F0hh1PzLq9m3kffVW3KdeMai76l6Mur1Yf/49IWvDtb9c7kfO3G7dwWs+FHt85V5fW/VysXdvu98mj7ztHPKYaLbL23L6Ufd9o/Z4xvvUUd1Rl7chl4nl9Fes+3wzby9G3SYPWe9y+iGX5V3ZWW6qqqckOSpJq6pnttb+6/J0tVtW6N6/qn4ua/9H5j5VVW05d7L6s8OGrLm19qKqel2SF1fVp5Kck6Tt52QHYv2/9zX7+FqP30zyh1V1XpI/qqpfS/KmJE9OcuUK3btV1V1aa19NktbauVW1J8m7ktxrhe6Hkzy/tfaxvb+w7Pkq/qqqvr219qdV9bQkNyRJa+2rVVUrdEft8ci9GHX+DbksD7zuJclfVNW/TPLq1tpnk6TWXsrw3Kz9X49es133RrZH3caNOu9G3VaMvCyPWvOw++qBezHqccuM99W3aa29O8m7q+ons/Z/hn8wyfnbtTugPWqPRz6ene02edhtZ8Y+JkoyzeUtGXffN2qPp7tPHfy489afsZmXt1GXiWTc7cWo68jI++oxl+VVJl6zfSQ5Oclbk1yY5JuS/HqS/5nk6iSPX6F7zl4fxy3HvzbJa7bjmvf6GU9L8t4kf7kJrV9Kcq8Njj80yRs3of/EJL+b5IokVy37cmaSu67Q/LdJvmuD46cl+dgK3R9I8vA7+dozV9yHb0ly8XJZ+NMk37gcPy7JT23DPR65F6POv6GX5aW1ade9pXd0kl/N2v9BuWH5+PBy7JhNvlz84Xa97s14uRh13o28rdjr52zm/cita/7CsuaHb8aaM/C+euBe7OsxwN9fobsV99XPX/H24nWbdR4diu4haD9pwB6PfDw7223yyMdZQx4TzXZ5W7qj7vtG7fHe90/b/j51VHfgbfL6y8SNy8dmPZYddt836H5v5HqHXJZribDDVdXdk3xDa+2DW70W2Elc9zhcuCzfzl4AsIpR9yPunxhpp72h952qqn+ywmm/pqqeU1Xftfz9h6vq/6mqF1TVXTdvlX/n53aveTn9N1XVz1fVS5Kcl+T0qnrE5qzuDj9n76fxbbt2VT22qr51+fyRVfVzVfU9m9D9pqp6clXda6/jp93ZaTbhZ656ufiGqvoXVfXrVfXvq+qfVdVRKza/rarus3x+96r6xar6g6r61VXb+/m5K+3FiO7etxdJ
vjfJP9vutxcjuiNvO0dd5qrqp6rq+FXWdii7+/mZK18mRt2PjLgd2uBnfPtyW/+UzewuHpPkKZvdXrfmf7iZ3b1+xqbeVmzWPo+8Hxn1GGA/P3Pb3XYe6vvqzXgMtxWPLzbhcdao+6eR96mjHiffuuYnb+aaB99eHKrH9pt2PzLwvnrIXqxf7/I44Oc3ab1T3XaOvE4vvU0//zxzaVFVn2ytPaTztK9NsivJPbL29Nl75fbXWVZr7YzNWudeP3eVNf98kh9K8roke5bDxyd5dtae5nheZ/ctex/K2lNp35kkrbWn93RHtqvqnCTfnbXz8O1Jvi3JnyT5riRvba2d29n9qSQvyNpTOU9J8tOttTcvX7u8tfbonu4B/NxVLhc/lbWnzF6U5Huy9jrhG7M2APnx1tqfdHavTnJya+3mqjo/yV8neWPWriMnt9a+r6d7AD+3ey9GdWe8vRjVHbkXoy5zVfWFJH+V5ONJfifJG1pr1/Wuc3R3Pz9zpcvEwPuRUbdDF7fWHrt8/mNZu33+/SRPSfIHvesd2R655n38zFUvF6P2YtR1eshjgAP4udvutnPkffXAx3CH/PHFJlxHRl2WR10uhl1HZrssj3xsP/C2c+R99abvxaj1Lu3ZLm8jHyePuSyv8lq92T6SfOBOPq5K8pVVusufu5J8NskRy9/r1q9twzX/92zwGtAkX5PVXvt+eZL/krXXnX7n8udnls+/c8W9GNJe9vKIrF1xv5jkPsvxu69y/i3dey2f787ar/v86eXvV2zTy8VV6y6/90jyJ8vnD1llzUk+vP583OtrV27TvXB7MfdeDLnMZe219HfJ2oO9lye5Lmu/heWMJPfeht0h593SHnU/Mup26Ip1n1+S299P5p5Z/TceDWkP7I68XIxa86jr9JDHACP3edRt56g9vrWXMY/hRl0uRl5Hhq150OVi6HVk0JpH3l6Memx/xbrPN/O2c+R99abvxaj1Tnp5G/k4ecj5t9N+W9wDkjw1a/8HdL1K8mcrdO9SVV+TtSv/PbL2W11uSHK3JKs+ZW3Umr+a5OuS/MVexx+4fK3XqUl+Omu/Yvb/aK1dWVVfbq1dtEJzdPvm1totSf66qj7eWvtikrTWvlxVq+zFEa21/7W0rq2qJyZ5Y1X9vaydf6sYdblI1m7Absna5ffeSdJa++SKT7/8YFX9k9baK5O8v6pOba1dWlXfmORvV1zvqL1wezG+O3IvRl3mWlv7bUdvS/K25Xrx3Vn7v2wvytobIW6n7sjbilH3I8mY26G7VNXRWRviVVueGdZa+6uqunnF9Y5qj+qOvFyMWvOo6/SoxwDJfLedI++rRz2GG7XmkdeRUWsedbkYeR2Z7bI88rH9qNvOUffVo/Zi5GOL2S5vIx8nDzn/dtpw6f/L2oTuyr2/UFV/skL35Vl7R/sjsnan+Yaq+vMkj8vaU/pWMWrNP5PkHVX1sdz+ax0fkrV3n/+J3ujyH0cvrqo3LH9+Npt0ORvY/puqukdr7a+z9vrmJEmtvUZ2lRuxv6yqU24971pr/6uq/lGSVyQ5aZUFZ9zl4j8nuaSq3pvkO7L2mxlSVcdl+RWVnf5pkl+vqn+T5PNJ3lNrvw71U8vXVjFqL9xejO+O3ItRl7k73OG21v42yVuSvKXW3iRzu3VHnXfJoPuRjLsdOirJZVnb61ZVX9ta+8tae6+BVf+jYFR7VHfk5WLUmkddp0c9Bkjmu+0cdl898DHcqDWPvI6MWvOoy8XI68hsl+WRj+1H3Xb+TMbcV4/ai5/JmPUm813eRj5OHnL+ec+lTVJVX5ckrbVPV9V9s/Y65E+21i7e0oXtQ1XdJcljkzwoazdae5Jcsvzfic36GacneXxr7V9tVnOz21V1t9baVzY4fr8kD2ytXdXZPT5r/7fnLzf42uNba/+tpztaVZ2Y5BFJPtha+8gmt++d5Ouz9oByT2vts5vZn8WMtxejjN6Lzb7MVdU3ttb++2as7VB0Rxt1
PzLydmiDn3WPJA9orX1ilvbINY+yWWsecJ0e8hhgtJG3nYfivnqzHx/O+PhixJpHXC5GX0dmuixvxWP7zbjtHHFfPXIvRv436kyXt6U5ZL2jzj/DpU1UVQ/I2pWgJfn0Zt6xjWrP1h3Znq07sj1yzRv8rHvd+rTMFTv2YnD7cNjj5ecN2eft3D3Ue7wZZry8zbjmDX6W2+Tbf9aw2+TNMOMez7jmUWbbC5eL/f6sbf0YblTXdW+/P2vbnndLZ1P3YkcNl6rqW5Kcn7UNvDDJz7fWbly+dts79Hd0T0nyH7P2dMb/sRw+Pmvv6v7jrbXLV1jzo5K8bLPbe615T9amwpvd3ey9GNLeou5ZrbUrerpL+1BcLjb1/NvHz1z1t66M2osh3f38zCG/0W3V9uF0eVt+7rb7zXmjuiMvx7Pdpw7eC7dDt5/+sLm92IS9mO06MqQ745pHnXeD1zxVd9Y17+NnbsvHcKO6o/47Z8br3n5+5rY775bTn5IB599Oe8+llyZ5YZL3Zu31j39aVU9vrX08q70p1quSPL+19r71B6vqcUlemeTkFdqvHNR+1WTdke2t6L5qhW4y2eWiqn7uzr6UtV+ruYpRezGkO3IvBranurwtjSF7MVs34867ZL771JF74XbodlPdXgzei9muI6O6I9ujuqPOu2S+vRjVHdke0p3xMdzANb8qY/47Z7rr3oTnXTLo/LvLiouazb1aa3/UWvufrbUXZe1Nwf5o2cRVnsJ1z73PmCRprb03a+/uvopR7dm6I9uzdUe2R3V/JcnRWfutT+s/7pXVb4fsxfj2bHucjNuL2boj93i2+1S3ybdzm3y7kXvhOjK+Pao76rxL5tsLl4vbzfgYbrbHFzNe92Y775JBe7HTnrlUVXVUa+0LSdJa++Oq+v4kv5fkmBW6F1bVBUlek9vf1f7BSZ6T5I9WWfDA9mzdke3ZuiPbo7qXJ/mvrbXL9v5CVa362+Lsxfj2bHucjNuL2boj93i2+1S3ybdzm3y7kXvhOjK+Pao76rxL5tsLl4vbzfgYbrbHFzNe92Y775JBe7HT3nPph5P8+TKRW3/8IUn+z9baj63Q/u4kz8gd39X+La21P1xhyUPbs3VHtmfrjmyP6FbVw5Nc31r7/AZfe0Bb/bd32Ivx7Wn2eOkO2YvZusvpR+3xdPepbpNva7pNvr058rrnOnII2oMuF8POu6UzzV6M7I5sT3h74fFF5rzuzXjeLY3Nv+611nzs9ZHkN2bqzrhme2Ev7IW92OrujGuerTvjmu2FvbAX9mKruzOu2V7Yi8OhO+Oat9Ne7LT3XDpQj5+sO7I9W3dke7buyPZs3ZHt2boj27N1R7Z1x7dn645sz9Yd2Z6tO7I9W3dke7buyPZs3ZHt2boj27rj27N1D7ptuAQAAABAN8MlAAAAALoZLm2sJuuObM/WHdmerTuyPVt3ZHu27sj2bN2Rbd3x7dm6I9uzdUe2Z+uObM/WHdmerTuyPVt3ZHu27si27vj2bN2DbhsubezXJ+uObM/WHdmerTuyPVt3ZHu27sj2bN2Rbd3x7dm6I9uzdUe2Z+uObM/WHdmerTuyPVt3ZHu27si27vj2bN2Db496Z/HZPpKcP1N3xjXvtL1IckSS5yf55SSP3+tr/2bFdQ1rz7THs55/9mLevdA9PNe8avdwuo7Yi/Hd7brmrTjvZlyzy8Xm7fGotsuF7nZpb9fuqOtILYEdoaqOubMvJXl/a+347dQd2Z6tO7I9sPufk9wjycVJfiTJRa21n1u+dnlr7dE93ZHt2fZ4ZHu2PR7Zthe6h7I9W3dpT3UdsRfjuyPbs93WL6efas0uF3fozvh41uVC95C1Z+su7THXkR02XLolyV/kjq8dbMvfH9Ra+5rt1B3Znq07sj2w+4HW2rcsn+9K8tIk90vyQ0ne21p7VE93ZHu2PR7Znm2PR7bthe6hbM/WXdpTXUfsxfjuyPZst/Uzrtnl4g7dGR/PulzoHrL2
bN2lPeZ63Tqf8jTjR5KPJXnInXztU9utO+Oa7cUdTvuRDY79X0n+W5KPrbgXQ9qz7fGM55+9mH4vdCdd8+C9mOo6Yi8O2V5MteZR592Ma3a5OGSXi1Hnn8uF7vRrnvG+eqe9ofevJTn6Tr72b7dhd2R7tu7I9qjupVV12voDrbVfSvLKJLtX6I5s/1rm2uOR7dn2eGTbXugeyvZs3WS+68iobmIvDkV7VHfk45Zfy1xr/rW4XNxqxsezLhe6h7I9WzcZdR1ZZeJ1uH4k+YczdWdcs72wF/bCXmx1d8Y1z9adcc32wl7YC3ux1d0Z12wv7MXh0J1xzdtpL3bUey4dqFXfnO5Qd0e2Z+uObM/WHdmerTuyPVt3ZHu27si27vj2bN2R7dm6I9uzdUe2Z+uObM/WHdmerTuyPVt3ZFt3fHu2bk97p70s7kDV/r9lW3VHtmfrjmzP1h3Znq07sj1bd2R7tu7Itu749mzdke3ZuiPbs3VHtmfrjmzP1h3Znq07sj1bd2Rbd3x7tu5Btw2XNjbq6VwjnyY225rtxfjuyPZs3ZHt2boj27N1R7Z1x7dn645sz9Yd2Z6tO7I9W3dke7buyPZs3ZHt2boj27rj27N1D7ptuAQAAABAN8OljV07WXdke7buyPZs3ZHt2boj27N1R7Zn645s645vz9Yd2Z6tO7I9W3dke7buyPZs3ZHt2boj27N1R7Z1x7dn6x58e9Q7i2/HjyTfmuRr1/39OUnenOQlSY7Zbt0Z12wv7IW9sBdb3Z1xzbN1Z1yzvbAX9sJebHV3xjXbC3txOHRnXPOUe7HKomb7SHL5rZuV5DuSfDrJ9yf55SRv3G7dGddsL+yFvbAXW92dcc2zdWdcs72wF/bCXmx1d8Y12wt7cTh0Z1zzlHuxyqJm+0jy/nWf/2aSF677+5XbrTvjmu2FvbAX9mKruzOuebbujGu2F/bCXtiLre7OuGZ7YS8Oh+6Ma55xL3baey4dUVW7ls+fnOSd6762a4Pv3+ruyPZs3ZHt2boj27N1R7Zn645sz9Yd2dYd356tO7I9W3dke7buyPZs3ZHt2boj27N1R7Zn645s645vz9Yd1l51UbP5nSQXVdXnk3w5ybuTpKoemuQL27A7sj1bd2R7tu7I9mzdke3ZuiPbs3VHtnXHt2frjmzP1h3Znq07sj1bd2R7tu7I9mzdke3ZuiPbuuPbs3WHtWt56tOOUVWPS/LAJG9rrf3Vcuwbk9yrtXb5duvOuGZ7Mb4745rtxfjujGu2F/N2Z1yzvRjfnXHN9mJ8d8Y124vx3RnXbC/m7c645tn2YscNl5Kkqk5K8k3LXz/cWvvgdu6ObM/WHdmerTuyPVt3ZHu27sj2bN2Rbd3x7dm6I9uzdUe2Z+uObM/WHdmerTuyPVt3ZHu27si27vj2bN0R7R01XKqqo7L2K/YenOQDSSrJSUk+meQZrbUvbqfujGu2F+O7M67ZXozvzrhmezFvd8Y124vx3RnXbC/Gd2dcs70Y351xzfZi3u6Ma55yL3bYcOklSf4myb9srX11OXaXJOcluXtr7Se3U3fGNduL8d0Z12wvxndnXLO9mLc745rtxfjujGu2F+O7M67ZXozvzrhmezFvd8Y1z7gX3b++bsaPJB9KsmuD47uy9jSwbdWdcc32wl7YC3ux1d0Z1zxbd8Y12wt7YS/sxVZ3Z1yzvbAXh0N3xjXPuBd3yc7yN621m/c+uBz7yjbsjmzP1h3Znq07sj1bd2R7tu7I9mzdkW3d8e3ZuiPbs3VHtmfrjmzP1h3Znq07sj1bd2R7tu7Itu749mzdYe1dKy1pPkdW1aOy9prC9SrJ3bZhd2R7tu7I9mzdke3ZuiPbs3VHtmfrjmzrjm/P1h3Znq07sj1bd2R7tu7I9mzdke3ZuiPbs3VHtnXHt2frDmvvtPdc+uN9fb219qTt1B3Znq07sj1bd2R7tu7I9mzdke3ZuiPbuuPbs3VHtmfrjmzP1h3Znq07
sj1bd2R7tu7I9mzdkW3d8e3ZuiPbO2q4BAAAAMDm2mkvi0tV3T/JC5KcmKRl7c2sfrO19rnt2B3Znq07sj1bd2R7tu7I9mzdke3ZuiPbuuPbs3VHtmfrjmzP1h3Znq07sj1bd2R7tu7I9mzdkW3d8e3ZuqPaO+oNvavq8UkuWf76miT/Zfn84uVr26o7sj1bd2R7tu7I9mzdke3ZuiPbs3VHtnXHt2frjmzP1h3Znq07sj1bd2R7tu7I9mzdke3ZuiPbuuPbs3WHttsKv8Juto8k703yqA2On5LkfdutO+Oa7YW9sBf2Yqu7M655tu6Ma7YX9sJe2Iut7s64ZnthLw6H7oxrnnEvdtQzl5Lcp7V2xd4HW2tXJrn3NuyObM/WHdmerTuyPVt3ZHu27sj2bN2Rbd3x7dm6I9uzdUe2Z+uObM/WHdmerTuyPVt3ZHu27si27vj2bN1h7Z02XKqqOnqDg8dktb0Y1R3Znq07sj1bd2R7tu7I9mzdke3ZuiPbuuPbs3VHtmfrjmzP1h3Znq07sj1bd2R7tu7I9mzdkW3d8e3ZuuPavU95mvEjyZlZe23hd2ZtInfvJE9M8r4kz99u3RnXbC/shb2wF1vdnXHNs3VnXLO9sBf2wl5sdXfGNdsLe3E4dGdc85R7scqiZvxI8o+SvCvJ9cvHu5I8bbt2Z1yzvbAX9sJebHV3xjXP1p1xzfbCXtgLe7HV3RnXbC/sxeHQnXHNs+1FLWEAAAAAOGirvlZvOlX13VV1UVV9vqquWz7/nu3anXHN9mJ8d8Y124vx3RnXbC/m7c64Znsxvjvjmu3F+O6Ma7YX47szrtlezNudcc3T7cVmPKVqlo8kP5bk0iT/IMl9lo9/kOTiJGdut+6Ma7YX9sJe2Iut7s645tm6M67ZXtgLe2Evtro745rthb04HLozrnnKvVhlUbN9JPlQkmM2OH5skg9vt+6Ma7YX9sJe2Iut7s645tm6M67ZXtgLe2Evtro745rthb04HLozrnnGvdhpL4ur1toNex9srV2/Tbsj27N1R7Zn645sz9Yd2Z6tO7I9W3dkW3d8e7buyPZs3ZHt2boj27N1R7Zn645sz9Yd2Z6tO7KtO749W3dYe6cNl75YVSfvfXA59qVt2B3Znq07sj1bd2R7tu7I9mzdke3ZuiPbuuPbs3VHtmfrjmzP1h3Znq07sj1bd2R7tu7I9mzdkW3d8e3ZusPau1Za0nz+eZK3VNUrk1yWpCX51iRnJPnft2F3ZHu27sj2bN2R7dm6I9uzdUe2Z+uObOuOb8/WHdmerTuyPVt3ZHu27sj2bN2R7dm6I9uzdUe2dce3Z+sOa9fy2rodo6oekOQFSU5MUkmuTvKbrbW/3I7dke3ZuiPbs3VHtmfrjmzP1h3Znq07sq07vj1bd2R7tu7I9mzdke3ZuiPbs3VHtmfrjmzP1h3Z1h3fnq07qr2jhktV9ZDW2idn6Y5sz9Yd2Z6tO7I9W3dke7buyPZs3ZFt3fHt2boj27N1R7Zn645sz9Yd2Z6tO7I9W3dke7buyLbu+PZs3ZHtnfaeS//11k+q6vcm6I5sz9Yd2Z6tO7I9W3dke7buyPZs3ZFt3fHt2boj27N1R7Zn645sz9Yd2Z6tO7I9W3dke7buyLbu+PZs3WHtnTZcqnWff/0E3ZHt2boj27N1R7Zn645sz9Yd2Z6tO7KtO749W3dke7buyPZs3ZHt2boj27N1R7Zn645sz9Yd2dYd356tO6y904ZL7U4+367dke3ZuiPbs3VHtmfrjmzP1h3Znq07sq07vj1bd2R7tu7I9mzdke3ZuiPbs3VHtmfrjmzP1h3Z1h3fnq07rL3T3nPpliR/lbVJ3d2T/PWtX0rSWmv32U7dGddsL8Z3Z1yzvRjfnXHN9mLe7oxrthfjuzOu2V6M7864Znsxvjvjmu3FvN0Z1zzlXuyk4RIAAAAAm2unvSwOAAAAgE1kuAQAAABAN8MlAIAOVfVny5+7q+qH
B/+sf1ZVz9ng+O6q+uDInw0AsD+7tnoBAAAzaq39/eXT3Ul+OMlvD/xZ/3FUGwBgVZ65BADQoar+1/LpeUmeUFVXVtXPVtURVfXvquqSqvpAVT1/+f4nVtVFVfX6qvrvVXVeVf3jqrq4qq6qqm/Yx896YVX9i+Xzx1TV+6vqPUleMPwfCgCwH4ZLAACrOTvJu1trp7TWXpzkeUm+0Fr71iTfmuTHquqE5XtPTvLTSU5K8iNJvrG19tgk/znJTx7gz3tlkp9qrf1vm/mPAADoZbgEALC5npLkOVV1ZZL3JTk2ycOWr13SWvtMa+0rST6e5G3L8auy9vK6faqqo5Lct7V20XLotzZx3QAAXbznEgDA5qokP9lae+sdDlY9MclX1h366rq/fzUH9riskrTVlwgAsHk8cwkAYDVfSnLvdX9/a5KzququSVJV31hV99yMH9Ra+59JvlBV374c+seb0QUAWIVnLgEArOYDSW6uqvcneVWSX8/aS9wur6pKcl2SZ27iz/snSV5RVX+dtUEWAMCWqtY8sxoAAACAPl4WBwAAAEA3L4sDANgmqupfJ3nWXoff0Fo7dyvWAwBwILwsDgAAAIBuXhYHAAAAQDfDJQAAAAC6GS4BAAAA0M1wCQAAAIBuhksAAAAAdPv/ATC1lj/l3EEJAAAAAElFTkSuQmCC\n", 710 | "text/plain": [ 711 | "
" 712 | ] 713 | }, 714 | "metadata": { 715 | "needs_background": "light" 716 | }, 717 | "output_type": "display_data" 718 | } 719 | ], 720 | "source": [ 721 | "item_code_demand = sales.groupby(['item_id']).sum().reset_index()\n", 722 | "item_code_demand = item_code_demand.sort_values(by=['demand'], ascending=False)\n", 723 | "\n", 724 | "item_code_demand[:50].plot.bar(x='item_id', y='demand', figsize=(20, 10)) \n" 725 | ] 726 | }, 727 | { 728 | "cell_type": "markdown", 729 | "id": "a5e6411d", 730 | "metadata": {}, 731 | "source": [ 732 | "### Add aggregated metrics - proved to be effective in current results " 733 | ] 734 | }, 735 | { 736 | "cell_type": "code", 737 | "execution_count": 9, 738 | "id": "85748ccd", 739 | "metadata": {}, 740 | "outputs": [ 741 | { 742 | "data": { 743 | "text/plain": [ 744 | "5739000" 745 | ] 746 | }, 747 | "execution_count": 9, 748 | "metadata": {}, 749 | "output_type": "execute_result" 750 | } 751 | ], 752 | "source": [ 753 | "len(sales)" 754 | ] 755 | }, 756 | { 757 | "cell_type": "code", 758 | "execution_count": 10, 759 | "id": "4cb551b0", 760 | "metadata": {}, 761 | "outputs": [], 762 | "source": [ 763 | "aggregated_metrics = sales.sort_values(by=['item_id', 'store_id', 'state_id', 'timestamp'])" 764 | ] 765 | }, 766 | { 767 | "cell_type": "code", 768 | "execution_count": 11, 769 | "id": "c83c162d", 770 | "metadata": {}, 771 | "outputs": [ 772 | { 773 | "ename": "KeyboardInterrupt", 774 | "evalue": "", 775 | "output_type": "error", 776 | "traceback": [ 777 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 778 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 779 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mdata_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'rolling_mean_t24'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mdata_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'item_id'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'store_id'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'state_id'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'demand'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshift\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m60\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrolling\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m180\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0magg_demands\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maggregated_metrics\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 780 | "\u001b[0;32m\u001b[0m in \u001b[0;36magg_demands\u001b[0;34m(data_df)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0magg_demands\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_df\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mdata_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'rolling_mean_t1'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'item_id'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'store_id'\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m'state_id'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'demand'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshift\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m60\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrolling\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m7\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mdata_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'rolling_std_t1'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'item_id'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'store_id'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'state_id'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'demand'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshift\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m60\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrolling\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m7\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mdata_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'rolling_mean_t2'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mdata_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'item_id'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'store_id'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'state_id'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'demand'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshift\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m60\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrolling\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m14\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mdata_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'rolling_std_t2'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'item_id'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'store_id'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'state_id'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'demand'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshift\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m60\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrolling\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m14\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 781 
| "\u001b[0;32m~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/groupby/generic.py\u001b[0m in \u001b[0;36mtransform\u001b[0;34m(self, func, engine, engine_kwargs, *args, **kwargs)\u001b[0m\n\u001b[1;32m 492\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 493\u001b[0m return self._transform_general(\n\u001b[0;32m--> 494\u001b[0;31m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine_kwargs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mengine_kwargs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 495\u001b[0m )\n\u001b[1;32m 496\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 782 | "\u001b[0;32m~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/groupby/generic.py\u001b[0m in \u001b[0;36m_transform_general\u001b[0;34m(self, func, engine, engine_kwargs, *args, **kwargs)\u001b[0m\n\u001b[1;32m 535\u001b[0m \u001b[0mNUMBA_FUNC_CACHE\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcache_key\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnumba_func\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 536\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 537\u001b[0;31m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 538\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 539\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mABCDataFrame\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mABCSeries\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 783 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0magg_demands\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_df\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mdata_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'rolling_mean_t1'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'item_id'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'store_id'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'state_id'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'demand'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshift\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m60\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrolling\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m7\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m 
\u001b[0mdata_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'rolling_std_t1'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'item_id'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'store_id'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'state_id'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'demand'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshift\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m60\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrolling\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m7\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mdata_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'rolling_mean_t2'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'item_id'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'store_id'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'state_id'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'demand'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m 
\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshift\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m60\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrolling\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m14\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mdata_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'rolling_std_t2'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'item_id'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'store_id'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'state_id'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'demand'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshift\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m60\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrolling\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m14\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 784 | "\u001b[0;32m~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/window/rolling.py\u001b[0m in \u001b[0;36mstd\u001b[0;34m(self, ddof, *args, **kwargs)\u001b[0m\n\u001b[1;32m 2100\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mddof\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2101\u001b[0m \u001b[0mnv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalidate_rolling_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"std\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2102\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mddof\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mddof\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2103\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2104\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mSubstitution\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"rolling\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mversionadded\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 785 | "\u001b[0;32m~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/window/rolling.py\u001b[0m in \u001b[0;36mstd\u001b[0;34m(self, ddof, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1546\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"std\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1547\u001b[0m \u001b[0mddof\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mddof\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1548\u001b[0;31m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 
1549\u001b[0m )\n\u001b[1;32m 1550\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 786 | "\u001b[0;32m~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/window/rolling.py\u001b[0m in \u001b[0;36m_apply\u001b[0;34m(self, func, center, require_min_periods, floor, is_weighted, name, use_numba_cache, **kwargs)\u001b[0m\n\u001b[1;32m 586\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_along_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcalc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 587\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 588\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcalc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 589\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 590\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 787 | "\u001b[0;32m~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/window/rolling.py\u001b[0m in \u001b[0;36mcalc\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 574\u001b[0m \u001b[0mclosed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclosed\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 575\u001b[0m )\n\u001b[0;32m--> 576\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mstart\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmin_periods\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 577\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 578\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 788 | "\u001b[0;32m~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/window/rolling.py\u001b[0m in \u001b[0;36mzsqrt_func\u001b[0;34m(values, begin, end, min_periods)\u001b[0m\n\u001b[1;32m 1537\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1538\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mzsqrt_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbegin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmin_periods\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1539\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mzsqrt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwindow_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbegin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmin_periods\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mddof\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mddof\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1540\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1541\u001b[0m \u001b[0;31m# ddof passed again for compat with groupby.rolling\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 789 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 790 | ] 791 | } 792 | ], 793 | "source": [ 794 | "def agg_demands(data_df): \n", 795 | " data_df['rolling_mean_t1'] = data_df.groupby(['item_id','store_id', 
'state_id'])['demand'].transform(lambda x: x.shift(60).rolling(7).mean())\n", 796 | " data_df['rolling_std_t1'] = data_df.groupby(['item_id', 'store_id', 'state_id'])['demand'].transform(lambda x: x.shift(60).rolling(7).std())\n", 797 | " data_df['rolling_mean_t2'] = data_df.groupby(['item_id','store_id', 'state_id'])['demand'].transform(lambda x: x.shift(60).rolling(14).mean())\n", 798 | " data_df['rolling_std_t2'] = data_df.groupby(['item_id', 'store_id', 'state_id'])['demand'].transform(lambda x: x.shift(60).rolling(14).std())\n", 799 | " data_df['rolling_mean_t4'] = data_df.groupby(['item_id','store_id', 'state_id'])['demand'].transform(lambda x: x.shift(60).rolling(30).mean())\n", 800 | " data_df['rolling_std_t4'] = data_df.groupby(['item_id', 'store_id', 'state_id'])['demand'].transform(lambda x: x.shift(60).rolling(30).std())\n", 801 | " data_df['rolling_mean_t12'] = data_df.groupby(['item_id', 'store_id', 'state_id'])['demand'].transform(lambda x: x.shift(60).rolling(90).mean())\n", 802 | " data_df['rolling_mean_t24'] = data_df.groupby(['item_id', 'store_id', 'state_id'])['demand'].transform(lambda x: x.shift(60).rolling(180).mean())\n", 803 | "\n", 804 | "agg_demands(aggregated_metrics) \n", 805 | " " 806 | ] 807 | }, 808 | { 809 | "cell_type": "code", 810 | "execution_count": null, 811 | "id": "e985f701", 812 | "metadata": {}, 813 | "outputs": [], 814 | "source": [ 815 | "def price_aug(data): \n", 816 | " data['lag_price_t1'] = data.groupby(['item_id','store_id', 'state_id'])['sell_price'].transform(lambda x: x.shift(1))\n", 817 | " data['price_change_t1'] = (data['lag_price_t1'] - data['sell_price']) / (data['lag_price_t1'])\n", 818 | " data['rolling_price_max_t365'] = data.groupby(['item_id','store_id', 'state_id'])['sell_price'].transform(lambda x: x.shift(1).rolling(365).max())\n", 819 | " data['price_change_t365'] = (data['rolling_price_max_t365'] - data['sell_price']) / (data['rolling_price_max_t365'])\n", 820 | " data['rolling_price_std_t7'] = 
data.groupby(['item_id','store_id', 'state_id'])['sell_price'].transform(lambda x: x.rolling(7).std())\n", 821 | " data['rolling_price_std_t30'] = data.groupby(['item_id','store_id', 'state_id'])['sell_price'].transform(lambda x: x.rolling(30).std())\n", 822 | " data.drop(['rolling_price_max_t365', 'lag_price_t1'], inplace = True, axis = 1)\n", 823 | " \n", 824 | "price_aug(related_ts) " 825 | ] 826 | }, 827 | { 828 | "cell_type": "code", 829 | "execution_count": null, 830 | "id": "a24a8f0a", 831 | "metadata": {}, 832 | "outputs": [], 833 | "source": [ 834 | "\n", 835 | "related_ts_aug_df = aggregated_metrics[['timestamp','item_id','store_id', 'state_id', 'rolling_mean_t1', 'rolling_mean_t2', 'rolling_mean_t4', 'rolling_mean_t12', 'rolling_mean_t24']]\n" 836 | ] 837 | }, 838 | { 839 | "cell_type": "code", 840 | "execution_count": null, 841 | "id": "cfb0ff8d", 842 | "metadata": {}, 843 | "outputs": [], 844 | "source": [ 845 | "related_ts = related_ts.merge(related_ts_aug_df, on=['item_id', 'timestamp', 'store_id', 'state_id'])" 846 | ] 847 | }, 848 | { 849 | "cell_type": "code", 850 | "execution_count": null, 851 | "id": "70f7e1c7", 852 | "metadata": {}, 853 | "outputs": [], 854 | "source": [ 855 | "len(related_ts_aug_df) " 856 | ] 857 | }, 858 | { 859 | "cell_type": "code", 860 | "execution_count": null, 861 | "id": "5088a967", 862 | "metadata": {}, 863 | "outputs": [], 864 | "source": [ 865 | "related_ts = related_ts.fillna(0)\n", 866 | "related_ts.to_csv('flattened_rts_agg_metrics.csv', index=False)" 867 | ] 868 | }, 869 | { 870 | "cell_type": "code", 871 | "execution_count": null, 872 | "id": "c0fcee7d", 873 | "metadata": { 874 | "scrolled": true 875 | }, 876 | "outputs": [], 877 | "source": [ 878 | "related_ts_aug_df" 879 | ] 880 | }, 881 | { 882 | "cell_type": "markdown", 883 | "id": "0c274f85", 884 | "metadata": {}, 885 | "source": [ 886 | "### Preparing clustering data - not included in the first phase " 887 | ] 888 | }, 889 | { 890 | "cell_type": "code", 
891 | "execution_count": null, 892 | "id": "9ce38058", 893 | "metadata": {}, 894 | "outputs": [], 895 | "source": [ 896 | "items = sales[['item_id']].drop_duplicates()\n", 897 | "items" 898 | ] 899 | }, 900 | { 901 | "cell_type": "code", 902 | "execution_count": null, 903 | "id": "66524c15", 904 | "metadata": {}, 905 | "outputs": [], 906 | "source": [ 907 | "store_ids = sales[['store_id']].drop_duplicates()\n", 908 | "store_ids" 909 | ] 910 | }, 911 | { 912 | "cell_type": "code", 913 | "execution_count": null, 914 | "id": "6438f59f", 915 | "metadata": {}, 916 | "outputs": [], 917 | "source": [ 918 | "import math \n", 919 | "import random \n", 920 | "vec_index = {} \n", 921 | "vectors = [] \n", 922 | "rows = [] \n", 923 | "j = 0 \n", 924 | "\n", 925 | "sampled_items = items['item_id'].values\n", 926 | "random.shuffle(sampled_items)\n", 927 | "\n", 928 | "\n", 929 | "for item_id in sampled_items[:300]: \n", 930 | " for store_id in ['CA_1', 'TX_1', 'WI_1']:\n", 931 | " cur_vec = sales[(sales['item_id']==item_id) & (sales['store_id']==store_id)] \n", 932 | " vec = cur_vec.sort_values(by=['timestamp']).fillna(0.0)['demand'].tolist()[:360]\n", 933 | " vectors.append(vec)\n", 934 | " vec_index[(item_id, store_id)] = j\n", 935 | " j += 1 \n", 936 | "\n" 937 | ] 938 | }, 939 | { 940 | "cell_type": "code", 941 | "execution_count": null, 942 | "id": "f2cce640", 943 | "metadata": {}, 944 | "outputs": [], 945 | "source": [ 946 | "import numpy as np \n", 947 | "\n", 948 | "vectors = np.array(vectors)" 949 | ] 950 | }, 951 | { 952 | "cell_type": "code", 953 | "execution_count": null, 954 | "id": "101a273a", 955 | "metadata": {}, 956 | "outputs": [], 957 | "source": [ 958 | "!pip install tslearn" 959 | ] 960 | }, 961 | { 962 | "cell_type": "code", 963 | "execution_count": null, 964 | "id": "0c6a3d70", 965 | "metadata": {}, 966 | "outputs": [], 967 | "source": [ 968 | "from tslearn.utils import to_time_series_dataset\n", 969 | "from tslearn.preprocessing import 
TimeSeriesScalerMeanVariance\n", 970 | "from tslearn.clustering import TimeSeriesKMeans, silhouette_score" 971 | ] 972 | }, 973 | { 974 | "cell_type": "code", 975 | "execution_count": null, 976 | "id": "37ccd359", 977 | "metadata": {}, 978 | "outputs": [], 979 | "source": [ 980 | "\n", 981 | "# normalize time series to zero mean and unit variance\n", 982 | "X_train = TimeSeriesScalerMeanVariance().fit_transform(vectors)\n", 983 | "# X_train = np.array([vectors])\n", 984 | "print(X_train.shape)" 985 | ] 986 | }, 987 | { 988 | "cell_type": "code", 989 | "execution_count": null, 990 | "id": "bd9cc95b", 991 | "metadata": {}, 992 | "outputs": [], 993 | "source": [ 994 | "np.array([vectors])" 995 | ] 996 | }, 997 | { 998 | "cell_type": "code", 999 | "execution_count": null, 1000 | "id": "1d047909", 1001 | "metadata": {}, 1002 | "outputs": [], 1003 | "source": [ 1004 | "import os\n", 1005 | "# create required directory structure\n", 1006 | "!rm -rf tsl\n", 1007 | "dir_paths = ['./tsl', './tsl/models', './tsl/plots']\n", 1008 | "\n", 1009 | "for dir_path in dir_paths:\n", 1010 | " if not os.path.exists(dir_path):\n", 1011 | " os.makedirs(dir_path)" 1012 | ] 1013 | }, 1014 | { 1015 | "cell_type": "code", 1016 | "execution_count": null, 1017 | "id": "046b0bbb", 1018 | "metadata": {}, 1019 | "outputs": [], 1020 | "source": [ 1021 | "# algorithm configuration\n", 1022 | "algo = \"DTW_kmeans\"\n", 1023 | "metric = \"dtw\"\n", 1024 | "\n", 1025 | "# cluster configuration\n", 1026 | "N_CLUSTERS = 3\n", 1027 | "\n", 1028 | "model= TimeSeriesKMeans(n_clusters=N_CLUSTERS,\n", 1029 | " metric=metric,\n", 1030 | " n_jobs=-1,\n", 1031 | " random_state=0)\n", 1032 | "\n", 1033 | "y_pred = model.fit_predict(X_train)\n", 1034 | "\n", 1035 | "model.to_pickle(f\"./tsl/models/{algo}.pkl\")" 1036 | ] 1037 | }, 1038 | { 1039 | "cell_type": "code", 1040 | "execution_count": null, 1041 | "id": "7ad88e5f", 1042 | "metadata": {}, 1043 | "outputs": [], 1044 | "source": [ 1045 | 
"silhouette_score(X_train, y_pred, metric=\"dtw\")\n" 1046 | ] 1047 | }, 1048 | { 1049 | "cell_type": "code", 1050 | "execution_count": null, 1051 | "id": "05bf1dd1", 1052 | "metadata": {}, 1053 | "outputs": [], 1054 | "source": [ 1055 | "np.save(f\"./tls_{algo}_cluster_labels\", y_pred)" 1056 | ] 1057 | }, 1058 | { 1059 | "cell_type": "code", 1060 | "execution_count": null, 1061 | "id": "a1c74ddc", 1062 | "metadata": {}, 1063 | "outputs": [], 1064 | "source": [ 1065 | "%%time\n", 1066 | "import matplotlib.pyplot as plt\n", 1067 | "\n", 1068 | "for yi in range(N_CLUSTERS):\n", 1069 | " X_sub = X_train[y_pred == yi]\n", 1070 | " ts_cnt = pd.Series(y_pred[y_pred == yi]).shape[0]\n", 1071 | " fig = plt.figure()\n", 1072 | " plt.title(f\"{algo} | Cluster ID: {yi} | TS Count: {ts_cnt}\")\n", 1073 | " for xx in X_sub:\n", 1074 | " plt.plot(xx.ravel(), color='xkcd:sky blue', alpha=0.25)\n", 1075 | " fig.savefig(f\"./tsl/plots/{algo}_cls_lbl_{yi}.png\", dpi=150)\n", 1076 | " plt.show()\n", 1077 | " plt.close()" 1078 | ] 1079 | }, 1080 | { 1081 | "cell_type": "code", 1082 | "execution_count": null, 1083 | "id": "5049329a", 1084 | "metadata": {}, 1085 | "outputs": [], 1086 | "source": [ 1087 | "cluster_df = sales.copy()\n", 1088 | "cluster_df" 1089 | ] 1090 | }, 1091 | { 1092 | "cell_type": "code", 1093 | "execution_count": null, 1094 | "id": "896e288c", 1095 | "metadata": {}, 1096 | "outputs": [], 1097 | "source": [ 1098 | "cluster_pred = [] \n", 1099 | "for index, row in cluster_df.iterrows():\n", 1100 | " item_id = row['item_id']\n", 1101 | " store_id = row['store_id']\n", 1102 | " if (item_id, store_id) in vec_index:\n", 1103 | " cluster_pred.append(y_pred[vec_index[(item_id, store_id)]])\n", 1104 | " else:\n", 1105 | " cluster_pred.append(-1)\n", 1106 | "\n", 1107 | " \n", 1108 | "cluster_df['cluster'] = np.array(cluster_pred) " 1109 | ] 1110 | }, 1111 | { 1112 | "cell_type": "code", 1113 | "execution_count": null, 1114 | "id": "b74d59bb", 1115 | "metadata": {}, 1116 | 
"outputs": [], 1117 | "source": [ 1118 | "cluster_df" 1119 | ] 1120 | }, 1121 | { 1122 | "cell_type": "code", 1123 | "execution_count": null, 1124 | "id": "aecb100a", 1125 | "metadata": {}, 1126 | "outputs": [], 1127 | "source": [ 1128 | "cluster_df" 1129 | ] 1130 | }, 1131 | { 1132 | "cell_type": "code", 1133 | "execution_count": null, 1134 | "id": "42266806", 1135 | "metadata": {}, 1136 | "outputs": [], 1137 | "source": [ 1138 | "cluster2_f = cluster_df[cluster_df['cluster']==2].drop(['cluster'], axis=1)[['item_id','store_id','state_id','timestamp', 'demand']]\n", 1139 | "print(cluster2_f['demand'].sum() // len(cluster2_f['item_id'].unique()))\n", 1140 | "cluster2_f.to_csv('cluster_2.csv', index=False)\n", 1141 | "cluster1_f = cluster_df[cluster_df['cluster']==1].drop(['cluster'], axis=1)[['item_id', 'store_id', 'state_id','timestamp', 'demand']]\n", 1142 | "print(cluster1_f['demand'].sum() // len(cluster1_f['item_id'].unique()))\n", 1143 | "cluster1_f.to_csv('cluster_1.csv', index=False)\n", 1144 | "cluster0_f = cluster_df[cluster_df['cluster']==0].drop(['cluster'], axis=1)[['item_id', 'store_id','state_id','timestamp', 'demand']]\n", 1145 | "print(cluster0_f['demand'].sum() // len(cluster0_f['item_id'].unique()))\n", 1146 | "cluster0_f.to_csv('cluster_0.csv', index=False)\n", 1147 | "\n" 1148 | ] 1149 | }, 1150 | { 1151 | "cell_type": "code", 1152 | "execution_count": null, 1153 | "id": "899cb901", 1154 | "metadata": {}, 1155 | "outputs": [], 1156 | "source": [ 1157 | "meta_df" 1158 | ] 1159 | }, 1160 | { 1161 | "cell_type": "code", 1162 | "execution_count": null, 1163 | "id": "e36bb49e", 1164 | "metadata": {}, 1165 | "outputs": [], 1166 | "source": [ 1167 | "meta_df[['dept_id','cat_id']].drop_duplicates()" 1168 | ] 1169 | }, 1170 | { 1171 | "cell_type": "code", 1172 | "execution_count": null, 1173 | "id": "1c51b00c", 1174 | "metadata": {}, 1175 | "outputs": [], 1176 | "source": [ 1177 | "import matplotlib.pyplot as plt\n", 1178 | "\n", 1179 | "def 
analysis_cluster(cluster, meta, meta_id, group): \n", 1180 | " item_in_cluster = set(cluster['item_id'].unique())\n", 1181 | " meta_c = meta[meta[meta_id].isin(item_in_cluster)]\n", 1182 | " pt = meta_c.groupby([group]).count()[meta_id]\n", 1183 | " pt.plot.bar(subplots=True)\n", 1184 | " " 1185 | ] 1186 | }, 1187 | { 1188 | "cell_type": "markdown", 1189 | "id": "adc597a3", 1190 | "metadata": {}, 1191 | "source": [ 1192 | "#### Try different clusters to observe how their items are distributed across departments" 1193 | ] 1194 | }, 1195 | { 1196 | "cell_type": "code", 1197 | "execution_count": null, 1198 | "id": "29883fd0", 1199 | "metadata": {}, 1200 | "outputs": [], 1201 | "source": [ 1202 | "analysis_cluster(cluster0_f, meta_df, 'item_id', 'dept_id')" 1203 | ] 1204 | }, 1205 | { 1206 | "cell_type": "code", 1207 | "execution_count": null, 1208 | "id": "89e54257", 1209 | "metadata": {}, 1210 | "outputs": [], 1211 | "source": [ 1212 | "analysis_cluster(cluster1_f, meta_df, 'item_id', 'dept_id')" 1213 | ] 1214 | }, 1215 | { 1216 | "cell_type": "code", 1217 | "execution_count": null, 1218 | "id": "e36e134b", 1219 | "metadata": {}, 1220 | "outputs": [], 1221 | "source": [ 1222 | "analysis_cluster(cluster2_f, meta_df, 'item_id', 'dept_id')" 1223 | ] 1224 | }, 1225 | { 1226 | "cell_type": "raw", 1227 | "id": "4c7fbae2", 1228 | "metadata": {}, 1229 | "source": [ 1230 | "top_items = normal_demand_by_item.groupby(['item_id']).sum().reset_index().sort_values(by=['demand'], ascending=False)['item_id'][:50]\n", 1231 | "\n", 1232 | "\n", 1233 | "replicated = 1 # how many extra copies have to be added for popular items \n", 1234 | "item_df = tts_df[tts_df['item_id'].isin(top_items)].copy()\n", 1235 | "item_rts_df = related_ts_aug_df[related_ts_aug_df['item_id'].isin(top_items)].copy()\n", 1236 | "item_meta_df = meta_df[meta_df['item_id'].isin(top_items)].copy()\n", 1237 | "for j in range(0, replicated): \n", 1238 | " copy_df = item_df.copy()\n", 1239 | " copy_df['item_id'] = copy_df['item_id'].apply(lambda x: 
str(x)+\"|\"+str(j))\n", 1240 | " copy_rts_df = item_rts_df.copy()\n", 1241 | " copy_rts_df['item_id'] = copy_rts_df['item_id'].apply(lambda x: str(x)+\"|\"+str(j))\n", 1242 | " copy_meta_df = item_meta_df.copy()\n", 1243 | " copy_meta_df['item_id'] = copy_meta_df['item_id'].apply(lambda x: str(x)+\"|\"+str(j))\n", 1244 | " tts_df = tts_df.append(copy_df)\n", 1245 | " related_ts_aug_df = related_ts_aug_df.append(copy_rts_df)\n", 1246 | " meta_df = meta_df.append(copy_meta_df)\n", 1247 | " " 1248 | ] 1249 | }, 1250 | { 1251 | "cell_type": "raw", 1252 | "id": "5d99f8e7", 1253 | "metadata": {}, 1254 | "source": [ 1255 | "tts_df.to_csv(\"tts_weight_1.csv\", index=False)\n", 1256 | "meta_df.to_csv(\"meta_weight_1.csv\", index=False)\n", 1257 | "related_ts_aug_df.to_csv(\"rts_weight_1.csv\", index=False)" 1258 | ] 1259 | }, 1260 | { 1261 | "cell_type": "code", 1262 | "execution_count": null, 1263 | "id": "6c4e2c59", 1264 | "metadata": {}, 1265 | "outputs": [], 1266 | "source": [] 1267 | } 1268 | ], 1269 | "metadata": { 1270 | "kernelspec": { 1271 | "display_name": "conda_python3", 1272 | "language": "python", 1273 | "name": "conda_python3" 1274 | }, 1275 | "language_info": { 1276 | "codemirror_mode": { 1277 | "name": "ipython", 1278 | "version": 3 1279 | }, 1280 | "file_extension": ".py", 1281 | "mimetype": "text/x-python", 1282 | "name": "python", 1283 | "nbconvert_exporter": "python", 1284 | "pygments_lexer": "ipython3", 1285 | "version": "3.6.13" 1286 | } 1287 | }, 1288 | "nbformat": 4, 1289 | "nbformat_minor": 5 1290 | } 1291 | --------------------------------------------------------------------------------