├── .gitignore ├── data ├── .gitignore ├── clean.sh └── get-tectonic.sh ├── .gitmodules ├── runs ├── .gitignore └── example │ ├── rejectx │ └── config.json │ └── baleen │ └── prefetch_ml-on-partial-hit │ └── config.json ├── chameleon ├── jupyter.service ├── 2-start-dedicated-server.ipynb └── 1-getting-started.ipynb ├── getting-started.sh ├── notebooks ├── includes │ ├── common-wr-csize-20230424.ipynb │ ├── includes-202312.ipynb │ └── common-20230414.ipynb ├── reproduce │ ├── exps-cluster-sample.ipynb │ └── reproduce_commands.sh └── paper-figs │ └── fig-18-peak-hrs-20230424.ipynb └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | tmp/ 2 | .ipynb_checkpoints 3 | notebooks/paper-figs/figs 4 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | !*.sh -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "BCacheSim"] 2 | path = BCacheSim 3 | url = https://github.com/wonglkd/BCacheSim.git 4 | -------------------------------------------------------------------------------- /runs/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Jupyter Notebook 4 | .ipynb_checkpoints 5 | # Except this file 6 | !.gitignore 7 | !config.json 8 | !*/ 9 | -------------------------------------------------------------------------------- /data/clean.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd "$(dirname "$0")" 3 | rm results_release.csv.gz || true 4 | rm -r tectonic || true 5 | rm -r breakdown-stats/ || true 6 | rm 
breakdowns.tar.gz || true 7 | rm storage_0.1.tar.gz || true 8 | -------------------------------------------------------------------------------- /chameleon/jupyter.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Jupyter Notebook server 3 | 4 | [Service] 5 | Type=simple 6 | ExecStart=/usr/bin/env python -m jupyterlab -y --no-browser --NotebookApp.token='' 7 | WorkingDirectory=/home/cc 8 | User=cc 9 | Group=cc 10 | Restart=on-failure 11 | RestartSec=5s 12 | [Install] 13 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /data/get-tectonic.sh: -------------------------------------------------------------------------------- 1 | if [ ! -f storage_0.1.tar.gz ]; then 2 | wget https://ftp.pdl.cmu.edu/pub/datasets/Baleen24/storage_0.1.tar.gz 3 | fi 4 | if [ ! -d tectonic ]; then 5 | tar xvf storage_0.1.tar.gz 6 | mv storage tectonic 7 | fi 8 | if [ ! -f results_release.csv.gz ]; then 9 | wget https://ftp.pdl.cmu.edu/pub/datasets/Baleen24/results_release.csv.gz 10 | fi 11 | if [ ! -f breakdowns.tar.gz ]; then 12 | wget https://ftp.pdl.cmu.edu/pub/datasets/Baleen24/breakdowns.tar.gz 13 | fi 14 | if [ ! 
-d breakdown-stats/ ]; then 15 | tar xvf breakdowns.tar.gz 16 | fi 17 | -------------------------------------------------------------------------------- /runs/example/rejectx/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "trace": "data/tectonic/201910/Region1/full_0_0.1.trace", 3 | "ram_cache": false, 4 | "ram_cache_size_gb": 10.0, 5 | "fifo": false, 6 | "lirs": false, 7 | "ap": "rejectx", 8 | "rejectx_ap": true, 9 | "learned_ap": false, 10 | "ram_ap_clone": false, 11 | "batch_size": 512, 12 | "offline_ap": false, 13 | "coinflip_ap": false, 14 | "ap_threshold": 1.0, 15 | "ap_probability": 0.508154, 16 | "learned_ap_filter_count": 6, 17 | "learned_size": false, 18 | "size_opt": "access", 19 | "block_level": false, 20 | "flip_threshold": true, 21 | "evict_by_episode": false, 22 | "prefetch_when": "never", 23 | "prefetch_range": "episode", 24 | "admit_only_prefetches": false, 25 | "output_dir": "runs/example/rejectx", 26 | "override": false, 27 | "write_mbps": 0, 28 | "size_gb": 366.475, 29 | "debug": false, 30 | "profile": false, 31 | "one_chunk": false, 32 | "log_req": false, 33 | "log_prefetch": false, 34 | "fast": false, 35 | "ignore_existing": false, 36 | "log_interval": 600.0, 37 | "stats_start": 86400.0, 38 | "log_decisions": false, 39 | "log_evictions": false, 40 | "log_episodes": false, 41 | "eviction_policy": "LRU" 42 | } -------------------------------------------------------------------------------- /runs/example/baleen/prefetch_ml-on-partial-hit/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "trace": "data/tectonic/201910/Region1/full_0_0.1.trace", 3 | "learned_ap_model_path": "tmp/example/201910_Region1_0_0.1/ea_5892.86_wr_35.599_admit_threshold_binary.model", 4 | "prefetcher_model_path": "tmp/example/201910_Region1_0_0.1/ea_5892.86_wr_35.599_prefetch_{k}.model", 5 | "output_dir": "runs/example/baleen/prefetch_ml-on-partial-hit", 6 | 
"ep_analysis": "runs/example/baleen/example/201910_Region1_0_0.1/offline_analysis_ea_5892.86.csv", 7 | "ram_cache": false, 8 | "ram_cache_size_gb": 10.0, 9 | "fifo": false, 10 | "lirs": false, 11 | "ap": "mlnew", 12 | "rejectx_ap": false, 13 | "learned_ap": true, 14 | "ram_ap_clone": false, 15 | "batch_size": 16, 16 | "offline_ap": false, 17 | "coinflip_ap": false, 18 | "ap_threshold": 0.798545, 19 | "ap_feat_subset": "meta+block+chunk", 20 | "learned_ap_filter_count": 6, 21 | "learned_ap_granularity": "both", 22 | "learned_size": false, 23 | "size_opt": "access", 24 | "block_level": false, 25 | "flip_threshold": true, 26 | "evict_by_episode": false, 27 | "prefetch_when": "partial", 28 | "prefetch_range": "acctime-episode-predict", 29 | "admit_only_prefetches": false, 30 | "override": false, 31 | "write_mbps": 0, 32 | "size_gb": 366.475, 33 | "debug": false, 34 | "profile": false, 35 | "one_chunk": false, 36 | "log_req": false, 37 | "log_prefetch": false, 38 | "fast": false, 39 | "ignore_existing": false, 40 | "log_interval": 600.0, 41 | "stats_start": 86400.0, 42 | "eviction_policy": "LRU", 43 | "log_decisions": false, 44 | "log_evictions": false, 45 | "log_episodes": false 46 | } 47 | -------------------------------------------------------------------------------- /getting-started.sh: -------------------------------------------------------------------------------- 1 | if [ ! -d "BCacheSim" ]; then 2 | echo "BCacheSim does not exist; are you in the correct directory? 
(Baleen-FAST24)" 3 | exit 1 4 | fi 5 | 6 | # We assume the repository has already been cloned; if not, run the line below: 7 | # git clone --recurse-submodules https://github.com/wonglkd/Baleen-FAST24.git && cd Baleen-FAST24 8 | 9 | # No cd here: the BCacheSim check above already guarantees we are in the repository root. 10 | git pull --recurse-submodules 11 | 12 | # Install dependencies (Conda) 13 | conda env create -f BCacheSim/install/env_cachelib-py-3.11.yaml 14 | # Pypy is optional; used to speed up non-ML simulations 15 | # conda env create -f BCacheSim/install/env_cachelib-pypy-3.8.yaml 16 | conda activate cachelib-py-3.11 17 | 18 | # Install dependencies (pip) - alternative to Conda 19 | # python3 -m pip install -r BCacheSim/install/requirements.txt --user 20 | 21 | # Download trace files 22 | cd data 23 | bash get-tectonic.sh 24 | cd .. 25 | 26 | # Run RejectX (4 mins) 27 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --config runs/example/rejectx/config.json 28 | 29 | # Train Baleen (1 min) 30 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp example --policy PolicyUtilityServiceTimeSize2 --region Region1 --sample-ratio 0.1 --sample-start 0 --trace-group 201910 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.475 --output-base-dir runs/example/baleen --eviction-age 5892.856 --rl-init-kwargs filter_=prefetch --train-target-wr 35.599 --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 --ap-acc-cutoff 15 --ap-feat-subset meta+block+chunk 31 | 32 | # Run Baleen (30 mins) 33 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --config runs/example/baleen/prefetch_ml-on-partial-hit/config.json 34 | 35 | # To examine the results, run Baleen-FAST24/notebooks/example/example.ipynb 36 | -------------------------------------------------------------------------------- /notebooks/includes/common-wr-csize-20230424.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | 
"cell_type": "code", 5 | "execution_count": null, 6 | "id": "4b646a7c-9818-4e19-a950-958f5cb2bd37", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "def get_kwargs(df_, y, hue=\"ShortLabel\", errs=True):\n", 11 | " kwargs = dict(\n", 12 | " hue=hue,\n", 13 | " style=hue,\n", 14 | " markers=True,\n", 15 | " dashes=False,\n", 16 | " )\n", 17 | " if errs:\n", 18 | " kwargs.update(dict(\n", 19 | " err_kws=dict(capsize=10),\n", 20 | " linewidth=3,\n", 21 | " err_style=\"bars\",\n", 22 | " ))\n", 23 | " if hue == \"ShortLabel\":\n", 24 | " if all(k in maps.SHORT_COLORMAP for k in df_[hue]):\n", 25 | " kwargs[\"palette\"] = maps.SHORT_COLORMAP\n", 26 | " kwargs[\"hue_order\"] = [\n", 27 | " k for k in maps.SHORT_COLORMAP.keys() if k in df_[\"ShortLabel\"].values\n", 28 | " ]\n", 29 | " else:\n", 30 | " print(\"Missing - not using SHORT_COLORMAP\")\n", 31 | " print(set([k for k in df_[hue] if k not in maps.SHORT_COLORMAP]))\n", 32 | " if all(k in maps.SHORT_MARKERMAP for k in df_[hue]):\n", 33 | " kwargs[\"markers\"] = maps.SHORT_MARKERMAP\n", 34 | " else:\n", 35 | " print(\"Missing - not using SHORT_MARKERMAP\")\n", 36 | " print(set([k for k in df_[hue] if k not in maps.SHORT_MARKERMAP]))\n", 37 | " else:\n", 38 | " if all(k in maps.DEFAULT_COLORMAP for k in df_[hue]):\n", 39 | " kwargs[\"palette\"] = maps.DEFAULT_COLORMAP\n", 40 | " kwargs[\"hue_order\"] = [\n", 41 | " k for k in maps.DEFAULT_COLORMAP.keys() if k in df_[\"PlotLabel\"].values\n", 42 | " ]\n", 43 | " return kwargs\n", 44 | "\n", 45 | "\n", 46 | "def postplot(df_, target=True, target_v=None, targetlabel=True, figlabel=True):\n", 47 | " ax = plt.gca()\n", 48 | "\n", 49 | " if ax.get_legend():\n", 50 | " ax.legend()\n", 51 | " # g.get_legend().set_title(None)\n", 52 | "\n", 53 | " ax.set_ylim(0, None)\n", 54 | " ax.set_xlim(0, None)\n", 55 | " y = ax.get_ylabel()\n", 56 | " y = nice_ylabel(y)\n", 57 | " ax.set_ylabel(y, loc=\"top\")\n", 58 | " fig_labels = []\n", 59 | " if \"SampleRatio\" in 
df_.columns:\n", 60 | " if df_[\"SampleRatio\"].nunique() == 1:\n", 61 | " fig_labels.append(f\"{df_['SampleRatio'].unique()[0]:g}%\")\n", 62 | " else:\n", 63 | " print(\"Multiple SampleRatio:\", df_[\"SampleRatio\"].unique())\n", 64 | " if \"RegionLabel\" in df_.columns:\n", 65 | " if df_[\"RegionLabel\"].nunique() == 1:\n", 66 | " fig_labels.append(str(df_[\"RegionLabel\"].unique()[0]))\n", 67 | " if fig_labels and figlabel:\n", 68 | " add_fig_label(\", \".join(fig_labels))\n", 69 | " if target:\n", 70 | " ax.axvline(target_v, ls=\":\", c=\"black\")\n", 71 | " if targetlabel:\n", 72 | " maps.add_target_label(twr=target_v, fmt=\"3 DWPD\")\n", 73 | " \n", 74 | " \n", 75 | "import matplotlib.ticker as ticker\n", 76 | "\n", 77 | "\n", 78 | "def add_leg_to_subplot(loc=(2, 4, 4)):\n", 79 | " ax_0 = plt.subplot(loc[0], loc[1], 1)\n", 80 | " handles, labels = ax_0.get_legend_handles_labels()\n", 81 | " ax_0.get_legend().remove()\n", 82 | " ax = plt.subplot(*loc)\n", 83 | " ax.legend(handles, labels, loc=\"center\", title=\"Policy\")\n", 84 | " ax.set_axis_off()\n", 85 | " ax.get_xaxis().set_visible(False)\n", 86 | " ax.get_yaxis().set_visible(False)\n", 87 | "\n", 88 | "def postsubplot_wr(ax, i):\n", 89 | " if i == 0:\n", 90 | " ax.xaxis.set_major_locator(ticker.MaxNLocator(3))\n", 91 | " ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())\n", 92 | " ax.yaxis.set_major_locator(ticker.MaxNLocator(3))\n", 93 | " ax.yaxis.set_minor_locator(ticker.MultipleLocator(5))\n", 94 | " ax.tick_params(which=\"major\", length=6)\n", 95 | " ax.tick_params(which=\"minor\", length=4)\n", 96 | " ax.set_ylabel(\"\")\n", 97 | " ax.set_xlabel(\"\")\n", 98 | " \n", 99 | "reload(maps)\n", 100 | "contexts.use(\"single\")\n", 101 | "\n", 102 | "def plot_wrs_grid(df=None, y=\"P100ServiceTimeUtil@10m\", hue=\"ShortLabel\", x=\"Target DWPD\"):\n", 103 | " num_traces = df[\"RegionLabel\"].nunique()\n", 104 | "\n", 105 | " fig, ax = plt.subplots(\n", 106 | " nrows=2,\n", 107 | " ncols=4,\n", 108 | 
" sharex=True,\n", 109 | " sharey=False,\n", 110 | " figsize=(7 * 2, 3 * 2),\n", 111 | " layout=\"constrained\",\n", 112 | " )\n", 113 | " for i, (region, df_) in enumerate(df.groupby(\"RegionLabel\")):\n", 114 | " ax = plt.subplot(2, 4, i + 1 + (1 if i > 2 else 0))\n", 115 | " sns.lineplot(\n", 116 | " data=df_, x=x, y=y, **get_kwargs(df_, y, hue=hue), ax=ax, legend=i == 0\n", 117 | " )\n", 118 | " postplot(df_, target=False, targetlabel=False)\n", 119 | " postsubplot_wr(ax, i)\n", 120 | " add_leg_to_subplot((2,4,4))\n", 121 | " if \"Write Rate\" in x:\n", 122 | " fig.supxlabel(maps.l_wr)\n", 123 | " elif \"DWPD\" in x:\n", 124 | " fig.supxlabel(\"DWPD (Drive Writes Per Day)\")\n", 125 | " fig.supylabel(nice_ylabel(y))" 126 | ] 127 | } 128 | ], 129 | "metadata": { 130 | "kernelspec": { 131 | "display_name": "CPython 3.11 (CacheLib, Conda)", 132 | "language": "python", 133 | "name": "cachelib-py-3.11" 134 | }, 135 | "language_info": { 136 | "codemirror_mode": { 137 | "name": "ipython", 138 | "version": 3 139 | }, 140 | "file_extension": ".py", 141 | "mimetype": "text/x-python", 142 | "name": "python", 143 | "nbconvert_exporter": "python", 144 | "pygments_lexer": "ipython3", 145 | "version": "3.11.0" 146 | } 147 | }, 148 | "nbformat": 4, 149 | "nbformat_minor": 5 150 | } 151 | -------------------------------------------------------------------------------- /notebooks/includes/includes-202312.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "5f9c44f4-2df0-4f40-9ff5-b63aa4fbd315", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import datetime\n", 11 | "import platform; print(platform.python_implementation(), datetime.datetime.now().isoformat())\n", 12 | "_exp = 'UNDEFINED'" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "id": "ddb60874-28c5-496d-bb32-c0c930336fd5", 19 | "metadata": {}, 20 | 
"outputs": [], 21 | "source": [ 22 | "import sys\n", 23 | "sys.path.append('../..')\n", 24 | "from BCacheSim import cachesim\n", 25 | "from BCacheSim import episodic_analysis\n", 26 | "from BCacheSim.cachesim.sim_cache import _lookup_episode\n", 27 | "from BCacheSim.cachesim.sim_cache import Timestamp" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "id": "cb6f94d6-f93a-451b-b9da-7628c12abad2", 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import pickle\n", 38 | "import glob\n", 39 | "import random\n", 40 | "from tqdm.notebook import tqdm\n", 41 | "# import lightgbm as lgb\n", 42 | "import matplotlib.pyplot as plt\n", 43 | "import pandas as pd\n", 44 | "import numpy as np\n", 45 | "import seaborn as sns\n", 46 | "\n", 47 | "import BCacheSim.cachesim.dynamic_features as dfeature\n", 48 | "import BCacheSim.cachesim.utils as utils\n", 49 | "# try:\n", 50 | "# from sklearn.metrics import r2_score, roc_curve, precision_recall_curve, auc\n", 51 | "# except (ModuleNotFoundError, ImportError):\n", 52 | "# print(\"Cannot import sklearn\")\n", 53 | "from collections import Counter\n", 54 | "import time\n", 55 | "import traceback" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "id": "d28d5457-c40a-4868-8248-0c96621c8f52", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "from pprint import pprint" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "id": "b48a53c9-d8b6-4cf5-aa07-41518152c9e1", 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "from BCacheSim.episodic_analysis import experiments\n", 76 | "from BCacheSim.episodic_analysis import local_cluster\n", 77 | "from BCacheSim.episodic_analysis import ep_utils\n", 78 | "from BCacheSim.episodic_analysis import episodes\n", 79 | "from BCacheSim.episodic_analysis.episodes import Episode\n", 80 | "from BCacheSim.episodic_analysis import trace_utils\n", 81 | "from 
BCacheSim.episodic_analysis import policies\n", 82 | "from BCacheSim.episodic_analysis.exps import factory as ef\n", 83 | "from BCacheSim.episodic_analysis import monitor_exps as monitor\n", 84 | "from BCacheSim.episodic_analysis.ep_utils import flatten\n", 85 | "from BCacheSim.episodic_analysis.plotting import maps\n", 86 | "from BCacheSim.episodic_analysis import adaptors\n", 87 | "from BCacheSim.episodic_analysis.exps import results" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "id": "bff6b490-c8ee-48c3-9bf6-05c6e649e1e3", 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "result_cols = ['AP Threshold', 'Avg Eviction Age (s)', 'Write Rate (MB/s)', 'ServiceTimeSavedRatio', 'ServiceTimeSavedRatio1', 'IOPSSavedRatio']" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "id": "af3d845d-5f71-4808-968e-9806b3b45ad0", 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "from IPython.display import display, HTML\n", 108 | "display(HTML(\"\"))" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "id": "ca27e9d7-7f78-4cac-806c-b9f762ed3069", 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "from importlib import reload\n", 119 | "reload(monitor)\n", 120 | "filter_df = monitor.filter_df_dct" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "id": "50dfc95c-ea5d-424d-a545-43fb8c9b9ed6", 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "import pandas as pd\n", 131 | "# import seaborn as sns\n", 132 | "import os\n", 133 | "os.environ['SOURCE_DATE_EPOCH'] = '1631030919'\n", 134 | "pd.set_option('display.max_rows', 500)\n", 135 | "pd.set_option('display.max_columns', 500)\n", 136 | "pd.set_option('display.width', 1000)\n", 137 | "pd.set_option('display.max_colwidth', None)\n", 138 | "import matplotlib.pyplot as plt" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 
143 | "execution_count": null, 144 | "id": "a08b29eb-6402-4e9d-a057-d5bae3776486", 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "from BCacheSim.episodic_analysis.plotting import maps\n", 149 | "from BCacheSim.episodic_analysis.plotting.maps import add_fig_label\n", 150 | "from BCacheSim.episodic_analysis.monitor_exps import filter_df_dct\n", 151 | "filter_df = filter_df_dct\n", 152 | "# add_fig_label, etc is inside\n", 153 | "from BCacheSim.episodic_analysis.plotting import contexts\n", 154 | "from BCacheSim.episodic_analysis.plotting import styles\n", 155 | "contexts.use('wide')\n", 156 | "# Used by results\n", 157 | "from BCacheSim.episodic_analysis.plotting import loader\n", 158 | "from BCacheSim.episodic_analysis.plotting import processors" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "id": "0aa35488-97f6-4107-8f72-cda89eaedd1b", 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "COLS = [\n", 169 | " 'ShortLabel',\n", 170 | " 'RegionLabel',\n", 171 | " maps.l_wr,\n", 172 | " 'P100ServiceTimeUtil@10m',\n", 173 | " 'P99.9ServiceTimePercent1',\n", 174 | " 'P99.9ServiceTimeUtil@10m',\n", 175 | " 'P99ServiceTimeUtil@10m',\n", 176 | " 'P50ServiceTimeUtil@10m',\n", 177 | " 'AP Threshold',\n", 178 | " 'Assumed Eviction Age (s)',\n", 179 | " 'Converged',\n", 180 | " 'Iteration',\n", 181 | " 'SampleStart',\n", 182 | " 'SampleRatio',\n", 183 | " 'Trace',\n", 184 | " 'ExperimentName',\n", 185 | " 'Filename',\n", 186 | "]" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "id": "19e7bdd3-14c4-4fb2-807e-4d483211615a", 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "savefig = maps.savefig" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "id": "2484c20c-ee6d-48f0-a9fe-94c980374f90", 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "def load_df(filename):\n", 207 | " dfc_raw_old = 
pd.read_csv(filename, low_memory=False)\n", 208 | " dfc_raw_old['Target Write Rate'] = pd.to_numeric(dfc_raw_old['Target Write Rate'], errors='coerce')\n", 209 | " dfc_raw_old['TraceGroup'] = dfc_raw_old['TraceGroup'].astype('str')\n", 210 | " return dfc_raw_old\n", 211 | "\n", 212 | "def combine_dfs(*args):\n", 213 | " return pd.concat(args).copy()\n", 214 | "\n", 215 | "def combine_with_old(filename, dfc_raw_):\n", 216 | " return combine_dfs(load_df(filename), df_raw_)" 217 | ] 218 | } 219 | ], 220 | "metadata": { 221 | "kernelspec": { 222 | "display_name": "CPython 3.11 (CacheLib, Conda)", 223 | "language": "python", 224 | "name": "cachelib-py-3.11" 225 | }, 226 | "language_info": { 227 | "codemirror_mode": { 228 | "name": "ipython", 229 | "version": 3 230 | }, 231 | "file_extension": ".py", 232 | "mimetype": "text/x-python", 233 | "name": "python", 234 | "nbconvert_exporter": "python", 235 | "pygments_lexer": "ipython3", 236 | "version": "3.11.0" 237 | } 238 | }, 239 | "nbformat": 4, 240 | "nbformat_minor": 5 241 | } 242 | -------------------------------------------------------------------------------- /notebooks/reproduce/exps-cluster-sample.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# NOTICE\n", 8 | "Caution: This notebook has not been tested in the artifact reproduction setting but will be useful as a reference point for future research -- be prepared to modify it. \n", 9 | "\n", 10 | "Use case: For researchers intending to run more experiments in a cluster setting.\n", 11 | "Requirements: Brooce, redis.\n", 12 | "\n", 13 | "You may contact the authors for further questions." 
14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": { 20 | "ExecuteTime": { 21 | "start_time": "2023-03-09T03:48:15.338Z" 22 | } 23 | }, 24 | "outputs": [ 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "CPython 2024-01-16T11:09:31.175421\n" 30 | ] 31 | }, 32 | { 33 | "data": { 34 | "text/html": [ 35 | "" 36 | ], 37 | "text/plain": [ 38 | "" 39 | ] 40 | }, 41 | "metadata": {}, 42 | "output_type": "display_data" 43 | }, 44 | { 45 | "name": "stdout", 46 | "output_type": "stream", 47 | "text": [ 48 | "CPU times: user 884 µs, sys: 0 ns, total: 884 µs\n", 49 | "Wall time: 1.12 ms\n" 50 | ] 51 | } 52 | ], 53 | "source": [ 54 | "%run ../includes/common-20230414.ipynb" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 2, 60 | "metadata": { 61 | "ExecuteTime": { 62 | "end_time": "2023-03-09T03:28:53.776917Z", 63 | "start_time": "2023-03-09T03:28:48.008837Z" 64 | }, 65 | "tags": [] 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "exp_common = dict(exp_date='20240101', group='fall24')\n", 70 | "base_params = {\n", 71 | " **ef.csize_wrs_from_dwpds(csizes=[ef.DEFAULT_CSIZE], dwpds=[7.5, 15, 3.75, 1, 20]),\n", 72 | " **ef.traces(\n", 73 | " regions=[\n", 74 | " '201910/Region1',\n", 75 | " '201910/Region2',\n", 76 | " '201910/Region3',\n", 77 | " '202110/Region4',\n", 78 | " '20230325/Region5',\n", 79 | " '20230325/Region6',\n", 80 | " '20230325/Region7',\n", 81 | " ],\n", 82 | " # This requires more samples to be downloaded\n", 83 | " # ratios=[.1], max_samples=10, sample_start=0),\n", 84 | " ratios=[.1], max_samples=1, sample_start=0),\n", 85 | "}" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 4, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "# Generates samples; requires full traces to be downloaded.\n", 95 | "# local_cluster.ensure_samples(base_params, only_wait=False)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | 
"execution_count": 3, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "CPU times: user 6.29 s, sys: 524 ms, total: 6.81 s\n", 108 | "Wall time: 6.81 s\n" 109 | ] 110 | } 111 | ], 112 | "source": [ 113 | "%%time\n", 114 | "df_guess = pd.read_csv('../../data/results_release.csv.gz', low_memory=False)\n", 115 | "all_exps = {}" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 4, 121 | "metadata": { 122 | "tags": [] 123 | }, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "/users/dlwong/projects/Baleen-FAST24\n" 130 | ] 131 | } 132 | ], 133 | "source": [ 134 | "%cd ../.." 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 5, 140 | "metadata": { 141 | "ExecuteTime": { 142 | "end_time": "2023-03-09T03:29:30.843360Z", 143 | "start_time": "2023-03-09T03:29:07.590569Z" 144 | }, 145 | "tags": [] 146 | }, 147 | "outputs": [ 148 | { 149 | "data": { 150 | "application/vnd.jupyter.widget-view+json": { 151 | "model_id": "1685d11415514d9d9a6c852c147ae6bf", 152 | "version_major": 2, 153 | "version_minor": 0 154 | }, 155 | "text/plain": [ 156 | "0it [00:00, ?it/s]" 157 | ] 158 | }, 159 | "metadata": {}, 160 | "output_type": "display_data" 161 | }, 162 | { 163 | "data": { 164 | "application/vnd.jupyter.widget-view+json": { 165 | "model_id": "accbbd85aaf8413a8d7d3e7519daf27f", 166 | "version_major": 2, 167 | "version_minor": 0 168 | }, 169 | "text/plain": [ 170 | "0it [00:00, ?it/s]" 171 | ] 172 | }, 173 | "metadata": {}, 174 | "output_type": "display_data" 175 | }, 176 | { 177 | "data": { 178 | "application/vnd.jupyter.widget-view+json": { 179 | "model_id": "296a2e48f0d4474caa6c58773b7b6ae3", 180 | "version_major": 2, 181 | "version_minor": 0 182 | }, 183 | "text/plain": [ 184 | "0it [00:00, ?it/s]" 185 | ] 186 | }, 187 | "metadata": {}, 188 | "output_type": "display_data" 189 | }, 190 | { 191 | 
"name": "stdout", 192 | "output_type": "stream", 193 | "text": [ 194 | "CPU times: user 21.1 s, sys: 174 ms, total: 21.2 s\n", 195 | "Wall time: 21.3 s\n" 196 | ] 197 | }, 198 | { 199 | "data": { 200 | "text/plain": [ 201 | "" 202 | ] 203 | }, 204 | "execution_count": 5, 205 | "metadata": {}, 206 | "output_type": "execute_result" 207 | } 208 | ], 209 | "source": [ 210 | "%%time\n", 211 | "eff = ef.ExpFactory(name='alltraces', desc='All traces', **exp_common)\n", 212 | "eff.add_params({**ef.EXP_OPT, **ef.prefetchs(ef.PF_BASIC), **base_params})\n", 213 | "eff.add_params({**ef.EXP_STATIC, **ef.prefetchs(ef.PF_BASIC), **base_params})\n", 214 | "eff.add_params({**ef.EXP_MLNEW, **ef.prefetchs(ef.PF_BASIC), **base_params})\n", 215 | "eff.update(all_exps, df_guess=df_guess)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 6, 221 | "metadata": { 222 | "ExecuteTime": { 223 | "end_time": "2023-03-09T03:13:37.117767Z", 224 | "start_time": "2023-03-09T03:13:36.963658Z" 225 | }, 226 | "tags": [] 227 | }, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/plain": [ 232 | "(700, dict_keys(['20240101_alltraces']))" 233 | ] 234 | }, 235 | "execution_count": 6, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": [ 241 | "all_exps_ = list(flatten(all_exps.values()))\n", 242 | "len(all_exps_), all_exps.keys()#, all_exps" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": { 249 | "tags": [] 250 | }, 251 | "outputs": [], 252 | "source": [ 253 | "# Useful if experiments failed\n", 254 | "# monitor.relaunch(all_exps)" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": { 261 | "ExecuteTime": { 262 | "start_time": "2023-03-08T21:43:49.136Z" 263 | } 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "monitor.save(all_exps)\n", 268 | "monitor.run_exps(all_exps, relaunch_stale=True, queue='par4', top=False, 
displayer=None, limit_running_exps=400)" 269 | ] 270 | } 271 | ], 272 | "metadata": { 273 | "kernelspec": { 274 | "display_name": "CPython 3.11 (CacheLib, Conda)", 275 | "language": "python", 276 | "name": "cachelib-py-3.11" 277 | }, 278 | "language_info": { 279 | "codemirror_mode": { 280 | "name": "ipython", 281 | "version": 3 282 | }, 283 | "file_extension": ".py", 284 | "mimetype": "text/x-python", 285 | "name": "python", 286 | "nbconvert_exporter": "python", 287 | "pygments_lexer": "ipython3", 288 | "version": "3.11.0" 289 | }, 290 | "toc-showtags": false 291 | }, 292 | "nbformat": 4, 293 | "nbformat_minor": 4 294 | } 295 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Artifact for Baleen (FAST 2024) 2 | 3 | _Baleen: ML Admission & Prefetching for Flash Caches_ 4 | 5 | _[Paper (Preprint)](https://wonglkd.fi-de.net/papers/Baleen-FAST24.pdf) | [Code](https://github.com/wonglkd/BCacheSim/) | [Data](https://ftp.pdl.cmu.edu/pub/datasets/Baleen24/) | [Video walkthrough](https://www.tiny.cc/BaleenArtifactYT) | [Reproduce on Chameleon](https://www.chameleoncloud.org/experiment/share/aa6fb454-6452-4fc8-994a-b028bfc3c82d)_ 6 | 7 | This repository is targeted at those seeking to reproduce the results found in the Baleen paper and contains a frozen copy of the code. 8 | If you are looking to use Baleen, please go to https://github.com/wonglkd/BCacheSim/ for the latest version. 9 | 10 | ![Artifact Available](https://sysartifacts.github.io/images/usenix_available.svg) 11 | ![Artifact Functional](https://sysartifacts.github.io/images/usenix_functional.svg) 12 | ![Results Reproduced](https://sysartifacts.github.io/images/usenix_reproduced.svg) 13 | 14 | **Scope:** this repository contains Python code to reproduce the **simulator** results in the Baleen paper. 
The testbed code modified a proprietary internal version of CacheLib and will not be released at this time, pending a rebase on the open-source version of CacheLib. Another key difference is that Meta's exact constants for the disk head time function will not be released, meaning that results will not be exactly the same; instead, we use constants (seek time and bandwidth) measured on the hard disks in our university testbed. 15 | 16 | **Nomenclature:** 17 | Some terms were renamed after coding for better clarity in the paper. However, they mean the same thing. 18 | 19 | - Service Time (in the code) was renamed to Disk Head Time (in the paper) 20 | - Chunks (in the code) are called segments (in the paper) 21 | 22 | 23 | ## Walkthrough Video 24 | We have verified that our instructions work on Chameleon, and have recorded a video showing the setup of the environment and the reproduction of the instructions below (YouTube: http://tiny.cc/BaleenArtifactYT). This video shows the setup on Chameleon, the execution of the instructions below, and all notebooks running to completion with no error cells. 25 | 26 | [![Baleen Artifact Walkthrough (FAST 2024)](https://github.com/wonglkd/Baleen-FAST24/assets/2821951/4b1f348b-35fc-4262-a69f-a754c0ec99b9)](https://www.tiny.cc/BaleenArtifactYT) 27 | 28 | ## Getting Started 29 | 30 | _Time estimate: 60 mins (20 mins interactive)._ 31 | 32 | ### Installation (Chameleon Trovi) 33 | 34 | _Time estimate: 30 minutes (10 mins interactive)._ 35 | 36 | The recommended way is to use Chameleon Trovi, an academic cloud. Note that you will require an allocation; if you are affiliated with FAST, you can request to be added to the associated project (CHI-231080). To do this (and for any other issues with Chameleon), please contact the helpdesk at help@chameleoncloud.org. 37 | 38 | 1. Launch [artifact on Trovi](https://www.chameleoncloud.org/experiment/share/aa6fb454-6452-4fc8-994a-b028bfc3c82d) 39 | 2. 
(Optional) Open notebook `chameleon/1-getting-started.ipynb` which will walk you through the Getting Started section of this README. You may run one cell at a time, or click Run -> Run All Cells to execute all commands. If processes get killed, you need a dedicated server. 40 | 3. (Recommended) The shared JupyterHub has limited RAM/disk. Run notebook `chameleon/2-start-dedicated-server.ipynb`, which provisions a beefier node (for 7 days) that you can create an SSH tunnel to. 41 | 42 | ### Installation (local computer) 43 | 44 | Alternatively, you may do a manual install. These commands are also available in [getting-started.sh](getting-started.sh) for your convenience. 45 | 46 | 1. Clone the repository (if not already done) 47 | 48 | ``` 49 | git clone --recurse-submodules https://github.com/wonglkd/Baleen-FAST24.git 50 | cd Baleen-FAST24 51 | ``` 52 | 53 | Note: this repository uses submodules. As a reminder, when you pull, you'll likely want to use `git pull --recurse-submodules`. 54 | 55 | 2. Install Python dependencies with Conda/Mamba/Micromamba or pip. (We developed with Micromamba 1.4.1.) 56 | 57 | ``` 58 | conda env create -f BCacheSim/install/env_cachelib-py-3.11.yaml 59 | conda activate cachelib-py-3.11 60 | # PyPy is optional (for faster non-ML runs) 61 | # conda env create -f BCacheSim/install/env_cachelib-pypy-3.8.yaml 62 | ``` 63 | 64 | Alternatively, use pip: 65 | 66 | ``` 67 | python3 -m pip install --user -r BCacheSim/install/requirements.txt 68 | ``` 69 | 70 | 3. Download trace files (see [here](https://ftp.pdl.cmu.edu/pub/datasets/Baleen24/) for more details on the traces) 71 | 72 | ``` 73 | cd data 74 | bash get-tectonic.sh 75 | ``` 76 | 77 | ### Do a simple experiment 78 | 79 | _Time estimate: 30 minutes (10 mins interactive)._ 80 | 81 | 1. Manually run the simulator with the baseline RejectX. (4 mins) 82 | 83 | ``` 84 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --config runs/example/rejectx/config.json 85 | ``` 86 | 87 | 2. 
Manually train Baleen's ML models (25 secs) and run the simulator with Baleen (~30 mins). 88 | 89 | ``` 90 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp example --policy PolicyUtilityServiceTimeSize2 --region Region1 --sample-ratio 0.1 --sample-start 0 --trace-group 201910 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.475 --output-base-dir runs/example/baleen --eviction-age 5892.856 --rl-init-kwargs filter_=prefetch --train-target-wr 35.599 --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 --ap-acc-cutoff 15 --ap-feat-subset meta+block+chunk 91 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --config runs/example/baleen/prefetch_ml-on-partial-hit/config.json 92 | ``` 93 | 94 | 3. Use [notebooks/example/example.ipynb](notebooks/example/example.ipynb) to view and plot results. 95 | 96 | 97 | ## Detailed Instructions 98 | 99 | This section assumes you have completed the 'Getting Started' section and have 100 | installed the code and downloaded the traces. 101 | 102 | As it requires too much computation time to rerun every single experiment, 103 | we suggest the following steps to maximize the use of reviewers' time in evaluating 104 | our paper. We supply our traces, code, and the intermediate results from our experimental runs. 105 | 106 | **Roadmap for evaluation:** 107 | 108 | 1. Test out Baleen's ML training & simulator (in Getting Started). 109 | - What: simulate RejectX baseline, train Baleen models, simulate Baleen 110 | - Expected results: notebooks/example/example.ipynb 111 | 2. Plot graphs using our intermediate results. 112 | - Example: notebooks/paper-figs/fig-01bc,17-202309.ipynb 113 | 3. Select additional simulations to run if desired. 
114 | - See notebooks/reproduce/, in particular commands.ipynb and reproduce_commands.sh 115 | 116 | 117 | ## Directory structure 118 | 119 | - data: traces that are used as input 120 | - runs: where experiment results are stored 121 | - tmp: temporary directory for ML models, generated episode files 122 | - notebooks: Jupyter notebooks for experiments 123 | - notebooks/figs: Output directory for figures 124 | 125 | 126 | ## Additional notes 127 | 128 | 624 machine-days were used for the final runs to generate the results used in the paper. 129 | Each simulation of an ML policy takes at least 30 minutes, multiplied by 7 traces and 10 samples each. 130 | 131 | ## Future research 132 | 133 | notebooks/reproduce/exps-cluster-sample.ipynb will be useful to allow you to run experiments efficiently, but with more dependencies required (brooce, redis). 134 | 135 | ## Troubleshooting 136 | 137 | If you face any issues, please try the following things: 138 | 139 | 1. Making sure you have the latest version of the repository 140 | 141 | ``` 142 | git pull --recurse-submodules 143 | ``` 144 | 145 | 2. Making sure you have the latest copy of the data. 146 | 147 | ``` 148 | cd data 149 | bash clean.sh 150 | bash get-tectonic.sh 151 | ``` 152 | 153 | 3. If you need to get an allocation on Chameleon or face any difficulties with the platform itself, please contact their [helpdesk](mailto:help@chameleoncloud.org). 154 | 155 | ## Any questions? 156 | 157 | [Please raise a GitHub issue](https://github.com/wonglkd/Baleen-FAST24/issues/new). Support is best effort; you may also email me (contact details at https://wonglkd.fi-de.net). 158 | 159 | ## Reference 160 | 161 | **[Baleen: ML Admission & Prefetching for Flash Caches](https://www.usenix.org/conference/fast24/presentation/wong)**
162 | Daniel Lin-Kit Wong, Hao Wu, Carson Molder, Sathya Gunasekar, Jimmy Lu, Snehal Khandkar, Abhinav Sharma, Daniel S. Berger, Nathan Beckmann, Gregory R. Ganger
163 | USENIX FAST 2024 164 | -------------------------------------------------------------------------------- /chameleon/2-start-dedicated-server.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "4d8184f7-559a-4fdb-a66d-f69d2c38c6c6", 6 | "metadata": {}, 7 | "source": [ 8 | "# Note from Baleen authors\n", 9 | "\n", 10 | "You can skip this and use the shared JupyterHub alone to run the simple tests and plotting, but if you would like to reproduce Baleen's results more extensively, you will need at least a dedicated server, if not a cluster.\n", 11 | "\n", 12 | "Use this notebook to reserve and set up a dedicated server (otherwise, you will be using the shared JupyterHub with very limited storage and CPU constraints). This notebook is modified from the Jupyter artifact -- any modifications are labelled with \"BALEEN-SPECIFIC\" comments." 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "id": "31e1deff-9cc1-43bd-90e3-034edf1c6751", 18 | "metadata": {}, 19 | "source": [ 20 | "# Jupyter Notebook\n", 21 | "\n", 22 | "A Jupyter notebook is what you're looking at right now. It contains a mix of Markdown and code, which is a great interface for annotating code and allowing it to be run interactively. This notebook is (probably) running on Chameleon's JupyterHub server. This means that the code is being executed on a very resource-light instance which is only designed to interface with Chameleon APIs. \n", 23 | "\n", 24 | "Because Jupyter notebooks are such a great way to present experiment code with results and documentation, it makes sense that one would want to execute their experiment's code from within a Jupyter notebook _directly_ on a powerful compute host. That is what this notebook accomplishes!\n", 25 | "\n", 26 | "## Steps\n", 27 | "1. Reserve a node for your experiments\n", 28 | "2. Create an instance on that node\n", 29 | "3. 
Spawn a Jupyter server on that instance\n", 30 | "4. Connect to that Jupyter server" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "id": "f3778de1-1529-497f-bfd4-baaef1ad699c", 36 | "metadata": {}, 37 | "source": [ 38 | "## Experiment configuration\n", 39 | "\n", 40 | "We'll be running this server on a single node. The node will still be able to connect to other nodes on the same network if your experiment requires multiple nodes. Because this setup is so simple, you can configure the variables below to any valid configuration you want." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 1, 46 | "id": "792ae6c6-1911-46e8-af85-968b059387ea", 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "import os\n", 51 | "\n", 52 | "project_name = \"CHI-231080\" # Change this if necessary\n", 53 | "site_name = \"CHI@TACC\"\n", 54 | "node_type = \"compute_cascadelake_r\"\n", 55 | "image_name = \"CC-Ubuntu22.04\"\n", 56 | "network_name = \"sharednet1\"\n", 57 | "\n", 58 | "user = os.getenv(\"USER\")\n", 59 | "# Leases can be between 1 and 7 days\n", 60 | "lease_length = 7\n", 61 | "lease_name = f\"{user}-jupyter-server\"" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "id": "156bde30-a43b-461d-a4d2-8a07eefcdd82", 67 | "metadata": {}, 68 | "source": [ 69 | "With this configuration, we'll log into Chameleon so we can start provisioning our resources" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 2, 75 | "id": "04b88fde-d941-45be-83bb-490fef4f5f05", 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "Now using CHI@TACC:\n", 83 | "URL: https://chi.tacc.chameleoncloud.org\n", 84 | "Location: Austin, Texas, USA\n", 85 | "Support contact: help@chameleoncloud.org\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "import chi\n", 91 | "\n", 92 | "chi.use_site(site_name)\n", 93 | "chi.set(\"project_name\", project_name)" 94 | ] 95 | }, 96 | { 97 
| "cell_type": "markdown", 98 | "id": "78e825fc-9e33-40c0-b683-f2729bd2a142", 99 | "metadata": {}, 100 | "source": [ 101 | "## Reserve a host\n", 102 | "\n", 103 | "With our configuration, let's reserve a host to run our notebook server on." 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "id": "cc710e66-c763-4f3e-9ac9-dec4c26e4154", 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "import chi.lease\n", 114 | "\n", 115 | "# Reserve a host for the Jupyter server\n", 116 | "reservation = []\n", 117 | "chi.lease.add_node_reservation(\n", 118 | " reservation,\n", 119 | " node_type=node_type,\n", 120 | " count=1\n", 121 | ")\n", 122 | "# We need to use a floating IP \n", 123 | "# in order to access the Jupyter server from the computer you're using\n", 124 | "chi.lease.add_fip_reservation(reservation, count=1)\n", 125 | "\n", 126 | "start_date, end_date = chi.lease.lease_duration(days=lease_length)\n", 127 | "\n", 128 | "# Create the lease on Chameleon\n", 129 | "print(\"Submitting lease...\")\n", 130 | "lease = chi.lease.create_lease(\n", 131 | " lease_name,\n", 132 | " reservation,\n", 133 | " start_date=start_date,\n", 134 | " end_date=end_date\n", 135 | ")\n", 136 | "print(\"Waiting for lease to become active...\")\n", 137 | "lease = chi.lease.wait_for_active(lease[\"id\"])\n", 138 | "print(\"Lease is active!\")\n", 139 | "lease" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "id": "3cf68658-15c7-4b01-a2cd-5c097d207a40", 145 | "metadata": {}, 146 | "source": [ 147 | "## Spawning an instance\n", 148 | "\n", 149 | "With our resources in hand, we'll spawn an instance to run the Jupyter server on.\n", 150 | "\n", 151 | "In order to connect to the server from the computer you're using right now, you'll need to set up an SSH keypair on Chameleon. 
If you haven't done this yet, please check out [the docs](https://chameleoncloud.readthedocs.io/en/latest/getting-started/index.html#accessing-your-instance)." 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "id": "5dca1cb7-8980-4394-9f27-71587fa9fda8", 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "import chi.network\n", 162 | "import chi.server\n", 163 | "\n", 164 | "network_id = chi.network.get_network_id(network_name)\n", 165 | "server_name = f\"{user}-jupyter-notebook-server\"\n", 166 | "node_reservation = chi.lease.get_node_reservation(\n", 167 | " lease[\"id\"], \n", 168 | " node_type=node_type,\n", 169 | " count=1,\n", 170 | ")\n", 171 | "print(f\"Spawning server at {site_name}...\")\n", 172 | "notebook_server = chi.server.create_server(\n", 173 | " server_name,\n", 174 | " reservation_id=node_reservation,\n", 175 | " image_name=image_name,\n", 176 | " network_id=network_id,\n", 177 | " count=1,\n", 178 | ")\n", 179 | "print(\"Waiting for server to become active...\")\n", 180 | "chi.server.wait_for_active(notebook_server.id)\n", 181 | "print(f\"Server at {site_name} is active!\")" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "id": "a06e55dc-23fe-4b7f-a306-34e4aaa79259", 187 | "metadata": {}, 188 | "source": [ 189 | "We've created a server to run Jupyter on. In order to interact with the server from here on out, we'll need to connect via SSH over a floating IP address. So let's assign the floating IP we reserved and wait for SSH to be available." 
190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "id": "33f40dfc-ba86-4a39-b39a-adf14d76455b", 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "floating_ip = chi.lease.get_reserved_floating_ips(lease[\"id\"])[0]\n", 200 | "chi.server.associate_floating_ip(notebook_server.id, floating_ip)\n", 201 | "print(\"Associated floating IP with server.\")\n", 202 | "print(\"Waiting for SSH to become active...\")\n", 203 | "chi.server.wait_for_tcp(floating_ip, port=22)\n", 204 | "print(f\"Notebook server now accessible via SSH at {floating_ip}\")" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "id": "a2e9b052-01b3-4ee7-a921-9d0889a8143f", 210 | "metadata": {}, 211 | "source": [ 212 | "## Connecting to the server\n", 213 | "\n", 214 | "Now that we can access the server, let's connect to it so that we can install Jupyter." 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "id": "d169f32b-9441-4dc1-94ac-171d1754e105", 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "import chi.ssh\n", 225 | "\n", 226 | "remote = chi.ssh.Remote(floating_ip)\n", 227 | "remote.run(\"echo Hello from $(hostname)!\")" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "id": "1db0ebdb-9945-409d-b97b-a4a5eeac9bc7", 233 | "metadata": {}, 234 | "source": [ 235 | "## Setting up Jupyter\n", 236 | "\n", 237 | "Now that the server is ready, we will install and configure Jupyter." 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "id": "e45eb25a-b338-41a4-a79c-f2d5d0de7ec5", 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "# Install Jupyter package\n", 248 | "if image_name.lower().startswith(\"cc-ubuntu\"):\n", 249 | " remote.run(\"sudo apt update && sudo apt install -y jupyter-notebook python3-jupyterlab-server\")\n", 250 | "else:\n", 251 | " remote.run(\"python3 -m pip install --upgrade pip && 
python3 -m pip install jupyter jupyterlab\")" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "id": "7314d34b-2727-4400-90db-0105aa36496c", 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "# BALEEN-SPECIFIC\n", 262 | "remote.run(\"git clone --recurse-submodules https://github.com/wonglkd/Baleen-FAST24.git\")" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "id": "174dedf5-c542-49f5-9d4b-d34f0284a417", 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "# BALEEN-SPECIFIC\n", 273 | "remote.run(\"python3 -m pip install --user -r Baleen-FAST24/BCacheSim/install/requirements.txt\")" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "id": "530ba078-cbf7-4793-bea2-e77f1fda3b07", 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "# Generate config\n", 284 | "remote.run(\"which jupyter\")" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "id": "25f6bdde-1e35-4fd9-be34-b61f4477da17", 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "# Generate config\n", 295 | "remote.run(\"jupyter notebook --generate-config\")" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "id": "07d7c82a-7ddf-4b76-9d4c-0857e4576419", 301 | "metadata": {}, 302 | "source": [ 303 | "### Creating a Jupyter service\n", 304 | "\n", 305 | "In order to have Jupyter run in the background and not interrupt the rest of this notebook, we'll install it as a service rather than run it directly." 
306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "id": "56ef9e08-80fa-4bd2-9e45-0b3a9036ae07", 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [ 315 | "# Copy the systemd service manifest onto the server\n", 316 | "remote.put(\"jupyter.service\")\n", 317 | "remote.run(\"sudo mv jupyter.service /etc/systemd/system\")" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "id": "eeda7bca-8751-4b22-a8a8-ae842fe28c40", 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "# Start the service\n", 328 | "remote.run(\"sudo systemctl daemon-reload\")\n", 329 | "remote.run(\"sudo systemctl enable jupyter.service\")\n", 330 | "remote.run(\"sudo systemctl start jupyter.service\")" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "id": "d3dd68b7-d355-4fff-adff-63fa04c5ca1b", 336 | "metadata": {}, 337 | "source": [ 338 | "### Connecting to the server securely\n", 339 | "\n", 340 | "**PLEASE READ, DO NOT IGNORE**\n", 341 | "\n", 342 | "The server we've set up is in a **very insecure** configuration. This, however, is fine, because it is only accepting connections from the host it's running on. This means that random people on the internet can't connect to it and exploit it. In order to maintain this security, it's imperative that you adhere to the following rules:\n", 343 | "\n", 344 | "1. **DO NOT, UNDER ANY CIRCUMSTANCES, MODIFY THE FIREWALL**\n", 345 | "2. **DO NOT, UNDER ANY CIRCUMSTANCES, CHANGE THE PORT, IP, OR HOST THAT THE JUPYTER SERVER IS LISTENING ON**\n", 346 | "\n", 347 | "If you do this, you may allow malicious actors to gain access to your Jupyter server, which will allow them to take complete control over your instance. They will absolutely ruin your experiment in order to mine Bitcoin and seed torrents. Your server will be found and exploited very quickly if you make bad changes to the configuration. 
If this notebook is having issues, and you're not sure you can fix it in a secure way, please submit a ticket to the Chameleon help desk.\n", 348 | "\n", 349 | "However, if we block anyone from connecting to the server remotely, how will we use it?\n", 350 | "\n", 351 | "Via an [SSH tunnel](https://www.ssh.com/academy/ssh/tunneling)! We will create a secure, encrypted tunnel to the Jupyter host, which will allow us to connect to the notebook server as if we are on the same host. This is the most secure way to remotely access services on Chameleon." 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "id": "d28339f9-e649-4ea6-b8b8-4659186f5ee0", 357 | "metadata": { 358 | "tags": [] 359 | }, 360 | "source": [ 361 | "#### Creating an SSH tunnel\n", 362 | "\n", 363 | "First upload an SSH key to this folder. You can generate one with the command: `ssh-keygen -t rsa -b 4096`\n", 364 | "\n", 365 | "Then open a terminal **on your local machine, not the Jupyter interface**, and run the command output by the cell below:" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": null, 371 | "id": "52f2c78b-b9e2-4a32-ada5-37d0bf5445e8", 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [ 375 | "print(f\"ssh -NT -o ServerAliveInterval=60 -L 8888:localhost:8888 cc@{floating_ip} -i \")" 376 | ] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "id": "85bf8518-6222-43ca-9907-5c14f800a9e9", 381 | "metadata": {}, 382 | "source": [ 383 | "If the above command didn't work, it's probably because you did not upload your local machine's SSH key to Chameleon. If that's the case, upload `~/.ssh/id_rsa.pub` to the same folder as this notebook, and run the cell below." 
384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "id": "a8f28049-8235-4e1e-b65a-f790f0015c6a", 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "# You may also use this command to open a terminal to the server\n", 394 | "print(f\"ssh cc@{floating_ip} -i \")" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "id": "c3773446-45cb-4fba-bbf3-afd6c1757c6e", 401 | "metadata": {}, 402 | "outputs": [], 403 | "source": [ 404 | "import os\n", 405 | "\n", 406 | "local_keyfile_path = \"./id_rsa.pub\"\n", 407 | "if os.path.exists(local_keyfile_path):\n", 408 | " remote.put(local_keyfile_path, \"/tmp/id_rsa.pub\")\n", 409 | " remote.run(\"cat /tmp/id_rsa.pub >> ~/.ssh/authorized_keys\")\n", 410 | " print(\"Loaded SSH key onto remote host\")\n", 411 | "else:\n", 412 | " print(\"No key uploaded. Skipping\")" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "id": "ab5b2ffc-023c-4095-9656-3fb60a88adcf", 418 | "metadata": {}, 419 | "source": [ 420 | "If you're able to run the `ssh` command from above without it exiting with an error, then you have successfully created an SSH tunnel! Now, you will be able to _securely_ access your Jupyter server at [http://localhost:8888](http://localhost:8888)." 421 | ] 422 | }, 423 | { 424 | "cell_type": "markdown", 425 | "id": "4aa8f281-07ac-4527-99d5-9578955e7825", 426 | "metadata": {}, 427 | "source": [ 428 | "## Teardown\n", 429 | "\n", 430 | "When we're done with the host we've loaded Jupyter on, we can free the resources we've reserved.\n", 431 | "\n", 432 | "**Warning: This will permanently delete your instance and all the data on it. Only do this if you've ensured that your work has been backed up.**\n", 433 | "\n", 434 | "We usually recommend experiment data be backed up to the [object store](https://chameleoncloud.readthedocs.io/en/latest/technical/swift.html)." 
435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": 16, 440 | "id": "5734b2ea-e70c-44d8-9654-7810daba784f", 441 | "metadata": {}, 442 | "outputs": [], 443 | "source": [ 444 | "do_teardown = False\n", 445 | "\n", 446 | "if do_teardown:\n", 447 | " chi.lease.delete_lease(lease[\"id\"])" 448 | ] 449 | } 450 | ], 451 | "metadata": { 452 | "kernelspec": { 453 | "display_name": "Python 3 (ipykernel)", 454 | "language": "python", 455 | "name": "python3" 456 | }, 457 | "language_info": { 458 | "codemirror_mode": { 459 | "name": "ipython", 460 | "version": 3 461 | }, 462 | "file_extension": ".py", 463 | "mimetype": "text/x-python", 464 | "name": "python", 465 | "nbconvert_exporter": "python", 466 | "pygments_lexer": "ipython3", 467 | "version": "3.10.9" 468 | } 469 | }, 470 | "nbformat": 4, 471 | "nbformat_minor": 5 472 | } 473 | -------------------------------------------------------------------------------- /notebooks/includes/common-20230414.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "b8b9e54b-b15d-46d2-960b-03ccef0434fd", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "%run ../includes/includes-202312.ipynb\n", 11 | "contexts.use(\"single\")" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "id": "7ee96217-9fee-46c3-93b8-1b0d14f82980", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "l_ST = \"DT\"" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "id": "bbf0965f-b422-4903-b7ef-8c3a04526716", 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "%%time\n", 32 | "reload(maps)\n", 33 | "def get_df(filename=\"../../data/results_release.csv.gz\"):\n", 34 | " dfc_raw_ = pd.read_csv(f\"{filename}\", low_memory=False)\n", 35 | " dfc_raw_[\"TraceGroup\"] = dfc_raw_[\"TraceGroup\"].astype(\"str\")\n", 36 | " 
dfc_raw_[\"Target Cache Size (TB)\"] = dfc_raw_[\"Target Cache Size\"] / 1000\n", 37 | " try:\n", 38 | " dfc_raw_, _ = maps.postproc(dfc_raw_)\n", 39 | " except:\n", 40 | " print(\"postproc failed\")\n", 41 | " raise\n", 42 | " # dfc_raw_ = proc_canon(dfc_raw_)\n", 43 | " return dfc_raw_\n", 44 | "\n", 45 | "def get_data(dfc_raw_=None, default_sample_ratio=.1, default_cl_o_ratio=1, default_sc_ratio=5, verbose=0, skipna=True,\n", 46 | " star_y=\"P100ServiceTimeUtil@10m\"):\n", 47 | " dfc_raw_ = dfc_raw_ if dfc_raw_ is not None else get_df()\n", 48 | " common_filter = {\n", 49 | " 'CanonExp': True,\n", 50 | " 'DWPDNotFar': True,\n", 51 | " \"Target Cache Size\": ef.DEFAULT_CSIZE,\n", 52 | " 'Trace': [\n", 53 | " '201910/Region1',\n", 54 | " '201910/Region3',\n", 55 | " '201910/Region2',\n", 56 | " '202110/Region4',\n", 57 | " '20230325/Region7',\n", 58 | " '20230325/Region6',\n", 59 | " '20230325/Region5',\n", 60 | " ],\n", 61 | " }\n", 62 | " df_exp = filter_df(dfc_raw_, common_filter)\n", 63 | " \n", 64 | " df_cl = filter_df(dfc_raw_, {\"Target Cache Size\": ef.DEFAULT_CSIZE, 'DWPDNotFar': True, 'Region': ['Region1'], 'SampleRatio': default_cl_o_ratio})\n", 65 | " df_o = filter_df(dfc_raw_, {\"Target Cache Size\": ef.DEFAULT_CSIZE, 'DWPDNotFar': True, 'Region': ['Region2'], 'SampleRatio': default_cl_o_ratio})\n", 66 | " df_sc = filter_df(dfc_raw_, {\"Target Cache Size\": ef.DEFAULT_CSIZE, 'DWPDNotFar': True, 'Region': ['Region3'], 'SampleRatio': default_sc_ratio})\n", 67 | " df_rest = df_exp[(df_exp['Region'] != 'Region1') & (df_exp['Region'] != 'Region3') & (df_exp['Region'] != 'Region2')]\n", 68 | " print(len(df_rest))\n", 69 | " df_rest = filter_df(df_rest, {'SampleRatio': default_sample_ratio})\n", 70 | " assert df_rest['SampleRatio'].nunique() == 1\n", 71 | " df_exp_sampledright = pd.concat([df_cl, df_sc, df_o, df_rest])\n", 72 | " df_exp_for_star = filter_df(df_exp_sampledright, {'PracticalAP': True})\n", 73 | "\n", 74 | " df_exp34 = 
filter_df(df_exp_sampledright, {'Target DWPD': 7.5})\n", 75 | " df_exp34_for_star = filter_df(df_exp_for_star, {'Target DWPD': 7.5})\n", 76 | " \n", 77 | " df_star34 = add_pfbest(df_exp34_for_star, y=star_y, verbose=verbose, skipna=skipna)\n", 78 | " df_exp34_all = pd.concat([df_exp34, df_star34])\n", 79 | " df_exp34_for_plot = filter_df(df_exp34_all, {\n", 80 | " 'ShortLabel': ['RejectX', 'CoinFlip', 'NoEps-ML', 'Baleen (No Prefetch)', 'Baleen'],\n", 81 | " 'PracticalAP': True,\n", 82 | " }, use_glob=False)\n", 83 | " df_exp34_for_cmp = filter_df(df_exp34_all, {\n", 84 | " 'ShortLabel': ['RejectX', 'CoinFlip', 'NoEps-ML', 'Baleen (No Prefetch)', 'Baleen', 'Baleen (ML Prefetch)', 'Baleen (ML-Range on Partial Hit)', 'Baleen (All on Partial Hit)', \"OPT AP (OPT Prefetch)\"],\n", 85 | " }, use_glob=False)\n", 86 | " \n", 87 | " df_star = add_pfbest(df_exp_for_star, y=star_y, columns=[\"Region\", \"Target DWPD\"], verbose=verbose, skipna=True)\n", 88 | " df_all = pd.concat([df_exp_sampledright, df_star])\n", 89 | " df_exp_for_plot = filter_df(df_all, {\n", 90 | " 'ShortLabel': ['RejectX', 'CoinFlip', 'NoEps-ML', 'Baleen (No Prefetch)', 'Baleen'],\n", 91 | " 'PracticalAP': True,\n", 92 | " }, use_glob=False)\n", 93 | " df_exp_for_cmp = filter_df(df_all, {\n", 94 | " 'ShortLabel': ['RejectX', 'CoinFlip', 'NoEps-ML', 'Baleen (No Prefetch)', 'Baleen', 'Baleen (ML Prefetch)', \"OPT AP (OPT Prefetch)\"],\n", 95 | " }, use_glob=False)\n", 96 | "\n", 97 | " \n", 98 | " return {\n", 99 | " 'raw': dfc_raw_,\n", 100 | " 'exp_': df_exp_sampledright,\n", 101 | " # 'exp': df_exp_sampledright,\n", 102 | " 'exp': df_all,\n", 103 | " 'exp_canon': df_exp_for_plot,\n", 104 | " 'exp_cmp': df_exp_for_cmp,\n", 105 | " 'exp34': df_exp34_all,\n", 106 | " 'exp34_canon': df_exp34_for_plot,\n", 107 | " 'exp34_cmp': df_exp34_for_cmp,\n", 108 | " }\n", 109 | "\n", 110 | "def latex_macros(dct, suffix='\\%'):\n", 111 | " for k, v in dct.items():\n", 112 | " 
print('\\\\newcommand{{\\{}}}{{{:.1f}{}}}'.format(k, v, suffix))" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "id": "b846297c-2265-443e-942d-ecc8bdc94083", 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "def add_pfbest(df_summary, y, order=None, idx=['ShortLabel'], columns=['Region'], verbose=1, skipna=True):\n", 123 | " \"\"\"Pick best option for each Region \"\"\"\n", 124 | " dcz = monitor.filter_df_dct(df_summary, {'PracticalAP': True})\n", 125 | " #dcz = sampleright(dcz, idx=['ShortLabel', 'AdmissionPolicyLabel', 'Prefetching', 'Target Write Rate', 'Target DWPD'])\n", 126 | " dfs_real = pd.pivot_table(dcz, values=[y], index=['AdmissionPolicyLabel','Prefetching'], columns=columns)\n", 127 | " # TODO: Check explicitly for the prefetching options we expect?\n", 128 | " if verbose >= 2:\n", 129 | " display(dfs_real)\n", 130 | " best_pf_option = dfs_real.reset_index(0).groupby(\"AdmissionPolicyLabel\")\n", 131 | " if 'Saved' in y:\n", 132 | " best_pf_option_ = best_pf_option.idxmax(skipna=skipna)\n", 133 | " best_pf_option = best_pf_option.max()\n", 134 | " else:\n", 135 | " best_pf_option_ = best_pf_option.idxmin(skipna=skipna)\n", 136 | " best_pf_option = best_pf_option.min()\n", 137 | " if verbose >= 2:\n", 138 | " display(best_pf_option)\n", 139 | " display(best_pf_option_)\n", 140 | " \n", 141 | " rows = []\n", 142 | " dqr = best_pf_option_[y]\n", 143 | " for i in range(len(columns)):\n", 144 | " dqr = dqr.stack() \n", 145 | " for hdr, bestpf in dqr.items():\n", 146 | " ap = hdr[0]\n", 147 | " filter_ = {\"AdmissionPolicyLabel\": ap, \"Prefetching\": bestpf}\n", 148 | " for i, col in enumerate(columns):\n", 149 | " filter_[col] = hdr[-1-i]\n", 150 | " bestz = monitor.filter_df_dct(dcz, filter_)\n", 151 | " bestz['ShortLabel'] = bestz['AdmissionPolicyLabel'] + ('*' if ap != 'Baleen' else '')\n", 152 | " bestz['PlotLabel'] = bestz['AdmissionPolicyLabel'] + '*'\n", 153 | " rows.append(bestz)\n", 154 
| " return pd.concat(rows)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "id": "acb1d594-28d0-4bfc-a588-39c71761613f", 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "reload(maps)\n", 165 | "def plot_bar(\n", 166 | " df_,\n", 167 | " y=None,\n", 168 | " hue='ShortLabel',\n", 169 | " max_y=None,\n", 170 | " max_wr=80,\n", 171 | " regions=maps.REGIONS_CANON,\n", 172 | " points=False,\n", 173 | " legend=True,\n", 174 | " **kwargs\n", 175 | "):\n", 176 | " assert len(df_)\n", 177 | " df_ = df_.reset_index(drop=True)\n", 178 | " if regions is not None:\n", 179 | " df_ = df_[df_['Region'].isin(regions)]\n", 180 | " if hue == 'ShortLabel':\n", 181 | " if all(k in maps.SHORT_COLORMAP for k in df_[hue]):\n", 182 | " kwargs['palette'] = maps.SHORT_COLORMAP\n", 183 | " kwargs['hue_order'] = [k for k in maps.SHORT_COLORMAP.keys() if k in df_['ShortLabel'].values]\n", 184 | " else:\n", 185 | " print(\"Missing - not using SHORT_COLORMAP\")\n", 186 | " print(set([k for k in df_[hue] if k not in maps.SHORT_COLORMAP]))\n", 187 | " # print({k: k in maps.SHORT_COLORMAP for k in df_[hue]})\n", 188 | " else:\n", 189 | " if all(k in maps.DEFAULT_COLORMAP for k in df_[hue]):\n", 190 | " kwargs['palette'] = maps.DEFAULT_COLORMAP\n", 191 | " kwargs['hue_order'] = [k for k in maps.DEFAULT_COLORMAP.keys() if k in df_['PlotLabel'].values]\n", 192 | "\n", 193 | " if points:\n", 194 | " sns.stripplot(data=df_, y=y, x=\"RegionLabel\", hue=hue, dodge=True, jitter=True, legend=False, size=15, marker=\"$\\circ$\", zorder=0, **kwargs)\n", 195 | " g = sns.barplot(\n", 196 | " data=df_,\n", 197 | " x='RegionLabel',\n", 198 | " order=[maps.region_labels[x] for x in regions],\n", 199 | " y=y,\n", 200 | " hue=hue,\n", 201 | " linewidth=3,\n", 202 | " # elinewidth=2,\n", 203 | " errwidth=2.5,\n", 204 | " capsize=.03,\n", 205 | " **kwargs)\n", 206 | " if points:\n", 207 | " for patch in g.patches:\n", 208 | " clr = patch.get_facecolor()\n", 
209 | " patch.set_edgecolor(clr)\n", 210 | " patch.set_facecolor((0,0,0,0))\n", 211 | "\n", 212 | " if regions is None or len(regions) >= 3:\n", 213 | " plt.xticks(rotation=90)\n", 214 | " if legend:\n", 215 | " g.get_legend().set_title(None)\n", 216 | " plt.legend(frameon=True, loc='lower right')\n", 217 | " \n", 218 | " ax = plt.gca()\n", 219 | " plt.xlabel(\"Trace\")\n", 220 | " y = nice_ylabel(y)\n", 221 | " plt.ylabel(y, loc='top')\n", 222 | " plt.grid(True, axis='y')" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "id": "4dc8a63b-952c-41be-93d1-938f881134a5", 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | "figlabels = {\n", 233 | " \"peak-st-util\": \"P100ServiceTimeUtil@10m\",\n", 234 | " \"median-st-util\": \"P50ServiceTimeUtil@10m\",\n", 235 | " \"mean-st-util\": \"MeanServiceTimeUtil\",\n", 236 | " \"peak-st-ratio\": \"P100ServiceTimePercent@10m\",\n", 237 | " \"median-st-ratio\": \"P50ServiceTimePercent@10m\",\n", 238 | " \"mean-st-ratio\": \"MeanServiceTimeUsedPercent\",\n", 239 | " \"iops-miss-ratio\": maps.l_iop_f,\n", 240 | " \"bw-miss-ratio\": maps.l_bw_f,\n", 241 | "}\n", 242 | "niceylabel = {\n", 243 | " \"P100ServiceTimeUtil@10m\": \"Peak Backend Load (%)\",\n", 244 | " \"P50ServiceTimeUtil@10m\": \"Median Backend Load (%)\",\n", 245 | " \"MeanServiceTimeUtil\": \"Mean Backend Load (%)\",\n", 246 | " \"P100ServiceTimePercent@10m\": \"Peak Backend Load\\n(% of no cache)\",\n", 247 | " \"P50ServiceTimePercent@10m\": \"Median Backend Load\\n(% of no cache)\",\n", 248 | " \"MeanServiceTimeUsedPercent\": \"Mean Backend Load\\n(% of no cache)\",\n", 249 | " maps.l_iop_f: maps.l_iop_f,\n", 250 | " maps.l_bw_f: maps.l_bw_f,\n", 251 | "}" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "id": "de8214a2-507b-49ca-91c3-7305b9fb2588", 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "\n", 262 | "def nice_ylabel(y):\n", 263 | " if y in 
niceylabel:\n", 264 | " y = niceylabel[y]\n", 265 | " elif \"ServiceTime\" in y and \"@\" in y:\n", 266 | " ptile, dg = y.split(\"ServiceTime\")\n", 267 | " metric, window = dg.split(\"@\")\n", 268 | " y = f\"ST {metric} (%)\\n({ptile} @ {window})\"\n", 269 | " return y\n", 270 | "\n" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "id": "5d28d003-e2fd-489b-ae62-eb53152327dd", 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "def sampleright(df_, \n", 281 | " idx=['ShortLabel', 'AdmissionPolicyLabel', 'Prefetching'],\n", 282 | " all_ys=[maps.l_wr, 'DWPD'],\n", 283 | " verbose=0):\n", 284 | " cols = all_ys + [c for c in df_.columns if (c.startswith(\"P\") and '@' in c) or c.startswith(\"MeanServiceTime\") or (\"Ratio\" in c and c != \"SampleRatio\") or \"%\" in c]\n", 285 | " if verbose > 0:\n", 286 | " print(len(df_))\n", 287 | " df_ = df_.groupby(idx+['Region', 'RegionLabel', 'SampleRatio', 'SampleStart'])[cols].mean().reset_index()\n", 288 | " if verbose > 0:\n", 289 | " print(len(df_))\n", 290 | " df_ = df_.groupby(idx+['Region', 'RegionLabel', 'SampleRatio'])[cols].mean().reset_index()\n", 291 | " if verbose > 0:\n", 292 | " print(len(df_))\n", 293 | " df_ = df_.groupby(idx+['Region', 'RegionLabel'])[cols].mean().reset_index()\n", 294 | " if verbose > 0:\n", 295 | " print(len(df_))\n", 296 | " return df_\n", 297 | "def fillright(df_, \n", 298 | " idx=['ShortLabel', 'AdmissionPolicyLabel', 'Prefetching', 'Target Cache Size', 'Target Cache Size (TB)'],\n", 299 | " verbose=False,\n", 300 | " all_ys=[maps.l_wr, 'DWPD']):\n", 301 | " cols = all_ys + [c for c in df_.columns if (c.startswith(\"P\") and '@' in c) or c.startswith(\"MeanServiceTime\") or (\"Ratio\" in c and c != \"SampleRatio\") or \"%\" in c]\n", 302 | " df_ = df_.pivot(index=idx, columns=['Region', 'RegionLabel'], values=cols)\n", 303 | " if verbose:\n", 304 | " display(df_)\n", 305 | " df_ = df_.fillna(method='ffill')\n", 306 | " if 
verbose:\n", 307 | " display(df_)\n", 308 | " df_ = df_.stack().stack().reset_index()\n", 309 | " return df_" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "id": "44f9de4f-34ce-4d62-8a16-0c9fa290f389", 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "def get_kwargs(df_, y, hue=\"ShortLabel\", errs=True):\n", 320 | " kwargs = dict(\n", 321 | " hue=hue,\n", 322 | " style=hue,\n", 323 | " markers=True,\n", 324 | " dashes=False,\n", 325 | " )\n", 326 | " if errs:\n", 327 | " kwargs.update(dict(\n", 328 | " err_kws=dict(capsize=10),\n", 329 | " linewidth=3,\n", 330 | " err_style=\"bars\",\n", 331 | " ))\n", 332 | " if hue == \"ShortLabel\":\n", 333 | " if all(k in maps.SHORT_COLORMAP for k in df_[hue]):\n", 334 | " kwargs[\"palette\"] = maps.SHORT_COLORMAP\n", 335 | " kwargs[\"hue_order\"] = [\n", 336 | " k for k in maps.SHORT_COLORMAP.keys() if k in df_[\"ShortLabel\"].values\n", 337 | " ]\n", 338 | " else:\n", 339 | " print(\"Missing - not using SHORT_COLORMAP\")\n", 340 | " print(set([k for k in df_[hue] if k not in maps.SHORT_COLORMAP]))\n", 341 | " if all(k in maps.SHORT_MARKERMAP for k in df_[hue]):\n", 342 | " kwargs[\"markers\"] = maps.SHORT_MARKERMAP\n", 343 | " else:\n", 344 | " print(\"Missing - not using SHORT_MARKERMAP\")\n", 345 | " print(set([k for k in df_[hue] if k not in maps.SHORT_MARKERMAP]))\n", 346 | " else:\n", 347 | " if all(k in maps.DEFAULT_COLORMAP for k in df_[hue]):\n", 348 | " kwargs[\"palette\"] = maps.DEFAULT_COLORMAP\n", 349 | " kwargs[\"hue_order\"] = [\n", 350 | " k for k in maps.DEFAULT_COLORMAP.keys() if k in df_[\"PlotLabel\"].values\n", 351 | " ]\n", 352 | " return kwargs\n", 353 | "\n", 354 | "\n", 355 | "def postplot(df_, target=True, target_v=None, targetlabel=True, figlabel=True):\n", 356 | " ax = plt.gca()\n", 357 | "\n", 358 | " if ax.get_legend():\n", 359 | " ax.legend()\n", 360 | " # g.get_legend().set_title(None)\n", 361 | "\n", 362 | " ax.set_ylim(0, None)\n", 
363 | " ax.set_xlim(0, None)\n", 364 | " y = ax.get_ylabel()\n", 365 | " y = nice_ylabel(y)\n", 366 | " ax.set_ylabel(y, loc=\"top\")\n", 367 | " fig_labels = []\n", 368 | " if \"SampleRatio\" in df_.columns:\n", 369 | " if df_[\"SampleRatio\"].nunique() == 1:\n", 370 | " fig_labels.append(f\"{df_['SampleRatio'].unique()[0]:g}%\")\n", 371 | " else:\n", 372 | " print(\"Multiple SampleRatio:\", df_[\"SampleRatio\"].unique())\n", 373 | " if \"RegionLabel\" in df_.columns:\n", 374 | " if df_[\"RegionLabel\"].nunique() == 1:\n", 375 | " fig_labels.append(str(df_[\"RegionLabel\"].unique()[0]))\n", 376 | " if fig_labels and figlabel:\n", 377 | " add_fig_label(\", \".join(fig_labels))\n", 378 | " if target:\n", 379 | " ax.axvline(target_v, ls=\":\", c=\"black\")\n", 380 | " if targetlabel:\n", 381 | " maps.add_target_label(twr=target_v, fmt=\"3 DWPD\")\n", 382 | " \n", 383 | " \n", 384 | "import matplotlib.ticker as ticker\n", 385 | "\n", 386 | "\n", 387 | "def add_leg_to_subplot(loc=(2, 4, 4)):\n", 388 | " ax_0 = plt.subplot(loc[0], loc[1], 1)\n", 389 | " handles, labels = ax_0.get_legend_handles_labels()\n", 390 | " ax_0.get_legend().remove()\n", 391 | " ax = plt.subplot(*loc)\n", 392 | " ax.legend(handles, labels, loc=\"center\", title=\"Policy\")\n", 393 | " ax.set_axis_off()\n", 394 | " ax.get_xaxis().set_visible(False)\n", 395 | " ax.get_yaxis().set_visible(False)\n", 396 | "\n", 397 | "def postsubplot_wr(ax, i):\n", 398 | " if i == 0:\n", 399 | " ax.xaxis.set_major_locator(ticker.MaxNLocator(3))\n", 400 | " ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())\n", 401 | " ax.yaxis.set_major_locator(ticker.MaxNLocator(3))\n", 402 | " ax.yaxis.set_minor_locator(ticker.MultipleLocator(5))\n", 403 | " ax.tick_params(which=\"major\", length=6)\n", 404 | " ax.tick_params(which=\"minor\", length=4)\n", 405 | " ax.set_ylabel(\"\")\n", 406 | " ax.set_xlabel(\"\")\n", 407 | " \n", 408 | "reload(maps)\n", 409 | "contexts.use(\"single\")\n", 410 | "\n", 411 | "def 
plot_wrs_grid(df=None, y=\"P100ServiceTimeUtil@10m\", hue=\"ShortLabel\", x=\"Target DWPD\"):\n", 412 | " num_traces = df[\"RegionLabel\"].nunique()\n", 413 | "\n", 414 | " fig, ax = plt.subplots(\n", 415 | " nrows=2,\n", 416 | " ncols=4,\n", 417 | " sharex=True,\n", 418 | " sharey=False,\n", 419 | " figsize=(7 * 2, 3 * 2),\n", 420 | " layout=\"constrained\",\n", 421 | " )\n", 422 | " for i, (region, df_) in enumerate(df.groupby(\"RegionLabel\")):\n", 423 | " ax = plt.subplot(2, 4, i + 1 + (1 if i > 2 else 0))\n", 424 | " sns.lineplot(\n", 425 | " data=df_, x=x, y=y, **get_kwargs(df_, y, hue=hue), ax=ax, legend=i == 0\n", 426 | " )\n", 427 | " postplot(df_, target=False, targetlabel=False)\n", 428 | " postsubplot_wr(ax, i)\n", 429 | " add_leg_to_subplot((2,4,4))\n", 430 | " if \"Write Rate\" in x:\n", 431 | " fig.supxlabel(maps.l_wr)\n", 432 | " elif \"DWPD\" in x:\n", 433 | " fig.supxlabel(\"DWPD (Drive Writes Per Day)\")\n", 434 | " fig.supylabel(nice_ylabel(y))" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": 7, 440 | "id": "10969374-672c-4e4f-ba50-425e408e01d6", 441 | "metadata": { 442 | "tags": [] 443 | }, 444 | "outputs": [], 445 | "source": [ 446 | "def postsubplot_csize(ax, i):\n", 447 | " if i == 0:\n", 448 | " ax.xaxis.set_major_locator(ticker.MultipleLocator(1))\n", 449 | " ax.xaxis.set_minor_locator(ticker.MultipleLocator(.2))\n", 450 | " ax.yaxis.set_major_locator(ticker.MaxNLocator(3))\n", 451 | " ax.yaxis.set_minor_locator(ticker.MultipleLocator(5))\n", 452 | " ax.tick_params(which=\"major\", length=6)\n", 453 | " ax.tick_params(which=\"minor\", length=4)\n", 454 | " ax.set_ylabel(\"\")\n", 455 | " ax.set_xlabel(\"\")\n", 456 | "\n", 457 | "\n", 458 | "def plot_csize_grid(df=None, y=\"P100ServiceTimeUtil@10m\", hue=\"ShortLabel\", x=\"Target Cache Size\"):\n", 459 | " num_traces = df[\"RegionLabel\"].nunique()\n", 460 | "\n", 461 | " fig, ax = plt.subplots(\n", 462 | " nrows=2,\n", 463 | " ncols=4,\n", 464 | " 
sharex=True,\n", 465 | " sharey=False,\n", 466 | " figsize=(7 * 2, 3 * 2),\n", 467 | " layout=\"constrained\",\n", 468 | " )\n", 469 | " for i, (region, df_) in enumerate(df.groupby(\"RegionLabel\")):\n", 470 | " ax = plt.subplot(2, 4, i + 1 + (1 if i > 2 else 0))\n", 471 | " sns.lineplot(\n", 472 | " data=df_, x=x, y=y, **get_kwargs(df_, y, hue=hue, errs=False), ax=ax, legend=i == 0\n", 473 | " )\n", 474 | " plt.axvline(.4, ls=':', c=\"black\", label=\"400GB\")\n", 475 | " postplot(df_, target=False, targetlabel=False)\n", 476 | " postsubplot_csize(ax, i)\n", 477 | " add_leg_to_subplot((2,4,4))\n", 478 | " fig.supxlabel(\"Cache Size (TB)\")\n", 479 | " fig.supylabel(nice_ylabel(y))\n", 480 | " # plt.xlim(0, 3000)\n", 481 | " plt.xlim(0, 2)" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": null, 487 | "id": "7ac7906c-93d7-41ba-88ed-bd99bd195dcb", 488 | "metadata": {}, 489 | "outputs": [], 490 | "source": [ 491 | "# fig-09\n", 492 | "def stats(df_summary, y, idx='ShortLabel', cmp='Baleen', agg='RegionLabel', verbose=0, show=True, bar=True):\n", 493 | " if verbose >= 3:\n", 494 | " display(pd.pivot_table(df_summary, values=[y], index=idx, columns=agg, margins=True, margins_name='Avg', aggfunc='count'))\n", 495 | " drt2 = pd.pivot_table(df_summary, values=[y], index=idx, columns=agg, margins=True, margins_name='Avg')\n", 496 | " drt2.drop('Avg', inplace=True)\n", 497 | " drtk = (drt2.loc[cmp] / drt2) * 100\n", 498 | " drtk = -(drtk - 100)\n", 499 | " drtk.drop(cmp, inplace=True)\n", 500 | " if agg:\n", 501 | " drtk.loc[:, (y,'Avg%')] = drtk[drtk.columns[:-1]].mean(axis=1)\n", 502 | " if verbose >= 2:\n", 503 | " print(\"Raw\")\n", 504 | " with pd.option_context('display.float_format', '{:.2f}'.format):\n", 505 | " display(drt2)\n", 506 | " if show:\n", 507 | " with pd.option_context('display.float_format', '{:.2f}%'.format):\n", 508 | " print(f\"Savings over {cmp}\")\n", 509 | " if bar:\n", 510 | " 
display((-drtk).style.format(\"{:.2f}%\").bar(color=['green','red'], axis=0, vmin=-50, vmax=50))\n", 511 | " else:\n", 512 | " display(drtk)\n", 513 | " return drt2, drtk" 514 | ] 515 | } 516 | ], 517 | "metadata": { 518 | "kernelspec": { 519 | "display_name": "Python 3 (ipykernel)", 520 | "language": "python", 521 | "name": "python3" 522 | }, 523 | "language_info": { 524 | "codemirror_mode": { 525 | "name": "ipython", 526 | "version": 3 527 | }, 528 | "file_extension": ".py", 529 | "mimetype": "text/x-python", 530 | "name": "python", 531 | "nbconvert_exporter": "python", 532 | "pygments_lexer": "ipython3", 533 | "version": "3.10.12" 534 | } 535 | }, 536 | "nbformat": 4, 537 | "nbformat_minor": 5 538 | } 539 | -------------------------------------------------------------------------------- /notebooks/reproduce/reproduce_commands.sh: -------------------------------------------------------------------------------- 1 | cd "$(dirname "$0")"/../.. 2 | # RejectX, Region5, 0.0 3 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityHits --region Region5 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230410_static_pf --eviction-age 8420.08 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 4 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/Region5/processed/full_0_0.1.trace --rejectx-ap --ap rejectx --ap-probability 0.022966 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230410_static_pf/20230325_Region5_0_0.1_366.475GB_WR35.599MBS/ap_rejectx/policy_hits/prefetch_never_episode/i_4_ea_8420.08 --prefetch-when never --prefetch-range episode --log-interval 600 --ep-analysis 
../runs/exp_reproduce_spring23/20230410_static_pf/20230325_Region5_0_0.1/offline_analysis_ea_8420.08.csv --ap-threshold 1 --offline-ap-decisions ../tmp/20230410_static_pf/20230325_Region5_0_0.1/decisions_utility_hits_fixed_ea_8420.08.pkl.bz --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230410_static_pf__20230325_Region5_0_0.1_366.475GB_WR35.599MBS__ap_rejectx__policy_hits__prefetch_never_episode__i_4_ea_8420.08__rejectx-ap-1_0.022966_lru_366.475GB__full_0_0.1_i=4 5 | # CoinFlip, Region5, 0.0 6 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityHits --region Region5 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230410_static_pf --eviction-age 7574.832 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 7 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/Region5/processed/full_0_0.1.trace --coinflip-ap --ap coinflip --ap-probability 0.054192 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230410_static_pf/20230325_Region5_0_0.1_366.475GB_WR35.599MBS/ap_coinflip/policy_hits/prefetch_never_episode/i_2_ea_7574.83 --prefetch-when never --prefetch-range episode --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230410_static_pf/20230325_Region5_0_0.1/offline_analysis_ea_7574.83.csv --offline-ap-decisions ../tmp/20230410_static_pf/20230325_Region5_0_0.1/decisions_utility_hits_fixed_ea_7574.83.pkl.bz --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230410_static_pf__20230325_Region5_0_0.1_366.475GB_WR35.599MBS__ap_coinflip__policy_hits__prefetch_never_episode__i_2_ea_7574.83__coinflip-ap-0.054192_lru_366.475GB__full_0_0.1_i=2 8 | # CoinFlip, Region6, 0.0 9 | 
./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityHits --region Region6 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230410_static_pf --eviction-age 6401.12 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 10 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/Region6/processed/full_0_0.1.trace --coinflip-ap --ap coinflip --ap-probability 0.048592 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230410_static_pf/20230325_Region6_0_0.1_366.475GB_WR35.599MBS/ap_coinflip/policy_hits/prefetch_never_episode/i_2_ea_6401.12 --prefetch-when never --prefetch-range episode --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230410_static_pf/20230325_Region6_0_0.1/offline_analysis_ea_6401.12.csv --offline-ap-decisions ../tmp/20230410_static_pf/20230325_Region6_0_0.1/decisions_utility_hits_fixed_ea_6401.12.pkl.bz --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230410_static_pf__20230325_Region6_0_0.1_366.475GB_WR35.599MBS__ap_coinflip__policy_hits__prefetch_never_episode__i_2_ea_6401.12__coinflip-ap-0.048592_lru_366.475GB__full_0_0.1_i=2 11 | # RejectX, Region6, 0.0 12 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityHits --region Region6 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230410_static_pf --eviction-age 6260.558 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 13 | 
./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/Region6/processed/full_0_0.1.trace --rejectx-ap --ap rejectx --ap-probability 0.044576 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230410_static_pf/20230325_Region6_0_0.1_366.475GB_WR35.599MBS/ap_rejectx/policy_hits/prefetch_never_episode/i_2_ea_6260.56 --prefetch-when never --prefetch-range episode --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230410_static_pf/20230325_Region6_0_0.1/offline_analysis_ea_6260.56.csv --ap-threshold 1 --offline-ap-decisions ../tmp/20230410_static_pf/20230325_Region6_0_0.1/decisions_utility_hits_fixed_ea_6260.56.pkl.bz --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230410_static_pf__20230325_Region6_0_0.1_366.475GB_WR35.599MBS__ap_rejectx__policy_hits__prefetch_never_episode__i_2_ea_6260.56__rejectx-ap-1_0.044576_lru_366.475GB__full_0_0.1_i=2 14 | # RejectX, Region7, 0.0 15 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityHits --region Region7 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230410_static_pf --eviction-age 6972.291 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 16 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/Region7/processed/full_0_0.1.trace --rejectx-ap --ap rejectx --ap-probability 0.092777 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230410_static_pf/20230325_Region7_0_0.1_366.475GB_WR35.599MBS/ap_rejectx/policy_hits/prefetch_never_episode/i_2_ea_6972.29 --prefetch-when never --prefetch-range episode --log-interval 600 --ep-analysis 
../runs/exp_reproduce_spring23/20230410_static_pf/20230325_Region7_0_0.1/offline_analysis_ea_6972.29.csv --ap-threshold 1 --offline-ap-decisions ../tmp/20230410_static_pf/20230325_Region7_0_0.1/decisions_utility_hits_fixed_ea_6972.29.pkl.bz --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230410_static_pf__20230325_Region7_0_0.1_366.475GB_WR35.599MBS__ap_rejectx__policy_hits__prefetch_never_episode__i_2_ea_6972.29__rejectx-ap-1_0.092777_lru_366.475GB__full_0_0.1_i=2 17 | # CoinFlip, Region7, 0.0 18 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityHits --region Region7 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230410_static_pf --eviction-age 7158.642 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 19 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/Region7/processed/full_0_0.1.trace --coinflip-ap --ap coinflip --ap-probability 0.052391 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230410_static_pf/20230325_Region7_0_0.1_366.475GB_WR35.599MBS/ap_coinflip/policy_hits/prefetch_never_episode/i_2_ea_7158.64 --prefetch-when never --prefetch-range episode --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230410_static_pf/20230325_Region7_0_0.1/offline_analysis_ea_7158.64.csv --offline-ap-decisions ../tmp/20230410_static_pf/20230325_Region7_0_0.1/decisions_utility_hits_fixed_ea_7158.64.pkl.bz --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230410_static_pf__20230325_Region7_0_0.1_366.475GB_WR35.599MBS__ap_coinflip__policy_hits__prefetch_never_episode__i_2_ea_7158.64__coinflip-ap-0.052391_lru_366.475GB__full_0_0.1_i=2 20 | # CoinFlip, Region5, 0.0 
21 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityHits --region Region5 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230327_tracedrop_old --eviction-age 7526.9 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 22 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/processed/Region5/full_0_0.1.trace --coinflip-ap --ap coinflip --ap-probability 0.054172 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230327_tracedrop_old/ws.20230325_Region5_0_0.1_366.475GB_WR35.599MBS/ap_coinflip/policy_hits/prefetch_never_episode/i_3_ea_7526.9 --prefetch-when never --prefetch-range episode --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230327_tracedrop_old/ws.20230325_Region5_0_0.1/offline_analysis_ea_7526.9.csv --offline-ap-decisions ../tmp/20230327_tracedrop_old/ws.20230325_Region5_0_0.1/decisions_utility_hits_fixed_ea_7526.9.pkl.bz --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230327_tracedrop_old__ws.20230325_Region5_0_0.1_366.475GB_WR35.599MBS__ap_coinflip__policy_hits__prefetch_never_episode__i_3_ea_7526.9__coinflip-ap-0.054172_lru_366.475GB__full_0_0.1_i=3 23 | # RejectX, Region5, 0.0 24 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityHits --region Region5 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230327_tracedrop_old --eviction-age 5749.000563609678 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch --train-split-secs-start 0 
--train-split-secs-end 86400 25 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/processed/Region5/full_0_0.1.trace --rejectx-ap --ap rejectx --ap-probability 0.040123 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230327_tracedrop_old/ws.20230325_Region5_0_0.1_366.475GB_WR35.599MBS/ap_rejectx/policy_hits/prefetch_never_episode/i_20_ea_5749 --fast --prefetch-when never --prefetch-range episode --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230327_tracedrop_old/ws.20230325_Region5_0_0.1/offline_analysis_ea_5749.csv --ap-threshold 1 --offline-ap-decisions ../tmp/20230327_tracedrop_old/ws.20230325_Region5_0_0.1/decisions_utility_hits_fixed_ea_5749.pkl.bz --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230327_tracedrop_old__ws.20230325_Region5_0_0.1_366.475GB_WR35.599MBS__ap_rejectx__policy_hits__prefetch_never_episode__i_20_ea_5749__rejectx-ap-1_0.040123_lru_366.475GB__full_0_0.1_i=20 26 | # Baleen (No Prefetch), Region5, 0.0 27 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityServiceTimeSize2 --region Region5 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230327_tracedrop_old --suffix /ws.20230325_Region5_0_0.1/fs_meta+block+chunk/accs_15 --ap-acc-cutoff 15 --ap-feat-subset meta+block+chunk --eviction-age 6139.361 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 28 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/processed/Region5/full_0_0.1.trace --learned-ap --ap mlnew --ap-threshold 0.605976 --size_gb 366.475 -o 
../runs/exp_reproduce_spring23/20230327_tracedrop_old/ws.20230325_Region5_0_0.1_366.475GB_WR35.599MBS/ap_mlnew/policy_servicetimesize/prefetch_never_episode/i_4_ea_6139.36 --prefetch-when never --prefetch-range episode --batch-size 16 --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230327_tracedrop_old/ws.20230325_Region5_0_0.1/fs_meta+block+chunk/accs_15/offline_analysis_ea_6139.36.csv --learn-ap-filtercount 6 --learn-ap-granularity both --offline-ap-decisions ../tmp/20230327_tracedrop_old/ws.20230325_Region5_0_0.1/fs_meta+block+chunk/accs_15/decisions_utility_service_time_size_fixed_ea_6139.36.pkl.bz --learned-ap-model-path ../tmp/20230327_tracedrop_old/ws.20230325_Region5_0_0.1/fs_meta+block+chunk/accs_15/ea_6139.36_wr_35.599_admit_threshold_binary.model --ap-feat-subset meta+block+chunk --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230327_tracedrop_old__ws.20230325_Region5_0_0.1_366.475GB_WR35.599MBS__ap_mlnew__policy_servicetimesize__prefetch_never_episode__i_4_ea_6139.36__ml-ap-0.605976_6_lru_366.475GB__full_0_0.1_i=4 29 | # RejectX, Region6, 0.0 30 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityHits --region Region6 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230327_tracedrop_old --eviction-age 6130.651 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 31 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/processed/Region6/full_0_0.1.trace --rejectx-ap --ap rejectx --ap-probability 0.044127 --size_gb 366.475 -o 
../runs/exp_reproduce_spring23/20230327_tracedrop_old/ws.20230325_Region6_0_0.1_366.475GB_WR35.599MBS/ap_rejectx/policy_hits/prefetch_never_episode/i_5_ea_6130.65 --prefetch-when never --prefetch-range episode --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230327_tracedrop_old/ws.20230325_Region6_0_0.1/offline_analysis_ea_6130.65.csv --ap-threshold 1 --offline-ap-decisions ../tmp/20230327_tracedrop_old/ws.20230325_Region6_0_0.1/decisions_utility_hits_fixed_ea_6130.65.pkl.bz --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230327_tracedrop_old__ws.20230325_Region6_0_0.1_366.475GB_WR35.599MBS__ap_rejectx__policy_hits__prefetch_never_episode__i_5_ea_6130.65__rejectx-ap-1_0.044127_lru_366.475GB__full_0_0.1_i=5 32 | # CoinFlip, Region6, 0.0 33 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityHits --region Region6 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230327_tracedrop_old --eviction-age 6298.433 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 34 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/processed/Region6/full_0_0.1.trace --coinflip-ap --ap coinflip --ap-probability 0.048628 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230327_tracedrop_old/ws.20230325_Region6_0_0.1_366.475GB_WR35.599MBS/ap_coinflip/policy_hits/prefetch_never_episode/i_6_ea_6298.43 --prefetch-when never --prefetch-range episode --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230327_tracedrop_old/ws.20230325_Region6_0_0.1/offline_analysis_ea_6298.43.csv --offline-ap-decisions 
../tmp/20230327_tracedrop_old/ws.20230325_Region6_0_0.1/decisions_utility_hits_fixed_ea_6298.43.pkl.bz --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230327_tracedrop_old__ws.20230325_Region6_0_0.1_366.475GB_WR35.599MBS__ap_coinflip__policy_hits__prefetch_never_episode__i_6_ea_6298.43__coinflip-ap-0.048628_lru_366.475GB__full_0_0.1_i=6 35 | # Baleen (No Prefetch), Region6, 0.0 36 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityServiceTimeSize2 --region Region6 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230327_tracedrop_old --suffix /ws.20230325_Region6_0_0.1/fs_meta+block+chunk/accs_15 --ap-acc-cutoff 15 --ap-feat-subset meta+block+chunk --eviction-age 4908.417 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 37 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/processed/Region6/full_0_0.1.trace --learned-ap --ap mlnew --ap-threshold 0.620165 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230327_tracedrop_old/ws.20230325_Region6_0_0.1_366.475GB_WR35.599MBS/ap_mlnew/policy_servicetimesize/prefetch_never_episode/i_3_ea_4908.42 --prefetch-when never --prefetch-range episode --batch-size 16 --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230327_tracedrop_old/ws.20230325_Region6_0_0.1/fs_meta+block+chunk/accs_15/offline_analysis_ea_4908.42.csv --learn-ap-filtercount 6 --learn-ap-granularity both --offline-ap-decisions ../tmp/20230327_tracedrop_old/ws.20230325_Region6_0_0.1/fs_meta+block+chunk/accs_15/decisions_utility_service_time_size_fixed_ea_4908.42.pkl.bz --learned-ap-model-path 
../tmp/20230327_tracedrop_old/ws.20230325_Region6_0_0.1/fs_meta+block+chunk/accs_15/ea_4908.42_wr_35.599_admit_threshold_binary.model --ap-feat-subset meta+block+chunk --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230327_tracedrop_old__ws.20230325_Region6_0_0.1_366.475GB_WR35.599MBS__ap_mlnew__policy_servicetimesize__prefetch_never_episode__i_3_ea_4908.42__ml-ap-0.620165_6_lru_366.475GB__full_0_0.1_i=3 38 | # CoinFlip, Region7, 0.0 39 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityHits --region Region7 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230327_tracedrop_XX --eviction-age 6980.861 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 40 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/processed/Region7/full_0_0.1.trace --coinflip-ap --ap coinflip --ap-probability 0.052379 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230327_tracedrop_Region7/ws.20230325_Region7_0_0.1_366.475GB_WR35.599MBS/ap_coinflip/policy_hits/prefetch_never_episode/i_4_ea_6980.86 --prefetch-when never --prefetch-range episode --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230327_tracedrop_Region7/ws.20230325_Region7_0_0.1/offline_analysis_ea_6980.86.csv --offline-ap-decisions ../tmp/20230327_tracedrop_Region7/ws.20230325_Region7_0_0.1/decisions_utility_hits_fixed_ea_6980.86.pkl.bz --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230327_tracedrop_Region7__ws.20230325_Region7_0_0.1_366.475GB_WR35.599MBS__ap_coinflip__policy_hits__prefetch_never_episode__i_4_ea_6980.86__coinflip-ap-0.052379_lru_366.475GB__full_0_0.1_i=4 41 | # RejectX, Region7, 0.0 42 | 
./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityHits --region Region7 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230327_tracedrop_XX --eviction-age 6831.112 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 43 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/processed/Region7/full_0_0.1.trace --rejectx-ap --ap rejectx --ap-probability 0.089107 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230327_tracedrop_Region7/ws.20230325_Region7_0_0.1_366.475GB_WR35.599MBS/ap_rejectx/policy_hits/prefetch_never_episode/i_3_ea_6831.11 --prefetch-when never --prefetch-range episode --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230327_tracedrop_Region7/ws.20230325_Region7_0_0.1/offline_analysis_ea_6831.11.csv --ap-threshold 1 --offline-ap-decisions ../tmp/20230327_tracedrop_Region7/ws.20230325_Region7_0_0.1/decisions_utility_hits_fixed_ea_6831.11.pkl.bz --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230327_tracedrop_Region7__ws.20230325_Region7_0_0.1_366.475GB_WR35.599MBS__ap_rejectx__policy_hits__prefetch_never_episode__i_3_ea_6831.11__rejectx-ap-1_0.089107_lru_366.475GB__full_0_0.1_i=3 44 | # Baleen (No Prefetch), Region7, 0.0 45 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityServiceTimeSize2 --region Region7 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230327_tracedrop_XX --suffix /ws.20230325_Region7_0_0.1/fs_meta+block+chunk/accs_15 --ap-acc-cutoff 15 --ap-feat-subset 
meta+block+chunk --eviction-age 5653.153 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 46 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/processed/Region7/full_0_0.1.trace --learned-ap --ap mlnew --ap-threshold 0.58094 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230327_tracedrop_Region7/ws.20230325_Region7_0_0.1_366.475GB_WR35.599MBS/ap_mlnew/policy_servicetimesize/prefetch_never_episode/i_2_ea_5653.15 --prefetch-when never --prefetch-range episode --batch-size 16 --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230327_tracedrop_Region7/ws.20230325_Region7_0_0.1/fs_meta+block+chunk/accs_15/offline_analysis_ea_5653.15.csv --learn-ap-filtercount 6 --learn-ap-granularity both --offline-ap-decisions ../tmp/20230327_tracedrop_Region7/ws.20230325_Region7_0_0.1/fs_meta+block+chunk/accs_15/decisions_utility_service_time_size_fixed_ea_5653.15.pkl.bz --learned-ap-model-path ../tmp/20230327_tracedrop_Region7/ws.20230325_Region7_0_0.1/fs_meta+block+chunk/accs_15/ea_5653.15_wr_35.599_admit_threshold_binary.model --ap-feat-subset meta+block+chunk --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230327_tracedrop_Region7__ws.20230325_Region7_0_0.1_366.475GB_WR35.599MBS__ap_mlnew__policy_servicetimesize__prefetch_never_episode__i_2_ea_5653.15__ml-ap-0.58094_6_lru_366.475GB__full_0_0.1_i=2 47 | # RejectX, Region4, 0.0 48 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityHits --region Region4 --sample-ratio 0.1 --sample-start 0.0 --trace-group 202110 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230412_XX_rerun2 --eviction-age 6687.063 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch 
--train-split-secs-start 0 --train-split-secs-end 86400 49 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/202110/Region4/processed/full_0_0.1.trace --rejectx-ap --ap rejectx --ap-probability 0.374887 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230412_Region4_rerun2/202110_Region4_0_0.1_366.475GB_WR35.599MBS/ap_rejectx/policy_hits/prefetch_never_episode/i_6_ea_6687.06 --prefetch-when never --prefetch-range episode --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230412_Region4_rerun2/202110_Region4_0_0.1/offline_analysis_ea_6687.06.csv --ap-threshold 1 --offline-ap-decisions ../tmp/20230412_Region4_rerun2/202110_Region4_0_0.1/decisions_utility_hits_fixed_ea_6687.06.pkl.bz --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230412_Region4_rerun2__202110_Region4_0_0.1_366.475GB_WR35.599MBS__ap_rejectx__policy_hits__prefetch_never_episode__i_6_ea_6687.06__rejectx-ap-1_0.374887_lru_366.475GB__full_0_0.1_i=6 50 | # CoinFlip, Region4, 0.0 51 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityHits --region Region4 --sample-ratio 0.1 --sample-start 0.0 --trace-group 202110 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230412_XX_rerun2 --eviction-age 7309.307 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 52 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/202110/Region4/processed/full_0_0.1.trace --coinflip-ap --ap coinflip --ap-probability 0.079085 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230412_Region4_rerun2/202110_Region4_0_0.1_366.475GB_WR35.599MBS/ap_coinflip/policy_hits/prefetch_never_episode/i_6_ea_7309.31 --prefetch-when never --prefetch-range episode --log-interval 600 --ep-analysis 
../runs/exp_reproduce_spring23/20230412_Region4_rerun2/202110_Region4_0_0.1/offline_analysis_ea_7309.31.csv --offline-ap-decisions ../tmp/20230412_Region4_rerun2/202110_Region4_0_0.1/decisions_utility_hits_fixed_ea_7309.31.pkl.bz --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230412_Region4_rerun2__202110_Region4_0_0.1_366.475GB_WR35.599MBS__ap_coinflip__policy_hits__prefetch_never_episode__i_6_ea_7309.31__coinflip-ap-0.079085_lru_366.475GB__full_0_0.1_i=6 53 | # Baleen (No Prefetch), Region4, 0.0 54 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityServiceTimeSize2 --region Region4 --sample-ratio 0.1 --sample-start 0.0 --trace-group 202110 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230412_XX_rerun2 --suffix /202110_Region4_0_0.1/fs_meta+block+chunk/accs_15 --ap-acc-cutoff 15 --ap-feat-subset meta+block+chunk --eviction-age 6359.472 --train-target-wr 35.599 --rl-init-kwargs filter_=noprefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 55 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/202110/Region4/processed/full_0_0.1.trace --learned-ap --ap mlnew --ap-threshold 0.827421 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230412_Region4_rerun2/202110_Region4_0_0.1_366.475GB_WR35.599MBS/ap_mlnew/policy_servicetimesize/prefetch_never_episode/i_6_ea_6359.47 --prefetch-when never --prefetch-range episode --batch-size 16 --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230412_Region4_rerun2/202110_Region4_0_0.1/fs_meta+block+chunk/accs_15/offline_analysis_ea_6359.47.csv --learn-ap-filtercount 6 --learn-ap-granularity both --offline-ap-decisions ../tmp/20230412_Region4_rerun2/202110_Region4_0_0.1/fs_meta+block+chunk/accs_15/decisions_utility_service_time_size_fixed_ea_6359.47.pkl.bz 
--learned-ap-model-path ../tmp/20230412_Region4_rerun2/202110_Region4_0_0.1/fs_meta+block+chunk/accs_15/ea_6359.47_wr_35.599_admit_threshold_binary.model --ap-feat-subset meta+block+chunk --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230412_Region4_rerun2__202110_Region4_0_0.1_366.475GB_WR35.599MBS__ap_mlnew__policy_servicetimesize__prefetch_never_episode__i_6_ea_6359.47__ml-ap-0.827421_6_lru_366.475GB__full_0_0.1_i=6 56 | # Baleen, Region4, 0.0 57 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityServiceTimeSize2 --region Region4 --sample-ratio 0.1 --sample-start 0.0 --trace-group 202110 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230412_XX_rerun2 --suffix /202110_Region4_0_0.1/fs_meta+block+chunk/accs_15 --ap-acc-cutoff 15 --ap-feat-subset meta+block+chunk --eviction-age 6063.245 --train-target-wr 35.599 --rl-init-kwargs filter_=prefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 58 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/202110/Region4/processed/full_0_0.1.trace --learned-ap --ap mlnew --ap-threshold 0.903418 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230412_Region4_rerun2/202110_Region4_0_0.1_366.475GB_WR35.599MBS/ap_mlnew/policy_servicetimesize/prefetch_partial_acctime-all/i_6_ea_6063.24 --prefetch-when partial --prefetch-range acctime-all --batch-size 16 --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230412_Region4_rerun2/202110_Region4_0_0.1/fs_meta+block+chunk/accs_15/offline_analysis_ea_6063.24.csv --learn-ap-filtercount 6 --learn-ap-granularity both --offline-ap-decisions ../tmp/20230412_Region4_rerun2/202110_Region4_0_0.1/fs_meta+block+chunk/accs_15/decisions_utility_service_time_size_fixed_ea_6063.24.pkl.bz --learned-ap-model-path 
../tmp/20230412_Region4_rerun2/202110_Region4_0_0.1/fs_meta+block+chunk/accs_15/ea_6063.24_wr_35.599_admit_threshold_binary.model --ap-feat-subset meta+block+chunk --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230412_Region4_rerun2__202110_Region4_0_0.1_366.475GB_WR35.599MBS__ap_mlnew__policy_servicetimesize__prefetch_partial_acctime-all__i_6_ea_6063.24__ml-ap-0.903418_6_lru_366.475GB__full_0_0.1_i=6 59 | # Baleen, Region5, 0.0 60 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityServiceTimeSize2 --region Region5 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230421_alltraces_pfalways --suffix /20230325_Region5_0_0.1/fs_meta+block+chunk/accs_15 --ap-acc-cutoff 15 --ap-feat-subset meta+block+chunk --eviction-age 5936.225 --train-target-wr 35.599 --rl-init-kwargs filter_=prefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 61 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/Region5/processed/full_0_0.1.trace --learned-ap --ap mlnew --ap-threshold 0.701452 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230421_alltraces_pfalways/20230325_Region5_0_0.1_366.475GB_WR35.599MBS/ap_mlnew/policy_servicetimesize/prefetch_partial_acctime-episode-predict/i_4_ea_5936.23 --prefetch-when partial --prefetch-range acctime-episode-predict --batch-size 16 --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230421_alltraces_pfalways/20230325_Region5_0_0.1/fs_meta+block+chunk/accs_15/offline_analysis_ea_5936.23.csv --learn-ap-filtercount 6 --learn-ap-granularity both --prefetcher-model-path ../tmp/20230421_alltraces_pfalways/20230325_Region5_0_0.1/fs_meta+block+chunk/accs_15/ea_5936.23_wr_35.599_prefetch_{k}.model --offline-ap-decisions 
../tmp/20230421_alltraces_pfalways/20230325_Region5_0_0.1/fs_meta+block+chunk/accs_15/decisions_utility_service_time_size_fixed_ea_5936.23.pkl.bz --learned-ap-model-path ../tmp/20230421_alltraces_pfalways/20230325_Region5_0_0.1/fs_meta+block+chunk/accs_15/ea_5936.23_wr_35.599_admit_threshold_binary.model --prefetcher-model-path ../tmp/20230421_alltraces_pfalways/20230325_Region5_0_0.1/fs_meta+block+chunk/accs_15/ea_5936.23_wr_35.599_prefetch_{k}.model --ap-feat-subset meta+block+chunk --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230421_alltraces_pfalways__20230325_Region5_0_0.1_366.475GB_WR35.599MBS__ap_mlnew__policy_servicetimesize__prefetch_partial_acctime-episode-predict__i_4_ea_5936.23__ml-ap-0.701452_6_lru_366.475GB__full_0_0.1_i=4 62 | # Baleen, Region6, 0.0 63 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityServiceTimeSize2 --region Region6 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230327_tracedrop_old --suffix /ws.20230325_Region6_0_0.1/fs_meta+block+chunk/accs_15 --ap-acc-cutoff 15 --ap-feat-subset meta+block+chunk --eviction-age 4669.873 --train-target-wr 35.599 --rl-init-kwargs filter_=prefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 64 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/processed/Region6/full_0_0.1.trace --learned-ap --ap mlnew --ap-threshold 0.826378 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230327_tracedrop_old/ws.20230325_Region6_0_0.1_366.475GB_WR35.599MBS/ap_mlnew/policy_servicetimesize/prefetch_partial_acctime-all/i_3_ea_4669.87 --prefetch-when partial --prefetch-range acctime-all --batch-size 16 --log-interval 600 --ep-analysis 
../runs/exp_reproduce_spring23/20230327_tracedrop_old/ws.20230325_Region6_0_0.1/fs_meta+block+chunk/accs_15/offline_analysis_ea_4669.87.csv --learn-ap-filtercount 6 --learn-ap-granularity both --offline-ap-decisions ../tmp/20230327_tracedrop_old/ws.20230325_Region6_0_0.1/fs_meta+block+chunk/accs_15/decisions_utility_service_time_size_fixed_ea_4669.87.pkl.bz --learned-ap-model-path ../tmp/20230327_tracedrop_old/ws.20230325_Region6_0_0.1/fs_meta+block+chunk/accs_15/ea_4669.87_wr_35.599_admit_threshold_binary.model --ap-feat-subset meta+block+chunk --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230327_tracedrop_old__ws.20230325_Region6_0_0.1_366.475GB_WR35.599MBS__ap_mlnew__policy_servicetimesize__prefetch_partial_acctime-all__i_3_ea_4669.87__ml-ap-0.826378_6_lru_366.475GB__full_0_0.1_i=3 65 | # Baleen, Region7, 0.0 66 | ./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp exp_reproduce_ --policy PolicyUtilityServiceTimeSize2 --region Region7 --sample-ratio 0.1 --sample-start 0.0 --trace-group 20230325 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.47461 --output-base-dir ../tmp/exp_reproduce_20230327_tracedrop_XX --suffix /ws.20230325_Region7_0_0.1/fs_meta+block+chunk/accs_15 --ap-acc-cutoff 15 --ap-feat-subset meta+block+chunk --eviction-age 5653.153 --train-target-wr 35.599 --rl-init-kwargs filter_=prefetch --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 67 | ./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --trace ../data/tectonic/20230325/processed/Region7/full_0_0.1.trace --learned-ap --ap mlnew --ap-threshold 0.58094 --size_gb 366.475 -o ../runs/exp_reproduce_spring23/20230327_tracedrop_Region7/ws.20230325_Region7_0_0.1_366.475GB_WR35.599MBS/ap_mlnew/policy_servicetimesize/prefetch_predict_acctime-episode-predict/i_2_ea_5653.15 --prefetch-when predict --prefetch-range acctime-episode-predict --batch-size 16 
--prefetch-when-threshold 0.5 --log-interval 600 --ep-analysis ../runs/exp_reproduce_spring23/20230327_tracedrop_Region7/ws.20230325_Region7_0_0.1/fs_meta+block+chunk/accs_15/offline_analysis_ea_5653.15.csv --learn-ap-filtercount 6 --learn-ap-granularity both --prefetcher-model-path ../tmp/20230327_tracedrop_Region7/ws.20230325_Region7_0_0.1/fs_meta+block+chunk/accs_15/ea_5653.15_wr_35.599_prefetch_{k}.model --offline-ap-decisions ../tmp/20230327_tracedrop_Region7/ws.20230325_Region7_0_0.1/fs_meta+block+chunk/accs_15/decisions_utility_service_time_size_fixed_ea_5653.15.pkl.bz --learned-ap-model-path ../tmp/20230327_tracedrop_Region7/ws.20230325_Region7_0_0.1/fs_meta+block+chunk/accs_15/ea_5653.15_wr_35.599_admit_threshold_binary.model --prefetcher-model-path ../tmp/20230327_tracedrop_Region7/ws.20230325_Region7_0_0.1/fs_meta+block+chunk/accs_15/ea_5653.15_wr_35.599_prefetch_{k}.model --ap-feat-subset meta+block+chunk --job-id sim____users__dlwong__projects__cache-analysis__runs__spring23__20230327_tracedrop_Region7__ws.20230325_Region7_0_0.1_366.475GB_WR35.599MBS__ap_mlnew__policy_servicetimesize__prefetch_predict_acctime-episode-predict__i_2_ea_5653.15__ml-ap-0.58094_6_lru_366.475GB__full_0_0.1_i=2 68 | -------------------------------------------------------------------------------- /notebooks/paper-figs/fig-18-peak-hrs-20230424.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 8, 6 | "id": "bc586e81-f494-4bd7-8f3f-367b23d1ad13", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [ 11 | { 12 | "name": "stdout", 13 | "output_type": "stream", 14 | "text": [ 15 | "CPython 2024-01-15T08:21:27.645384\n" 16 | ] 17 | }, 18 | { 19 | "data": { 20 | "text/html": [ 21 | "" 22 | ], 23 | "text/plain": [ 24 | "" 25 | ] 26 | }, 27 | "metadata": {}, 28 | "output_type": "display_data" 29 | }, 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": 
[ 34 | "CPU times: user 406 µs, sys: 285 µs, total: 691 µs\n", 35 | "Wall time: 710 µs\n" 36 | ] 37 | } 38 | ], 39 | "source": [ 40 | "%run ../includes/common-20230414.ipynb" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 9, 46 | "id": "3c7a4f9f-ff8d-470c-af05-eb5af414e1ed", 47 | "metadata": { 48 | "tags": [] 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "if not os.path.exists(\"../../data/breakdown-stats/\"):\n", 53 | " os.system(\"cd ../../data && bash get-tectonic.sh\")" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 10, 59 | "id": "f7d14040-802e-434e-9eaa-abf1562984ed", 60 | "metadata": { 61 | "tags": [] 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "dfc_raw = get_df()" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 11, 71 | "id": "8d3f8159-612a-4ac2-8b5d-d7dc2064446e", 72 | "metadata": { 73 | "tags": [] 74 | }, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "9065\n" 81 | ] 82 | }, 83 | { 84 | "name": "stderr", 85 | "output_type": "stream", 86 | "text": [ 87 | "/tmp/ipykernel_7176/3317348687.py:14: PerformanceWarning: dropping on a non-lexsorted multi-index without a level parameter may impact performance.\n", 88 | " best_pf_option_ = best_pf_option.idxmin(skipna=skipna)\n", 89 | "/tmp/ipykernel_7176/3317348687.py:14: PerformanceWarning: dropping on a non-lexsorted multi-index without a level parameter may impact performance.\n", 90 | " best_pf_option_ = best_pf_option.idxmin(skipna=skipna)\n" 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | "dfs_1 = get_data(dfc_raw, default_sample_ratio=1)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 12, 101 | "id": "89230a41-3838-4354-87af-92a05af843df", 102 | "metadata": { 103 | "tags": [] 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "def relabel_leg_part(mapping=None, ax=None, **kwargs):\n", 108 | " ax = ax or plt.gca()\n", 109 | " h, labels = 
ax.get_legend_handles_labels()\n", 110 | " labels = [mapping[x.split('@')[0]] for x in labels]\n", 111 | " return plt.legend(h, labels, **kwargs)\n" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 13, 117 | "id": "0062b210-b14f-4477-81ba-2ed7182e8389", 118 | "metadata": { 119 | "tags": [] 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "def shorten_ticks():\n", 124 | " for ax in plt.gcf().get_axes():\n", 125 | " locs = ax.get_xticks()\n", 126 | " order = ax.get_xticklabels()\n", 127 | " ax.set_xticks(locs, [x.get_text().replace(\"Baleen (\", \"Baleen\\n(\") for x in order],\n", 128 | " va='center', ha='right', rotation_mode='anchor', rotation=90)\n" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 14, 134 | "id": "fe0130ed-437f-4f61-a5bc-8c3e362d913e", 135 | "metadata": { 136 | "tags": [] 137 | }, 138 | "outputs": [ 139 | { 140 | "data": { 141 | "text/plain": [ 142 | "array(['RejectX', 'Baleen (No Prefetch)', 'Baleen (ML Prefetch)'],\n", 143 | " dtype=object)" 144 | ] 145 | }, 146 | "metadata": {}, 147 | "output_type": "display_data" 148 | }, 149 | { 150 | "data": { 151 | "image/png": "\n", 152 | "text/plain": [ 153 | "
" 154 | ] 155 | }, 156 | "metadata": {}, 157 | "output_type": "display_data" 158 | } 159 | ], 160 | "source": [ 161 | "for region in ['Region1']:\n", 162 | " dtx = filter_df(dfs_1['exp'], {'Region': region, \n", 163 | " 'PracticalAP': True,\n", 164 | " 'AdmissionPolicyLabel': ['Baleen', 'RejectX'], \n", 165 | " \"ShortLabel\": [\"RejectX\", \"Baleen (No Prefetch)\", \"Baleen (ML Prefetch)\", \"Baleen (ML-Range on Partial Hit)\"],\n", 166 | " 'Target DWPD': 7.5, 'SampleStart': 0})\n", 167 | " dtx['Filename'] = dtx.apply(lambda v: f\"../../data/breakdown-stats/{v['Region']}/ap_{v['AdmissionPolicyLabel']}/pf_{v['Prefetch-Range']}_{v['Prefetch-When']}/full_{v['SampleStart']}_{v['SampleRatio']:g}_cache_perf.txt.lzma\", axis=1)\n", 168 | " try:\n", 169 | " sample_ratio = dtx['SampleRatio'].unique()[0]\n", 170 | " new_cols = dtx.apply(processors.maxstats, axis=1, result_type='expand')\n", 171 | " dtx_ = pd.concat([dtx, new_cols], axis='columns')\n", 172 | " display(dtx['ShortLabel'].unique())\n", 173 | " processors.plot_breakdowns(dtx_, x='ShortLabel', subplots_order=['RejectX', 'Baleen (ML Prefetch)'],\n", 174 | " order=None,\n", 175 | " sample_ratio=1, xperiod=600,\n", 176 | " figsize=(6.4,4.8*1.5),\n", 177 | " max_y=30,\n", 178 | " leg_kwargs=dict(bbox_to_anchor=[.5, 0], loc='upper center', frameon=True, facecolor='white', framealpha=1),\n", 179 | " stack_order=[processors.stonbw_label, processors.stonpf_label, processors.stonios_label])\n", 180 | " shorten_ticks()\n", 181 | " sample_ratio_t = '{:g}'.format(sample_ratio).replace(\".\",\"\")\n", 182 | " savefig(\"peak_breakdowns\", f\"{region}_0_{sample_ratio_t}_wr-34_st-util\")\n", 183 | " except Exception as e:\n", 184 | " #raise\n", 185 | " print(\"Error\")\n", 186 | " print(e)\n" 187 | ] 188 | } 189 | ], 190 | "metadata": { 191 | "kernelspec": { 192 | "display_name": "Python 3 (ipykernel)", 193 | "language": "python", 194 | "name": "python3" 195 | }, 196 | "language_info": { 197 | "codemirror_mode": { 198 | "name": 
"ipython", 199 | "version": 3 200 | }, 201 | "file_extension": ".py", 202 | "mimetype": "text/x-python", 203 | "name": "python", 204 | "nbconvert_exporter": "python", 205 | "pygments_lexer": "ipython3", 206 | "version": "3.10.12" 207 | } 208 | }, 209 | "nbformat": 4, 210 | "nbformat_minor": 5 211 | } 212 | -------------------------------------------------------------------------------- /chameleon/1-getting-started.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "4cbf2763-3cdf-4058-9087-ff53e1eff565", 6 | "metadata": {}, 7 | "source": [ 8 | "This is meant to match the 'Getting Started' section of the README, which is available at \n", 9 | "https://github.com/wonglkd/Baleen-FAST24" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "id": "7424a2e0-43b9-4276-a6c8-8a3b02528024", 15 | "metadata": {}, 16 | "source": [ 17 | "This notebook will guide you through the process of doing some quick experiments on the shared Jupyter server, which has limited RAM and disk. For more, you will likely want to start a dedicated server (see the other notebook)." 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "id": "059c0d87-aac4-47de-90fa-4383384033e0", 23 | "metadata": {}, 24 | "source": [ 25 | "# Getting started\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "id": "d8cd3e4e-2675-4c1b-81aa-9a488e03068e", 31 | "metadata": {}, 32 | "source": [ 33 | "### 1. Clone repository" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 1, 39 | "id": "9be624be-4dca-422e-b8f7-bf8a8dbc467e", 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "if [ ! 
-d Baleen-FAST24 ]; then\n", 44 | " git clone --recurse-submodules https://github.com/wonglkd/Baleen-FAST24.git\n", 45 | "fi" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 2, 51 | "id": "1caa5685-a83b-42c8-89c5-8574f374fb7d", 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "cd Baleen-FAST24" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "id": "fbbddcff-db8f-4a89-b9e0-ea04f8afe1f2", 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "Fetching submodule BCacheSim\n", 69 | "Already up to date.\n" 70 | ] 71 | } 72 | ], 73 | "source": [ 74 | "git pull --recurse-submodules" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "id": "e83d5912-fb8a-4974-b6eb-70b302d71ee4", 80 | "metadata": {}, 81 | "source": [ 82 | "### 2. Install dependencies" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 6, 88 | "id": "41d6c963-2f7b-47db-b4ae-b9de32f54102", 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "WARNING: Ignoring invalid distribution -umpy (/opt/conda/lib/python3.10/site-packages)\n", 96 | "WARNING: Ignoring invalid distribution -umpy (/opt/conda/lib/python3.10/site-packages)\n", 97 | "Collecting lightgbm==3.3.5\n", 98 | " Using cached lightgbm-3.3.5-py3-none-manylinux1_x86_64.whl (2.0 MB)\n", 99 | "Collecting numpy==1.24.2\n", 100 | " Using cached numpy-1.24.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\n", 101 | "Requirement already satisfied: pandas==1.5.3 in /opt/conda/lib/python3.10/site-packages (from -r BCacheSim/install/requirements.txt (line 3)) (1.5.3)\n", 102 | "Collecting scikit-learn==1.2.2\n", 103 | " Using cached scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.6 MB)\n", 104 | "Requirement already satisfied: spookyhash in /opt/conda/lib/python3.10/site-packages (from 
-r BCacheSim/install/requirements.txt (line 5)) (2.1.0)\n", 105 | "Collecting jsonargparse\n", 106 | " Using cached jsonargparse-4.27.1-py3-none-any.whl (189 kB)\n", 107 | "Requirement already satisfied: compress_json in /opt/conda/lib/python3.10/site-packages (from -r BCacheSim/install/requirements.txt (line 7)) (1.0.10)\n", 108 | "Collecting compress_pickle\n", 109 | " Using cached compress_pickle-2.1.0-py3-none-any.whl (24 kB)\n", 110 | "Collecting retry\n", 111 | " Using cached retry-0.9.2-py2.py3-none-any.whl (8.0 kB)\n", 112 | "Requirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from -r BCacheSim/install/requirements.txt (line 10)) (5.8.0)\n", 113 | "Requirement already satisfied: tqdm in /opt/conda/lib/python3.10/site-packages (from -r BCacheSim/install/requirements.txt (line 11)) (4.65.0)\n", 114 | "Collecting scipy==1.10.1\n", 115 | " Using cached scipy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.4 MB)\n", 116 | "Collecting redis\n", 117 | " Using cached redis-5.0.1-py3-none-any.whl (250 kB)\n", 118 | "Collecting matplotlib==3.7.1\n", 119 | " Using cached matplotlib-3.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.6 MB)\n", 120 | "Collecting seaborn==0.12.1\n", 121 | " Using cached seaborn-0.12.1-py3-none-any.whl (288 kB)\n", 122 | "Requirement already satisfied: pqdict in /opt/conda/lib/python3.10/site-packages (from -r BCacheSim/install/requirements.txt (line 16)) (1.3.0)\n", 123 | "Requirement already satisfied: wheel in /opt/conda/lib/python3.10/site-packages (from lightgbm==3.3.5->-r BCacheSim/install/requirements.txt (line 1)) (0.40.0)\n", 124 | "Requirement already satisfied: python-dateutil>=2.8.1 in /opt/conda/lib/python3.10/site-packages (from pandas==1.5.3->-r BCacheSim/install/requirements.txt (line 3)) (2.8.2)\n", 125 | "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas==1.5.3->-r BCacheSim/install/requirements.txt 
(line 3)) (2021.1)\n", 126 | "Collecting joblib>=1.1.1\n", 127 | " Using cached joblib-1.3.2-py3-none-any.whl (302 kB)\n", 128 | "Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn==1.2.2->-r BCacheSim/install/requirements.txt (line 4)) (3.2.0)\n", 129 | "Requirement already satisfied: contourpy>=1.0.1 in /opt/conda/lib/python3.10/site-packages (from matplotlib==3.7.1->-r BCacheSim/install/requirements.txt (line 14)) (1.2.0)\n", 130 | "Requirement already satisfied: cycler>=0.10 in /opt/conda/lib/python3.10/site-packages (from matplotlib==3.7.1->-r BCacheSim/install/requirements.txt (line 14)) (0.12.1)\n", 131 | "Requirement already satisfied: pyparsing>=2.3.1 in /opt/conda/lib/python3.10/site-packages (from matplotlib==3.7.1->-r BCacheSim/install/requirements.txt (line 14)) (2.4.7)\n", 132 | "Requirement already satisfied: kiwisolver>=1.0.1 in /opt/conda/lib/python3.10/site-packages (from matplotlib==3.7.1->-r BCacheSim/install/requirements.txt (line 14)) (1.4.5)\n", 133 | "Requirement already satisfied: fonttools>=4.22.0 in /opt/conda/lib/python3.10/site-packages (from matplotlib==3.7.1->-r BCacheSim/install/requirements.txt (line 14)) (4.45.1)\n", 134 | "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from matplotlib==3.7.1->-r BCacheSim/install/requirements.txt (line 14)) (23.0)\n", 135 | "Requirement already satisfied: pillow>=6.2.0 in /opt/conda/lib/python3.10/site-packages (from matplotlib==3.7.1->-r BCacheSim/install/requirements.txt (line 14)) (10.1.0)\n", 136 | "Requirement already satisfied: PyYAML>=3.13 in /opt/conda/lib/python3.10/site-packages (from jsonargparse->-r BCacheSim/install/requirements.txt (line 6)) (5.3.1)\n", 137 | "Requirement already satisfied: py<2.0.0,>=1.4.26 in /opt/conda/lib/python3.10/site-packages (from retry->-r BCacheSim/install/requirements.txt (line 9)) (1.11.0)\n", 138 | "Requirement already satisfied: decorator>=3.4.2 
in /opt/conda/lib/python3.10/site-packages (from retry->-r BCacheSim/install/requirements.txt (line 9)) (5.1.1)\n", 139 | "Collecting async-timeout>=4.0.2\n", 140 | " Using cached async_timeout-4.0.3-py3-none-any.whl (5.7 kB)\n", 141 | "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.1->pandas==1.5.3->-r BCacheSim/install/requirements.txt (line 3)) (1.16.0)\n", 142 | "WARNING: Ignoring invalid distribution -umpy (/opt/conda/lib/python3.10/site-packages)\n", 143 | "Installing collected packages: retry, numpy, jsonargparse, joblib, compress_pickle, async-timeout, scipy, redis, scikit-learn, matplotlib, seaborn, lightgbm\n", 144 | " WARNING: The scripts f2py, f2py3 and f2py3.10 are installed in '/home/wonglkd_globusid_org/.local/bin' which is not on PATH.\n", 145 | " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n", 146 | "Successfully installed async-timeout-4.0.3 compress_pickle-2.1.0 joblib-1.3.2 jsonargparse-4.27.1 lightgbm-3.3.5 matplotlib-3.7.1 numpy-1.24.2 redis-5.0.1 retry-0.9.2 scikit-learn-1.2.2 scipy-1.10.1 seaborn-0.12.1\n", 147 | "WARNING: Ignoring invalid distribution -umpy (/opt/conda/lib/python3.10/site-packages)\n", 148 | "WARNING: Ignoring invalid distribution -umpy (/opt/conda/lib/python3.10/site-packages)\n", 149 | "WARNING: Ignoring invalid distribution -umpy (/opt/conda/lib/python3.10/site-packages)\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "python3 -m pip install --user -r BCacheSim/install/requirements.txt" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "id": "48123170-f101-4146-95d5-8b1efb2c14b1", 160 | "metadata": {}, 161 | "source": [ 162 | "### 3. 
Download trace files" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 9, 168 | "id": "cf0dd079-7518-4609-a1fe-2f8991a0c9b2", 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "bash: cd: data: No such file or directory\n" 176 | ] 177 | } 178 | ], 179 | "source": [ 180 | "cd data\n", 181 | "bash get-tectonic.sh\n", 182 | "cd .." 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "id": "bf72a703-d3d0-4b79-a46b-4a4b5145a50b", 188 | "metadata": {}, 189 | "source": [ 190 | "## Do a simple experiment" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "id": "6d768566-f3cf-435a-8fc3-ed6d802704fa", 196 | "metadata": {}, 197 | "source": [ 198 | "### Run RejectX baseline (6 mins)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 10, 204 | "id": "1c7548b8-8826-4f5e-9ec6-a84290e546e8", 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "bash: fg: %%time: no such job\n", 212 | "+ PYTHON=py\n", 213 | "+ ARGS='-B -m BCacheSim.cachesim.simulate_ap --config runs/example/rejectx/config.json'\n", 214 | "+ case \"$PYTHON\" in\n", 215 | "+ PYTHON_BIN=python\n", 216 | "+++ dirname ./BCacheSim/run_py.sh\n", 217 | "++ cd ./BCacheSim\n", 218 | "++ pwd\n", 219 | "+ DIR=/work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim\n", 220 | "+ cd /work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim/..\n", 221 | "+ stdbuf -eL -oL python -B -m BCacheSim.cachesim.simulate_ap --config runs/example/rejectx/config.json\n", 222 | "{'admit_only_prefetches': False,\n", 223 | " 'ap': 'rejectx',\n", 224 | " 'ap_chunk_threshold': None,\n", 225 | " 'ap_feat_subset': None,\n", 226 | " 'ap_probability': 0.508154,\n", 227 | " 'ap_threshold': 1.0,\n", 228 | " 'batch_size': 512,\n", 229 | " 'block_level': False,\n", 230 | " 'cache_elems': None,\n", 231 | " 'cachelib_trace': None,\n", 232 | " 
'coinflip_ap': False,\n", 233 | " 'config': ['runs/example/rejectx/config.json'],\n", 234 | " 'debug': False,\n", 235 | " 'early_evict': None,\n", 236 | " 'ep_analysis': None,\n", 237 | " 'evict_by_episode': False,\n", 238 | " 'eviction_policy': 'LRU',\n", 239 | " 'fast': False,\n", 240 | " 'fifo': False,\n", 241 | " 'flashieldprob_ap_min_hits': None,\n", 242 | " 'flip_threshold': True,\n", 243 | " 'hybrid_ap_threshold': None,\n", 244 | " 'ignore_existing': False,\n", 245 | " 'job_id': None,\n", 246 | " 'learned_ap': False,\n", 247 | " 'learned_ap_filter_count': 6,\n", 248 | " 'learned_ap_granularity': None,\n", 249 | " 'learned_ap_model_path': None,\n", 250 | " 'learned_size': False,\n", 251 | " 'limit': None,\n", 252 | " 'lirs': False,\n", 253 | " 'log_decisions': False,\n", 254 | " 'log_episodes': False,\n", 255 | " 'log_evictions': False,\n", 256 | " 'log_interval': 600.0,\n", 257 | " 'log_prefetch': False,\n", 258 | " 'log_req': False,\n", 259 | " 'offline_ap': False,\n", 260 | " 'offline_ap_decisions': None,\n", 261 | " 'one_chunk': False,\n", 262 | " 'opt_ap_threshold': None,\n", 263 | " 'optplus_args': None,\n", 264 | " 'output_dir': 'runs/example/rejectx',\n", 265 | " 'override': False,\n", 266 | " 'peak_strategy': None,\n", 267 | " 'prefetch': None,\n", 268 | " 'prefetch_range': 'episode',\n", 269 | " 'prefetch_when': 'never',\n", 270 | " 'prefetch_when_threshold': None,\n", 271 | " 'prefetcher_model_path': None,\n", 272 | " 'profile': False,\n", 273 | " 'ram_ap_clone': False,\n", 274 | " 'ram_cache': False,\n", 275 | " 'ram_cache_elems': None,\n", 276 | " 'ram_cache_size_gb': 10.0,\n", 277 | " 'rejectx_ap': True,\n", 278 | " 'rejectx_ap_factor': None,\n", 279 | " 'rejectx_ap_threshold': None,\n", 280 | " 'retrain_interval_hrs': None,\n", 281 | " 'size_gb': 366.475,\n", 282 | " 'size_opt': 'access',\n", 283 | " 'stats_start': 86400.0,\n", 284 | " 'trace': 'data/tectonic/201910/Region1/full_0_0.1.trace',\n", 285 | " 'tracefile': 
'data/tectonic/201910/Region1/full_0_0.1.trace',\n", 286 | " 'train_history_hrs': None,\n", 287 | " 'ttl_model_path': None,\n", 288 | " 'write_mbps': 0}\n", 289 | "Command: /work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim/cachesim/../run_py.sh py /work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim/cachesim/simulate_ap.py --config runs/example/rejectx/config.json\n", 290 | "Output dir: runs/example/rejectx/rejectx-ap-1_0.508154_lru_366.475GB\n", 291 | "Logging to runs/example/rejectx/rejectx-ap-1_0.508154_lru_366.475GB/full_0_0.1.out\n", 292 | "Reading from file data/tectonic/201910/Region1/full_0_0.1.trace\n", 293 | "{'total_iops': 147794, 'total_iops_get': 127305, 'total_iops_put': 20489, 'max_key': ('9997015', Acc(block=9997015, offset=0, size=8388608, ts=1572298799.127971, features=op=OpType.GET_PERM, pipeline=1, namespace=9, user=23)), 'trace_duration_secs': 604641.98, 'start_ts': 1572074461.57806, 'end_ts': 1572679103.557401, 'filename': 'data/tectonic/201910/Region1/full_0_0.1.trace', 'trace_hash': b'859229345a0fa792582df0db8cd56c4d', 'kwargs_hash': '5a0a05', 'kwargs': {'only_gets': False}}\n", 294 | "[0] TimeLeft | [1] I | [2] TraceTime | [3] Hrs$ | [4] %GETs | [5] GETs$ | [6] PUTs$ | [7] STGet%$ | [8] PeakST%$ | [9] STGetNoCa%$ | [10] STPut%$ | [11] ReqMBs$ | [12] GetMBs$ | [13] PutMBs$ | [14] FlaMBs$ | [15] PreMBs$ | [16] STSaved%$ | [17] Accept%$ | [18] STLate%$ | [19] Wasted%$ | [20] EA$ | [21] AvgIAMax | [22] RAMGb | [23] EstTotalTime | [24] Speedup$ | [25] GET/Ts$ | [26] GET/s$ | [27] WarningRootEpNotfound | [28] WarningPutStartsAfterZero | [29] Flashcache/warningAdmittedEpsUnknown\n", 295 | "[0] 4.4m | [1] 1 | [2] 0.17h | [3] 0.2 | [4] 0.2 | [5] 228 | [6] 17 | [7] 20.75 | [8] 0.00 | [9] 29.16 | [10] 5.11 | [11] 881.34 | [12] 684.77 | [13] 216.82 | [14] 98.39 | [15] 0.00 | [16] 28.9% | [17] 14.38% | [18] 6.6% | [19] 0.0% | [20] 0 | [21] (10,28s) | [22] 0.2 | [23] 4.4m | [24] 1.26 | [25] 0.4 | [26] 476.3 | [27] 228 | [28] 17 | [29] 476\n", 296 | 
"[0] TimeLeft | [1] I | [2] TraceTime | [3] Hrs$ | [4] %GETs | [5] GETs$ | [6] PUTs$ | [7] STGet%$ | [8] PeakST%$ | [9] STGetNoCa%$ | [10] STPut%$ | [11] ReqMBs$ | [12] GetMBs$ | [13] PutMBs$ | [14] FlaMBs$ | [15] PreMBs$ | [16] STSaved%$ | [17] Accept%$ | [18] STLate%$ | [19] Wasted%$ | [20] EA$ | [21] AvgIAMax | [22] RAMGb | [23] EstTotalTime | [24] Speedup$ | [25] GET/Ts$ | [26] GET/s$ | [27] WarningRootEpNotfound | [28] WarningPutStartsAfterZero | [29] Flashcache/warningAdmittedEpsUnknown | [30] Flashcache/warningAdmitsPartial | [31] Flashcache/warningAdmitsPartialEpisodes | [32] Flashcache/warningEvictedEpisodenotfound\n", 297 | "[0] 4.7m | [1] 145 | [2] 1d0.17h | [3] 24.0 | [4] 14.3 | [5] 18030 | [6] 2960 | [7] 12.55 | [8] 23.10 | [9] 20.66 | [10] 5.09 | [11] 720.15 | [12] 445.05 | [13] 205.75 | [14] 30.57 | [15] 0.00 | [16] 39.2% | [17] 6.81% | [18] 2.9% | [19] 42.0% | [20] (1831,2h) | [21] (352,24m) | [22] 0.3 | [23] 5.5m | [24] 1.85 | [25] 0.2 | [26] 386.7 | [27] 18258 | [28] 2977 | [29] 21607 | [30] 1495 | [31] 571 | [32] 18606\n", 298 | "[0] 3.8m | [1] 290 | [2] 2d0.33h | [3] 24.2 | [4] 27.5 | [5] 16688 | [6] 3414 | [7] 12.18 | [8] 24.27 | [9] 16.26 | [10] 5.79 | [11] 521.68 | [12] 403.13 | [13] 233.57 | [14] 29.09 | [15] 0.00 | [16] 25.1% | [17] 3.56% | [18] 3.5% | [19] 46.8% | [20] (1830,2h) | [21] (338,22m) | [22] 0.3 | [23] 5.2m | [24] 2.13 | [25] 0.2 | [26] 408.4 | [27] 34946 | [28] 6391 | [29] 41854 | [30] 3992 | [31] 1238 | [32] 38853\n", 299 | "[0] 3.7m | [1] 434 | [2] 3d0.34h | [3] 24.0 | [4] 40.4 | [5] 16541 | [6] 3206 | [7] 12.22 | [8] 24.27 | [9] 16.37 | [10] 5.21 | [11] 528.04 | [12] 419.45 | [13] 207.33 | [14] 33.07 | [15] 0.00 | [16] 25.4% | [17] 2.73% | [18] 3.9% | [19] 46.0% | [20] (1702,2h) | [21] (329,22m) | [22] 0.3 | [23] 5.9m | [24] 1.80 | [25] 0.2 | [26] 344.5 | [27] 51487 | [28] 9597 | [29] 64722 | [30] 5662 | [31] 1906 | [32] 61721\n", 300 | "[0] 2.6m | [1] 579 | [2] 4d0.5h | [3] 24.2 | [4] 55.3 | [5] 18905 | [6] 3238 | [7] 14.95 
| [8] 29.18 | [9] 20.01 | [10] 4.97 | [11] 672.47 | [12] 526.45 | [13] 194.52 | [14] 45.88 | [15] 0.00 | [16] 25.3% | [17] 2.70% | [18] 4.5% | [19] 37.4% | [20] (1317,1h) | [21] (318,21m) | [22] 0.3 | [23] 5.7m | [24] 1.68 | [25] 0.2 | [26] 364.3 | [27] 70392 | [28] 12835 | [29] 96642 | [30] 7839 | [31] 2767 | [32] 93641\n", 301 | "[0] 1.8m | [1] 723 | [2] 5d0.5h | [3] 24.0 | [4] 69.5 | [5] 18037 | [6] 2961 | [7] 15.14 | [8] 33.41 | [9] 19.43 | [10] 4.80 | [11] 657.17 | [12] 537.16 | [13] 190.83 | [14] 37.78 | [15] 0.00 | [16] 22.1% | [17] 1.72% | [18] 3.8% | [19] 37.7% | [20] (1465,2h) | [21] (326,21m) | [22] 0.3 | [23] 5.7m | [24] 1.75 | [25] 0.2 | [26] 365.1 | [27] 88429 | [28] 15796 | [29] 122753 | [30] 9853 | [31] 3618 | [32] 119752\n", 302 | "[0] 0.78m | [1] 868 | [2] 6d0.67h | [3] 24.2 | [4] 85.7 | [5] 20679 | [6] 2395 | [7] 16.10 | [8] 33.41 | [9] 21.25 | [10] 4.02 | [11] 702.84 | [12] 572.59 | [13] 161.70 | [14] 38.76 | [15] 0.00 | [16] 24.2% | [17] 1.43% | [18] 3.5% | [19] 40.3% | [20] (1442,2h) | [21] (339,22m) | [22] 0.3 | [23] 5.6m | [24] 1.63 | [25] 0.2 | [26] 387.9 | [27] 109108 | [28] 18191 | [29] 149733 | [30] 11526 | [31] 4352 | [32] 146732\n", 303 | "[0] TimeLeft | [1] I | [2] TraceTime | [3] Hrs$ | [4] %GETs | [5] GETs$ | [6] PUTs$ | [7] STGet%$ | [8] PeakST%$ | [9] STGetNoCa%$ | [10] STPut%$ | [11] ReqMBs$ | [12] GetMBs$ | [13] PutMBs$ | [14] FlaMBs$ | [15] PreMBs$ | [16] STSaved%$ | [17] Accept%$ | [18] STLate%$ | [19] Wasted%$ | [20] EA$ | [21] AvgIAMax | [22] RAMGb | [23] EstTotalTime | [24] Speedup$ | [25] GET/Ts$ | [26] GET/s$ | [27] WarningRootEpNotfound | [28] WarningPutStartsAfterZero | [29] Flashcache/warningAdmittedEpsUnknown | [30] Flashcache/warningAdmitsPartial | [31] Flashcache/warningAdmitsPartialEpisodes | [32] Flashcache/warningEvictedEpisodenotfound\n", 304 | "[0] | [1] 1008 | [2] 6d24h | [3] 23.3 | [4] 100.0 | [5] 18197 | [6] 2298 | [7] 14.11 | [8] 33.41 | [9] 20.14 | [10] 4.12 | [11] 679.91 | [12] 519.25 | [13] 167.19 | [14] 
33.68 | [15] 0.00 | [16] 29.9% | [17] 1.02% | [18] 3.2% | [19] 36.4% | [20] (1564,2h) | [21] (348,23m) | [22] 0.4 | [23] 5.6m | [24] 1.78 | [25] 0.2 | [26] 387.2 | [27] 127305 | [28] 20489 | [29] 172322 | [30] 12776 | [31] 5002 | [32] 169321\n", 305 | "Results preview: \n", 306 | " Trace: {'region': '201910', 'sample_ratio': 0.1, 'start': 0.0, 'only_gets': False} \n", 307 | " AP: rejectx, RejectX(threshold=1.0, window=1525.48, factor=0.508154) \n", 308 | " Eviction Policy: LRU, \n", 309 | " Average TTL: 0.00 s \n", 310 | " Duration so far: 6 days 23 hrs 57 mins 22 secs \n", 311 | " Duration: 6 days 23 hrs 57 mins 22 secs (10 mins intervals) \n", 312 | " Service Time Utilization (%) - 18.76250 \n", 313 | " Service Time Utilization (%) [GET] - 13.90071 \n", 314 | " Service Time Utilization (%) [PUT] - 4.86179 \n", 315 | " Peak Service Time Utilization (%) - 44.35496 \n", 316 | " Peak Service Time Utilization (%) [PUT] - 13.78412 \n", 317 | " Peak Service Time Utilization (%) [GET] - 32.85580 \n", 318 | " P99 Service Time Utilization (%) - 29.60424 \n", 319 | " P99.9 Service Time Utilization (%) - 39.80833 \n", 320 | " P99.99 Service Time Utilization (%) - 43.90030 \n", 321 | " P99 Service Time Utilization (%) [GET] - 23.41674 \n", 322 | " P99.9 Service Time Utilization (%) [GET] - 30.71864 \n", 323 | " P99.99 Service Time Utilization (%) [GET] - 32.64209 \n", 324 | " P99 Service Time Utilization (%) [GET-NoCache] - 36.42881 \n", 325 | " P99.9 Service Time Utilization (%) [GET-NoCache] - 67.16339 \n", 326 | " P99.99 Service Time Utilization (%) [GET-NoCache] - 138.86694 \n", 327 | " P99 Service Time Utilization (%) [PUT] - 10.70621 \n", 328 | " P99.9 Service Time Utilization (%) [PUT] - 13.09393 \n", 329 | " P99.99 Service Time Utilization (%) [PUT] - 13.71510 \n", 330 | " P99 Service Time Saved ratio 1 - 0.35719 \n", 331 | " P99.9 Service Time Saved ratio 1 - 0.54263 \n", 332 | " P99.99 Service Time Saved ratio 1 - 0.76494 \n", 333 | " Peak Service Time Saved ratio 1 
(range) - 0.77624 \n", 334 | " Peak Service Time Saved ratio 2 (1st M to end) - 0.77516 \n", 335 | " Service Time Saved ratio 1 (range) - 0.27468 \n", 336 | " Service Time Saved ratio 2 (first miss to end) - 0.27200 \n", 337 | " Service Time Saved ratio 3 (start to end) - 0.26977 \n", 338 | " Service Time Saved ratio - 0.26977 \n", 339 | " Bonus STS beyond STS(A) (Assumed EA too short) - 0.00000 \n", 340 | " Est Service Time Lost from late/readmits - 0.03611 \n", 341 | " ST + late/readmits potential - 0.31079 \n", 342 | " Potential STS(Analysis) from Admitted Episodes - 0.00000 \n", 343 | " Service Time Saved (Analysis @ -1.0MB/s, -1.0GB) - -1.00000 \n", 344 | " Service Time Saved (Analysis @ -1.0MB/s) - -1.00000 \n", 345 | " Potential ST Saved (PF) (Analysis) from Admitted Eps - 0.00 \n", 346 | " Wasted hits from late/readmits: 0 IOs, 172322 chunks \n", 347 | " IOPS saved ratio - 0.34584 \n", 348 | " IOPS saved ratio (Flash) - 0.3458 \n", 349 | " IOPS saved ratio (AdmitBuffer) - 0.0000 \n", 350 | " IOPS saved ratio (Partial) - 0.06836 \n", 351 | " IOPS saved ratio lost from late/readmits - 0.00000 \n", 352 | " IOPS SR (Analysis) from Admitted Episodes - 0.00000 \n", 353 | " IOPS SR (Analysis, ConstantThisEA) - -1.00000 \n", 354 | " GETs - 127305 / 127305 \n", 355 | " Saved - 44027 \n", 356 | " Misses - 83278 \n", 357 | " iops_requests/op/GET_TEMP - 31263 \n", 358 | " iops_requests/op/GET_PERM - 95897 \n", 359 | " iops_requests/op/GET_NOT_INIT - 145 \n", 360 | " GET (No Cache) ST - 19.2% \n", 361 | " service_time_nocache/op/GET_TEMP - 2.8% \n", 362 | " service_time_nocache/op/GET_PERM - 16.4% \n", 363 | " service_time_nocache/op/GET_NOT_INIT - 0.0% \n", 364 | " PUTs - 20489 \n", 365 | " puts_ios/op/PUT_TEMP - 8585 \n", 366 | " puts_ios/op/PUT_PERM - 11904 \n", 367 | " PUT ST - 4.9% \n", 368 | " service_time_writes/op/PUT_TEMP - 2.6% \n", 369 | " service_time_writes/op/PUT_PERM - 2.3% \n", 370 | " Episodes admitted - 8690 (Analysis: -1) \n", 371 | " Chunk hit ratio 
- 0.23684 \n", 372 | " Chunk hit ratio (Flash) - 0.23684 \n", 373 | " Flash Cache Hit Rate - 0.23684 \n", 374 | " Client Bandwidth [GET] - 640.37 MB/s \n", 375 | " Backend Bandwidth [GET+PUT] - 683.67 MB/s \n", 376 | " Backend Bandwidth [GET] - 489.11 MB/s \n", 377 | " Backend Bandwidth [PUT] - 194.56 MB/s \n", 378 | " Chunks Queried - 3097546 \n", 379 | " Chunks Saved - 708236 \n", 380 | " Chunks Fetched from Backend - 2365890 \n", 381 | " Flash writes - 172322 \n", 382 | " Acceptance Ratio - 0.07290 \n", 383 | " Flash write rate - 35.62 MB/s \n", 384 | " Writing Chunks from Admitted Episodes - 0.0 MB/s (WR Ratio: 0.00) \n", 385 | " Analysis Closest Write Rate - -1.00 \n", 386 | " Flash Wasted % of WR - 0.40 \n", 387 | " Flash Wasted (by admit) - \n", 388 | " Flash Wasted (by evict) - \n", 389 | " Cache Size - -1.00 GB (Analysis) vs 366.48 (Sim, 3002 items) \n", 390 | " Flash Avg Eviction Age - (1562.5,1.8h)\n", 391 | " Assumed Flash EA \n", 392 | " Too Short/Bonus Hits - 0 (STSR: 0.00) \n", 393 | " Too Long/Extra Writes - 0 (WRR: 0.00) \n", 394 | " Flash Mean Time in System - (2513.8,2.9h) \n", 395 | " Analysis Mean Time in System - -1 \n", 396 | " Time to warmup - 1917, 1 hr 21 mins 10 secs \n", 397 | " Simulator RAM usage - 0.4 GB \n", 398 | " Simulator Time - 5 mins 47 secs \n", 399 | " \n", 400 | "Results written to runs/example/rejectx/rejectx-ap-1_0.508154_lru_366.475GB/full_0_0.1_cache_perf.txt.stats.lzma\n", 401 | "Results written to runs/example/rejectx/rejectx-ap-1_0.508154_lru_366.475GB/full_0_0.1_cache_perf.txt.lzma\n", 402 | "Command:\n", 403 | "/work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim/cachesim/../run_py.sh py /work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim/cachesim/simulate_ap.py --config runs/example/rejectx/config.json\n", 404 | "Removed file runs/example/rejectx/rejectx-ap-1_0.508154_lru_366.475GB/full_0_0.1_cache_perf.txt.part.lzma\n", 405 | "Removed file 
runs/example/rejectx/rejectx-ap-1_0.508154_lru_366.475GB/full_0_0.1_cache_perf.txt.stats.part.lzma\n", 406 | "Complete\n", 407 | "+ exit\n", 408 | "++ '[' 2 = 1 ']'\n" 409 | ] 410 | } 411 | ], 412 | "source": [ 413 | "./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --config runs/example/rejectx/config.json" 414 | ] 415 | }, 416 | { 417 | "cell_type": "markdown", 418 | "id": "ebf73239-ea9a-4c46-af19-81cff0f90a84", 419 | "metadata": {}, 420 | "source": [ 421 | "### Train Baleen (3 mins)" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 5, 427 | "id": "bdc96581-cf18-46c4-a46f-309a20e3c07b", 428 | "metadata": {}, 429 | "outputs": [ 430 | { 431 | "name": "stdout", 432 | "output_type": "stream", 433 | "text": [ 434 | "+ PYTHON=py\n", 435 | "+ ARGS='-B -m BCacheSim.episodic_analysis.train --exp example --policy PolicyUtilityServiceTimeSize2 --region Region1 --sample-ratio 0.1 --sample-start 0 --trace-group 201910 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.475 --output-base-dir runs/example/baleen --eviction-age 5892.856 --rl-init-kwargs filter_=prefetch --train-target-wr 35.599 --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 --ap-acc-cutoff 15 --ap-feat-subset meta+block+chunk'\n", 436 | "+ case \"$PYTHON\" in\n", 437 | "+ PYTHON_BIN=python\n", 438 | "+++ dirname ./BCacheSim/run_py.sh\n", 439 | "++ cd ./BCacheSim\n", 440 | "++ pwd\n", 441 | "+ DIR=/work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim\n", 442 | "+ cd /work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim/..\n", 443 | "+ stdbuf -eL -oL python -B -m BCacheSim.episodic_analysis.train --exp example --policy PolicyUtilityServiceTimeSize2 --region Region1 --sample-ratio 0.1 --sample-start 0 --trace-group 201910 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.475 --output-base-dir runs/example/baleen --eviction-age 5892.856 --rl-init-kwargs filter_=prefetch 
--train-target-wr 35.599 --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 --ap-acc-cutoff 15 --ap-feat-subset meta+block+chunk\n", 444 | "rl_init_kwargs: {'filter_': 'prefetch'}\n", 445 | "Sample cmd for debug: --exp example --policy PolicyUtilityServiceTimeSize2 --region Region1 --sample-ratio 0.1 --sample-start 0.0 --trace-group 201910 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.475 --output-base-dir runs/example/baleen --eviction-age 5892.86 --only-gets --rl-init-kwargs filter_=prefetch --train-target-wr 35.599 --train-models admit prefetch --no-episodes --train-split-secs-start 0 --train-split-secs-end 86400 \n", 446 | "Logging to tmp/example/201910_Region1_0_0.1/ea_5892.86_wr_35.599.out\n", 447 | "Cmd for rerun: bcachesimrun_py.sh py -B -m episodic_analysis.train --exp example --policy PolicyUtilityServiceTimeSize2 --region Region1 --sample-ratio 0.1 --sample-start 0 --trace-group 201910 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.475 --output-base-dir runs/example/baleen --eviction-age 5892.856 --rl-init-kwargs filter_=prefetch --train-target-wr 35.599 --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 --ap-acc-cutoff 15 --ap-feat-subset meta+block+chunk\n", 448 | "Files to generate\n", 449 | "analysis runs/example/baleen/example/201910_Region1_0_0.1/offline_analysis_ea_5892.86.csv Exists\n", 450 | "thresholds tmp/example/201910_Region1_0_0.1/decisions_utility_service_time_size_fixed_ea_5892.86.pkl.bz Exists\n", 451 | "model_prefetch_offset_start tmp/example/201910_Region1_0_0.1/ea_5892.86_wr_35.599_prefetch_offset_start.model Exists\n", 452 | "model_prefetch_size tmp/example/201910_Region1_0_0.1/ea_5892.86_wr_35.599_prefetch_size.model Exists\n", 453 | "model_prefetch_offset_end tmp/example/201910_Region1_0_0.1/ea_5892.86_wr_35.599_prefetch_offset_end.model Exists\n", 454 | 
"model_prefetch_pred_net_pf_st_binary tmp/example/201910_Region1_0_0.1/ea_5892.86_wr_35.599_prefetch_pred_net_pf_st_binary.model Exists\n", 455 | "model_admit_threshold_binary tmp/example/201910_Region1_0_0.1/ea_5892.86_wr_35.599_admit_threshold_binary.model \n", 456 | "trace_kwargs: {'region': 'Region1', 'sample_ratio': 0.1, 'start': 0.0, 'trace_group': '201910', 'only_gets': True, 'min_ts_from_start': 0.0, 'max_ts_from_start': 86400.0, 'get_features': True}\n", 457 | "Reading from file data/tectonic/201910/Region1/full_0_0.1.trace\n", 458 | "Trace details: Duration=86401.23277902603, Start=1572074461.57806, End=1572160862.810839, NumAccesses=17987\n", 459 | "res_fn_kwargs: {'workers': 8, 'residency_fn': , 'residencylist_class': }\n", 460 | "gen_episodes: 100%|████████████████████████| 2848/2848 [00:22<00:00, 129.37it/s]\n", 461 | "tmp/example/201910_Region1_0_0.1/decisions_utility_service_time_size_fixed_ea_5892.86.pkl.bz already exists\n", 462 | "Primary targets only\n", 463 | " Assumed Eviction Age (s) Target Write Rate Target Cache Size Service Time Saved Ratio IOPSSavedRatio Write Rate (MB/s) Cache Size (GB) Mean Time In System (s) Episodes admitted Cutoff score Target\n", 464 | "0 5892.856 NaN 366.475 0.51475 0.581587 24.046243 366.666117 15614.335728 497 0.003161 Cache Size\n", 465 | "0 5892.856 34.0 NaN 0.54420 0.613665 34.053334 456.093015 13714.934724 638 0.002127 Write Rate\n", 466 | "Others\n", 467 | " Assumed Eviction Age (s) Target Write Rate Target Cache Size Service Time Saved Ratio IOPSSavedRatio Write Rate (MB/s) Cache Size (GB) Mean Time In System (s) Episodes admitted Cutoff score Target\n", 468 | "0 5892.856 NaN 366.475 0.514750 0.581587 24.046243 366.666117 15614.335728 497 0.003161 Cache Size\n", 469 | "0 5892.856 295.827666 NaN 0.645137 0.770890 295.827666 2084.838698 7216.616547 4121 -0.006925 Max Write Rate\n", 470 | "0 5892.856 154.763417 NaN 0.645602 0.769055 154.763417 1273.019727 8422.999618 2030 0.000019 Max Write Rate (No waste)\n", 
471 | "0 5892.856 295.827666 NaN 0.645137 0.770890 295.827666 2084.838698 7216.616547 4121 -0.006925 Max Write Rate (Not empty)\n", 472 | "0 5892.856 34.000000 NaN 0.544200 0.613665 34.053334 456.093015 13714.934724 638 0.002127 Write Rate\n", 473 | "1 5892.856 50.000000 NaN 0.578215 0.651804 50.038349 587.630633 12025.452194 832 0.001547 Write Rate\n", 474 | "2 5892.856 100.000000 NaN 0.630011 0.722299 100.002913 955.524962 9784.290550 1421 0.000539 Write Rate\n", 475 | "3 5892.856 75.000000 NaN 0.609445 0.683160 75.042332 788.781667 10763.423851 1140 0.001054 Write Rate\n", 476 | "4 5892.856 20.000000 NaN 0.499783 0.558959 20.037330 332.235623 16978.773163 425 0.004146 Write Rate\n", 477 | "5 5892.856 10.000000 NaN 0.440463 0.481070 10.007091 212.469931 21741.504011 254 0.008635 Write Rate\n", 478 | "6 5892.856 60.000000 NaN 0.592313 0.673264 60.055567 665.754937 11351.704634 970 0.001104 Write Rate\n", 479 | "7 5892.856 90.000000 NaN 0.624182 0.704342 90.072500 897.515557 10203.513120 1312 0.000585 Write Rate\n", 480 | "8 5892.856 30.000000 NaN 0.534164 0.599766 30.001019 422.732227 14428.770075 572 0.002595 Write Rate\n", 481 | "Episode filter: prefetch\n", 482 | "#Episodes: 4121, #Accesses: 17987\n", 483 | "Training model for offset_start\n", 484 | "/home/wonglkd_globusid_org/.local/lib/python3.10/site-packages/lightgbm/engine.py:181: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n", 485 | " _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n", 486 | "/home/wonglkd_globusid_org/.local/lib/python3.10/site-packages/lightgbm/engine.py:239: UserWarning: 'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. 
Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n", 487 | " _log_warning(\"'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. \"\n", 488 | "Training model for size\n", 489 | "Training model for offset_end\n", 490 | " MSE R2 Threshold Label Subset\n", 491 | "0 7.821374e+11 0.500186 0.5 offset_start Train\n", 492 | "1 9.388968e+11 0.417361 0.5 offset_start Test\n", 493 | "2 3.914104e+12 0.685385 0.5 size Train\n", 494 | "3 4.817162e+12 0.638239 0.5 size Test\n", 495 | "4 3.238404e+12 0.715996 0.5 offset_end Train\n", 496 | "5 3.923970e+12 0.683815 0.5 offset_end Test\n", 497 | "Training model for pred_net_pf_st_binary\n", 498 | "/home/wonglkd_globusid_org/.local/lib/python3.10/site-packages/lightgbm/engine.py:181: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n", 499 | " _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n", 500 | "/home/wonglkd_globusid_org/.local/lib/python3.10/site-packages/lightgbm/engine.py:239: UserWarning: 'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n", 501 | " _log_warning(\"'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. 
\"\n", 502 | "Training model for pf_benefit\n", 503 | "Training model for pred_pf_benefit\n", 504 | "Training model for pred_net_pf_st\n", 505 | "/work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim/episodic_analysis/train_utils.py:20: RuntimeWarning: divide by zero encountered in scalar divide\n", 506 | " ret['FN/FP'] = ret['FN'] / ret['FP']\n", 507 | "/work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim/episodic_analysis/train_utils.py:20: RuntimeWarning: divide by zero encountered in scalar divide\n", 508 | " ret['FN/FP'] = ret['FN'] / ret['FP']\n", 509 | " MSE R2 Threshold ... TNR FN/FP Accuracy\n", 510 | "0 7.821374e+11 0.500186 0.5 ... NaN NaN NaN\n", 511 | "1 9.388968e+11 0.417361 0.5 ... NaN NaN NaN\n", 512 | "2 3.914104e+12 0.685385 0.5 ... NaN NaN NaN\n", 513 | "3 4.817162e+12 0.638239 0.5 ... NaN NaN NaN\n", 514 | "4 3.238404e+12 0.715996 0.5 ... NaN NaN NaN\n", 515 | "5 3.923970e+12 0.683815 0.5 ... NaN NaN NaN\n", 516 | "0 1.955743e-02 0.000000 0.5 ... 1.0 inf 0.980044\n", 517 | "1 1.524496e-02 -0.001325 0.5 ... 1.0 inf 0.984536\n", 518 | "2 2.148027e-03 0.647603 0.5 ... NaN NaN NaN\n", 519 | "3 2.896311e-03 0.573482 0.5 ... NaN NaN NaN\n", 520 | "4 2.479886e-05 0.089543 0.5 ... NaN NaN NaN\n", 521 | "5 8.868613e-06 0.090182 0.5 ... NaN NaN NaN\n", 522 | "6 4.809722e-05 0.291816 0.5 ... NaN NaN NaN\n", 523 | "7 5.485238e-05 0.196444 0.5 ... 
NaN NaN NaN\n", 524 | "\n", 525 | "[14 rows x 13 columns]\n", 526 | "Source Episodes: 4121, Used Episodes: 4121\n", 527 | "Rows: 11227, Train: 7946, Test: 3281\n", 528 | "./BCacheSim/run_py.sh: line 31: 604 Killed stdbuf -eL -oL $PYTHON_BIN $ARGS\n" 529 | ] 530 | }, 531 | { 532 | "ename": "", 533 | "evalue": "137", 534 | "output_type": "error", 535 | "traceback": [] 536 | } 537 | ], 538 | "source": [ 539 | "./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp example --policy PolicyUtilityServiceTimeSize2 --region Region1 --sample-ratio 0.1 --sample-start 0 --trace-group 201910 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.475 --output-base-dir runs/example/baleen --eviction-age 5892.856 --rl-init-kwargs filter_=prefetch --train-target-wr 35.599 --train-models admit prefetch --train-split-secs-start 0 --train-split-secs-end 86400 --ap-acc-cutoff 15 --ap-feat-subset meta+block+chunk" 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": 8, 545 | "id": "e5599518-4147-4744-9d98-620f6197aea7", 546 | "metadata": {}, 547 | "outputs": [ 548 | { 549 | "name": "stdout", 550 | "output_type": "stream", 551 | "text": [ 552 | "+ PYTHON=py\n", 553 | "+ ARGS='-B -m BCacheSim.episodic_analysis.train --exp example --policy PolicyUtilityServiceTimeSize2 --region Region1 --sample-ratio 0.1 --sample-start 0 --trace-group 201910 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.475 --output-base-dir runs/example/baleen --eviction-age 5892.856 --rl-init-kwargs filter_=prefetch --train-target-wr 35.599 --train-models admit --train-split-secs-start 0 --train-split-secs-end 86400 --ap-acc-cutoff 15 --ap-feat-subset meta+block+chunk'\n", 554 | "+ case \"$PYTHON\" in\n", 555 | "+ PYTHON_BIN=python\n", 556 | "+++ dirname ./BCacheSim/run_py.sh\n", 557 | "++ cd ./BCacheSim\n", 558 | "++ pwd\n", 559 | "+ DIR=/work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim\n", 560 | "+ cd 
/work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim/..\n", 561 | "+ stdbuf -eL -oL python -B -m BCacheSim.episodic_analysis.train --exp example --policy PolicyUtilityServiceTimeSize2 --region Region1 --sample-ratio 0.1 --sample-start 0 --trace-group 201910 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.475 --output-base-dir runs/example/baleen --eviction-age 5892.856 --rl-init-kwargs filter_=prefetch --train-target-wr 35.599 --train-models admit --train-split-secs-start 0 --train-split-secs-end 86400 --ap-acc-cutoff 15 --ap-feat-subset meta+block+chunk\n", 562 | "rl_init_kwargs: {'filter_': 'prefetch'}\n", 563 | "Sample cmd for debug: --exp example --policy PolicyUtilityServiceTimeSize2 --region Region1 --sample-ratio 0.1 --sample-start 0.0 --trace-group 201910 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.475 --output-base-dir runs/example/baleen --eviction-age 5892.86 --only-gets --rl-init-kwargs filter_=prefetch --train-target-wr 35.599 --train-models admit --no-episodes --train-split-secs-start 0 --train-split-secs-end 86400 \n", 564 | "Logging to tmp/example/201910_Region1_0_0.1/ea_5892.86_wr_35.599.out\n", 565 | "Cmd for rerun: bcachesimrun_py.sh py -B -m episodic_analysis.train --exp example --policy PolicyUtilityServiceTimeSize2 --region Region1 --sample-ratio 0.1 --sample-start 0 --trace-group 201910 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.475 --output-base-dir runs/example/baleen --eviction-age 5892.856 --rl-init-kwargs filter_=prefetch --train-target-wr 35.599 --train-models admit --train-split-secs-start 0 --train-split-secs-end 86400 --ap-acc-cutoff 15 --ap-feat-subset meta+block+chunk\n", 566 | "Files to generate\n", 567 | "analysis runs/example/baleen/example/201910_Region1_0_0.1/offline_analysis_ea_5892.86.csv Exists\n", 568 | "thresholds tmp/example/201910_Region1_0_0.1/decisions_utility_service_time_size_fixed_ea_5892.86.pkl.bz 
Exists\n", 569 | "model_admit_threshold_binary tmp/example/201910_Region1_0_0.1/ea_5892.86_wr_35.599_admit_threshold_binary.model \n", 570 | "trace_kwargs: {'region': 'Region1', 'sample_ratio': 0.1, 'start': 0.0, 'trace_group': '201910', 'only_gets': True, 'min_ts_from_start': 0.0, 'max_ts_from_start': 86400.0, 'get_features': True}\n", 571 | "Reading from file data/tectonic/201910/Region1/full_0_0.1.trace\n", 572 | "Trace details: Duration=86401.23277902603, Start=1572074461.57806, End=1572160862.810839, NumAccesses=17987\n", 573 | "res_fn_kwargs: {'workers': 8, 'residency_fn': , 'residencylist_class': }\n", 574 | "gen_episodes: 100%|████████████████████████| 2848/2848 [00:23<00:00, 120.67it/s]\n", 575 | "tmp/example/201910_Region1_0_0.1/decisions_utility_service_time_size_fixed_ea_5892.86.pkl.bz already exists\n", 576 | "Primary targets only\n", 577 | " Assumed Eviction Age (s) Target Write Rate Target Cache Size Service Time Saved Ratio IOPSSavedRatio Write Rate (MB/s) Cache Size (GB) Mean Time In System (s) Episodes admitted Cutoff score Target\n", 578 | "0 5892.856 NaN 366.475 0.515065 0.581753 24.138834 367.027391 15569.768264 498 0.003161 Cache Size\n", 579 | "0 5892.856 34.0 NaN 0.544200 0.613665 34.053334 456.093015 13714.934724 638 0.002127 Write Rate\n", 580 | "Others\n", 581 | " Assumed Eviction Age (s) Target Write Rate Target Cache Size Service Time Saved Ratio IOPSSavedRatio Write Rate (MB/s) Cache Size (GB) Mean Time In System (s) Episodes admitted Cutoff score Target\n", 582 | "0 5892.856 NaN 366.475 0.515065 0.581753 24.138834 367.027391 15569.768264 498 0.003161 Cache Size\n", 583 | "0 5892.856 295.827666 NaN 0.645137 0.770890 295.827666 2084.838698 7216.616547 4121 -0.006925 Max Write Rate\n", 584 | "0 5892.856 154.763417 NaN 0.645602 0.769055 154.763417 1273.019727 8422.999618 2030 0.000019 Max Write Rate (No waste)\n", 585 | "0 5892.856 295.827666 NaN 0.645137 0.770890 295.827666 2084.838698 7216.616547 4121 -0.006925 Max Write Rate (Not 
empty)\n", 586 | "0 5892.856 34.000000 NaN 0.544200 0.613665 34.053334 456.093015 13714.934724 638 0.002127 Write Rate\n", 587 | "1 5892.856 50.000000 NaN 0.578253 0.651915 50.061496 588.382074 12035.262371 834 0.001547 Write Rate\n", 588 | "2 5892.856 100.000000 NaN 0.630011 0.722299 100.002913 955.671769 9785.793814 1421 0.000539 Write Rate\n", 589 | "3 5892.856 75.000000 NaN 0.609445 0.683160 75.042332 788.755172 10763.062312 1140 0.001054 Write Rate\n", 590 | "4 5892.856 20.000000 NaN 0.499783 0.558959 20.037330 332.235623 16978.773163 425 0.004146 Write Rate\n", 591 | "5 5892.856 10.000000 NaN 0.440463 0.481070 10.007091 212.469931 21741.504011 254 0.008635 Write Rate\n", 592 | "6 5892.856 60.000000 NaN 0.592313 0.673264 60.055567 665.483812 11347.081731 970 0.001104 Write Rate\n", 593 | "7 5892.856 90.000000 NaN 0.624182 0.704342 90.072500 897.515557 10203.513120 1312 0.000585 Write Rate\n", 594 | "8 5892.856 30.000000 NaN 0.534164 0.599766 30.001019 422.732227 14428.770075 572 0.002595 Write Rate\n", 595 | "Episode filter: prefetch\n", 596 | "#Episodes: 4121, #Accesses: 17987\n", 597 | "Source Episodes: 4121, Used Episodes: 4121\n", 598 | "Rows: 11227, Train: 7908, Test: 3319\n", 599 | "./BCacheSim/run_py.sh: line 31: 674 Killed stdbuf -eL -oL $PYTHON_BIN $ARGS\n" 600 | ] 601 | }, 602 | { 603 | "ename": "", 604 | "evalue": "137", 605 | "output_type": "error", 606 | "traceback": [] 607 | } 608 | ], 609 | "source": [ 610 | "./BCacheSim/run_py.sh py -B -m BCacheSim.episodic_analysis.train --exp example --policy PolicyUtilityServiceTimeSize2 --region Region1 --sample-ratio 0.1 --sample-start 0 --trace-group 201910 --supplied-ea physical --target-wrs 34 50 100 75 20 10 60 90 30 --target-csizes 366.475 --output-base-dir runs/example/baleen --eviction-age 5892.856 --rl-init-kwargs filter_=prefetch --train-target-wr 35.599 --train-models admit --train-split-secs-start 0 --train-split-secs-end 86400 --ap-acc-cutoff 15 --ap-feat-subset meta+block+chunk" 611 | ] 612 | 
}, 613 | { 614 | "cell_type": "code", 615 | "execution_count": 10, 616 | "id": "d58c5068-3beb-487f-ac21-28fa0fa05821", 617 | "metadata": {}, 618 | "outputs": [ 619 | { 620 | "name": "stdout", 621 | "output_type": "stream", 622 | "text": [ 623 | "decisions_utility_service_time_size_fixed_ea_5892.86.pkl.bz\n", 624 | "ea_5892.86_wr_35.599.err\n", 625 | "ea_5892.86_wr_35.599.out\n", 626 | "ea_5892.86_wr_35.599_prefetch_offset_end.model\n", 627 | "ea_5892.86_wr_35.599_prefetch_offset_start.model\n", 628 | "ea_5892.86_wr_35.599_prefetch_pred_net_pf_st_binary.model\n", 629 | "ea_5892.86_wr_35.599_prefetch_size.model\n" 630 | ] 631 | } 632 | ], 633 | "source": [ 634 | "ls tmp/example/201910_Region1_0_0.1" 635 | ] 636 | }, 637 | { 638 | "cell_type": "markdown", 639 | "id": "3aa5c01c-676d-41f5-a473-5f9a3a56cf2a", 640 | "metadata": {}, 641 | "source": [ 642 | "## Run Baleen in simulation (30 mins)" 643 | ] 644 | }, 645 | { 646 | "cell_type": "code", 647 | "execution_count": 4, 648 | "id": "bab445a6-fc46-4e32-80c6-49055afcbe06", 649 | "metadata": {}, 650 | "outputs": [ 651 | { 652 | "name": "stdout", 653 | "output_type": "stream", 654 | "text": [ 655 | "+ PYTHON=py\n", 656 | "+ ARGS='-B -m BCacheSim.cachesim.simulate_ap --config runs/example/baleen/prefetch_ml-on-partial-hit/config.json'\n", 657 | "+ case \"$PYTHON\" in\n", 658 | "+ PYTHON_BIN=python\n", 659 | "+++ dirname ./BCacheSim/run_py.sh\n", 660 | "++ cd ./BCacheSim\n", 661 | "++ pwd\n", 662 | "+ DIR=/work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim\n", 663 | "+ cd /work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim/..\n", 664 | "+ stdbuf -eL -oL python -B -m BCacheSim.cachesim.simulate_ap --config runs/example/baleen/prefetch_ml-on-partial-hit/config.json\n", 665 | "{'admit_only_prefetches': False,\n", 666 | " 'ap': 'mlnew',\n", 667 | " 'ap_chunk_threshold': None,\n", 668 | " 'ap_feat_subset': 'meta+block+chunk',\n", 669 | " 'ap_probability': None,\n", 670 | " 'ap_threshold': 0.798545,\n", 671 | " 'batch_size': 
16,\n", 672 | " 'block_level': False,\n", 673 | " 'cache_elems': None,\n", 674 | " 'cachelib_trace': None,\n", 675 | " 'coinflip_ap': False,\n", 676 | " 'config': ['runs/example/baleen/prefetch_ml-on-partial-hit/config.json'],\n", 677 | " 'debug': False,\n", 678 | " 'early_evict': None,\n", 679 | " 'ep_analysis': 'runs/example/baleen/example/201910_Region1_0_0.1/offline_analysis_ea_5892.86.csv',\n", 680 | " 'evict_by_episode': False,\n", 681 | " 'eviction_policy': 'LRU',\n", 682 | " 'fast': False,\n", 683 | " 'fifo': False,\n", 684 | " 'flashieldprob_ap_min_hits': None,\n", 685 | " 'flip_threshold': True,\n", 686 | " 'hybrid_ap_threshold': None,\n", 687 | " 'ignore_existing': False,\n", 688 | " 'job_id': None,\n", 689 | " 'learned_ap': True,\n", 690 | " 'learned_ap_filter_count': 6,\n", 691 | " 'learned_ap_granularity': 'both',\n", 692 | " 'learned_ap_model_path': 'tmp/example/201910_Region1_0_0.1/ea_5892.86_wr_35.599_admit_threshold_binary.model',\n", 693 | " 'learned_size': False,\n", 694 | " 'limit': None,\n", 695 | " 'lirs': False,\n", 696 | " 'log_decisions': False,\n", 697 | " 'log_episodes': False,\n", 698 | " 'log_evictions': False,\n", 699 | " 'log_interval': 600.0,\n", 700 | " 'log_prefetch': False,\n", 701 | " 'log_req': False,\n", 702 | " 'offline_ap': False,\n", 703 | " 'offline_ap_decisions': None,\n", 704 | " 'one_chunk': False,\n", 705 | " 'opt_ap_threshold': None,\n", 706 | " 'optplus_args': None,\n", 707 | " 'output_dir': 'runs/example/baleen/prefetch_ml-on-partial-hit',\n", 708 | " 'override': False,\n", 709 | " 'peak_strategy': None,\n", 710 | " 'prefetch': None,\n", 711 | " 'prefetch_range': 'acctime-episode-predict',\n", 712 | " 'prefetch_when': 'partial',\n", 713 | " 'prefetch_when_threshold': None,\n", 714 | " 'prefetcher_model_path': 'tmp/example/201910_Region1_0_0.1/ea_5892.86_wr_35.599_prefetch_{k}.model',\n", 715 | " 'profile': False,\n", 716 | " 'ram_ap_clone': False,\n", 717 | " 'ram_cache': False,\n", 718 | " 'ram_cache_elems': 
None,\n", 719 | " 'ram_cache_size_gb': 10.0,\n", 720 | " 'rejectx_ap': False,\n", 721 | " 'rejectx_ap_factor': None,\n", 722 | " 'rejectx_ap_threshold': None,\n", 723 | " 'retrain_interval_hrs': None,\n", 724 | " 'size_gb': 366.475,\n", 725 | " 'size_opt': 'access',\n", 726 | " 'stats_start': 86400.0,\n", 727 | " 'trace': 'data/tectonic/201910/Region1/full_0_0.1.trace',\n", 728 | " 'tracefile': 'data/tectonic/201910/Region1/full_0_0.1.trace',\n", 729 | " 'train_history_hrs': None,\n", 730 | " 'ttl_model_path': None,\n", 731 | " 'write_mbps': 0}\n", 732 | "Command: /work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim/cachesim/../run_py.sh py /work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim/cachesim/simulate_ap.py --config runs/example/baleen/prefetch_ml-on-partial-hit/config.json\n", 733 | "Output dir: runs/example/baleen/prefetch_ml-on-partial-hit/ml-ap-0.798545_6_lru_366.475GB\n", 734 | "Logging to runs/example/baleen/prefetch_ml-on-partial-hit/ml-ap-0.798545_6_lru_366.475GB/full_0_0.1.out\n", 735 | "[LightGBM] [Fatal] Could not open tmp/example/201910_Region1_0_0.1/ea_5892.86_wr_35.599_admit_threshold_binary.model\n", 736 | "Traceback (most recent call last):\n", 737 | " File \"/opt/conda/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", 738 | " return _run_code(code, main_globals, None,\n", 739 | " File \"/opt/conda/lib/python3.10/runpy.py\", line 86, in _run_code\n", 740 | " exec(code, run_globals)\n", 741 | " File \"/work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim/cachesim/simulate_ap.py\", line 183, in \n", 742 | " sim_cache.simulate_cache_driver(get_parsed_args())\n", 743 | " File \"/work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim/cachesim/sim_cache.py\", line 1480, in simulate_cache_driver\n", 744 | " ap = aps.construct(\n", 745 | " File \"/work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim/cachesim/admission_policies.py\", line 974, in construct\n", 746 | " ap = NewMLAP(threshold, model_path=options.learned_ap_model_path, 
**kwargs_)\n", 747 | " File \"/work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim/cachesim/admission_policies.py\", line 349, in __init__\n", 748 | " super().__init__(*args, **kwargs)\n", 749 | " File \"/work/Baleen-FAST24-Artifact/Baleen-FAST24/BCacheSim/cachesim/admission_policies.py\", line 294, in __init__\n", 750 | " self.gbm = lgb.Booster(model_file=model_path)\n", 751 | " File \"/home/wonglkd_globusid_org/.local/lib/python3.10/site-packages/lightgbm/basic.py\", line 2639, in __init__\n", 752 | " _safe_call(_LIB.LGBM_BoosterCreateFromModelfile(\n", 753 | " File \"/home/wonglkd_globusid_org/.local/lib/python3.10/site-packages/lightgbm/basic.py\", line 125, in _safe_call\n", 754 | " raise LightGBMError(_LIB.LGBM_GetLastError().decode('utf-8'))\n", 755 | "lightgbm.basic.LightGBMError: Could not open tmp/example/201910_Region1_0_0.1/ea_5892.86_wr_35.599_admit_threshold_binary.model\n" 756 | ] 757 | }, 758 | { 759 | "ename": "", 760 | "evalue": "1", 761 | "output_type": "error", 762 | "traceback": [] 763 | } 764 | ], 765 | "source": [ 766 | "./BCacheSim/run_py.sh py -B -m BCacheSim.cachesim.simulate_ap --config runs/example/baleen/prefetch_ml-on-partial-hit/config.json" 767 | ] 768 | }, 769 | { 770 | "cell_type": "markdown", 771 | "id": "cc153bbb-c0de-4efe-96bd-6df4a88886dc", 772 | "metadata": {}, 773 | "source": [ 774 | "### To examine the results, run [Baleen-FAST24/notebooks/example/example.ipynb](Baleen-FAST24/notebooks/example/example.ipynb)" 775 | ] 776 | } 777 | ], 778 | "metadata": { 779 | "kernelspec": { 780 | "display_name": "Bash", 781 | "language": "bash", 782 | "name": "bash" 783 | }, 784 | "language_info": { 785 | "codemirror_mode": "shell", 786 | "file_extension": ".sh", 787 | "mimetype": "text/x-sh", 788 | "name": "bash" 789 | } 790 | }, 791 | "nbformat": 4, 792 | "nbformat_minor": 5 793 | } 794 | --------------------------------------------------------------------------------