├── .gitignore ├── .travis.yml ├── AUTHORS ├── CONTRIBUTING.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── experiments ├── .gitignore └── nexmark │ ├── HopcroftKarp.py │ ├── README.txt │ ├── bench.py │ ├── experiments.py │ ├── patterns.py │ ├── plot.py │ ├── plot_all.sh │ ├── plot_bin_shift_cdf.py │ ├── plot_latency_breakdown.py │ ├── plot_latency_timeline.py │ ├── plot_memory_timeline.py │ ├── plot_migration_queries_latency.py │ ├── requirements.txt │ ├── run_bench.sh │ └── run_paper.sh ├── nexmark ├── .gitignore ├── Cargo.lock ├── Cargo.toml └── src │ ├── bin │ ├── differential.rs │ ├── timely.rs │ └── word_count.rs │ ├── config.rs │ ├── event.rs │ ├── lib.rs │ ├── queries │ ├── mod.rs │ ├── q1.rs │ ├── q1_flex.rs │ ├── q2.rs │ ├── q2_flex.rs │ ├── q3.rs │ ├── q3_flex.rs │ ├── q4.rs │ ├── q4_flex.rs │ ├── q4_q6_common.rs │ ├── q4_q6_common_flex.rs │ ├── q5.rs │ ├── q5_flex.rs │ ├── q6.rs │ ├── q6_flex.rs │ ├── q7.rs │ ├── q7_flex.rs │ ├── q8.rs │ └── q8_flex.rs │ └── tools.rs ├── src ├── join.rs ├── lib.rs ├── notificator.rs ├── operator.rs ├── state_machine.rs └── stateful.rs └── tests └── distribution_test.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | build 3 | experiments/plots 4 | experiments/results 5 | experiments/*/results 6 | experiments/*/setups 7 | experiments/nexmark/package-lock.json 8 | experiments/nexmark/node_modules 9 | experiments/nexmark/charts 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | rust: 3 | - stable 4 | 5 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | The following people have contributed to Megaphone: 2 | 3 | Moritz Hoffmann 4 | Frank McSherry 5 | Andrea Lattuada 6 | John Liagouris 7 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Thank you for your interest in contributing! 2 | 3 | Here is some legal stuff that will make you regret clicking on this link. 4 | 5 | By submitting a pull request for this project, you are agreeing to license your contribution under the terms of the project's LICENSE file at the time of your submission (in case it changes or something). You are also certifying that you are in a position to make this agreement, in that you didn't nick your code from someone else, or some project with conflicting licensing requirements. 6 | 7 | If you would like to put explicit copyright notices somewhere, please leave them in the repository's COPYRIGHT file rather than in each file. 
8 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | [[package]] 2 | name = "abomonation" 3 | version = "0.7.0" 4 | source = "registry+https://github.com/rust-lang/crates.io-index" 5 | 6 | [[package]] 7 | name = "abomonation_derive" 8 | version = "0.3.0" 9 | source = "registry+https://github.com/rust-lang/crates.io-index" 10 | dependencies = [ 11 | "quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", 12 | "syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)", 13 | "synstructure 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", 14 | ] 15 | 16 | [[package]] 17 | name = "bitflags" 18 | version = "1.0.4" 19 | source = "registry+https://github.com/rust-lang/crates.io-index" 20 | 21 | [[package]] 22 | name = "cloudabi" 23 | version = "0.0.3" 24 | source = "registry+https://github.com/rust-lang/crates.io-index" 25 | dependencies = [ 26 | "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", 27 | ] 28 | 29 | [[package]] 30 | name = "dynamic_scaling_mechanism" 31 | version = "0.0.1" 32 | dependencies = [ 33 | "abomonation 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", 34 | "abomonation_derive 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", 35 | "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", 36 | "rand 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", 37 | "timely 0.8.0 (git+https://github.com/TimelyDataflow/timely-dataflow.git)", 38 | "zipf 4.0.1 (registry+https://github.com/rust-lang/crates.io-index)", 39 | ] 40 | 41 | [[package]] 42 | name = "fnv" 43 | version = "1.0.6" 44 | source = "registry+https://github.com/rust-lang/crates.io-index" 45 | 46 | [[package]] 47 | name = "fuchsia-cprng" 48 | version = "0.1.1" 49 | source = "registry+https://github.com/rust-lang/crates.io-index" 50 | 51 | [[package]] 52 | name = "getopts" 53 | version = "0.2.18" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | dependencies = [ 56 | "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", 57 | ] 58 | 59 | [[package]] 60 | name = "libc" 61 | version = "0.2.48" 62 | source = "registry+https://github.com/rust-lang/crates.io-index" 63 | 64 | [[package]] 65 | name = "proc-macro2" 66 | version = "0.4.27" 67 | source = "registry+https://github.com/rust-lang/crates.io-index" 68 | dependencies = [ 69 | "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 70 | ] 71 | 72 | [[package]] 73 | name = "quote" 74 | version = "0.3.15" 75 | source = "registry+https://github.com/rust-lang/crates.io-index" 76 | 77 | [[package]] 78 | name = "quote" 79 | version = "0.6.11" 80 | source = "registry+https://github.com/rust-lang/crates.io-index" 81 | dependencies = [ 82 | "proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)", 83 | ] 84 | 85 | [[package]] 86 | name = "rand" 87 | version = "0.5.6" 88 | source = "registry+https://github.com/rust-lang/crates.io-index" 89 | dependencies = [ 90 | "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", 91 | "fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", 92 | "libc 0.2.48 (registry+https://github.com/rust-lang/crates.io-index)", 93 | "rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", 94 | "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", 95 | ] 96 
| 97 | [[package]] 98 | name = "rand_core" 99 | version = "0.3.1" 100 | source = "registry+https://github.com/rust-lang/crates.io-index" 101 | dependencies = [ 102 | "rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 103 | ] 104 | 105 | [[package]] 106 | name = "rand_core" 107 | version = "0.4.0" 108 | source = "registry+https://github.com/rust-lang/crates.io-index" 109 | 110 | [[package]] 111 | name = "serde" 112 | version = "1.0.87" 113 | source = "registry+https://github.com/rust-lang/crates.io-index" 114 | 115 | [[package]] 116 | name = "serde_derive" 117 | version = "1.0.87" 118 | source = "registry+https://github.com/rust-lang/crates.io-index" 119 | dependencies = [ 120 | "proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)", 121 | "quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)", 122 | "syn 0.15.26 (registry+https://github.com/rust-lang/crates.io-index)", 123 | ] 124 | 125 | [[package]] 126 | name = "syn" 127 | version = "0.11.11" 128 | source = "registry+https://github.com/rust-lang/crates.io-index" 129 | dependencies = [ 130 | "quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", 131 | "synom 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)", 132 | "unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)", 133 | ] 134 | 135 | [[package]] 136 | name = "syn" 137 | version = "0.15.26" 138 | source = "registry+https://github.com/rust-lang/crates.io-index" 139 | dependencies = [ 140 | "proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)", 141 | "quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)", 142 | "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 143 | ] 144 | 145 | [[package]] 146 | name = "synom" 147 | version = "0.11.3" 148 | source = "registry+https://github.com/rust-lang/crates.io-index" 149 | dependencies = [ 150 | "unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)", 151 | ] 152 | 153 | [[package]] 154 | name = "synstructure" 155 | version = "0.6.1" 156 | source = "registry+https://github.com/rust-lang/crates.io-index" 157 | dependencies = [ 158 | "quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", 159 | "syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)", 160 | ] 161 | 162 | [[package]] 163 | name = "timely" 164 | version = "0.8.0" 165 | source = "git+https://github.com/TimelyDataflow/timely-dataflow.git#b8c38605d24713ffe921a2a6fbb8ef8c28996f66" 166 | dependencies = [ 167 | "abomonation 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", 168 | "abomonation_derive 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", 169 | "serde 1.0.87 (registry+https://github.com/rust-lang/crates.io-index)", 170 | "serde_derive 1.0.87 (registry+https://github.com/rust-lang/crates.io-index)", 171 | "timely_bytes 0.7.0 (git+https://github.com/TimelyDataflow/timely-dataflow.git)", 172 | "timely_communication 0.8.0 (git+https://github.com/TimelyDataflow/timely-dataflow.git)", 173 | "timely_logging 0.7.1 (git+https://github.com/TimelyDataflow/timely-dataflow.git)", 174 | ] 175 | 176 | [[package]] 177 | name = "timely_bytes" 178 | version = "0.7.0" 179 | source = "git+https://github.com/TimelyDataflow/timely-dataflow.git#b8c38605d24713ffe921a2a6fbb8ef8c28996f66" 180 | 181 | [[package]] 182 | name = "timely_communication" 183 | version = "0.8.0" 184 | source = 
"git+https://github.com/TimelyDataflow/timely-dataflow.git#b8c38605d24713ffe921a2a6fbb8ef8c28996f66" 185 | dependencies = [ 186 | "abomonation 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", 187 | "abomonation_derive 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", 188 | "getopts 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)", 189 | "serde 1.0.87 (registry+https://github.com/rust-lang/crates.io-index)", 190 | "serde_derive 1.0.87 (registry+https://github.com/rust-lang/crates.io-index)", 191 | "timely_bytes 0.7.0 (git+https://github.com/TimelyDataflow/timely-dataflow.git)", 192 | "timely_logging 0.7.1 (git+https://github.com/TimelyDataflow/timely-dataflow.git)", 193 | ] 194 | 195 | [[package]] 196 | name = "timely_logging" 197 | version = "0.7.1" 198 | source = "git+https://github.com/TimelyDataflow/timely-dataflow.git#b8c38605d24713ffe921a2a6fbb8ef8c28996f66" 199 | 200 | [[package]] 201 | name = "unicode-width" 202 | version = "0.1.5" 203 | source = "registry+https://github.com/rust-lang/crates.io-index" 204 | 205 | [[package]] 206 | name = "unicode-xid" 207 | version = "0.0.4" 208 | source = "registry+https://github.com/rust-lang/crates.io-index" 209 | 210 | [[package]] 211 | name = "unicode-xid" 212 | version = "0.1.0" 213 | source = "registry+https://github.com/rust-lang/crates.io-index" 214 | 215 | [[package]] 216 | name = "winapi" 217 | version = "0.3.6" 218 | source = "registry+https://github.com/rust-lang/crates.io-index" 219 | dependencies = [ 220 | "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 221 | "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 222 | ] 223 | 224 | [[package]] 225 | name = "winapi-i686-pc-windows-gnu" 226 | version = "0.4.0" 227 | source = "registry+https://github.com/rust-lang/crates.io-index" 228 | 229 | [[package]] 230 | name = "winapi-x86_64-pc-windows-gnu" 231 | version = "0.4.0" 232 | source = "registry+https://github.com/rust-lang/crates.io-index" 233 | 234 | [[package]] 235 | name = "zipf" 236 | version = "4.0.1" 237 | source = "registry+https://github.com/rust-lang/crates.io-index" 238 | dependencies = [ 239 | "rand 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", 240 | ] 241 | 242 | [metadata] 243 | "checksum abomonation 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f6b72851223d8747938515812ef24e3f678452a4e5201d61e1954ab378ad8601" 244 | "checksum abomonation_derive 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e0bb1889db0b87cd8ef839c56b5283e28c8db68cac904b400a6170c9af1e673c" 245 | "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" 246 | "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" 247 | "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" 248 | "checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" 249 | "checksum getopts 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "0a7292d30132fb5424b354f5dc02512a86e4c516fe544bb7a25e7f266951b797" 250 | "checksum libc 0.2.48 (registry+https://github.com/rust-lang/crates.io-index)" = 
"e962c7641008ac010fa60a7dfdc1712449f29c44ef2d4702394aea943ee75047" 251 | "checksum proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)" = "4d317f9caece796be1980837fd5cb3dfec5613ebdb04ad0956deea83ce168915" 252 | "checksum quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e920b65c65f10b2ae65c831a81a073a89edd28c7cce89475bff467ab4167a" 253 | "checksum quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)" = "cdd8e04bd9c52e0342b406469d494fcb033be4bdbe5c606016defbb1681411e1" 254 | "checksum rand 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c618c47cd3ebd209790115ab837de41425723956ad3ce2e6a7f09890947cacb9" 255 | "checksum rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" 256 | "checksum rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0e7a549d590831370895ab7ba4ea0c1b6b011d106b5ff2da6eee112615e6dc0" 257 | "checksum serde 1.0.87 (registry+https://github.com/rust-lang/crates.io-index)" = "2e20fde37801e83c891a2dc4ebd3b81f0da4d1fb67a9e0a2a3b921e2536a58ee" 258 | "checksum serde_derive 1.0.87 (registry+https://github.com/rust-lang/crates.io-index)" = "633e97856567e518b59ffb2ad7c7a4fd4c5d91d9c7f32dd38a27b2bf7e8114ea" 259 | "checksum syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d3b891b9015c88c576343b9b3e41c2c11a51c219ef067b264bd9c8aa9b441dad" 260 | "checksum syn 0.15.26 (registry+https://github.com/rust-lang/crates.io-index)" = "f92e629aa1d9c827b2bb8297046c1ccffc57c99b947a680d3ccff1f136a3bee9" 261 | "checksum synom 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a393066ed9010ebaed60b9eafa373d4b1baac186dd7e008555b0f702b51945b6" 262 | "checksum synstructure 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3a761d12e6d8dcb4dcf952a7a89b475e3a9d69e4a69307e01a470977642914bd" 263 | "checksum timely 0.8.0 (git+https://github.com/TimelyDataflow/timely-dataflow.git)" = "" 264 | "checksum timely_bytes 0.7.0 (git+https://github.com/TimelyDataflow/timely-dataflow.git)" = "" 265 | "checksum timely_communication 0.8.0 (git+https://github.com/TimelyDataflow/timely-dataflow.git)" = "" 266 | "checksum timely_logging 0.7.1 (git+https://github.com/TimelyDataflow/timely-dataflow.git)" = "" 267 | "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" 268 | "checksum unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8c1f860d7d29cf02cb2f3f359fd35991af3d30bac52c57d265a3c461074cb4dc" 269 | "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" 270 | "checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" 271 | "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 272 | "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 273 | "checksum zipf 4.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a9d654b300e2eb573bb4bd25a1f7f00997b2caf3664f55b2755cd8b60565053e" 274 | 
-------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dynamic_scaling_mechanism" 3 | version = "0.0.1" 4 | 5 | [features] 6 | default = ["bin-8"] 7 | 8 | bin-1 = [] 9 | bin-2 = [] 10 | bin-3 = [] 11 | bin-4 = [] 12 | bin-5 = [] 13 | bin-6 = [] 14 | bin-7 = [] 15 | bin-8 = [] 16 | bin-9 = [] 17 | bin-10 = [] 18 | bin-11 = [] 19 | bin-12 = [] 20 | bin-13 = [] 21 | bin-14 = [] 22 | bin-15 = [] 23 | bin-16 = [] 24 | bin-17 = [] 25 | bin-18 = [] 26 | bin-19 = [] 27 | bin-20 = [] 28 | 29 | fake_stateful = [] 30 | 31 | [dependencies] 32 | timely = { git = "https://github.com/TimelyDataflow/timely-dataflow.git" } 33 | #timely = { git = "https://github.com/antiguru/timely-dataflow.git", branch = "session_cease"} 34 | #timely = "0.8" 35 | #abomonation = { git = "https://github.com/frankmcsherry/abomonation.git" } 36 | abomonation = "^0.7" 37 | abomonation_derive = "^0.3" 38 | rand = "^0.5" 39 | fnv="1.0" 40 | zipf = "^4.0" 41 | 42 | [profile.release] 43 | # opt-level = 3 44 | debug = true 45 | # rpath = false 46 | # lto = false 47 | # codegen-units = 16 48 | # debug-assertions = false 49 | panic = "abort" 50 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017, 2018 ETH Zurich 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /experiments/.gitignore: -------------------------------------------------------------------------------- 1 | venv/ 2 | __pycache__/ 3 | *.pyc 4 | gnuplot.script 5 | -------------------------------------------------------------------------------- /experiments/nexmark/HopcroftKarp.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015, Sofiat Olaosebikan. All Rights Reserved 2 | 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 
7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | 16 | 17 | class HopcroftKarp(object): 18 | def __init__(self, graph): 19 | """ 20 | :param graph: an unweighted bipartite graph represented as a dictionary. 21 | Vertices in the left and right vertex set must have different labelling 22 | :return: a maximum matching of the given graph represented as a dictionary. 23 | """ 24 | self._matching = {} 25 | self._dfs_paths = [] 26 | self._dfs_parent = {} 27 | 28 | self._left = set(graph.keys()) 29 | self._right = set() 30 | 31 | for value in graph.values(): 32 | self._right.update(value) 33 | for vertex in self._left: 34 | for neighbour in graph[vertex]: 35 | if neighbour not in graph: 36 | graph[neighbour] = set() 37 | graph[neighbour].add(vertex) 38 | else: 39 | graph[neighbour].add(vertex) 40 | 41 | self._graph = graph 42 | 43 | def __bfs(self): 44 | layers = [] 45 | layer = set() 46 | for vertex in self._left: # for each free vertex in the left vertex set 47 | if vertex not in self._matching: # confirms that the vertex is free 48 | layer.add(vertex) 49 | layers.append(layer) 50 | visited = set() # to keep track of the visited vertices 51 | while True: 52 | # we take the most recent layer in the partitioning on every repeat 53 | layer = layers[-1] 54 | new_layer = set() # new list for subsequent layers 55 | for vertex in layer: 56 | if vertex in self._left: # if true, we traverse unmatched edges to vertices in right 57 | visited.add(vertex) 58 | for neighbour in self._graph[vertex]: 59 | # check if the neighbour is not already visited 60 | # check if vertex is matched or the edge between neighbour and vertex is not matched 61 | if neighbour not in visited and (vertex not in self._matching or neighbour != self._matching[vertex]): 62 | new_layer.add(neighbour) 63 | else: # we traverse matched edges to vertices in left 64 | visited.add(vertex) # we don't want to traverse the vertex again 65 | for neighbour in self._graph[vertex]: 66 | # check if the neighbour is not already visited 67 | # check if vertex is in the matching and if the edge between vertex and neighbour is matched 68 | if neighbour not in visited and (vertex in self._matching and neighbour == self._matching[vertex]): 69 | new_layer.add(neighbour) 70 | layers.append(new_layer) # we add the new layer to the set of layers 71 | # if new_layer is empty, we have to break the BFS while loop.... 72 | if len(new_layer) == 0: 73 | return layers # break 74 | # else, we terminate search at the first layer k where one or more free vertices in V are reached 75 | if any(vertex in self._right and vertex not in self._matching for vertex in new_layer): 76 | return layers # break 77 | # break 78 | 79 | # -------------------------------------------------------------------------------------------------------------- 80 | # if we are able to collate these free vertices, we run DFS recursively on each of them 81 | # this algorithm finds a maximal set of vertex disjoint augmenting paths of length k (shortest path), 82 | # stores them in P and increments M... 
83 | # -------------------------------------------------------------------------------------------------------------- 84 | def __dfs(self, v, index, layers): 85 | """ 86 | we recursively run dfs on each vertex in free_vertex, 87 | 88 | :param v: vertices in free_vertex 89 | :return: True if P is not empty (i.e., the maximal set of vertex-disjoint alternating path of length k) 90 | and false otherwise. 91 | """ 92 | if index == 0: 93 | path = [v] 94 | while self._dfs_parent[v] != v: 95 | path.append(self._dfs_parent[v]) 96 | v = self._dfs_parent[v] 97 | self._dfs_paths.append(path) 98 | return True 99 | for neighbour in self._graph[v]: # check the neighbours of vertex 100 | if neighbour in layers[index - 1]: 101 | # if neighbour is in left, we are traversing unmatched edges.. 102 | if neighbour in self._dfs_parent: 103 | continue 104 | if (neighbour in self._left and (v not in self._matching or neighbour != self._matching[v])) or \ 105 | (neighbour in self._right and (v in self._matching and neighbour == self._matching[v])): 106 | self._dfs_parent[neighbour] = v 107 | if self.__dfs(neighbour, index-1, layers): 108 | return True 109 | return False 110 | 111 | def maximum_matching(self): 112 | while True: 113 | layers = self.__bfs() 114 | # we break out of the whole while loop if the most recent layer added to layers is empty 115 | # since if there are no vertices in the recent layer, then there is no way augmenting paths can be found 116 | if len(layers[-1]) == 0: 117 | break 118 | free_vertex = set([vertex for vertex in layers[-1] if vertex not in self._matching]) 119 | 120 | # the maximal set of vertex-disjoint augmenting path and parent dictionary 121 | # has to be cleared each time the while loop runs 122 | # self._dfs_paths.clear() - .clear() and .copy() attribute works for python 3.3 and above 123 | del self._dfs_paths[:] 124 | self._dfs_parent.clear() 125 | 126 | for vertex in free_vertex: # O(m) - every vertex considered once, each edge considered once 127 | # this creates a loop of the vertex to itself in the parent dictionary, 128 | self._dfs_parent[vertex] = vertex 129 | self.__dfs(vertex, len(layers)-1, layers) 130 | 131 | # if the set of paths is empty, nothing to add to the matching...break 132 | if len(self._dfs_paths) == 0: 133 | break 134 | 135 | # if not, we swap the matched and unmatched edges in the paths formed and add them to the existing matching. 136 | # the paths are augmenting, so the first and last vertices are free. Edges 1, 3, 5, ..
are thus matched 137 | for path in self._dfs_paths: 138 | for i in range(len(path)): 139 | if i % 2 == 0: 140 | self._matching[path[i]] = path[i+1] 141 | self._matching[path[i+1]] = path[i] 142 | return self._matching 143 | -------------------------------------------------------------------------------- /experiments/nexmark/README.txt: -------------------------------------------------------------------------------- 1 | python3 -c 'import bench; bench.non_migrating(0)' --clusterpath /home/andreal/Src/dynamic-scaling-mechanism/nexmark --serverprefix andreal@fdr 2 | python3 -c 'import bench; bench.non_migrating(1)' --clusterpath /home/andreal/Src/dynamic-scaling-mechanism/nexmark --serverprefix andreal@fdr 3 | python3 -c 'import bench; bench.non_migrating(2)' --clusterpath /home/andreal/Src/dynamic-scaling-mechanism/nexmark --serverprefix andreal@fdr 4 | python3 -c 'import bench; bench.non_migrating(3)' --clusterpath /home/andreal/Src/dynamic-scaling-mechanism/nexmark --serverprefix andreal@fdr 5 | -------------------------------------------------------------------------------- /experiments/nexmark/experiments.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys, os, datetime 3 | from executor import execute 4 | import time 5 | import shlex 6 | 7 | is_worktree_clean = execute("cd `git rev-parse --show-toplevel`; git diff-index --quiet HEAD -- src/ Cargo.toml nexmark/src/ nexmark/Cargo.toml", check=False) 8 | 9 | if not is_worktree_clean: 10 | shall = input("Work directory dirty. Continue? (y/N) ").lower() == 'y' 11 | if not shall: 12 | sys.exit(1) 13 | 14 | current_commit = ("dirty-" if not is_worktree_clean else "") + execute("git rev-parse HEAD", capture=True) 15 | current_commit = current_commit[:16] 16 | 17 | def eprint(*args, level=None): 18 | attr = [] 19 | if level == "info": 20 | attr.append('1') 21 | attr.append('37') 22 | attr.append('4') 23 | elif level == "run": 24 | attr.append('32') 25 | print("[" + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "] ", '\x1b[%sm' % ';'.join(attr), *args, '\x1b[0m', file=sys.stderr, sep='') 26 | 27 | def ensure_dir(name): 28 | if not os.path.exists(name): 29 | os.makedirs(name) 30 | 31 | def wait_all(processes): 32 | for p in processes: 33 | # p.wait(use_spinner=False) 34 | while p.is_running: 35 | time.sleep(.1) 36 | p.check_errors() 37 | 38 | eprint("commit: {}".format(current_commit)) 39 | 40 | # assumes the experiment code is at this path on the cluster machine(s) 41 | cluster_src_path = None 42 | # cluster_server = "username@server" 43 | cluster_server = None 44 | 45 | def run_cmd(cmd, redirect=None, stderr=False, background=False, node="", dryrun=False): 46 | full_cmd = "cd {}; {}".format(cluster_src_path, cmd) 47 | # eprint("running on {}{}: {}".format(cluster_server, node, full_cmd)) 48 | # if redirect is not None and os.path.exists(redirect): 49 | # return execute("echo \"skipping {}\"".format(redirect), async=background) 50 | cmd = "ssh -o StrictHostKeyChecking=no -T {}{}.ethz.ch {}".format(cluster_server, node, shlex.quote(full_cmd))\ 51 | + (" > {}".format(shlex.quote(redirect)) if redirect else "")\ 52 | + (" 2> {}".format(shlex.quote(stderr)) if stderr else "") 53 | eprint("$ {}".format(cmd), level="run") 54 | if dryrun: 55 | return execute("echo dryrun {}".format(node), async=background) 56 | else: 57 | return execute(cmd, async=background) 58 | -------------------------------------------------------------------------------- 
/experiments/nexmark/patterns.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys, math, os 4 | from collections import namedtuple, defaultdict 5 | from HopcroftKarp import HopcroftKarp 6 | 7 | class InitialPattern(object): 8 | 9 | def __init__(self, bin_shift, workers): 10 | self._bin_shift = bin_shift 11 | self._workers = workers 12 | 13 | def generate_uniform(self): 14 | return [i % self._workers for i in range(2**self._bin_shift)] 15 | 16 | def generate_uniform_skew(self): 17 | generator = InitialPattern(self._bin_shift, self._workers) 18 | map = generator.generate_half() 19 | map = map[:len(map)//2] 20 | map.extend([i % self._workers for i in range(2**(self._bin_shift - 1), 2**self._bin_shift)]) 21 | return map 22 | 23 | def generate_half(self): 24 | return [(i // 2 * 2 + i % 2 * self._workers // 2) % self._workers for i in range(2**self._bin_shift)] 25 | 26 | class MigrationPattern(object): 27 | 28 | def __init__(self, current_map, target_map): 29 | self._current_map = current_map 30 | self._target_map = target_map 31 | 32 | def generate(self): 33 | pass 34 | 35 | class SuddenMigrationPattern(MigrationPattern): 36 | 37 | def generate(self): 38 | yield ("map", self._target_map) 39 | 40 | class FluidMigrationPattern(MigrationPattern): 41 | 42 | def generate(self): 43 | current_map = self._current_map 44 | for (i, (src, dst)) in enumerate(zip(current_map, self._target_map)): 45 | if src != dst: 46 | yield ("diff", {i: dst}) 47 | 48 | class BatchedFluidMigrationPattern(MigrationPattern): 49 | 50 | def generate(self): 51 | current_map = self._current_map.copy() 52 | # Migration as graph representation 53 | graph = defaultdict(set) 54 | # Remember edge labels. (src, dst) -> [bin] 55 | labels = defaultdict(list) 56 | for (i, (src, dst)) in enumerate(zip(self._current_map, self._target_map)): 57 | if src != dst: 58 | graph[src].add(str(dst)) 59 | labels[src, str(dst)].append(i) 60 | while True: 61 | # Compute maximum matching 62 | # Need to copy graph as graph will be modified 63 | matching = HopcroftKarp(graph.copy()).maximum_matching() 64 | # Diffs 65 | diffs = {} 66 | # Filter reverse matchings and update `current_map` at matching positions 67 | for src, dst in list(matching.items()): 68 | # Filter reverse matchings 69 | if isinstance(src, str): 70 | del matching[src] 71 | else: 72 | # The matching belongs to an edge in the migration graph 73 | if len(labels[src, dst]) > 0: 74 | # Determine the entry number 75 | entry = labels[src, dst].pop() 76 | # update map 77 | dst = int(dst) 78 | current_map[entry] = dst 79 | diffs[entry] = dst 80 | elif len(labels[src, dst]) == 0: 81 | # remove edge from graph 82 | graph[src].remove(dst) 83 | # We are done if there are no more matchings 84 | if len(diffs) == 0: 85 | break 86 | # Emit diffs 87 | yield ("diff", diffs) 88 | 89 | class PatternGenerator(object): 90 | 91 | def __init__(self, migration_pattern, initial_pattern, target_pattern): 92 | self._migration_pattern = migration_pattern 93 | self._initial_pattern = initial_pattern 94 | self._target_pattern = target_pattern 95 | 96 | def write_pattern(self, file, pattern, time): 97 | file.write("M ") 98 | file.write(str(time)) 99 | file.write(" ") 100 | for w in pattern: 101 | file.write(str(w)) 102 | file.write(" ") 103 | file.write('\n') 104 | 105 | def write_diff(self, file, pattern, time): 106 | file.write("D ") 107 | file.write(str(time)) 108 | file.write(" ") 109 | for b, w in sorted(pattern.items()): 110 |
file.write(str(b)) 111 | file.write(" ") 112 | file.write(str(w)) 113 | file.write(" ") 114 | file.write('\n') 115 | 116 | def write(self, file, time): 117 | generator = self._migration_pattern(self._initial_pattern, self._target_pattern) 118 | i = 1 119 | for (type, pattern) in generator.generate(): 120 | i += 1 121 | if type == "diff": 122 | self.write_diff(file, pattern, time) 123 | elif type == "map": 124 | self.write_pattern(file, pattern, time) 125 | else: 126 | raise ValueError("Incorrect type: {}".format(type)) 127 | 128 | # for migration in BatchedFluidMigrationPattern([0,0,2,2], [0,1,2,3]).generate(): 129 | # print(migration) 130 | # for migration in BatchedFluidMigrationPattern([0,1,1,2,3], [0,1,2,2,3]).generate(): 131 | # print(migration) 132 | # for migration in BatchedFluidMigrationPattern([0,1,1,2,2,3], [1,1,2,1,2,3]).generate(): 133 | # print(migration) 134 | # print("---") 135 | # for migration in FluidMigrationPattern([0,1,1,2,2,3], [1,1,2,1,2,3]).generate(): 136 | # print(migration) 137 | # 138 | # print(InitialPattern(5, 16).generate_uniform()) 139 | # print(InitialPattern(5, 4).generate_half()) 140 | # # pattern_generator = InitialPattern(20, 32) 141 | # pattern_generator = InitialPattern(5, 4) 142 | # print("---") 143 | # initial_pattern = pattern_generator.generate_uniform() 144 | # generator = PatternGenerator(BatchedFluidMigrationPattern, initial_pattern, pattern_generator.generate_uniform_skew()) 145 | # generator.write(sys.stdout) 146 | 147 | -------------------------------------------------------------------------------- /experiments/nexmark/plot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys, os, json, itertools 4 | from collections import defaultdict 5 | 6 | def parse_filename(name): 7 | kvs = name.rstrip().split('+') 8 | def parsekv(kv): 9 | k, v = kv.split('=') 10 | if v.isdigit(): 11 | v = int(v) 12 | elif v == "True": 13 | v = True 14 | elif v == "False": 15 | v = False 16 | return (k, v) 17 | return (name, list(parsekv(kv) for kv in kvs)) 18 | 19 | def get_all_params(ps): 20 | result = defaultdict(set) 21 | for k, v in list(itertools.chain.from_iterable(ps)): 22 | result[k].add(v) 23 | return {k: sorted(v) for k, v in result.items()} 24 | 25 | def ensure_dir(name): 26 | if not os.path.exists(name): 27 | os.makedirs(name) 28 | 29 | def kv_to_string(config): 30 | keys = sorted(config.keys()) 31 | kv_pairs = [] 32 | for key in keys: 33 | 34 | value = config[key] 35 | if isinstance(value, (str, int)): 36 | kv_pairs.append((key, value)) 37 | else: 38 | kv_pairs.append((key, "|".join(value))) 39 | return "+".join(map(lambda p: "{}={}".format(p[0], p[1]), kv_pairs)) 40 | 41 | def kv_to_name(config): 42 | kv_pairs = [] 43 | for key, value in sorted(config): 44 | 45 | if isinstance(value, (str, int, float)): 46 | kv_pairs.append((key, value)) 47 | else: 48 | kv_pairs.append((key, ", ".join(value))) 49 | return "; ".join(map(lambda p: "{}: {}".format(p[0], p[1]), kv_pairs)) 50 | 51 | def get_files(results_dir): 52 | def is_done(p): 53 | return os.path.exists("{}/{}/done".format(results_dir, p)) 54 | files = [parse_filename(x) for x in os.listdir(results_dir) if is_done(x)] 55 | # print("all params:", get_all_params(list(zip(*files))[1]), file=sys.stderr) 56 | return files 57 | 58 | def _filtering_params(files, filtering): 59 | all_params = get_all_params(x[1] for x in files) 60 | single_params_value = set(x[0] for x in all_params.items() if len(x[1]) == 1) 61 | filtering_params = 
set(x[0] for x in filtering) 62 | for additional_filtering_param in single_params_value.difference(filtering_params): 63 | filtering.append((additional_filtering_param, all_params[additional_filtering_param][0])) 64 | return filtering 65 | 66 | def latency_plots(results_dir, files, filtering): 67 | filtering = _filtering_params(files, filtering) 68 | 69 | def experiment_name(experiment_dict): 70 | if not experiment_dict.get("queries", "").endswith("-flex"): 71 | return "Optimized" 72 | elif experiment_dict.get('fake_stateful', False): 73 | return "Non-stateful" 74 | else: 75 | return experiment_dict.get('migration', 'fluid') 76 | 77 | data = [] 78 | experiments = [] 79 | for filename, config in [x for x in sorted(files, key=lambda x: x[1]) if set(x[1]).issuperset(set(filtering))]: 80 | # print(filename) 81 | experiment_dict = dict(set(config).difference(set(filtering))) 82 | experiments.append(sorted(experiment_dict.items())) 83 | try: 84 | with open("{}/{}/stdout.0".format(results_dir, filename), 'r') as f: 85 | experiment_data = [dict(list({ 86 | "latency": int(x) / 1000000, 87 | "ccdf": float(y), 88 | "experiment": experiment_name(experiment_dict), 89 | }.items()) + list(experiment_dict.items())) for x, y in 90 | [x.split('\t')[1:3] for x in f.readlines() if x.startswith('latency_ccdf')]] 91 | data.append(experiment_data) 92 | except IOError as e: 93 | print("Unexpected error:", e) 94 | pass 95 | 96 | return (filtering, data, experiments) 97 | 98 | def memory_timeline_plots(results_dir, files, filtering): 99 | filtering = _filtering_params(files, filtering) 100 | 101 | data = [] 102 | experiments = [] 103 | for filename, config in [x for x in sorted(files, key=lambda x: x[1]) if set(x[1]).issuperset(set(filtering))]: 104 | experiment_dict = dict(set(config).difference(set(filtering))) 105 | experiments.append(sorted(experiment_dict.items())) 106 | try: 107 | with open("{}/{}/stdout.0".format(results_dir, filename), 'r') as f: 108 | experiment_data = [dict(list({ 109 | "time": float(x) / 1000000000, 110 | "RSS": float(y), 111 | "experiment": "m: {}, q: {}, r: {}".format(experiment_dict.get('migration', "None"), experiment_dict.get('queries', ""), experiment_dict.get('rate', 0)), 112 | }.items()) + list(experiment_dict.items())) for x, y in 113 | [x.split('\t')[1:3] for x in f.readlines() if x.startswith('statm_RSS')]] 114 | # data.extend(experiment_data[0::10]) 115 | data.append(experiment_data) 116 | except IOError as e: 117 | print("Unexpected error:", e) 118 | pass 119 | 120 | return (filtering, data, experiments) 121 | 122 | def latency_timeline_plots(results_dir, files, filtering): 123 | filtering = _filtering_params(files, filtering) 124 | # print(filtering) 125 | # [0.75, 0.50, 0.25, 0.05, 0.01, 0.001, 0.0] 126 | 127 | data = [] 128 | experiments = [] 129 | for filename, config in [x for x in sorted(files, key=lambda x: x[1]) if set(x[1]).issuperset(set(filtering))]: 130 | experiment_dict = dict(set(config).difference(set(filtering))) 131 | experiments.append(sorted(experiment_dict.items())) 132 | experiment_data = [] 133 | try: 134 | with open("{}/{}/stdout.0".format(results_dir, filename), 'r') as f: 135 | for vals in [x.split('\t')[1:] for x in f.readlines() if x.startswith('summary_timeline')]: 136 | # for p, l in [(.25, 1), (.5, 2), (.75, 3), (.99, 4), (.999, 5), (1, 6)]: 137 | for p, l in [(.25, 1), (.5, 2), (.99, 4), (1, 6)]: 138 | experiment_data.append(dict(list({ 139 | "time": float(vals[0]) / 1000000000, 140 | "latency": int(vals[l]) / 1000000, 141 | "p": p, 142 | 
"experiment": "m: {}, r: {}, f: {}".format(experiment_dict.get('migration', "?"), experiment_dict.get('rate', 0), experiment_dict.get('fake_stateful', False)), 143 | }.items()) + list(experiment_dict.items()))) 144 | data.append(experiment_data) 145 | except IOError as e: 146 | print("Unexpected error:", e) 147 | pass 148 | 149 | return (filtering, data, experiments) 150 | 151 | def latency_breakdown_plots(results_dir, files, filtering): 152 | filtering = _filtering_params(files, filtering) 153 | # print(filtering) 154 | # [0.75, 0.50, 0.25, 0.05, 0.01, 0.001, 0.0] 155 | 156 | data = [] 157 | experiments = [] 158 | for filename, config in [x for x in sorted(files, key=lambda x: x[1]) if set(x[1]).issuperset(set(filtering))]: 159 | experiment_dict = dict(set(config).difference(set(filtering))) 160 | experiments.append(sorted(experiment_dict.items())) 161 | experiment_data = [] 162 | try: 163 | with open("{}/{}/stdout.0".format(results_dir, filename), 'r') as f: 164 | filtered_max_latency = [0 for _ in range(6)] 165 | max_latency = [0 for _ in range(6)] 166 | lines = [x.strip().split('\t') for x in f.readlines()] 167 | median = "undef" 168 | control_times = [] 169 | for vals in lines: 170 | if vals[0].startswith('latency_ccdf'): 171 | if float(vals[2]) <= 0.7: 172 | median = int(vals[1]) 173 | break 174 | elif vals[0].startswith('control_time'): 175 | control_times.append(int(vals[1])) 176 | control_times.reverse() 177 | # print(median, file=sys.stderr) 178 | duration = 0 179 | migration_duration = 0 180 | migration_max = 0 181 | last_vals = None 182 | sample_interval = None 183 | ignore_count = 2 184 | last_was_migrating = False 185 | cached_vals = [] 186 | consider_measurement = False 187 | # print(control_times, file=sys.stderr) 188 | for vals in lines: 189 | if vals[0].startswith("migration_done"): 190 | # migration_done timestamp duration 191 | migration_duration += int(vals[2]) 192 | migration_max = max(migration_max, int(vals[2])) 193 | if vals[0].startswith('summary_timeline'): 194 | time = int(vals[1]) 195 | if sample_interval is None and last_vals is not None: 196 | sample_interval = time - int(last_vals[1]) 197 | if sample_interval is not None and len(control_times) > 0: 198 | if control_times[-1] + sample_interval <= time: 199 | # print(control_times[-1], sample_interval, time, file=sys.stderr) 200 | control_time = control_times.pop() 201 | # print(vals, file=sys.stderr) 202 | # for i in range(0, len(max_latency)): 203 | # v = int(vals[i + 2]) 204 | # max_latency[i] = max(max_latency[i], v) 205 | # if consider_measurement: 206 | # duration += int(vals[1]) - int(last_vals[1]) 207 | # else: 208 | # duration += int(vals[1]) - control_time 209 | consider_measurement = True 210 | if consider_measurement: 211 | if int(vals[3]) > 2 * median: 212 | # print(vals, file=sys.stderr) 213 | for i in range(0, len(max_latency)): 214 | v = int(vals[i + 2]) 215 | max_latency[i] = max(max_latency[i], v) 216 | duration += int(vals[1]) - int(last_vals[1]) 217 | else: 218 | consider_measurement = False 219 | # print(vals, file=sys.stderr) 220 | # if last_vals is not None: # and int(vals[1]) * 2 > int(lines[-1][1]): 221 | # # print(vals, migration_start, migration_end, max_latency) 222 | # # print(vals, median, file=sys.stderr) 223 | # if int(vals[3]) > 2 * median: 224 | # last_was_migrating = True 225 | # if ignore_count == 0: 226 | # # print(vals, file=sys.stderr) 227 | # for i in range(0, len(max_latency)): 228 | # v = int(vals[i + 2]) 229 | # filtered_max_latency[i] = max(filtered_max_latency[i], 
v) 230 | # else: 231 | # ignore_count -= 1 232 | # cached_vals.append(vals) 233 | # for i in range(0, len(max_latency)): 234 | # v = int(vals[i + 2]) 235 | # max_latency[i] = max(max_latency[i], v) 236 | # duration += int(vals[1]) - int(last_vals[1]) 237 | # elif last_was_migrating: 238 | # ignore_count = 2 239 | # last_was_migrating = False 240 | last_vals = vals 241 | 242 | # print(duration, max_latency, file=sys.stderr) 243 | if duration > 0 or migration_duration > 0: 244 | norm = 1000000000 245 | experiment_data.append(dict(list({ 246 | "migration_duration": duration/norm, 247 | "precise_duration": migration_duration/norm, 248 | "precise_max": migration_max/norm, 249 | "max_p_.25": max_latency[0]/norm, 250 | "max_p_.5": max_latency[1]/norm, 251 | "max_p_.75": max_latency[2]/norm, 252 | "max_p_.99": max_latency[3]/norm, 253 | "max_p_.999": max_latency[4]/norm, 254 | "max_p_1": max_latency[5]/norm, 255 | "filtered_max_p_.25": filtered_max_latency[0]/norm, 256 | "filtered_max_p_.5": filtered_max_latency[1]/norm, 257 | "filtered_max_p_.75": filtered_max_latency[2]/norm, 258 | "filtered_max_p_.99": filtered_max_latency[3]/norm, 259 | "filtered_max_p_.999": filtered_max_latency[4]/norm, 260 | "filtered_max_p_1": filtered_max_latency[5]/norm, 261 | "experiment": "m: {}, r: {}, f: {}".format(experiment_dict.get('migration', "?"), experiment_dict.get('rate', 0), experiment_dict.get('fake_stateful', False)), 262 | }.items()) + list(experiment_dict.items()))) 263 | data.append(experiment_data) 264 | except IOError as e: 265 | print("Unexpected error:", e) 266 | pass 267 | 268 | return (filtering, data, experiments) 269 | 270 | def plot_name(base_name): 271 | return os.path.basename(base_name)[:-3] 272 | 273 | def quote_str(s): 274 | if isinstance(s, str): 275 | return "{}".format(s) 276 | return str(s) 277 | 278 | if __name__ == "__main__" and len(sys.argv) >= 3 and sys.argv[1] == '--list-params': 279 | results_dir = sys.argv[2] 280 | print(json.dumps(get_all_params(x[1] for x in get_files(results_dir)))) 281 | -------------------------------------------------------------------------------- /experiments/nexmark/plot_bin_shift_cdf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # ./plot_latency_timeline.py results/98f4e2fa2e8bc839/ "[ ('bin_shift', 8), ('duration', 120), ('machine_local', True), ('processes', 2), ('workers', 8), ]" 4 | 5 | import argparse 6 | import json 7 | import os 8 | import sys 9 | 10 | from collections import defaultdict 11 | 12 | import plot 13 | 14 | parser = argparse.ArgumentParser(description="Plot") 15 | parser.add_argument('results_dir') 16 | parser.add_argument('filtering') 17 | parser.add_argument('--json', action='store_true') 18 | parser.add_argument('--gnuplot', action='store_true') 19 | parser.add_argument('--terminal', default="pdf") 20 | args = parser.parse_args() 21 | 22 | results_dir = args.results_dir 23 | files = plot.get_files(results_dir) 24 | filtering = eval(args.filtering) 25 | 26 | all_data = [] 27 | all_filtering = defaultdict(set) 28 | for bin_shift in range(4, 21): 29 | if bin_shift % 2 == 1: 30 | continue 31 | filtering_bin_shift = list(filtering) 32 | filtering_bin_shift.append(('bin_shift', bin_shift)) 33 | graph_filtering, data, experiments = plot.latency_plots(results_dir, files, filtering_bin_shift) 34 | for ds in data: 35 | for d in ds: 36 | d['bin_shift'] = bin_shift 37 | if d['queries'].endswith('-flex'): 38 | d['queries'] = d['queries'][:-5] 39 | else: 40 | # 
d['experiment'] = "" 41 | d['bin_shift'] = "Native" 42 | all_data.extend(data) 43 | for k, vs in graph_filtering.items(): 44 | for v in vs: 45 | all_filtering[k].add(v) 46 | graph_filtering = {} 47 | for k, v in all_filtering.items(): 48 | if len(v) == 1: 49 | graph_filtering[k] = next(iter(v)) 50 | else: 51 | graph_filtering[k] = ",".join(map(str, sorted(v))) 52 | graph_filtering = list(x for x in graph_filtering.items() if x[0] is not 'bin_shift') 53 | 54 | # print(graph_filtering, file=sys.stderr) 55 | data = all_data 56 | 57 | # Try to extract duration from filter pattern 58 | duration = next((x[1] for x in filtering if x[0] == 'duration'), 450) 59 | 60 | vega_lite = { 61 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 62 | "hconcat": [ 63 | { 64 | "mark": "line", 65 | "encoding": { 66 | "x": { "field": "latency", "type": "quantitative", "axis": { "format": "e", "labelAngle": -90 }, "scale": { "type": "log" }}, 67 | "y": { "field": "ccdf", "type": "quantitative", "scale": { "type": "log" }}, 68 | # "row": { "field": "experiment", "type": "nominal" }, 69 | "column": { "field": "queries", "type": "nominal" }, 70 | "stroke": { "field": "bin_shift", "type": "nominal", "legend": None }, 71 | "shape": { "field": "bin_shift", "type": "nominal", "legend": None } 72 | } 73 | }, 74 | { 75 | "mark": "point", 76 | "encoding": { 77 | "shape": { "field": "bin_shift", "aggregate": "min", "type": "nominal", "legend": None }, 78 | "fill": { "field": "bin_shift", "aggregate": "min", "type": "nominal", "legend": None }, 79 | "y": { "field": "bin_shift", "type": "nominal", "title": None, 80 | # "axis": { "labels": False, "labelFont": "Times" } 81 | } 82 | } 83 | } 84 | ], 85 | "config": { 86 | # "text": { 87 | # "font": "Helvetica Neue", 88 | # }, 89 | "axis": { 90 | "labelFont": "monospace", 91 | "labelFontSize": 20, 92 | "titleFont": "cursive", 93 | "titleFontSize": 30, 94 | "titlePadding": 20 95 | }, 96 | "title": { 97 | "font": "monospace", 98 | "fontSize": 40, 99 | }, 100 | }, 101 | "data": { 102 | "values": [d for ds in data for d in ds] 103 | } 104 | }; 105 | 106 | title = plot.kv_to_name(dict(graph_filtering)) 107 | 108 | if args.json: 109 | extension = "json" 110 | elif args.gnuplot: 111 | extension = "gp" 112 | else: 113 | extension = "html" 114 | vega_lite["title"] = title 115 | 116 | 117 | html = """ 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 |
130 | 131 | 157 | 158 | 159 | """ 160 | 161 | 162 | commit = results_dir.rstrip('/').split('/')[-1] 163 | # print("commit:", commit, file=sys.stderr) 164 | 165 | plot.ensure_dir("charts/{}".format(commit)) 166 | 167 | graph_filename = "{}+{}.{}".format(plot.plot_name(__file__), plot.kv_to_string(dict(graph_filtering)), extension) 168 | chart_filename = "charts/{}/{}".format(commit, graph_filename) 169 | 170 | def sort_mixed(x): 171 | if isinstance(x, str): 172 | return -1 173 | else: 174 | return x 175 | 176 | with open(chart_filename, 'w') as c: 177 | if args.json: 178 | print(json.dumps(vega_lite), file=c) 179 | if args.gnuplot: 180 | all_headers = set() 181 | all_bin_shifts = set() 182 | 183 | for ds in data: 184 | for d in ds: 185 | for k in d.keys(): 186 | all_headers.add(k) 187 | all_bin_shifts.add(d["bin_shift"]) 188 | # all_headers.remove("bin_shift") 189 | all_headers = sorted(all_headers) 190 | graph_filtering_bin_shift = dict(graph_filtering) 191 | with open(chart_filename, 'w') as c: 192 | for bin_shift in sorted(all_bin_shifts, key=sort_mixed): 193 | print('"bin shift {}"'.format(bin_shift), file=c) 194 | print(" ".join(all_headers), file=c) 195 | for ds in data: 196 | for d in ds: 197 | if d["bin_shift"] == bin_shift: 198 | print(" ".join(map(plot.quote_str, [d[k] for k in all_headers])), file=c) 199 | print("\n", file=c) 200 | else: 201 | print(html, file=c) 202 | 203 | 204 | if args.gnuplot: 205 | gnuplot_terminal = args.terminal 206 | gnuplot_filename = "{}+{}.{}".format(plot.plot_name(__file__), plot.kv_to_string(dict(graph_filtering)), "gnuplot") 207 | gnuplot_filename = "charts/{}/{}".format(commit, gnuplot_filename) 208 | gnuplot_out_filename = "{}+{}.{}".format(plot.plot_name(__file__), plot.kv_to_string(dict(graph_filtering)), gnuplot_terminal) 209 | gnuplot_out_filename = "charts/{}/{}".format(commit, gnuplot_out_filename) 210 | ccdf_index = all_headers.index("ccdf") + 1 211 | latency_index = all_headers.index("latency") + 1 212 | bin_shift_index = all_headers.index("bin_shift") + 1 213 | 214 | # fix long titles 215 | if len(title) > 79: 216 | idx = title.find(" ", int(len(title) / 2)) 217 | if idx != -1: 218 | title = "{}\\n{}".format(title[:idx], title[idx:]) 219 | with open(gnuplot_filename, 'w') as c: 220 | print("""\ 221 | set terminal {gnuplot_terminal} font \"LinuxLibertine, 10\" 222 | set logscale x 223 | set logscale y 224 | 225 | set for [i=1:9] linetype i dashtype i 226 | 227 | set format x "10^{{%T}}" 228 | set format y "10^{{%T}}" 229 | set grid xtics ytics 230 | 231 | set xlabel "Latency [ns]" 232 | set ylabel "CCDF" 233 | set title "{title}" 234 | 235 | set key left bottom box 236 | 237 | set output '{gnuplot_out_filename}' 238 | stats '{chart_filename}' using 0 nooutput 239 | plot for [i=0:(STATS_blocks - 1)] '{chart_filename}' using {latency_index}:{ccdf_index} index i title column({bin_shift_index}) with points 240 | """.format(chart_filename=chart_filename, 241 | gnuplot_terminal=gnuplot_terminal, 242 | gnuplot_out_filename=gnuplot_out_filename, 243 | latency_index=latency_index, 244 | ccdf_index=ccdf_index, 245 | bin_shift_index=bin_shift_index, 246 | title=title.replace("_", "\\\\_"), 247 | ), file=c) 248 | print(gnuplot_filename) 249 | print(os.getcwd() + "/" + gnuplot_filename, file=sys.stderr) 250 | else: 251 | print(chart_filename) 252 | print(os.getcwd() + "/" + chart_filename, file=sys.stderr) 253 | -------------------------------------------------------------------------------- /experiments/nexmark/plot_latency_breakdown.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # ./plot_latency_timeline.py results/98f4e2fa2e8bc839/ "[ ('bin_shift', 8), ('duration', 120), ('machine_local', True), ('processes', 2), ('workers', 8), ]" 4 | 5 | import sys, os, shutil, json 6 | import argparse 7 | import plot 8 | from collections import defaultdict 9 | 10 | parser = argparse.ArgumentParser(description="Plot") 11 | parser.add_argument('results_dir') 12 | parser.add_argument('filtering') 13 | parser.add_argument('primary_group') 14 | parser.add_argument('secondary_group') 15 | parser.add_argument('--json', action='store_true') 16 | parser.add_argument('--gnuplot', action='store_true') 17 | parser.add_argument('--terminal', default="pdf") 18 | parser.add_argument('--filter', nargs='+', default=[]) 19 | parser.add_argument('--rename', default="{}") 20 | parser.add_argument('--name') 21 | args = parser.parse_args() 22 | print(args, file=sys.stderr) 23 | 24 | results_dir = args.results_dir 25 | files = plot.get_files(results_dir) 26 | # for f in files: 27 | # print(f[1]) 28 | filtering = eval(args.filtering) 29 | rename = eval(args.rename) 30 | 31 | if len(args.filter) > 0: 32 | graph_filtering = [] 33 | data = [] 34 | experiments = [] 35 | for filter in args.filter: 36 | filter = eval(filter) 37 | # print(filtering+filter, file=sys.stderr) 38 | gf, da, ex = plot.latency_breakdown_plots(results_dir, files, filtering + filter) 39 | for f in ex: 40 | # print(f, file=sys.stderr) 41 | f.extend(filter) 42 | graph_filtering.extend(gf) 43 | data.extend(da) 44 | experiments.extend(ex) 45 | # print("experiments", experiments, file=sys.stderr) 46 | else: 47 | graph_filtering, data, experiments = plot.latency_breakdown_plots(results_dir, files, filtering) 48 | # print(type(graph_filtering), type(data), type(experiments), file=sys.stderr) 49 | 50 | commit = results_dir.rstrip('/').split('/')[-1] 51 | # print("commit:", commit, file=sys.stderr) 52 | 53 | if len(data) == 0: 54 | print("No data found: {}".format(filtering), file=sys.stderr) 55 | exit(0) 56 | 57 | 58 | # Try to extract duration from filter pattern 59 | duration = next((x[1] for x in filtering if x[0] == 'duration'), 450) 60 | 61 | plot.ensure_dir("charts/{}".format(commit)) 62 | 63 | if args.json: 64 | extension = "json" 65 | elif args.gnuplot: 66 | extension = "gnuplot" 67 | else: 68 | extension = "html" 69 | 70 | plot_name = plot.kv_to_string(dict(graph_filtering)) 71 | 72 | def get_chart_filename(extension): 73 | name = "" 74 | if args.name: 75 | name = "_{}".format(args.name) 76 | graph_filename = "{}{}+{}.{}".format(plot.plot_name(__file__), name, plot_name, extension) 77 | return "charts/{}/{}".format(commit, graph_filename) 78 | 79 | chart_filename = get_chart_filename(extension) 80 | # title = ", ".join("{}: {}".format(k, v) for k, v in sorted(graph_filtering.items(), key=lambda t: t[0])) 81 | title = plot.kv_to_name(graph_filtering) 82 | 83 | # for ds in data: 84 | # print(ds) 85 | 86 | if args.gnuplot: 87 | 88 | def get_key(config, key): 89 | for k, v in config: 90 | if k == key: 91 | return v 92 | return None 93 | 94 | # Generate dataset 95 | all_headers = set() 96 | 97 | all_primary = defaultdict(list) 98 | all_secondary = defaultdict(list) 99 | 100 | for ds in data: 101 | for d in ds: 102 | for k in d.keys(): 103 | all_headers.add(k) 104 | for config, ds in zip(iter(experiments), iter(data)): 105 | all_primary[get_key(config, args.primary_group)].append((config, ds)) 106 | 
all_secondary[get_key(config, args.secondary_group)].append((config, ds)) 107 | 108 | if len(all_primary) < 1 or len(all_secondary) < 1: 109 | print("nothing found", file=sys.stderr) 110 | print("all_primary: {}, all_secondary={}".format(all_primary.keys(), all_secondary.keys()), file=sys.stderr) 111 | exit(0) 112 | 113 | all_headers.remove("experiment") 114 | all_headers = sorted(all_headers) 115 | 116 | migration_to_index = defaultdict(list) 117 | 118 | graph_filtering_bin_shift = dict(graph_filtering) 119 | 120 | dataset_filename = get_chart_filename("dataset") 121 | 122 | all_configs = [] 123 | 124 | def format_key(group, key): 125 | if group == "domain": 126 | return "{}M".format(int(key/1000000)) 127 | if key in rename: 128 | key = rename[key] 129 | return str(key) 130 | 131 | with open(dataset_filename, 'w') as c: 132 | index = 0 133 | # print(" ".join(all_headers), file=c) 134 | for key, item in sorted(all_primary.items()): 135 | key = format_key(args.primary_group, key) 136 | print("\"{}\"".format(str(key).replace("_", "\\\\_")), file=c) 137 | for config, ds in item: 138 | all_configs.append(config) 139 | # print("\"{}\"".format(plot.kv_to_name(config).replace("_", "\\\\_")), file=c) 140 | for d in ds: 141 | print(" ".join(map(plot.quote_str, [d[k] for k in all_headers])), file=c) 142 | # print("\n", file=c) 143 | index += 1 144 | print("\n", file=c) 145 | 146 | for key, item in sorted(all_secondary.items()): 147 | key = format_key(args.secondary_group, key) 148 | print("\"{}\"".format(str(key).replace("_", "\\\\_")), file=c) 149 | for config, ds in item: 150 | all_configs.append(config) 151 | # print("\"{}\"".format(plot.kv_to_name(config).replace("_", "\\\\_")), file=c) 152 | for d in ds: 153 | print(" ".join(map(plot.quote_str, [d[k] for k in all_headers])), file=c) 154 | print("\n", file=c) 155 | # index += 1 156 | 157 | config_reorder = sorted(map(lambda x: (x[1], x[0]), enumerate(all_configs))) 158 | 159 | # Generate gnuplot script 160 | gnuplot_terminal = args.terminal 161 | gnuplot_out_filename = get_chart_filename(gnuplot_terminal) 162 | duration_index = all_headers.index("migration_duration") + 1 163 | p_index = all_headers.index("max_p_1") + 1 164 | p50_index = all_headers.index("max_p_.5") + 1 165 | filtered_p_index = all_headers.index("filtered_max_p_1") + 1 166 | precise_duration_index = all_headers.index("precise_duration") + 1 167 | precise_max_index = all_headers.index("precise_max") + 1 168 | # fix long titles 169 | if len(title) > 79: 170 | idx = title.find(" ", int(len(title) / 2)) 171 | if idx != -1: 172 | title = "{}\\n{}".format(title[:idx], title[idx:]) 173 | with open(chart_filename, 'w') as c: 174 | print("""\ 175 | set terminal {gnuplot_terminal} font \"TimesNewRoman, 20\" enhanced dashed size 5.3, 3.7 crop 176 | set logscale y 177 | set logscale x 178 | 179 | # set format y "10^{{%T}}" 180 | set grid xtics ytics 181 | 182 | set size square 183 | 184 | # set xrange [10**7:10**11] 185 | 186 | set output '{gnuplot_out_filename}' 187 | stats '{dataset_filename}' using 0 nooutput 188 | if (STATS_blocks == 0) exit 189 | set for [i=1:STATS_blocks] linetype i dashtype i 190 | # set for [i=1:STATS_blocks] linetype i dashtype i 191 | # set title "{title}" 192 | # set yrange [10**floor(log10(STATS_min)): 10**ceil(log10(STATS_max))] 193 | set xrange [10**-1:10**3] 194 | set yrange [5*10**-3: 2*10**2] 195 | 196 | set ylabel "Max latency [s]" 197 | set xlabel "Duration [s]" 198 | set multiplot 199 | set origin 0, 0 200 | set lmargin 0 201 | set rmargin at screen .70 
202 | set key title "{key1}" right outside top width 3 #Left at screen 1, .95 203 | plot for [i={index}:*] '{dataset_filename}' using {duration_index}:{p_index} index (i+0) with points title columnheader(1) 204 | set key title "{key2}" right outside bottom width 3 #Right at screen 1, .5 205 | # unset grid 206 | set xlabel " " 207 | set ylabel " " 208 | plot for [i=0:{index}-1] '{dataset_filename}' using {duration_index}:{p_index} index (i+0) with lines title columnheader(1) 209 | unset multiplot 210 | 211 | set ylabel "Max latency [s]" 212 | set xlabel "Duration [s]" 213 | set multiplot 214 | set origin 0, 0 215 | set rmargin at screen .70 216 | set key title "{key1}" right outside top width 3 #Left at screen 1, .95 217 | plot for [i={index}:*] '{dataset_filename}' using {precise_duration_index}:{precise_max_index} index (i+0) with points title columnheader(1) 218 | set key title "{key2}" right outside bottom width 3 #Right at screen 1, .5 219 | # unset grid 220 | set xlabel " " 221 | set ylabel " " 222 | plot for [i=0:{index}-1] '{dataset_filename}' using {precise_duration_index}:{precise_max_index} index (i+0) with lines title columnheader(1) 223 | unset multiplot 224 | """.format(dataset_filename=dataset_filename, 225 | gnuplot_terminal=gnuplot_terminal, 226 | gnuplot_out_filename=gnuplot_out_filename, 227 | duration_index=duration_index, 228 | p_index=p_index, 229 | p50_index=p50_index, 230 | filtered_p_index=filtered_p_index, 231 | precise_duration_index=precise_duration_index, 232 | precise_max_index=precise_max_index, 233 | title=title.replace("_", "\\\\_"), 234 | duration=duration, 235 | num_plots=len(migration_to_index), 236 | index=index, 237 | key2=args.primary_group.replace("_", " "), 238 | key1=format_key("", args.secondary_group).replace("_", " "), 239 | ), file=c) 240 | 241 | else: # json or html 242 | 243 | vega_lite = { 244 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 245 | "title": ", ".join("{}: {}".format(k, v) for k, v in sorted(graph_filtering, key=lambda t: t[0])), 246 | "facet": { 247 | "column": {"field": "experiment", "type": "nominal"}, 248 | "row": {"field": "queries", "type": "nominal"}, 249 | }, 250 | "spec": { 251 | "width": 600, 252 | "layer": [ 253 | { 254 | "mark": { 255 | "type": "line", 256 | "clip": True, 257 | }, 258 | "encoding": { 259 | "x": {"field": "time", "type": "quantitative", "axis": {"labelAngle": -90}, 260 | "scale": {"domain": [duration / 2 - 20, duration / 2 + 20]}}, 261 | # "x": { "field": "time", "type": "quantitative", "axis": { "labelAngle": -90 }, "scale": {"domain": [duration / 2 - 20, duration / 2 + 20]} }, 262 | "y": {"field": "latency", "type": "quantitative", "axis": {"format": "e", "labelAngle": 0}, 263 | "scale": {"type": "log"}}, 264 | "color": {"field": "p", "type": "nominal"}, 265 | }, 266 | }, 267 | { 268 | "mark": "rule", 269 | "encoding": { 270 | "x": { 271 | "aggregate": "mean", 272 | "field": "time", 273 | "type": "quantitative", 274 | }, 275 | "color": {"value": "grey"}, 276 | "size": {"value": 1} 277 | } 278 | } 279 | ] 280 | }, 281 | "data": { 282 | "values": data 283 | }, 284 | }; 285 | 286 | if args.json: 287 | with open(chart_filename, 'w') as c: 288 | print(json.dumps(vega_lite), file=c) 289 | else: 290 | print(sorted(graph_filtering, key=lambda t: t[0])) 291 | vega_lite["title"] = title 292 | html = """ 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 |
303 | 304 | 311 | 312 | 313 | """ 314 | with open(chart_filename, 'w') as c: 315 | print(html, file=c) 316 | 317 | print(chart_filename) 318 | print(os.getcwd() + "/" + chart_filename, file=sys.stderr) 319 | -------------------------------------------------------------------------------- /experiments/nexmark/plot_latency_timeline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # ./plot_latency_timeline.py results/98f4e2fa2e8bc839/ "[ ('bin_shift', 8), ('duration', 120), ('machine_local', True), ('processes', 2), ('workers', 8), ]" 4 | 5 | import sys, os, shutil, json 6 | import argparse 7 | import plot 8 | from collections import defaultdict 9 | 10 | parser = argparse.ArgumentParser(description="Plot") 11 | parser.add_argument('results_dir') 12 | parser.add_argument('filtering') 13 | parser.add_argument('--json', action='store_true') 14 | parser.add_argument('--gnuplot', action='store_true') 15 | parser.add_argument('--terminal', default="pdf") 16 | parser.add_argument('--filter', nargs='+', default=[]) 17 | parser.add_argument('--rename', default="{}") 18 | parser.add_argument('--name') 19 | args = parser.parse_args() 20 | 21 | results_dir = args.results_dir 22 | files = plot.get_files(results_dir) 23 | # for f in files: 24 | # print(f[1]) 25 | filtering = eval(args.filtering) 26 | rename = eval(args.rename) 27 | 28 | if len(args.filter) > 0: 29 | graph_filtering = [] 30 | data = [] 31 | experiments = [] 32 | for filter in args.filter: 33 | filter = eval(filter) 34 | # print(filtering+filter, file=sys.stderr) 35 | gf, da, ex = plot.latency_timeline_plots(results_dir, files, filtering + filter) 36 | for f in ex: 37 | # print(f, file=sys.stderr) 38 | f.extend(filter) 39 | graph_filtering.extend(gf) 40 | data.extend(da) 41 | experiments.extend(ex) 42 | # print("experiments", experiments, file=sys.stderr) 43 | else: 44 | graph_filtering, data, experiments = plot.latency_timeline_plots(results_dir, files, filtering) 45 | 46 | commit = results_dir.rstrip('/').split('/')[-1] 47 | # print("commit:", commit, file=sys.stderr) 48 | 49 | if len(data) == 0: 50 | print("No data found: {}".format(filtering), file=sys.stderr) 51 | exit(0) 52 | 53 | 54 | # Try to extract duration from filter pattern 55 | duration = next((x[1] for x in filtering if x[0] == 'duration'), 450) 56 | 57 | plot.ensure_dir("charts/{}".format(commit)) 58 | 59 | if args.json: 60 | extension = "json" 61 | elif args.gnuplot: 62 | extension = "gnuplot" 63 | else: 64 | extension = "html" 65 | 66 | plot_name = plot.kv_to_string(dict(graph_filtering)) 67 | 68 | def get_chart_filename(extension): 69 | name = "" 70 | if args.name: 71 | name = "_{}".format(args.name) 72 | graph_filename = "{}{}+{}.{}".format(plot.plot_name(__file__), name, plot_name, extension) 73 | return "charts/{}/{}".format(commit, graph_filename) 74 | 75 | chart_filename = get_chart_filename(extension) 76 | # title = ", ".join("{}: {}".format(k, v) for k, v in sorted(graph_filtering.items(), key=lambda t: t[0])) 77 | title = plot.kv_to_name(graph_filtering) 78 | 79 | if args.gnuplot: 80 | 81 | # Generate dataset 82 | all_headers = set() 83 | all_percentiles = set() 84 | all_migrations = set() 85 | 86 | for ds in data: 87 | for d in ds: 88 | for k in d.keys(): 89 | all_headers.add(k) 90 | all_percentiles.add(d["p"]) 91 | all_migrations.add(d.get("migration", None)) 92 | all_headers.remove("experiment") 93 | all_headers = sorted(all_headers) 94 | 95 | migration_to_index = defaultdict(list) 96 | 97 | 
graph_filtering_bin_shift = dict(graph_filtering) 98 | 99 | dataset_filename = get_chart_filename("dataset") 100 | 101 | all_configs = [] 102 | 103 | def get_key(config, key): 104 | for k, v in config: 105 | if k == key: 106 | return v 107 | return None 108 | 109 | order = { None: 0, 'sudden': 1, 'fluid': 2, 'batched': 3} 110 | 111 | with open(dataset_filename, 'w') as c: 112 | index = 0 113 | # print(" ".join(all_headers), file=c) 114 | for p in sorted(all_percentiles): 115 | for ds, config in zip(iter(data), iter(experiments)): 116 | key = get_key(config, "migration") 117 | if key in rename: 118 | key = rename[key] 119 | else: 120 | key = (key, order[key]) 121 | migration_to_index[key].append(index) 122 | config_with_p = config + [('p', p)] 123 | all_configs.append(config_with_p) 124 | if p == 1: 125 | name = "max" 126 | else: 127 | name = plot.kv_to_name([('p', p)]) 128 | # print(p, name, file=sys.stderr) 129 | print("\"{}\"".format(name.replace("_", "\\\\_")), file=c) 130 | for d in ds: 131 | if d["p"] == p: 132 | print(" ".join(map(plot.quote_str, [d[k] for k in all_headers])), file=c) 133 | print("\n", file=c) 134 | index += 1 135 | 136 | config_reorder = sorted(map(lambda x: (x[1], x[0]), enumerate(all_configs))) 137 | 138 | # Generate gnuplot script 139 | gnuplot_terminal = args.terminal 140 | gnuplot_out_filename = get_chart_filename(gnuplot_terminal) 141 | time_index = all_headers.index("time") + 1 142 | latency_index = all_headers.index("latency") + 1 143 | # fix long titles 144 | if len(title) > 79: 145 | idx = title.find(" ", int(len(title) / 2)) 146 | if idx != -1: 147 | title = "{}\\n{}".format(title[:idx], title[idx:]) 148 | with open(chart_filename, 'w') as c: 149 | print("""\ 150 | set terminal {gnuplot_terminal} font \"TimesNewRoman, 16\" 151 | if (!exists("MP_LEFT")) MP_LEFT = .11 152 | if (!exists("MP_RIGHT")) MP_RIGHT = .98 153 | if (!exists("MP_BOTTOM")) MP_BOTTOM = .27 154 | if (!exists("MP_TOP")) MP_TOP = .86 155 | if (!exists("MP_GAP")) MP_GAP = 0.03 156 | 157 | max(x,y) = (x < y) ? y : x 158 | min(x,y) = (x < y) ? 
x : y 159 | 160 | set logscale y 161 | 162 | set format y "10^{{%T}}" 163 | set grid xtics ytics 164 | 165 | set ylabel "Latency [ms]" 166 | set xlabel "Time [s]" 167 | set xtics min(200, {duration}/20*10) 168 | set mxtics 4 169 | 170 | set xrange [0:{duration}*.99] 171 | # set xrange [1000:1300] 172 | 173 | set key at screen .5, screen 0.01 center bottom maxrows 1 maxcols 10 174 | # unset key 175 | 176 | set output '{gnuplot_out_filename}' 177 | stats '{dataset_filename}' using {latency_index} nooutput 178 | if (STATS_blocks == 0) exit 179 | set for [i=1:STATS_blocks] linetype i dashtype i 180 | # set yrange [10**floor(log10(STATS_min)): 10**ceil(log10(STATS_max))] 181 | set yrange [9*10**-1:10**max(4, ceil(log10(STATS_max)))] 182 | set bmargin at screen 0.24 183 | set multiplot layout 1, {num_plots} columnsfirst \\ 184 | margins screen MP_LEFT, MP_RIGHT, MP_BOTTOM, MP_TOP spacing screen MP_GAP 185 | """.format(dataset_filename=dataset_filename, 186 | gnuplot_terminal=gnuplot_terminal, 187 | gnuplot_out_filename=gnuplot_out_filename, 188 | latency_index=latency_index, 189 | time_index=time_index, 190 | title=title.replace("_", "\\\\_"), 191 | duration=duration, 192 | num_plots=len(migration_to_index) 193 | ), file=c) 194 | 195 | def print_plots(): 196 | for key in sorted(migration_to_index, key=lambda x: x[1]): 197 | print("""\ 198 | set title "{key}" 199 | plot for [i in "{indexes}"] '{dataset_filename}' using {time_index}:{latency_index} index (i+0) title columnheader(1) with lines linewidth 1 200 | unset key 201 | set format y ''; unset ylabel 202 | """.format(key=(key[0] if len(migration_to_index) != 1 else ""), 203 | indexes=" ".join(map(str, sorted(migration_to_index[key], reverse=True))), 204 | dataset_filename=dataset_filename, 205 | time_index=time_index, 206 | latency_index=latency_index 207 | ), file=c) 208 | 209 | print_plots() 210 | print("""\ 211 | set key at screen .5, screen 0.01 center bottom maxrows 1 maxcols 10 212 | set multiplot layout 1, {num_plots} columnsfirst \\ 213 | margins screen MP_LEFT, MP_RIGHT, MP_BOTTOM, MP_TOP spacing screen MP_GAP 214 | 215 | set xtics 100 216 | set xrange [{duration}*.63:{duration}*.85] 217 | set ylabel "Latency [ms]" 218 | set format y "10^{{%T}}" 219 | """.format(duration=duration, num_plots=len(migration_to_index)), file=c) 220 | 221 | print_plots() 222 | print("""\ 223 | set key at screen .5, screen 0.01 center bottom maxrows 1 maxcols 10 224 | set multiplot layout 1, {num_plots} columnsfirst \\ 225 | margins screen MP_LEFT, MP_RIGHT, MP_BOTTOM, MP_TOP spacing screen MP_GAP 226 | set xtics 20 227 | set xrange [{duration}*.65:{duration}*.71] 228 | set ylabel "Latency [ms]" 229 | set format y "10^{{%T}}" 230 | """.format(duration=duration, num_plots=len(migration_to_index)), file=c) 231 | print_plots() 232 | else: # json or html 233 | 234 | vega_lite = { 235 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 236 | "title": ", ".join("{}: {}".format(k, v) for k, v in sorted(graph_filtering, key=lambda t: t[0])), 237 | "facet": { 238 | "column": {"field": "experiment", "type": "nominal"}, 239 | "row": {"field": "queries", "type": "nominal"}, 240 | }, 241 | "spec": { 242 | "width": 600, 243 | "layer": [ 244 | { 245 | "mark": { 246 | "type": "line", 247 | "clip": True, 248 | }, 249 | "encoding": { 250 | "x": {"field": "time", "type": "quantitative", "axis": {"labelAngle": -90}, 251 | "scale": {"domain": [duration / 2 - 20, duration / 2 + 20]}}, 252 | # "x": { "field": "time", "type": "quantitative", "axis": { "labelAngle": 
-90 }, "scale": {"domain": [duration / 2 - 20, duration / 2 + 20]} }, 253 | "y": {"field": "latency", "type": "quantitative", "axis": {"format": "e", "labelAngle": 0}, 254 | "scale": {"type": "log"}}, 255 | "color": {"field": "p", "type": "nominal"}, 256 | }, 257 | }, 258 | { 259 | "mark": "rule", 260 | "encoding": { 261 | "x": { 262 | "aggregate": "mean", 263 | "field": "time", 264 | "type": "quantitative", 265 | }, 266 | "color": {"value": "grey"}, 267 | "size": {"value": 1} 268 | } 269 | } 270 | ] 271 | }, 272 | "data": { 273 | "values": data 274 | }, 275 | }; 276 | 277 | if args.json: 278 | with open(chart_filename, 'w') as c: 279 | print(json.dumps(vega_lite), file=c) 280 | else: 281 | print(sorted(graph_filtering, key=lambda t: t[0])) 282 | vega_lite["title"] = title 283 | html = """ 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 |
294 | 295 | 302 | 303 | 304 | """ 305 | with open(chart_filename, 'w') as c: 306 | print(html, file=c) 307 | 308 | print(chart_filename) 309 | print(os.getcwd() + "/" + chart_filename, file=sys.stderr) 310 | -------------------------------------------------------------------------------- /experiments/nexmark/plot_memory_timeline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # ./plot_memory_timeline.py results/98f4e2fa2e8bc839/ "[ ('bin_shift', 8), ('duration', 120), ('machine_local', True), ('processes', 2), ('workers', 8), ]" 4 | 5 | import sys, os, shutil, json 6 | import argparse 7 | import plot 8 | from collections import defaultdict 9 | 10 | parser = argparse.ArgumentParser(description="Plot") 11 | parser.add_argument('results_dir') 12 | parser.add_argument('filtering') 13 | parser.add_argument('--json', action='store_true') 14 | parser.add_argument('--gnuplot', action='store_true') 15 | parser.add_argument('--terminal', default="pdf") 16 | parser.add_argument('--filter', nargs='+', default=[]) 17 | parser.add_argument('--rename', default="{}") 18 | parser.add_argument('--name') 19 | args = parser.parse_args() 20 | 21 | results_dir = args.results_dir 22 | files = plot.get_files(results_dir) 23 | # for f in files: 24 | # print(f[1]) 25 | filtering = eval(args.filtering) 26 | rename = eval(args.rename) 27 | 28 | if len(args.filter) > 0: 29 | graph_filtering = [] 30 | data = [] 31 | experiments = [] 32 | for filter in args.filter: 33 | filter = eval(filter) 34 | # print(filtering+filter, file=sys.stderr) 35 | gf, da, ex = plot.memory_timeline_plots(results_dir, files, filtering + filter) 36 | for f in ex: 37 | # print(f, file=sys.stderr) 38 | f.extend(filter) 39 | graph_filtering.extend(gf) 40 | data.extend(da) 41 | experiments.extend(ex) 42 | # print("experiments", experiments, file=sys.stderr) 43 | else: 44 | graph_filtering, data, experiments = plot.memory_timeline_plots(results_dir, files, filtering) 45 | 46 | commit = results_dir.rstrip('/').split('/')[-1] 47 | # print("commit:", commit, file=sys.stderr) 48 | 49 | if len(data) == 0: 50 | print("No data found: {}".format(filtering), file=sys.stderr) 51 | exit(0) 52 | 53 | 54 | # Try to extract duration from filter pattern 55 | duration = next((x[1] for x in filtering if x[0] == 'duration'), 450) 56 | 57 | plot.ensure_dir("charts/{}".format(commit)) 58 | 59 | if args.json: 60 | extension = "json" 61 | elif args.gnuplot: 62 | extension = "gnuplot" 63 | else: 64 | extension = "html" 65 | 66 | plot_name = plot.kv_to_string(dict(graph_filtering)) 67 | 68 | def get_chart_filename(extension): 69 | name = "" 70 | if args.name: 71 | name = "_{}".format(args.name) 72 | graph_filename = "{}{}+{}.{}".format(plot.plot_name(__file__), name, plot_name, extension) 73 | return "charts/{}/{}".format(commit, graph_filename) 74 | 75 | chart_filename = get_chart_filename(extension) 76 | # title = ", ".join("{}: {}".format(k, v) for k, v in sorted(graph_filtering.items(), key=lambda t: t[0])) 77 | title = plot.kv_to_name(graph_filtering) 78 | 79 | if args.gnuplot: 80 | def name(config): 81 | config = dict(config) 82 | migration = config.get("migration", "UNKNOWN FIXME") 83 | if migration in rename: 84 | migration = rename[migration] 85 | return migration 86 | 87 | 88 | # Generate dataset 89 | all_headers = set() 90 | 91 | for ds in data: 92 | for d in ds: 93 | for k in d.keys(): 94 | all_headers.add(k) 95 | all_headers.remove("experiment") 96 | all_headers = sorted(all_headers) 97 | 
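    # The "dataset" file written below is a plain gnuplot data file: one block per
    # experiment, blocks separated by blank lines, each block headed by a quoted
    # series name. The gnuplot script generated further down addresses blocks with
    # `index i` and picks the name up via `title columnheader(1)`; columns follow
    # the sorted `all_headers` order, which is why the time/RSS column indices are
    # computed from that list.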
98 | graph_filtering_bin_shift = dict(graph_filtering) 99 | 100 | dataset_filename = get_chart_filename("dataset") 101 | 102 | all_configs = [] 103 | 104 | def get_key(config, key): 105 | for k, v in config: 106 | if k == key: 107 | return v 108 | return None 109 | 110 | with open(dataset_filename, 'w') as c: 111 | index = 0 112 | # print(" ".join(all_headers), file=c) 113 | for ds, config in zip(iter(data), iter(experiments)): 114 | all_configs.append(config) 115 | print("\"{}\"".format(name(config).replace("_", "\\\\_")), file=c) 116 | for d in ds: 117 | print(" ".join(map(plot.quote_str, [d[k] for k in all_headers])), file=c) 118 | print("\n", file=c) 119 | index += 1 120 | 121 | config_reorder = sorted(map(lambda x: (x[1], x[0]), enumerate(all_configs))) 122 | 123 | # Generate gnuplot script 124 | gnuplot_terminal = args.terminal 125 | gnuplot_out_filename = get_chart_filename(gnuplot_terminal) 126 | time_index = all_headers.index("time") + 1 127 | rss_index = all_headers.index("RSS") + 1 128 | # fix long titles 129 | if len(title) > 79: 130 | idx = title.find(" ", int(len(title) / 2)) 131 | if idx != -1: 132 | title = "{}\\n{}".format(title[:idx], title[idx:]) 133 | with open(chart_filename, 'w') as c: 134 | print("""\ 135 | set terminal {gnuplot_terminal} font \"TimesNewRoman, 10\" size 3.3, 2.0 monochrome 136 | # set logscale y 137 | 138 | # set format y "10^{{%T}}" 139 | set format y '%.1s%cB' 140 | set grid xtics ytics 141 | set mytics 4 142 | set mxtics 4 143 | 144 | set ylabel "RSS" 145 | set xlabel "Time [s]" 146 | 147 | set xrange [0:{duration}+20] 148 | 149 | # set key at screen .5, screen 0.01 center bottom maxrows 1 maxcols 10 150 | set key top left 151 | # unset key 152 | 153 | set output '{gnuplot_out_filename}' 154 | stats '{dataset_filename}' using {rss_index} nooutput 155 | if (STATS_blocks == 0) exit 156 | set for [i=1:STATS_blocks] linetype i dashtype i 157 | # set yrange [10**floor(log10(STATS_min)): 10**ceil(log10(STATS_max))] 158 | # set title "{title}" 159 | set bmargin at screen 0.24 160 | plot for [i=0:*] '{dataset_filename}' using {time_index}:{rss_index} index i title columnheader(1) with lines 161 | """.format(dataset_filename=dataset_filename, 162 | gnuplot_terminal=gnuplot_terminal, 163 | gnuplot_out_filename=gnuplot_out_filename, 164 | rss_index=rss_index, 165 | time_index=time_index, 166 | title=title.replace("_", "\\\\_"), 167 | duration=duration, 168 | ), file=c) 169 | 170 | else: # json or html 171 | 172 | vega_lite = { 173 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 174 | "title": ", ".join("{}: {}".format(k, v) for k, v in sorted(graph_filtering, key=lambda t: t[0])), 175 | "width": 600, 176 | "mark": { 177 | "type": "line", 178 | "clip": True, 179 | }, 180 | "encoding": { 181 | "x": { "field": "time", "type": "quantitative", "axis": { "labelAngle": -90 }, "scale": {"domain": [0,450]} }, 182 | "y": { "field": "RSS", "type": "quantitative", "axis": { "format": "s", "labelAngle": 0 }, "scale": { "type": "log" }}, 183 | "row": { "field": "experiment", "type": "nominal" }, 184 | }, 185 | "data": { 186 | "values": data 187 | } 188 | }; 189 | 190 | if args.json: 191 | with open(chart_filename, 'w') as c: 192 | print(json.dumps(vega_lite), file=c) 193 | else: 194 | print(sorted(graph_filtering, key=lambda t: t[0])) 195 | vega_lite["title"] = title 196 | html = """ 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 |
207 | 208 | 215 | 216 | 217 | """ 218 | with open(chart_filename, 'w') as c: 219 | print(html, file=c) 220 | 221 | print(chart_filename) 222 | print(os.getcwd() + "/" + chart_filename, file=sys.stderr) 223 | -------------------------------------------------------------------------------- /experiments/nexmark/plot_migration_queries_latency.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # ./plot_migration_queries_latency.py results/98f4e2fa2e8bc839/ "[ ('bin_shift', 8), ('duration', 120), ('machine_local', True), ('processes', 2), ('workers', 8), ]" 4 | 5 | import sys, os, shutil, json 6 | import argparse 7 | import plot 8 | 9 | parser = argparse.ArgumentParser(description="Plot") 10 | parser.add_argument('results_dir') 11 | parser.add_argument('filtering') 12 | parser.add_argument('--json', action='store_true') 13 | parser.add_argument('--gnuplot', action='store_true') 14 | parser.add_argument('--table', action='store_true') 15 | parser.add_argument('--terminal', default="pdf") 16 | parser.add_argument('--filter', nargs='+', default=[]) 17 | parser.add_argument('--rename', default="{}") 18 | parser.add_argument('--name') 19 | args = parser.parse_args() 20 | 21 | results_dir = args.results_dir 22 | files = plot.get_files(results_dir) 23 | filtering = eval(args.filtering) 24 | rename = eval(args.rename) 25 | 26 | if len(args.filter) > 0: 27 | graph_filtering = [] 28 | data = [] 29 | experiments = [] 30 | for filter in args.filter: 31 | filter = eval(filter) 32 | # print(filtering+filter, file=sys.stderr) 33 | gf, da, ex = plot.latency_plots(results_dir, files, filtering + filter) 34 | for f in ex: 35 | # print(f, file=sys.stderr) 36 | f.extend(filter) 37 | graph_filtering.extend(gf) 38 | data.extend(da) 39 | experiments.extend(ex) 40 | # print("experiments", experiments, file=sys.stderr) 41 | else: 42 | graph_filtering, data, experiments = plot.latency_plots(results_dir, files, filtering) 43 | 44 | commit = results_dir.rstrip('/').split('/')[-1] 45 | # print("commit:", commit, file=sys.stderr) 46 | 47 | if len(data) == 0: 48 | print("No data found: {}".format(filtering), file=sys.stderr) 49 | exit(0) 50 | 51 | plot.ensure_dir("charts/{}".format(commit)) 52 | 53 | if args.json: 54 | extension = "json" 55 | elif args.gnuplot: 56 | extension = "gnuplot" 57 | elif args.table: 58 | extension = "tex" 59 | else: 60 | extension = "html" 61 | 62 | plot_name = plot.kv_to_string(dict(graph_filtering)) 63 | 64 | def get_chart_filename(extension): 65 | name = "" 66 | if args.name: 67 | name = "_{}".format(args.name) 68 | graph_filename = "{}{}+{}.{}".format(plot.plot_name(__file__), name, plot_name, extension) 69 | return "charts/{}/{}".format(commit, graph_filename) 70 | 71 | chart_filename = get_chart_filename(extension) 72 | # title = ", ".join("{}: {}".format(k, v) for k, v in sorted(graph_filtering.items(), key=lambda t: t[0])) 73 | title = plot.kv_to_name(graph_filtering) 74 | 75 | if args.gnuplot: 76 | def name(config): 77 | config = dict(config) 78 | if "native" in config.get('backend', ""): 79 | return "Native" 80 | return config.get("bin_shift", "UNKNOWN FIXME") 81 | 82 | # Generate dataset 83 | all_headers = set() 84 | 85 | for ds in data: 86 | for d in ds: 87 | for k in d.keys(): 88 | all_headers.add(k) 89 | # all_headers.remove("bin_shift") 90 | all_headers = sorted(all_headers) 91 | 92 | dataset_filename = get_chart_filename("dataset") 93 | 94 | with open(dataset_filename, 'w') as c: 95 | # print(" ".join(all_headers), 
file=c) 96 | for ds, config in zip(iter(data), iter(experiments)): 97 | print("\"{}\"".format(name(config)), file=c) 98 | for d in ds: 99 | print(" ".join(map(plot.quote_str, [d[k] for k in all_headers])), file=c) 100 | print("\n", file=c) 101 | 102 | # Generate gnuplot script 103 | gnuplot_terminal = args.terminal 104 | gnuplot_out_filename = get_chart_filename(gnuplot_terminal) 105 | ccdf_index = all_headers.index("ccdf") + 1 106 | latency_index = all_headers.index("latency") + 1 107 | # fix long titles 108 | if len(title) > 79: 109 | idx = title.find(" ", int(len(title) / 2)) 110 | if idx != -1: 111 | title = "{}\\n{}".format(title[:idx], title[idx:]) 112 | with open(chart_filename, 'w') as c: 113 | print("""\ 114 | set terminal {gnuplot_terminal} font \"TimesNewRoman, 20\" 115 | set logscale x 116 | set logscale y 117 | 118 | set format x "10^{{%T}}" 119 | set format y "10^{{%T}}" 120 | set grid xtics ytics 121 | 122 | set yrange [.00001:1] 123 | set xrange [0.5:2000] 124 | 125 | set xlabel "Latency [ms]" 126 | set ylabel "CCDF" 127 | #set title "{title}" 128 | 129 | set key outside right 130 | 131 | set output '{gnuplot_out_filename}' 132 | stats '{dataset_filename}' using 0 nooutput 133 | if (STATS_blocks == 0) exit 134 | set for [i=1:STATS_blocks] linetype i dashtype i 135 | plot for [i=0:(STATS_blocks - 1)] '{dataset_filename}' using {latency_index}:{ccdf_index} index i title columnheader(1) with lines linewidth 2 136 | """.format(dataset_filename=dataset_filename, 137 | gnuplot_terminal=gnuplot_terminal, 138 | gnuplot_out_filename=gnuplot_out_filename, 139 | latency_index=latency_index, 140 | ccdf_index=ccdf_index, 141 | title=title.replace("_", "\\\\_"), 142 | ), file=c) 143 | elif args.table: 144 | tex_filename = get_chart_filename(extension) 145 | 146 | with open(tex_filename, 'w') as c: 147 | def name(config): 148 | config = dict(config) 149 | if "native" in config.get('backend', ""): 150 | return "Native" 151 | return config.get("bin_shift", "UNKNOWN FIXME") 152 | 153 | def format_lat(d, c): 154 | print("& {:.2f} ".format(d['latency']), file=c) 155 | 156 | 157 | # print(" ".join(all_headers), file=c) 158 | 159 | print("Experiment & 90\\% & 99\\% & 99.99\\% & max \\tabularnewline", file=c) 160 | print("\\midrule", file=c) 161 | 162 | for ds, config in zip(iter(data), iter(experiments)): 163 | # print(config, file=sys.stderr) 164 | print("{} ".format(name(config)), file=c) 165 | for d in ds: 166 | if d['ccdf'] <= 0.1: 167 | format_lat(d, c) 168 | break 169 | for d in ds: 170 | if d['ccdf'] <= 0.01: 171 | format_lat(d, c) 172 | break 173 | for d in ds: 174 | if d['ccdf'] <= 0.001: 175 | format_lat(d, c) 176 | break 177 | format_lat(ds[-1], c) 178 | 179 | # print(" ".join(map(plot.quote_str, [d[k] for k in all_headers])), file=c) 180 | print("\\tabularnewline\n", file=c) 181 | 182 | else: # json or html 183 | vega_lite = { 184 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 185 | "hconcat": [ 186 | { 187 | "mark": "line", 188 | "encoding": { 189 | "x": { "field": "latency", "type": "quantitative", "axis": { "format": "e", "labelAngle": -90 }, "scale": { "type": "log" }}, 190 | "y": { "field": "ccdf", "type": "quantitative", "scale": { "type": "log" } }, 191 | "row": { "field": "experiment", "type": "nominal" }, 192 | "column": { "field": "queries", "type": "nominal" }, 193 | "stroke": { "field": "rate", "type": "nominal", "legend": None }, 194 | "shape": { "field": "rate", "type": "nominal", "legend": None } 195 | } 196 | }, 197 | { 198 | "mark": "point", 199 | 
"encoding": { 200 | "shape": { "field": "rate", "aggregate": "min", "type": "nominal", "legend": None }, 201 | "fill": { "field": "rate", "aggregate": "min", "type": "nominal", "legend": None }, 202 | "y": { "field": "rate", "type": "nominal", "title": None } 203 | } 204 | } 205 | ], 206 | "data": { 207 | "values": [d for ds in data for d in ds] 208 | } 209 | }; 210 | 211 | if args.json: 212 | with open(chart_filename, 'w') as c: 213 | print(json.dumps(vega_lite), file=c) 214 | else: 215 | print(sorted(graph_filtering, key=lambda t: t[0])) 216 | vega_lite["title"] = title 217 | html = """ 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 |
228 | 229 | 236 | 237 | 238 | """ 239 | with open(chart_filename, 'w') as c: 240 | print(html, file=c) 241 | 242 | print(chart_filename) 243 | print(os.getcwd() + "/" + chart_filename, file=sys.stderr) 244 | -------------------------------------------------------------------------------- /experiments/nexmark/requirements.txt: -------------------------------------------------------------------------------- 1 | coloredlogs==10.0 2 | executor==20.0 3 | fasteners==0.14.1 4 | hopcroftkarp==1.2.4 5 | humanfriendly==4.12.1 6 | monotonic==1.5 7 | property-manager==2.3.1 8 | six==1.11.0 9 | verboselogs==1.7 10 | -------------------------------------------------------------------------------- /experiments/nexmark/run_bench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | trap "exit" INT TERM ERR 3 | trap "kill 0" EXIT 4 | 5 | CLUSTERUSER="${CLUSTERUSER:-$USER}" 6 | clusterpath="${CLUSTERPATH:-`git rev-parse --show-toplevel`}/nexmark" 7 | serverprefix="${CLUSTERUSER}@${SERVER:-fdr}" 8 | 9 | function run { # command index groups additional 10 | # xterm +hold -e 11 | python3 -c "import bench; bench.$1($2, $3)" --clusterpath "${clusterpath}" --serverprefix "${serverprefix}" $4 || exit $? 12 | } 13 | 14 | function run_group { # name 15 | run "$1" "0" "1" --build-only 16 | for i in $(seq 0 $(($group - 1))) 17 | do 18 | run "$1" "$i" "$group" --no-build & 19 | done 20 | wait 21 | } 22 | export -f run_group 23 | -------------------------------------------------------------------------------- /experiments/nexmark/run_paper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source run_bench.sh 3 | 4 | group=1 5 | run_group "paper_micro_no_migr" 6 | run_group "paper_micro_migr" 7 | run_group "paper_nx" 8 | -------------------------------------------------------------------------------- /nexmark/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | /target 3 | **/*.rs.bk 4 | -------------------------------------------------------------------------------- /nexmark/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "nexmark" 3 | version = "0.1.0" 4 | authors = ["Frank McSherry "] 5 | 6 | [features] 7 | default = ["dynamic_scaling_mechanism/bin-8"] 8 | fake_stateful = ["dynamic_scaling_mechanism/fake_stateful"] 9 | 10 | [dependencies] 11 | differential-dataflow = { git = "https://github.com/TimelyDataflow/differential-dataflow.git" } 12 | #differential-dataflow = "0.8" 13 | timely = { git = "https://github.com/TimelyDataflow/timely-dataflow.git"} 14 | #timely = { git = "https://github.com/antiguru/timely-dataflow.git", branch = "session_cease"} 15 | #timely = "0.8" 16 | #abomonation = { git = "https://github.com/frankmcsherry/abomonation.git" } 17 | abomonation = "^0.7" 18 | abomonation_derive = "0.3" 19 | serde = "*" 20 | serde_json = "*" 21 | serde_derive = "*" 22 | rand = "*" 23 | fnv="1.0" 24 | streaming-harness = { version = "^0.1", features = ["hdrhist-support"] } 25 | hdrhist = "0.5.0" 26 | dynamic_scaling_mechanism = { version = "0.0.1", path = "..", default_features = false } 27 | 28 | [dependencies.clap] 29 | version = "~2.32" 30 | default-features = false 31 | 32 | #[patch.crates-io] 33 | #timely = { git = 'https://github.com/frankmcsherry/timely-dataflow.git' } 34 | 35 | [profile.release] 36 | # opt-level = 3 37 | debug = true 38 | # rpath = false 39 | # lto = false 40 | # 
codegen-units = 16 41 | # debug-assertions = false 42 | #panic = "abort" 43 | 44 | -------------------------------------------------------------------------------- /nexmark/src/config.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Result, Error, ErrorKind}; 2 | use std::collections::HashMap; 3 | use std::str::FromStr; 4 | 5 | /// This is a simple command line options parser. 6 | #[derive(Clone, Default)] 7 | pub struct Config { 8 | args: HashMap 9 | } 10 | 11 | impl Config { 12 | pub fn new() -> Self { 13 | Config{ args: HashMap::new() } 14 | } 15 | 16 | /// Parses the command line arguments into a new Config object. 17 | /// 18 | /// Its parsing strategy is as follows: 19 | /// If an argument starts with --, the remaining string is used as the key 20 | /// and the next argument as the associated value. 21 | /// Otherwise the argument is used as the next positional value, counting 22 | /// from zero. 23 | /// 24 | pub fn from>(mut cmd_args: I) -> Result { 25 | let mut args = HashMap::new(); 26 | let mut i = 0; 27 | while let Some(arg) = cmd_args.next() { 28 | if arg.starts_with("--") { 29 | match cmd_args.next() { 30 | Some(value) => args.insert(format!("{}", &arg[2..]), value), 31 | None => return Err(Error::new(ErrorKind::Other, "No corresponding value.")) 32 | }; 33 | } else { 34 | args.insert(format!("{}", i), arg); 35 | i = i+1; 36 | } 37 | } 38 | Ok(Config{ args: args }) 39 | } 40 | 41 | /// Inserts the given value for the given key. 42 | /// 43 | /// If the key already exists, its value is overwritten. 44 | #[allow(dead_code)] 45 | pub fn insert(&mut self, key: &str, value: String) { 46 | self.args.insert(String::from(key), value); 47 | } 48 | 49 | /// Returns the value for the given key, if available. 50 | pub fn get(&self, key: &str) -> Option { 51 | self.args.get(key).map(|x| x.clone()) 52 | } 53 | 54 | /// Returns the value for the given key automatically parsed if possible. 55 | pub fn get_as(&self, key: &str) -> Option { 56 | self.args.get(key).map_or(None, |x| x.parse::().ok()) 57 | } 58 | 59 | /// Returns the value for the given key or a default value if the key does not exist. 60 | pub fn get_or(&self, key: &str, default: &str) -> String { 61 | self.args.get(key).map_or(String::from(default), |x| x.clone()) 62 | } 63 | 64 | /// Returns the value for the given key automatically parsed, or a default value if the key does not exist. 
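    ///
    /// A minimal usage sketch (not from the original source), assuming the binary
    /// was started as `worker --rate 1000 input.json`:
    ///
    /// ```ignore
    /// let config = Config::from(std::env::args().skip(1)).unwrap();
    /// let rate: u64 = config.get_as_or("rate", 1_000);   // parsed from "--rate 1000"
    /// let input = config.get_or("0", "default.json");    // first positional argument
    /// ```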
65 | pub fn get_as_or(&self, key: &str, default: T) -> T { 66 | self.get_as(key).unwrap_or(default) 67 | } 68 | } 69 | 70 | 71 | 72 | use std::f64::consts::PI; 73 | 74 | // type Id = usize; 75 | // type Date = usize; 76 | 77 | // const MIN_STRING_LENGTH: usize = 3; 78 | const BASE_TIME: usize = 0; //1436918400_000; 79 | 80 | fn split_string_arg(string: String) -> Vec { 81 | string.split(",").map(String::from).collect::>() 82 | } 83 | 84 | // trait NEXMarkRng { 85 | // fn gen_string(&mut self, usize) -> String; 86 | // fn gen_price(&mut self) -> usize; 87 | // } 88 | 89 | // impl NEXMarkRng for StdRng { 90 | // fn gen_string(&mut self, max: usize) -> String { 91 | // let len = self.gen_range(MIN_STRING_LENGTH, max); 92 | // String::from((0..len).map(|_|{ 93 | // if self.gen_range(0, 13) == 0 { String::from(" ") } 94 | // else { from_u32('a' as u32+self.gen_range(0, 26)).unwrap().to_string() } 95 | // }).collect::>().join("").trim()) 96 | // } 97 | 98 | // fn gen_price(&mut self) -> usize { 99 | // (10.0_f32.powf(self.gen::() * 6.0) * 100.0).round() as usize 100 | // } 101 | // } 102 | 103 | #[derive(PartialEq)] 104 | enum RateShape { 105 | Square, 106 | Sine, 107 | } 108 | 109 | #[derive(Clone)] 110 | pub struct NEXMarkConfig { 111 | pub active_people: usize, 112 | pub in_flight_auctions: usize, 113 | pub out_of_order_group_size: usize, 114 | pub hot_seller_ratio: usize, 115 | pub hot_auction_ratio: usize, 116 | pub hot_bidder_ratio: usize, 117 | pub first_event_id: usize, 118 | pub first_event_number: usize, 119 | pub base_time_ns: usize, 120 | pub step_length: usize, 121 | pub events_per_epoch: usize, 122 | pub epoch_period: f32, 123 | pub inter_event_delays_ns: Vec, 124 | // Originally constants 125 | pub num_categories: usize, 126 | pub auction_id_lead: usize, 127 | pub hot_seller_ratio_2: usize, 128 | pub hot_auction_ratio_2: usize, 129 | pub hot_bidder_ratio_2: usize, 130 | pub person_proportion: usize, 131 | pub auction_proportion: usize, 132 | pub bid_proportion: usize, 133 | pub proportion_denominator: usize, 134 | pub first_auction_id: usize, 135 | pub first_person_id: usize, 136 | pub first_category_id: usize, 137 | pub person_id_lead: usize, 138 | pub sine_approx_steps: usize, 139 | pub us_states: Vec, 140 | pub us_cities: Vec, 141 | pub first_names: Vec, 142 | pub last_names: Vec, 143 | } 144 | 145 | impl NEXMarkConfig { 146 | pub fn new(config: &Config) -> Self{ 147 | let active_people = config.get_as_or("active-people", 1000); 148 | let in_flight_auctions = config.get_as_or("in-flight-auctions", 100); 149 | let out_of_order_group_size = config.get_as_or("out-of-order-group-size", 1); 150 | let hot_seller_ratio = config.get_as_or("hot-seller-ratio", 4); 151 | let hot_auction_ratio = config.get_as_or("hot-auction-ratio", 2); 152 | let hot_bidder_ratio = config.get_as_or("hot-bidder-ratio", 4); 153 | let first_event_id = config.get_as_or("first-event-id", 0); 154 | let first_event_number = config.get_as_or("first-event-number", 0); 155 | let num_categories = config.get_as_or("num-categories", 5); 156 | let auction_id_lead = config.get_as_or("auction-id-lead", 10); 157 | let hot_seller_ratio_2 = config.get_as_or("hot-seller-ratio-2", 100); 158 | let hot_auction_ratio_2 = config.get_as_or("hot-auction-ratio-2", 100); 159 | let hot_bidder_ratio_2 = config.get_as_or("hot-bidder-ratio-2", 100); 160 | let person_proportion = config.get_as_or("person-proportion", 1); 161 | let auction_proportion = config.get_as_or("auction-proportion", 3); 162 | let bid_proportion = 
config.get_as_or("bid-proportion", 46); 163 | let proportion_denominator = person_proportion+auction_proportion+bid_proportion; 164 | let first_auction_id = config.get_as_or("first-auction-id", 1000); 165 | let first_person_id = config.get_as_or("first-person-id", 1000); 166 | let first_category_id = config.get_as_or("first-category-id", 10); 167 | let person_id_lead = config.get_as_or("person-id-lead", 10); 168 | let sine_approx_steps = config.get_as_or("sine-approx-steps", 10); 169 | let base_time_ns = config.get_as_or("base-time", BASE_TIME); 170 | let us_states = split_string_arg(config.get_or("us-states", "AZ,CA,ID,OR,WA,WY")); 171 | let us_cities = split_string_arg(config.get_or("us-cities", "phoenix,los angeles,san francisco,boise,portland,bend,redmond,seattle,kent,cheyenne")); 172 | let first_names = split_string_arg(config.get_or("first-names", "peter,paul,luke,john,saul,vicky,kate,julie,sarah,deiter,walter")); 173 | let last_names = split_string_arg(config.get_or("last-names", "shultz,abrams,spencer,white,bartels,walton,smith,jones,noris")); 174 | let rate_shape = if config.get_or("rate-shape", "sine") == "sine"{ RateShape::Sine }else{ RateShape::Square }; 175 | let rate_period = config.get_as_or("rate-period", 600); 176 | let first_rate = config.get_as_or("first-event-rate", config.get_as_or("events-per-second", 1_000)); 177 | let next_rate = config.get_as_or("next-event-rate", first_rate); 178 | let ns_per_unit = config.get_as_or("us-per-unit", 1_000_000_000); // Rate is in μs 179 | let generators = config.get_as_or("threads", 1) as f64; 180 | // Calculate inter event delays array. 181 | let mut inter_event_delays_ns = Vec::new(); 182 | let rate_to_period = |r| (ns_per_unit) as f64 / r as f64; 183 | if first_rate == next_rate { 184 | inter_event_delays_ns.push(rate_to_period(first_rate) * generators); 185 | } else { 186 | match rate_shape { 187 | RateShape::Square => { 188 | inter_event_delays_ns.push(rate_to_period(first_rate) * generators); 189 | inter_event_delays_ns.push(rate_to_period(next_rate) * generators); 190 | }, 191 | RateShape::Sine => { 192 | let mid = (first_rate + next_rate) as f64 / 2.0; 193 | let amp = (first_rate - next_rate) as f64 / 2.0; 194 | for i in 0..sine_approx_steps { 195 | let r = (2.0 * PI * i as f64) / sine_approx_steps as f64; 196 | let rate = mid + amp * r.cos(); 197 | inter_event_delays_ns.push(rate_to_period(rate.round() as usize) * generators); 198 | } 199 | } 200 | } 201 | } 202 | // Calculate events per epoch and epoch period. 
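        // (Illustrative arithmetic, assuming the defaults above: sine rate shape,
        // rate-period = 600 and sine-approx-steps = 10, so n = 10 and
        // step_length = (600 + 10 - 1) / 10 = 60. With first-event-rate equal to
        // next-event-rate only a single inter-event delay is pushed above, so the
        // non-constant-rate branch below is never taken; if it were, it panics.)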
203 | let n = if rate_shape == RateShape::Square { 2 } else { sine_approx_steps }; 204 | let step_length = (rate_period + n - 1) / n; 205 | let events_per_epoch = 0; 206 | let epoch_period = 0.0; 207 | if inter_event_delays_ns.len() > 1 { 208 | panic!("non-constant rate not supported"); 209 | // for inter_event_delay in &inter_event_delays { 210 | // let num_events_for_this_cycle = (step_length * 1_000_000) as f64 / inter_event_delay; 211 | // events_per_epoch += num_events_for_this_cycle.round() as usize; 212 | // epoch_period += (num_events_for_this_cycle * inter_event_delay) / 1000.0; 213 | // } 214 | } 215 | NEXMarkConfig { 216 | active_people: active_people, 217 | in_flight_auctions: in_flight_auctions, 218 | out_of_order_group_size: out_of_order_group_size, 219 | hot_seller_ratio: hot_seller_ratio, 220 | hot_auction_ratio: hot_auction_ratio, 221 | hot_bidder_ratio: hot_bidder_ratio, 222 | first_event_id: first_event_id, 223 | first_event_number: first_event_number, 224 | base_time_ns: base_time_ns, 225 | step_length: step_length, 226 | events_per_epoch: events_per_epoch, 227 | epoch_period: epoch_period, 228 | inter_event_delays_ns: inter_event_delays_ns, 229 | // Originally constants 230 | num_categories: num_categories, 231 | auction_id_lead: auction_id_lead, 232 | hot_seller_ratio_2: hot_seller_ratio_2, 233 | hot_auction_ratio_2: hot_auction_ratio_2, 234 | hot_bidder_ratio_2: hot_bidder_ratio_2, 235 | person_proportion: person_proportion, 236 | auction_proportion: auction_proportion, 237 | bid_proportion: bid_proportion, 238 | proportion_denominator: proportion_denominator, 239 | first_auction_id: first_auction_id, 240 | first_person_id: first_person_id, 241 | first_category_id: first_category_id, 242 | person_id_lead: person_id_lead, 243 | sine_approx_steps: sine_approx_steps, 244 | us_states: us_states, 245 | us_cities: us_cities, 246 | first_names: first_names, 247 | last_names: last_names, 248 | } 249 | } 250 | 251 | /// 252 | pub fn event_timestamp_ns(&self, event_number: usize) -> usize { 253 | // if self.inter_event_delays.len() == 1 { 254 | return self.base_time_ns + ((event_number as f64 * self.inter_event_delays_ns[0]) as usize); 255 | // } 256 | 257 | // let epoch = event_number / self.events_per_epoch; 258 | // let mut event_i = event_number % self.events_per_epoch; 259 | // let mut offset_in_epoch = 0.0; 260 | // for inter_event_delay in &self.inter_event_delays { 261 | // let num_events_for_this_cycle = (self.step_length * 1_000_000) as f32 / inter_event_delay; 262 | // if self.out_of_order_group_size < num_events_for_this_cycle.round() as usize { 263 | // let offset_in_cycle = event_i as f32 * inter_event_delay; 264 | // return self.base_time + (epoch as f32 * self.epoch_period + offset_in_epoch + offset_in_cycle / 1000.0).round() as usize; 265 | // } 266 | // event_i -= num_events_for_this_cycle.round() as usize; 267 | // offset_in_epoch += (num_events_for_this_cycle * inter_event_delay) / 1000.0; 268 | // } 269 | // return 0 270 | } 271 | 272 | pub fn next_adjusted_event(&self, events_so_far: usize) -> usize { 273 | let n = self.out_of_order_group_size; 274 | let event_number = self.first_event_number + events_so_far; 275 | (event_number / n) * n + (event_number * 953) % n 276 | } 277 | } 278 | 279 | pub struct NexMarkInputTimes { 280 | config: NEXMarkConfig, 281 | next: Option, 282 | events_so_far: usize, 283 | end: u64, 284 | time_dilation: usize, 285 | peers: usize, 286 | } 287 | 288 | impl NexMarkInputTimes { 289 | pub fn new(config: NEXMarkConfig, end: u64, 
time_dilation: usize, peers: usize) -> Self { 290 | let mut this = Self { 291 | config, 292 | next: None, 293 | events_so_far: 0, 294 | end, 295 | time_dilation, 296 | peers, 297 | }; 298 | this.make_next(); 299 | this 300 | } 301 | 302 | fn make_next(&mut self) { 303 | let ts = self.config.event_timestamp_ns( 304 | self.config.next_adjusted_event(self.events_so_far)) as u64; 305 | let ts = ts / self.time_dilation as u64; 306 | if ts < self.end { 307 | self.events_so_far += self.peers; 308 | self.next = Some(ts); 309 | } else { 310 | self.next = None; 311 | }; 312 | } 313 | } 314 | 315 | impl Iterator for NexMarkInputTimes { 316 | type Item = u64; 317 | 318 | fn next(&mut self) -> Option { 319 | let s = self.next; 320 | self.make_next(); 321 | s 322 | } 323 | } 324 | 325 | impl ::streaming_harness::input::InputTimeResumableIterator for NexMarkInputTimes { 326 | fn peek(&mut self) -> Option<&u64> { 327 | self.next.as_ref() 328 | } 329 | 330 | fn end(&self) -> bool { 331 | self.next.is_none() 332 | } 333 | } 334 | 335 | -------------------------------------------------------------------------------- /nexmark/src/event.rs: -------------------------------------------------------------------------------- 1 | use rand::Rng; 2 | use rand::rngs::SmallRng; 3 | use std::cmp::{max, min}; 4 | 5 | use config::NEXMarkConfig; 6 | 7 | trait NEXMarkRng { 8 | fn gen_string(&mut self, usize) -> String; 9 | fn gen_price(&mut self) -> usize; 10 | } 11 | 12 | impl NEXMarkRng for SmallRng { 13 | fn gen_string(&mut self, _max: usize) -> String { 14 | String::new() 15 | // use std::iter; 16 | // use rand::distributions::Alphanumeric; 17 | // 18 | // let len = self.gen_range(MIN_STRING_LENGTH, max); 19 | // iter::repeat(()) 20 | // .map(|()| self.sample(Alphanumeric)) 21 | // .take(len) 22 | // .collect() 23 | } 24 | 25 | fn gen_price(&mut self) -> usize { 26 | (10.0_f32.powf(self.gen::() * 6.0) * 100.0).round() as usize 27 | } 28 | } 29 | 30 | type Id = usize; 31 | #[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Serialize, Deserialize, Debug, Abomonation, Hash, Copy, Default)] 32 | pub struct Date(usize); 33 | 34 | impl Date { 35 | pub fn new(date_time: usize) -> Date { 36 | Date(date_time) 37 | } 38 | } 39 | 40 | impl ::std::ops::Deref for Date { 41 | type Target = usize; 42 | fn deref(&self) -> &Self::Target { 43 | &self.0 44 | } 45 | } 46 | impl ::std::ops::Add for Date { 47 | type Output = Self; 48 | 49 | fn add(self, other: Self) -> Self { 50 | Date(self.0 + other.0) 51 | } 52 | } 53 | impl ::std::ops::Sub for Date { 54 | type Output = Self; 55 | 56 | fn sub(self, other: Self) -> Self { 57 | Date(self.0 - other.0) 58 | } 59 | } 60 | 61 | //const MIN_STRING_LENGTH: usize = 3; 62 | // const BASE_TIME: usize = 1436918400_000; 63 | 64 | // fn split_string_arg(string: String) -> Vec { 65 | // string.split(",").map(String::from).collect::>() 66 | // } 67 | 68 | #[derive(Serialize, Deserialize, Abomonation, Debug)] 69 | struct EventCarrier { 70 | time: Date, 71 | event: Event, 72 | } 73 | 74 | #[derive(Eq, PartialEq, Clone, Serialize, Deserialize, Debug, Abomonation)] 75 | #[serde(tag = "type")] 76 | pub enum Event { 77 | Person(Person), 78 | Auction(Auction), 79 | Bid(Bid), 80 | } 81 | 82 | impl Event { 83 | 84 | pub fn time(&self) -> Date { 85 | match *self { 86 | Event::Person(ref p) => p.date_time, 87 | Event::Auction(ref a) => a.date_time, 88 | Event::Bid(ref b) => b.date_time, 89 | } 90 | } 91 | 92 | pub fn create(events_so_far: usize, rng: &mut SmallRng, nex: &mut NEXMarkConfig) -> Self { 93 | let rem = 
nex.next_adjusted_event(events_so_far) % nex.proportion_denominator; 94 | let timestamp = Date(nex.event_timestamp_ns(nex.next_adjusted_event(events_so_far))); 95 | let id = nex.first_event_id + nex.next_adjusted_event(events_so_far); 96 | 97 | if rem < nex.person_proportion { 98 | Event::Person(Person::new(id, timestamp, rng, nex)) 99 | } else if rem < nex.person_proportion + nex.auction_proportion { 100 | Event::Auction(Auction::new(events_so_far, id, timestamp, rng, nex)) 101 | } else { 102 | Event::Bid(Bid::new(id, timestamp, rng, nex)) 103 | } 104 | } 105 | 106 | pub fn id(&self) -> Id { 107 | match *self { 108 | Event::Person(ref p) => p.id, 109 | Event::Auction(ref a) => a.id, 110 | Event::Bid(ref b) => b.auction, // Bid eventss don't have ids, so use the associated auction id 111 | } 112 | } 113 | 114 | // pub fn new(events_so_far: usize, nex: &mut NEXMarkConfig) -> Self { 115 | // let rem = nex.next_adjusted_event(events_so_far) % nex.proportion_denominator; 116 | // let timestamp = nex.event_timestamp(nex.next_adjusted_event(events_so_far)); 117 | // let id = nex.first_event_id + nex.next_adjusted_event(events_so_far); 118 | 119 | // let mut seed = [0u8; 32]; 120 | // for i in 0 .. 8 { 121 | // seed[i] = ((id >> (8 * i)) & 0xFF) as u8; 122 | // } 123 | 124 | // let mut rng = StdRng::from_seed(seed); 125 | 126 | // if rem < nex.person_proportion { 127 | // Event::Person(Person::new(id, timestamp, &mut rng, nex)) 128 | // } else if rem < nex.person_proportion + nex.auction_proportion { 129 | // Event::Auction(Auction::new(events_so_far, id, timestamp, &mut rng, nex)) 130 | // } else { 131 | // Event::Bid(Bid::new(id, timestamp, &mut rng, nex)) 132 | // } 133 | // } 134 | } 135 | 136 | // impl ToData for String{ 137 | // fn to_data(self) -> Result<(usize, Event)> { 138 | // serde_json::from_str(&self) 139 | // .map(|c: EventCarrier| (c.time, c.event)) 140 | // .map_err(|e| e.into()) 141 | // } 142 | // } 143 | 144 | // impl FromData for Event { 145 | // fn from_data(&self, t: &usize) -> String { 146 | // serde_json::to_string(&EventCarrier{ time: t.clone(), event: self.clone()}).unwrap() 147 | // } 148 | // } 149 | 150 | #[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Serialize, Deserialize, Debug, Abomonation, Hash)] 151 | pub struct Person{ 152 | pub id: Id, 153 | pub name: String, 154 | pub email_address: String, 155 | pub credit_card: String, 156 | pub city: String, 157 | pub state: String, 158 | pub date_time: Date 159 | } 160 | 161 | impl Person { 162 | pub fn from(event: Event) -> Option { 163 | match event { 164 | Event::Person(p) => Some(p), 165 | _ => None 166 | } 167 | } 168 | 169 | fn new(id: usize, time: Date, rng: &mut SmallRng, nex: &NEXMarkConfig) -> Self { 170 | Person { 171 | id: Self::last_id(id, nex) + nex.first_person_id, 172 | name: String::new(), 173 | email_address: String::new(), 174 | credit_card: String::new(), 175 | // FIXME: Figure out a faster way to allocate the strings MH 176 | // name: format!("{} {}", 177 | // *rng.choose(&nex.first_names).unwrap(), 178 | // *rng.choose(&nex.last_names).unwrap()), 179 | // email_address: format!("{}@{}.com", rng.gen_string(7), rng.gen_string(5)), 180 | // credit_card: (0..4).map(|_| format!("{:04}", rng.gen_range(0, 10000))).collect::>().join(" "), 181 | city: rng.choose(&nex.us_cities).unwrap().clone(), 182 | state: rng.choose(&nex.us_states).unwrap().clone(), 183 | date_time: time, 184 | } 185 | } 186 | 187 | fn next_id(id: usize, rng: &mut SmallRng, nex: &NEXMarkConfig) -> Id { 188 | let people = 
Self::last_id(id, nex) + 1; 189 | let active = min(people, nex.active_people); 190 | people - active + rng.gen_range(0, active + nex.person_id_lead) 191 | } 192 | 193 | fn last_id(id: usize, nex: &NEXMarkConfig) -> Id { 194 | let epoch = id / nex.proportion_denominator; 195 | let mut offset = id % nex.proportion_denominator; 196 | if nex.person_proportion <= offset { offset = nex.person_proportion - 1; } 197 | epoch * nex.person_proportion + offset 198 | } 199 | } 200 | 201 | #[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Serialize, Deserialize, Debug, Abomonation, Hash)] 202 | pub struct Auction{ 203 | pub id: Id, 204 | pub item_name: String, 205 | pub description: String, 206 | pub initial_bid: usize, 207 | pub reserve: usize, 208 | pub date_time: Date, 209 | pub expires: Date, 210 | pub seller: Id, 211 | pub category: Id, 212 | } 213 | // unsafe_abomonate!(Auction : id, item_name, description, initial_bid, reserve, date_time, expires, seller, category); 214 | 215 | impl Auction { 216 | pub fn from(event: Event) -> Option { 217 | match event { 218 | Event::Auction(p) => Some(p), 219 | _ => None 220 | } 221 | } 222 | 223 | fn new(events_so_far: usize, id: usize, time: Date, rng: &mut SmallRng, nex: &NEXMarkConfig) -> Self { 224 | let initial_bid = rng.gen_price(); 225 | let seller = if rng.gen_range(0, nex.hot_seller_ratio) > 0 { 226 | (Person::last_id(id, nex) / nex.hot_seller_ratio_2) * nex.hot_seller_ratio_2 227 | } else { 228 | Person::next_id(id, rng, nex) 229 | }; 230 | Auction { 231 | id: Self::last_id(id, nex) + nex.first_auction_id, 232 | item_name: rng.gen_string(20), 233 | description: rng.gen_string(100), 234 | initial_bid: initial_bid, 235 | reserve: initial_bid + rng.gen_price(), 236 | date_time: time, 237 | expires: time + Self::next_length(events_so_far, rng, time, nex), 238 | seller: seller + nex.first_person_id, 239 | category: nex.first_category_id + rng.gen_range(0, nex.num_categories), 240 | } 241 | } 242 | 243 | fn next_id(id: usize, rng: &mut SmallRng, nex: &NEXMarkConfig) -> Id { 244 | let max_auction = Self::last_id(id, nex); 245 | let min_auction = if max_auction < nex.in_flight_auctions { 0 } else { max_auction - nex.in_flight_auctions }; 246 | min_auction + rng.gen_range(0, max_auction - min_auction + 1 + nex.auction_id_lead) 247 | } 248 | 249 | fn last_id(id: usize, nex: &NEXMarkConfig) -> Id { 250 | let mut epoch = id / nex.proportion_denominator; 251 | let mut offset = id % nex.proportion_denominator; 252 | if offset < nex.person_proportion { 253 | epoch -= 1; 254 | offset = nex.auction_proportion - 1; 255 | } else if nex.person_proportion + nex.auction_proportion <= offset { 256 | offset = nex.auction_proportion - 1; 257 | } else { 258 | offset -= nex.person_proportion; 259 | } 260 | epoch * nex.auction_proportion + offset 261 | } 262 | 263 | fn next_length(events_so_far: usize, rng: &mut SmallRng, time: Date, nex: &NEXMarkConfig) -> Date { 264 | let current_event = nex.next_adjusted_event(events_so_far); 265 | let events_for_auctions = (nex.in_flight_auctions * nex.proportion_denominator) / nex.auction_proportion; 266 | let future_auction = nex.event_timestamp_ns(current_event+events_for_auctions); 267 | 268 | let horizon = future_auction - time.0; 269 | Date(1 + rng.gen_range(0, max(horizon * 2, 1))) 270 | } 271 | } 272 | 273 | #[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Serialize, Deserialize, Debug, Abomonation, Hash)] 274 | pub struct Bid{ 275 | pub auction: Id, 276 | pub bidder: Id, 277 | pub price: usize, 278 | pub date_time: Date, 279 | } 280 | 
// unsafe_abomonate!(Bid : auction, bidder, price, date_time); 281 | 282 | impl Bid { 283 | pub fn from(event: Event) -> Option { 284 | match event { 285 | Event::Bid(p) => Some(p), 286 | _ => None 287 | } 288 | } 289 | 290 | fn new(id: usize, time: Date, rng: &mut SmallRng, nex: &NEXMarkConfig) -> Self { 291 | let auction = if 0 < rng.gen_range(0, nex.hot_auction_ratio){ 292 | (Auction::last_id(id, nex) / nex.hot_auction_ratio_2) * nex.hot_auction_ratio_2 293 | } else { 294 | Auction::next_id(id, rng, nex) 295 | }; 296 | let bidder = if 0 < rng.gen_range(0, nex.hot_bidder_ratio) { 297 | (Person::last_id(id, nex) / nex.hot_bidder_ratio_2) * nex.hot_bidder_ratio_2 + 1 298 | } else { 299 | Person::next_id(id, rng, nex) 300 | }; 301 | Bid { 302 | auction: auction + nex.first_auction_id, 303 | bidder: bidder + nex.first_person_id, 304 | price: rng.gen_price(), 305 | date_time: time, 306 | } 307 | } 308 | } 309 | -------------------------------------------------------------------------------- /nexmark/src/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate abomonation; 2 | #[macro_use] 3 | extern crate abomonation_derive; 4 | extern crate timely; 5 | extern crate serde; 6 | extern crate serde_json; 7 | #[macro_use] 8 | extern crate serde_derive; 9 | extern crate rand; 10 | extern crate streaming_harness; 11 | extern crate dynamic_scaling_mechanism; 12 | extern crate fnv; 13 | 14 | pub mod config; 15 | pub mod event; 16 | pub mod tools; 17 | 18 | pub mod queries; 19 | 20 | 21 | use std::hash::Hash; 22 | use std::hash::Hasher; 23 | 24 | pub fn calculate_hash(t: &T) -> u64 { 25 | let mut h: ::fnv::FnvHasher = Default::default(); 26 | t.hash(&mut h); 27 | h.finish() 28 | } 29 | -------------------------------------------------------------------------------- /nexmark/src/queries/mod.rs: -------------------------------------------------------------------------------- 1 | use ::std::rc::Rc; 2 | use timely::dataflow::{Scope, Stream}; 3 | use timely::dataflow::operators::capture::event::link::EventLink; 4 | use timely::dataflow::operators::capture::Replay; 5 | 6 | use dynamic_scaling_mechanism::Control; 7 | use event::{Bid, Auction, Person, Date}; 8 | 9 | mod q1; 10 | mod q1_flex; 11 | mod q2; 12 | mod q2_flex; 13 | mod q3; 14 | mod q3_flex; 15 | mod q4_q6_common; 16 | mod q4_q6_common_flex; 17 | mod q4; 18 | mod q4_flex; 19 | mod q5; 20 | mod q5_flex; 21 | mod q6; 22 | mod q6_flex; 23 | mod q7; 24 | mod q7_flex; 25 | mod q8; 26 | mod q8_flex; 27 | 28 | pub use self::q1::q1; 29 | pub use self::q1_flex::q1_flex; 30 | pub use self::q2::q2; 31 | pub use self::q2_flex::q2_flex; 32 | pub use self::q3::q3; 33 | pub use self::q3_flex::q3_flex; 34 | pub use self::q4_q6_common::q4_q6_common; 35 | pub use self::q4_q6_common_flex::q4_q6_common_flex; 36 | pub use self::q4::q4; 37 | pub use self::q4_flex::q4_flex; 38 | pub use self::q5::q5; 39 | pub use self::q5_flex::q5_flex; 40 | pub use self::q6::q6; 41 | pub use self::q6_flex::q6_flex; 42 | pub use self::q7::q7; 43 | pub use self::q7_flex::q7_flex; 44 | pub use self::q8::q8; 45 | pub use self::q8_flex::q8_flex; 46 | 47 | pub struct NexmarkInput<'a> { 48 | pub control: &'a Rc>, 49 | pub bids: &'a Rc>, 50 | pub auctions: &'a Rc>, 51 | pub people: &'a Rc>, 52 | pub closed_auctions: &'a Rc>, 53 | pub closed_auctions_flex: &'a Rc>, 54 | } 55 | 56 | impl<'a> NexmarkInput<'a> { 57 | pub fn control>(&self, scope: &mut S) -> Stream { 58 | Some(self.control.clone()).replay_into(scope) 59 | } 60 | 61 | pub fn 
bids>(&self, scope: &mut S) -> Stream { 62 | Some(self.bids.clone()).replay_into(scope) 63 | } 64 | 65 | pub fn auctions>(&self, scope: &mut S) -> Stream { 66 | Some(self.auctions.clone()).replay_into(scope) 67 | } 68 | 69 | pub fn people>(&self, scope: &mut S) -> Stream { 70 | Some(self.people.clone()).replay_into(scope) 71 | } 72 | 73 | pub fn closed_auctions>(&self, scope: &mut S) -> Stream { 74 | Some(self.closed_auctions.clone()).replay_into(scope) 75 | } 76 | 77 | pub fn closed_auctions_flex>(&self, scope: &mut S) -> Stream { 78 | Some(self.closed_auctions_flex.clone()).replay_into(scope) 79 | } 80 | } 81 | 82 | 83 | #[derive(Copy, Clone)] 84 | pub struct NexmarkTimer { 85 | pub time_dilation: usize 86 | } 87 | 88 | impl NexmarkTimer { 89 | 90 | #[inline(always)] 91 | fn to_nexmark_time (self, x: usize) -> Date { 92 | debug_assert!(x.checked_mul(self.time_dilation).is_some(), "multiplication failed: {} * {}", x, self.time_dilation); 93 | Date::new(x * self.time_dilation) 94 | } 95 | 96 | #[inline(always)] 97 | fn from_nexmark_time(self, x: Date) -> usize{ 98 | *x / self.time_dilation 99 | } 100 | 101 | } 102 | -------------------------------------------------------------------------------- /nexmark/src/queries/q1.rs: -------------------------------------------------------------------------------- 1 | use ::timely::dataflow::{Scope, Stream}; 2 | use timely::dataflow::operators::{Map}; 3 | 4 | use ::event::Bid; 5 | 6 | use {queries::NexmarkInput, queries::NexmarkTimer}; 7 | 8 | pub fn q1>(input: &NexmarkInput, _nt: NexmarkTimer, scope: &mut S) -> Stream 9 | { 10 | input.bids(scope) 11 | .map_in_place(|b| b.price = (b.price * 89) / 100) 12 | } 13 | -------------------------------------------------------------------------------- /nexmark/src/queries/q1_flex.rs: -------------------------------------------------------------------------------- 1 | use ::timely::dataflow::{Scope, Stream}; 2 | use timely::dataflow::operators::Map; 3 | 4 | use dynamic_scaling_mechanism::operator::StatefulOperator; 5 | use ::event::{Bid}; 6 | use ::calculate_hash; 7 | 8 | use {queries::NexmarkInput, queries::NexmarkTimer}; 9 | 10 | pub fn q1_flex>(input: &NexmarkInput, _nt: NexmarkTimer, scope: &mut S) -> Stream 11 | { 12 | let control = input.control(scope); 13 | 14 | input.bids(scope) 15 | .distribute(&control, |bid| calculate_hash(&bid.auction), "q0-flex") 16 | .map_in_place(|(_, _, b)| b.price = (b.price * 89) / 100) 17 | } 18 | -------------------------------------------------------------------------------- /nexmark/src/queries/q2.rs: -------------------------------------------------------------------------------- 1 | use ::timely::dataflow::{Scope, Stream}; 2 | use timely::dataflow::operators::{Filter, Map}; 3 | 4 | use {queries::NexmarkInput, queries::NexmarkTimer}; 5 | 6 | pub fn q2>(input: &NexmarkInput, _nt: NexmarkTimer, scope: &mut S) -> Stream 7 | { 8 | let auction_skip = 123; 9 | input.bids(scope) 10 | .filter(move |b| b.auction % auction_skip == 0) 11 | .map(|b| (b.auction, b.price)) 12 | } 13 | -------------------------------------------------------------------------------- /nexmark/src/queries/q2_flex.rs: -------------------------------------------------------------------------------- 1 | use ::timely::dataflow::{Scope, Stream}; 2 | use timely::dataflow::operators::{Filter, Map}; 3 | 4 | use dynamic_scaling_mechanism::operator::StatefulOperator; 5 | use ::calculate_hash; 6 | 7 | use {queries::NexmarkInput, queries::NexmarkTimer}; 8 | 9 | pub fn q2_flex>(input: &NexmarkInput, _nt: NexmarkTimer, 
scope: &mut S) -> Stream 10 | { 11 | let control = input.control(scope); 12 | 13 | let auction_skip = 123; 14 | let state_stream = input.bids(scope) 15 | .distribute(&control, |bid| calculate_hash(&bid.auction), "q2-flex"); 16 | state_stream 17 | .filter(move |(_, _, b)| b.auction % auction_skip == 0) 18 | .map(|(_, _, b)| (b.auction, b.price)) 19 | 20 | } 21 | -------------------------------------------------------------------------------- /nexmark/src/queries/q3.rs: -------------------------------------------------------------------------------- 1 | 2 | use ::std::collections::HashMap; 3 | use ::timely::dataflow::{Scope, Stream}; 4 | use timely::dataflow::channels::pact::Exchange; 5 | use timely::dataflow::operators::{Filter, Operator}; 6 | 7 | use ::event::{Auction, Person}; 8 | 9 | use {queries::NexmarkInput, queries::NexmarkTimer}; 10 | 11 | pub fn q3>(input: &NexmarkInput, _nt: NexmarkTimer, scope: &mut S) -> Stream 12 | { 13 | let auctions = input.auctions(scope) 14 | .filter(|a| a.category == 10); 15 | 16 | let people = input.people(scope) 17 | .filter(|p| p.state == "OR" || p.state == "ID" || p.state == "CA"); 18 | 19 | let mut auctions_buffer = vec![]; 20 | let mut people_buffer = vec![]; 21 | 22 | auctions 23 | .binary( 24 | &people, 25 | Exchange::new(|a: &Auction| a.seller as u64), 26 | Exchange::new(|p: &Person| p.id as u64), 27 | "Q3 Join", 28 | |_capability, _info| { 29 | let mut state1 = HashMap::new(); 30 | let mut state2 = HashMap::::new(); 31 | 32 | move |input1, input2, output| { 33 | 34 | // Process each input auction. 35 | input1.for_each(|time, data| { 36 | data.swap(&mut auctions_buffer); 37 | let mut session = output.session(&time); 38 | for auction in auctions_buffer.drain(..) { 39 | if let Some(person) = state2.get(&auction.seller) { 40 | session.give(( 41 | person.name.clone(), 42 | person.city.clone(), 43 | person.state.clone(), 44 | auction.id)); 45 | } 46 | state1.entry(auction.seller).or_insert(Vec::new()).push(auction); 47 | } 48 | }); 49 | 50 | // Process each input person. 51 | input2.for_each(|time, data| { 52 | data.swap(&mut people_buffer); 53 | let mut session = output.session(&time); 54 | for person in people_buffer.drain(..) 
{ 55 | if let Some(auctions) = state1.get(&person.id) { 56 | for auction in auctions.iter() { 57 | session.give(( 58 | person.name.clone(), 59 | person.city.clone(), 60 | person.state.clone(), 61 | auction.id)); 62 | } 63 | } 64 | state2.insert(person.id, person); 65 | } 66 | }); 67 | } 68 | } 69 | ) 70 | } 71 | -------------------------------------------------------------------------------- /nexmark/src/queries/q3_flex.rs: -------------------------------------------------------------------------------- 1 | 2 | use ::std::collections::HashMap; 3 | use ::timely::dataflow::{Scope, Stream}; 4 | use timely::dataflow::operators::Filter; 5 | 6 | use dynamic_scaling_mechanism::operator::StatefulOperator; 7 | use ::event::{Auction, Person}; 8 | use ::calculate_hash; 9 | 10 | use {queries::NexmarkInput, queries::NexmarkTimer}; 11 | 12 | pub fn q3_flex>(input: &NexmarkInput, _nt: NexmarkTimer, scope: &mut S) -> Stream 13 | { 14 | let control = input.control(scope); 15 | 16 | let auctions = input.auctions(scope) 17 | .filter(|a| a.category == 10); 18 | 19 | let people = input.people(scope) 20 | .filter(|p| p.state == "OR" || p.state == "ID" || p.state == "CA"); 21 | 22 | auctions.stateful_binary(&control, &people, |a| calculate_hash(&a.seller), |p| calculate_hash(&p.id), "q3-flex join", |cap, data, auction_bin, people_bin, output| { 23 | let mut session = output.session(&cap); 24 | let people_state: &mut HashMap<_, Person> = people_bin.state(); 25 | for (_time, auction) in data.drain(..) { 26 | if let Some(mut person) = people_state.get(&auction.seller) { 27 | session.give((person.name.clone(), 28 | person.city.clone(), 29 | person.state.clone(), 30 | auction.id)); 31 | } 32 | // Update auction state 33 | let bin: &mut HashMap<_, Auction> = auction_bin.state(); 34 | bin.insert(auction.seller, auction); 35 | }; 36 | }, |cap, data, auction_bin, people_bin, output| { 37 | let mut session = output.session(&cap); 38 | let auction_state = auction_bin.state(); 39 | for (_time, person) in data.drain(..) 
{ 40 | if let Some(mut auction) = auction_state.get(&person.id) { 41 | session.give((person.name.clone(), 42 | person.city.clone(), 43 | person.state.clone(), 44 | auction.id)); 45 | } 46 | // Update people state 47 | people_bin.state().insert(person.id, person); 48 | }; 49 | }) 50 | } 51 | -------------------------------------------------------------------------------- /nexmark/src/queries/q4.rs: -------------------------------------------------------------------------------- 1 | 2 | use ::timely::dataflow::{Scope, Stream}; 3 | use timely::dataflow::channels::pact::Exchange; 4 | use timely::dataflow::operators::{Map, Operator}; 5 | 6 | use {queries::NexmarkInput, queries::NexmarkTimer}; 7 | 8 | pub fn q4>(input: &NexmarkInput, _nt: NexmarkTimer, scope: &mut S) -> Stream 9 | { 10 | 11 | input.closed_auctions(scope) 12 | .map(|(a, b)| (a.category, b.price)) 13 | .unary(Exchange::new(|x: &(usize, usize)| x.0 as u64), "Q4 Average", 14 | |_cap, _info| { 15 | 16 | // Stores category -> (total, count) 17 | let mut state = std::collections::HashMap::new(); 18 | 19 | move |input, output| { 20 | input.for_each(|time, data| { 21 | let mut session = output.session(&time); 22 | for (category, price) in data.iter().cloned() { 23 | let entry = state.entry(category).or_insert((0, 0)); 24 | entry.0 += price; 25 | entry.1 += 1; 26 | session.give((category, entry.0 / entry.1)); 27 | } 28 | }) 29 | } 30 | }) 31 | } 32 | -------------------------------------------------------------------------------- /nexmark/src/queries/q4_flex.rs: -------------------------------------------------------------------------------- 1 | 2 | use ::std::collections::HashMap; 3 | use ::timely::dataflow::{Scope, Stream}; 4 | use timely::dataflow::operators::Map; 5 | 6 | use dynamic_scaling_mechanism::operator::StatefulOperator; 7 | use ::calculate_hash; 8 | 9 | use {queries::NexmarkInput, queries::NexmarkTimer}; 10 | 11 | pub fn q4_flex>(input: &NexmarkInput, _nt: NexmarkTimer, scope: &mut S) -> Stream 12 | { 13 | let control = input.control(scope); 14 | 15 | input.closed_auctions_flex(scope) 16 | .map(|(a, b)| (a.category, b.price)) 17 | .stateful_unary(&control, |x: &(usize, usize)| calculate_hash(&x.0), "Q4 Average", 18 | |cap, data, bin, output| { 19 | let mut session = output.session(&cap); 20 | let state: &mut HashMap<_, _> = bin.state(); 21 | for (_time, (category, price)) in data.drain(..) 
{ 22 | let entry = state.entry(category).or_insert((0usize, 0usize)); 23 | entry.0 += price; 24 | entry.1 += 1; 25 | session.give((category, entry.0 / entry.1)); 26 | } 27 | }) 28 | } 29 | -------------------------------------------------------------------------------- /nexmark/src/queries/q4_q6_common.rs: -------------------------------------------------------------------------------- 1 | 2 | use ::std::collections::HashMap; 3 | use ::timely::dataflow::{Scope, Stream}; 4 | use timely::dataflow::channels::pact::Exchange; 5 | use timely::dataflow::operators::{Capability, Operator}; 6 | 7 | use ::event::{Auction, Bid}; 8 | 9 | use {queries::NexmarkInput, queries::NexmarkTimer}; 10 | 11 | pub fn q4_q6_common>(input: &NexmarkInput, nt: NexmarkTimer, scope: &mut S) -> Stream 12 | { 13 | let bids = input.bids(scope); 14 | let auctions = input.auctions(scope); 15 | 16 | bids.binary_frontier( 17 | &auctions, 18 | Exchange::new(|b: &Bid| b.auction as u64), 19 | Exchange::new(|a: &Auction| a.id as u64), 20 | "Q4 Auction close", 21 | |_capability, _info| { 22 | let mut state: HashMap<_, (Option<_>, Vec)> = std::collections::HashMap::new(); 23 | let mut opens = std::collections::BinaryHeap::new(); 24 | 25 | let mut capability: Option> = None; 26 | use std::collections::hash_map::Entry; 27 | use std::cmp::Reverse; 28 | 29 | fn is_valid_bid(bid: &Bid, auction: &Auction) -> bool { 30 | bid.price >= auction.reserve && auction.date_time <= bid.date_time && bid.date_time < auction.expires 31 | } 32 | 33 | move |input1, input2, output| { 34 | 35 | // Record each bid. 36 | // NB: We don't summarize as the max, because we don't know which are valid. 37 | input1.for_each(|time, data| { 38 | for bid in data.iter().cloned() { 39 | // eprintln!("[{:?}] bid: {:?}", time.time().inner, bid); 40 | let mut entry = state.entry(bid.auction).or_insert((None, Vec::new())); 41 | if let Some(ref auction) = entry.0 { 42 | debug_assert!(entry.1.len() <= 1); 43 | if is_valid_bid(&bid, auction) { 44 | // bid must fall between auction creation and expiration 45 | if let Some(existing) = entry.1.get(0).cloned() { 46 | if existing.price < bid.price { 47 | entry.1[0] = bid; 48 | } 49 | } else { 50 | entry.1.push(bid); 51 | } 52 | } 53 | } else { 54 | opens.push((Reverse(bid.date_time), bid.auction)); 55 | if capability.as_ref().map(|c| nt.to_nexmark_time(*c.time()) <= bid.date_time) != Some(true) { 56 | capability = Some(time.delayed(&nt.from_nexmark_time(bid.date_time))); 57 | } 58 | entry.1.push(bid); 59 | } 60 | } 61 | }); 62 | 63 | // Record each auction. 64 | input2.for_each(|time, data| { 65 | for auction in data.iter().cloned() { 66 | // eprintln!("[{:?}] auction: {:?}", time.time().inner, auction); 67 | if capability.as_ref().map(|c| nt.to_nexmark_time(*c.time()) <= auction.expires) != Some(true) { 68 | capability = Some(time.delayed(&nt.from_nexmark_time(auction.expires))); 69 | } 70 | opens.push((Reverse(auction.expires), auction.id)); 71 | let mut entry = state.entry(auction.id).or_insert((None, Vec::new())); 72 | debug_assert!(entry.0.is_none()); 73 | entry.1.retain(|bid| is_valid_bid(&bid, &auction)); 74 | if let Some(bid) = entry.1.iter().max_by_key(|bid| bid.price).cloned() { 75 | entry.1.clear(); 76 | entry.1.push(bid); 77 | } 78 | entry.0 = Some(auction); 79 | } 80 | }); 81 | 82 | // Use frontiers to determine which auctions to close. 
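// The minimum of the two input frontiers (an exhausted input counts as
// fully advanced) bounds the timestamps that can still arrive, so every
// entry popped from `opens` below that bound can be resolved now: an expired
// auction emits its best remaining valid bid and is removed, while entries
// still waiting for their auction merely shed bids no newer than the popped
// time.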
83 | if let Some(ref capability) = capability { 84 | let complete1 = input1.frontier.frontier().get(0).cloned().unwrap_or(usize::max_value()); 85 | let complete2 = input2.frontier.frontier().get(0).cloned().unwrap_or(usize::max_value()); 86 | let complete = std::cmp::min(complete1, complete2); 87 | 88 | let mut session = output.session(capability); 89 | while opens.peek().map(|x| complete == usize::max_value() || (x.0).0 < nt.to_nexmark_time(complete)) == Some(true) { 90 | // eprintln!("[{:?}] opens.len(): {} state.len(): {} {:?}", capability.time().inner, opens.len(), state.len(), state.iter().map(|x| (x.1).1.len()).sum::()); 91 | 92 | let (Reverse(time), auction) = opens.pop().unwrap(); 93 | let mut entry = state.entry(auction); 94 | if let Entry::Occupied(mut entry) = entry { 95 | let delete = { 96 | let auction_bids = entry.get_mut(); 97 | if let Some(ref auction) = auction_bids.0 { 98 | if time == auction.expires { 99 | // Auction expired, clean up state 100 | if let Some(winner) = auction_bids.1.pop() { 101 | session.give((auction.clone(), winner)); 102 | } 103 | true 104 | } else { 105 | false 106 | } 107 | } else { 108 | auction_bids.1.retain(|bid| bid.date_time > time); 109 | auction_bids.1.is_empty() 110 | } 111 | }; 112 | if delete { 113 | entry.remove_entry(); 114 | } 115 | } 116 | } 117 | } 118 | 119 | // Downgrade capability. 120 | if let Some(head) = opens.peek() { 121 | capability.as_mut().map(|c| c.downgrade(&nt.from_nexmark_time((head.0).0))); 122 | } else { 123 | capability = None; 124 | } 125 | } 126 | } 127 | ) 128 | } 129 | -------------------------------------------------------------------------------- /nexmark/src/queries/q4_q6_common_flex.rs: -------------------------------------------------------------------------------- 1 | 2 | use ::std::collections::HashMap; 3 | use ::timely::dataflow::{Scope, Stream}; 4 | 5 | use dynamic_scaling_mechanism::Bin; 6 | use dynamic_scaling_mechanism::operator::StatefulOperator; 7 | use ::event::{Auction, Bid}; 8 | use ::calculate_hash; 9 | 10 | use queries::{NexmarkInput, NexmarkTimer}; 11 | 12 | pub fn q4_q6_common_flex>(input: &NexmarkInput, nt: NexmarkTimer, scope: &mut S) -> Stream 13 | { 14 | let control = input.control(scope); 15 | 16 | let bids = input.bids(scope); 17 | let auctions = input.auctions(scope); 18 | 19 | bids.stateful_binary_input(&control, 20 | &auctions, 21 | |bid: &Bid| calculate_hash(&bid.auction), 22 | |a: &Auction| calculate_hash(&a.id), 23 | "Q4 Auction close", 24 | move |state, cap, _time, data, _output| { 25 | for (_, key_id, bid) in &*data { 26 | state.get(*key_id).notificator().notify_at_data(cap, nt.from_nexmark_time(bid.date_time), bid.clone()); 27 | } 28 | }, 29 | move |state, cap, _time, data, _output| { 30 | for (_target, key_id, auction) in &*data { 31 | // Request notification for the auction's expiration time, which is used to look into the auctions_state 32 | state.get(*key_id).notificator().notify_at_data(cap, nt.from_nexmark_time(auction.expires), auction.clone()); 33 | } 34 | }, 35 | |_cap, data, bid_bin, _auction_bin, _output| { 36 | let state: &mut HashMap<_, _> = bid_bin.state(); 37 | for (_time, bid) in data.drain(..) { 38 | // Update bin state 39 | state.entry(bid.auction).or_insert_with(Vec::new).push(bid); 40 | }; 41 | }, 42 | |cap, auction_data, bid_bin, _auction_bin: &mut Bin<_, Vec<()>, _>, output| { 43 | let mut session = output.session(&cap); 44 | let bid_state = bid_bin.state(); 45 | for (_time, auction) in auction_data.drain(..) 
{ 46 | if let Some(mut bids) = bid_state.remove(&auction.id) { 47 | bids.retain(|b| 48 | auction.date_time <= b.date_time && 49 | b.date_time < auction.expires && 50 | b.price >= auction.reserve); 51 | bids.sort_by(|b1, b2| b1.price.cmp(&b2.price)); 52 | if let Some(winner) = bids.pop() { 53 | session.give((auction, winner)); 54 | } 55 | } 56 | } 57 | }, 58 | ) 59 | } 60 | -------------------------------------------------------------------------------- /nexmark/src/queries/q5.rs: -------------------------------------------------------------------------------- 1 | 2 | use ::std::collections::HashMap; 3 | use ::timely::dataflow::{Scope, Stream}; 4 | use timely::dataflow::channels::pact::Exchange; 5 | use timely::dataflow::operators::{Map, Operator, CapabilitySet}; 6 | 7 | use ::event::Date; 8 | 9 | use {queries::NexmarkInput, queries::NexmarkTimer}; 10 | 11 | pub fn q5>(input: &NexmarkInput, nt: NexmarkTimer, scope: &mut S, window_slice_count: usize, window_slide_ns: usize) -> Stream 12 | { 13 | input.bids(scope) 14 | .map(move |b| (b.auction, Date::new(((*b.date_time / window_slide_ns) + 1) * window_slide_ns))) 15 | // TODO: Could pre-aggregate pre-exchange, if there was reason to do so. 16 | .unary_frontier(Exchange::new(|b: &(usize, _)| b.0 as u64), "Q5 Accumulate", 17 | |capability, _info| { 18 | let mut cap_set = CapabilitySet::new(); 19 | cap_set.insert(capability); 20 | 21 | let mut additions = HashMap::new(); 22 | let mut deletions = HashMap::new(); 23 | let mut accumulations = HashMap::new(); 24 | 25 | let mut bids_buffer = vec![]; 26 | 27 | move |input, output| { 28 | input.for_each(|time, data| { 29 | data.swap(&mut bids_buffer); 30 | 31 | for (auction, a_time) in bids_buffer.drain(..) { 32 | additions 33 | .entry(nt.from_nexmark_time(a_time)) 34 | .or_insert_with(Vec::new) 35 | .push(auction); 36 | } 37 | }); 38 | 39 | // Extract and order times we can now process. 40 | let mut times = { 41 | let add_times = additions.keys().filter(|t| !input.frontier.less_equal(t)).cloned(); 42 | let del_times = deletions.keys().filter(|t| !input.frontier.less_equal(t)).cloned(); 43 | add_times.chain(del_times).collect::>() 44 | }; 45 | times.sort(); 46 | times.dedup(); 47 | 48 | for time in times.drain(..) { 49 | if let Some(additions) = additions.remove(&time) { 50 | for &auction in additions.iter() { 51 | *accumulations.entry(auction).or_insert(0) += 1; 52 | } 53 | let new_time = time + (window_slice_count * window_slide_ns); 54 | deletions.insert(new_time, additions); 55 | } 56 | if let Some(deletions) = deletions.remove(&time) { 57 | for auction in deletions.into_iter() { 58 | use std::collections::hash_map::Entry; 59 | match accumulations.entry(auction) { 60 | Entry::Occupied(mut entry) => { 61 | *entry.get_mut() -= 1; 62 | if *entry.get_mut() == 0 { 63 | entry.remove(); 64 | } 65 | }, 66 | _ => panic!("entry has to exist"), 67 | } 68 | } 69 | } 70 | let time = cap_set.delayed(&time); 71 | // TODO: This only accumulates per *worker*, not globally! 
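// Emits this worker's current hottest auction, i.e. the auction with the
// highest bid count in the active sliding window; producing a single global
// winner would require a further reduction step across workers.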
72 | if let Some((_count, auction)) = accumulations.iter().map(|(&a, &c)| (c, a)).max() { 73 | output.session(&time).give(auction); 74 | } 75 | } 76 | cap_set.downgrade(&input.frontier.frontier()); 77 | } 78 | }) 79 | } 80 | -------------------------------------------------------------------------------- /nexmark/src/queries/q5_flex.rs: -------------------------------------------------------------------------------- 1 | 2 | use ::std::collections::HashMap; 3 | use ::timely::dataflow::{Scope, Stream}; 4 | use timely::dataflow::operators::Map; 5 | 6 | use dynamic_scaling_mechanism::operator::StatefulOperator; 7 | use ::event::Date; 8 | use ::calculate_hash; 9 | 10 | use {queries::NexmarkInput, queries::NexmarkTimer}; 11 | 12 | pub fn q5_flex>(input: &NexmarkInput, nt: NexmarkTimer, scope: &mut S, window_slice_count: usize, window_slide_ns: usize) -> Stream 13 | { 14 | let control = input.control(scope); 15 | 16 | let bids = input.bids(scope) 17 | // Discretize bid's datetime based on slides 18 | .map(move |b| (b.auction, Date::new(((*b.date_time / window_slide_ns) + 1) * window_slide_ns))); 19 | // TODO: Could pre-aggregate pre-exchange, if there was reason to do so. 20 | 21 | #[derive(Abomonation, Eq, PartialEq, Clone)] 22 | enum InsDel { Ins(T), Del(T) }; 23 | let mut in_buffer = Vec::new(); 24 | 25 | // Partitions by auction id 26 | bids.stateful_unary_input(&control, |(auction, _time)| calculate_hash(auction), "q5-flex", move |state, cap, _time, data, _output| { 27 | data.swap(&mut in_buffer); 28 | 29 | for (_, key_id, (auction, a_time)) in in_buffer.drain(..) { 30 | let not = state.get(key_id).notificator(); 31 | not.notify_at_data(cap, nt.from_nexmark_time(a_time), InsDel::Ins(auction)); 32 | not.notify_at_data(cap, nt.from_nexmark_time(Date::new(*a_time + window_slice_count * window_slide_ns)), InsDel::Del(auction)); 33 | } 34 | }, move |cap, data, bid_bin, output| { 35 | // Process additions (if any) 36 | // println!("data.len(): {:?}", data.len()); 37 | for (_time, action) in data.drain(..) { 38 | match action { 39 | InsDel::Ins(auction) => { 40 | // Stash pending deletions 41 | let bid_state: &mut HashMap<_, _> = bid_bin.state(); 42 | let slot = bid_state.entry(auction).or_insert(0); 43 | *slot += 1; 44 | }, 45 | InsDel::Del(auction) => { 46 | let slot = bid_bin.state().entry(auction).or_insert(0); 47 | *slot -= 1; 48 | } 49 | } 50 | } 51 | bid_bin.state().retain(|_k, v| *v != 0); 52 | // Output results (if any) 53 | // TODO: This only accumulates per *bin*, not globally! 
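// Emits the hottest auction within this bin only. Bins are Megaphone's unit
// of state placement and migration, so a global winner would require
// combining the per-bin results downstream.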
54 | if let Some((auction, _count)) = bid_bin.state().iter().max_by_key(|(_auction_id, count)| *count) { 55 | let mut session = output.session(&cap); 56 | session.give(*auction); 57 | } 58 | }) 59 | } 60 | -------------------------------------------------------------------------------- /nexmark/src/queries/q6.rs: -------------------------------------------------------------------------------- 1 | 2 | use std::collections::VecDeque; 3 | use ::timely::dataflow::{Scope, Stream}; 4 | use timely::dataflow::channels::pact::Exchange; 5 | use timely::dataflow::operators::{Map, Operator}; 6 | 7 | use {queries::NexmarkInput, queries::NexmarkTimer}; 8 | 9 | pub fn q6>(input: &NexmarkInput, _nt: NexmarkTimer, scope: &mut S) -> Stream 10 | { 11 | 12 | input.closed_auctions(scope) 13 | .map(|(_a, b)| (b.bidder, b.price)) 14 | .unary(Exchange::new(|x: &(usize, usize)| x.0 as u64), "Q6 Average", 15 | |_cap, _info| { 16 | 17 | // Store bidder -> [prices; 10] 18 | let mut state = std::collections::HashMap::new(); 19 | 20 | move |input, output| { 21 | input.for_each(|time, data| { 22 | let mut session = output.session(&time); 23 | for (bidder, price) in data.iter().cloned() { 24 | let entry = state.entry(bidder).or_insert_with(VecDeque::new); 25 | if entry.len() >= 10 { entry.pop_back(); } 26 | entry.push_front(price); 27 | let sum: usize = entry.iter().sum(); 28 | session.give((bidder, sum / entry.len())); 29 | } 30 | }); 31 | } 32 | }) 33 | } 34 | -------------------------------------------------------------------------------- /nexmark/src/queries/q6_flex.rs: -------------------------------------------------------------------------------- 1 | 2 | use ::std::collections::HashMap; 3 | use ::timely::dataflow::{Scope, Stream}; 4 | use timely::dataflow::operators::Map; 5 | 6 | use dynamic_scaling_mechanism::operator::StatefulOperator; 7 | use ::calculate_hash; 8 | 9 | use {queries::NexmarkInput, queries::NexmarkTimer}; 10 | 11 | pub fn q6_flex>(input: &NexmarkInput, _nt: NexmarkTimer, scope: &mut S) -> Stream 12 | { 13 | let control = input.control(scope); 14 | let winners = input.closed_auctions_flex(scope) 15 | .map(|(_a, b)| (b.bidder, b.price)); 16 | 17 | winners.stateful_unary(&control, |(b, _p)| calculate_hash(b), "q6-flex", |cap, data, bin, output| { 18 | let mut session = output.session(&cap); 19 | let state: &mut HashMap<_, _> = bin.state(); 20 | for (_time, (bidder, price)) in data.drain(..) { 21 | let entry = state.entry(bidder).or_insert_with(Vec::new); 22 | while entry.len() >= 10 { entry.remove(0); } 23 | entry.push(price); 24 | let sum: usize = entry.iter().sum(); 25 | session.give((bidder, sum / entry.len())); 26 | } 27 | }) 28 | } 29 | -------------------------------------------------------------------------------- /nexmark/src/queries/q7.rs: -------------------------------------------------------------------------------- 1 | 2 | use ::timely::dataflow::{Scope, Stream}; 3 | use timely::dataflow::channels::pact::{Pipeline, Exchange}; 4 | use timely::dataflow::operators::{Capability, Map, Operator}; 5 | 6 | use ::event::Date; 7 | 8 | use {queries::NexmarkInput, queries::NexmarkTimer}; 9 | 10 | pub fn q7>(input: &NexmarkInput, nt: NexmarkTimer, scope: &mut S, window_size_ns: usize) -> Stream 11 | { 12 | 13 | input.bids(scope) 14 | .map(move |b| (Date::new(((*b.date_time / window_size_ns) + 1) * window_size_ns), b.price)) 15 | .unary_frontier(Pipeline, "Q7 Pre-reduce", |_cap, _info| { 16 | 17 | // Tracks the worker-local maximal bid for each capability. 
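// Q7 computes the highest bid per fixed window in two stages: this pipelined
// pre-reduce keeps one per-worker running maximum per window and releases it
// once the window can no longer change, and the exchange-based
// "Q7 All-reduce" operator below combines the per-worker maxima into the
// global maximum for each window.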
18 | let mut maxima = Vec::<(Capability, usize)>::new(); 19 | 20 | move |input, output| { 21 | input.for_each(|time, data| { 22 | for (window, price) in data.iter().cloned() { 23 | if let Some(position) = maxima.iter().position(|x| *(x.0).time() == nt.from_nexmark_time(window)) { 24 | if maxima[position].1 < price { 25 | maxima[position].1 = price; 26 | } 27 | } else { 28 | maxima.push((time.delayed(&nt.from_nexmark_time(window)), price)); 29 | } 30 | } 31 | }); 32 | 33 | for &(ref capability, price) in maxima.iter() { 34 | if !input.frontier.less_than(capability.time()) { 35 | output.session(&capability).give((*capability.time(), price)); 36 | } 37 | } 38 | 39 | maxima.retain(|(capability, _)| input.frontier.less_than(capability)); 40 | } 41 | }) 42 | .unary_frontier(Exchange::new(move |x: &(usize, usize)| (x.0 / window_size_ns) as u64), "Q7 All-reduce", |_cap, _info| { 43 | 44 | // Tracks the global maximal bid for each capability. 45 | let mut maxima = Vec::<(Capability, usize)>::new(); 46 | 47 | move |input, output| { 48 | input.for_each(|time, data| { 49 | for (window, price) in data.iter().cloned() { 50 | if let Some(position) = maxima.iter().position(|x| *(x.0).time() == window) { 51 | if maxima[position].1 < price { 52 | maxima[position].1 = price; 53 | } 54 | } else { 55 | maxima.push((time.delayed(&window), price)); 56 | } 57 | } 58 | }); 59 | 60 | for &(ref capability, price) in maxima.iter() { 61 | if !input.frontier.less_than(capability.time()) { 62 | output.session(&capability).give(price); 63 | } 64 | } 65 | 66 | maxima.retain(|(capability, _)| input.frontier.less_than(capability)); 67 | } 68 | }) 69 | } 70 | -------------------------------------------------------------------------------- /nexmark/src/queries/q7_flex.rs: -------------------------------------------------------------------------------- 1 | 2 | use ::std::collections::HashMap; 3 | use ::timely::dataflow::{Scope, Stream}; 4 | use timely::dataflow::channels::pact::Exchange; 5 | use timely::dataflow::operators::{FrontierNotificator, Map, Operator}; 6 | 7 | use dynamic_scaling_mechanism::operator::StatefulOperator; 8 | use ::event::Date; 9 | use ::calculate_hash; 10 | 11 | use {queries::NexmarkInput, queries::NexmarkTimer}; 12 | 13 | pub fn q7_flex>(input: &NexmarkInput, nt: NexmarkTimer, scope: &mut S, window_size_ns: usize) -> Stream 14 | { 15 | let control = input.control(scope); 16 | 17 | let bids = input.bids(scope) 18 | .map(move |b| (b.auction, Date::new(((*b.date_time / window_size_ns) + 1) * window_size_ns), b.price)); 19 | 20 | 21 | // Partition by auction id to avoid serializing the computation 22 | bids.stateful_unary_input(&control, |(a, _window, _price)| calculate_hash(a), "q7-flex pre-reduce", move |state, cap, _time, data, _output| { 23 | for (_, key_id, (_auction, window, price)) in data.iter() { 24 | let not = state.get(*key_id).notificator(); 25 | not.notify_at_data(cap, nt.from_nexmark_time(*window), (*window, *price)); 26 | } 27 | }, |cap, maxima, bid_bin, output| { 28 | let mut windows = HashMap::new(); 29 | let bid_state: &mut HashMap<_, _> = bid_bin.state(); 30 | for (_time, (window, price)) in maxima.drain(..) { 31 | let open_windows = bid_state.entry(window).or_insert(0); 32 | if *open_windows < price { 33 | *open_windows = price; 34 | windows.insert(window, price); 35 | } 36 | } 37 | let mut session = output.session(&cap); 38 | session.give_iterator(windows.drain()); 39 | }) 40 | // Aggregate the partial counts. 
This doesn't need to be stateful since we request notification upon a window firing time and then we drop the state immediately after processing 41 | .unary_frontier(Exchange::new(move |x: &(Date, usize)| (*x.0 / window_size_ns) as u64), "Q7 All-reduce", |_cap, _info| 42 | { 43 | let mut pending_maxima: HashMap<_, Vec<_>> = Default::default(); 44 | let mut notificator = FrontierNotificator::new(); 45 | move |input, output| { 46 | input.for_each(|time, data| { 47 | for (window, price) in data.iter().cloned() { 48 | let slot = pending_maxima.entry(window).or_insert_with(Vec::new); 49 | slot.push(price); 50 | notificator.notify_at(time.delayed(&nt.from_nexmark_time(window))); 51 | } 52 | }); 53 | while let Some(time) = notificator.next(&[input.frontier()]) { 54 | if let Some(mut maxima) = pending_maxima.remove(&nt.to_nexmark_time(*time.time())) { 55 | if let Some(max_price) = maxima.drain(..).max() { 56 | output.session(&time).give(max_price); 57 | } 58 | } 59 | } 60 | } 61 | }) 62 | 63 | } 64 | -------------------------------------------------------------------------------- /nexmark/src/queries/q8.rs: -------------------------------------------------------------------------------- 1 | 2 | use ::timely::dataflow::{Scope, Stream}; 3 | use timely::dataflow::channels::pact::Exchange; 4 | use timely::dataflow::operators::{Map, Operator}; 5 | 6 | use {queries::NexmarkInput, queries::NexmarkTimer}; 7 | 8 | pub fn q8>(input: &NexmarkInput, nt: NexmarkTimer, scope: &mut S) -> Stream 9 | { 10 | let auctions = input.auctions(scope) 11 | .map(|a| (a.seller, a.date_time)); 12 | 13 | let people = input.auctions(scope) 14 | .map(|p| (p.id, p.date_time)); 15 | 16 | people 17 | .binary_frontier( 18 | &auctions, 19 | Exchange::new(|p: &(usize, _)| p.0 as u64), 20 | Exchange::new(|a: &(usize, _)| a.0 as u64), 21 | "Q8 join", 22 | |_capability, _info| { 23 | let window_size_ns = 12 * 60 * 60 * 1_000_000_000; 24 | let mut new_people = std::collections::HashMap::new(); 25 | let mut auctions = Vec::new(); 26 | 27 | move |input1, input2, output| { 28 | 29 | // Notice new people. 30 | input1.for_each(|_time, data| { 31 | for (person, time) in data.iter().cloned() { 32 | new_people.insert(person, time); 33 | } 34 | }); 35 | 36 | // Notice new auctions. 37 | input2.for_each(|time, data| { 38 | let mut data_vec = vec![]; 39 | data.swap(&mut data_vec); 40 | auctions.push((time.retain(), data_vec)); 41 | }); 42 | 43 | // Determine least timestamp we might still see. 
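// Auctions whose timestamp lies below the joint frontier bound are final:
// each is emitted if a matching `new_people` entry exists within
// `window_size_ns`, and is then dropped; newer auctions stay buffered and
// the capability is downgraded to the smallest remaining timestamp.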
44 | let complete1 = input1.frontier.frontier().get(0).cloned().unwrap_or(usize::max_value()); 45 | let complete2 = input2.frontier.frontier().get(0).cloned().unwrap_or(usize::max_value()); 46 | let complete = std::cmp::min(complete1, complete2); 47 | 48 | for (capability, auctions) in auctions.iter_mut() { 49 | if *capability.time() < complete { 50 | { 51 | let mut session = output.session(&capability); 52 | for &(person, time) in auctions.iter() { 53 | if time < nt.to_nexmark_time(complete) { 54 | if let Some(p_time) = new_people.get(&person) { 55 | if *time < **p_time + window_size_ns { 56 | session.give(person); 57 | } 58 | } 59 | } 60 | } 61 | auctions.retain(|&(_, time)| time >= nt.to_nexmark_time(complete)); 62 | } 63 | if let Some(minimum) = auctions.iter().map(|x| x.1).min() { 64 | capability.downgrade(&nt.from_nexmark_time(minimum)); 65 | } 66 | } 67 | } 68 | auctions.retain(|&(_, ref list)| !list.is_empty()); 69 | // println!("auctions.len: {:?}", auctions.len()); 70 | // for thing in auctions.iter() { 71 | // println!("\t{:?} (len: {:?}) vs {:?}", thing.0, thing.1.len(), complete); 72 | // } 73 | } 74 | }) 75 | } 76 | -------------------------------------------------------------------------------- /nexmark/src/queries/q8_flex.rs: -------------------------------------------------------------------------------- 1 | 2 | use ::std::collections::HashMap; 3 | use ::timely::dataflow::{Scope, Stream}; 4 | use timely::dataflow::operators::Map; 5 | 6 | use dynamic_scaling_mechanism::Bin; 7 | use dynamic_scaling_mechanism::operator::StatefulOperator; 8 | use ::calculate_hash; 9 | 10 | use {queries::NexmarkInput, queries::NexmarkTimer}; 11 | 12 | pub fn q8_flex>(input: &NexmarkInput, _nt: NexmarkTimer, scope: &mut S) -> Stream 13 | { 14 | let control = input.control(scope); 15 | 16 | let auctions = input.auctions(scope) 17 | .map(|a| (a.seller, a.date_time)); 18 | 19 | let people = input.auctions(scope) 20 | .map(|p| (p.id, p.date_time)); 21 | 22 | let window_size_ns = 12 * 60 * 60 * 1_000_000_000; 23 | people.stateful_binary(&control, &auctions, |(p, _d)| calculate_hash(p), |(s, _d)| calculate_hash(s), "q8-flex", |_cap, data, people_bin: &mut Bin<_, HashMap<_, _>, _>, _auctions_state: &mut Bin<_, Vec<()>, _>, _output| { 24 | // Update people state 25 | for (_time, (person, date)) in data.drain(..) { 26 | people_bin.state().entry(person as u64).or_insert(*date); 27 | } 28 | }, move |cap, data, people_bin, _auctions_bin, output| { 29 | for (_time, (seller, date)) in data.drain(..) 
{ 30 | if let Some(p_time) = people_bin.state().get(&(seller as u64)) { 31 | if *date < *p_time + window_size_ns { 32 | output.session(cap).give(seller); 33 | } 34 | } 35 | } 36 | }) 37 | } 38 | -------------------------------------------------------------------------------- /nexmark/src/tools.rs: -------------------------------------------------------------------------------- 1 | use streaming_harness::util::ToNanos; 2 | use dynamic_scaling_mechanism::{ControlInst}; 3 | 4 | #[derive(Clone, Copy, Debug)] 5 | pub enum ParseError {} 6 | 7 | #[derive(Debug, PartialEq, Eq)] 8 | pub enum ExperimentMapMode { 9 | None, 10 | Sudden, 11 | // OneByOne, 12 | Fluid, 13 | File(String), 14 | } 15 | 16 | impl ::std::str::FromStr for ExperimentMapMode { 17 | type Err = ParseError; 18 | 19 | fn from_str(s: &str) -> Result { 20 | let map_mode = match s { 21 | "none" => ExperimentMapMode::None, 22 | "sudden" => ExperimentMapMode::Sudden, 23 | // "one-by-one" => ExperimentMapMode::OneByOne, 24 | "fluid" => ExperimentMapMode::Fluid, 25 | file_name => ExperimentMapMode::File(file_name.to_string()), 26 | }; 27 | Ok(map_mode) 28 | } 29 | } 30 | 31 | impl ExperimentMapMode { 32 | pub fn instructions(&self, peers: usize, duration_ns: u64) -> Result)>, String> { 33 | match self { 34 | ExperimentMapMode::None => { 35 | let mut map = vec![0; 1 << ::dynamic_scaling_mechanism::BIN_SHIFT]; 36 | for (i, element) in map.iter_mut().enumerate() { 37 | *element = i % peers; 38 | }; 39 | Ok(vec![(0, vec![ControlInst::Map(map)])]) 40 | } 41 | ExperimentMapMode::Sudden => { 42 | let mut map = vec![0; 1 << ::dynamic_scaling_mechanism::BIN_SHIFT]; 43 | // TODO(moritzo) HAAAACCCCKKK 44 | if peers != 2 { 45 | for (i, v) in map.iter_mut().enumerate() { 46 | *v = ((i / 2) * 2 + (i % 2) * peers / 2) % peers; 47 | } 48 | } 49 | let initial_map = map.clone(); 50 | for i in 0..map.len() { 51 | map[i] = i % peers; 52 | 53 | // if i % batches_per_migration == batches_per_migration - 1 { 54 | // eprintln!("debug: setting up reconfiguration: {:?}", map); 55 | // control_plan.push((rounds * 1_000_000_000, Control::new(control_counter, 1, ControlInst::Map(map.clone())))); 56 | // control_counter += 1; 57 | // } 58 | }; 59 | Ok(vec![(duration_ns/3, vec![ControlInst::Map(initial_map)]), (2*duration_ns/3, vec![ControlInst::Map(map)])]) 60 | }, 61 | ExperimentMapMode::Fluid => { 62 | let mut map = vec![0; 1 << ::dynamic_scaling_mechanism::BIN_SHIFT]; 63 | // TODO(moritzo) HAAAACCCCKKK 64 | if peers != 2 { 65 | for (i, v) in map.iter_mut().enumerate() { 66 | *v = ((i / 2) * 2 + (i % 2) * peers / 2) % peers; 67 | } 68 | } 69 | let initial_map = map.clone(); 70 | let mut configurations = Vec::new(); 71 | configurations.push((duration_ns / 3, vec![ControlInst::Map(initial_map)])); 72 | for i in 0..map.len() { 73 | map[i] = i % peers; 74 | configurations.push((2 * duration_ns / 3, vec![ControlInst::Move(::dynamic_scaling_mechanism::BinId::new(i), i % peers)])); 75 | }; 76 | Ok(configurations) 77 | }, 78 | ExperimentMapMode::File(migrations_file) => { 79 | let f = ::std::fs::File::open(migrations_file).map_err(|e| e.to_string())?; 80 | let file = ::std::io::BufReader::new(&f); 81 | use ::std::io::BufRead; 82 | let mut instructions = Vec::new(); 83 | for line in file.lines() { 84 | let line = line.map_err(|e| e.to_string())?; 85 | let mut parts = line.split_whitespace(); 86 | let indicator = parts.next().expect("Missing map/diff indicator"); 87 | let ts: u64 = parts.next().expect("missing time stamp").parse().expect("Failed to parse time stamp"); 88 | let 
instr = match indicator { 89 | "M" => (ts, vec![ControlInst::Map(parts.map(|x| x.parse().expect("Failed to parse parts")).collect())]), 90 | "D" => { 91 | let parts: Vec = parts.map(|x| x.parse().unwrap()).collect(); 92 | let inst = parts.chunks(2).map(|x| ControlInst::Move(::dynamic_scaling_mechanism::BinId::new(x[0]), x[1])).collect(); 93 | (ts, inst) 94 | }, 95 | _ => return Err("Incorrect input found in map file".to_string()), 96 | }; 97 | instructions.push(instr); 98 | } 99 | Ok(instructions) 100 | }, 101 | // _ => panic!("unsupported map mode"), 102 | } 103 | 104 | } 105 | 106 | } 107 | 108 | pub fn statm_reporter() -> ::std::sync::Arc<::std::sync::atomic::AtomicBool> { 109 | 110 | // Read and report RSS every 100ms 111 | let statm_reporter_running = ::std::sync::Arc::new(::std::sync::atomic::AtomicBool::new(true)); 112 | { 113 | let statm_reporter_running = statm_reporter_running.clone(); 114 | ::std::thread::spawn(move || { 115 | use std::io::Read; 116 | let timer = ::std::time::Instant::now(); 117 | let mut iteration = 0; 118 | while statm_reporter_running.load(::std::sync::atomic::Ordering::SeqCst) { 119 | let mut stat_s = String::new(); 120 | let mut statm_f = ::std::fs::File::open("/proc/self/statm").expect("can't open /proc/self/statm"); 121 | statm_f.read_to_string(&mut stat_s).expect("can't read /proc/self/statm"); 122 | let pages: u64 = stat_s.split_whitespace().nth(1).expect("wooo").parse().expect("not a number"); 123 | let rss = pages * 4096; 124 | 125 | let elapsed_ns = timer.elapsed().to_nanos(); 126 | println!("statm_RSS\t{}\t{}", elapsed_ns, rss); 127 | #[allow(deprecated)] 128 | ::std::thread::sleep_ms(500 - (elapsed_ns / 1_000_000 - iteration * 500) as u32); 129 | iteration += 1; 130 | } 131 | }); 132 | } 133 | statm_reporter_running 134 | } 135 | -------------------------------------------------------------------------------- /src/join.rs: -------------------------------------------------------------------------------- 1 | //! General purpose state transition operator. 
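//!
//! The (private) `BinarySkeleton::left_join` defined here keys both streams
//! by the first tuple element: left records are stored per key and matched
//! against any right records already buffered for that key, while right
//! records either join immediately with a stored left record or are buffered
//! until a left record for the same key arrives.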
2 | use std::hash::Hash; 3 | 4 | use fnv::FnvHashMap as HashMap; 5 | 6 | use timely::ExchangeData; 7 | use timely::dataflow::{Stream, Scope}; 8 | 9 | use operator::StatefulOperator; 10 | use ::Bin; 11 | 12 | fn calculate_hash(t: &T) -> u64 { 13 | use ::std::hash::Hasher; 14 | let mut h: ::fnv::FnvHasher = Default::default(); 15 | t.hash(&mut h); 16 | h.finish() 17 | } 18 | 19 | trait BinarySkeleton 20 | where 21 | S: Scope, // The containing scope 22 | S::Timestamp: ::timely::order::TotalOrder, 23 | K: ExchangeData+Hash+Eq, 24 | V: ExchangeData, // Input data 25 | { 26 | fn left_join(&mut self, other: &Stream, name: &str, control: &Stream) -> Stream 27 | where 28 | V2: ExchangeData+Eq, 29 | ; 30 | } 31 | 32 | impl BinarySkeleton for Stream 33 | where 34 | S: Scope, // The containing scope 35 | S::Timestamp: ::timely::order::TotalOrder, 36 | K: ExchangeData+Hash+Eq, 37 | V: ExchangeData+Eq, // Input data 38 | { 39 | fn left_join(&mut self, other: &Stream, name: &str, control: &Stream) -> Stream 40 | where 41 | V2: ExchangeData+Eq, 42 | { 43 | self.stateful_binary(&control, other, |t| calculate_hash(&t.0), |t| calculate_hash(&t.0), name, |cap, data, bin1: &mut Bin<_, HashMap, _>, bin2: &mut Bin<_, HashMap>, _>, output| { 44 | let mut session = output.session(&cap); 45 | let bin: &mut HashMap<_, _> = bin2.state(); 46 | for (_time, (key, value)) in data { 47 | if let Some(mut d2) = bin.remove(&key) { 48 | session.give_iterator(d2.drain(..).map(|d| (key.clone(), value.clone(), d))); 49 | } 50 | bin1.state().insert(key.clone(), value.clone()); 51 | }; 52 | }, |cap, data, bin1, bin2, output| { 53 | let mut session = output.session(&cap); 54 | let state1 = bin1.state(); 55 | let state2 = bin2.state(); 56 | for (_time, (key, value)) in data { 57 | if let Some(d1) = state1.get(&key) { 58 | session.give((key.clone(), d1.clone(), value.clone())); 59 | } else { 60 | state2.entry(key.clone()).or_insert_with(Vec::new).push(value.clone()); 61 | } 62 | }; 63 | }) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![deny(missing_docs)] 2 | //! Megaphone is a library to provide migratable operators for timely dataflow. 3 | 4 | extern crate fnv; 5 | extern crate timely; 6 | extern crate abomonation; 7 | #[macro_use] extern crate abomonation_derive; 8 | 9 | mod stateful; 10 | pub mod state_machine; 11 | pub mod join; 12 | pub mod notificator; 13 | pub mod operator; 14 | 15 | use timely::order::{PartialOrder, TotalOrder}; 16 | use timely::progress::frontier::Antichain; 17 | use timely::progress::Timestamp; 18 | 19 | /// A control message consisting of a sequence number, a total count of messages to be expected 20 | /// and an instruction. 21 | #[derive(Abomonation, Clone, Debug)] 22 | pub struct Control { 23 | sequence: u64, 24 | count: usize, 25 | 26 | inst: ControlInst, 27 | } 28 | 29 | /// A bin identifier. Wraps a `usize`. 30 | #[derive(Abomonation, Clone, Copy, Debug, Ord, PartialOrd, Eq, PartialEq)] 31 | pub struct BinId(usize); 32 | 33 | impl BinId { 34 | /// Construct a new `BinId` from a `usize`. 35 | pub fn new(bin: usize) -> Self { 36 | BinId(bin) 37 | } 38 | } 39 | 40 | type KeyType = u64; 41 | 42 | /// Opaque key identifier. 43 | #[derive(Abomonation, Clone, Copy, Debug, Ord, PartialOrd, Eq, PartialEq)] 44 | pub struct Key(KeyType); 45 | 46 | impl Key { 47 | /// Calculate the bin id for this key. 
48 | /// 49 | /// Warning: will go away once `BIN_SHIFT` ceases to exist. 50 | pub fn bin(self) -> usize { 51 | key_to_bin(self) 52 | } 53 | } 54 | 55 | /// Compute the bin for a key. 56 | /// 57 | /// Warning: will go away once `BIN_SHIFT` ceases to exist. 58 | #[inline(always)] 59 | pub fn key_to_bin(key: Key) -> usize { 60 | (key.0 >> (::std::mem::size_of::() * 8 - BIN_SHIFT)) as usize 61 | } 62 | 63 | impl ::std::ops::Deref for BinId { 64 | type Target = usize; 65 | fn deref(&self) -> &Self::Target { 66 | &self.0 67 | } 68 | } 69 | 70 | impl ::std::ops::Deref for Key { 71 | type Target = u64; 72 | fn deref(&self) -> &Self::Target { 73 | &self.0 74 | } 75 | } 76 | 77 | /// A control instruction 78 | #[derive(Abomonation, Clone, Debug)] 79 | pub enum ControlInst { 80 | /// Provide a new map 81 | Map(Vec), 82 | /// Provide a map update 83 | Move(BinId, /*worker*/ usize), 84 | /// No-op 85 | None, 86 | } 87 | 88 | impl Control { 89 | /// Construct a new `Control` 90 | pub fn new(sequence: u64, count: usize, inst: ControlInst) -> Self { 91 | Self { sequence, count, inst } 92 | } 93 | } 94 | 95 | /// A compiled set of control instructions 96 | #[derive(Debug)] 97 | pub struct ControlSet { 98 | /// Its sequence number 99 | pub sequence: u64, 100 | /// The frontier at which to apply the instructions 101 | pub frontier: Antichain, 102 | /// Explicit mapping of bins to workers 103 | pub map: Vec, 104 | } 105 | 106 | impl ControlSet { 107 | 108 | /// Obtain the current bin to destination mapping 109 | pub fn map(&self) -> &Vec { 110 | &self.map 111 | } 112 | 113 | } 114 | 115 | /// A builder to compile `ControlSet`s. 116 | #[derive(Default)] 117 | pub struct ControlSetBuilder { 118 | sequence: Option, 119 | frontier: Vec, 120 | instructions: Vec, 121 | 122 | count: Option, 123 | } 124 | 125 | impl ControlSetBuilder { 126 | 127 | /// Add a new `Control` to this builder. 128 | /// 129 | /// TODO: Currently, it will assert if count and sequence numbers do not match. Should fail 130 | /// gracefully instead. 131 | pub fn apply(&mut self, control: Control) { 132 | if self.count.is_none() { 133 | self.count = Some(control.count); 134 | } 135 | if let Some(ref mut count) = self.count { 136 | assert!(*count > 0, "Received incorrect number of Controls"); 137 | *count -= 1; 138 | } 139 | if let Some(sequence) = self.sequence { 140 | assert_eq!(sequence, control.sequence, "Received control with inconsistent sequence number"); 141 | } else { 142 | self.sequence = Some(control.sequence); 143 | } 144 | match control.inst { 145 | ControlInst::None => {}, 146 | inst => self.instructions.push(inst), 147 | }; 148 | 149 | } 150 | 151 | /// Provide a frontier to be used to construct the configuration's `Antichain` from. 152 | pub fn frontier>(&mut self, caps: I) { 153 | self.frontier.extend(caps); 154 | } 155 | 156 | /// Build a `ControlSet` by consuming this builder. 
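///
/// Starts from the bin-to-worker map of `previous` and applies the collected
/// `Map` and `Move` instructions in order; asserts that exactly as many
/// `Control` messages were applied as announced by their shared `count`.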
157 | pub fn build(self, previous: &ControlSet) -> ControlSet { 158 | assert_eq!(0, self.count.unwrap_or(0)); 159 | let mut frontier = Antichain::new(); 160 | for f in self.frontier {frontier.insert(f);} 161 | 162 | let mut map = previous.map().clone(); 163 | 164 | for inst in self.instructions { 165 | match inst { 166 | ControlInst::Map(ref new_map) => { 167 | assert_eq!(1 << BIN_SHIFT, new_map.len(), "provided map does not have correct len: {} != {}", 1 << BIN_SHIFT, new_map.len()); 168 | map.clear(); 169 | map.extend( new_map.iter()); 170 | }, 171 | ControlInst::Move(BinId(bin), target) => { 172 | assert!(bin < (1 << BIN_SHIFT)); 173 | map[bin] = target 174 | }, 175 | ControlInst::None => {}, 176 | } 177 | } 178 | 179 | ControlSet { 180 | sequence: self.sequence.unwrap(), 181 | frontier, 182 | map, 183 | } 184 | } 185 | } 186 | 187 | /// State abstraction. It encapsulates state assorted by bins and a notificator. 188 | pub struct State 189 | where 190 | T: Timestamp + TotalOrder, 191 | { 192 | bins: Vec>>, 193 | } 194 | 195 | impl State 196 | where 197 | T: Timestamp + TotalOrder, 198 | { 199 | /// Construct a new `State` with the provided vector of bins and a default `FrontierNotificator`. 200 | fn new(bins: Vec>>) -> Self { 201 | Self { bins } 202 | } 203 | 204 | /// Get the state associated with a key from this bin. Asserts if the state is not available. 205 | pub fn get(&mut self, key: Key) -> &mut Bin { 206 | assert!(self.bins[key_to_bin(key)].is_some(), "Accessing bin {} for key {:?}", key_to_bin(key), key); 207 | self.bins[key_to_bin(key)].as_mut().expect("Trying to access non-available bin") 208 | } 209 | 210 | /// Iterate all bins. This might go away. 211 | pub fn scan(&mut self, mut f: F) { 212 | for state in &mut self.bins { 213 | if let Some(bin) = state.as_mut() { 214 | f(&mut bin.data); 215 | } 216 | } 217 | } 218 | 219 | } 220 | 221 | /// A bin with data and a notificator. 222 | pub struct Bin 223 | where 224 | T: Timestamp + TotalOrder, 225 | { 226 | data: D, 227 | notificator: ::stateful::Notificator, 228 | } 229 | 230 | impl Bin 231 | where 232 | T: Timestamp + TotalOrder, 233 | { 234 | /// Obtain a mutable reference to the associated state object. 235 | pub fn state(&mut self) -> &mut D { 236 | &mut self.data 237 | } 238 | 239 | /// Obtain a mutable reference to the notificator. 240 | pub fn notificator(&mut self) -> &mut ::stateful::Notificator { 241 | &mut self.notificator 242 | } 243 | } 244 | 245 | impl Default for Bin 246 | where 247 | T: Timestamp + TotalOrder, 248 | D: Default, 249 | { 250 | /// Creates an empty `HashMap`, with the `Default` value for the hasher. 251 | fn default() -> Self { 252 | Self { 253 | data: Default::default(), 254 | notificator: ::stateful::Notificator::new(), 255 | } 256 | } 257 | } 258 | /// Static bin-shift parameter. Enable feature "bin-1" with no default features to set this value. 259 | #[cfg(feature = "bin-1")] 260 | pub const BIN_SHIFT: usize = 1; 261 | 262 | /// Static bin-shift parameter. Enable feature "bin-2" with no default features to set this value. 263 | #[cfg(feature = "bin-2")] 264 | pub const BIN_SHIFT: usize = 2; 265 | 266 | /// Static bin-shift parameter. Enable feature "bin-3" with no default features to set this value. 267 | #[cfg(feature = "bin-3")] 268 | pub const BIN_SHIFT: usize = 3; 269 | 270 | /// Static bin-shift parameter. Enable feature "bin-4" with no default features to set this value. 271 | #[cfg(feature = "bin-4")] 272 | pub const BIN_SHIFT: usize = 4; 273 | 274 | /// Static bin-shift parameter. 
Enable feature "bin-5" with no default features to set this value. 275 | #[cfg(feature = "bin-5")] 276 | pub const BIN_SHIFT: usize = 5; 277 | 278 | /// Static bin-shift parameter. Enable feature "bin-6" with no default features to set this value. 279 | #[cfg(feature = "bin-6")] 280 | pub const BIN_SHIFT: usize = 6; 281 | 282 | /// Static bin-shift parameter. Enable feature "bin-7" with no default features to set this value. 283 | #[cfg(feature = "bin-7")] 284 | pub const BIN_SHIFT: usize = 7; 285 | 286 | /// Static bin-shift parameter. Enable feature "bin-8" with no default features to set this value. 287 | #[cfg(feature = "bin-8")] 288 | pub const BIN_SHIFT: usize = 8; 289 | 290 | /// Static bin-shift parameter. Enable feature "bin-9" with no default features to set this value. 291 | #[cfg(feature = "bin-9")] 292 | pub const BIN_SHIFT: usize = 9; 293 | 294 | /// Static bin-shift parameter. Enable feature "bin-10" with no default features to set this value. 295 | #[cfg(feature = "bin-10")] 296 | pub const BIN_SHIFT: usize = 10; 297 | 298 | /// Static bin-shift parameter. Enable feature "bin-11" with no default features to set this value. 299 | #[cfg(feature = "bin-11")] 300 | pub const BIN_SHIFT: usize = 11; 301 | 302 | /// Static bin-shift parameter. Enable feature "bin-12" with no default features to set this value. 303 | #[cfg(feature = "bin-12")] 304 | pub const BIN_SHIFT: usize = 12; 305 | 306 | /// Static bin-shift parameter. Enable feature "bin-13" with no default features to set this value. 307 | #[cfg(feature = "bin-13")] 308 | pub const BIN_SHIFT: usize = 13; 309 | 310 | /// Static bin-shift parameter. Enable feature "bin-14" with no default features to set this value. 311 | #[cfg(feature = "bin-14")] 312 | pub const BIN_SHIFT: usize = 14; 313 | 314 | /// Static bin-shift parameter. Enable feature "bin-15" with no default features to set this value. 315 | #[cfg(feature = "bin-15")] 316 | pub const BIN_SHIFT: usize = 15; 317 | 318 | /// Static bin-shift parameter. Enable feature "bin-16" with no default features to set this value. 319 | #[cfg(feature = "bin-16")] 320 | pub const BIN_SHIFT: usize = 16; 321 | 322 | /// Static bin-shift parameter. Enable feature "bin-17" with no default features to set this value. 323 | #[cfg(feature = "bin-17")] 324 | pub const BIN_SHIFT: usize = 17; 325 | 326 | /// Static bin-shift parameter. Enable feature "bin-18" with no default features to set this value. 327 | #[cfg(feature = "bin-18")] 328 | pub const BIN_SHIFT: usize = 18; 329 | 330 | /// Static bin-shift parameter. Enable feature "bin-19" with no default features to set this value. 331 | #[cfg(feature = "bin-19")] 332 | pub const BIN_SHIFT: usize = 19; 333 | 334 | #[cfg(feature = "bin-20")] 335 | /// Static bin-shift parameter. Enable feature "bin-20" with no default features to set this value. 336 | pub const BIN_SHIFT: usize = 20; 337 | -------------------------------------------------------------------------------- /src/notificator.rs: -------------------------------------------------------------------------------- 1 | //! Specialized notificators for Megaphone. 2 | 3 | use std::collections::BinaryHeap; 4 | 5 | use timely::order::TotalOrder; 6 | use timely::progress::frontier::MutableAntichain; 7 | use timely::progress::Timestamp; 8 | use timely::dataflow::operators::Capability; 9 | 10 | /// Common trait to all notificator implementations. 11 | /// 12 | /// Currently only prvides `drain`. TODO: Allow to schedule notifications. 
13 | pub trait Notify { 14 | /// Drain all pending notifications that are not in advance of `frontiers`. 15 | /// 16 | /// If drain returns `Some(cap)` this indicates that notifications were enqueud to `buffer`. 17 | /// The buffer may be cleared by `drain`. 18 | fn drain(&mut self, frontiers: &[&MutableAntichain], buffer: &mut Vec<(T, D)>) -> Option>; 19 | } 20 | 21 | /// Tracks requests for notification and delivers available notifications. 22 | /// 23 | /// `TotalOrderFrontierNotificator` is meant to manage the delivery of requested notifications in the 24 | /// presence of inputs that may have outstanding messages to deliver. 25 | /// The notificator inspects the frontiers, as presented from the outside, for each input. 26 | /// Requested notifications can be served only once there are no frontier elements less-or-equal 27 | /// to them, and there are no other pending notification requests less than them. Each will be 28 | /// less-or-equal to itself, so we want to dodge that corner case. 29 | /// 30 | /// The `TotalOrderFrontierNotificator` is specialized for totally-ordered time domains. It will 31 | /// only keep a single minimum-element capability around and does not retain pending capabilities 32 | /// for each outstanding notification. 33 | /// 34 | /// # Examples 35 | /// ``` 36 | /// extern crate timely; 37 | /// extern crate dynamic_scaling_mechanism; 38 | /// use std::collections::HashMap; 39 | /// use timely::dataflow::operators::{Input, Inspect}; 40 | /// use timely::dataflow::operators::generic::operator::Operator; 41 | /// use timely::dataflow::channels::pact::Pipeline; 42 | /// use ::dynamic_scaling_mechanism::notificator::{Notify, TotalOrderFrontierNotificator}; 43 | /// 44 | /// timely::execute(timely::Configuration::Thread, |worker| { 45 | /// let (mut in1, mut in2) = worker.dataflow::(|scope| { 46 | /// let (in1_handle, in1) = scope.new_input(); 47 | /// let (in2_handle, in2) = scope.new_input(); 48 | /// in1.binary_frontier(&in2, Pipeline, Pipeline, "example", |mut _default_cap, _info| { 49 | /// let mut notificator = TotalOrderFrontierNotificator::new(); 50 | /// let mut notificator_buffer = Vec::new(); 51 | /// let mut input_buffer: Vec = Vec::new(); 52 | /// let mut input2_buffer: Vec = Vec::new(); 53 | /// move |input1, input2, output| { 54 | /// while let Some((time, data)) = input1.next() { 55 | /// let cap = time.retain(); 56 | /// data.swap(&mut input_buffer); 57 | /// for d in input_buffer.drain(..) { 58 | /// notificator.notify_at_data(&cap, *cap.time(), d); 59 | /// } 60 | /// } 61 | /// while let Some((time, data)) = input2.next() { 62 | /// let cap = time.retain(); 63 | /// data.swap(&mut input2_buffer); 64 | /// for d in input2_buffer.drain(..) { 65 | /// notificator.notify_at_data(&cap, *cap.time(), d); 66 | /// } 67 | /// } 68 | /// if let Some(cap) = notificator.drain(&[input1.frontier(), input2.frontier()], &mut notificator_buffer) { 69 | /// for (time, data) in notificator_buffer.drain(..) 
{ 70 | /// output.session(&cap).give(data); 71 | /// } 72 | /// } 73 | /// } 74 | /// }).inspect_batch(|t, x| println!("{:?} -> {:?}", t, x)); 75 | /// 76 | /// (in1_handle, in2_handle) 77 | /// }); 78 | /// 79 | /// for i in 1..10 { 80 | /// in1.send(i - 1); 81 | /// in1.advance_to(i); 82 | /// in2.send(i - 1); 83 | /// in2.advance_to(i); 84 | /// } 85 | /// in1.close(); 86 | /// in2.close(); 87 | /// }).unwrap(); 88 | /// ``` 89 | #[derive(Default)] 90 | pub struct TotalOrderFrontierNotificator { 91 | capability: Option>, 92 | pending: BinaryHeap>, 93 | } 94 | 95 | impl TotalOrderFrontierNotificator { 96 | 97 | /// Allocates a new `TotalOrderFrontierNotificator` with initial capabilities. 98 | pub fn from>>(iter: I) -> Self { 99 | let pending: Vec<_> = iter.into_iter().collect(); 100 | let capability = pending.iter().min_by_key(|x| x.time()).cloned(); 101 | Self { 102 | capability, 103 | pending: pending.into_iter().map(|x| OrderReversed{ element: x.time().clone(), data: ()}).collect(), 104 | } 105 | } 106 | 107 | /// Requests a notification at the time associated with capability `cap`. Takes ownership of 108 | /// the capability. 109 | /// 110 | /// In order to request a notification at future timestamp, obtain a capability for the new 111 | /// timestamp first, as shown in the example. 112 | /// 113 | /// #Examples 114 | /// ``` 115 | /// extern crate timely; 116 | /// extern crate dynamic_scaling_mechanism; 117 | /// use timely::dataflow::operators::ToStream; 118 | /// use timely::dataflow::operators::generic::operator::Operator; 119 | /// use timely::dataflow::channels::pact::Pipeline; 120 | /// use dynamic_scaling_mechanism::notificator::TotalOrderFrontierNotificator; 121 | /// 122 | /// timely::example(|scope| { 123 | /// (0..10).to_stream(scope) 124 | /// .unary_frontier(Pipeline, "example", |_, _| { 125 | /// let mut notificator = TotalOrderFrontierNotificator::new(); 126 | /// let mut buffer = Vec::new(); 127 | /// move |input, output| { 128 | /// input.for_each(|cap, data| { 129 | /// data.swap(&mut buffer); 130 | /// output.session(&cap).give_iterator(buffer.drain(..)); 131 | /// let mut time = cap.time().clone(); 132 | /// time += 1; 133 | /// notificator.notify_at(&cap.delayed(&time)); 134 | /// }); 135 | /// notificator.for_each(&[input.frontier()], |cap, time, _| { 136 | /// println!("done with time: {:?}", time); 137 | /// }); 138 | /// } 139 | /// }); 140 | /// }); 141 | /// ``` 142 | #[inline] 143 | pub fn notify_at(&mut self, cap: &Capability) { 144 | self.notify_at_data(cap, cap.time().clone(), ()); 145 | } 146 | 147 | /// Repeatedly calls `logic` till exhaustion of the notifications made available by inspecting 148 | /// the frontiers. 149 | /// 150 | /// `logic` receives a capability for `t`, the timestamp being notified. 151 | #[inline] 152 | pub fn for_each<'a, F: FnMut(&Capability, T, &mut Self)>(&mut self, frontiers: &'a [&'a MutableAntichain], mut logic: F) { 153 | let mut vec = Vec::new(); 154 | if let Some(cap) = self.drain(frontiers, &mut vec) { 155 | for (time, _data) in vec { 156 | logic(&cap, time, self) 157 | } 158 | } 159 | } 160 | } 161 | 162 | impl TotalOrderFrontierNotificator { 163 | /// Allocates a new `TotalOrderFrontierNotificator`. 164 | pub fn new() -> Self { 165 | Self { 166 | capability: None, 167 | pending: Default::default(), 168 | // available: ::std::collections::BinaryHeap::new(), 169 | } 170 | } 171 | 172 | /// Requests a notification at the time associated with capability `cap`. Takes ownership of 173 | /// the capability. 
174 | /// 175 | /// In order to request a notification at future timestamp, obtain a capability for the new 176 | /// timestamp first, as shown in the example. 177 | /// 178 | /// #Examples 179 | /// ``` 180 | /// extern crate timely; 181 | /// extern crate dynamic_scaling_mechanism; 182 | /// use timely::dataflow::operators::ToStream; 183 | /// use timely::dataflow::operators::generic::operator::Operator; 184 | /// use timely::dataflow::channels::pact::Pipeline; 185 | /// use dynamic_scaling_mechanism::notificator::TotalOrderFrontierNotificator; 186 | /// 187 | /// timely::example(|scope| { 188 | /// (0..10).to_stream(scope) 189 | /// .unary_frontier(Pipeline, "example", |_, _| { 190 | /// let mut notificator = TotalOrderFrontierNotificator::new(); 191 | /// let mut buffer = Vec::new(); 192 | /// move |input, output| { 193 | /// input.for_each(|cap, data| { 194 | /// data.swap(&mut buffer); 195 | /// output.session(&cap).give_iterator(buffer.drain(..)); 196 | /// let mut time = cap.time().clone(); 197 | /// time += 1; 198 | /// notificator.notify_at_data(&cap.retain(), time, ()); 199 | /// }); 200 | /// notificator.for_each_data(&[input.frontier()], |cap, time, data, _| { 201 | /// println!("done with time: {:?} at: {:?}", cap.time(), data); 202 | /// }); 203 | /// } 204 | /// }); 205 | /// }); 206 | /// ``` 207 | #[inline] 208 | pub fn notify_at_data(&mut self, cap: &Capability, time: T, data: D) { 209 | assert!(cap.time().less_equal(&time), "provided capability must be <= notification time, found {:?} and {:?}", cap.time(), time); 210 | self.pending.push(OrderReversed { element: time, data}); 211 | if self.capability.as_ref().map_or(true, |c| c.time() > cap.time()) { 212 | self.capability = Some(cap.clone()) 213 | } 214 | } 215 | 216 | /// Repeatedly calls `logic` till exhaustion of the notifications made available by inspecting 217 | /// the frontiers. 218 | /// 219 | /// `logic` receives a capability for `t`, the timestamp being notified. 220 | #[inline] 221 | pub fn for_each_data<'a, F: FnMut(&Capability, T, D, &mut Self)>(&mut self, frontiers: &'a [&'a MutableAntichain], mut logic: F) { 222 | let mut vec = Vec::new(); 223 | if let Some(cap) = self.drain(frontiers, &mut vec) { 224 | for (time, data) in vec { 225 | logic(&cap, time, data, self); 226 | } 227 | } 228 | } 229 | 230 | /// Descructures the notificator to obtain pending `(time, data)` pairs. 231 | pub fn pending(self) -> impl Iterator { 232 | self.pending.into_iter().map(|e| (e.element, e.data)) 233 | } 234 | } 235 | 236 | impl Notify for TotalOrderFrontierNotificator { 237 | 238 | #[inline] 239 | fn drain(&mut self, frontiers: &[&MutableAntichain], buffer: &mut Vec<(T, D)>) -> Option> { 240 | // By invariant, nothing in self.available is greater_equal anything in self.pending. 241 | // It should be safe to append any ordered subset of self.pending to self.available, 242 | // in that the sequence of capabilities in self.available will remain non-decreasing. 
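// Illustrative walkthrough (added commentary, not from the original source): with pending
// notifications at times {3, 5} and an input frontier of {4}, only the entry at time 3 is
// moved into `buffer` below; the retained capability is then downgraded to time 5, and the
// entry at 5 stays pending until the frontier advances past it.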
243 | 244 | let mut result = None; 245 | if !self.pending.is_empty() { 246 | buffer.clear(); 247 | while self.pending.peek().map_or(false, |or| frontiers.iter().all(|f| !f.less_equal(&or.element))) { 248 | let min = self.pending.pop().unwrap(); 249 | buffer.push((min.element, min.data)); 250 | } 251 | if !buffer.is_empty() { 252 | result = Some(self.capability.as_ref().unwrap().clone()); 253 | } 254 | 255 | if let Some(cap) = self.capability.as_mut() { 256 | if let Some(pending) = self.pending.peek() { 257 | if cap.time().less_than(&pending.element) { 258 | cap.downgrade(&pending.element); 259 | } 260 | } 261 | } 262 | } 263 | if self.pending.is_empty() { 264 | self.capability.take(); 265 | } 266 | 267 | if frontiers.iter().all(|f| f.is_empty()) { 268 | self.capability.take(); 269 | } 270 | result 271 | } 272 | } 273 | 274 | struct OrderReversed { 275 | pub element: T, 276 | pub data: D, 277 | } 278 | 279 | impl PartialOrd for OrderReversed { 280 | fn partial_cmp(&self, other: &Self) -> Option<::std::cmp::Ordering> { 281 | other.element.partial_cmp(&self.element) 282 | } 283 | } 284 | impl Ord for OrderReversed { 285 | fn cmp(&self, other: &Self) -> ::std::cmp::Ordering { 286 | other.element.cmp(&self.element) 287 | } 288 | } 289 | impl PartialEq for OrderReversed { 290 | fn eq(&self, other: &Self) -> bool { 291 | other.element.eq(&self.element) 292 | } 293 | } 294 | impl Eq for OrderReversed {} 295 | -------------------------------------------------------------------------------- /src/state_machine.rs: -------------------------------------------------------------------------------- 1 | //! General purpose state transition operator, implemented with Megaphone. 2 | use std::hash::Hash; 3 | 4 | use fnv::FnvHashMap as HashMap; 5 | 6 | use timely::ExchangeData; 7 | use timely::dataflow::{Stream, Scope}; 8 | use timely::Data; 9 | 10 | use operator::StatefulOperator; 11 | use ::Control; 12 | 13 | /// Provide a general-purpose state machine operator that can be migrated without changes to the 14 | /// `fold` implementation. 15 | pub trait BinnedStateMachine 16 | where 17 | S: Scope, 18 | S::Timestamp: ::timely::order::TotalOrder, 19 | K: ExchangeData+Hash+Eq, 20 | V: ExchangeData+Eq, 21 | D: ExchangeData + Default + 'static, 22 | { 23 | /// Tracks a state for each presented key, using user-supplied state transition logic. 24 | /// 25 | /// The transition logic `fold` may mutate the state, and produce both output records and 26 | /// a `bool` indicating that it is appropriate to deregister the state, cleaning up once 27 | /// the state is no longer helpful. 
28 | /// 29 | /// #Examples 30 | /// ``` 31 | /// ``` 32 | fn stateful_state_machine< 33 | R: Data, // output type 34 | I: IntoIterator, // type of output iterator 35 | F: Fn(&K, V, &mut D)->(bool, I)+'static, // state update logic 36 | H: Fn(&K)->u64+'static, // "hash" function for keys 37 | >(&self, fold: F, hash: H, control: &Stream) -> Stream where S::Timestamp : Hash+Eq; 38 | } 39 | 40 | impl BinnedStateMachine for Stream 41 | where 42 | S: Scope, 43 | S::Timestamp: ::timely::order::TotalOrder, 44 | K: ExchangeData+Hash+Eq, 45 | V: ExchangeData+Eq, 46 | D: ExchangeData + Default + 'static, 47 | { 48 | fn stateful_state_machine< 49 | R: Data, // output type 50 | I: IntoIterator, // type of output iterator 51 | F: Fn(&K, V, &mut D) -> (bool, I) + 'static, // state update logic 52 | H: Fn(&K)->u64+'static, // "hash" function for keys 53 | >(&self, fold: F, hash: H, control: &Stream) -> Stream where S::Timestamp : Hash+Eq { 54 | 55 | self.stateful_unary(control, move |(k, _v)| hash(&k), "StateMachine", move |cap, iter, bin, output| { 56 | let mut session = output.session(&cap); 57 | let states: &mut HashMap<_, _> = bin.state(); 58 | for (_time, (key, val)) in iter.drain(..) { 59 | let (remove, output) = { 60 | if !states.contains_key(&key) { 61 | states.insert(key.clone(), Default::default()); 62 | } 63 | let state = states.get_mut(&key).unwrap(); 64 | fold(&key, val.clone(), state) 65 | }; 66 | if remove { states.remove(&key); } 67 | session.give_iterator(output.into_iter()); 68 | } 69 | }) 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /tests/distribution_test.rs: -------------------------------------------------------------------------------- 1 | 2 | extern crate timely; 3 | extern crate dynamic_scaling_mechanism; 4 | 5 | use timely::dataflow::*; 6 | use timely::dataflow::operators::{Input, Probe, Map, Inspect}; 7 | 8 | use timely::Configuration; 9 | 10 | use dynamic_scaling_mechanism::{BIN_SHIFT, ControlInst, Control}; 11 | use dynamic_scaling_mechanism::state_machine::BinnedStateMachine; 12 | 13 | #[test] 14 | fn default_configuration() { 15 | timely::execute(Configuration::Process(2), |worker| { 16 | 17 | // these results happen to be right, but aren't guaranteed. 18 | // the system is at liberty to re-order within a timestamp. 19 | let mut result = vec![(0, 0), (0, 2), (0, 6), (0, 12), (0, 20), 20 | (1, 1), (1, 4), (1, 9), (1, 16), (1, 25)]; 21 | 22 | let index = worker.index(); 23 | let mut input = InputHandle::new(); 24 | let mut control_input = InputHandle::new(); 25 | let mut probe = ProbeHandle::new(); 26 | 27 | worker.dataflow(|scope| { 28 | let control = scope.input_from(&mut control_input); 29 | let input = scope.input_from(&mut input); 30 | input 31 | .map(|x| (x % 2, x)) 32 | .stateful_state_machine( 33 | |_key, val, agg| { 34 | *agg += val; 35 | (false, Some((*_key, *agg))) 36 | }, 37 | |key| *key as u64 38 | , 39 | &control 40 | ) 41 | .inspect(move |x| { 42 | assert!(result.contains(x)); 43 | result.retain(|e| e != x); 44 | }) 45 | .probe_with(&mut probe); 46 | }); 47 | 48 | control_input.advance_to(10); 49 | // introduce data and watch! 
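// Added commentary: only worker 0 introduces the values 0..10 (keyed by parity above), but
// every worker advances `input` and steps until the probe confirms the round is complete,
// so each round is fully processed before the next one is introduced. Since no control
// records are sent before time 10, the default bin-to-worker assignment stays in effect.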
50 | for round in 0..10 { 51 | if index == 0 { 52 | input.send(round); 53 | } 54 | input.advance_to(round + 1); 55 | while probe.less_than(input.time()) { 56 | worker.step(); 57 | } 58 | } 59 | 60 | }).unwrap(); 61 | } 62 | 63 | #[test] 64 | fn custom_configuration() { 65 | timely::execute(Configuration::Process(2), |worker| { 66 | 67 | // these results happen to be right, but aren't guaranteed. 68 | // the system is at liberty to re-order within a timestamp. 69 | let mut result = vec![(0, 0), (0, 2), (0, 6), (0, 12), (0, 20), 70 | (1, 1), (1, 4), (1, 9), (1, 16), (1, 25)]; 71 | 72 | let index = worker.index(); 73 | let mut input = InputHandle::new(); 74 | let mut control_input = InputHandle::new(); 75 | let mut probe = ProbeHandle::new(); 76 | 77 | worker.dataflow(|scope| { 78 | let control = scope.input_from(&mut control_input); 79 | let input = scope.input_from(&mut input); 80 | input 81 | .map(|x| (x % 2, x)) 82 | .stateful_state_machine( 83 | |_key, val, agg| { 84 | *agg += val; 85 | (false, Some((*_key, *agg))) 86 | }, 87 | |key| *key as u64 88 | , 89 | &control 90 | ) 91 | .inspect(move |x| { 92 | assert!(result.contains(x), "Got {:?}, expected one of {:?}", x, result); 93 | result.retain(|e| e != x); 94 | }) 95 | .probe_with(&mut probe); 96 | }); 97 | 98 | control_input.send(Control::new(0, 1, ControlInst::Map(vec![0; 1 << BIN_SHIFT]))); 99 | control_input.advance_to(5); 100 | control_input.send(Control::new(1, 1, ControlInst::Map(vec![1; 1 << BIN_SHIFT]))); 101 | control_input.advance_to(10); 102 | // introduce data and watch! 103 | for round in 0..10 { 104 | if index == 0 { 105 | input.send(round); 106 | } 107 | input.advance_to(round + 1); 108 | while probe.less_than(input.time()) { 109 | worker.step(); 110 | } 111 | } 112 | 113 | }).unwrap(); 114 | } 115 | 116 | #[test] 117 | fn adjacent_configuration() { 118 | timely::execute(Configuration::Process(2), |worker| { 119 | 120 | // these results happen to be right, but aren't guaranteed. 121 | // the system is at liberty to re-order within a timestamp. 122 | let mut result = vec![(0, 0), (0, 2), (0, 6), (0, 12), (0, 20), 123 | (1, 1), (1, 4), (1, 9), (1, 16), (1, 25)]; 124 | 125 | let index = worker.index(); 126 | let mut input = InputHandle::new(); 127 | let mut control_input = InputHandle::new(); 128 | let mut probe = ProbeHandle::new(); 129 | 130 | worker.dataflow(|scope| { 131 | let control = scope.input_from(&mut control_input); 132 | let input = scope.input_from(&mut input); 133 | input 134 | .map(|x| (x % 2, x)) 135 | .stateful_state_machine( 136 | |_key, val, agg| { 137 | *agg += val; 138 | (false, Some((*_key, *agg))) 139 | }, 140 | |key| *key as u64 141 | , 142 | &control 143 | ) 144 | .inspect(move |x| { 145 | assert!(result.contains(x)); 146 | result.retain(|e| e != x); 147 | }) 148 | .probe_with(&mut probe); 149 | }); 150 | 151 | control_input.send(Control::new(0, 1, ControlInst::Map(vec![0; 1 << BIN_SHIFT]))); 152 | control_input.advance_to(1); 153 | control_input.send(Control::new(1, 1, ControlInst::Map(vec![1; 1 << BIN_SHIFT]))); 154 | control_input.advance_to(10); 155 | // introduce data and watch! 
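// Added commentary: configuration 0 places every bin on worker 0, and configuration 1,
// effective from timestamp 1, moves every bin to worker 1, so the per-key running sums
// migrate between the first and second round; the `inspect` assertions above check that
// the results are unaffected by the migration.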
156 | for round in 0..10 { 157 | if index == 0 { 158 | input.send(round); 159 | } 160 | input.advance_to(round + 1); 161 | while probe.less_than(input.time()) { 162 | worker.step(); 163 | } 164 | } 165 | 166 | }).unwrap(); 167 | } 168 | 169 | 170 | #[test] 171 | #[should_panic] 172 | fn error_seq_configuration() { 173 | timely::execute(Configuration::Process(2), |worker| { 174 | 175 | // these results happen to be right, but aren't guaranteed. 176 | // the system is at liberty to re-order within a timestamp. 177 | let mut result = vec![(0, 0), (0, 2), (0, 6), (0, 12), (0, 20), 178 | (1, 1), (1, 4), (1, 9), (1, 16), (1, 25)]; 179 | 180 | let index = worker.index(); 181 | let mut input = InputHandle::new(); 182 | let mut control_input = InputHandle::new(); 183 | let mut probe = ProbeHandle::new(); 184 | 185 | worker.dataflow(|scope| { 186 | let control = scope.input_from(&mut control_input); 187 | let input = scope.input_from(&mut input); 188 | input 189 | .map(|x| (x % 2, x)) 190 | .stateful_state_machine( 191 | |_key, val, agg| { 192 | *agg += val; 193 | (false, Some((*_key, *agg))) 194 | }, 195 | |key| *key as u64 196 | , 197 | &control 198 | ) 199 | .inspect(move |x| { 200 | assert!(result.contains(x)); 201 | result.retain(|e| e != x); 202 | }) 203 | .probe_with(&mut probe); 204 | }); 205 | 206 | control_input.advance_to(3); 207 | control_input.send(Control::new(10, 1, ControlInst::Map(vec![0; 1 << BIN_SHIFT]))); 208 | // worker.step(); 209 | // control_input.advance_to(4); 210 | control_input.send(Control::new(9, 1, ControlInst::Map(vec![1; 1 << BIN_SHIFT]))); 211 | worker.step(); 212 | control_input.advance_to(10); 213 | // introduce data and watch! 214 | for round in 0..10 { 215 | if index == 0 { 216 | input.send(round); 217 | } 218 | input.advance_to(round + 1); 219 | while probe.less_than(input.time()) { 220 | worker.step(); 221 | } 222 | } 223 | 224 | }).unwrap(); 225 | } 226 | --------------------------------------------------------------------------------
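Usage note (editorial addition): the tests above double as the most direct usage reference for `stateful_state_machine`. The sketch below adapts `custom_configuration` into a standalone binary: it keeps a running sum per key and migrates every bin from worker 0 to worker 1 at timestamp 5. The two-process configuration, the migration time, and the closure bodies are illustrative choices, not part of the library API.

extern crate timely;
extern crate dynamic_scaling_mechanism;

use timely::dataflow::*;
use timely::dataflow::operators::{Input, Probe, Map, Inspect};
use timely::Configuration;

use dynamic_scaling_mechanism::{BIN_SHIFT, ControlInst, Control};
use dynamic_scaling_mechanism::state_machine::BinnedStateMachine;

fn main() {
    timely::execute(Configuration::Process(2), |worker| {
        let index = worker.index();
        let mut input = InputHandle::new();
        let mut control_input = InputHandle::new();
        let mut probe = ProbeHandle::new();

        worker.dataflow(|scope| {
            let control = scope.input_from(&mut control_input);
            scope.input_from(&mut input)
                // key each value by its parity and keep a running sum per key
                .map(|x: usize| (x % 2, x))
                .stateful_state_machine(
                    |key, val, agg| { *agg += val; (false, Some((*key, *agg))) },
                    |key| *key as u64,
                    &control)
                .inspect(move |x| println!("worker {}:\t{:?}", index, x))
                .probe_with(&mut probe);
        });

        // configuration 0: all bins on worker 0; configuration 1 (from time 5): all bins on worker 1
        control_input.send(Control::new(0, 1, ControlInst::Map(vec![0; 1 << BIN_SHIFT])));
        control_input.advance_to(5);
        control_input.send(Control::new(1, 1, ControlInst::Map(vec![1; 1 << BIN_SHIFT])));
        control_input.advance_to(10);

        // worker 0 introduces the data; every worker steps until the round is complete
        for round in 0..10 {
            if index == 0 {
                input.send(round);
            }
            input.advance_to(round + 1);
            while probe.less_than(input.time()) {
                worker.step();
            }
        }
    }).unwrap();
}

Here `Control::new(sequence, count, instruction)` identifies a configuration by its sequence number, and a `Map` vector of length `1 << BIN_SHIFT` assigns every bin to a worker index; a `Move(BinId(b), w)` instruction would instead relocate a single bin, as handled in `ControlSetBuilder::build` above.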