├── .gitignore ├── EXPERIMENTS.md ├── LICENSE ├── README.md ├── c ├── Cargo.toml ├── annotate │ ├── Cargo.toml │ └── src │ │ ├── main.rs │ │ ├── parser │ │ ├── annotation.rs │ │ ├── cdecl.rs │ │ └── mod.rs │ │ ├── resources │ │ ├── annotation.c.template │ │ ├── argstruct.c.template │ │ ├── callback.c.template │ │ ├── file.c.template │ │ └── function.c.template │ │ └── writer.rs ├── benchmarks │ ├── blackscholes │ │ ├── Makefile │ │ ├── benchmark-breakdown.sh │ │ ├── benchmark-pieces.sh │ │ ├── benchmark.sh │ │ └── blackscholes.c │ ├── breakdown.py │ ├── get-data.sh │ ├── gotham │ │ ├── Makefile │ │ ├── benchmark.sh │ │ ├── gotham.c │ │ ├── gotham.h │ │ ├── gotham_composer.c │ │ └── gotham_composer.h │ ├── haversine │ │ ├── Makefile │ │ ├── benchmark-pieces.sh │ │ ├── benchmark.sh │ │ └── haversine.c │ ├── nashville │ │ ├── Makefile │ │ ├── benchmark-breakdown.sh │ │ ├── benchmark-pieces.sh │ │ ├── benchmark.sh │ │ ├── nashville.c │ │ ├── nashville.h │ │ ├── nashville_composer.c │ │ ├── nashville_composer.h │ │ ├── nashville_parallel.c │ │ └── nashville_parallel.h │ ├── nbody │ │ ├── Makefile │ │ ├── benchmark.sh │ │ ├── nbody.c │ │ ├── nbody.h │ │ ├── nbody_composer.c │ │ ├── nbody_composer.h │ │ ├── nbody_mkl.c │ │ └── nbody_mkl.h │ ├── run-all.sh │ └── shallow_water │ │ ├── Makefile │ │ ├── README.md │ │ ├── benchmark.sh │ │ ├── shallow_water.c │ │ ├── shallow_water.h │ │ ├── shallow_water_composer.c │ │ ├── shallow_water_composer.h │ │ ├── shallow_water_mkl.c │ │ └── shallow_water_mkl.h ├── composer │ ├── Cargo.toml │ ├── build.rs │ └── src │ │ ├── error.rs │ │ ├── lib.rs │ │ ├── runtime │ │ ├── memory.rs │ │ ├── mod.rs │ │ └── tasks.rs │ │ └── util.rs └── lib │ ├── ImageMagick │ ├── Makefile │ ├── imagemagick.annotation │ ├── splitters.c │ └── splitters.h │ └── composer_mkl │ ├── Makefile │ ├── README.md │ ├── mkl.annotation │ ├── mkl_extensions.c │ ├── mkl_extensions.h │ ├── splitters.c │ ├── vec.c │ └── vec.h └── python ├── benchmarks ├── birth_analysis │ ├── 
benchmark.sh │ ├── birth_analysis.py │ └── birth_analysis_composer.py ├── blackscholes │ ├── benchmark-batch.sh │ ├── benchmark.sh │ ├── blackscholes.py │ └── blackscholes_numba.py ├── crime_index │ ├── benchmark.sh │ └── crime_index.py ├── data_cleaning │ ├── benchmark.sh │ └── data_cleaning.py ├── datasets │ ├── birth_analysis │ │ ├── babynames.txt.gz │ │ └── replicate-csv │ └── movielens │ │ └── replicate-csv ├── get-data.sh ├── haversine │ ├── benchmark.sh │ ├── haversine.py │ └── haversine_numba.py ├── movielens │ ├── benchmark.sh │ ├── movielens.py │ └── movielens_composer.py ├── nbody │ ├── benchmark.sh │ ├── nbody.py │ ├── nbody_boh.py │ └── nbody_numba.py ├── requirements.txt ├── run-all.sh ├── setup-env.sh ├── shallow_water │ ├── benchmark.sh │ ├── shallow_water.py │ └── shallow_water_numba.py ├── speechtag │ ├── benchmark.sh │ ├── speechtag.py │ └── speechtag_composer.py └── weld-python │ ├── __init__.py │ ├── benchmark-weld.sh │ ├── bindings.py │ ├── bindings_latest.py │ ├── compiled.py │ ├── encoders.py │ ├── test.py │ ├── types.py │ ├── weldobject.py │ └── weldtypes.py ├── lib ├── composer_numpy │ ├── __init__.py │ └── annotated.py └── composer_pandas │ ├── __init__.py │ └── annotated.py └── pycomposer ├── pycomposer ├── __init__.py ├── annotation.py ├── composer.py ├── dag.py ├── split_types.py ├── unevaluated.py └── vm │ ├── __init__.py │ ├── driver.py │ ├── instruction.py │ ├── program.py │ └── vm.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | generated/ 3 | **/*.rs.bk 4 | Cargo.lock 5 | composer.h 6 | __pycache__ 7 | *.pyc 8 | .DS_Store 9 | 10 | # Ignore built libraries. 11 | *.dylib 12 | *.so 13 | *.swp 14 | *.swo 15 | *.dSYM/ 16 | 17 | # ignore debugging code 18 | *.s 19 | *.S 20 | *.ll 21 | 22 | bench 23 | out 24 | eigen 25 | perf.data* 26 | 27 | *.jpg 28 | *.tif 29 | 30 | # Ignore Python virtualenv. 
31 | python/benchmarks/benchmarks 32 | 33 | # Ignore various projects 34 | weld/ 35 | weld-latest/ 36 | bohrium/ 37 | 38 | # Data 39 | babynames* 40 | ml-* 41 | 42 | # Benchmark output 43 | *.stdout 44 | *.stderr 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 2 | 3 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 4 | 5 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 6 | 7 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 8 | 9 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Split Annotations 2 | 3 | This is the main source code repository for Split Annotations. It contains the source code for the C implementation, the Python implementation, and the benchmarks from the SOSP 2019 paper. 4 | 5 | Split annotations (SAs) are a system for enabling optimizations such as pipelining and parallelization underneath existing libraries. Other approaches for enabling these optimizations, such as intermediate representations, compilers, or DSLs, are heavyweight solutions that require re-architecting existing code. Unlike these approaches, SAs enable these optimizations _without requiring changes to existing library functions_. 6 | 7 | ## Installing from Source 8 | 9 | 1. Make sure you have the required dependencies: 10 | 11 | * Python 3.5 12 | * `virtualenv` 13 | * The latest version of [Rust](https://rustup.rs/). See the instructions in the link. 14 | * `git` 15 | * `pkgconfig`. You can download it as follows: 16 | 17 | ```bash 18 | sudo apt-get install pkg-config 19 | ``` 20 | 21 | * The `build-essential` package on Linux distributions. You can download it as follows: 22 | 23 | ```bash 24 | sudo apt-get update 25 | sudo apt-get install build-essential 26 | ``` 27 | 28 | To build the C implementation: 29 | 30 | 2. Clone this repository and set the `$SA_HOME` environment variable (the latter is not necessary but simplifies the remaining steps): 31 | 32 | ```bash 33 | cd $HOME 34 | git clone https://github.com/weld-project/split-annotations.git 35 | cd split-annotations 36 | export SA_HOME=`pwd` 37 | ``` 38 | 39 | 3. Build the C implementation: 40 | 41 | ```bash 42 | cd $SA_HOME/c 43 | cargo build --release 44 | ``` 45 | 46 | 4. Optionally build the provided annotated C libraries (Intel MKL and ImageMagick). 
See `EXPERIMENTS.md` for directions on how to build MKL and ImageMagick, and then: 47 | 48 | ```bash 49 | cd $SA_HOME/c/lib/composer_mkl 50 | make 51 | cd $SA_HOME/c/lib/ImageMagick 52 | make 53 | ``` 54 | 55 | The Python implementation does not require any special installation, but running the benchmarks requires certain dependencies. See the instructions in `EXPERIMENTS.md`. 56 | 57 | ## Get Help 58 | 59 | If you need help installing or using split annotations, or have general questions about the project, feel free to either create a GitHub issue or email shoumik @ stanford . edu (with the spaces removed). 60 | -------------------------------------------------------------------------------- /c/Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | 3 | members = [ "annotate", "composer" ] 4 | -------------------------------------------------------------------------------- /c/annotate/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "annotate" 3 | version = "0.1.0" 4 | authors = ["Shoumik Palkar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | clap = "2.3" 9 | composer = { path = "../composer" } 10 | nom = "^4" 11 | serde_json = "1.0" 12 | subprocess = "0.1.18" 13 | env_logger = "0.6.0" 14 | log = "0.4.6" 15 | -------------------------------------------------------------------------------- /c/annotate/src/main.rs: -------------------------------------------------------------------------------- 1 | //! A command line tool for annotating C functions with splitability annotations. 2 | //! 3 | //! This tool takes as input an annotations file, which defines splitability annotations, and 4 | //! outputs C header files that can be compiled with a normal C program. The header files define 5 | //! functions that support the splitability annotation runtime. 6 | 7 | // Parses the annotation file. 
8 | mod parser; 9 | mod writer; 10 | 11 | use env_logger; 12 | use log::*; 13 | 14 | use std::fs::{DirBuilder, File}; 15 | use std::io::{Read, Write}; 16 | use std::path::PathBuf; 17 | 18 | use clap::{Arg, App}; 19 | 20 | 21 | fn main() -> std::io::Result<()> { 22 | let matches = App::new("annotate") 23 | .version("0.1.0") 24 | .author("Shoumik Palkar ") 25 | .about("Command line tool for annotating C functions with splitability annotations") 26 | .arg(Arg::with_name("INPUT") 27 | .help("annotation file") 28 | .required(true) 29 | .index(1)) 30 | .arg(Arg::with_name("dir") 31 | .short("d") 32 | .help("name of directory where output header files are written (default: 'generated')") 33 | .takes_value(true) 34 | .required(false)) 35 | .arg(Arg::with_name("force") 36 | .short("f") 37 | .help("Forces generation, even if a directory with the specified name already exists.") 38 | .required(false)) 39 | .arg(Arg::with_name("prefix") 40 | .short("p") 41 | .help("prefix for generated functions (default: 'c_')") 42 | .takes_value(true) 43 | .required(false)) 44 | .arg(Arg::with_name("header") 45 | .short("n") 46 | .help("name for include file with all definitions (default: 'generated')") 47 | .takes_value(true) 48 | .required(false)) 49 | .arg(Arg::with_name("includes") 50 | .short("i") 51 | .help("Includes (without '.h') that should be added at the top of the main header file.") 52 | .takes_value(true) 53 | .required(false)) 54 | .get_matches(); 55 | 56 | // Initialize logging. 57 | let mut builder = env_logger::Builder::from_default_env(); 58 | builder.default_format_timestamp(true) 59 | .init(); 60 | 61 | let input_file = matches.value_of("INPUT").unwrap(); 62 | let mut input_file = File::open(input_file)?; 63 | 64 | let mut annotations = String::new(); 65 | input_file.read_to_string(&mut annotations)?; 66 | 67 | // Convert the string annotation file into annotation objects. 
68 | let annotations = parser::parse(&annotations)?; 69 | 70 | let path = matches.value_of("dir").unwrap_or("generated"); 71 | 72 | // If force is enabled, remove the existing directory. 73 | if matches.is_present("force") { 74 | std::fs::remove_dir_all(path)?; 75 | } 76 | 77 | // Create a directory. 78 | DirBuilder::new().create(path)?; 79 | 80 | let mut files = vec![]; 81 | let mut functions = vec![]; 82 | 83 | for annotation in annotations { 84 | let mut writer = writer::AnnotationHeaderWriter::new(annotation); 85 | 86 | info!("Generating header for {}...", writer.function_header()); 87 | 88 | let header = writer.write(); 89 | 90 | files.push(writer.filename()); 91 | functions.push(writer.function_header()); 92 | 93 | let mut filename = PathBuf::new(); 94 | filename.push(path); 95 | filename.push(writer.filename()); 96 | filename.set_extension("h"); 97 | 98 | let mut f = File::create(filename).unwrap(); 99 | f.write(header.as_bytes())?; 100 | } 101 | 102 | let includes = matches.value_of("includes").unwrap_or(""); 103 | let includes = includes.split(",") 104 | .map(|e| e.to_string()) 105 | .filter(|e| e.len() != 0) 106 | .collect(); 107 | 108 | // Write the final header. 109 | let mut include_writer = writer::IncludeHeaderWriter::new(files, functions, includes, path.to_string()); 110 | let header = include_writer.write(); 111 | 112 | let mut filename = PathBuf::new(); 113 | filename.push(path); 114 | filename.push(matches.value_of("include").unwrap_or("generated")); 115 | filename.set_extension("h"); 116 | 117 | let mut f = File::create(filename).unwrap(); 118 | f.write(header.as_bytes())?; 119 | 120 | Ok(()) 121 | } 122 | -------------------------------------------------------------------------------- /c/annotate/src/parser/cdecl.rs: -------------------------------------------------------------------------------- 1 | //! Parsing for C function headers. 
2 | 3 | use composer::CDecl; 4 | 5 | use nom::*; 6 | use nom::types::CompleteByteSlice; 7 | 8 | use std::str; 9 | 10 | use super::ident; 11 | 12 | /// Parses a C type. 13 | /// 14 | /// This currently supports parsing any regular type, or types with pointers. 15 | /// 16 | /// # Unsupported Features 17 | /// 18 | /// * Function Pointers 19 | /// * Fixed size arrays 20 | named_complete!( 21 | pub parse_c_type, 22 | map!( 23 | ws!(do_parse!( 24 | is_const: opt!(tag_s!("const")) >> 25 | tag: opt!(alt!( 26 | tag_s!("struct") | 27 | tag_s!("union") 28 | )) >> 29 | ty: ident >> 30 | pointers: many0!(char!('*')) >> 31 | (is_const, tag, ty, pointers) 32 | )), 33 | |(is_const, tag, ty, ptrs): (Option, Option, String, Vec)| { 34 | let is_const = is_const.map(|ref v| str::from_utf8(v).unwrap()); 35 | let tag = tag.map(|ref v| str::from_utf8(v).unwrap()); 36 | 37 | let mut name = String::new(); 38 | 39 | if let Some(is_const) = is_const { 40 | name.push_str(is_const); 41 | name.push(' '); 42 | } 43 | 44 | if let Some(tag) = tag { 45 | name.push_str(tag); 46 | name.push(' '); 47 | }; 48 | 49 | name.push_str(&ty); 50 | 51 | for _ in 0..ptrs.len() { 52 | name.push('*'); 53 | } 54 | name 55 | } 56 | ) 57 | ); 58 | 59 | /// Parses a C parameter, which is a type followed by an optional name. 60 | named_complete!( 61 | parse_c_parameter<(String, Option)>, 62 | ws!(do_parse!( 63 | ty: parse_c_type >> 64 | arg_name: opt!(ident) >> 65 | (ty, arg_name) 66 | )) 67 | ); 68 | 69 | 70 | /// Parses a single C function declaration. 
71 | named_complete!( 72 | pub parse_c_decl, 73 | map!( 74 | ws!(do_parse!( 75 | return_type: parse_c_type >> 76 | func_name: ident >> 77 | arguments: delimited!( 78 | char!('('), 79 | separated_list_complete!(ws!(char!(',')), parse_c_parameter), 80 | char!(')') 81 | ) >> 82 | char!(';') >> 83 | (return_type, func_name, arguments) 84 | )), 85 | |n: (String, String, Vec<(String, Option)>)| CDecl::new(n.0, n.1, n.2) 86 | ) 87 | ); 88 | 89 | #[cfg(test)] 90 | fn check_type(input: &str, expected: Option<&str>) { 91 | let result = parse_c_type(CompleteByteSlice(input.as_bytes())); 92 | if let Some(expected) = expected { 93 | assert_eq!(expected, result.unwrap().1); 94 | } else { 95 | result.expect_err("Expected an error"); 96 | } 97 | } 98 | 99 | #[cfg(test)] 100 | fn check_parameter(input: &str, expected: Option<(&str, Option<&str>)>) { 101 | let result = parse_c_parameter(CompleteByteSlice(input.as_bytes())); 102 | if let Some(expected) = expected { 103 | let result = result.unwrap().1; 104 | assert_eq!(result, (String::from(expected.0), expected.1.map(|v| String::from(v)))); 105 | } else { 106 | result.expect_err("Expected an error"); 107 | } 108 | } 109 | 110 | #[cfg(test)] 111 | fn check_decl(input: &str, expected: Option) { 112 | let result = parse_c_decl(CompleteByteSlice(input.as_bytes())); 113 | if let Some(expected) = expected { 114 | let result = result.unwrap().1; 115 | assert_eq!(result, expected); 116 | } else { 117 | result.expect_err("Expected an error"); 118 | } 119 | } 120 | 121 | #[test] 122 | fn simple_type() { 123 | check_type("int", Some("int")); 124 | } 125 | 126 | #[test] 127 | fn pointer_type() { 128 | check_type("int*", Some("int*")); 129 | } 130 | 131 | #[test] 132 | fn const_type() { 133 | check_type("const int", Some("const int")); 134 | } 135 | 136 | #[test] 137 | fn pointer_type_with_space() { 138 | check_type("int *", Some("int*")); 139 | } 140 | 141 | #[test] 142 | fn struct_type() { 143 | check_type("struct myStruct", Some("struct 
myStruct")); 144 | } 145 | 146 | #[test] 147 | fn const_struct_type() { 148 | check_type("const struct myStruct", Some("const struct myStruct")); 149 | } 150 | 151 | #[test] 152 | fn struct_pointer_type() { 153 | check_type("struct myStruct *", Some("struct myStruct*")); 154 | } 155 | 156 | #[test] 157 | fn const_struct_pointer_type() { 158 | check_type("const struct myStruct *", Some("const struct myStruct*")); 159 | } 160 | 161 | #[test] 162 | fn double_pointer_type() { 163 | check_type("int**", Some("int**")); 164 | } 165 | 166 | #[test] 167 | fn simple_parameter() { 168 | check_parameter("int x", Some(("int", Some("x")))) 169 | } 170 | 171 | #[test] 172 | fn pointer_parameter() { 173 | check_parameter("int *x", Some(("int*", Some("x")))) 174 | } 175 | 176 | #[test] 177 | fn struct_parameter() { 178 | check_parameter("struct foo *x", Some(("struct foo*", Some("x")))) 179 | } 180 | 181 | #[test] 182 | fn no_name_parameter() { 183 | check_parameter("struct foo *", Some(("struct foo*", None))) 184 | } 185 | 186 | #[test] 187 | fn incomplete_type_parameter() { 188 | check_parameter("struc foo *x", Some(("struc", Some("foo")))) 189 | } 190 | 191 | #[test] 192 | fn basic_decl() { 193 | check_decl("int foo();", Some(CDecl::new("int", "foo", vec![]))); 194 | } 195 | 196 | #[test] 197 | fn decl_with_arg() { 198 | check_decl("int foo(int);", Some(CDecl::new("int", "foo", vec![("int", None)]))); 199 | } 200 | 201 | #[test] 202 | fn decl_with_many_args() { 203 | let args = vec![ 204 | ("int", None), 205 | ("struct myStruct**", None), 206 | ("float*", None) 207 | ]; 208 | let expected = CDecl::new("int", "foo", args); 209 | check_decl("int foo(int, struct myStruct **, float *);", Some(expected)); 210 | } 211 | 212 | #[test] 213 | fn decl_with_some_named_args() { 214 | let args = vec![ 215 | ("int", None), 216 | ("struct myStruct**", Some("arg2")), 217 | ("float*", None) 218 | ]; 219 | let expected = CDecl::new("int", "foo", args); 220 | check_decl("int foo(int, struct 
myStruct **arg2, float *);", Some(expected)); 221 | } 222 | 223 | -------------------------------------------------------------------------------- /c/annotate/src/resources/annotation.c.template: -------------------------------------------------------------------------------- 1 | static AnnotationRef __{function_name}_function_annotations__() {{ 2 | static AnnotationRef s; 3 | if (s) {{ 4 | return s; 5 | }} else {{ 6 | s = InitFromJson("{annotation_json}"); 7 | 8 | {set_split_type_info} 9 | }} 10 | return s; 11 | }} 12 | -------------------------------------------------------------------------------- /c/annotate/src/resources/argstruct.c.template: -------------------------------------------------------------------------------- 1 | struct __{function_name}_callable__ {{ 2 | {struct_field_list} 3 | }} __attribute__((packed)); 4 | -------------------------------------------------------------------------------- /c/annotate/src/resources/callback.c.template: -------------------------------------------------------------------------------- 1 | intptr_t __{function_name}_callback__(const void *a) {{ 2 | struct __{function_name}_callable__ *arg = (struct __{function_name}_callable__ *)a; 3 | {return_value}{function_name}({callback_call_list}); 4 | return {final_return_value}; 5 | }} 6 | -------------------------------------------------------------------------------- /c/annotate/src/resources/file.c.template: -------------------------------------------------------------------------------- 1 | #ifndef _{function_name}_COMPOSER_DEFINED_ 2 | #define _{function_name}_COMPOSER_DEFINED_ 3 | 4 | /** Generated by Composer -- this should not be modified directly! 
**/ 5 | 6 | #include 7 | 8 | #ifdef __cplusplus 9 | extern "C" {{ 10 | #endif 11 | 12 | {externs} 13 | 14 | {generator} 15 | {argstruct} 16 | {callback} 17 | {callable} 18 | 19 | #ifdef __cplusplus 20 | }} 21 | #endif 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /c/annotate/src/resources/function.c.template: -------------------------------------------------------------------------------- 1 | {return_type} {prefix}{function_name}({argument_list}) {{ 2 | if (composer_evaluate()) {{ 3 | {return_tag} {function_name}({function_call_args}); 4 | }} else {{ 5 | composer_protect_all(); 6 | struct __{function_name}_callable__ v; 7 | {argstruct_construct_list} 8 | {register_return}composer_register_function(__{function_name}_function_annotations__(), 9 | __{function_name}_callback__, 10 | (void *)&v, {has_return_value}); 11 | {return_line} 12 | }} 13 | }} 14 | -------------------------------------------------------------------------------- /c/benchmarks/blackscholes/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # Libraries 3 | LD_LIBRARY_PATH=-L../../target/release -L../../lib/composer_mkl 4 | 5 | OS=$(shell uname -s) 6 | 7 | ifeq ($(OS), Darwin) 8 | CC=gcc-7 9 | CPP=g++-7 10 | LDFLAGS= 11 | INTEL=-DMKL_ILP64 -m64 -I${MKLROOT}/include -L${MKLROOT}/lib -Wl,-rpath,${MKLROOT}/lib -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 12 | else ifeq ($(OS), Linux) 13 | CC=gcc-5 14 | CPP=g++-5 15 | LDFLAGS=-Wl,-rpath-link -Wno-format 16 | INTEL= -DMKL_ILP64 -m64 -I${MKLROOT}/compilers_and_libraries/linux/mkl/include -L${MKLROOT}/compilers_and_libraries/linux/mkl/lib/intel64 -I${MKLROOT}/include -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 17 | else 18 | $(error Unsupported platform: $(OS)) 19 | endif 20 | 21 | CFLAGS=-O3 -Wall -pedantic 22 | INCLUDE=-I../../composer/ 
-I../../lib/composer_mkl 23 | EXEC=bench 24 | 25 | .PHONY: all clean 26 | 27 | all: 28 | $(CC) $(CFLAGS) $(INCLUDE) $(LD_LIBRARY_PATH) blackscholes.c -o $(EXEC) -lcomposer -lcomposer_mkl -lm $(INTEL) 29 | 30 | clean: 31 | rm -rf *.o $(EXEC) $(VLIB) 32 | -------------------------------------------------------------------------------- /c/benchmarks/blackscholes/benchmark-breakdown.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | tasks=( mklcomposer ) 6 | threads=( 16 ) 7 | 8 | for task in "${tasks[@]}"; do 9 | rm -f $task.stdout $task.stderr 10 | git log | head -1 > $task.stderr 11 | git log | head -1 > $task.stdout 12 | done 13 | 14 | # For composer... 15 | export RUST_LOG=info 16 | export OMP_NUM_THREADS=1 17 | 18 | for i in {1..5}; do 19 | for task in "${tasks[@]}"; do 20 | for nthreads in "${threads[@]}"; do 21 | ./bench -m $task -s 30 -t $nthreads >> $task.stdout 2>> $task.stderr 22 | done 23 | done 24 | done 25 | -------------------------------------------------------------------------------- /c/benchmarks/blackscholes/benchmark-pieces.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | tasks=( mklcomposer ) 6 | pieces=( 256 512 1024 2048 4096 8192 16384 ) 7 | 8 | for task in "${tasks[@]}"; do 9 | rm -f $task.stdout $task.stderr 10 | git log | head -1 > $task.stderr 11 | git log | head -1 > $task.stdout 12 | done 13 | 14 | for i in {1..5}; do 15 | for task in "${tasks[@]}"; do 16 | for npieces in "${pieces[@]}"; do 17 | ./bench -m $task -s 30 -p $npieces -t 16 >> $task.stdout 2>> $task.stderr 18 | done 19 | done 20 | done 21 | -------------------------------------------------------------------------------- /c/benchmarks/blackscholes/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | tasks=( mkl mklcomposer ) 6 | threads=( 1 2 4 8 16 ) 
7 | runs=${1:-1} 8 | size=30 9 | 10 | for task in "${tasks[@]}"; do 11 | rm -f $task.stdout $task.stderr 12 | git log | head -1 > $task.stderr 13 | git log | head -1 > $task.stdout 14 | done 15 | 16 | for i in {1..$runs}; do 17 | for nthreads in "${threads[@]}"; do 18 | export OMP_NUM_THREADS=$nthreads 19 | ./bench -m mkl -s $size -t $nthreads >> mkl.stdout 2>> mkl.stderr 20 | done 21 | done 22 | 23 | # Set the number of threads in the environment variable to 1, to prevent 24 | # the MKL functions from launching N threads per task. 25 | export OMP_NUM_THREADS=1 26 | for i in {1..$runs}; do 27 | for nthreads in "${threads[@]}"; do 28 | ./bench -m mklcomposer -s $size -t $nthreads >> mklcomposer.stdout 2>> mklcomposer.stderr 29 | done 30 | done 31 | -------------------------------------------------------------------------------- /c/benchmarks/breakdown.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parser for log data for system overhead breakdown. 
3 | """ 4 | 5 | class ThreadTime(object): 6 | def __init__(self, thread): 7 | self.thread = thread 8 | self.split_time = None 9 | self.driver_time = None 10 | self.merge_time = None 11 | 12 | @property 13 | def task_time(self): 14 | return self.driver_time - self.split_time 15 | 16 | def this_thread(self, line): 17 | return line.find("thread {}".format(self.thread)) != -1 18 | 19 | def parse(self, line): 20 | if self.this_thread(line): 21 | self.parse_split_time(line) 22 | self.parse_driver_time(line) 23 | self.parse_merge_time(line) 24 | 25 | def parse_split_time(self, line): 26 | if self.split_time is not None: 27 | return 28 | if line.find("total split time: ") != -1: 29 | self.split_time = float(line.split("total split time: ")[1].strip()) 30 | 31 | def parse_driver_time(self, line): 32 | if self.driver_time is not None: 33 | return 34 | if line.find("driver time: ") != -1: 35 | self.driver_time = float(line.split("driver time: ")[1].split(' ')[0].strip()) 36 | 37 | def parse_merge_time(self, line): 38 | if self.merge_time is not None: 39 | return 40 | if line.find("merge time: ") != -1: 41 | self.merge_time = float(line.split("merge time: ")[1].strip()) 42 | 43 | def __str__(self): 44 | return "{},{},{}".format(self.split_time, self.task_time, self.merge_time) 45 | 46 | class Run(object): 47 | def __init__(self, threads, run_delimiter): 48 | self.threads = threads 49 | self.run_delimiter = run_delimiter 50 | self.unprotect = None 51 | self.planner = None 52 | self.final_merge = None 53 | self.thread_times = [ThreadTime(i) for i in xrange(threads)] 54 | 55 | def parse(self, line): 56 | self.parse_unprotect(line) 57 | self.parse_planner(line) 58 | for thread in self.thread_times: 59 | thread.parse(line) 60 | self.parse_final_merge(line) 61 | 62 | def parse_unprotect(self, line): 63 | if self.unprotect is not None: 64 | return 65 | if line.find("Unprotect memory: ") != -1: 66 | self.unprotect = float(line.split("Unprotect memory: ")[1].strip()) 67 | 68 | def 
parse_planner(self, line): 69 | if self.planner is not None: 70 | return 71 | if line.find("Planner time: ") != -1: 72 | self.planner = float(line.split("Planner time: ")[1].strip()) 73 | 74 | def parse_final_merge(self, line): 75 | if self.final_merge is not None: 76 | return 77 | if line.find("final merge time: ") != -1: 78 | self.final_merge = float(line.split("final merge time: ")[1].strip()) 79 | 80 | def finished(self, line): 81 | if line.find(self.run_delimiter) != -1: 82 | return True 83 | else: 84 | return False 85 | 86 | def __str__(self): 87 | average_split_time = max([t.split_time for t in self.thread_times]) 88 | average_task_time = max([t.task_time for t in self.thread_times]) 89 | average_merge_time = max([t.merge_time for t in self.thread_times]) + self.final_merge 90 | return "{:.5f},{:.5f},{:.5f},{:.5f},{:.5f}".format(self.unprotect, self.planner, average_split_time, average_task_time, average_merge_time) 91 | 92 | def parse(filename, threads, run_delimiter): 93 | current_run = Run(threads, run_delimiter) 94 | print "unprotect,planner,split,task,merge" 95 | with open(filename) as f: 96 | for line in f: 97 | if current_run.finished(line): 98 | print current_run 99 | current_run = Run(threads, run_delimiter) 100 | else: 101 | current_run.parse(line) 102 | 103 | # black scholes 104 | # parse("blackscholes/breakdown-results/mklcomposer.stderr", 16, "First put value") 105 | 106 | parse("nashville/breakdown-results/composer.stderr", 16, "image size:") 107 | -------------------------------------------------------------------------------- /c/benchmarks/get-data.sh: -------------------------------------------------------------------------------- 1 | # Gets the data for all C experiments and puts it in the proper place so the benchmark script runs. 
2 | rm -rf datasets 3 | wget https://www.spacetelescope.org/static/archives/images/publicationtiff40k/heic1502a.tif 4 | mkdir -p datasets/ 5 | mv heic1502a.tif datasets/heic1502a-40k.tif 6 | 7 | -------------------------------------------------------------------------------- /c/benchmarks/gotham/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # Libraries 3 | LD_LIBRARY_PATH=-L../../target/release -L../../lib/ImageMagick/ 4 | 5 | OS=$(shell uname -s) 6 | 7 | ifeq ($(OS), Darwin) 8 | CC=gcc-7 9 | CPP=g++-7 10 | LDFLAGS= 11 | MAGICK=$(shell pkg-config --cflags --libs MagickWand) 12 | else ifeq ($(OS), Linux) 13 | CC=gcc-5 14 | CPP=g++-5 15 | LDFLAGS=-Wl,-rpath-link -Wno-format 16 | MAGICK=$(shell pkg-config --cflags --libs MagickWand) 17 | else 18 | $(error Unsupported platform: $(OS)) 19 | endif 20 | 21 | CFLAGS=-O3 -Wall -pedantic -Wno-discarded-qualifiers 22 | INCLUDE=-I../../composer/ -I/usr/local/include/ImageMagick-7/MagickWand -I../../lib/ImageMagick 23 | EXEC=bench 24 | 25 | .PHONY: all clean 26 | 27 | all: 28 | $(CC) -fopenmp $(CFLAGS) $(INCLUDE) $(LD_LIBRARY_PATH) gotham.c gotham_composer.c -o $(EXEC) $(MAGICK) -lcomposer -lcomposer_imagemagick -lm 29 | 30 | clean: 31 | rm -rf *.o $(EXEC) 32 | -------------------------------------------------------------------------------- /c/benchmarks/gotham/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | tasks=( naive composer ) 6 | threads=( 1 2 4 8 16 ) 7 | runs=${1:-1} 8 | 9 | for task in "${tasks[@]}"; do 10 | rm -f $task.stderr $task.stdout 11 | git log | head -1 > $task.stderr 12 | git log | head -1 > $task.stdout 13 | done 14 | 15 | for i in {1..$runs}; do 16 | for task in "${tasks[@]}"; do 17 | for nthreads in "${threads[@]}"; do 18 | taskset -a -c 0-19 ./bench -m $task -i ../datasets/heic1502a-40k.tif -t $nthreads >> $task.stdout 2>> $task.stderr 19 | done 20 | done 21 | done 22 
| -------------------------------------------------------------------------------- /c/benchmarks/gotham/gotham.c: -------------------------------------------------------------------------------- 1 | // Andromeda: https://www.spacetelescope.org/images/heic1502a/ 2 | // 3 | // https://www.spacetelescope.org/static/archives/images/publicationtiff40k/heic1502a.tif 4 | 5 | #include 6 | 7 | #ifdef __linux__ 8 | #define _GNU_SOURCE 9 | #endif 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | #include "gotham.h" 26 | #include "gotham_composer.h" 27 | 28 | typedef enum { 29 | UNKNOWN = 0, 30 | NAIVE, 31 | COMPOSER, 32 | } exec_mode_t; 33 | 34 | // Number of threads. 35 | long threads = 1; 36 | // Mode to use 37 | exec_mode_t mode; 38 | // input filename. 39 | char input_filename[2048]; 40 | // TODO This should be autotuned/based on the input size. 41 | int piece_size = 20; 42 | // Should the result image be written to an output file? 43 | // 44 | // Used offline to check for correctness. 
45 | int write_out = 0; 46 | 47 | MagickWand *do_gotham(MagickWand *input_wand) { 48 | // modulate 120,10,100 49 | MagickModulateImage(input_wand, HUE, SATURATION, VALUE); 50 | 51 | // colorize 52 | PixelWand *colorize = NewPixelWand(); 53 | PixelWand *alpha = NewPixelWand(); 54 | PixelSetColor(colorize,"#222b6d"); 55 | PixelSetColor(alpha, "rgb(20%,20%,20%)"); 56 | MagickColorizeImage(input_wand, colorize, alpha); 57 | MagickColorizeImage(input_wand, colorize, alpha); 58 | MagickColorizeImage(input_wand, colorize, alpha); 59 | MagickColorizeImage(input_wand, colorize, alpha); 60 | MagickColorizeImage(input_wand, colorize, alpha); 61 | 62 | // gamma 0.5 63 | MagickGammaImage(input_wand, GAMMA); 64 | 65 | // contrast 66 | MagickContrastImage(input_wand, 1); 67 | // contrast 68 | MagickContrastImage(input_wand, 1); 69 | 70 | DestroyPixelWand(alpha); 71 | DestroyPixelWand(colorize); 72 | } 73 | 74 | MagickWand *gotham_simple(MagickWand *input_wand) { 75 | do_gotham(input_wand); 76 | 77 | return input_wand; 78 | } 79 | 80 | exec_mode_t get_mode(char *s) { 81 | if (strcmp("naive", s) == 0) { 82 | return NAIVE; 83 | } else if (strcmp("composer", s) == 0) { 84 | return COMPOSER; 85 | } else { 86 | return UNKNOWN; 87 | } 88 | } 89 | 90 | void print_usage(char **argv) { 91 | fprintf(stderr, "%s -i -m [-t -h -o ]\n", argv[0]); 92 | fprintf(stderr, "Available modes:\n"); 93 | fprintf(stderr, "\tnaive\n" 94 | "\tcomposer\n" 95 | ); 96 | } 97 | 98 | int power_of_two(long x) { 99 | return x && !(x & (x - 1)); 100 | } 101 | 102 | void parse_args(int argc, char **argv) { 103 | int opt; 104 | while ((opt = getopt(argc, argv, "i:m:t:h:o")) != -1) { 105 | switch (opt) { 106 | case 'i': 107 | sprintf(input_filename, "%s", optarg); 108 | break; 109 | case 'm': 110 | mode = get_mode(optarg); 111 | if (mode == UNKNOWN) { 112 | print_usage(argv); 113 | exit(EXIT_FAILURE); 114 | } 115 | break; 116 | case 't': 117 | threads = atol(optarg); 118 | if (!power_of_two(threads) || threads > 40) { 
119 | fprintf(stderr, "threads must be power-of-2 and < 16\n"); 120 | exit(EXIT_FAILURE); 121 | } 122 | break; 123 | case 'o': 124 | write_out = 1; 125 | break; 126 | case 'h': 127 | default: 128 | print_usage(argv); 129 | exit(EXIT_FAILURE); 130 | } 131 | } 132 | } 133 | 134 | int main(int argc,char **argv) { 135 | 136 | parse_args(argc, argv); 137 | if (mode == UNKNOWN || strlen(input_filename) == 0) { 138 | print_usage(argv); 139 | exit(EXIT_FAILURE); 140 | } 141 | 142 | struct stat s; 143 | if (stat(input_filename, &s) == -1) { 144 | perror("Input file error"); 145 | exit(EXIT_FAILURE); 146 | } 147 | 148 | // Need to call this before any of the other library functions. 149 | if (mode == COMPOSER) { 150 | composer_init(threads, piece_size); 151 | omp_set_num_threads(1); 152 | } else { 153 | omp_set_num_threads(threads); 154 | } 155 | 156 | printf("Input file: %s (%ld bytes) Piece Size: %d Threads: %ld Mode: %d\n", 157 | input_filename, s.st_size, piece_size, threads, mode); 158 | 159 | MagickWandGenesis(); 160 | 161 | MagickWand *wand = NewMagickWand(); 162 | 163 | printf("Reading image..."); 164 | fflush(stdout); 165 | MagickReadImage(wand, input_filename); 166 | printf("done.\n"); 167 | fflush(stdout); 168 | 169 | struct timeval start, end, diff; 170 | gettimeofday(&start, NULL); 171 | 172 | // Run function 173 | switch (mode) { 174 | case NAIVE: 175 | wand = gotham_simple(wand); 176 | break; 177 | case COMPOSER: 178 | wand = gotham_composer(wand); 179 | break; 180 | case UNKNOWN: 181 | default: 182 | fprintf(stderr, "unsupported case"); 183 | exit(EXIT_FAILURE); 184 | } 185 | gettimeofday(&end, NULL); 186 | 187 | timersub(&end, &start, &diff); 188 | double runtime = (double)diff.tv_sec + ((double)diff.tv_usec / 1000000.0); 189 | 190 | printf("%f seconds\n", runtime); 191 | fflush(stderr); 192 | 193 | if (write_out) { 194 | printf("Writing image..."); 195 | fflush(stdout); 196 | char output[256]; 197 | sprintf(output, "output-%d.jpg", mode); 198 | 
MagickWriteImage(wand, output); 199 | printf("done.\n"); 200 | fflush(stdout); 201 | } 202 | 203 | wand = DestroyMagickWand(wand); 204 | MagickWandTerminus(); 205 | } 206 | -------------------------------------------------------------------------------- /c/benchmarks/gotham/gotham.h: -------------------------------------------------------------------------------- 1 | #ifndef _NASHVILLE_H_ 2 | #define _NASHVILLE_H_ 3 | 4 | #define GAMMA (0.5) 5 | #define HUE (120) 6 | #define SATURATION (10) 7 | #define VALUE (100) 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /c/benchmarks/gotham/gotham_composer.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef __linux__ 4 | #define _GNU_SOURCE 5 | #endif 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | #include 19 | 20 | #include "gotham_composer.h" 21 | #include "gotham.h" 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | MagickWand *c_do_gotham(MagickWand *input_wand) { 28 | // modulate 120,10,100 29 | c_MagickModulateImage(input_wand, HUE, SATURATION, VALUE); 30 | 31 | // colorize 32 | PixelWand *colorize = c_NewPixelWand(); 33 | PixelWand *alpha = c_NewPixelWand(); 34 | c_PixelSetColor(colorize,"#222b6d"); 35 | c_PixelSetColor(alpha, "rgb(20%,20%,20%)"); 36 | c_MagickColorizeImage(input_wand, colorize, alpha); 37 | c_MagickColorizeImage(input_wand, colorize, alpha); 38 | c_MagickColorizeImage(input_wand, colorize, alpha); 39 | c_MagickColorizeImage(input_wand, colorize, alpha); 40 | c_MagickColorizeImage(input_wand, colorize, alpha); 41 | 42 | // gamma 0.5 43 | c_MagickGammaImage(input_wand, GAMMA); 44 | 45 | c_DestroyPixelWand(alpha); 46 | c_DestroyPixelWand(colorize); 47 | 48 | // contrast 49 | c_MagickContrastImage(input_wand, 1); 50 | // contrast 51 | c_MagickContrastImage(input_wand, 1); 52 | } 53 | 54 | MagickWand 
*gotham_composer(MagickWand *input_wand) { 55 | 56 | c_do_gotham(input_wand); 57 | 58 | // TODO we can do this automatically by adding a "mut" 59 | composer_emit(&input_wand, sizeof(input_wand), (intptr_t)WandSplit_merge); 60 | composer_execute(); 61 | 62 | return input_wand; 63 | } 64 | -------------------------------------------------------------------------------- /c/benchmarks/gotham/gotham_composer.h: -------------------------------------------------------------------------------- 1 | #ifndef _NASHVILLE_COMPOSER_H_ 2 | #define _NASHVILLE_COMPOSER_H_ 3 | 4 | #include 5 | 6 | MagickWand *gotham_composer(MagickWand *input_wand); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /c/benchmarks/haversine/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # Libraries 3 | LD_LIBRARY_PATH=-L../../target/release -L../../lib/composer_mkl 4 | 5 | OS=$(shell uname -s) 6 | 7 | ifeq ($(OS), Darwin) 8 | CC=gcc-7 9 | CPP=g++-7 10 | LDFLAGS= 11 | INTEL=-DMKL_ILP64 -m64 -I${MKLROOT}/include -L${MKLROOT}/lib -Wl,-rpath,${MKLROOT}/lib -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 12 | else ifeq ($(OS), Linux) 13 | CC=gcc-5 14 | CPP=g++-5 15 | LDFLAGS=-Wl,-rpath-link -Wno-format 16 | INTEL= -DMKL_ILP64 -m64 -I${MKLROOT}/include -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 17 | else 18 | $(error Unsupported platform: $(OS)) 19 | endif 20 | 21 | CFLAGS=-O3 -Wall -pedantic 22 | INCLUDE=-I../../composer/ -I../../lib/composer_mkl 23 | EXEC=bench 24 | 25 | .PHONY: all clean 26 | 27 | all: 28 | $(CC) -fopenmp $(CFLAGS) $(INCLUDE) $(LD_LIBRARY_PATH) haversine.c -o $(EXEC) -lcomposer -lcomposer_mkl -lm $(INTEL) 29 | 30 | 31 | clean: 32 | rm -rf *.o $(EXEC) $(VLIB) 33 | -------------------------------------------------------------------------------- /c/benchmarks/haversine/benchmark-pieces.sh: 
#!/bin/bash
#
# Haversine benchmark: sweeps thread counts for the mkl and mklcomposer
# modes, appending results to <task>.stdout / <task>.stderr.
#
# Usage: ./benchmark.sh [runs]   (default: 1 full sweep)

set -x

tasks=( mkl mklcomposer )
threads=( 1 2 4 8 16 )
runs=${1:-1}
size=30

# Start each task's log files fresh, tagged with the current git commit.
for task in "${tasks[@]}"; do
    rm -f $task.stdout $task.stderr
    git log | head -1 > $task.stderr
    git log | head -1 > $task.stdout
done

# BUG FIX: `for i in {1..$runs}` never expands $runs (brace expansion runs
# before parameter expansion), so both sweeps executed exactly once. seq
# expands at runtime and honors the requested run count.
for i in $(seq 1 "$runs"); do
    for nthreads in "${threads[@]}"; do
        export OMP_NUM_THREADS=$nthreads
        ./bench -m mkl -s $size -t $nthreads >> mkl.stdout 2>> mkl.stderr
    done
done

# Set the number of threads in the environment variable to 1, to prevent
# the MKL functions from launching N threads per task.
export OMP_NUM_THREADS=1
for i in $(seq 1 "$runs"); do
    for nthreads in "${threads[@]}"; do
        ./bench -m mklcomposer -s $size -t $nthreads >> mklcomposer.stdout 2>> mklcomposer.stderr
    done
done
$task.stdout 2>> $task.stderr 20 | done 21 | done 22 | done 23 | -------------------------------------------------------------------------------- /c/benchmarks/nashville/benchmark-pieces.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | tasks=( composer ) 6 | pieces=( 32 256 512 1024 2048 4096 8192 16384 ) 7 | 8 | for task in "${tasks[@]}"; do 9 | rm -f $task.stdout $task.stderr 10 | git log | head -1 > $task.stderr 11 | git log | head -1 > $task.stdout 12 | done 13 | 14 | for i in {1..5}; do 15 | for task in "${tasks[@]}"; do 16 | for npieces in "${pieces[@]}"; do 17 | RUST_LOG=info ./bench -m $task -i ~/heic1502a-40k.tif -p $npieces -t 8 >> $task.stdout 2>> $task.stderr 18 | done 19 | done 20 | done 21 | -------------------------------------------------------------------------------- /c/benchmarks/nashville/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | tasks=( naive composer ) 6 | threads=( 1 2 4 8 16 ) 7 | runs=${1:-1} 8 | 9 | for task in "${tasks[@]}"; do 10 | rm -f $task.stderr $task.stdout 11 | git log | head -1 > $task.stderr 12 | git log | head -1 > $task.stdout 13 | done 14 | 15 | for i in {1..$runs}; do 16 | for task in "${tasks[@]}"; do 17 | for nthreads in "${threads[@]}"; do 18 | /usr/bin/time ./bench -m $task -i ../datasets/heic1502a-40k.tif -t $nthreads >> $task.stdout 2>> $task.stderr 19 | done 20 | done 21 | done 22 | -------------------------------------------------------------------------------- /c/benchmarks/nashville/nashville.c: -------------------------------------------------------------------------------- 1 | // Andromeda: https://www.spacetelescope.org/images/heic1502a/ 2 | // 3 | // https://www.spacetelescope.org/static/archives/images/publicationtiff40k/heic1502a.tif 4 | 5 | #include 6 | 7 | #ifdef __linux__ 8 | #define _GNU_SOURCE 9 | #endif 10 | #include 11 | 12 | #include 13 
| #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | #include "nashville.h" 26 | #include "nashville_parallel.h" 27 | #include "nashville_composer.h" 28 | 29 | typedef enum { 30 | UNKNOWN = 0, 31 | NAIVE, 32 | COMPOSER, 33 | PARALLEL 34 | } exec_mode_t; 35 | 36 | // Number of threads. 37 | long threads = 1; 38 | // Mode to use 39 | exec_mode_t mode; 40 | // input filename. 41 | char input_filename[2048]; 42 | // TODO This should be autotuned/based on the input size. 43 | int pieces = 20; 44 | // Should the result be written to a file? 45 | int write_out = 0; 46 | 47 | // Actual work for the colortone function. 48 | void do_colortone(MagickWand *wand, 49 | const char *color, 50 | const char *compose_opt, 51 | int negate, 52 | MagickWand *colorized_wand, 53 | MagickWand *colorspace_wand) { 54 | 55 | // Colorize image. 56 | PixelWand *colorize = NewPixelWand(); 57 | PixelWand *alpha = NewPixelWand(); 58 | PixelSetColor(colorize, color); 59 | PixelSetColor(alpha, "#fff"); 60 | MagickColorizeImage(colorized_wand, colorize, alpha); 61 | 62 | // Convert to grayspace. 63 | MagickSetImageColorspace(colorspace_wand, GRAYColorspace); 64 | if (negate) { 65 | MagickNegateImage(colorspace_wand, 1); 66 | } 67 | 68 | MagickSetImageArtifact(wand, "compose:args", compose_opt); 69 | MagickCompositeImage(wand, colorspace_wand, BlendCompositeOp, 1, 0, 0); 70 | MagickCompositeImage(wand, colorized_wand, BlendCompositeOp, 1, 0, 0); 71 | 72 | // Cleanup. 
73 | colorize = DestroyPixelWand(colorize); 74 | alpha = DestroyPixelWand(alpha); 75 | 76 | } 77 | 78 | MagickWand *colortone_simple(MagickWand *input_wand, 79 | const char *color, 80 | const char *compose_opt, 81 | int negate) { 82 | 83 | MagickWand *wand = CloneMagickWand(input_wand); 84 | MagickWand *colorized_wand = CloneMagickWand(wand); 85 | MagickWand *colorspace_wand = CloneMagickWand(wand); 86 | 87 | do_colortone(wand, color, compose_opt, negate, colorized_wand, colorspace_wand); 88 | do_colortone(wand, color, compose_opt, negate, colorized_wand, colorspace_wand); 89 | MagickModulateImage(wand, HUE, SATURATION, VALUE); 90 | MagickGammaImage(wand, GAMMA); 91 | 92 | colorized_wand = DestroyMagickWand(colorized_wand); 93 | colorspace_wand = DestroyMagickWand(colorspace_wand); 94 | 95 | return wand; 96 | } 97 | 98 | exec_mode_t get_mode(char *s) { 99 | if (strcmp("naive", s) == 0) { 100 | return NAIVE; 101 | } else if (strcmp("composer", s) == 0) { 102 | return COMPOSER; 103 | } else if (strcmp("parallel", s) == 0) { 104 | return PARALLEL; 105 | } else { 106 | return UNKNOWN; 107 | } 108 | } 109 | 110 | void print_usage(char **argv) { 111 | fprintf(stderr, "%s -i -m [-t -h]\n", argv[0]); 112 | fprintf(stderr, "Available modes:\n"); 113 | fprintf(stderr, "\tnaive\n" 114 | "\tcomposer\n" 115 | ); 116 | } 117 | 118 | int power_of_two(long x) { 119 | return x && !(x & (x - 1)); 120 | } 121 | 122 | void parse_args(int argc, char **argv) { 123 | int opt; 124 | while ((opt = getopt(argc, argv, "i:m:p:t:h:o")) != -1) { 125 | switch (opt) { 126 | case 'i': 127 | sprintf(input_filename, "%s", optarg); 128 | break; 129 | case 'm': 130 | mode = get_mode(optarg); 131 | if (mode == UNKNOWN) { 132 | print_usage(argv); 133 | exit(EXIT_FAILURE); 134 | } 135 | break; 136 | case 't': 137 | threads = atol(optarg); 138 | break; 139 | case 'o': 140 | write_out = 1; 141 | break; 142 | case 'p': 143 | pieces = atol(optarg); 144 | if (pieces < 0) { 145 | fprintf(stderr, "pieces must 
be > 0\n"); 146 | exit(EXIT_FAILURE); 147 | } 148 | break; 149 | case 'h': 150 | default: 151 | print_usage(argv); 152 | exit(EXIT_FAILURE); 153 | } 154 | } 155 | } 156 | 157 | int main(int argc,char **argv) { 158 | 159 | parse_args(argc, argv); 160 | if (mode == UNKNOWN || strlen(input_filename) == 0) { 161 | print_usage(argv); 162 | exit(EXIT_FAILURE); 163 | } 164 | 165 | struct stat s; 166 | if (stat(input_filename, &s) == -1) { 167 | perror("Input file error"); 168 | exit(EXIT_FAILURE); 169 | } 170 | 171 | // Need to call this before any of the other library functions. 172 | if (mode == COMPOSER) { 173 | omp_set_num_threads(threads); 174 | composer_init(threads, pieces); 175 | } else { 176 | omp_set_num_threads(threads); 177 | } 178 | 179 | printf("Input file: %s (%ld bytes) Piece Size: %d Threads: %ld Mode: %d\n", 180 | input_filename, s.st_size, pieces, threads, mode); 181 | 182 | MagickWandGenesis(); 183 | 184 | MagickWand *wand = NewMagickWand(); 185 | 186 | printf("Reading image..."); 187 | fflush(stdout); 188 | MagickReadImage(wand, input_filename); 189 | printf("done.\n"); 190 | fflush(stdout); 191 | 192 | MagickWand *result; 193 | 194 | struct timeval start, end, diff; 195 | gettimeofday(&start, NULL); 196 | 197 | // Run function 198 | switch (mode) { 199 | case NAIVE: 200 | result = colortone_simple(wand, "#222b6d", "50,50", 1); 201 | break; 202 | case PARALLEL: 203 | result = colortone_parallel(wand, "#222b6d", "50,50", 1, threads); 204 | break; 205 | case COMPOSER: 206 | result = colortone_composer(wand, "#222b6d", "50,50", 1); 207 | break; 208 | case UNKNOWN: 209 | default: 210 | fprintf(stderr, "unsupported case"); 211 | exit(EXIT_FAILURE); 212 | } 213 | gettimeofday(&end, NULL); 214 | 215 | timersub(&end, &start, &diff); 216 | double runtime = (double)diff.tv_sec + ((double)diff.tv_usec / 1000000.0); 217 | 218 | printf("%f seconds\n", runtime); 219 | fflush(stderr); 220 | 221 | if (write_out) { 222 | printf("Writing image..."); 223 | 
fflush(stdout); 224 | char output[256]; 225 | sprintf(output, "output-%d.jpg", mode); 226 | // MagickWriteImage(result, output); 227 | printf("done.\n"); 228 | fflush(stdout); 229 | } 230 | 231 | wand = DestroyMagickWand(wand); 232 | result = DestroyMagickWand(result); 233 | MagickWandTerminus(); 234 | } 235 | -------------------------------------------------------------------------------- /c/benchmarks/nashville/nashville.h: -------------------------------------------------------------------------------- 1 | #ifndef _NASHVILLE_H_ 2 | #define _NASHVILLE_H_ 3 | 4 | #define GAMMA (0.7) 5 | #define HUE (100) 6 | #define SATURATION (150) 7 | #define VALUE (100) 8 | 9 | // Actual work for the colortone function, without Composer. 10 | void do_colortone(MagickWand *wand, 11 | const char *color, 12 | const char *compose_opt, 13 | int negate, 14 | MagickWand *colorized_wand, 15 | MagickWand *colorspace_wand); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /c/benchmarks/nashville/nashville_composer.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef __linux__ 4 | #define _GNU_SOURCE 5 | #endif 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | #include 19 | 20 | #include "nashville_composer.h" 21 | #include "nashville.h" 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | // Actual work for the colortone function. 28 | void c_do_colortone(MagickWand *wand, 29 | const char *color, 30 | const char *compose_opt, 31 | int negate, 32 | MagickWand *colorized_wand, 33 | MagickWand *colorspace_wand) { 34 | 35 | // Colorize image. 
36 | PixelWand *colorize = c_NewPixelWand(); 37 | PixelWand *alpha = c_NewPixelWand(); 38 | c_PixelSetColor(colorize, color); 39 | c_PixelSetColor(alpha, "#fff"); 40 | c_MagickColorizeImage(colorized_wand, colorize, alpha); 41 | 42 | // Convert to grayspace. 43 | c_MagickSetImageColorspace(colorspace_wand, GRAYColorspace); 44 | if (negate) { 45 | c_MagickNegateImage(colorspace_wand, 1); 46 | } 47 | 48 | c_MagickSetImageArtifact(wand, "compose:args", compose_opt); 49 | c_MagickCompositeImage(wand, colorspace_wand, BlendCompositeOp, 1, 0, 0); 50 | c_MagickCompositeImage(wand, colorized_wand, BlendCompositeOp, 1, 0, 0); 51 | 52 | // Cleanup. 53 | colorize = c_DestroyPixelWand(colorize); 54 | alpha = c_DestroyPixelWand(alpha); 55 | } 56 | 57 | MagickWand *colortone_composer(MagickWand *input_wand, 58 | const char *color, 59 | const char *compose_opt, 60 | int negate) { 61 | 62 | MagickWand *wand = c_CloneMagickWand(input_wand); 63 | MagickWand *colorized_wand = c_CloneMagickWand(wand); 64 | MagickWand *colorspace_wand = c_CloneMagickWand(wand); 65 | 66 | c_do_colortone(wand, color, compose_opt, negate, colorized_wand, colorspace_wand); 67 | c_do_colortone(wand, color, compose_opt, negate, colorized_wand, colorspace_wand); 68 | 69 | // WriteImage (wand, colorized_wand, colorspace_wand, before execution: need 70 | // the output split type of each variable. If the output split type is 71 | // broadcast, we can just return one of the partitions instead of the result 72 | // of a merge. TODO Justify the logic behind doing this (or why it makes 73 | // sense to define API in this way). 
74 | 75 | c_MagickModulateImage(wand, HUE, SATURATION, VALUE); 76 | c_MagickGammaImage(wand, GAMMA); 77 | 78 | colorized_wand = c_DestroyMagickWand(colorized_wand); 79 | colorspace_wand = c_DestroyMagickWand(colorspace_wand); 80 | 81 | // TODO we can do this automatically by adding a "mut" 82 | composer_emit(&wand, sizeof(wand), (intptr_t)WandSplit_merge); 83 | 84 | composer_execute(); 85 | return wand; 86 | } 87 | -------------------------------------------------------------------------------- /c/benchmarks/nashville/nashville_composer.h: -------------------------------------------------------------------------------- 1 | #ifndef _NASHVILLE_COMPOSER_H_ 2 | #define _NASHVILLE_COMPOSER_H_ 3 | 4 | #include 5 | 6 | MagickWand *colortone_composer(MagickWand *input_wand, const char *color, const char *compose_opt, int negate); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /c/benchmarks/nashville/nashville_parallel.c: -------------------------------------------------------------------------------- 1 | #ifdef __linux__ 2 | #define _GNU_SOURCE 3 | #endif 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | #include 17 | 18 | #include "nashville.h" 19 | 20 | typedef struct piece_ { 21 | MagickWand *piece; 22 | int index; 23 | } piece_t; 24 | 25 | int compare(const void *a, const void *b) { 26 | const piece_t *left = (const piece_t *)a; 27 | const piece_t *right = (const piece_t *)b; 28 | return left->index - right->index; 29 | } 30 | 31 | MagickWand *aggregate_seq(piece_t *pieces, int count) { 32 | MagickWand *results = NewMagickWand(); 33 | MagickResetIterator(results); 34 | 35 | for (int i = 0; i < count; i++) { 36 | MagickSetLastIterator(results); 37 | MagickAddImage(results, pieces[i].piece); 38 | } 39 | 40 | MagickResetIterator(results); 41 | MagickWand *final = MagickAppendImages(results, 1); 42 | DestroyMagickWand(results); 43 | 44 | 
return final; 45 | } 46 | 47 | MagickWand *aggregate_par(piece_t *pieces, int count, int threads) { 48 | 49 | // Holds aggregation state. 50 | MagickWand **results = (MagickWand **)malloc(sizeof(MagickWand *) * threads); 51 | for (int i = 0; i < threads; i++) { 52 | results[i] = NewMagickWand(); 53 | MagickResetIterator(results[i]); 54 | } 55 | 56 | int values_per_thread = count / threads; 57 | printf("values per piece: %d\n", values_per_thread); 58 | 59 | #pragma omp parallel for 60 | for (int i = 0; i < threads; i++) { 61 | int start = i * values_per_thread; 62 | int end = (i + 1) * values_per_thread; 63 | 64 | if (i == threads - 1) { 65 | end = count; 66 | } 67 | 68 | MagickWand *result = results[i]; 69 | 70 | // printf("thread %d: %d->%d\n", omp_get_thread_num(), start, end); 71 | for (int j = start; j < end; j++) { 72 | MagickSetLastIterator(result); 73 | MagickAddImage(result, pieces[j].piece); 74 | } 75 | 76 | MagickResetIterator(result); 77 | MagickWand *final = MagickAppendImages(result, 1); 78 | 79 | result = DestroyMagickWand(result); 80 | results[i] = final; 81 | } 82 | 83 | MagickWand *final_iterator = NewMagickWand(); 84 | MagickResetIterator(final_iterator); 85 | for (int i = 0; i < threads; i++) { 86 | MagickSetLastIterator(final_iterator); 87 | MagickAddImage(final_iterator, results[i]); 88 | } 89 | MagickResetIterator(final_iterator); 90 | MagickWand *final = MagickAppendImages(final_iterator, 1); 91 | 92 | for (int i = 0; i < threads; i++) { 93 | DestroyMagickWand(results[i]); 94 | } 95 | free(results); 96 | 97 | return final; 98 | } 99 | 100 | MagickWand *colortone_parallel(MagickWand *input_wand, const char *color, const char *compose_opt, int negate, int threads) { 101 | size_t width = MagickGetImageWidth(input_wand); 102 | size_t height = MagickGetImageHeight(input_wand); 103 | 104 | printf("Image is (%ld x %ld) pixels\n", width, height); 105 | 106 | // We want each chunk to be close to the L2 cache size. 
107 | const int l2_cache_size_bytes = 262144 * 3; 108 | // Number of rows to process per batch. 109 | size_t region_height = l2_cache_size_bytes / width; 110 | if (region_height == 0) { 111 | region_height = 1; 112 | } 113 | region_height = 199; 114 | 115 | // TODO this might shave off a few things. 116 | int num_regions = height / region_height; 117 | printf("Regions: %d\n", num_regions); 118 | 119 | struct timeval start, end, diff; 120 | gettimeofday(&start, NULL); 121 | 122 | piece_t *pieces = malloc(num_regions * sizeof(piece_t)); 123 | 124 | #pragma omp parallel for 125 | for (int i = 0; i < num_regions; i++) { 126 | /* 127 | printf("%d Looking at region (%ld -> %ld, %ld -> %ld)\n", i, 128 | 0l, 0l + width, 129 | region_height * i, region_height * i + region_height); 130 | */ 131 | MagickWand *wand = MagickGetImageRegion(input_wand, width, 132 | region_height, 0, region_height * i); 133 | 134 | MagickWand *colorized_wand = CloneMagickWand(wand); 135 | MagickWand *colorspace_wand = CloneMagickWand(wand); 136 | 137 | do_colortone(wand, color, compose_opt, negate, colorized_wand, colorspace_wand); 138 | do_colortone(wand, color, compose_opt, negate, colorized_wand, colorspace_wand); 139 | MagickModulateImage(wand, HUE, SATURATION, VALUE); 140 | MagickGammaImage(wand, GAMMA); 141 | 142 | colorized_wand = DestroyMagickWand(colorized_wand); 143 | colorspace_wand = DestroyMagickWand(colorspace_wand); 144 | 145 | pieces[i].index = i; 146 | pieces[i].piece = wand; 147 | } 148 | 149 | gettimeofday(&end, NULL); 150 | timersub(&end, &start, &diff); 151 | double runtime = (double)diff.tv_sec + ((double)diff.tv_usec / 1000000.0); 152 | printf("Processing runtime: %.3f seconds\n", runtime); 153 | fflush(stdout); 154 | 155 | gettimeofday(&start, NULL); 156 | 157 | // Sort pieces by their index. 
158 | qsort(pieces, num_regions, sizeof(piece_t), compare); 159 | 160 | gettimeofday(&end, NULL); 161 | timersub(&end, &start, &diff); 162 | runtime = (double)diff.tv_sec + ((double)diff.tv_usec / 1000000.0); 163 | printf("Sort runtime: %.3f seconds\n", runtime); 164 | fflush(stdout); 165 | 166 | gettimeofday(&start, NULL); 167 | 168 | MagickWand *final; 169 | if (num_regions / threads > 16) { 170 | printf("parallel aggregation\n"); 171 | final = aggregate_par(pieces, num_regions, threads); 172 | } else { 173 | printf("sequential aggregation\n"); 174 | final = aggregate_seq(pieces, num_regions); 175 | } 176 | 177 | free(pieces); 178 | 179 | gettimeofday(&end, NULL); 180 | timersub(&end, &start, &diff); 181 | runtime = (double)diff.tv_sec + ((double)diff.tv_usec / 1000000.0); 182 | printf("Total aggregation runtime: %.3f seconds\n", runtime); 183 | fflush(stdout); 184 | 185 | return final; 186 | } 187 | -------------------------------------------------------------------------------- /c/benchmarks/nashville/nashville_parallel.h: -------------------------------------------------------------------------------- 1 | #ifndef _NASHVILLE_PARALLEL_H_ 2 | #define _NASHVILLE_PARALLEL_H_ 3 | 4 | #include 5 | 6 | MagickWand *colortone_parallel(MagickWand *input_wand, const char *color, const char *compose_opt, int negate, int threads); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /c/benchmarks/nbody/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # Libraries 3 | LD_LIBRARY_PATH=-L../../target/release -L../../lib/composer_mkl 4 | 5 | OS=$(shell uname -s) 6 | 7 | ifeq ($(OS), Darwin) 8 | CC=gcc-7 9 | LDFLAGS= 10 | INTEL= -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 11 | else ifeq ($(OS), Linux) 12 | CC=gcc-5 13 | LDFLAGS=-Wl,-rpath-link -Wno-format 14 | INTEL= -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed 
-lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 15 | else 16 | $(error Unsupported platform: $(OS)) 17 | endif 18 | 19 | CFLAGS=-O3 -Wall -pedantic 20 | INCLUDE=-I../../composer/ -I../../lib/composer_mkl 21 | EXEC=bench 22 | 23 | .PHONY: all clean 24 | 25 | all: 26 | $(CC) -fopenmp $(CFLAGS) $(INCLUDE) $(LD_LIBRARY_PATH) nbody.c nbody_mkl.c nbody_composer.c -o $(EXEC) -lcomposer -lcomposer_mkl -lm $(INTEL) 27 | 28 | asm: 29 | $(CC) $(CFLAGS) $(INCLUDE) $(LD_LIBRARY_PATH) -fopenmp nbody.c -S 30 | 31 | clean: 32 | rm -rf *.o *.s $(EXEC) $(VLIB) 33 | -------------------------------------------------------------------------------- /c/benchmarks/nbody/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | tasks=( mkl mklcomposer ) 6 | threads=( 1 2 4 8 16 ) 7 | runs=${1:-1} 8 | 9 | for task in "${tasks[@]}"; do 10 | rm -f $task.stderr $task.stdout 11 | git log | head -1 > $task.stderr 12 | git log | head -1 > $task.stdout 13 | done 14 | 15 | for i in {1..$runs}; do 16 | for task in "${tasks[@]}"; do 17 | for nthreads in "${threads[@]}"; do 18 | ./bench -m $task -s 32768 -t $nthreads -i 3 -p 8192 >> $task.stdout 2>> $task.stderr 19 | done 20 | done 21 | done 22 | -------------------------------------------------------------------------------- /c/benchmarks/nbody/nbody.h: -------------------------------------------------------------------------------- 1 | #ifndef _NBODY_H_ 2 | #define _NBODY_H_ 3 | 4 | // Constants. 
5 | #define G ((double)(6.67384e-11)) 6 | #define dt ((double)(60 * 60 * 24 * 365.25)) 7 | #define r_ly ((double)(9.4607e15)) 8 | #define m_sol ((double)(1.9891e30)) 9 | 10 | typedef struct galaxy { 11 | MKL_INT n; 12 | double *m; 13 | double *x; 14 | double *y; 15 | double *z; 16 | double *vx; 17 | double *vy; 18 | double *vz; 19 | } galaxy_t; 20 | 21 | galaxy_t inputs(long n, int lazy); 22 | 23 | void set_delta(MKL_INT n, const double *x, double *out); 24 | void set_pm(MKL_INT n, const double *x, double *out); 25 | 26 | void print_vector(int n, const double *v); 27 | void print_matrix(int n, const double *v); 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /c/benchmarks/nbody/nbody_composer.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "nbody.h" 12 | #include "nbody_composer.h" 13 | 14 | #include 15 | 16 | /** Computes Sum(G * pm / r ** 2 * (dx / r)). 17 | * 18 | * Diagonal elements are not counted in the sum. 19 | * 20 | */ 21 | void composer_compute_force(MKL_INT n, 22 | double *dx, double *pm, double *r, 23 | double *tmp1, 24 | double *output, 25 | int first) { 26 | 27 | MKL_INT size = n * n; 28 | 29 | if (!first) { 30 | composer_execute(); 31 | } 32 | 33 | c_vdMuli(size, pm, G, tmp1); 34 | c_vdPowx(size, r, 2.0, output); 35 | c_vdDiv(size, tmp1, output, tmp1); 36 | c_vdDiv(size, dx, r, output); 37 | c_vdMul(size, tmp1, output, tmp1); 38 | 39 | memset(output, 0, sizeof(double) * n); 40 | 41 | #pragma omp parallel for 42 | for (MKL_INT i = 0; i < n; i++) { 43 | double sum = 0.0; 44 | for (MKL_INT j = 0; j < n; j++) { 45 | // Ignore diagonal elements. 
46 | if (i != j) { 47 | sum += tmp1[i*n + j]; 48 | } 49 | } 50 | output[i] += sum; 51 | } 52 | } 53 | 54 | void composer_move(MKL_INT n, 55 | double *m, double *x, double *y, double *z, double *vx, double *vy, double *vz, 56 | // Temporaries that have n * n space. 57 | double *dx, double *dy, double *dz, double *pm, double *r, double *tmp1, double *tmp2) { 58 | 59 | set_delta(n, x, dx); 60 | set_delta(n, y, dy); 61 | set_delta(n, z, dz); 62 | set_pm(n, m, pm); 63 | 64 | MKL_INT size = n * n; 65 | 66 | // r = sqrt(dx**2 + dy**2 + dz**2) 67 | c_vdPowx(size, dx, 2.0, tmp1); 68 | c_vdPowx(size, dy, 2.0, tmp2); 69 | c_vdAdd(size, tmp1, tmp2, tmp1); 70 | c_vdPowx(size, dz, 2.0, tmp2); 71 | c_vdAdd(size, tmp1, tmp2, tmp1); 72 | c_vdSqrt(size, tmp1, r); 73 | 74 | composer_compute_force(n, dx, pm, r, tmp1, tmp2, 1); 75 | c_vdDiv(n, tmp2, m, tmp1); 76 | c_vdMuli(n, tmp1, dt, tmp1); 77 | c_vdAdd(n, vx, tmp1, vx); 78 | 79 | c_vdMuli(n, vx, dt, tmp1); 80 | c_vdAdd(n, x, tmp1, x); 81 | 82 | composer_compute_force(n, dy, pm, r, tmp1, tmp2, 0); 83 | c_vdDiv(n, tmp2, m, tmp1); 84 | c_vdMuli(n, tmp1, dt, tmp1); 85 | c_vdAdd(n, vy, tmp1, vy); 86 | 87 | c_vdMuli(n, vy, dt, tmp1); 88 | c_vdAdd(n, y, tmp1, y); 89 | 90 | composer_compute_force(n, dz, pm, r, tmp1, tmp2, 0); 91 | c_vdDiv(n, tmp2, m, tmp1); 92 | c_vdMuli(n, tmp1, dt, tmp1); 93 | c_vdAdd(n, vz, tmp1, vz); 94 | 95 | c_vdMuli(n, vz, dt, tmp1); 96 | c_vdAdd(n, z, tmp1, z); 97 | } 98 | 99 | void run_mkl_composer(int iterations, MKL_INT n, 100 | double *m, 101 | double *x, double *y, double *z, double *vx, double *vy, double *vz) { 102 | 103 | vec_t dx = new_vec(n * n, 0); 104 | vec_t dy = new_vec(n * n, 0); 105 | vec_t dz = new_vec(n * n, 0); 106 | vec_t pm = new_vec(n * n, 0); 107 | vec_t r = new_vec(n * n, 0); 108 | vec_t tmp1 = new_vec(n * n, 0); 109 | vec_t tmp2 = new_vec(n * n, 0); 110 | 111 | for (int i = 0; i < iterations; i++) { 112 | printf("iteration %d\n", i); 113 | composer_move(n, m, x, y, z, vx, vy, vz, 114 | 
dx.data, dy.data, dz.data, pm.data, r.data, tmp1.data, tmp2.data); 115 | } 116 | } 117 | 118 | -------------------------------------------------------------------------------- /c/benchmarks/nbody/nbody_composer.h: -------------------------------------------------------------------------------- 1 | #ifndef _NBODY_COMPOSER_H_ 2 | #define _NBODY_COMPOSER_H_ 3 | 4 | #include 5 | 6 | void run_mkl_composer(int iterations, MKL_INT n, 7 | double *m, 8 | double *x, double *y, double *z, double *vx, double *vy, double *vz); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /c/benchmarks/nbody/nbody_mkl.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "nbody.h" 11 | #include "nbody_mkl.h" 12 | 13 | /** Computes Sum(G * pm / r ** 2 * (dx / r)). 14 | * 15 | * Diagonal elements are not counted in the sum. 16 | * 17 | */ 18 | void compute_force(MKL_INT n, 19 | double *dx, double *pm, double *r, 20 | double *tmp1, 21 | double *output) { 22 | 23 | MKL_INT size = n * n; 24 | 25 | vdMuli(size, pm, G, tmp1); 26 | vdPowx(size, r, 2.0, output); 27 | vdDiv(size, tmp1, output, tmp1); 28 | vdDiv(size, dx, r, output); 29 | vdMul(size, tmp1, output, tmp1); 30 | 31 | memset(output, 0, sizeof(double) * n); 32 | 33 | #pragma omp parallel for 34 | for (MKL_INT i = 0; i < n; i++) { 35 | double sum = 0.0; 36 | for (MKL_INT j = 0; j < n; j++) { 37 | // Ignore diagonal elements. 38 | if (i != j) { 39 | // Causes some imprecision compared to reference? 40 | sum += tmp1[i*n + j]; 41 | } 42 | } 43 | output[i] += sum; 44 | } 45 | } 46 | 47 | void move(MKL_INT n, 48 | double *m, double *x, double *y, double *z, double *vx, double *vy, double *vz, 49 | // Temporaries that have n * n space. 
50 | double *dx, double *dy, double *dz, double *pm, double *r, double *tmp1, double *tmp2) { 51 | 52 | set_delta(n, x, dx); 53 | set_delta(n, y, dy); 54 | set_delta(n, z, dz); 55 | set_pm(n, m, pm); 56 | 57 | MKL_INT size = n * n; 58 | 59 | // r = sqrt(dx**2 + dy**2 + dz**2) 60 | vdPowx(size, dx, 2.0, tmp1); 61 | vdPowx(size, dy, 2.0, tmp2); 62 | vdAdd(size, tmp1, tmp2, tmp1); 63 | vdPowx(size, dz, 2.0, tmp2); 64 | vdAdd(size, tmp1, tmp2, tmp1); 65 | vdSqrt(size, tmp1, r); 66 | 67 | compute_force(n, dx, pm, r, tmp1, tmp2); 68 | vdDiv(n, tmp2, m, tmp1); 69 | vdMuli(n, tmp1, dt, tmp1); 70 | vdAdd(n, vx, tmp1, vx); 71 | 72 | vdMuli(n, vx, dt, tmp1); 73 | vdAdd(n, x, tmp1, x); 74 | 75 | compute_force(n, dy, pm, r, tmp1, tmp2); 76 | vdDiv(n, tmp2, m, tmp1); 77 | vdMuli(n, tmp1, dt, tmp1); 78 | vdAdd(n, vy, tmp1, vy); 79 | 80 | vdMuli(n, vy, dt, tmp1); 81 | vdAdd(n, y, tmp1, y); 82 | 83 | compute_force(n, dz, pm, r, tmp1, tmp2); 84 | vdDiv(n, tmp2, m, tmp1); 85 | vdMuli(n, tmp1, dt, tmp1); 86 | vdAdd(n, vz, tmp1, vz); 87 | 88 | vdMuli(n, vz, dt, tmp1); 89 | vdAdd(n, z, tmp1, z); 90 | } 91 | 92 | void run_mkl(int iterations, MKL_INT n, 93 | double *m, 94 | double *x, double *y, double *z, double *vx, double *vy, double *vz) { 95 | 96 | vec_t dx = new_vec(n * n, 0); 97 | vec_t dy = new_vec(n * n, 0); 98 | vec_t dz = new_vec(n * n, 0); 99 | vec_t pm = new_vec(n * n, 0); 100 | vec_t r = new_vec(n * n, 0); 101 | vec_t tmp1 = new_vec(n * n, 0); 102 | vec_t tmp2 = new_vec(n * n, 0); 103 | 104 | for (int i = 0; i < iterations; i++) { 105 | printf("iteration %d\n", i); 106 | move(n, m, x, y, z, vx, vy, vz, 107 | dx.data, dy.data, dz.data, pm.data, r.data, tmp1.data, tmp2.data); 108 | } 109 | } 110 | 111 | -------------------------------------------------------------------------------- /c/benchmarks/nbody/nbody_mkl.h: -------------------------------------------------------------------------------- 1 | #ifndef _NBODY_MKL_H_ 2 | #define _NBODY_MKL_H_ 3 | 4 | #include 5 | 6 | void 
run_mkl(int iterations, MKL_INT n, 7 | double *m, 8 | double *x, double *y, double *z, double *vx, double *vy, double *vz); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /c/benchmarks/run-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Runs all the C-based benchmarks. Specifically, this runs: 4 | # - Black Scholes with MKL 5 | # - Haversine with MKL 6 | # - nBody with MKL 7 | # - Shallow Water with MKL 8 | # - Nashville with ImageMagick 9 | # - Gotham with ImageMagick 10 | 11 | set -x 12 | 13 | # Get the data for Nashville and Gotham 14 | ./get-data.sh 15 | 16 | rm -rf results/ 17 | mkdir results/ 18 | 19 | tasks=( blackscholes haversine nbody shallow_water gotham nashville ) 20 | 21 | # Write system information. 22 | git log | head -1 > results/CONFIG.txt 23 | uname -a >> results/CONFIG.txt 24 | lsb_release -d >> results/CONFIG.txt 25 | 26 | for task in "${tasks[@]}"; do 27 | echo "Executing $task" 28 | pushd $task 29 | make 30 | ./benchmark.sh 31 | popd 32 | mkdir results/$task 33 | mv $task/*.std* results/$task 34 | done 35 | -------------------------------------------------------------------------------- /c/benchmarks/shallow_water/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # Libraries 3 | LD_LIBRARY_PATH=-L../../target/release -L../../lib/composer_mkl 4 | 5 | OS=$(shell uname -s) 6 | 7 | ifeq ($(OS), Darwin) 8 | CC=gcc-7 9 | LDFLAGS= 10 | INTEL= -DMKL_ILP64 -m64 -I${MKLROOT}/include -L${MKLROOT}/lib -Wl,-rpath,${MKLROOT}/lib -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 11 | else ifeq ($(OS), Linux) 12 | CC=gcc-5 13 | LDFLAGS=-Wl,-rpath-link -Wno-format 14 | INTEL= -DMKL_ILP64 -m64 -I${MKLROOT}/include -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 15 | else 16 | $(error Unsupported 
platform: $(OS)) 17 | endif 18 | 19 | CFLAGS=-O3 -Wall -pedantic 20 | INCLUDE=-I../../composer/ -I../../lib/composer_mkl 21 | EXEC=bench 22 | 23 | .PHONY: all clean 24 | 25 | all: 26 | $(CC) -fopenmp $(CFLAGS) $(INCLUDE) $(LD_LIBRARY_PATH) shallow_water_mkl.c shallow_water_composer.c shallow_water.c -o $(EXEC) -lcomposer -lcomposer_mkl -lm $(INTEL) 27 | 28 | asm: 29 | $(CC) $(CFLAGS) $(INCLUDE) $(LD_LIBRARY_PATH) -fopenmp shallow_water_mkl.c shallow_water_composer.c shallow_water.c -S 30 | 31 | clean: 32 | rm -rf *.o *.s $(EXEC) $(VLIB) 33 | -------------------------------------------------------------------------------- /c/benchmarks/shallow_water/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Shallow Water Benchmark 3 | 4 | This benchmark is based on the reference solution provided [here](https://github.com/mrocklin/ShallowWater/blob/master/shallowwater_simple.py). The actual workload simulates the flow of a disturbed fluid based on the equations described [here](http://en.wikipedia.org/wiki/Shallow_water_equations). 5 | 6 | Since thie benchmark is more complex than Haversine and Black Scholes, it is divided into several files: 7 | 8 | * `shallow_water.c` is the driver and contains `main()` and utilities for creating inputs, etc. 9 | * `shallow_water_mkl.c` implements the workload using MKL. 10 | * `shallow_water_composer.c` implements the workload using Composer. This basically just adds the `c_` prefix to all the MKL functions that are supported (note that if we were using C++, we could've just copied and pasted the MKL file and used namespaces). 
11 | -------------------------------------------------------------------------------- /c/benchmarks/shallow_water/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | tasks=( mkl mklcomposer ) 6 | threads=( 1 2 4 8 16 ) 7 | runs=${1:-1} 8 | 9 | for task in "${tasks[@]}"; do 10 | rm -f $task.stderr $task.stdout 11 | git log | head -1 > $task.stderr 12 | git log | head -1 > $task.stdout 13 | done 14 | 15 | for i in {1..$runs}; do 16 | for task in "${tasks[@]}"; do 17 | for nthreads in "${threads[@]}"; do 18 | ./bench -m $task -s 16384 -t $nthreads -i 10 >> $task.stdout 2>> $task.stderr 19 | done 20 | done 21 | done 22 | -------------------------------------------------------------------------------- /c/benchmarks/shallow_water/shallow_water.h: -------------------------------------------------------------------------------- 1 | #ifndef _SHALLOW_WATER_H_ 2 | #define _SHALLOW_WATER_H_ 3 | 4 | #include 5 | 6 | // Inputs to the simulation. 7 | typedef struct input { 8 | MKL_INT n; 9 | double *u; 10 | double *v; 11 | double *eta; 12 | double g; 13 | double b; 14 | double dt; 15 | double grid_spacing; 16 | } input_t; 17 | 18 | /** Initialize inputs. 19 | * 20 | * The inputs are initialized to be consistent with shallowwater_reference.py. 21 | * 22 | */ 23 | input_t inputs(long n, int lazy); 24 | 25 | /** Prints an n * n matrix to stdout. */ 26 | void print_matrix(int n, const double *v); 27 | 28 | /** Shifts the input vector along the given axis by amount. 29 | * 30 | * If axis is 0, rolls along columns. If axis is 1, rolls along rows. 31 | * The input should be an n * n matrix. 
32 | */ 33 | void roll( 34 | // Inputs 35 | MKL_INT n, const double *restrict input, int axis, int amount, 36 | // Output 37 | double *restrict output); 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /c/benchmarks/shallow_water/shallow_water_composer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "mkl_vml_functions.h" 5 | #include "mkl.h" 6 | 7 | #include 8 | #include 9 | 10 | #include "shallow_water.h" 11 | #include "shallow_water_composer.h" 12 | 13 | #include 14 | 15 | void c_spatialDerivative( 16 | // Inputs 17 | MKL_INT n, const double *restrict input, int axis, double grid_spacing, 18 | // Temporaries 19 | double *restrict tmp1, double *restrict tmp2, 20 | // Outputs 21 | double *output) { 22 | 23 | MKL_INT size = n * n; 24 | 25 | composer_execute(); 26 | roll(n, input, axis, -1, tmp1); 27 | roll(n, input, axis, 1, tmp2); 28 | 29 | c_vdSub(size, tmp1, tmp2, output); 30 | c_vdDivi(size, output, grid_spacing * 2.0, output); 31 | } 32 | 33 | void c_d_dx( 34 | // Inputs 35 | MKL_INT n, const double *restrict input, double grid_spacing, 36 | // Temporaries 37 | double *restrict tmp1, double *restrict tmp2, 38 | // Output 39 | double *output) { 40 | c_spatialDerivative(n, input, 1, grid_spacing, tmp1, tmp2, output); 41 | } 42 | 43 | void c_d_dy( 44 | // Inputs 45 | MKL_INT n, const double *restrict input, double grid_spacing, 46 | // Temporaries 47 | double *restrict tmp1, double *restrict tmp2, 48 | // Output 49 | double *output) { 50 | c_spatialDerivative(n, input, 0, grid_spacing, tmp1, tmp2, output); 51 | } 52 | 53 | void c_d_dt( 54 | MKL_INT n, double *eta, double *u, double *v, 55 | double g, double b, double grid_spacing, 56 | // Outputs 57 | double *du_dt, double *dv_dt, double *deta_dt, 58 | // Temporaries 59 | double *tmp1, double *tmp2, double *tmp3, double *tmp4) { 60 | 61 | MKL_INT size = n * n; 62 | 63 | // STAGE 1: 
64 | 65 | // du_dt = -g*d_dx(eta) - u*b 66 | c_d_dx(n, eta, grid_spacing, tmp2, tmp3, tmp1); 67 | c_vdMuli(size, tmp1, -g, tmp1); 68 | c_vdMuli(size, u, b, tmp2); 69 | c_vdSub(size, tmp1, tmp2, du_dt); 70 | 71 | // STAGE 2: 72 | // dv_dt = -g*d_dy(eta) - v*b 73 | c_d_dy(n, eta, grid_spacing, tmp2, tmp3, tmp1); 74 | c_vdMuli(size, tmp1, -g, tmp1); 75 | c_vdMuli(size, v, b, tmp2); 76 | c_vdSub(size, tmp1, tmp2, dv_dt); 77 | 78 | // STAGE 3 (not pipelined) 79 | 80 | // tmp1 = -d_dx(u * eta) 81 | c_vdMul(size, u, eta, tmp4); 82 | c_d_dx(n, tmp4, grid_spacing, tmp2, tmp3, tmp1); 83 | c_vdMuli(size, tmp1, -1, tmp1); 84 | 85 | // STAGE 4 (not pipelined) 86 | 87 | // deta_dt = d_dy(v * eta) 88 | c_vdMul(size, v, eta, tmp4); 89 | c_d_dy(n, tmp4, grid_spacing, tmp2, tmp3, deta_dt); 90 | 91 | // deta_dt = -d_dx(u*eta) - d_dy(v*eta) 92 | c_vdSub(size, tmp1, deta_dt, deta_dt); 93 | } 94 | 95 | void c_evolveEuler( 96 | // Inputs and Outputs 97 | MKL_INT n, double *eta, double *u, double *v, 98 | double g, double b, double dt, double grid_spacing, 99 | // Temporaries 100 | double *du_dt, double *dv_dt, double *deta_dt, 101 | double *tmp1, double *tmp2, double *tmp3, double *tmp4) { 102 | 103 | c_d_dt( 104 | n, eta, u, v, 105 | g, b, grid_spacing, 106 | du_dt, dv_dt, deta_dt, 107 | tmp1, tmp2, tmp3, tmp4); 108 | 109 | MKL_INT size = n * n; 110 | 111 | // eta = eta + deta_dt + dt 112 | c_vdMuli(size, deta_dt, dt, tmp1); 113 | c_vdAdd(size, eta, tmp1, eta); 114 | 115 | // u = u + du_dt * dt 116 | c_vdMuli(size, du_dt, dt, tmp1); 117 | c_vdAdd(size, u, tmp1, u); 118 | 119 | // v = v + dv_dt * dt 120 | c_vdMuli(size, dv_dt, dt, tmp1); 121 | c_vdAdd(size, v, tmp1, v); 122 | } 123 | 124 | void run_mkl_composer( 125 | int iterations, 126 | MKL_INT n, 127 | double *eta, // Lazy 128 | double *u, // Lazy 129 | double *v, // Lazy 130 | double g, 131 | double b, 132 | double dt, 133 | double grid_spacing) { 134 | 135 | long size = n * n; 136 | 137 | // Generate outputs and temporaries. 
138 | // 139 | // We mark these as lazy. 140 | vec_t du_dt = new_vec(size, 1); 141 | vec_t dv_dt = new_vec(size, 1); 142 | vec_t deta_dt = new_vec(size, 1); 143 | 144 | vec_t tmp1 = new_vec(size, 1); 145 | vec_t tmp2 = new_vec(size, 1); 146 | vec_t tmp3 = new_vec(size, 1); 147 | vec_t tmp4 = new_vec(size, 1); 148 | 149 | // TODO Benchmark with and without temporaries. 150 | // 151 | // Marking all of these as temporaries is not right because they are only 152 | // temporaries for the full program -- not temporaries in any particular 153 | // stage. 154 | /* 155 | composer_register_temporary(&du_dt.data, sizeof(double*)); 156 | composer_register_temporary(&dv_dt.data, sizeof(double*)); 157 | composer_register_temporary(&deta_dt.data, sizeof(double*)); 158 | composer_register_temporary(&tmp1.data, sizeof(double*)); 159 | composer_register_temporary(&tmp2.data, sizeof(double*)); 160 | composer_register_temporary(&tmp3.data, sizeof(double*)); 161 | */ 162 | 163 | double time = 0; 164 | 165 | for (int i = 0; i < iterations; i++) { 166 | fprintf(stderr, "iteration %d\n", i); 167 | c_evolveEuler(n, eta, u, v, g, b, dt, grid_spacing, 168 | du_dt.data, dv_dt.data, deta_dt.data, 169 | tmp1.data, tmp2.data, tmp3.data, tmp4.data); 170 | time += dt; 171 | 172 | // Force execution at the end of an iteration. 173 | composer_execute(); 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /c/benchmarks/shallow_water/shallow_water_composer.h: -------------------------------------------------------------------------------- 1 | #ifndef _SHALLOW_WATER_COMPOSER_H_ 2 | #define _SHALLOW_WATER_COMPOSER_H_ 3 | 4 | /** Run the shallow water simulation with MKL and Composer. 
*/ 5 | void run_mkl_composer( 6 | int iterations, 7 | MKL_INT n, 8 | double *eta, 9 | double *u, 10 | double *v, 11 | double g, 12 | double b, 13 | double dt, 14 | double grid_spacing); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /c/benchmarks/shallow_water/shallow_water_mkl.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "mkl_vml_functions.h" 5 | #include "mkl.h" 6 | 7 | #include 8 | #include 9 | 10 | #include "shallow_water.h" 11 | 12 | void spatialDerivative( 13 | // Inputs 14 | MKL_INT n, const double *restrict input, int axis, double grid_spacing, 15 | // Temporaries 16 | double *restrict tmp1, double *restrict tmp2, 17 | // Outputs 18 | double *output) { 19 | 20 | MKL_INT size = n * n; 21 | 22 | roll(n, input, axis, -1, tmp1); 23 | roll(n, input, axis, 1, tmp2); 24 | 25 | vdSub(size, tmp1, tmp2, output); 26 | vdDivi(size, output, grid_spacing * 2.0, output); 27 | } 28 | 29 | void d_dx( 30 | // Inputs 31 | MKL_INT n, const double *restrict input, double grid_spacing, 32 | // Temporaries 33 | double *restrict tmp1, double *restrict tmp2, 34 | // Output 35 | double *output) { 36 | spatialDerivative(n, input, 1, grid_spacing, tmp1, tmp2, output); 37 | } 38 | 39 | void d_dy( 40 | // Inputs 41 | MKL_INT n, const double *restrict input, double grid_spacing, 42 | // Temporaries 43 | double *restrict tmp1, double *restrict tmp2, 44 | // Output 45 | double *output) { 46 | spatialDerivative(n, input, 0, grid_spacing, tmp1, tmp2, output); 47 | } 48 | 49 | void d_dt( 50 | MKL_INT n, double *eta, double *u, double *v, 51 | double g, double b, double grid_spacing, 52 | // Outputs 53 | double *du_dt, double *dv_dt, double *deta_dt, 54 | // Temporaries 55 | double *tmp1, double *tmp2, double *tmp3, double *tmp4) { 56 | 57 | MKL_INT size = n * n; 58 | 59 | // STAGE 1: 60 | 61 | // du_dt = -g*d_dx(eta) - u*b 62 | d_dx(n, eta, grid_spacing, 
tmp2, tmp3, tmp1); 63 | vdMuli(size, tmp1, -g, tmp1); 64 | vdMuli(size, u, b, tmp2); 65 | vdSub(size, tmp1, tmp2, du_dt); 66 | 67 | // STAGE 2: 68 | // dv_dt = -g*d_dy(eta) - v*b 69 | d_dy(n, eta, grid_spacing, tmp2, tmp3, tmp1); 70 | vdMuli(size, tmp1, -g, tmp1); 71 | vdMuli(size, v, b, tmp2); 72 | vdSub(size, tmp1, tmp2, dv_dt); 73 | 74 | // STAGE 3 (not pipelined) 75 | 76 | // tmp1 = -d_dx(u * eta) 77 | vdMul(size, u, eta, tmp4); 78 | d_dx(n, tmp4, grid_spacing, tmp2, tmp3, tmp1); 79 | vdMuli(size, tmp1, -1, tmp1); 80 | 81 | // deta_dt = d_dy(v * eta) 82 | vdMul(size, v, eta, tmp4); 83 | d_dy(n, tmp4, grid_spacing, tmp2, tmp3, deta_dt); 84 | 85 | // deta_dt = -d_dx(u*eta) - d_dy(v*eta) 86 | vdSub(size, tmp1, deta_dt, deta_dt); 87 | } 88 | 89 | void evolveEuler( 90 | // Inputs and Outputs 91 | MKL_INT n, double *eta, double *u, double *v, 92 | double g, double b, double dt, double grid_spacing, 93 | // Temporaries 94 | double *du_dt, double *dv_dt, double *deta_dt, 95 | double *tmp1, double *tmp2, double *tmp3, double *tmp4) { 96 | 97 | d_dt( 98 | n, eta, u, v, 99 | g, b, grid_spacing, 100 | du_dt, dv_dt, deta_dt, 101 | tmp1, tmp2, tmp3, tmp4); 102 | 103 | MKL_INT size = n * n; 104 | 105 | // eta = eta + deta_dt + dt 106 | vdMuli(size, deta_dt, dt, tmp1); 107 | vdAdd(size, eta, tmp1, eta); 108 | 109 | // u = u + du_dt * dt 110 | vdMuli(size, du_dt, dt, tmp1); 111 | vdAdd(size, u, tmp1, u); 112 | 113 | // v = v + dv_dt * dt 114 | vdMuli(size, dv_dt, dt, tmp1); 115 | vdAdd(size, v, tmp1, v); 116 | } 117 | 118 | void run_mkl( 119 | int iterations, 120 | MKL_INT n, 121 | double *eta, 122 | double *u, 123 | double *v, 124 | double g, 125 | double b, 126 | double dt, 127 | double grid_spacing) { 128 | 129 | long size = n * n; 130 | 131 | // Generate outputs and temporaries. 
132 | vec_t du_dt = new_vec(size, 0); 133 | vec_t dv_dt = new_vec(size, 0); 134 | vec_t deta_dt = new_vec(size, 0); 135 | 136 | vec_t tmp1 = new_vec(size, 0); 137 | vec_t tmp2 = new_vec(size, 0); 138 | vec_t tmp3 = new_vec(size, 0); 139 | vec_t tmp4 = new_vec(size, 0); 140 | 141 | double time = 0; 142 | 143 | for (int i = 0; i < iterations; i++) { 144 | fprintf(stderr, "iteration %d\n", i); 145 | evolveEuler(n, eta, u, v, g, b, dt, grid_spacing, 146 | du_dt.data, dv_dt.data, deta_dt.data, 147 | tmp1.data, tmp2.data, tmp3.data, tmp4.data); 148 | time += dt; 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /c/benchmarks/shallow_water/shallow_water_mkl.h: -------------------------------------------------------------------------------- 1 | #ifndef _SHALLOW_WATER_MKL_H_ 2 | #define _SHALLOW_WATER_MKL_H_ 3 | 4 | /** Run the shallow water simulation with MKL. */ 5 | void run_mkl( 6 | int iterations, 7 | MKL_INT n, 8 | double *eta, 9 | double *u, 10 | double *v, 11 | double g, 12 | double b, 13 | double dt, 14 | double grid_spacing); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /c/composer/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "composer" 3 | version = "0.1.0" 4 | authors = ["Shoumik Palkar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | libc = "0.2" 9 | byteorder = "1.3.1" 10 | serde = { version = "1.0", features = ["derive"] } 11 | serde_json = "1.0" 12 | lazy_static = "1.2.0" 13 | fnv = "1.0.6" 14 | env_logger = "0.6.0" 15 | log = "0.4.6" 16 | crossbeam-utils = "0.6" 17 | 18 | [build-dependencies] 19 | cbindgen = "0.8.0" 20 | 21 | [lib] 22 | crate-type = ["cdylib", "rlib"] 23 | -------------------------------------------------------------------------------- /c/composer/build.rs: -------------------------------------------------------------------------------- 1 | 2 | use cbindgen; 
3 | use std::env; 4 | 5 | fn main() { 6 | let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); 7 | cbindgen::Builder::new() 8 | .with_crate(crate_dir) 9 | .with_language(cbindgen::Language::C) 10 | .with_include_guard("_COMPOSER_H_") 11 | .generate() 12 | .expect("Unable to generate bindings") 13 | .write_to_file("composer.h"); 14 | } 15 | -------------------------------------------------------------------------------- /c/composer/src/error.rs: -------------------------------------------------------------------------------- 1 | //! Errors in Composer. 2 | 3 | use std::error; 4 | use std::fmt; 5 | 6 | /// Macro for creating an error. 7 | #[macro_export] 8 | macro_rules! composer_err { 9 | ( $($arg:tt)* ) => ({ 10 | ::std::result::Result::Err($crate::Error::new(format!($($arg)*))) 11 | }) 12 | } 13 | 14 | /// Errors produced by the annotation system. 15 | #[derive(Debug, Clone)] 16 | pub struct Error(String); 17 | 18 | /// A custom result type for functions in this library. 19 | pub type Result = std::result::Result; 20 | 21 | impl Error { 22 | pub fn new>(description: T) -> Error { 23 | Error(description.into()) 24 | } 25 | } 26 | 27 | impl fmt::Display for Error { 28 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 29 | write!(f, "{}", self.0) 30 | } 31 | } 32 | 33 | impl error::Error for Error { 34 | fn cause(&self) -> Option<&error::Error> { 35 | None 36 | } 37 | } 38 | 39 | impl From for Error { 40 | fn from(e: String) -> Error { 41 | Error(e) 42 | } 43 | } 44 | 45 | impl From for std::io::Error { 46 | fn from(e: Error) -> std::io::Error { 47 | std::io::Error::new(std::io::ErrorKind::Other, e.to_string()) 48 | } 49 | } 50 | 51 | impl<'a> From<&'a str> for Error { 52 | fn from(e: &'a str) -> Error { 53 | Error(String::from(e)) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /c/composer/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! 
The composer runtime and its associated components. 2 | //! 3 | //! Most of the interesting stuff is in the `runtime` module. This contains both the FFI functions 4 | //! that the wrapper functions call, as well as the runtime itself, which launches parallel tasks. 5 | //! 6 | pub mod runtime; 7 | pub mod util; 8 | 9 | #[macro_use] 10 | mod error; 11 | 12 | pub use error::*; 13 | 14 | use log::*; 15 | 16 | use std::collections::HashMap; 17 | use std::fmt; 18 | 19 | use serde::{Serialize, Deserialize}; 20 | use serde_json; 21 | 22 | // TODO change the initializer/next to actual function pointer types. 23 | type FunctionPointer = usize; 24 | 25 | /// A C declaration. 26 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] 27 | pub struct CDecl { 28 | pub return_type: String, 29 | pub func_name: String, 30 | pub arguments: Vec<(String, Option)>, 31 | } 32 | 33 | impl CDecl { 34 | pub fn new(return_type: T, func_name: T, arguments: Vec<(T, Option)>) -> CDecl 35 | where T: Into { 36 | CDecl { 37 | return_type: return_type.into(), 38 | func_name: func_name.into(), 39 | arguments: arguments.into_iter() 40 | .map(|(ty, name)| (ty.into(), name.map(|n| n.into()))) 41 | .collect(), 42 | } 43 | } 44 | 45 | pub fn is_void(&self) -> bool { 46 | return self.return_type == "void" 47 | } 48 | 49 | pub fn strip_type_qualifiers(s: &str) -> String { 50 | s.replace("const ", "") 51 | } 52 | } 53 | 54 | impl fmt::Display for CDecl { 55 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 56 | let arguments = self.arguments.iter().map(|(ty, name)| { 57 | format!("{}{}", &ty, 58 | name.as_ref() 59 | .map(|n| format!(" {}", n)) 60 | .unwrap_or("".to_string()) 61 | ) 62 | }); 63 | let arguments = util::join("", ", ", arguments); 64 | write!(f, "{} {}({})", self.return_type, self.func_name, arguments) 65 | } 66 | } 67 | 68 | /// Split type information that is known only at runtime. 
69 | /// 70 | /// This includes information such as function pointer values and the sizes of types, which can 71 | /// only be known when the header files produced by the annotator tool are compiled. 72 | #[derive(Debug,Clone,PartialEq,Eq,Hash,Serialize,Deserialize)] 73 | pub struct SplitTypeRuntimeInfo { 74 | /// Function pointer to initializer. 75 | initializer: usize, 76 | /// Function pointer to retrieve next value. 77 | next: usize, 78 | /// Size of the value that the splitter splits. 79 | data_size: usize, 80 | } 81 | 82 | /// A split type. 83 | /// 84 | /// Split types can either be braodcast (copy the value to each worker), generic (i.e., they can 85 | /// take on any type) or named. 86 | #[derive(Debug, Clone,PartialEq,Eq,Hash,Serialize,Deserialize)] 87 | pub enum SplitType { 88 | Broadcast { 89 | runtime: Option, 90 | }, 91 | Generic { 92 | name: String, 93 | }, 94 | Named { 95 | name: String, 96 | arguments: usize, 97 | runtime: Option, 98 | }, 99 | } 100 | 101 | impl SplitType { 102 | /// Returns the string name of this split type. 103 | pub fn name(&self) -> Option<&str> { 104 | match *self { 105 | SplitType::Broadcast { .. } => None, 106 | SplitType::Generic { ref name } => Some(name), 107 | SplitType::Named { ref name, .. } => Some(name), 108 | } 109 | } 110 | 111 | pub fn is_broadcast(&self) -> bool { 112 | match *self { 113 | SplitType::Broadcast { .. } => true, 114 | _ => false, 115 | } 116 | } 117 | 118 | pub fn is_named(&self) -> bool { 119 | match *self { 120 | SplitType::Named { .. } => true, 121 | _ => false, 122 | } 123 | } 124 | 125 | /// Returns the runtime information about the split type. 126 | /// 127 | /// Panics if this type is generic or if the runtime information is not present. 128 | pub fn runtime_info(&self) -> &SplitTypeRuntimeInfo { 129 | match *self { 130 | SplitType::Broadcast { ref runtime, .. } => runtime.as_ref().unwrap(), 131 | SplitType::Named { ref runtime, .. 
} => runtime.as_ref().unwrap(), 132 | SplitType::Generic { .. } => { 133 | panic!("Attempted to retrieve runtime information from generic type.") 134 | } 135 | } 136 | } 137 | } 138 | 139 | /// A parameter within the context of an annotation. 140 | /// 141 | /// This struct defines the split type along with the arguments that are fed to the type to instantiate 142 | /// it. Arguments are indices into the function argument list (e.g., `[1,2,4]` means that the first, 143 | /// second, and fourth arguments should be passed to the split type initializer). Generic split 144 | /// types should not have any arguments. 145 | #[derive(Debug, Clone, PartialEq,Serialize,Deserialize)] 146 | pub struct SplitTypeParameter { 147 | pub ty: SplitType, 148 | pub arguments: Vec, 149 | } 150 | 151 | /// A self-contained annotation over a C function. 152 | #[derive(Debug, Clone, PartialEq,Serialize,Deserialize)] 153 | pub struct Annotation { 154 | pub function: CDecl, 155 | pub params: Vec, 156 | pub return_param: Option, 157 | pub defaults: HashMap, 158 | } 159 | 160 | /// Entry points for parsing and creating annotations from strings. 161 | impl Annotation { 162 | /// Parses an annotation from a JSON string. 163 | pub fn from_json(s: &str) -> Result { 164 | Ok(serde_json::from_str(s).unwrap()) 165 | } 166 | 167 | } 168 | 169 | /// Methods for runtime instantiation of annotations. 170 | /// 171 | /// These methods are called from the generated C code (via FFI). 172 | impl Annotation { 173 | /// Sets the runtime information for an argument. 174 | fn set_type_runtime_info(&mut self, 175 | index: usize, 176 | rt: SplitTypeRuntimeInfo) -> Result<()> { 177 | let param = self.params.get_mut(index).unwrap(); 178 | match param.ty { 179 | SplitType::Named { ref mut runtime, .. } | SplitType::Broadcast { ref mut runtime, .. } => { 180 | *runtime = Some(rt); 181 | } 182 | SplitType::Generic { .. 
} => { 183 | info!("instantiated {:?} with no runtime info.", self); 184 | } 185 | } 186 | Ok(()) 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /c/composer/src/util.rs: -------------------------------------------------------------------------------- 1 | //! Shared utility functions. 2 | 3 | /// Joins a an iterator of strings using a delimiter. 4 | pub fn join<'a>(start: impl Into, 5 | sep: impl Into<&'a str>, 6 | strings: impl std::iter::Iterator) -> String { 7 | 8 | let sep = sep.into(); 9 | strings.enumerate().fold(start.into(), |mut buf, (i, val)| { 10 | if i > 0 { 11 | buf.push_str(sep); 12 | } 13 | buf.push_str(&val); 14 | buf 15 | }) 16 | } 17 | -------------------------------------------------------------------------------- /c/lib/ImageMagick/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS=-fPIC -g -O3 -Wall 2 | LDFLAGS=-shared 3 | LD_LIBRARY_PATH=../../target/release 4 | 5 | OS=$(shell uname -s) 6 | 7 | ifeq ($(OS), Darwin) 8 | CC=gcc-7 9 | LIB=dylib 10 | MAGICK=$(shell pkg-config --cflags --libs MagickWand) 11 | else ifeq ($(OS), Linux) 12 | CC=gcc-5 13 | LIB=so 14 | MAGICK=$(shell pkg-config --cflags --libs MagickWand) 15 | else 16 | $(error Unsupported platform: $(OS)) 17 | endif 18 | 19 | INCLUDE=-I../../composer/ -I/usr/local/include/ImageMagick-7/MagickWand 20 | 21 | .PHONY: all annotate clean 22 | 23 | all: annotate 24 | $(CC) $(INCLUDE) $(MAGICK) -fopenmp -L$(LD_LIBRARY_PATH) $(CFLAGS) $(LDFLAGS) splitters.c -o libcomposer_imagemagick.$(LIB) -lcomposer $(INTEL) 25 | 26 | annotate: 27 | rm -rf generated 28 | annotate -i MagickWand imagemagick.annotation 29 | 30 | clean: 31 | rm -rf a.out generated libcomposer_imagemagick.$(LIB) *.dSYM 32 | -------------------------------------------------------------------------------- /c/lib/ImageMagick/imagemagick.annotation: -------------------------------------------------------------------------------- 
1 | @sa(wand: WandSplit()) -> WandSplit() { 2 | MagickWand *CloneMagickWand(MagickWand *); 3 | MagickWand *DestroyMagickWand(MagickWand *); 4 | } 5 | 6 | @sa() -> broadcast { 7 | PixelWand *NewPixelWand(); 8 | } 9 | 10 | @sa(wand: broadcast) -> broadcast { 11 | PixelWand *DestroyPixelWand(PixelWand *); 12 | } 13 | 14 | @sa(wand: broadcast, color: broadcast) { 15 | void PixelSetColor(PixelWand *, const char *); 16 | } 17 | 18 | @sa(wand: WandSplit(), colorize: broadcast, alpha: broadcast) -> broadcast { 19 | MagickBooleanType MagickColorizeImage(MagickWand *, PixelWand *, PixelWand *); 20 | } 21 | 22 | @sa(wand: WandSplit(), colorspace: broadcast) -> broadcast { 23 | MagickBooleanType MagickSetImageColorspace(MagickWand *, const ColorspaceType); 24 | } 25 | 26 | @sa(wand: WandSplit(), flag: broadcast) -> broadcast { 27 | MagickBooleanType MagickNegateImage(MagickWand *, const MagickBooleanType); 28 | MagickBooleanType MagickContrastImage(MagickWand *, const MagickBooleanType); 29 | } 30 | 31 | @sa(wand: WandSplit(), key: broadcast, value: broadcast) -> broadcast { 32 | MagickBooleanType MagickSetImageArtifact(MagickWand *, const char*, const char*); 33 | } 34 | 35 | @sa(wand: WandSplit(), hue: broadcast, saturation: broadcast, value: broadcast) -> broadcast { 36 | MagickBooleanType MagickModulateImage(MagickWand *, const double, const double, const double); 37 | } 38 | 39 | @sa(wand: WandSplit(), gamma: broadcast) -> broadcast { 40 | MagickBooleanType MagickGammaImage(MagickWand *, const double); 41 | } 42 | 43 | @sa(wand: WandSplit(), other: WandSplit(), kind: broadcast, 44 | clip: broadcast, x_offset: broadcast, y_offset: broadcast) -> broadcast { 45 | 46 | MagickBooleanType MagickCompositeImage(MagickWand *, 47 | const MagickWand *,const CompositeOperator, 48 | const MagickBooleanType, const ssize_t, const ssize_t); 49 | } 50 | -------------------------------------------------------------------------------- /c/lib/ImageMagick/splitters.c: 
-------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #define DEBUG 0 8 | 9 | #define DBG(fmt, ...) \ 10 | do { if (DEBUG) fprintf(stderr, "%s:%d:%s(): " fmt "\n", __FILE__, \ 11 | __LINE__, __func__, __VA_ARGS__); } while (0) 12 | 13 | 14 | struct WandSplit { 15 | MagickWand *wand; 16 | size_t width; 17 | size_t height; 18 | }; 19 | 20 | void* WandSplit_new(MagickWand **wand_to_split, struct WandSplit_init_args *_, int64_t *items) { 21 | struct WandSplit *splitter = (struct WandSplit *)malloc(sizeof(struct WandSplit)); 22 | splitter->wand = *wand_to_split; 23 | 24 | // We'll split the image by row, since there are nice methods for reconstructing an image 25 | // in this way that are builtin. 26 | splitter->width = MagickGetImageWidth(splitter->wand); 27 | splitter->height = MagickGetImageHeight(splitter->wand); 28 | *items = splitter->height; 29 | DBG("items: %ld", splitter->height); 30 | return (void *)splitter; 31 | } 32 | 33 | SplitterStatus WandSplit_next(const void *s, 34 | int64_t start, 35 | int64_t end, 36 | MagickWand **out) { 37 | 38 | const struct WandSplit *splitter = (const struct WandSplit *)s; 39 | DBG("start: %ld end: %ld height: %ld", start, end, splitter->height); 40 | 41 | if (splitter->height <= start) { 42 | DBG("finished got range (%ld %ld)", start, end); 43 | return SplitterFinished; 44 | } else { 45 | size_t region_height = (end - start); 46 | if (splitter->height < end) { 47 | DBG("clipping region height by %ld", end - splitter->height); 48 | region_height = splitter->height - start; 49 | } 50 | DBG("range: %ld, %ld", start, start + region_height); 51 | 52 | MagickWand *wand = MagickGetImageRegion(splitter->wand, splitter->width, region_height, 0, start); 53 | *out = wand; 54 | return SplitterContinue; 55 | } 56 | } 57 | 58 | MagickWand *aggregate_seq(MagickWand **pieces, int64_t count) { 59 | MagickWand *results = NewMagickWand(); 60 | 
MagickResetIterator(results); 61 | 62 | DBG("consturcted results image %p", results); 63 | 64 | for (int i = 0; i < count; i++) { 65 | DBG("adding image %d", i); 66 | fflush(stderr); 67 | MagickSetLastIterator(results); 68 | MagickAddImage(results, pieces[i]); 69 | } 70 | 71 | MagickResetIterator(results); 72 | MagickWand *final = MagickAppendImages(results, 1); 73 | DestroyMagickWand(results); 74 | 75 | return final; 76 | } 77 | 78 | MagickWand *aggregate_par(MagickWand **pieces, int count, int threads) { 79 | // Holds aggregation state. 80 | MagickWand **results = (MagickWand **)malloc(sizeof(MagickWand *) * threads); 81 | for (int i = 0; i < threads; i++) { 82 | results[i] = NewMagickWand(); 83 | MagickResetIterator(results[i]); 84 | } 85 | 86 | int values_per_thread = count / threads; 87 | printf("values per piece: %d\n", values_per_thread); 88 | 89 | #pragma omp parallel for 90 | for (int i = 0; i < threads; i++) { 91 | int start = i * values_per_thread; 92 | int end = (i + 1) * values_per_thread; 93 | 94 | if (i == threads - 1) { 95 | end = count; 96 | } 97 | 98 | MagickWand *result = results[i]; 99 | 100 | // printf("thread %d: %d->%d\n", omp_get_thread_num(), start, end); 101 | for (int j = start; j < end; j++) { 102 | MagickSetLastIterator(result); 103 | MagickAddImage(result, pieces[j]); 104 | } 105 | 106 | MagickResetIterator(result); 107 | MagickWand *final = MagickAppendImages(result, 1); 108 | 109 | result = DestroyMagickWand(result); 110 | results[i] = final; 111 | } 112 | 113 | MagickWand *final_iterator = NewMagickWand(); 114 | MagickResetIterator(final_iterator); 115 | for (int i = 0; i < threads; i++) { 116 | MagickSetLastIterator(final_iterator); 117 | MagickAddImage(final_iterator, results[i]); 118 | } 119 | MagickResetIterator(final_iterator); 120 | MagickWand *final = MagickAppendImages(final_iterator, 1); 121 | 122 | for (int i = 0; i < threads; i++) { 123 | DestroyMagickWand(results[i]); 124 | } 125 | free(results); 126 | 127 | return 
final; 128 | } 129 | 130 | void *WandSplit_merge(const void *s, int64_t length, int64_t threads) { 131 | MagickWand *final; 132 | MagickWand **pieces = (MagickWand **)s; 133 | 134 | if (length == 1) { 135 | DBG("only one item: returning it %d", 0); 136 | return ((MagickWand **)s)[0]; 137 | } 138 | 139 | MagickWand *results = NewMagickWand(); 140 | MagickResetIterator(results); 141 | for (int i = 0; i < length; i++) { 142 | MagickSetLastIterator(results); 143 | MagickAddImage(results, pieces[i]); 144 | } 145 | MagickResetIterator(results); 146 | final = MagickAppendImages(results, 1); 147 | DestroyMagickWand(results); 148 | // DBG("aggregate_seq: %p", aggregate_seq); 149 | // final = aggregate_seq(pieces, length); 150 | 151 | return (void *)final; 152 | } 153 | -------------------------------------------------------------------------------- /c/lib/ImageMagick/splitters.h: -------------------------------------------------------------------------------- 1 | #ifndef _MAGICK_SPLITTER_H_ 2 | #define _MAGICK_SPLITTER_H_ 3 | 4 | void *WandSplit_merge(const void *s, int64_t length, int64_t threads); 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /c/lib/composer_mkl/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS=-fPIC -g -O3 -Wall 2 | LDFLAGS=-shared 3 | INCLUDE=-I../../composer -I. 
4 | LD_LIBRARY_PATH=../../target/release 5 | 6 | OS=$(shell uname -s) 7 | 8 | ifeq ($(OS), Darwin) 9 | CC=gcc-7 10 | LIB=dylib 11 | INTEL= -L${MKLROOT}/lib -Wl,-rpath,${MKLROOT}/lib -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 12 | else ifeq ($(OS), Linux) 13 | CC=gcc-5 14 | LIB=so 15 | INTEL= -I${MKLROOT}/compilers_and_libraries/linux/mkl/include -L${MKLROOT}/compilers_and_libraries/linux/mkl/lib/intel64 -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 16 | else 17 | $(error Unsupported platform: $(OS)) 18 | endif 19 | 20 | 21 | .PHONY: all annotate clean 22 | 23 | all: annotate 24 | $(CC) $(INCLUDE) -L$(LD_LIBRARY_PATH) $(CFLAGS) $(LDFLAGS) vec.c splitters.c mkl_extensions.c -o libcomposer_mkl.$(LIB) -lcomposer $(INTEL) 25 | 26 | annotate: 27 | rm -rf generated 28 | ../../target/release/annotate -i mkl,mkl_extensions mkl.annotation 29 | 30 | clean: 31 | rm -rf a.out generated libcomposer_mkl.$(LIB) *.dSYM 32 | -------------------------------------------------------------------------------- /c/lib/composer_mkl/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Controlling Threads 3 | 4 | * Make sure the `OMP_NUM_THREADS` variable is set to 1 when running with composer! 
5 | -------------------------------------------------------------------------------- /c/lib/composer_mkl/mkl.annotation: -------------------------------------------------------------------------------- 1 | @sa (n: SizeSplit(), a: RegularSplit(n), b: RegularSplit(n), out: RegularSplit(n)) { 2 | void vdAdd(MKL_INT n, double *a, double *b, double *out); 3 | void vdDiv(MKL_INT n, double *a, double *b, double *out); 4 | void vdMul(MKL_INT n, double *a, double *b, double *out); 5 | void vdSub(MKL_INT n, double *a, double *b, double *out); 6 | } 7 | 8 | @sa (n: SizeSplit(), a: RegularSplit(n), b: RegularSplit(n)) { 9 | void vdAsin(MKL_INT n, double *a, double *out); 10 | void vdCos(MKL_INT n, double *a, double *out); 11 | void vdErf(MKL_INT n, double *a, double *out); 12 | void vdExp(MKL_INT n, double *a, double *out); 13 | void vdLog1p(MKL_INT n, double *a, double *out); 14 | void vdSin(MKL_INT n, double *a, double *out); 15 | void vdSqrt(MKL_INT n, double *a, double *out); 16 | } 17 | 18 | @sa (n: SizeSplit(), a: RegularSplit(n), b: broadcast, out: RegularSplit(n)) { 19 | void vdAddi(MKL_INT n, double *a, double b, double *out); 20 | void vdDivi(MKL_INT n, double *a, double b, double *out); 21 | void vdMuli(MKL_INT n, double *a, double b, double *out); 22 | void vdSubi(MKL_INT n, double *a, double b, double *out); 23 | void vdPowx(MKL_INT n, double *a, double b, double *out); 24 | } 25 | 26 | @sa (n: SizeSplit(), a: broadcast, b: RegularSplit(n), out: RegularSplit(n)) { 27 | void vdSubvi(MKL_INT n, double a, double *b, double *out); 28 | } 29 | -------------------------------------------------------------------------------- /c/lib/composer_mkl/mkl_extensions.c: -------------------------------------------------------------------------------- 1 | 2 | #define INFINITE_PIECES (-1) 3 | 4 | #include "mkl.h" 5 | 6 | #include "mkl_extensions.h" 7 | #include 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | // Extensions to MKL, since it doesn't support 
immediates 14 | // Unary Operators with Immediate values 15 | 16 | void vdAddi(MKL_INT length, double *a, double b, double *result) { 17 | for (size_t i = 0; i < length; i++) { 18 | result[i] = a[i] + b; 19 | } 20 | } 21 | 22 | void vdSubi(MKL_INT length, double *a, double b, double *result) { 23 | for (size_t i = 0; i < length; i++) { 24 | result[i] = a[i] - b; 25 | } 26 | } 27 | 28 | void vdMuli(MKL_INT length, double *a, double b, double *result) { 29 | for (size_t i = 0; i < length; i++) { 30 | result[i] = a[i] * b; 31 | } 32 | } 33 | 34 | void vdDivi(MKL_INT length, double *a, double b, double *result) { 35 | for (size_t i = 0; i < length; i++) { 36 | result[i] = a[i] / b; 37 | } 38 | } 39 | 40 | void vdSubvi(MKL_INT length, double a, double *b, double *result) { 41 | for (size_t i = 0; i < length; i++) { 42 | result[i] = a - b[i]; 43 | } 44 | } 45 | 46 | void vdDivvi(MKL_INT length, double a, double *b, double *result) { 47 | for (size_t i = 0; i < length; i++) { 48 | result[i] = a / b[i]; 49 | } 50 | } 51 | 52 | #ifdef __cplusplus 53 | } 54 | #endif 55 | -------------------------------------------------------------------------------- /c/lib/composer_mkl/mkl_extensions.h: -------------------------------------------------------------------------------- 1 | #ifndef _MKL_EXTENSIONS_H_ 2 | #define _MKL_EXTENSIONS_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void vdAddi(MKL_INT length, double *a, double b, double *result); 9 | void vdSubi(MKL_INT length, double *a, double b, double *result); 10 | void vdMuli(MKL_INT length, double *a, double b, double *result); 11 | void vdDivi(MKL_INT length, double *a, double b, double *result); 12 | void vdSubvi(MKL_INT length, double a, double *b, double *result); 13 | void vdDivvi(MKL_INT length, double a, double *b, double *result); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- 
/c/lib/composer_mkl/splitters.c: -------------------------------------------------------------------------------- 1 | 2 | #include "generated/generated.h" 3 | #include 4 | 5 | #define DEBUG 0 6 | 7 | #define DBG(fmt, ...) \ 8 | do { if (DEBUG) fprintf(stderr, "%s:%d:%s(): " fmt "\n", __FILE__, \ 9 | __LINE__, __func__, __VA_ARGS__); } while (0) 10 | 11 | struct RegularSplit { 12 | double *base; 13 | int size; 14 | }; 15 | 16 | struct SizeSplit { 17 | size_t size; 18 | }; 19 | 20 | void* RegularSplit_new(double **item_to_split, struct RegularSplit_init_args *a, int64_t *items) { 21 | struct RegularSplit *splitter = (struct RegularSplit *)malloc(sizeof(struct RegularSplit)); 22 | splitter->base = *item_to_split; 23 | splitter->size = a->_0; 24 | DBG("base=%p, size=%d\n", item_to_split, a->_0); 25 | 26 | *items = splitter->size; 27 | return (void *)splitter; 28 | } 29 | 30 | SplitterStatus RegularSplit_next(const void *s, 31 | int64_t start, 32 | int64_t end, 33 | double **out) { 34 | 35 | const struct RegularSplit *splitter = (const struct RegularSplit *)s; 36 | DBG("start=%lld, end=%lld, size=%d", start, end, splitter->size); 37 | if (splitter->size < start) { 38 | return SplitterFinished; 39 | } else { 40 | *out = splitter->base + start; 41 | return SplitterContinue; 42 | } 43 | } 44 | 45 | void* SizeSplit_new(MKL_INT *item_to_split, struct SizeSplit_init_args *_unused, int64_t *items) { 46 | struct SizeSplit *splitter = (struct SizeSplit *)malloc(sizeof(struct SizeSplit)); 47 | splitter->size = *item_to_split; 48 | *items = splitter->size; 49 | return (void *)splitter; 50 | } 51 | 52 | SplitterStatus SizeSplit_next(const void *s, 53 | int64_t start, 54 | int64_t end, 55 | MKL_INT *out) { 56 | 57 | struct SizeSplit *splitter = (struct SizeSplit *)s; 58 | DBG("start=%lld, end=%lld, size=%zu", start, end, splitter->size); 59 | if (splitter->size < start) { 60 | return SplitterFinished; 61 | } else if (splitter->size < end) { 62 | *out = (splitter->size - start); 63 | 
return SplitterContinue; 64 | } else { 65 | *out = (end - start); 66 | return SplitterContinue; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /c/lib/composer_mkl/vec.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef struct { 11 | double *data; 12 | size_t length; 13 | } vec_t; 14 | 15 | vec_t new_vec(size_t length, int lazy) { 16 | vec_t result; 17 | result.data = (double *)composer_malloc(sizeof(double) * length, lazy); 18 | result.length = length; 19 | return result; 20 | } 21 | 22 | vec_t new_vec_nolazy(size_t length) { 23 | vec_t result; 24 | result.data = (double *)malloc(sizeof(double) * length); 25 | assert(result.data); 26 | result.length = length; 27 | return result; 28 | } 29 | 30 | // Initialize a vector where the value is val. 31 | vec_t vvals(size_t length, double val, int lazy) { 32 | vec_t result; 33 | size_t size = sizeof(double) * length; 34 | result.data = (double *)composer_malloc(size, 0); 35 | result.length = length; 36 | for (int i = 0; i < length; i++) { 37 | result.data[i] = val; 38 | } 39 | 40 | if (lazy) { 41 | composer_tolazy(result.data); 42 | } 43 | 44 | return result; 45 | } 46 | 47 | #ifdef __cplusplus 48 | } 49 | #endif 50 | -------------------------------------------------------------------------------- /c/lib/composer_mkl/vec.h: -------------------------------------------------------------------------------- 1 | #ifndef _VEC_H_ 2 | #define _VEC_H_ 3 | 4 | /** A small convinience library for vectors used with composer. 
*/ 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef struct { 11 | double *data; 12 | size_t length; 13 | } vec_t; 14 | 15 | 16 | vec_t new_vec(size_t length, int lazy); 17 | vec_t new_vec_nolazy(size_t length); 18 | vec_t vvals(size_t length, double val, int lazy); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /python/benchmarks/birth_analysis/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | # File to use. babynames.txt is for testing. babynames-xlarge.txt is for benchmark. 8 | filename="../datasets/birth_analysis/_data/babynames-xlarge.txt" 9 | runs=${1:-1} 10 | 11 | tasks=( composer naive ) 12 | threads=( 1 2 4 8 16 ) 13 | 14 | for task in "${tasks[@]}"; do 15 | rm -f $task.stdout $task.stderr 16 | git log | head -1 > $task.stderr 17 | git log | head -1 > $task.stdout 18 | done 19 | 20 | for i in {1..$runs}; do 21 | python birth_analysis.py -f $filename >> naive.stdout 2>> naive.stderr 22 | for nthreads in "${threads[@]}"; do 23 | python birth_analysis_composer.py -f $filename -t $nthreads >> composer.stdout 2>> composer.stderr 24 | done 25 | done 26 | -------------------------------------------------------------------------------- /python/benchmarks/birth_analysis/birth_analysis.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | import pandas as pd 4 | import sys 5 | import time 6 | 7 | def get_top1000(group): 8 | return group.sort_values(by='births', ascending=False)[0:1000] 9 | 10 | def analyze(top1000): 11 | start1 = time.time() 12 | all_names = pd.Series(top1000.name.unique()) 13 | lesley_like = all_names[all_names.str.lower().str.contains('lesl')] 14 | filtered = top1000[top1000.name.isin(lesley_like)] 15 | table = filtered.pivot_table('births', 
index='year', 16 | columns='sex', aggfunc='sum') 17 | 18 | table = table.div(table.sum(1), axis=0) 19 | end1 = time.time() 20 | print("Analysis:", end1 - start1) 21 | return table 22 | 23 | def run(filename): 24 | years = range(1880, 2011) 25 | pieces = [] 26 | columns = ['year', 'sex', 'name', 'births'] 27 | 28 | sys.stdout.write("Reading data...") 29 | sys.stdout.flush() 30 | names = pd.read_csv(filename, names=columns) 31 | print("done.") 32 | sys.stdout.flush() 33 | 34 | print("Size of names:", len(names)) 35 | 36 | e2e_start = time.time() 37 | 38 | # Time preprocessing step 39 | start0 = time.time() 40 | grouped = names.groupby(['year', 'sex']) 41 | end0 = time.time() 42 | print("GroupBy:", end0 - start0) 43 | start0 = end0 44 | 45 | top1000 = grouped.apply(get_top1000) 46 | top1000.reset_index(inplace=True, drop=True) 47 | 48 | end0 = time.time() 49 | print("Apply:", end0-start0) 50 | print("Elements in top1000:", len(top1000)) 51 | 52 | result = analyze(top1000) 53 | 54 | e2e_end = time.time() 55 | print("Total time:", e2e_end - e2e_start) 56 | 57 | print(top1000['births'].sum()) 58 | 59 | def main(): 60 | parser = argparse.ArgumentParser( 61 | description="Birth Analysis." 
62 | ) 63 | parser.add_argument('-f', "--filename", type=str, default="babynames.txt", help="Input file") 64 | args = parser.parse_args() 65 | 66 | filename = args.filename 67 | 68 | print("File:", filename) 69 | mi = run(filename) 70 | 71 | 72 | if __name__ == "__main__": 73 | main() 74 | -------------------------------------------------------------------------------- /python/benchmarks/birth_analysis/birth_analysis_composer.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | sys.path.append("../../lib/") 5 | sys.path.append("../../pycomposer/") 6 | 7 | import argparse 8 | import composer_pandas as pd 9 | import time 10 | 11 | def analyze(top1000): 12 | start1 = time.time() 13 | all_names = pd.Series(top1000.name.unique()) 14 | lesley_like = all_names[all_names.str.lower().str.contains('lesl')] 15 | filtered = top1000[top1000.name.isin(lesley_like)] 16 | table = filtered.pivot_table('births', index='year', 17 | columns='sex', aggfunc='sum') 18 | 19 | table = table.div(table.sum(1), axis=0) 20 | end1 = time.time() 21 | print("Analysis:", end1 - start1) 22 | return table 23 | 24 | def get_top1000(group): 25 | return group.sort_values(by='births', ascending=False)[0:1000] 26 | 27 | def run(filename, threads): 28 | years = range(1880, 2011) 29 | columns = ['year', 'sex', 'name', 'births'] 30 | 31 | sys.stdout.write("Reading data...") 32 | sys.stdout.flush() 33 | names = pd.read_csv(filename, names=columns) 34 | print("done") 35 | 36 | print("Size of names:", len(names)) 37 | 38 | e2e_start = time.time() 39 | 40 | start0 = time.time() 41 | grouped = pd.dfgroupby(names, ['year', 'sex']) 42 | top1000 = pd.gbapply(grouped, get_top1000) 43 | pd.evaluate(workers=threads) 44 | top1000 = top1000.value 45 | top1000.reset_index(inplace=True, drop=True) 46 | print(len(top1000)) 47 | 48 | """ 49 | grouped: Dag Operation 50 | GBApply Takes a DAG operation and stores it in its type. 
The operation must be a GroupBy 51 | GBApply has type ApplySplit. It's combiner: 52 | 1. Combines the results of the dataFrame 53 | 2. Resets index 54 | 3. Gets the keys from the DAG operation 55 | 4. Calls groupBy again 56 | 5. Calls apply again. 57 | """ 58 | 59 | localreduce_start = time.time() 60 | top1000 = top1000.groupby(['year', 'sex']).apply(get_top1000) 61 | localreduce_end = time.time() 62 | print("Local reduction:", localreduce_end - localreduce_start) 63 | top1000.reset_index(inplace=True, drop=True) 64 | end0 = time.time() 65 | 66 | print("Apply:", end0-start0) 67 | print("Elements in top1000:", len(top1000)) 68 | 69 | result = analyze(top1000) 70 | 71 | e2e_end = time.time() 72 | print("Total time:", e2e_end - e2e_start) 73 | 74 | print(top1000['births'].sum()) 75 | 76 | def main(): 77 | parser = argparse.ArgumentParser( 78 | description="Birth Analysis with Composer." 79 | ) 80 | parser.add_argument('-f', "--filename", type=str, default="babynames.txt", help="Input file") 81 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 82 | args = parser.parse_args() 83 | 84 | filename = args.filename 85 | threads = args.threads 86 | 87 | print("File:", filename) 88 | print("Threads:", threads) 89 | mi = run(filename, threads) 90 | 91 | 92 | if __name__ == "__main__": 93 | main() 94 | -------------------------------------------------------------------------------- /python/benchmarks/blackscholes/benchmark-batch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | size=30 8 | 9 | tasks=( composerbatch ) 10 | batchsizes=( 512 2048 4096 8192 16384 32768 8388608 16777216 33554432 ) 11 | 12 | for task in "${tasks[@]}"; do 13 | rm -f $task.stdout $task.stderr 14 | git log | head -1 > $task.stderr 15 | git log | head -1 > $task.stdout 16 | done 17 | 18 | for i in {1..5}; do 19 | for batchsize in "${batchsizes[@]}"; do 
20 | taskset -a -c 0-9,20-29 python blackscholes.py -m composer -s $size -t 16 -p $batchsize >> $task.stdout 2>> $task.stderr 21 | done 22 | done 23 | -------------------------------------------------------------------------------- /python/benchmarks/blackscholes/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | tasks=( naive numba composer ) 8 | size=30 9 | runs=${1:-1} 10 | threads=( 1 2 4 8 16 ) 11 | 12 | for task in "${tasks[@]}"; do 13 | rm -f $task.stdout $task.stderr 14 | git log | head -1 > $task.stderr 15 | git log | head -1 > $task.stdout 16 | done 17 | 18 | for i in {1..$runs}; do 19 | for nthreads in "${threads[@]}"; do 20 | NUMBA_NUM_THREADS=$nthreads python blackscholes_numba.py -s $size >> numba.stdout 2>> numba.stderr 21 | done 22 | done 23 | 24 | for i in {1..$runs}; do 25 | for nthreads in "${threads[@]}"; do 26 | python blackscholes.py -m composer -s $size -t $nthreads >> composer.stdout 2>> composer.stderr 27 | done 28 | done 29 | 30 | for i in {1..$runs}; do 31 | for nthreads in "${threads[@]}"; do 32 | python blackscholes.py -m naive -s $size -t $nthreads >> naive.stdout 2>> naive.stderr 33 | done 34 | done 35 | -------------------------------------------------------------------------------- /python/benchmarks/blackscholes/blackscholes.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | sys.path.append("../../lib/") 4 | sys.path.append("../../pycomposer/") 5 | 6 | import argparse 7 | import math 8 | import scipy.special as ss 9 | import time 10 | 11 | def get_data(size, composer): 12 | if composer: 13 | import composer_numpy as np 14 | else: 15 | import numpy as np 16 | 17 | price = np.ones(size, dtype="float64") * 4.0 18 | strike = np.ones(size, dtype="float64") * 4.0 19 | t = np.ones(size, dtype="float64") * 4.0 20 | rate = np.ones(size, dtype="float64") * 4.0 21 
| vol = np.ones(size, dtype="float64") * 4.0 22 | 23 | return price, strike, t, rate, vol 24 | 25 | def bs(price, strike, t, rate, vol, composer, threads, piece_size): 26 | 27 | if composer: 28 | import composer_numpy as np 29 | else: 30 | import numpy as np 31 | 32 | c05 = 3.0 33 | c10 = 1.5 34 | invsqrt2 = 1.0 / math.sqrt(2.0) 35 | 36 | start = time.time() 37 | 38 | tmp = np.ones(len(price), dtype="float64") 39 | vol_sqrt = np.ones(len(price), dtype="float64") 40 | rsig = np.ones(len(price), dtype="float64") 41 | d1 = np.ones(len(price), dtype="float64") 42 | d2 = np.ones(len(price), dtype="float64") 43 | 44 | # Outputs 45 | call = np.ones(len(price), dtype="float64") 46 | put = np.ones(len(price), dtype="float64") 47 | end = time.time() 48 | print("Allocation:", end - start) 49 | 50 | start = time.time() 51 | 52 | np.multiply(vol, vol, out=rsig) 53 | np.multiply(rsig, c05, out=rsig) 54 | np.add(rsig, rate, out=rsig) 55 | 56 | np.sqrt(t, out=vol_sqrt) 57 | np.multiply(vol_sqrt, vol, out=vol_sqrt) 58 | 59 | np.multiply(rsig, t, out=tmp) 60 | np.divide(price, strike, out=d1) 61 | np.log2(d1, out=d1) 62 | np.add(d1, tmp, out=d1) 63 | 64 | np.divide(d1, vol_sqrt, out=d1) 65 | np.subtract(d1, vol_sqrt, out=d2) 66 | 67 | # d1 = c05 + c05 * erf(d1 * invsqrt2) 68 | np.multiply(d1, invsqrt2, out=d1) 69 | 70 | if composer: 71 | np.erf(d1, out=d1) 72 | else: 73 | ss.erf(d1, out=d1) 74 | 75 | np.multiply(d1, c05, out=d1) 76 | np.add(d1, c05, out=d1) 77 | 78 | # d2 = c05 + c05 * erf(d2 * invsqrt2) 79 | np.multiply(d2, invsqrt2, out=d2) 80 | 81 | if composer: 82 | np.erf(d2, out=d2) 83 | else: 84 | ss.erf(d2, out=d2) 85 | 86 | np.multiply(d2, c05, out=d2) 87 | np.add(d2, c05, out=d2) 88 | 89 | # Reuse existing buffers 90 | e_rt = vol_sqrt 91 | tmp2 = rsig 92 | 93 | # e_rt = exp(-rate * t) 94 | np.multiply(rate, -1.0, out=e_rt) 95 | np.multiply(e_rt, t, out=e_rt) 96 | np.exp(e_rt, out=e_rt) 97 | 98 | # call = price * d1 - e_rt * strike * d2 99 | # 100 | # tmp = price * d1 101 | 
# tmp2 = e_rt * strike * d2 102 | # call = tmp - tmp2 103 | np.multiply(price, d1, out=tmp) 104 | np.multiply(e_rt, strike, out=tmp2) 105 | np.multiply(tmp2, d2, out=tmp2) 106 | np.subtract(tmp, tmp2, out=call) 107 | 108 | # put = e_rt * strike * (c10 - d2) - price * (c10 - d1) 109 | # tmp = e_rt * strike 110 | # tmp2 = (c10 - d2) 111 | # put = tmp - tmp2 112 | # tmp = c10 - d1 113 | # tmp = price * tmp 114 | # put = put - tmp 115 | np.multiply(e_rt, strike, out=tmp) 116 | np.subtract(c10, d2, out=tmp2) 117 | np.multiply(tmp, tmp2, out=put) 118 | np.subtract(c10, d1, out=tmp) 119 | np.multiply(price, tmp, out=tmp) 120 | np.subtract(put, tmp, out=put) 121 | 122 | end = time.time() 123 | print("Build time:", end - start) 124 | 125 | if composer: 126 | np.evaluate(workers=threads, batch_size=piece_size) 127 | 128 | end = time.time() 129 | print("Runtime:", end - start) 130 | 131 | return call, put 132 | 133 | def run(): 134 | parser = argparse.ArgumentParser( 135 | description="Chained Adds pipelining test on a single thread." 
136 | ) 137 | parser.add_argument('-s', "--size", type=int, default=27, help="Size of each array") 138 | parser.add_argument('-p', "--piece_size", type=int, default=16384, help="Size of each piece.") 139 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 140 | parser.add_argument('-v', "--verbosity", type=str, default="none", help="Log level (debug|info|warning|error|critical|none)") 141 | parser.add_argument('-m', "--mode", type=str, required=False, help="Mode (composer|naive)") 142 | args = parser.parse_args() 143 | 144 | size = (1 << args.size) 145 | piece_size = args.piece_size 146 | threads = args.threads 147 | loglevel = args.verbosity 148 | mode = args.mode.strip().lower() 149 | 150 | assert threads >= 1 151 | 152 | print("Size:", size) 153 | print("Piece Size:", piece_size) 154 | print("Threads:", threads) 155 | print("Log Level", loglevel) 156 | print("Mode:", mode) 157 | 158 | if mode == "composer": 159 | composer = True 160 | elif mode == "naive": 161 | composer = False 162 | else: 163 | raise ValueError("invalid mode", mode) 164 | 165 | sys.stdout.write("Generating data...") 166 | sys.stdout.flush() 167 | a, b, c, d, e = get_data(size, composer) 168 | print("done.") 169 | 170 | call, put = bs(a, b, c, d, e, composer, threads, piece_size) 171 | print("Call:", call) 172 | print("Put:", put) 173 | 174 | if __name__ == "__main__": 175 | run() 176 | -------------------------------------------------------------------------------- /python/benchmarks/blackscholes/blackscholes_numba.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | import argparse 5 | import scipy.special as ss 6 | import time 7 | 8 | import numpy as np 9 | from numba import njit, jit 10 | from numba import vectorize, float64 11 | 12 | def get_data(size): 13 | price = np.ones(size, dtype="float64") * 4.0 14 | strike = np.ones(size, dtype="float64") * 4.0 15 | t = np.ones(size, dtype="float64") * 
4.0 16 | rate = np.ones(size, dtype="float64") * 4.0 17 | vol = np.ones(size, dtype="float64") * 4.0 18 | 19 | return price, strike, t, rate, vol 20 | 21 | @njit(parallel=True) 22 | def bs(price, strike, t, rate, vol): 23 | """ 24 | This is the cookie-cutter implementation. 25 | """ 26 | c05 = 3.0 27 | c10 = 1.5 28 | invsqrt2 = 1.0 / np.sqrt(2.0) 29 | 30 | c05 = np.float64(3.0) 31 | c10 = np.float64(1.5) 32 | 33 | rsig = rate + (vol**2) * c05 34 | vol_sqrt = vol * np.sqrt(t) 35 | 36 | d1 = (np.log(price / strike) + rsig * t) / vol_sqrt 37 | d2 = d1 - vol_sqrt 38 | 39 | d1 = c05 + c05 * np.exp(d1 * invsqrt2) 40 | d2 = c05 + c05 * np.exp(d2 * invsqrt2) 41 | 42 | e_rt = np.exp((-rate) * t) 43 | 44 | call = price * d1 - e_rt * strike * d2 45 | put = e_rt * strike * (c10 - d2) - price * (c10 - d1) 46 | return call, put 47 | 48 | 49 | def run(): 50 | parser = argparse.ArgumentParser( 51 | description="Chained Adds pipelining test on a single thread." 52 | ) 53 | parser.add_argument('-s', "--size", type=int, default=27, help="Size of each array") 54 | parser.add_argument('-p', "--piece_size", type=int, default=16384, help="Size of each piece.") 55 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 56 | parser.add_argument('-v', "--verbosity", type=str, default="none", help="Log level (debug|info|warning|error|critical|none)") 57 | args = parser.parse_args() 58 | 59 | size = (1 << args.size) 60 | piece_size = args.piece_size 61 | threads = args.threads 62 | loglevel = args.verbosity 63 | 64 | assert threads >= 1 65 | 66 | print("Size:", size) 67 | print("Piece Size:", piece_size) 68 | print("Threads:", threads) 69 | print("Log Level", loglevel) 70 | 71 | sys.stdout.write("Generating data...") 72 | sys.stdout.flush() 73 | a, b, c, d, e = get_data(size) 74 | print("done") 75 | 76 | 77 | start = time.time() 78 | call, put = bs(a, b, c, d, e) 79 | end = time.time() 80 | print(end-start) 81 | 82 | if __name__ == "__main__": 83 | run() 84 | 
85 | -------------------------------------------------------------------------------- /python/benchmarks/crime_index/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | size=29 8 | tasks=( naive composer ) 9 | threads=( 1 2 4 8 16) 10 | runs=${1:-1} 11 | 12 | for task in "${tasks[@]}"; do 13 | rm -f $task.stdout $task.stderr 14 | git log | head -1 > $task.stderr 15 | git log | head -1 > $task.stdout 16 | done 17 | 18 | for task in "${tasks[@]}"; do 19 | for i in {1..$runs}; do 20 | for nthreads in "${threads[@]}"; do 21 | python crime_index.py -m $task -t $nthreads -s $size >> $task.stdout 2>> $task.stderr 22 | done 23 | done 24 | done 25 | -------------------------------------------------------------------------------- /python/benchmarks/crime_index/crime_index.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import argparse 4 | import sys 5 | 6 | sys.path.append("../../lib") 7 | sys.path.append("../../pycomposer") 8 | 9 | import numpy as np 10 | import time 11 | 12 | import composer_pandas as pd 13 | 14 | def gen_data(size): 15 | total_population = np.ones(size, dtype="float64") * 500000 16 | adult_population = np.ones(size, dtype="float64") * 250000 17 | num_robberies = np.ones(size, dtype="float64") * 1000 18 | return pd.Series(total_population), pd.Series(adult_population), pd.Series(num_robberies) 19 | 20 | def crime_index_composer(total_population, adult_population, num_robberies, threads): 21 | # Get all city information with total population greater than 500,000 22 | big_cities = pd.greater_than(total_population, 500000.0) 23 | big_cities.dontsend = True 24 | big_cities = pd.mask(total_population, big_cities, 0.0) 25 | big_cities.dontsend = True 26 | 27 | double_pop = pd.multiply(adult_population, 2.0) 28 | double_pop.dontsend = True 29 | double_pop = pd.add(big_cities, 
double_pop) 30 | double_pop.dontsend = True 31 | multiplied = pd.multiply(num_robberies, 2000.0) 32 | multiplied.dontsend = True 33 | double_pop = pd.subtract(double_pop, multiplied) 34 | double_pop.dontsend = True 35 | crime_index = pd.divide(double_pop, 100000.0) 36 | crime_index.dontsend = True 37 | 38 | 39 | gt = pd.greater_than(crime_index, 0.02) 40 | gt.dontsend = True 41 | crime_index = pd.mask(crime_index, gt, 0.032) 42 | crime_index.dontsend = True 43 | lt = pd.less_than(crime_index, 0.01) 44 | crime_index = pd.mask(crime_index, lt, 0.005) 45 | crime_index.dontsend = True 46 | 47 | result = pd.pandasum(crime_index) 48 | pd.evaluate(workers=threads) 49 | return result.value 50 | 51 | def crime_index_pandas(total_population, adult_population, num_robberies): 52 | print(len(total_population)) 53 | big_cities = total_population > 500000 54 | big_cities = total_population.mask(big_cities, 0.0) 55 | double_pop = adult_population * 2 + big_cities - (num_robberies * 2000.0) 56 | crime_index = double_pop / 100000 57 | crime_index = crime_index.mask(crime_index > 0.02, 0.032) 58 | crime_index = crime_index.mask(crime_index < 0.01, 0.005) 59 | return crime_index.sum() 60 | 61 | def run(): 62 | parser = argparse.ArgumentParser(description="Crime Index") 63 | parser.add_argument('-s', "--size", type=int, default=26, help="Size of each array") 64 | parser.add_argument('-p', "--piece_size", type=int, default=16384*2, help="Size of each piece.") 65 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 66 | parser.add_argument('-m', "--mode", type=str, required=True, help="Mode (composer|naive)") 67 | args = parser.parse_args() 68 | 69 | size = (1 << args.size) 70 | piece_size = args.piece_size 71 | threads = args.threads 72 | mode = args.mode.strip().lower() 73 | 74 | assert mode == "composer" or mode == "naive" 75 | assert threads >= 1 76 | 77 | print("Size:", size) 78 | print("Piece Size:", piece_size) 79 | print("Threads:", threads) 
80 | print("Mode:", mode) 81 | 82 | sys.stdout.write("Generating data...") 83 | sys.stdout.flush() 84 | inputs = gen_data(size) 85 | print("done.") 86 | 87 | start = time.time() 88 | if mode == "composer": 89 | result = crime_index_composer(inputs[0], inputs[1], inputs[2], threads) 90 | elif mode == "naive": 91 | result = crime_index_pandas(*inputs) 92 | end = time.time() 93 | 94 | print(end - start, "seconds") 95 | print(result) 96 | 97 | if __name__ == "__main__": 98 | run() 99 | 100 | -------------------------------------------------------------------------------- /python/benchmarks/data_cleaning/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | size=29 8 | tasks=( naive composer ) 9 | threads=( 1 2 4 8 16 ) 10 | runs=${1:-1} 11 | 12 | for task in "${tasks[@]}"; do 13 | rm -f $task.stdout $task.stderr 14 | git log | head -1 > $task.stderr 15 | git log | head -1 > $task.stdout 16 | done 17 | 18 | for task in "${tasks[@]}"; do 19 | for i in {1..$runs}; do 20 | for nthreads in "${threads[@]}"; do 21 | python data_cleaning.py -m $task -t $nthreads -s $size >> $task.stdout 2>> $task.stderr 22 | done 23 | done 24 | done 25 | -------------------------------------------------------------------------------- /python/benchmarks/data_cleaning/data_cleaning.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # The usual preamble 4 | import numpy as np 5 | import time 6 | import argparse 7 | 8 | import sys 9 | 10 | sys.path.append("../../lib") 11 | sys.path.append("../../pycomposer/") 12 | 13 | import composer_pandas as pd 14 | 15 | def gen_data(size): 16 | values = ["1234567" for _ in range(size)] 17 | return pd.Series(data=values) 18 | 19 | def datacleaning_pandas(requests): 20 | requests = requests.str.slice(0, 5) 21 | zero_zips = requests == "00000" 22 | requests = 
requests.mask(zero_zips, np.nan) 23 | requests = requests.unique() 24 | return requests 25 | 26 | def datacleaning_composer(requests, threads): 27 | # Fix requests with extra digits 28 | requests = pd.series_str_slice(requests, 0, 5) 29 | requests.dontsend = True 30 | 31 | # Fix requests with 00000 zipcodes 32 | zero_zips = pd.equal(requests, "00000") 33 | zero_zips.dontsend = True 34 | requests = pd.mask(requests, zero_zips, np.nan) 35 | requests.dontsend = True 36 | requests = pd.unique(requests) 37 | pd.evaluate(workers=threads) 38 | requests = requests.value 39 | return requests 40 | 41 | def run(): 42 | parser = argparse.ArgumentParser( 43 | description="Data Cleaning" 44 | ) 45 | parser.add_argument('-s', "--size", type=int, default=26, help="Size of each array") 46 | parser.add_argument('-p', "--piece_size", type=int, default=16384*2, help="Size of each piece.") 47 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 48 | parser.add_argument('-v', "--verbosity", type=str, default="none", help="Log level (debug|info|warning|error|critical|none)") 49 | parser.add_argument('-m', "--mode", type=str, required=True, help="Mode (composer|naive)") 50 | args = parser.parse_args() 51 | 52 | size = (1 << args.size) 53 | piece_size = args.piece_size 54 | threads = args.threads 55 | loglevel = args.verbosity 56 | mode = args.mode.strip().lower() 57 | 58 | assert mode == "composer" or mode == "naive" 59 | assert threads >= 1 60 | 61 | print("Size:", size) 62 | print("Piece Size:", piece_size) 63 | print("Threads:", threads) 64 | print("Log Level", loglevel) 65 | print("Mode:", mode) 66 | 67 | sys.stdout.write("Generating data...") 68 | sys.stdout.flush() 69 | inputs = gen_data(size) 70 | print("done.") 71 | 72 | start = time.time() 73 | if mode == "composer": 74 | result = datacleaning_composer(inputs, threads) 75 | elif mode == "naive": 76 | result = datacleaning_pandas(inputs) 77 | end = time.time() 78 | print(end - start, "seconds") 
79 | print(result) 80 | 81 | if __name__ == "__main__": 82 | run() 83 | 84 | -------------------------------------------------------------------------------- /python/benchmarks/datasets/birth_analysis/babynames.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weld-project/split-annotations/d835cc54476336e7f4355d87e820595aeddcc442/python/benchmarks/datasets/birth_analysis/babynames.txt.gz -------------------------------------------------------------------------------- /python/benchmarks/datasets/birth_analysis/replicate-csv: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import argparse 4 | import csv 5 | import random 6 | 7 | def transformCSV_birth_analysis(inputFile, outputFile, replicationFactor): 8 | years = [str(i) for i in range(0, 1000 * replicationFactor)] 9 | gender = [str(i) for i in range(0, 4)] 10 | with open(inputFile, 'r') as f1, open(outputFile, 'w') as f2: 11 | lineId = 0 12 | numCols = 0 13 | count = 0 14 | for line in csv.reader(f1, delimiter=','): 15 | if lineId == 0: 16 | numCols = len(line) 17 | if len(line) != numCols: 18 | continue 19 | line_ele_list = [str(line_ele) for line_ele in line] 20 | 21 | for factor in range(0, replicationFactor): 22 | count += 1 23 | new_year = years[count % len(years)] 24 | # This is a hack. We need to get to the bottom of pandas ordering on sorts 25 | # which is causing grizzly to include "Leslyn" when pandas does not 26 | # for birth analysis (sort on groupmerger). 
27 | num_diff = str(int(line_ele_list[2]) + count) 28 | new_gender = gender[count % len(gender)] 29 | new_line_list = [new_year, line_ele_list[0], line_ele_list[1], num_diff] 30 | f2.write(','.join(new_line_list)) 31 | f2.write('\n') 32 | lineId += 1 33 | 34 | def transformCSV(inputFile, outputFile, replicationFactor): 35 | with open(inputFile, 'r') as f1, open(outputFile, 'w') as f2: 36 | lineId = 0 37 | numCols = 0 38 | for line in csv.reader(f1, delimiter=','): 39 | if lineId == 0: 40 | numCols = len(line) 41 | f2.write(','.join([str(line_ele) for line_ele in line])) 42 | f2.write('\n') 43 | else: 44 | if len(line) != numCols: 45 | continue 46 | for i in xrange(replicationFactor): 47 | f2.write(','.join([str(line_ele) for line_ele in line])) 48 | f2.write('\n') 49 | lineId += 1 50 | 51 | if __name__ == '__main__': 52 | parser = argparse.ArgumentParser( 53 | description=("Produce plot of data dumped in provided data file") 54 | ) 55 | parser.add_argument('-i', "--inputFile", required=True, 56 | help="Input CSV file") 57 | parser.add_argument('-o', "--outputFile", required=True, 58 | help="Output CSV file") 59 | parser.add_argument('-r', "--replicationFactor", default=1, type=int, 60 | help="Number of times to replicate input row in output file") 61 | 62 | cmdLineArgs = parser.parse_args() 63 | optDict = vars(cmdLineArgs) 64 | if optDict["inputFile"].split("/")[-1].startswith("yob"): 65 | transformCSV_birth_analysis(optDict["inputFile"], optDict["outputFile"], 66 | optDict["replicationFactor"]) 67 | else: 68 | transformCSV(optDict["inputFile"], optDict["outputFile"], 69 | optDict["replicationFactor"]) 70 | -------------------------------------------------------------------------------- /python/benchmarks/datasets/movielens/replicate-csv: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import argparse 4 | import csv 5 | import random 6 | 7 | def transformCSV(inputFile, outputFile, replicationFactor): 8 
| with open(inputFile, 'r') as f1, open(outputFile, 'w') as f2: 9 | lineId = 0 10 | numCols = 0 11 | for line in f1: 12 | line = line.split("::") 13 | if lineId == 0: 14 | numCols = len(line) 15 | f2.write('::'.join([str(line_ele) for line_ele in line])) 16 | else: 17 | if len(line) != numCols: 18 | continue 19 | for i in xrange(replicationFactor): 20 | f2.write('::'.join([str(line_ele) for line_ele in line])) 21 | lineId += 1 22 | 23 | if __name__ == '__main__': 24 | parser = argparse.ArgumentParser( 25 | description=("Produce plot of data dumped in provided data file") 26 | ) 27 | parser.add_argument('-i', "--inputFile", required=True, 28 | help="Input CSV file") 29 | parser.add_argument('-o', "--outputFile", required=True, 30 | help="Output CSV file") 31 | parser.add_argument('-r', "--replicationFactor", default=1, type=int, 32 | help="Number of times to replicate input row in output file") 33 | 34 | cmdLineArgs = parser.parse_args() 35 | optDict = vars(cmdLineArgs) 36 | transformCSV(optDict["inputFile"], optDict["outputFile"], optDict["replicationFactor"]) 37 | -------------------------------------------------------------------------------- /python/benchmarks/get-data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | cd datasets 6 | TOP=`pwd` 7 | 8 | # MovieLens dataset for the MovieLens workload. 
9 | cd movielens 10 | wget http://files.grouplens.org/datasets/movielens/ml-1m.zip 11 | unzip ml-1m.zip 12 | rm -rf _data 13 | mkdir -p _data 14 | mv ml-1m _data 15 | rm ml-1m.zip 16 | ./replicate-csv -i _data/ml-1m/movies.dat -o _data/ml-1m/movies-large.dat -r 7 17 | ./replicate-csv -i _data/ml-1m/ratings.dat -o _data/ml-1m/ratings-large.dat -r 7 18 | ./replicate-csv -i _data/ml-1m/users.dat -o _data/ml-1m/users-large.dat -r 7 19 | 20 | # Birth Analysis 21 | cd $TOP 22 | cd birth_analysis 23 | gunzip -k babynames.txt.gz 24 | rm -rf _data 25 | mkdir -p _data 26 | mv babynames.txt _data 27 | ./replicate-csv -i _data/babynames.txt -o _data/babynames-xlarge.txt -r 80 28 | -------------------------------------------------------------------------------- /python/benchmarks/haversine/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | size=30 8 | runs=${1:-1} 9 | tasks=( numba naive composer ) 10 | threads=( 1 2 4 8 16 ) 11 | 12 | for task in "${tasks[@]}"; do 13 | rm -f $task.stdout $task.stderr 14 | git log | head -1 > $task.stderr 15 | git log | head -1 > $task.stdout 16 | done 17 | 18 | for i in {1..$runs}; do 19 | for nthreads in "${threads[@]}"; do 20 | NUMBA_NUM_THREADS=$nthreads python haversine_numba.py -s $size >> numba.stdout 2>> numba.stderr 21 | done 22 | done 23 | 24 | for i in {1..$runs}; do 25 | for nthreads in "${threads[@]}"; do 26 | python haversine.py -m composer -s $size -t $nthreads >> composer.stdout 2>> composer.stderr 27 | done 28 | done 29 | 30 | for i in {1..$runs}; do 31 | for nthreads in "${threads[@]}"; do 32 | python haversine.py -m naive -s $size >> naive.stdout 2>> naive.stderr 33 | done 34 | done 35 | -------------------------------------------------------------------------------- /python/benchmarks/haversine/haversine.py: -------------------------------------------------------------------------------- 1 | 2 | import 
sys 3 | sys.path.append("../../lib/") 4 | sys.path.append("../../pycomposer/") 5 | 6 | import argparse 7 | import math 8 | import time 9 | 10 | def get_data(size, composer): 11 | if composer: 12 | import composer_numpy as np 13 | else: 14 | import numpy as np 15 | 16 | lats = np.ones(size, dtype="float64") * 0.0698132 17 | lons = np.ones(size, dtype="float64") * 0.0698132 18 | return lats, lons 19 | 20 | def haversine(lat2, lon2, composer, threads): 21 | if composer: 22 | import composer_numpy as np 23 | else: 24 | import numpy as np 25 | 26 | lat1 = 0.70984286 27 | lon1 = 1.23892197 28 | MILES_CONST = 3959.0 29 | 30 | start = time.time() 31 | a = np.zeros(len(lat2), dtype="float64") 32 | dlat = np.zeros(len(lat2), dtype="float64") 33 | dlon = np.zeros(len(lat2), dtype="float64") 34 | end = time.time() 35 | print("Allocation time:", end-start) 36 | 37 | start = time.time() 38 | np.subtract(lat2, lat1, out=dlat) 39 | np.subtract(lon2, lon1, out=dlon) 40 | 41 | # dlat = sin(dlat / 2.0) ** 2.0 42 | np.divide(dlat, 2.0, out=dlat) 43 | np.sin(dlat, out=dlat) 44 | np.multiply(dlat, dlat, out=dlat) 45 | 46 | # a = cos(lat1) * cos(lat2) 47 | lat1_cos = math.cos(lat1) 48 | np.cos(lat2, out=a) 49 | np.multiply(a, lat1_cos, out=a) 50 | 51 | # a = a + sin(dlon / 2.0) ** 2.0 52 | np.divide(dlon, 2.0, out=dlon) 53 | np.sin(dlon, out=dlon) 54 | np.multiply(dlon, dlon, out=dlon) 55 | np.multiply(a, dlon, out=a) 56 | np.add(dlat, a, out=a) 57 | 58 | c = a 59 | np.sqrt(a, out=a) 60 | np.arcsin(a, out=a) 61 | np.multiply(a, 2.0, out=c) 62 | 63 | mi = c 64 | np.multiply(c, MILES_CONST, out=mi) 65 | 66 | if composer: 67 | np.evaluate(workers=threads) 68 | 69 | end = time.time() 70 | print("Runtime:", end-start) 71 | 72 | return mi 73 | 74 | def run(): 75 | parser = argparse.ArgumentParser( 76 | description="Haversine distance computation." 
77 | ) 78 | parser.add_argument('-s', "--size", type=int, default=26, help="Size of each array") 79 | parser.add_argument('-p', "--piece_size", type=int, default=16384, help="Size of each piece.") 80 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 81 | parser.add_argument('-v', "--verbosity", type=str, default="none", help="Log level (debug|info|warning|error|critical|none)") 82 | parser.add_argument('-m', "--mode", type=str, required=True, help="Mode (composer|naive)") 83 | args = parser.parse_args() 84 | 85 | size = (1 << args.size) 86 | piece_size = args.piece_size 87 | threads = args.threads 88 | loglevel = args.verbosity 89 | mode = args.mode.strip().lower() 90 | 91 | print("Size:", size) 92 | print("Piece Size:", piece_size) 93 | print("Threads:", threads) 94 | print("Log Level", loglevel) 95 | print("Mode:", mode) 96 | 97 | if mode == "composer": 98 | composer = True 99 | elif mode == "naive": 100 | composer = False 101 | else: 102 | raise ValueError("unknown mode", mode) 103 | 104 | sys.stdout.write("Generating data...") 105 | sys.stdout.flush() 106 | lats, lons = get_data(size, composer) 107 | print("done.") 108 | 109 | 110 | mi = haversine(lats, lons, composer, threads) 111 | print(mi) 112 | 113 | if __name__ == "__main__": 114 | run() 115 | 116 | -------------------------------------------------------------------------------- /python/benchmarks/haversine/haversine_numba.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | import math 4 | import sys 5 | import time 6 | 7 | import numpy as np 8 | from numba import njit 9 | 10 | def get_data(size): 11 | lats = np.ones(size, dtype="float64") * 0.0698132 12 | lons = np.ones(size, dtype="float64") * 0.0698132 13 | return lats, lons 14 | 15 | 16 | @njit(parallel=True) 17 | def haversine(lat2, lon2): 18 | lat1 = 0.70984286 19 | lon1 = 1.23892197 20 | miles_constant = 3959.0 21 | dlat = lat2 - lat1 22 | dlon = 
lon2 - lon1 23 | a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2 24 | c = 2.0 * np.arcsin(np.sqrt(a)) 25 | mi = miles_constant * c 26 | return mi 27 | 28 | def run(): 29 | parser = argparse.ArgumentParser( 30 | description="Haversine distance computation." 31 | ) 32 | parser.add_argument('-s', "--size", type=int, default=26, help="Size of each array") 33 | parser.add_argument('-p', "--piece_size", type=int, default=16384, help="Size of each piece.") 34 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 35 | parser.add_argument('-v', "--verbosity", type=str, default="none", help="Log level (debug|info|warning|error|critical|none)") 36 | args = parser.parse_args() 37 | 38 | size = (1 << args.size) 39 | piece_size = args.piece_size 40 | threads = args.threads 41 | loglevel = args.verbosity 42 | 43 | print("Size:", size) 44 | print("Piece Size:", piece_size) 45 | print("Threads:", threads) 46 | print("Log Level", loglevel) 47 | 48 | sys.stdout.write("Generating data...") 49 | sys.stdout.flush() 50 | lats, lons = get_data(size) 51 | print("done.") 52 | 53 | 54 | start = time.time() 55 | mi = haversine(lats, lons) 56 | end = time.time() 57 | print("Runtime:", end - start) 58 | print(mi) 59 | 60 | if __name__ == "__main__": 61 | run() 62 | 63 | -------------------------------------------------------------------------------- /python/benchmarks/movielens/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | runs=${1:-1} 8 | tasks=( naive composer ) 9 | threads=( 1 2 4 8 16 ) 10 | 11 | for task in "${tasks[@]}"; do 12 | rm -f $task.stdout $task.stderr 13 | git log | head -1 > $task.stderr 14 | git log | head -1 > $task.stdout 15 | done 16 | 17 | for i in {1..$runs}; do 18 | for nthreads in "${threads[@]}"; do 19 | python movielens_composer.py -t $nthreads >> composer.stdout 2>> 
composer.stderr 20 | done 21 | done 22 | 23 | for i in {1..$runs}; do 24 | python movielens.py >> naive.stdout 2>> naive.stderr 25 | done 26 | -------------------------------------------------------------------------------- /python/benchmarks/movielens/movielens.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import sys 3 | import time 4 | 5 | import warnings 6 | warnings.filterwarnings('ignore') 7 | 8 | # Make display smaller 9 | pd.options.display.max_rows = 10 10 | 11 | sys.stdout.write("Reading data...") 12 | sys.stdout.flush() 13 | unames = ['user_id', 'gender', 'age', 'occupation', 'zip'] 14 | users = pd.read_table('../datasets/movielens/_data/ml-1m/users-large.dat', sep='::', header=None, 15 | names=unames) 16 | 17 | rnames = ['user_id', 'movie_id', 'rating', 'timestamp'] 18 | ratings = pd.read_table('../datasets/movielens/_data/ml-1m/ratings-large.dat', sep='::', header=None, 19 | names=rnames) 20 | 21 | mnames = ['movie_id', 'title', 'genres'] 22 | movies = pd.read_table('../datasets/movielens/_data/ml-1m/movies-large.dat', sep='::', header=None, 23 | names=mnames) 24 | print("Done") 25 | 26 | e2e_start = time.time() 27 | 28 | start = time.time() 29 | data = pd.merge(ratings, users) 30 | end = time.time() 31 | print("Merge 1:", end - start) 32 | start = end 33 | data = pd.merge(data, movies) 34 | end = time.time() 35 | print("Merge 2:", end - start) 36 | start = end 37 | print(len(data)) 38 | data = data[data['age'] > 45] 39 | print(len(data)) 40 | end = time.time() 41 | print("Filter:", end - start) 42 | start = end 43 | 44 | mean_ratings = data.pivot_table('rating', index='title', columns='gender', 45 | aggfunc='mean') 46 | end = time.time() 47 | print("Pivot:", end - start) 48 | start = end 49 | 50 | ratings_by_title = data.groupby('title').size() 51 | end = time.time() 52 | print("GroupBy size:", end - start) 53 | start = end 54 | 55 | active_titles = 
ratings_by_title.index[ratings_by_title >= 250] 56 | mean_ratings = mean_ratings.loc[active_titles] 57 | mean_ratings['diff'] = mean_ratings['M'] - mean_ratings['F'] 58 | sorted_by_diff = mean_ratings.sort_values(by='diff') 59 | end = time.time() 60 | print("Diff:", end - start) 61 | start = end 62 | 63 | rating_std_by_title = data.groupby('title')['rating'].std() 64 | end = time.time() 65 | print("GroupBy std:", end - start) 66 | start = end 67 | 68 | rating_std_by_title = rating_std_by_title.loc[active_titles] 69 | rating_std_by_title = rating_std_by_title.sort_values(ascending=False)[:10] 70 | end = time.time() 71 | print("Sort:", end - start) 72 | start = end 73 | 74 | e2e_end = time.time() 75 | 76 | print(sorted_by_diff.head()) 77 | print(rating_std_by_title.head()) 78 | 79 | print("Total:", e2e_end - e2e_start) 80 | -------------------------------------------------------------------------------- /python/benchmarks/movielens/movielens_composer.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | import sys 4 | 5 | sys.path.append("../../lib/") 6 | sys.path.append("../../pycomposer/") 7 | 8 | import composer_pandas as pd 9 | import time 10 | 11 | import warnings 12 | warnings.filterwarnings('ignore') 13 | 14 | def run(threads): 15 | # Make display smaller 16 | pd.options.display.max_rows = 10 17 | 18 | sys.stdout.write("Reading data...") 19 | sys.stdout.flush() 20 | unames = ['user_id', 'gender', 'age', 'occupation', 'zip'] 21 | users = pd.read_table('../datasets/movielens/_data/ml-1m/users-large.dat', sep='::', header=None, 22 | names=unames) 23 | 24 | rnames = ['user_id', 'movie_id', 'rating', 'timestamp'] 25 | ratings = pd.read_table('../datasets/movielens/_data/ml-1m/ratings-large.dat', sep='::', header=None, 26 | names=rnames) 27 | 28 | mnames = ['movie_id', 'title', 'genres'] 29 | movies = pd.read_table('../datasets/movielens/_data/ml-1m/movies-large.dat', sep='::', header=None, 30 | 
names=mnames) 31 | print("Done") 32 | 33 | e2e_start = time.time() 34 | 35 | start = time.time() 36 | tmp = pd.merge(ratings, users) 37 | tmp.dontsend = True 38 | data = pd.merge(tmp, movies) 39 | data.dontsend = True 40 | data = pd.filter(data, 'age', 45) 41 | pd.evaluate(workers=threads) 42 | data = data.value 43 | end = time.time() 44 | print("Merge 2:", end - start) 45 | start = end 46 | 47 | mean_ratings = data.pivot_table('rating', index='title', columns='gender', 48 | aggfunc='mean') 49 | end = time.time() 50 | print("Pivot:", end - start) 51 | start = end 52 | 53 | """ 54 | ratings_by_title = pd.dfgroupby(data, 'title') 55 | ratings_by_title = pd.gbsize(ratings_by_title) 56 | pd.evaluate(workers=threads) 57 | ratings_by_title = ratings_by_title.value 58 | """ 59 | ratings_by_title = data.groupby('title').size() 60 | end = time.time() 61 | 62 | print("GroupBy size:", end - start) 63 | start = end 64 | 65 | active_titles = ratings_by_title.index[ratings_by_title >= 250] 66 | mean_ratings = mean_ratings.loc[active_titles] 67 | mean_ratings['diff'] = mean_ratings['M'] - mean_ratings['F'] 68 | sorted_by_diff = mean_ratings.sort_values(by='diff') 69 | end = time.time() 70 | print("Diff:", end - start) 71 | start = end 72 | 73 | rating_std_by_title = data.groupby('title')['rating'].std() 74 | end = time.time() 75 | print("GroupBy std:", end - start) 76 | start = end 77 | 78 | rating_std_by_title = rating_std_by_title.loc[active_titles] 79 | rating_std_by_title = rating_std_by_title.sort_values(ascending=False)[:10] 80 | end = time.time() 81 | print("Sort:", end - start) 82 | start = end 83 | 84 | e2e_end = time.time() 85 | 86 | print(sorted_by_diff.head()) 87 | print(rating_std_by_title.head()) 88 | 89 | print("Total:", e2e_end - e2e_start) 90 | 91 | def main(): 92 | parser = argparse.ArgumentParser( 93 | description="MovieLens with Composer." 
94 | ) 95 | parser.add_argument('-t', "--threads", type=int, default=16, help="Number of threads.") 96 | args = parser.parse_args() 97 | 98 | threads = args.threads 99 | 100 | print("Threads:", threads) 101 | mi = run(threads) 102 | 103 | 104 | if __name__ == "__main__": 105 | main() 106 | -------------------------------------------------------------------------------- /python/benchmarks/nbody/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | runs=${1:-1} 8 | size=15 9 | iterations=5 10 | tasks=( numba naive composer bohrium ) 11 | threads=( 1 2 4 8 16 ) 12 | 13 | for task in "${tasks[@]}"; do 14 | rm -f $task.stdout $task.stderr 15 | git log | head -1 > $task.stderr 16 | git log | head -1 > $task.stdout 17 | done 18 | 19 | for i in {1..$runs}; do 20 | for nthreads in "${threads[@]}"; do 21 | NUMBA_NUM_THREADS=$nthreads python nbody_numba.py -s $size -i $iterations >> numba.stdout 2>> numba.stderr 22 | done 23 | done 24 | 25 | for i in {1..$runs}; do 26 | for nthreads in "${threads[@]}"; do 27 | python nbody.py -m composer -s $size -i $iterations -t $nthreads >> composer.stdout 2>> composer.stderr 28 | done 29 | done 30 | 31 | for i in {1..$runs}; do 32 | python nbody.py -m naive -s $size -i $iterations -t $nthreads >> naive.stdout 2>> naive.stderr 33 | done 34 | 35 | for i in {1..$runs}; do 36 | for nthreads in "${threads[@]}"; do 37 | OMP_NUM_THREADS=$nthreads python nbody_boh.py -s $size -i $iterations -t $nthreads >> bohrium.stdout 2>> bohrium.stderr 38 | done 39 | done 40 | -------------------------------------------------------------------------------- /python/benchmarks/nbody/nbody_boh.py: -------------------------------------------------------------------------------- 1 | """ 2 | NBody in N^2 complexity 3 | 4 | Note that we are using only Newtonian forces and do not consider relativity 5 | Neither do we consider collisions between 
stars 6 | Thus some of our stars will accelerate to speeds beyond c 7 | This is done to keep the simulation simple enough for teaching purposes 8 | 9 | All the work is done in the calc_force, move and random_galaxy functions. 10 | To vectorize the code these are the functions to transform. 11 | 12 | https://benchpress.readthedocs.io/autodoc_benchmarks/nbody_nice.html 13 | """ 14 | 15 | import argparse 16 | import sys 17 | import time 18 | 19 | import bohrium as np 20 | import numpy 21 | 22 | def fill_diagonal(a, val): 23 | """ Set diagonal of 2D matrix a to val in-place. """ 24 | d, _ = a.shape 25 | a.shape = d * d 26 | a[::d + 1] = val 27 | a.shape = (d, d) 28 | 29 | def random_galaxy(N): 30 | """ Generate a galaxy of random bodies """ 31 | m = np.array((numpy.arange(0.0, 1.0, step=1.0 / N) + np.float64(10)) * np.float64(m_sol/10)) 32 | x = np.array((numpy.arange(0.0, 1.0, step=1.0 / N) - np.float64(0.5)) * np.float64(r_ly/100)) 33 | y = np.array((numpy.arange(0.0, 1.0, step=1.0 / N) - np.float64(0.5)) * np.float64(r_ly/100)) 34 | z = np.array((numpy.arange(0.0, 1.0, step=1.0 / N) - np.float64(0.5)) * np.float64(r_ly/100)) 35 | vx = np.zeros(N, dtype=np.float64) 36 | vy = np.zeros(N, dtype=np.float64) 37 | vz = np.zeros(N, dtype=np.float64) 38 | 39 | assert len(m) == N 40 | return m, x, y, z, vx, vy, vz 41 | 42 | def move(m, x, y, z, vx, vy, vz, dt, temporaries): 43 | """ Move the bodies. 44 | 45 | first find forces and change velocity and then move positions. 
46 | """ 47 | 48 | start = time.time() 49 | 50 | dx = x - x[:,None] 51 | dy = numpy.subtract(y, y[:,None]) 52 | dz = numpy.subtract(z, z[:,None]) 53 | pm = numpy.multiply(m, m[:,None]) 54 | 55 | end = time.time() 56 | print("Step 0:", end - start) 57 | 58 | start = end 59 | 60 | r = np.sqrt(dx ** 2 + dy ** 2 + dz ** 2) 61 | tmp = G * pm / r ** 2 62 | Fx = tmp * (dx / r) 63 | Fy = tmp * (dy / r) 64 | Fz = tmp * (dz / r) 65 | 66 | end = time.time() 67 | print("Step 1:", end - start) 68 | start = end 69 | 70 | fill_diagonal(Fx, 0.0) 71 | fill_diagonal(Fy, 0.0) 72 | fill_diagonal(Fz, 0.0) 73 | end = time.time() 74 | print("Step 2:", end - start) 75 | start = end 76 | 77 | mdt = m / dt 78 | 79 | # Update state. 80 | vx += np.add.reduce(Fx, axis=1) / mdt 81 | vy += np.add.reduce(Fy, axis=1) / mdt 82 | vz += np.add.reduce(Fz, axis=1) / mdt 83 | x += vx * dt 84 | y += vy * dt 85 | z += vz * dt 86 | 87 | end = time.time() 88 | print("Step 3:", end - start) 89 | start = end 90 | return Fx, Fy, Fz 91 | 92 | def simulate(m, x, y, z, vx, vy, vz, timesteps): 93 | 94 | temporaries = ( 95 | np.ones((size, size), dtype="float64"), 96 | np.ones((size, size), dtype="float64"), 97 | np.ones((size, size), dtype="float64"), 98 | np.ones((size, size), dtype="float64") 99 | ) 100 | 101 | 102 | start = time.time() 103 | for i in range(timesteps): 104 | ret = move(m, x, y, z, vx, vy, vz, dt, temporaries) 105 | np.flush() 106 | print(x, y, z) 107 | end = time.time() 108 | print("Simulation time:", end - start) 109 | 110 | ####################################################################3 111 | # ENTRY POINT 112 | ####################################################################3 113 | 114 | parser = argparse.ArgumentParser( 115 | description="N-Body benchmark." 
116 | ) 117 | parser.add_argument('-s', "--size", type=int, default=10, help="Size of each array") 118 | parser.add_argument('-i', "--iterations", type=int, default=1, help="Iterations of simulation") 119 | parser.add_argument('-p', "--piece_size", type=int, default=16384, help="Size of each piece.") 120 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 121 | parser.add_argument('-v', "--verbosity", type=str, default="none",\ 122 | help="Log level (debug|info|warning|error|critical|none)") 123 | args = parser.parse_args() 124 | 125 | size = (1 << args.size) 126 | iterations = args.iterations 127 | piece_size = args.piece_size 128 | threads = args.threads 129 | loglevel = args.verbosity 130 | 131 | assert threads >= 1 132 | 133 | print("Size:", size) 134 | print("Piece Size:", piece_size) 135 | print("Threads:", threads) 136 | print("Log Level", loglevel) 137 | 138 | # Constants 139 | G = np.float64(6.67384e-11) # m/(kg*s^2) 140 | dt = np.float64(60*60*24*365.25) # Years in seconds 141 | r_ly = np.float64(9.4607e15) # Lightyear in m 142 | m_sol = np.float64(1.9891e30) # Solar mass in kg 143 | 144 | np.seterr(divide='ignore', invalid='ignore') 145 | 146 | sys.stdout.write("Generating data...") 147 | sys.stdout.flush() 148 | m, x, y, z, vx, vy, vz = random_galaxy(size) 149 | print("done.") 150 | 151 | simulate(m, x, y, z, vx, vy, vz, iterations) 152 | print(x) 153 | -------------------------------------------------------------------------------- /python/benchmarks/nbody/nbody_numba.py: -------------------------------------------------------------------------------- 1 | """ 2 | NBody in N^2 complexity 3 | 4 | Note that we are using only Newtonian forces and do not consider relativity 5 | Neither do we consider collisions between stars 6 | Thus some of our stars will accelerate to speeds beyond c 7 | This is done to keep the simulation simple enough for teaching purposes 8 | 9 | All the work is done in the calc_force, move and 
random_galaxy functions. 10 | To vectorize the code these are the functions to transform. 11 | 12 | https://benchpress.readthedocs.io/autodoc_benchmarks/nbody_nice.html 13 | """ 14 | 15 | import argparse 16 | import sys 17 | import time 18 | 19 | import numpy as np 20 | from numba import njit 21 | 22 | def fill_diagonal(a, val): 23 | """ Set diagonal of 2D matrix a to val in-place. """ 24 | d, _ = a.shape 25 | a.shape = d * d 26 | a[::d + 1] = val 27 | a.shape = (d, d) 28 | 29 | def random_galaxy(N): 30 | """ Generate a galaxy of random bodies """ 31 | m = (np.arange(0.0, 1.0, step=1.0 / N) + np.float64(10)) * np.float64(m_sol/10) 32 | x = (np.arange(0.0, 1.0, step=1.0 / N) - np.float64(0.5)) * np.float64(r_ly/100) 33 | y = (np.arange(0.0, 1.0, step=1.0 / N) - np.float64(0.5)) * np.float64(r_ly/100) 34 | z = (np.arange(0.0, 1.0, step=1.0 / N) - np.float64(0.5)) * np.float64(r_ly/100) 35 | vx = np.zeros(N, dtype=np.float64) 36 | vy = np.zeros(N, dtype=np.float64) 37 | vz = np.zeros(N, dtype=np.float64) 38 | 39 | assert len(m) == N 40 | return m, x, y, z, vx, vy, vz 41 | 42 | @njit(parallel=True) 43 | def calc_force1(pm, m, x, y, z, dx, dy, dz): 44 | """Calculate forces between bodies 45 | 46 | F = ((G m_a m_b)/r^2)/((x_b-x_a)/r) 47 | 48 | """ 49 | r = np.sqrt(dx ** 2 + dy ** 2 + dz ** 2) 50 | tmp = G * pm / r ** 2 51 | Fx = tmp * (dx / r) 52 | Fy = tmp * (dy / r) 53 | Fz = tmp * (dz / r) 54 | return Fx, Fy, Fz 55 | 56 | 57 | @njit(parallel=True) 58 | def calc_force2(m, Fx, Fy, Fz, x, y, z, vx, vy, vz, dt): 59 | vx += Fx / m * dt 60 | vy += Fy / m * dt 61 | vz += Fz / m * dt 62 | x += vx * dt 63 | y += vy * dt 64 | z += vz * dt 65 | 66 | 67 | def move(m, x, y, z, vx, vy, vz, dt, temporaries): 68 | """ Move the bodies. 69 | 70 | first find forces and change velocity and then move positions. 
71 | """ 72 | dx, dy, dz, pm = temporaries 73 | 74 | start = time.time() 75 | np.subtract(x, x[:,None], out=dx) 76 | np.subtract(y, y[:,None], out=dy) 77 | np.subtract(z, z[:,None], out=dz) 78 | np.multiply(m, m[:,None], out=pm) 79 | end = time.time() 80 | print("Step 0:", end - start) 81 | start = end 82 | 83 | Fx, Fy, Fz = calc_force1(pm, m, x, y, z, dx, dy, dz) 84 | end = time.time() 85 | print("Step 1:", end - start) 86 | start = end 87 | 88 | fill_diagonal(Fx, 0.0) 89 | fill_diagonal(Fy, 0.0) 90 | fill_diagonal(Fz, 0.0) 91 | end = time.time() 92 | print("Step 2:", end - start) 93 | start = end 94 | 95 | Fx2 = Fx[:,0] 96 | np.add.reduce(Fx, axis=1, out=Fx2) 97 | Fy2 = Fy[:,0] 98 | np.add.reduce(Fy, axis=1, out=Fy2) 99 | Fz2 = Fz[:,0] 100 | np.add.reduce(Fz, axis=1, out=Fz2) 101 | 102 | end = time.time() 103 | print("Step 3:", end - start) 104 | start = end 105 | 106 | calc_force2(m, Fx2, Fy2, Fz2, x, y, z, vx, vy, vz, dt) 107 | end = time.time() 108 | print("Step 4:", end - start) 109 | start = end 110 | 111 | def simulate(m, x, y, z, vx, vy, vz, timesteps): 112 | 113 | temporaries = ( 114 | np.ones((size, size), dtype="float64"), 115 | np.ones((size, size), dtype="float64"), 116 | np.ones((size, size), dtype="float64"), 117 | np.ones((size, size), dtype="float64") 118 | ) 119 | 120 | 121 | start = time.time() 122 | for i in range(timesteps): 123 | ret = move(m, x, y, z, vx, vy, vz, dt, temporaries) 124 | end = time.time() 125 | print("Simulation time:", end - start) 126 | 127 | ####################################################################3 128 | # ENTRY POINT 129 | ####################################################################3 130 | 131 | parser = argparse.ArgumentParser( 132 | description="N-Body benchmark." 
133 | ) 134 | parser.add_argument('-s', "--size", type=int, default=10, help="Size of each array") 135 | parser.add_argument('-i', "--iterations", type=int, default=1, help="Iterations of simulation") 136 | parser.add_argument('-p', "--piece_size", type=int, default=16384, help="Size of each piece.") 137 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 138 | parser.add_argument('-v', "--verbosity", type=str, default="none",\ 139 | help="Log level (debug|info|warning|error|critical|none)") 140 | args = parser.parse_args() 141 | 142 | size = (1 << args.size) 143 | iterations = args.iterations 144 | piece_size = args.piece_size 145 | threads = args.threads 146 | loglevel = args.verbosity 147 | 148 | assert threads >= 1 149 | 150 | print("Size:", size) 151 | print("Piece Size:", piece_size) 152 | print("Threads:", threads) 153 | print("Log Level", loglevel) 154 | 155 | # Constants 156 | G = np.float64(6.67384e-11) # m/(kg*s^2) 157 | dt = np.float64(60*60*24*365.25) # Years in seconds 158 | r_ly = np.float64(9.4607e15) # Lightyear in m 159 | m_sol = np.float64(1.9891e30) # Solar mass in kg 160 | 161 | np.seterr(divide='ignore', invalid='ignore') 162 | 163 | sys.stdout.write("Generating data...") 164 | sys.stdout.flush() 165 | m, x, y, z, vx, vy, vz = random_galaxy(size) 166 | print("done.") 167 | 168 | simulate(m, x, y, z, vx, vy, vz, iterations) 169 | print(x) 170 | -------------------------------------------------------------------------------- /python/benchmarks/requirements.txt: -------------------------------------------------------------------------------- 1 | atomicwrites==1.3.0 2 | attrs==19.1.0 3 | blis==0.2.4 4 | bohrium==0.10.2.post29 5 | bohrium-api==0.10.2.post29 6 | certifi==2019.3.9 7 | chardet==3.0.4 8 | cloudpickle==0.8.1 9 | cymem==2.0.2 10 | idna==2.8 11 | joblib==0.13.2 12 | jsonschema==2.6.0 13 | llvmlite==0.28.0 14 | more-itertools==7.0.0 15 | murmurhash==1.0.2 16 | numba==0.43.1 17 | numpy==1.16.2 18 | 
pandas==0.24.2 19 | pathlib2==2.3.3 20 | plac==0.9.6 21 | pluggy==0.9.0 22 | preshed==2.0.1 23 | py==1.8.0 24 | pyarrow==0.13.0 25 | pytest==4.4.0 26 | python-dateutil==2.8.0 27 | pytz==2019.1 28 | requests==2.21.0 29 | scipy==1.2.1 30 | sharedmem==0.3.5 31 | six==1.12.0 32 | spacy==2.1.3 33 | srsly==0.0.5 34 | thinc==7.0.4 35 | tqdm==4.31.1 36 | urllib3==1.24.2 37 | wasabi==0.2.1 38 | -------------------------------------------------------------------------------- /python/benchmarks/run-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | # Name of the environment 6 | rm -rf benchmarks 7 | ./setup-env.sh 8 | 9 | # Get the data 10 | ./get-data.sh 11 | 12 | rm -rf results/ 13 | mkdir results/ 14 | 15 | tasks=( blackscholes birth_analysis crime_index data_cleaning haversine movielens nbody shallow_water speechtag ) 16 | 17 | # Write system information. 18 | git log | head -1 > results/CONFIG.txt 19 | uname -a >> results/CONFIG.txt 20 | lsb_release -d >> results/CONFIG.txt 21 | 22 | for task in "${tasks[@]}"; do 23 | echo "Executing $task" 24 | pushd $task 25 | ./benchmark.sh 26 | popd 27 | mkdir results/$task 28 | mv $task/*.std* results/$task 29 | done 30 | -------------------------------------------------------------------------------- /python/benchmarks/setup-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | virtualenv -p python3.5 benchmarks 4 | source benchmarks/bin/activate 5 | 6 | # Install everything 7 | pip install -r requirements.txt 8 | -------------------------------------------------------------------------------- /python/benchmarks/shallow_water/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | size=14 8 | runs=${1:-1} 9 | iterations=10 10 | tasks=( numba composer naive bohrium ) 11 | threads=( 
1 2 4 8 16 ) 12 | 13 | for task in "${tasks[@]}"; do 14 | rm -f $task.stdout $task.stderr 15 | git log | head -1 > $task.stderr 16 | git log | head -1 > $task.stdout 17 | done 18 | 19 | for i in {1..$runs}; do 20 | for nthreads in "${threads[@]}"; do 21 | NUMBA_NUM_THREADS=$nthreads python shallow_water_numba.py -s $size -i $iterations >> numba.stdout 2>> numba.stderr 22 | done 23 | done 24 | 25 | for i in {1..$runs}; do 26 | for nthreads in "${threads[@]}"; do 27 | OMP_NUM_THREADS=$nthreads python shallow_water.py -m bohrium -s $size -i $iterations -t $nthreads >> bohrium.stdout 2>> bohrium.stderr 28 | done 29 | done 30 | 31 | unset OMP_NUM_THREADS 32 | 33 | for i in {1..$runs}; do 34 | for nthreads in "${threads[@]}"; do 35 | python shallow_water.py -m composer -s $size -i $iterations -t $nthreads >> composer.stdout 2>> composer.stderr 36 | done 37 | done 38 | 39 | for i in {1..$runs}; do 40 | python shallow_water.py -m naive -s $size -i $iterations -t 1 >> naive.stdout 2>> naive.stderr 41 | done 42 | -------------------------------------------------------------------------------- /python/benchmarks/shallow_water/shallow_water_numba.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | from numba import njit 4 | 5 | import argparse 6 | import sys 7 | import time 8 | 9 | @njit(parallel=True) 10 | def spatial_derivative(A, axis=0): 11 | """ 12 | Compute derivative of array A using balanced finite differences 13 | Axis specifies direction of spatial derivative (d/dx or d/dy) 14 | 15 | dA[i] = A[i+1] - A[i-1] / 2 16 | ... or with grid spacing included ... 17 | dA[i]/dx = A[i+1] - A[i-1] / 2dx 18 | 19 | Used By: 20 | d_dx 21 | d_dy 22 | """ 23 | return (np.roll(A, -1) - np.roll(A, 1)) / (grid_spacing*2.) 
@njit(parallel=True)
def d_dx(A):
    """Spatial derivative of A in the x direction."""
    return spatial_derivative(A, 1)

@njit(parallel=True)
def d_dy(A):
    """Spatial derivative of A in the y direction."""
    return spatial_derivative(A, 0)

@njit(parallel=True)
def d_dt(eta, u, v, g, b=0):
    """Advance the shallow-water state (eta, u, v) by one Euler step.

    http://en.wikipedia.org/wiki/Shallow_water_equations#Non-conservative_form
    """
    du_dt = -g*d_dx(eta) - b*u
    dv_dt = -g*d_dy(eta) - b*v

    # Our definition of eta already includes the mean-depth term, so H is
    # zero here rather than eta.mean().
    H = 0
    deta_dt = -d_dx(u * (H+eta)) - d_dy(v * (H+eta))

    # NOTE: `dt` is the module-level global; numba captures it at compile
    # time. Deliberately out-of-place so the caller's arrays are untouched.
    eta = eta + deta_dt * dt
    u = u + du_dt * dt
    v = v + dv_dt * dt
    return eta, u, v


def evolveEuler(eta, u, v, g, dt):
    """Generate the Euler-method evolution of the state.

    x_{n+1} = x_{n} + dx/dt * d_t

    Yields an infinite sequence of (eta, u, v, elapsed_time) tuples,
    beginning with the initial conditions themselves.
    """
    elapsed = 0
    # Initial conditions are the first state in the sequence.
    yield eta, u, v, elapsed
    while True:
        eta, u, v = d_dt(eta, u, v, g)
        elapsed += dt
        yield eta, u, v, elapsed

def simulate(eta, u, v, g, dt, iterations):
    """Run `iterations` Euler steps, timing the loop and printing progress."""
    trajectory = evolveEuler(eta, u, v, g, dt)

    start = time.time()

    # Consume the initial state first, then one state per iteration.
    eta, u, v, elapsed = next(trajectory)
    for _ in range(iterations):
        eta, u, v, elapsed = next(trajectory)
        print(eta[0][0])

    end = time.time()
    print("total time:", end - start)

    print("Final State:")
    print(eta[0][0])


####################################################################
# ENTRY POINT
####################################################################

parser = argparse.ArgumentParser(
    description="Shallow Water benchmark."
)
parser.add_argument('-s', "--size", type=int, default=10, help="Size of each array")
parser.add_argument('-i', "--iterations", type=int, default=1, help="Iterations of simulation")
parser.add_argument('-p', "--piece_size", type=int, default=16384, help="Size of each piece.")
parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.")
parser.add_argument('-v', "--verbosity", type=str, default="none",
                    help="Log level (debug|info|warning|error|critical|none)")
args = parser.parse_args()

size = (1 << args.size)
iterations = args.iterations
piece_size = args.piece_size
threads = args.threads
loglevel = args.verbosity

assert threads >= 1

print("Size:", size)
print("Piece Size:", piece_size)
print("Threads:", threads)
print("Log Level", loglevel)
print("Mode:", "Numba")

sys.stdout.write("Generating data...")
sys.stdout.flush()

# Initial Conditions
n = size

# velocity in x direction
u = np.zeros((n, n))
# velocity in y direction
v = np.zeros((n, n))
# pressure deviation (like height)
eta = np.ones((n, n))

# Give eta a linear gradient across rows.
for i in range(n):
    eta[i] *= 0.1 * i

# Constants. G, r_ly and m_sol are unused here; presumably kept for parity
# with the other benchmarks. Note dt is immediately overwritten below.
G = np.float64(6.67384e-11)       # m/(kg*s^2)
dt = np.float64(60*60*24*365.25)  # Years in seconds
r_ly = np.float64(9.4607e15)      # Lightyear in m
m_sol = np.float64(1.9891e30)     # Solar mass in kg
b = np.float64(0.0)

box_size = 1.
grid_spacing = 1.0 * box_size / n
g = 1.
dt = grid_spacing / 100.
print("done.")

simulate(eta, u, v, g, dt, iterations)
--------------------------------------------------------------------------------
/python/benchmarks/speechtag/benchmark.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Benchmark the speech-tagging implementations (composer, then plain spacy)
# across thread counts. First argument: number of runs (default 1).

set -x

source ../benchmarks/bin/activate

tasks=( composer )
threads=( 1 2 4 8 16 )
runs=${1:-1}

for task in "${tasks[@]}"; do
    rm -f $task.stdout $task.stderr
    git log | head -1 > $task.stderr
    git log | head -1 > $task.stdout
done

# BUG FIX: bash performs brace expansion before variable expansion, so
# `for i in {1..$runs}` iterated once over the literal string "{1..$runs}"
# and the requested run count was ignored. Use seq instead.
for i in $(seq 1 "$runs"); do
    for task in "${tasks[@]}"; do
        for nthreads in "${threads[@]}"; do
            python speechtag_composer.py -n $nthreads >> $task.stdout 2>> $task.stderr
        done
    done
done

tasks=( spacy )
threads=( 1 )

for task in "${tasks[@]}"; do
    rm -f $task.stdout $task.stderr
    git log | head -1 > $task.stderr
    git log | head -1 > $task.stdout
done

for i in $(seq 1 "$runs"); do
    for task in "${tasks[@]}"; do
        for nthreads in "${threads[@]}"; do
            python speechtag.py >> $task.stdout 2>> $task.stderr
        done
    done
done
--------------------------------------------------------------------------------
/python/benchmarks/speechtag/speechtag.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf8
"""Example of multi-processing with Joblib. Here, we're exporting
part-of-speech-tagged, true-cased, (very roughly) sentence-separated text, with
each "sentence" on a newline, and spaces between tokens. Data is loaded from
the IMDB movie reviews dataset and will be loaded automatically via Thinc's
built-in dataset loader.
8 | 9 | Compatible with: spaCy v2.0.0+ 10 | Last tested with: v2.1.0 11 | Prerequisites: pip install joblib 12 | 13 | 14 | Adapted from https://github.com/explosion/spaCy/blob/master/examples/pipeline/multi_processing.py 15 | """ 16 | 17 | import plac 18 | import sys 19 | import spacy 20 | from spacy.util import minibatch 21 | import thinc.extra.datasets 22 | import time 23 | 24 | @plac.annotations( 25 | model=("Model name (needs tagger)", "positional", None, str), 26 | n_jobs=("Number of workers", "option", "n", int), 27 | batch_size=("Batch-size for each process", "option", "b", int), 28 | limit=("Limit of entries from the dataset", "option", "l", int), 29 | ) 30 | def main(model="en_core_web_sm", n_jobs=4, batch_size=1000, limit=10000): 31 | nlp = spacy.load(model) # load spaCy model 32 | print("Loaded model '%s'" % model) 33 | 34 | # load and pre-process the IMDB dataset 35 | sys.stdout.write("Loading IMDB data...") 36 | data, _ = thinc.extra.datasets.imdb() 37 | print("done.") 38 | texts, _ = zip(*data[-limit:]) 39 | 40 | start = time.time() 41 | process(nlp, texts) 42 | end = time.time() 43 | print("Total:", end - start) 44 | 45 | def process(nlp, texts): 46 | print(nlp.pipe_names) 47 | for doc in nlp.pipe(texts): 48 | sentence = " ".join(represent_word(w) for w in doc if not w.is_space) 49 | sentence += "\n" 50 | 51 | def represent_word(word): 52 | text = word.text 53 | # True-case, i.e. try to normalize sentence-initial capitals. 54 | # Only do this if the lower-cased form is more probable. 
55 | if ( 56 | text.istitle() 57 | and is_sent_begin(word) 58 | and word.prob < word.doc.vocab[text.lower()].prob 59 | ): 60 | text = text.lower() 61 | return text + "|" + word.tag_ 62 | 63 | def is_sent_begin(word): 64 | if word.i == 0: 65 | return True 66 | elif word.i >= 2 and word.nbor(-1).text in (".", "!", "?", "..."): 67 | return True 68 | else: 69 | return False 70 | 71 | if __name__ == "__main__": 72 | plac.call(main) 73 | -------------------------------------------------------------------------------- /python/benchmarks/speechtag/speechtag_composer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf8 3 | """Example of multi-processing with Joblib. Here, we're exporting 4 | part-of-speech-tagged, true-cased, (very roughly) sentence-separated text, with 5 | each "sentence" on a newline, and spaces between tokens. Data is loaded from 6 | the IMDB movie reviews dataset and will be loaded automatically via Thinc's 7 | built-in dataset loader. 
8 | 9 | Compatible with: spaCy v2.0.0+ 10 | Last tested with: v2.1.0 11 | Prerequisites: pip install joblib 12 | 13 | 14 | Adapted from https://github.com/explosion/spaCy/blob/master/examples/pipeline/multi_processing.py 15 | """ 16 | 17 | import plac 18 | import sys 19 | import spacy 20 | from spacy.util import minibatch 21 | import thinc.extra.datasets 22 | import time 23 | 24 | sys.path.append("../../pycomposer/") 25 | 26 | from pycomposer import * 27 | 28 | @plac.annotations( 29 | model=("Model name (needs tagger)", "positional", None, str), 30 | n_jobs=("Number of workers", "option", "n", int), 31 | batch_size=("Batch-size for each process", "option", "b", int), 32 | limit=("Limit of entries from the dataset", "option", "l", int), 33 | ) 34 | def main(model="en_core_web_sm", n_jobs=4, batch_size=1000, limit=10000): 35 | nlp = spacy.load(model) # load spaCy model 36 | print("Loaded model '%s'" % model) 37 | 38 | # load and pre-process the IMDB dataset 39 | sys.stdout.write("Loading IMDB data...") 40 | data, _ = thinc.extra.datasets.imdb() 41 | print("done.") 42 | texts, _ = zip(*data[-limit:]) 43 | 44 | start = time.time() 45 | process(nlp, texts) 46 | evaluate(workers=n_jobs, batch_size=batch_size) 47 | end = time.time() 48 | print("Total:", end - start) 49 | 50 | class TextBatchSplit(SplitType): 51 | def combine(self, values): 52 | """ No need to combine text batches""" 53 | pass 54 | 55 | def split(self, start, end, texts): 56 | return minibatch(texts, size=(end-start)) 57 | 58 | @sa((Broadcast(), TextBatchSplit()), {}, Broadcast()) 59 | def process(nlp, texts): 60 | print(nlp.pipe_names) 61 | for doc in nlp.pipe(texts): 62 | sentence = " ".join(represent_word(w) for w in doc if not w.is_space) 63 | sentence += "\n" 64 | 65 | def represent_word(word): 66 | text = word.text 67 | # True-case, i.e. try to normalize sentence-initial capitals. 68 | # Only do this if the lower-cased form is more probable. 
69 | if ( 70 | text.istitle() 71 | and is_sent_begin(word) 72 | and word.prob < word.doc.vocab[text.lower()].prob 73 | ): 74 | text = text.lower() 75 | return text + "|" + word.tag_ 76 | 77 | def is_sent_begin(word): 78 | if word.i == 0: 79 | return True 80 | elif word.i >= 2 and word.nbor(-1).text in (".", "!", "?", "..."): 81 | return True 82 | else: 83 | return False 84 | 85 | if __name__ == "__main__": 86 | plac.call(main) 87 | -------------------------------------------------------------------------------- /python/benchmarks/weld-python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weld-project/split-annotations/d835cc54476336e7f4355d87e820595aeddcc442/python/benchmarks/weld-python/__init__.py -------------------------------------------------------------------------------- /python/benchmarks/weld-python/benchmark-weld.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | size=14 6 | 7 | tasks=( weld ) 8 | threads=( 1 2 4 8 16 32 ) 9 | 10 | for task in "${tasks[@]}"; do 11 | rm -f $task.stdout $task.stderr 12 | git log | head -1 > $task.stderr 13 | git log | head -1 > $task.stdout 14 | done 15 | 16 | # Weld doesn't seem to free memory properly in this setup, so just run it ten times and add up... 17 | for i in {1..5}; do 18 | for task in "${tasks[@]}"; do 19 | for nthreads in "${threads[@]}"; do 20 | python shallow_water_weld.py -s $size -i 1 -t $nthreads >> $task.stdout 2>> $task.stderr 21 | done 22 | done 23 | done 24 | -------------------------------------------------------------------------------- /python/benchmarks/weld-python/bindings.py: -------------------------------------------------------------------------------- 1 | # 2 | # Implements a wrapper around the Weld API. 
3 | # 4 | 5 | from ctypes import * 6 | 7 | import os 8 | import platform 9 | import copy 10 | 11 | import pkg_resources 12 | 13 | system = platform.system() 14 | if system == 'Linux': 15 | lib_file = "../weld/target/release/libweld.so" 16 | elif system == 'Windows': 17 | lib_file = "libweld.dll" 18 | elif system == 'Darwin': 19 | lib_file = "libweld.dylib" 20 | else: 21 | raise OSError("Unsupported platform {}", system) 22 | 23 | lib_file = pkg_resources.resource_filename(__name__, lib_file) 24 | 25 | weld = CDLL(lib_file, mode=RTLD_GLOBAL) 26 | 27 | # Used for some type checking carried out by ctypes 28 | 29 | class c_weld_module(c_void_p): 30 | pass 31 | 32 | class c_weld_conf(c_void_p): 33 | pass 34 | 35 | class c_weld_err(c_void_p): 36 | pass 37 | 38 | class c_weld_value(c_void_p): 39 | pass 40 | 41 | class WeldModule(c_void_p): 42 | 43 | def __init__(self, code, conf, err): 44 | weld_module_compile = weld.weld_module_compile 45 | weld_module_compile.argtypes = [ 46 | c_char_p, c_weld_conf, c_weld_err] 47 | weld_module_compile.restype = c_weld_module 48 | 49 | code = c_char_p(code) 50 | self.module = weld_module_compile(code, conf.conf, err.error) 51 | 52 | def run(self, conf, arg, err): 53 | weld_module_run = weld.weld_module_run 54 | # module, conf, arg, &err 55 | weld_module_run.argtypes = [ 56 | c_weld_module, c_weld_conf, c_weld_value, c_weld_err] 57 | weld_module_run.restype = c_weld_value 58 | ret = weld_module_run(self.module, conf.conf, arg.val, err.error) 59 | return WeldValue(ret, assign=True) 60 | 61 | def __del__(self): 62 | weld_module_free = weld.weld_module_free 63 | weld_module_free.argtypes = [c_weld_module] 64 | weld_module_free.restype = None 65 | weld_module_free(self.module) 66 | 67 | 68 | class WeldValue(c_void_p): 69 | 70 | def __init__(self, value, assign=False): 71 | if assign is False: 72 | weld_value_new = weld.weld_value_new 73 | weld_value_new.argtypes = [c_void_p] 74 | weld_value_new.restype = c_weld_value 75 | self.val = 
weld_value_new(value) 76 | else: 77 | self.val = value 78 | self.freed = False 79 | 80 | def _check(self): 81 | if self.freed: 82 | raise ValueError("Attempted to use freed WeldValue") 83 | 84 | def data(self): 85 | self._check() 86 | weld_value_data = weld.weld_value_data 87 | weld_value_data.argtypes = [c_weld_value] 88 | weld_value_data.restype = c_void_p 89 | return weld_value_data(self.val) 90 | 91 | def memory_usage(self): 92 | self._check() 93 | weld_value_memory_usage = weld.weld_value_memory_usage 94 | weld_value_memory_usage.argtypes = [c_weld_value] 95 | weld_value_memory_usage.restype = c_int64 96 | return weld_value_memory_usage(self.val) 97 | 98 | def free(self): 99 | self._check() 100 | weld_value_free = weld.weld_value_free 101 | weld_value_free.argtypes = [c_weld_value] 102 | weld_value_free.restype = None 103 | self.freed = True 104 | return weld_value_free(self.val) 105 | 106 | 107 | class WeldConf(c_void_p): 108 | 109 | def __init__(self): 110 | weld_conf_new = weld.weld_conf_new 111 | weld_conf_new.argtypes = [] 112 | weld_conf_new.restype = c_weld_conf 113 | self.conf = weld_conf_new() 114 | 115 | def get(self, key): 116 | key = c_char_p(key) 117 | weld_conf_get = weld.weld_conf_get 118 | weld_conf_get.argtypes = [c_weld_conf, c_char_p] 119 | weld_conf_get.restype = c_char_p 120 | val = weld_conf_get(self.conf, key) 121 | return copy.copy(val) 122 | 123 | def set(self, key, value): 124 | key = c_char_p(key) 125 | value = c_char_p(value) 126 | weld_conf_set = weld.weld_conf_set 127 | weld_conf_set.argtypes = [c_weld_conf, c_char_p, c_char_p] 128 | weld_conf_set.restype = None 129 | weld_conf_set(self.conf, key, value) 130 | 131 | def __del__(self): 132 | weld_conf_free = weld.weld_conf_free 133 | weld_conf_free.argtypes = [c_weld_conf] 134 | weld_conf_free.restype = None 135 | weld_conf_free(self.conf) 136 | 137 | 138 | class WeldError(c_void_p): 139 | 140 | def __init__(self): 141 | weld_error_new = weld.weld_error_new 142 | 
weld_error_new.argtypes = [] 143 | weld_error_new.restype = c_weld_err 144 | self.error = weld_error_new() 145 | 146 | def code(self): 147 | weld_error_code = weld.weld_error_code 148 | weld_error_code.argtypes = [c_weld_err] 149 | weld_error_code.restype = c_uint64 150 | return weld_error_code(self.error) 151 | 152 | def message(self): 153 | weld_error_message = weld.weld_error_message 154 | weld_error_message.argtypes = [c_weld_err] 155 | weld_error_message.restype = c_char_p 156 | val = weld_error_message(self.error) 157 | return copy.copy(val) 158 | 159 | def __del__(self): 160 | weld_error_free = weld.weld_error_free 161 | weld_error_free.argtypes = [c_weld_err] 162 | weld_error_free.restype = None 163 | weld_error_free(self.error) 164 | 165 | WeldLogLevelOff = 0 166 | WeldLogLevelError = 1 167 | WeldLogLevelWarn = 2 168 | WeldLogLevelInfo = 3 169 | WeldLogLevelDebug = 4 170 | WeldLogLevelTrace = 5 171 | 172 | def weld_set_log_level(log_level): 173 | """ 174 | Sets the log_level for Weld: 175 | 0 = No Logs, 176 | 1 = Error, 177 | 2 = Warn, 178 | 3 = Info, 179 | 4 = Debug, 180 | 5 = Trace. 181 | """ 182 | weld.weld_set_log_level(log_level) 183 | -------------------------------------------------------------------------------- /python/benchmarks/weld-python/compiled.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | 4 | from bindings import * 5 | # from bindings_latest import * 6 | import weldtypes 7 | 8 | import ctypes 9 | import time 10 | 11 | import numpy as np 12 | 13 | # Global num threads setting. 14 | THREADS = [ "1" ] 15 | 16 | class WeldEncoder(object): 17 | """ 18 | An abstract class that must be overwridden by libraries. This class 19 | is used to marshall objects from Python types to Weld types. 
20 | """ 21 | def encode(obj): 22 | """ 23 | """ 24 | raise NotImplementedError 25 | 26 | def py_to_weld_type(self, obj): 27 | raise NotImplementedError 28 | 29 | 30 | class WeldDecoder(object): 31 | """ 32 | An abstract class that must be overwridden by libraries. This class 33 | is used to marshall objects from Weld types to Python types. 34 | """ 35 | def decode(obj, restype): 36 | """ 37 | Decodes obj, assuming object is of type `restype`. obj's Python 38 | type is ctypes.POINTER(restype.ctype_class). 39 | """ 40 | raise NotImplementedError 41 | 42 | # Returns a wrapped ctypes Structure 43 | def args_factory(arg_names, arg_types): 44 | class Args(ctypes.Structure): 45 | _fields_ = list(zip(arg_names, arg_types)) 46 | return Args 47 | 48 | def compile(program, arg_types, restype, decoder, verbose=False): 49 | """Compiles a program and returns a function for calling it. 50 | 51 | Parameters 52 | ---------- 53 | 54 | program : a string representing a Weld program. 55 | arg_types : a tuple of (type, encoder) 56 | decoder : a decoder for the returned value. 57 | """ 58 | 59 | start = time.time() 60 | 61 | conf = WeldConf() 62 | err = WeldError() 63 | module = WeldModule(program, conf, err) 64 | if err.code() != 0: 65 | raise ValueError("Could not compile function: {}".format(err.message())) 66 | end = time.time() 67 | 68 | if verbose: 69 | print("Weld compile time:", end - start) 70 | 71 | def func(*args): 72 | # Field names. 73 | names = [] 74 | # C type of each argument. 75 | arg_c_types = [] 76 | # Encoded version of each argument. 
77 | encoded = [] 78 | 79 | for (i, (arg, arg_type)) in enumerate(zip(args, arg_types)): 80 | names.append("_{}".format(i)) 81 | if isinstance(arg_type, WeldEncoder): 82 | arg_c_types.append(arg_type.py_to_weld_type(arg).ctype_class) 83 | encoded.append(arg_type.encode(arg)) 84 | else: 85 | # Primitive type with a builtin encoder 86 | assert isinstance(arg, arg_type) 87 | ctype = weldtypes.encoder(arg_type) 88 | arg_c_types.append(ctype) 89 | encoded.append(ctype(arg)) 90 | 91 | Args = args_factory(names, arg_c_types) 92 | raw_args = Args() 93 | 94 | for name, value in zip(names, encoded): 95 | setattr(raw_args, name, value) 96 | 97 | raw_args_pointer = ctypes.cast(ctypes.byref(raw_args), ctypes.c_void_p) 98 | weld_input = WeldValue(raw_args_pointer) 99 | conf = WeldConf() 100 | 101 | # 100GB Memory limit 102 | conf.set("weld.memory.limit", "100000000000") 103 | conf.set("weld.threads", THREADS[0]) 104 | 105 | err = WeldError() 106 | 107 | result = module.run(conf, weld_input, err) 108 | if err.code() != 0: 109 | raise ValueError("Error while running function: {}".format(err.message())) 110 | 111 | pointer_type = POINTER(restype.ctype_class) 112 | data = ctypes.cast(result.data(), pointer_type) 113 | result = decoder.decode(data, restype) 114 | 115 | return result 116 | 117 | return func 118 | -------------------------------------------------------------------------------- /python/benchmarks/weld-python/encoders.py: -------------------------------------------------------------------------------- 1 | 2 | from bindings import * 3 | from compiled import WeldEncoder, WeldDecoder 4 | from weldtypes import * 5 | 6 | import numpy as np 7 | import ctypes 8 | 9 | 10 | def dtype_to_weld_type(dtype): 11 | if dtype == 'int32': 12 | return WeldInt() 13 | elif dtype == 'int64': 14 | return WeldLong() 15 | elif dtype == 'float32': 16 | return WeldFloat() 17 | elif dtype == 'float64': 18 | return WeldDouble() 19 | else: 20 | raise ValueError("unsupported dtype {}".format(dtype)) 
class NumpyArrayEncoder(WeldEncoder):
    """Encodes a NumPy ndarray as a Weld vec[T] argument."""

    def _check(self, obj):
        """
        Checks whether this NumPy array is supported by Weld.
        """
        assert isinstance(obj, np.ndarray)

    def encode(self, obj):
        """Marshal `obj` into the ctypes vec struct Weld expects.

        Returns an instance of WeldVec(elem).ctype_class with `ptr` pointing
        at the array's existing buffer (no copy) and `size` the total number
        of elements.
        """
        self._check(obj)
        elem_type = dtype_to_weld_type(obj.dtype)
        c_class = WeldVec(elem_type).ctype_class
        elem_class = elem_type.ctype_class
        ptr = obj.ctypes.data_as(POINTER(elem_class))
        # obj.size gives the correct value for multi-dimensional arrays.
        size = ctypes.c_int64(obj.size)
        return c_class(ptr=ptr, size=size)

    def py_to_weld_type(self, obj):
        """Return the WeldType (vec of the dtype's scalar type) for `obj`."""
        self._check(obj)
        return WeldVec(dtype_to_weld_type(obj.dtype))


class NumpyArrayDecoder(WeldDecoder):
    """Decodes Weld scalar and vec[T] results into NumPy values."""

    def decode(self, obj, restype):
        """Decode a Weld result.

        obj : pointer to the raw Weld result.
        restype : the WeldType describing the result.
        """
        # Scalars: read a single value out of the result pointer.
        # This stuff is same as grizzly.
        if restype == WeldInt():
            data = WeldValue(obj).data()
            result = ctypes.cast(data, ctypes.POINTER(c_int)).contents.value
            return np.int32(result)
        elif restype == WeldLong():
            data = WeldValue(obj).data()
            result = ctypes.cast(data, ctypes.POINTER(c_long)).contents.value
            return np.int64(result)
        elif restype == WeldFloat():
            data = WeldValue(obj).data()
            result = ctypes.cast(data, ctypes.POINTER(c_float)).contents.value
            return np.float32(result)
        elif restype == WeldDouble():
            data = WeldValue(obj).data()
            result = ctypes.cast(data, ctypes.POINTER(c_double)).contents.value
            return np.float64(result)

        # is a WeldVec() - depending on the types, need to make minor changes.
        assert isinstance(restype, WeldVec)
        obj = obj.contents
        size = obj.size
        data = obj.ptr
        dtype = restype.elemType.ctype_class

        # Only the element *width* matters for the cast below, so 32-bit
        # types share c_float's array layout and 64-bit types share
        # c_double's; np.frombuffer reinterprets the bytes with `dtype`.
        if restype == WeldVec(WeldInt()) or restype == WeldVec(WeldFloat()):
            # these have same sizes.
            ArrayType = ctypes.c_float * size
        elif restype == WeldVec(WeldLong()) or restype == WeldVec(WeldDouble()):
            ArrayType = ctypes.c_double * size
        else:
            # Bug fix: previously fell through with ArrayType unbound,
            # producing a NameError instead of a meaningful error.
            raise TypeError("unsupported vector type {}".format(restype))

        array_pointer = ctypes.cast(data, ctypes.POINTER(ArrayType))
        result = np.frombuffer(array_pointer.contents, dtype=dtype, count=size)
        return result


class StructDecoder(WeldDecoder):
    """Decodes a Weld struct result by delegating each field to a decoder."""

    def __init__(self, types, decoders):
        """
        types : one WeldType per struct field.
        decoders : one WeldDecoder per struct field (same order as types).
        """
        for decoder in decoders:
            assert isinstance(decoder, WeldDecoder)
        for ty in types:
            assert isinstance(ty, WeldType)
        assert len(types) == len(decoders)
        self.decoders = decoders
        self.types = types

    def decode(self, obj, restype):
        """Decode each field of the struct and return them as a tuple."""
        decoded = []
        result_struct = ctypes.cast(obj, ctypes.POINTER(restype.ctype_class)).contents
        for (ty, decoder, (name, field_type)) in zip(self.types, self.decoders, result_struct._fields_):
            value = getattr(result_struct, name)
            decoded.append(decoder.decode(ctypes.pointer(value), ty))
        return tuple(decoded)


class ScalarDecoder(WeldDecoder):
    """Decodes a single scalar result. Currently only supports i64."""

    def decode(self, obj, restype):
        assert isinstance(restype, WeldLong)
        result = obj.contents.value
        return result
# --------------------------------------------------------------------------
# file: python/benchmarks/weld-python/test.py
# --------------------------------------------------------------------------

code = "|a: vec[i64], b: vec[i64]| {a, result(for(zip(a, b), appender, |b, i, e| merge(b, e.$0 + e.$1)))}"

from compiled import *
from encoders import *

import numpy as np

myfunc = compile(code, (NumpyArrayEncoder(), NumpyArrayEncoder()), WeldVec(WeldLong()), NumpyArrayDecoder())

a = np.ones(5, dtype=np.int64)
b = np.ones(5, dtype=np.int64)

print(myfunc(a, b))
# --------------------------------------------------------------------------
# file: python/benchmarks/weld-python/weldtypes.py
# --------------------------------------------------------------------------
from ctypes import *


def encoder(ty):
    """Return the ctypes class used to pass a primitive Python `ty` to Weld."""
    if ty == int:
        return c_long
    elif ty == float:
        return c_double
    elif ty == str:
        return c_char_p
    raise ValueError("no builtin encoder for type {}".format(ty))


class WeldType(object):
    """Base class for Weld types.

    Equality and hashing are defined on the type's string form, so two
    instances that print the same compare equal (e.g. WeldInt() == WeldInt()).
    """

    def __str__(self):
        return "type"

    def __hash__(self):
        return hash(str(self))

    def __eq__(self, other):
        # NOTE(review): compares hashes rather than strings; a hash collision
        # between distinct types would make them compare equal.
        return hash(other) == hash(self)

    @property
    def ctype_class(self):
        """The ctypes class used to represent this type in C."""
        raise NotImplementedError


class WeldChar(WeldType):
    def __str__(self):
        return "i8"

    @property
    def ctype_class(self):
        # NOTE(review): c_wchar_p is a wide-character *pointer*, not an 8-bit
        # char -- confirm this is intended for i8.
        return c_wchar_p


class WeldBit(WeldType):
    def __str__(self):
        return "bool"

    @property
    def ctype_class(self):
        return c_bool


class WeldInt(WeldType):

    def __str__(self):
        return "i32"

    @property
    def ctype_class(self):
        return c_int


class WeldLong(WeldType):

    def __str__(self):
        return "i64"

    @property
    def ctype_class(self):
        # NOTE(review): c_long is platform-dependent (32-bit on Windows);
        # i64 strictly corresponds to c_int64.
        return c_long


class WeldFloat(WeldType):

    def __str__(self):
        return "f32"

    @property
    def ctype_class(self):
        return c_float


class WeldDouble(WeldType):

    def __str__(self):
        return "f64"

    @property
    def ctype_class(self):
        return c_double


class WeldVec(WeldType):
    # Kind of a hack, but ctypes requires that the class instance returned is
    # the same object. Every time we create a new Vec instance (templatized by
    # type), we cache it here.
    _singletons = {}

    def __init__(self, elemType):
        self.elemType = elemType

    def __str__(self):
        return "vec[%s]" % str(self.elemType)

    @property
    def ctype_class(self):
        def vec_factory(elemType):
            class Vec(Structure):
                _fields_ = [
                    ("ptr", POINTER(elemType.ctype_class)),
                    ("size", c_long),
                ]
            return Vec

        if self.elemType not in WeldVec._singletons:
            WeldVec._singletons[self.elemType] = vec_factory(self.elemType)
        return WeldVec._singletons[self.elemType]


class WeldStruct(WeldType):
    # Cache of generated ctypes Structure classes, keyed by the ordered tuple
    # of field types (see WeldVec._singletons for why caching is required).
    _singletons = {}

    def __init__(self, field_types):
        assert False not in [isinstance(e, WeldType) for e in field_types]
        self.field_types = field_types

    def __str__(self):
        return "{" + ",".join([str(f) for f in self.field_types]) + "}"

    @property
    def ctype_class(self):
        def struct_factory(field_types):
            class Struct(Structure):
                _fields_ = [("_" + str(i), t.ctype_class) for i, t in enumerate(field_types)]
            return Struct

        # Bug fixes: the lookup previously consulted WeldVec._singletons (a
        # typo, so the cache always missed and a fresh class was built on
        # every access), and it keyed on frozenset(field_types), which
        # collapses duplicate field types and ignores field order -- giving
        # {i32,f32} and {f32,i32} the same cached layout. An ordered tuple
        # preserves both.
        key = tuple(self.field_types)
        if key not in WeldStruct._singletons:
            WeldStruct._singletons[key] = struct_factory(self.field_types)
        return WeldStruct._singletons[key]
# --------------------------------------------------------------------------
# file: python/lib/composer_numpy/__init__.py
# --------------------------------------------------------------------------

# Fall back to NumPy if we don't support something.
from numpy import *

from .annotated import *
from pycomposer import evaluate
# --------------------------------------------------------------------------
# file: python/lib/composer_numpy/annotated.py
# --------------------------------------------------------------------------
from pycomposer import *
import time

import sharedmem
import numpy as np
import scipy.special as ss

from copy import deepcopy as dc


class NdArraySplit(SplitType):
    """Split type for NumPy ndarrays.

    Partitions 1-D arrays (and the rows of 2-D arrays) by [start:end); when
    `slice_col` is set, 2-D arrays are partitioned by column instead.
    """

    def __init__(self):
        # Split 2-D arrays along columns instead of rows.
        self.slice_col = False
        # Whether combine() should concatenate results back together.
        self.merge = False

    def combine(self, values):
        if self.merge:
            return np.concatenate(values)
        # Otherwise nothing is returned -- presumably results were written in
        # place via the mutable `out` kwarg; confirm against callers.

    def split(self, start, end, value):
        if isinstance(value, np.ndarray):
            shape = value.shape
            ndims = len(value.shape)
            if ndims == 1:
                if start >= shape[0]:
                    return STOP_ITERATION
                return value[start:min(end, shape[0])]
            elif ndims == 2:
                # Column vectors are broadcast whole.
                if shape[1] == 1:
                    return value
                if self.slice_col:
                    return value[:,start:end]
                else:
                    return value[start:end,:]
            else:
                # Bug fix: this previously *returned* the exception object
                # instead of raising it.
                raise NotImplementedError("ndarray with dim > 2 not supported")
        else:
            # Scalar.
            return value

    def elements(self, value):
        if isinstance(value, np.ndarray):
            if len(value.shape) == 2 and value.shape[1] == 1:
                return value.shape[0]
            return value.shape[-1]
        # Non-arrays have no element count (treated as broadcast values).

    def __str__(self):
        return "NdArraySplit"


_args = (NdArraySplit(), NdArraySplit())
_kwargs = { 'out' : mut(NdArraySplit()), 'axis': Broadcast() }
_ret = NdArraySplit()


# Binary ops.
add = sa(dc(_args), dc(_kwargs), dc(_ret))(np.add)
subtract = sa(dc(_args), dc(_kwargs), dc(_ret))(np.subtract)
multiply = sa(dc(_args), dc(_kwargs), dc(_ret))(np.multiply)
divide = sa(dc(_args), dc(_kwargs), dc(_ret))(np.divide)
power = sa(dc(_args), dc(_kwargs), dc(_ret))(np.power)

_args = (NdArraySplit(),)

# Unary ops.
log = sa(dc(_args), dc(_kwargs), dc(_ret))(np.log)
log2 = sa(dc(_args), dc(_kwargs), dc(_ret))(np.log2)
exp = sa(dc(_args), dc(_kwargs), dc(_ret))(np.exp)
sin = sa(dc(_args), dc(_kwargs), dc(_ret))(np.sin)
arcsin = sa(dc(_args), dc(_kwargs), dc(_ret))(np.arcsin)
cos = sa(dc(_args), dc(_kwargs), dc(_ret))(np.cos)
sqrt = sa(dc(_args), dc(_kwargs), dc(_ret))(np.sqrt)
erf = sa(dc(_args), dc(_kwargs), dc(_ret))(ss.erf)

# addreduce = np.add.reduce
addreduce = sa(dc(_args), dc(_kwargs), dc(_ret))(np.add.reduce)

_args = (NdArraySplit(), Broadcast())
_kwargs = { 'axis': Broadcast() }
# roll = sa(dc(_args), dc(_kwargs), dc(NdArraySplit()))(np.roll)


def ones(shape, dtype=None, order='C'):
    """Like np.ones, but allocates the result in shared memory so worker
    processes can write into it."""
    result = sharedmem.empty(shape)
    result[:] = np.ones(shape, dtype, order)[:]
    return result


def zeros(shape, dtype=None, order='C'):
    """Like np.zeros, but allocates the result in shared memory so worker
    processes can write into it."""
    result = sharedmem.empty(shape)
    result[:] = np.zeros(shape, dtype, order)[:]
    return result
# --------------------------------------------------------------------------
# file: python/lib/composer_pandas/__init__.py
# --------------------------------------------------------------------------

# Fall back to pandas if we don't support something.
from pandas import *

from .annotated import *
from pycomposer import evaluate
# --------------------------------------------------------------------------
# file: python/lib/composer_pandas/annotated.py
# --------------------------------------------------------------------------
"""
Annotations for Pandas functions.

Note: For convenience, we just write a wrapper function that calls the Pandas function, and then
use those functions instead. We could equivalently just replace methods on the DataFrame class too and
split `self` instead of the DataFrame passed in here.
"""

from pycomposer import *
import time

import numpy as np
import pandas as pd

from copy import deepcopy as dc


class UniqueSplit(SplitType):
    """ For the result of Unique """

    def combine(self, values):
        # De-duplicate again across pieces: each piece was only unique
        # within itself.
        if len(values) > 0:
            return np.unique(np.concatenate(values))
        else:
            return np.array([])

    def split(self, start, end, value):
        # Unique results are combine-only. (Signature aligned with the other
        # SplitTypes; it previously took only `values`.)
        raise ValueError


class DataFrameSplit(SplitType):
    """ Row-range splits over DataFrames/Series; constants pass through. """

    def combine(self, values):
        # Only concatenate when at least one piece produced a result;
        # otherwise the wrapped operation returned nothing.
        do_combine = False
        for val in values:
            if val is not None:
                do_combine = True

        if do_combine and len(values) > 0:
            return pd.concat(values)

    def split(self, start, end, value):
        if not isinstance(value, pd.DataFrame) and not isinstance(value, pd.Series):
            # Assume this is a constant (str, int, etc.).
            return value
        return value[start:end]

    def elements(self, value):
        if not isinstance(value, pd.DataFrame) and not isinstance(value, pd.Series):
            return None
        return len(value)


class SumSplit(SplitType):
    """ Result of a sum: pieces combine by addition. """

    def combine(self, values):
        return sum(values)

    def split(self, start, end, value):
        raise ValueError("can't split sum values")


class GroupBySplit(SplitType):
    """ Result of a groupby: opaque; neither split nor combined. """

    def combine(self, values):
        return None

    def split(self, start, end, value):
        raise ValueError("can't split groupby values")


class SizeSplit(SplitType):
    """ Result of GroupBy.size(): pieces are concatenated. """

    def combine(self, values):
        return pd.concat(values)

    def split(self, start, end, value):
        raise ValueError("can't split size values")


def dfgroupby(df, keys):
    return df.groupby(keys)

def merge(left, right):
    return pd.merge(left, right)

def gbapply(grouped, func):
    return grouped.apply(func)

def gbsize(grouped):
    return grouped.size()

def filter(df, column, target):
    # NOTE: shadows the builtin `filter`; keeps rows where df[column] > target.
    return df[df[column] > target]

@sa((DataFrameSplit(), DataFrameSplit()), {}, DataFrameSplit())
def divide(series, value):
    result = (series / value)
    return result

@sa((DataFrameSplit(), DataFrameSplit()), {}, DataFrameSplit())
def multiply(series, value):
    result = (series * value)
    return result

@sa((DataFrameSplit(), DataFrameSplit()), {}, DataFrameSplit())
def subtract(series, value):
    result = (series - value)
    return result

@sa((DataFrameSplit(), DataFrameSplit()), {}, DataFrameSplit())
def add(series, value):
    result = (series + value)
    return result

@sa((DataFrameSplit(), DataFrameSplit()), {}, DataFrameSplit())
def equal(series, value):
    result = (series == value)
    return result

@sa((DataFrameSplit(), DataFrameSplit()), {}, DataFrameSplit())
def greater_than(series, value):
    # NOTE(review): despite the name this computes >= (greater-or-equal);
    # existing benchmark callers may rely on it -- confirm before changing.
    result = (series >= value)
    return result

@sa((DataFrameSplit(), DataFrameSplit()), {}, DataFrameSplit())
def less_than(series, value):
    result = (series < value)
    return result

@sa((DataFrameSplit(),), {}, SumSplit())
def pandasum(series):
    result = series.sum()
    return result

@sa((DataFrameSplit(),), {}, UniqueSplit())
def unique(series):
    result = series.unique()
    return result

@sa((DataFrameSplit(),), {}, DataFrameSplit())
def series_str(series):
    result = series.str
    return result

@sa((DataFrameSplit(), DataFrameSplit(), Broadcast()), {}, DataFrameSplit())
def mask(series, cond, val):
    result = series.mask(cond, val)
    return result

@sa((DataFrameSplit(), Broadcast(), Broadcast()), {}, DataFrameSplit())
def series_str_slice(series, start, end):
    result = series.str.slice(start, end)
    return result

@sa((DataFrameSplit(),), {}, DataFrameSplit())
def pandanot(series):
    return ~series

@sa((DataFrameSplit(), Broadcast()), {}, DataFrameSplit())
def series_str_contains(series, target):
    result = series.str.contains(target)
    return result

dfgroupby = sa((DataFrameSplit(), Broadcast()), {}, GroupBySplit())(dfgroupby)
merge = sa((DataFrameSplit(), Broadcast()), {}, DataFrameSplit())(merge)
filter = sa((DataFrameSplit(), Broadcast(), Broadcast()), {}, DataFrameSplit())(filter)

# Return split type should be ApplySplit(subclass of DataFrameSplit), and it
# should take the first argument as a parameter. The parameter is guaranteed to
# be a dag.Operation. The combiner can then use the `by` arguments to groupby
# in the combiner again, and then apply again.
162 | gbapply = sa((GroupBySplit(), Broadcast()), {}, DataFrameSplit())(gbapply) 163 | gbsize = sa((GroupBySplit(), Broadcast()), {}, SizeSplit())(gbsize) 164 | -------------------------------------------------------------------------------- /python/pycomposer/pycomposer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .composer import sa, evaluate, mut 3 | from .split_types import SplitType, Broadcast 4 | from .vm.driver import STOP_ITERATION 5 | 6 | # Import the generics. 7 | from .split_types import A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z 8 | -------------------------------------------------------------------------------- /python/pycomposer/pycomposer/annotation.py: -------------------------------------------------------------------------------- 1 | 2 | import functools 3 | 4 | from inspect import signature, Parameter, Signature 5 | 6 | from .split_types import Broadcast 7 | 8 | class Mut(object): 9 | """ Marker that marks values in an annotation as mutable. """ 10 | 11 | __slots__ = [ "value" ] 12 | def __init__(self, value): 13 | self.value = value 14 | 15 | # Constructor for mutables. 16 | mut = lambda x: Mut(x) 17 | 18 | class Annotation(object): 19 | """ An annotation on a function. 20 | 21 | Annotations map arguments (by index for regular arguments and by name for 22 | keyword arguments) to their split type. 23 | 24 | """ 25 | 26 | __slots__ = [ "mutables", "arg_types", "return_type", "kwarg_types" ] 27 | 28 | def __init__(self, func, types, kwtypes, return_type): 29 | """ Initialize an annotation for a function invocation with the given 30 | arguments. 31 | 32 | Parameters 33 | __________ 34 | 35 | func : the function that was invoked. 36 | types : the split types of the non-keyword arguments and return type. 37 | kwtypes : the split types of the keyword arguments. 
38 | 39 | """ 40 | 41 | try: 42 | sig = signature(func) 43 | args = [(name, param) for (name, param) in sig.parameters.items()\ 44 | if param.kind == Parameter.POSITIONAL_OR_KEYWORD] 45 | 46 | num_required_types = 0 47 | for (name, param) in args: 48 | if param.default is Parameter.empty: 49 | num_required_types += 1 50 | 51 | if len(types) != num_required_types: 52 | raise ValueError("invalid number of arguments in annotation (expected {}, got {})".format(len(args), len(types))) 53 | 54 | # Make sure there's no extraneous args. 55 | kwargs = set([name for (name, param) in args if param.default is not Parameter.empty]) 56 | 57 | for name in kwargs: 58 | if name not in kwtypes: 59 | kwtypes[name] = Broadcast() 60 | 61 | for name in kwtypes: 62 | assert(name in kwargs) 63 | 64 | except ValueError as e: 65 | pass 66 | # print("WARN: Continuing without verification of annotation") 67 | 68 | # The mutable values. These are indices for positionals and string 69 | # names for keyword args. 70 | self.mutables = set() 71 | 72 | # The argument types. 73 | self.arg_types = [] 74 | 75 | for (i, ty) in enumerate(types): 76 | if isinstance(ty, Mut): 77 | self.arg_types.append(ty.value) 78 | self.mutables.add(i) 79 | else: 80 | self.arg_types.append(ty) 81 | 82 | # The return type. This can be None if the function doesn't return anything. 83 | self.return_type = return_type 84 | 85 | # Dictionary of kwarg types. 86 | self.kwarg_types = dict() 87 | for (key, value) in kwtypes.items(): 88 | if isinstance(value, Mut): 89 | self.kwarg_types[key] = value.value 90 | self.mutables.add(key) 91 | else: 92 | self.kwarg_types[key] = value 93 | 94 | 95 | def types(self): 96 | """ Iterate over the split types in this annotation. 
""" 97 | for ty in self.arg_types: 98 | yield ty 99 | for ty in self.kwarg_types.values(): 100 | yield ty 101 | yield self.return_type 102 | 103 | def __str__(self): 104 | if len(self.arg_types) > 0: 105 | args = ", ".join([str(t) for t in self.arg_types]) 106 | else: 107 | args = ", " if len(self.kwarg_types) > 0 else "" 108 | 109 | if len(self.kwarg_types) > 0: 110 | args += ", " 111 | args += ", ".join(["{}={}".format(k, v) for (k,v) in self.kwarg_types.items()]) 112 | 113 | return "({}) -> {}".format(args, self.return_type) 114 | -------------------------------------------------------------------------------- /python/pycomposer/pycomposer/composer.py: -------------------------------------------------------------------------------- 1 | 2 | from .annotation import Annotation, mut 3 | from .dag import LogicalPlan, evaluate_dag 4 | from .split_types import * 5 | from .vm.driver import DEFAULT_BATCH_SIZE 6 | 7 | import functools 8 | 9 | import copy 10 | 11 | # The task graph. 12 | _DAG = LogicalPlan() 13 | 14 | class sa(object): 15 | """ A splitability annotation.""" 16 | 17 | def __init__(self, types, kwtypes, return_type): 18 | """ A splitability annotation. 19 | 20 | Parameters 21 | ---------- 22 | 23 | postypes : a tuple of split types for each positional argument. The number of elements in the tuple must match the number 24 | of positional arguments in the funciton. 25 | 26 | kwtypes : a dictionary of split types for each keyword argument. Providing 27 | split types for keyword arguments is optional. If a keyword argument does 28 | not have a split type, its split type will default to "broadcast." 29 | 30 | return_type : split type of the value returned by this function. 
31 | 32 | """ 33 | self.types = types 34 | self.kwtypes = kwtypes 35 | self.return_type = return_type 36 | 37 | def __call__(self, func): 38 | annotation = Annotation(func, self.types, self.kwtypes, self.return_type) 39 | 40 | @functools.wraps(func) 41 | def _decorated(*args, **kwargs): 42 | return _DAG.register(func, args, kwargs, annotation) 43 | 44 | return _decorated 45 | 46 | def evaluate(workers=1, batch_size=DEFAULT_BATCH_SIZE, profile=False): 47 | evaluate_dag(_DAG, workers, batch_size, profile) 48 | -------------------------------------------------------------------------------- /python/pycomposer/pycomposer/unevaluated.py: -------------------------------------------------------------------------------- 1 | """ A singleton representing an unevaluated computation. """ 2 | 3 | class _Unevaluated: 4 | """ An unevaluated value. 5 | 6 | Users should access the UNEVALUATED singleton instead of 7 | making instances of this directly. 8 | 9 | """ 10 | __slots__ = [] 11 | 12 | UNEVALUATED = _Unevaluated() 13 | 14 | 15 | -------------------------------------------------------------------------------- /python/pycomposer/pycomposer/vm/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .program import Program 3 | from .driver import Driver, STOP_ITERATION 4 | -------------------------------------------------------------------------------- /python/pycomposer/pycomposer/vm/instruction.py: -------------------------------------------------------------------------------- 1 | 2 | from abc import ABC, abstractmethod 3 | import types 4 | 5 | from .driver import STOP_ITERATION 6 | 7 | class Instruction(ABC): 8 | """ 9 | An instruction that updates an operation in a lazy DAG. 10 | """ 11 | 12 | @abstractmethod 13 | def evaluate(self, thread, start, end, values, context): 14 | """ 15 | Evaluates an instruction. 
16 | 17 | Parameters 18 | ---------- 19 | 20 | thread : the thread that is currently executing 21 | start : the start index of the current split value. 22 | end : the end index of the current split value 23 | values : a global value map holding the inputs. 24 | context : map holding execution state (arg ID -> value). 25 | 26 | """ 27 | pass 28 | 29 | class Split(Instruction): 30 | """ 31 | An instruction that splits the inputs to an operation. 32 | """ 33 | 34 | def __init__(self, target, ty): 35 | """ 36 | A Split instruction takes an argument and split type and applies 37 | the splitter on the argument. 38 | 39 | Parameters 40 | ---------- 41 | 42 | target : the arg ID that will be split. 43 | ty : the split type. 44 | """ 45 | self.target = target 46 | self.ty = ty 47 | self.splitter = None 48 | 49 | def __str__(self): 50 | return "v{} = split {}:{}".format(self.target, self.target, self.ty) 51 | 52 | def evaluate(self, thread, start, end, values, context): 53 | """ Returns values from the split. """ 54 | 55 | if self.splitter is None: 56 | # First time - check if the splitter is actually a generator. 57 | result = self.ty.split(start, end, values[self.target]) 58 | if isinstance(result, types.GeneratorType): 59 | self.splitter = result 60 | result = next(self.splitter) 61 | else: 62 | self.splitter = self.ty.split 63 | else: 64 | if isinstance(self.splitter, types.GeneratorType): 65 | result = next(self.splitter) 66 | else: 67 | result = self.splitter(start, end, values[self.target]) 68 | 69 | if isinstance(result, str) and result == STOP_ITERATION: 70 | return STOP_ITERATION 71 | else: 72 | context[self.target].append(result) 73 | 74 | class Call(Instruction): 75 | """ An instruction that calls an SA-enabled function. """ 76 | def __init__(self, target, func, args, kwargs, ty): 77 | self.target = target 78 | # Function to call. 79 | self.func = func 80 | # Arguments: list of targets. 
81 | self.args = args 82 | # Keyword arguments: Maps { name -> target } 83 | self.kwargs = kwargs 84 | # Return split type. 85 | self.ty = ty 86 | 87 | def __str__(self): 88 | args = ", ".join(map(lambda a: "v" + str(a), self.args)) 89 | kwargs = list(map(lambda v: "{}=v{}".format(v[0], v[1]), self.kwargs.items())) 90 | arguments = ", ".join([args] + kwargs) 91 | return "v{} = call {}({}):{}".format(self.target, self.func.__name__, arguments, str(self.ty)) 92 | 93 | def get_args(self, context): 94 | return [ context[target][-1] for target in self.args ] 95 | 96 | def get_kwargs(self, context): 97 | return dict([ (name, context[target][-1]) for (name, target) in self.kwargs.items() ]) 98 | 99 | def evaluate(self, _thread, _start, _end, _values, context): 100 | """ 101 | Evaluates a function call by gathering arguments and calling the 102 | function. 103 | 104 | """ 105 | args = self.get_args(context) 106 | kwargs = self.get_kwargs(context) 107 | context[self.target].append(self.func(*args, **kwargs)) 108 | -------------------------------------------------------------------------------- /python/pycomposer/pycomposer/vm/program.py: -------------------------------------------------------------------------------- 1 | 2 | from .driver import STOP_ITERATION 3 | from .instruction import Split 4 | 5 | class Program: 6 | """ 7 | A Composer Virtual Machine Program. 8 | 9 | A program stores a sequence of instructions to execute. 10 | 11 | """ 12 | 13 | __slots__ = ["ssa_counter", "insts", "registered", "index"] 14 | 15 | def __init__(self): 16 | # Counter for registering instructions. 17 | self.ssa_counter = 0 18 | # Instruction list. 19 | self.insts = [] 20 | # Registered values. Maps SSA value to real value. 21 | self.registered = {} 22 | 23 | def get(self, value): 24 | """ 25 | Get the SSA value for a value, or None if the value is not registered. 
26 | 27 | value : The value to lookup 28 | 29 | """ 30 | for num, val in self.registered.items(): 31 | if value is val: 32 | return num 33 | 34 | def set_range_end(self, range_end): 35 | for inst in self.insts: 36 | if isinstance(inst, Split): 37 | inst.ty.range_end = range_end 38 | 39 | def step(self, thread, piece_start, piece_end, values, context): 40 | """ 41 | Step the program and return whether are still items to process. 42 | """ 43 | for task in self.insts: 44 | result = task.evaluate(thread, piece_start, piece_end, values, context) 45 | if isinstance(result, str) and result == STOP_ITERATION: 46 | return False 47 | return True 48 | 49 | def elements(self, values): 50 | """Returns the number of elements that this program will process. 51 | 52 | This quantity is retrieved by querying the Split instructions in the program. 53 | 54 | """ 55 | elements = None 56 | for inst in self.insts: 57 | if isinstance(inst, Split): 58 | e = inst.ty.elements(values[inst.target]) 59 | if e is None: 60 | continue 61 | if elements is not None: 62 | assert(elements == e, inst) 63 | else: 64 | elements = e 65 | return elements 66 | 67 | def __str__(self): 68 | return "\n".join([str(i) for i in self.insts]) 69 | -------------------------------------------------------------------------------- /python/pycomposer/pycomposer/vm/vm.py: -------------------------------------------------------------------------------- 1 | 2 | from .program import Program 3 | 4 | class VM: 5 | """ 6 | A Composer virtual machine, which holds a program and its associated data. 7 | """ 8 | def __init__(self): 9 | # Counter for argument IDs 10 | self.ssa_counter = 0 11 | # Program 12 | self.program = Program() 13 | # Values, mapping argID -> values 14 | self.values = dict() 15 | 16 | def get(self, value): 17 | """ 18 | Get the SSA value for a value, or None if the value is not registered. 
19 | 20 | value : The value to lookup 21 | 22 | """ 23 | for num, val in self.values.items(): 24 | if value is val: 25 | return num 26 | 27 | def register_value(self, value): 28 | """ 29 | Register a counter to a value. 30 | """ 31 | arg_id = self.ssa_counter 32 | self.ssa_counter += 1 33 | self.values[arg_id] = value 34 | return arg_id 35 | 36 | -------------------------------------------------------------------------------- /python/pycomposer/requirements.txt: -------------------------------------------------------------------------------- 1 | cloudpickle 2 | 3 | -------------------------------------------------------------------------------- /python/pycomposer/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from distutils.core import setup 4 | 5 | setup(name='pycomposer', 6 | version='0.1.0', 7 | description='Python Composer library', 8 | author='Shoumik Palkar', 9 | author_email='shoumik@cs.stanford.edu', 10 | url='https://www.github.com/sppalkia/annotator', 11 | packages=['cloudpickle'], 12 | ) 13 | --------------------------------------------------------------------------------