├── .gitignore ├── EXPERIMENTS.md ├── LICENSE ├── README.md ├── c ├── Cargo.toml ├── annotate │ ├── Cargo.toml │ └── src │ │ ├── main.rs │ │ ├── parser │ │ ├── annotation.rs │ │ ├── cdecl.rs │ │ └── mod.rs │ │ ├── resources │ │ ├── annotation.c.template │ │ ├── argstruct.c.template │ │ ├── callback.c.template │ │ ├── file.c.template │ │ └── function.c.template │ │ └── writer.rs ├── benchmarks │ ├── blackscholes │ │ ├── Makefile │ │ ├── benchmark-breakdown.sh │ │ ├── benchmark-pieces.sh │ │ ├── benchmark.sh │ │ └── blackscholes.c │ ├── breakdown.py │ ├── get-data.sh │ ├── gotham │ │ ├── Makefile │ │ ├── benchmark.sh │ │ ├── gotham.c │ │ ├── gotham.h │ │ ├── gotham_composer.c │ │ └── gotham_composer.h │ ├── haversine │ │ ├── Makefile │ │ ├── benchmark-pieces.sh │ │ ├── benchmark.sh │ │ └── haversine.c │ ├── nashville │ │ ├── Makefile │ │ ├── benchmark-breakdown.sh │ │ ├── benchmark-pieces.sh │ │ ├── benchmark.sh │ │ ├── nashville.c │ │ ├── nashville.h │ │ ├── nashville_composer.c │ │ ├── nashville_composer.h │ │ ├── nashville_parallel.c │ │ └── nashville_parallel.h │ ├── nbody │ │ ├── Makefile │ │ ├── benchmark.sh │ │ ├── nbody.c │ │ ├── nbody.h │ │ ├── nbody_composer.c │ │ ├── nbody_composer.h │ │ ├── nbody_mkl.c │ │ └── nbody_mkl.h │ ├── run-all.sh │ └── shallow_water │ │ ├── Makefile │ │ ├── README.md │ │ ├── benchmark.sh │ │ ├── shallow_water.c │ │ ├── shallow_water.h │ │ ├── shallow_water_composer.c │ │ ├── shallow_water_composer.h │ │ ├── shallow_water_mkl.c │ │ └── shallow_water_mkl.h ├── composer │ ├── Cargo.toml │ ├── build.rs │ └── src │ │ ├── error.rs │ │ ├── lib.rs │ │ ├── runtime │ │ ├── memory.rs │ │ ├── mod.rs │ │ └── tasks.rs │ │ └── util.rs └── lib │ ├── ImageMagick │ ├── Makefile │ ├── imagemagick.annotation │ ├── splitters.c │ └── splitters.h │ └── composer_mkl │ ├── Makefile │ ├── README.md │ ├── mkl.annotation │ ├── mkl_extensions.c │ ├── mkl_extensions.h │ ├── splitters.c │ ├── vec.c │ └── vec.h └── python ├── benchmarks ├── birth_analysis │ ├── 
benchmark.sh │ ├── birth_analysis.py │ └── birth_analysis_composer.py ├── blackscholes │ ├── benchmark-batch.sh │ ├── benchmark.sh │ ├── blackscholes.py │ └── blackscholes_numba.py ├── crime_index │ ├── benchmark.sh │ └── crime_index.py ├── data_cleaning │ ├── benchmark.sh │ └── data_cleaning.py ├── datasets │ ├── birth_analysis │ │ ├── babynames.txt.gz │ │ └── replicate-csv │ └── movielens │ │ └── replicate-csv ├── get-data.sh ├── haversine │ ├── benchmark.sh │ ├── haversine.py │ └── haversine_numba.py ├── movielens │ ├── benchmark.sh │ ├── movielens.py │ └── movielens_composer.py ├── nbody │ ├── benchmark.sh │ ├── nbody.py │ ├── nbody_boh.py │ └── nbody_numba.py ├── requirements.txt ├── run-all.sh ├── setup-env.sh ├── shallow_water │ ├── benchmark.sh │ ├── shallow_water.py │ └── shallow_water_numba.py ├── speechtag │ ├── benchmark.sh │ ├── speechtag.py │ └── speechtag_composer.py └── weld-python │ ├── __init__.py │ ├── benchmark-weld.sh │ ├── bindings.py │ ├── bindings_latest.py │ ├── compiled.py │ ├── encoders.py │ ├── test.py │ ├── types.py │ ├── weldobject.py │ └── weldtypes.py ├── lib ├── composer_numpy │ ├── __init__.py │ └── annotated.py └── composer_pandas │ ├── __init__.py │ └── annotated.py └── pycomposer ├── pycomposer ├── __init__.py ├── annotation.py ├── composer.py ├── dag.py ├── split_types.py ├── unevaluated.py └── vm │ ├── __init__.py │ ├── driver.py │ ├── instruction.py │ ├── program.py │ └── vm.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | generated/ 3 | **/*.rs.bk 4 | Cargo.lock 5 | composer.h 6 | __pycache__ 7 | *.pyc 8 | .DS_Store 9 | 10 | # Ignore built libraries. 11 | *.dylib 12 | *.so 13 | *.swp 14 | *.swo 15 | *.dSYM/ 16 | 17 | # ignore debugging code 18 | *.s 19 | *.S 20 | *.ll 21 | 22 | bench 23 | out 24 | eigen 25 | perf.data* 26 | 27 | *.jpg 28 | *.tif 29 | 30 | # Ignore Python virtualenv. 
31 | python/benchmarks/benchmarks 32 | 33 | # Ignore various projects 34 | weld/ 35 | weld-latest/ 36 | bohrium/ 37 | 38 | # Data 39 | babynames* 40 | ml-* 41 | 42 | # Benchmark output 43 | *.stdout 44 | *.stderr 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 2 | 3 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 4 | 5 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 6 | 7 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 8 | 9 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Split Annotations 2 | 3 | This is the main source code repository for Split Annotations. It contains the source code for the C implementation, the Python implementation, and the benchmarks from the SOSP 2019 paper. 4 | 5 | Split annotations (SAs) are a system for enabling optimizations such as pipelining and parallelization underneath existing libraries. Other approaches for enabling these optimizations, such as intermediate representations, compilers, or DSLs, are heavyweight solutions that require re-architecting existing code. Unlike these approaches, SAs enable these optimizations _without requiring changes to existing library functions_. 6 | 7 | ## Installing from Source 8 | 9 | 1. Make sure you have the required dependencies: 10 | 11 | * Python 3.5 12 | * `virtualenv` 13 | * The latest version of [Rust](https://rustup.rs/). See the instructions in the link. 14 | * `git` 15 | * `pkgconfig`. You can download it as follows: 16 | 17 | ```bash 18 | sudo apt-get install pkg-config 19 | ``` 20 | 21 | * The `build-essential` package on Linux distributions. You can download it as follows: 22 | 23 | ```bash 24 | sudo apt-get update 25 | sudo apt-get install build-essential 26 | ``` 27 | 28 | To build the C implementation: 29 | 30 | 2. Clone this repository and set the `$SA_HOME` environment variable (the latter is not necessary but simplifies the remaining steps): 31 | 32 | ```bash 33 | cd $HOME 34 | git clone https://github.com/weld-project/split-annotations.git 35 | cd split-annotations 36 | export SA_HOME=`pwd` 37 | ``` 38 | 39 | 3. Build the C implementation: 40 | 41 | ```bash 42 | cd $SA_HOME/c 43 | cargo build --release 44 | ``` 45 | 46 | 4. Optionally build the provided annotated C libraries (Intel MKL and ImageMagick). 
See `EXPERIMENTS.md` for directions on how to build MKL and ImageMagick, and then: 47 | 48 | ```bash 49 | cd $SA_HOME/c/lib/composer_mkl 50 | make 51 | cd $SA_HOME/c/lib/ImageMagick 52 | make 53 | ``` 54 | 55 | The Python implementation does not require any special installation, but running the benchmarks requires certain dependencies. See the instructions in `EXPERIMENTS.md`. 56 | 57 | ## Get Help 58 | 59 | If you need help installing or using split annotations, or have general questions about the project, feel free to either create a GitHub issue or email shoumik @ stanford . edu (with the spaces removed). 60 | -------------------------------------------------------------------------------- /c/Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | 3 | members = [ "annotate", "composer" ] 4 | -------------------------------------------------------------------------------- /c/annotate/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "annotate" 3 | version = "0.1.0" 4 | authors = ["Shoumik Palkar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | clap = "2.3" 9 | composer = { path = "../composer" } 10 | nom = "^4" 11 | serde_json = "1.0" 12 | subprocess = "0.1.18" 13 | env_logger = "0.6.0" 14 | log = "0.4.6" 15 | -------------------------------------------------------------------------------- /c/annotate/src/main.rs: -------------------------------------------------------------------------------- 1 | //! A command line tool for annotating C functions with splitability annotations. 2 | //! 3 | //! This tool takes as input an annotations file, which defines splitability annotations, and 4 | //! outputs C header files that can be compiled with a normal C program. The header files define 5 | //! functions that support the splitability annotation runtime. 6 | 7 | // Parses the annotation file. 
8 | mod parser; 9 | mod writer; 10 | 11 | use env_logger; 12 | use log::*; 13 | 14 | use std::fs::{DirBuilder, File}; 15 | use std::io::{Read, Write}; 16 | use std::path::PathBuf; 17 | 18 | use clap::{Arg, App}; 19 | 20 | 21 | fn main() -> std::io::Result<()> { 22 | let matches = App::new("annotate") 23 | .version("0.1.0") 24 | .author("Shoumik Palkar ") 25 | .about("Command line tool for annotating C functions with splitability annotations") 26 | .arg(Arg::with_name("INPUT") 27 | .help("annotation file") 28 | .required(true) 29 | .index(1)) 30 | .arg(Arg::with_name("dir") 31 | .short("d") 32 | .help("name of directory where output header files are written (default: 'generated')") 33 | .takes_value(true) 34 | .required(false)) 35 | .arg(Arg::with_name("force") 36 | .short("f") 37 | .help("Forces generation, even if a directory with the specified name already exists.") 38 | .required(false)) 39 | .arg(Arg::with_name("prefix") 40 | .short("p") 41 | .help("prefix for generated functions (default: 'c_')") 42 | .takes_value(true) 43 | .required(false)) 44 | .arg(Arg::with_name("header") 45 | .short("n") 46 | .help("name for include file with all definitions (default: 'generated')") 47 | .takes_value(true) 48 | .required(false)) 49 | .arg(Arg::with_name("includes") 50 | .short("i") 51 | .help("Includes (without '.h') that should be added at the top of the main header file.") 52 | .takes_value(true) 53 | .required(false)) 54 | .get_matches(); 55 | 56 | // Initialize logging. 57 | let mut builder = env_logger::Builder::from_default_env(); 58 | builder.default_format_timestamp(true) 59 | .init(); 60 | 61 | let input_file = matches.value_of("INPUT").unwrap(); 62 | let mut input_file = File::open(input_file)?; 63 | 64 | let mut annotations = String::new(); 65 | input_file.read_to_string(&mut annotations)?; 66 | 67 | // Convert the string annotation file into annotation objects. 
68 | let annotations = parser::parse(&annotations)?; 69 | 70 | let path = matches.value_of("dir").unwrap_or("generated"); 71 | 72 | // If force is enabled, remove the existing directory. 73 | if matches.is_present("force") { 74 | std::fs::remove_dir_all(path)?; 75 | } 76 | 77 | // Create a directory. 78 | DirBuilder::new().create(path)?; 79 | 80 | let mut files = vec![]; 81 | let mut functions = vec![]; 82 | 83 | for annotation in annotations { 84 | let mut writer = writer::AnnotationHeaderWriter::new(annotation); 85 | 86 | info!("Generating header for {}...", writer.function_header()); 87 | 88 | let header = writer.write(); 89 | 90 | files.push(writer.filename()); 91 | functions.push(writer.function_header()); 92 | 93 | let mut filename = PathBuf::new(); 94 | filename.push(path); 95 | filename.push(writer.filename()); 96 | filename.set_extension("h"); 97 | 98 | let mut f = File::create(filename).unwrap(); 99 | f.write(header.as_bytes())?; 100 | } 101 | 102 | let includes = matches.value_of("includes").unwrap_or(""); 103 | let includes = includes.split(",") 104 | .map(|e| e.to_string()) 105 | .filter(|e| e.len() != 0) 106 | .collect(); 107 | 108 | // Write the final header. 109 | let mut include_writer = writer::IncludeHeaderWriter::new(files, functions, includes, path.to_string()); 110 | let header = include_writer.write(); 111 | 112 | let mut filename = PathBuf::new(); 113 | filename.push(path); 114 | filename.push(matches.value_of("include").unwrap_or("generated")); 115 | filename.set_extension("h"); 116 | 117 | let mut f = File::create(filename).unwrap(); 118 | f.write(header.as_bytes())?; 119 | 120 | Ok(()) 121 | } 122 | -------------------------------------------------------------------------------- /c/annotate/src/parser/cdecl.rs: -------------------------------------------------------------------------------- 1 | //! Parsing for C function headers. 
2 | 3 | use composer::CDecl; 4 | 5 | use nom::*; 6 | use nom::types::CompleteByteSlice; 7 | 8 | use std::str; 9 | 10 | use super::ident; 11 | 12 | /// Parses a C type. 13 | /// 14 | /// This currently supports parsing any regular type, or types with pointers. 15 | /// 16 | /// # Unsupported Features 17 | /// 18 | /// * Function Pointers 19 | /// * Fixed size arrays 20 | named_complete!( 21 | pub parse_c_type, 22 | map!( 23 | ws!(do_parse!( 24 | is_const: opt!(tag_s!("const")) >> 25 | tag: opt!(alt!( 26 | tag_s!("struct") | 27 | tag_s!("union") 28 | )) >> 29 | ty: ident >> 30 | pointers: many0!(char!('*')) >> 31 | (is_const, tag, ty, pointers) 32 | )), 33 | |(is_const, tag, ty, ptrs): (Option, Option, String, Vec)| { 34 | let is_const = is_const.map(|ref v| str::from_utf8(v).unwrap()); 35 | let tag = tag.map(|ref v| str::from_utf8(v).unwrap()); 36 | 37 | let mut name = String::new(); 38 | 39 | if let Some(is_const) = is_const { 40 | name.push_str(is_const); 41 | name.push(' '); 42 | } 43 | 44 | if let Some(tag) = tag { 45 | name.push_str(tag); 46 | name.push(' '); 47 | }; 48 | 49 | name.push_str(&ty); 50 | 51 | for _ in 0..ptrs.len() { 52 | name.push('*'); 53 | } 54 | name 55 | } 56 | ) 57 | ); 58 | 59 | /// Parses a C parameter, which is a type followed by an optional name. 60 | named_complete!( 61 | parse_c_parameter<(String, Option)>, 62 | ws!(do_parse!( 63 | ty: parse_c_type >> 64 | arg_name: opt!(ident) >> 65 | (ty, arg_name) 66 | )) 67 | ); 68 | 69 | 70 | /// Parses a single C function declaration. 
71 | named_complete!( 72 | pub parse_c_decl, 73 | map!( 74 | ws!(do_parse!( 75 | return_type: parse_c_type >> 76 | func_name: ident >> 77 | arguments: delimited!( 78 | char!('('), 79 | separated_list_complete!(ws!(char!(',')), parse_c_parameter), 80 | char!(')') 81 | ) >> 82 | char!(';') >> 83 | (return_type, func_name, arguments) 84 | )), 85 | |n: (String, String, Vec<(String, Option)>)| CDecl::new(n.0, n.1, n.2) 86 | ) 87 | ); 88 | 89 | #[cfg(test)] 90 | fn check_type(input: &str, expected: Option<&str>) { 91 | let result = parse_c_type(CompleteByteSlice(input.as_bytes())); 92 | if let Some(expected) = expected { 93 | assert_eq!(expected, result.unwrap().1); 94 | } else { 95 | result.expect_err("Expected an error"); 96 | } 97 | } 98 | 99 | #[cfg(test)] 100 | fn check_parameter(input: &str, expected: Option<(&str, Option<&str>)>) { 101 | let result = parse_c_parameter(CompleteByteSlice(input.as_bytes())); 102 | if let Some(expected) = expected { 103 | let result = result.unwrap().1; 104 | assert_eq!(result, (String::from(expected.0), expected.1.map(|v| String::from(v)))); 105 | } else { 106 | result.expect_err("Expected an error"); 107 | } 108 | } 109 | 110 | #[cfg(test)] 111 | fn check_decl(input: &str, expected: Option) { 112 | let result = parse_c_decl(CompleteByteSlice(input.as_bytes())); 113 | if let Some(expected) = expected { 114 | let result = result.unwrap().1; 115 | assert_eq!(result, expected); 116 | } else { 117 | result.expect_err("Expected an error"); 118 | } 119 | } 120 | 121 | #[test] 122 | fn simple_type() { 123 | check_type("int", Some("int")); 124 | } 125 | 126 | #[test] 127 | fn pointer_type() { 128 | check_type("int*", Some("int*")); 129 | } 130 | 131 | #[test] 132 | fn const_type() { 133 | check_type("const int", Some("const int")); 134 | } 135 | 136 | #[test] 137 | fn pointer_type_with_space() { 138 | check_type("int *", Some("int*")); 139 | } 140 | 141 | #[test] 142 | fn struct_type() { 143 | check_type("struct myStruct", Some("struct 
myStruct")); 144 | } 145 | 146 | #[test] 147 | fn const_struct_type() { 148 | check_type("const struct myStruct", Some("const struct myStruct")); 149 | } 150 | 151 | #[test] 152 | fn struct_pointer_type() { 153 | check_type("struct myStruct *", Some("struct myStruct*")); 154 | } 155 | 156 | #[test] 157 | fn const_struct_pointer_type() { 158 | check_type("const struct myStruct *", Some("const struct myStruct*")); 159 | } 160 | 161 | #[test] 162 | fn double_pointer_type() { 163 | check_type("int**", Some("int**")); 164 | } 165 | 166 | #[test] 167 | fn simple_parameter() { 168 | check_parameter("int x", Some(("int", Some("x")))) 169 | } 170 | 171 | #[test] 172 | fn pointer_parameter() { 173 | check_parameter("int *x", Some(("int*", Some("x")))) 174 | } 175 | 176 | #[test] 177 | fn struct_parameter() { 178 | check_parameter("struct foo *x", Some(("struct foo*", Some("x")))) 179 | } 180 | 181 | #[test] 182 | fn no_name_parameter() { 183 | check_parameter("struct foo *", Some(("struct foo*", None))) 184 | } 185 | 186 | #[test] 187 | fn incomplete_type_parameter() { 188 | check_parameter("struc foo *x", Some(("struc", Some("foo")))) 189 | } 190 | 191 | #[test] 192 | fn basic_decl() { 193 | check_decl("int foo();", Some(CDecl::new("int", "foo", vec![]))); 194 | } 195 | 196 | #[test] 197 | fn decl_with_arg() { 198 | check_decl("int foo(int);", Some(CDecl::new("int", "foo", vec![("int", None)]))); 199 | } 200 | 201 | #[test] 202 | fn decl_with_many_args() { 203 | let args = vec![ 204 | ("int", None), 205 | ("struct myStruct**", None), 206 | ("float*", None) 207 | ]; 208 | let expected = CDecl::new("int", "foo", args); 209 | check_decl("int foo(int, struct myStruct **, float *);", Some(expected)); 210 | } 211 | 212 | #[test] 213 | fn decl_with_some_named_args() { 214 | let args = vec![ 215 | ("int", None), 216 | ("struct myStruct**", Some("arg2")), 217 | ("float*", None) 218 | ]; 219 | let expected = CDecl::new("int", "foo", args); 220 | check_decl("int foo(int, struct 
myStruct **arg2, float *);", Some(expected)); 221 | } 222 | 223 | -------------------------------------------------------------------------------- /c/annotate/src/resources/annotation.c.template: -------------------------------------------------------------------------------- 1 | static AnnotationRef __{function_name}_function_annotations__() {{ 2 | static AnnotationRef s; 3 | if (s) {{ 4 | return s; 5 | }} else {{ 6 | s = InitFromJson("{annotation_json}"); 7 | 8 | {set_split_type_info} 9 | }} 10 | return s; 11 | }} 12 | -------------------------------------------------------------------------------- /c/annotate/src/resources/argstruct.c.template: -------------------------------------------------------------------------------- 1 | struct __{function_name}_callable__ {{ 2 | {struct_field_list} 3 | }} __attribute__((packed)); 4 | -------------------------------------------------------------------------------- /c/annotate/src/resources/callback.c.template: -------------------------------------------------------------------------------- 1 | intptr_t __{function_name}_callback__(const void *a) {{ 2 | struct __{function_name}_callable__ *arg = (struct __{function_name}_callable__ *)a; 3 | {return_value}{function_name}({callback_call_list}); 4 | return {final_return_value}; 5 | }} 6 | -------------------------------------------------------------------------------- /c/annotate/src/resources/file.c.template: -------------------------------------------------------------------------------- 1 | #ifndef _{function_name}_COMPOSER_DEFINED_ 2 | #define _{function_name}_COMPOSER_DEFINED_ 3 | 4 | /** Generated by Composer -- this should not be modified directly! 
**/ 5 | 6 | #include 7 | 8 | #ifdef __cplusplus 9 | extern "C" {{ 10 | #endif 11 | 12 | {externs} 13 | 14 | {generator} 15 | {argstruct} 16 | {callback} 17 | {callable} 18 | 19 | #ifdef __cplusplus 20 | }} 21 | #endif 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /c/annotate/src/resources/function.c.template: -------------------------------------------------------------------------------- 1 | {return_type} {prefix}{function_name}({argument_list}) {{ 2 | if (composer_evaluate()) {{ 3 | {return_tag} {function_name}({function_call_args}); 4 | }} else {{ 5 | composer_protect_all(); 6 | struct __{function_name}_callable__ v; 7 | {argstruct_construct_list} 8 | {register_return}composer_register_function(__{function_name}_function_annotations__(), 9 | __{function_name}_callback__, 10 | (void *)&v, {has_return_value}); 11 | {return_line} 12 | }} 13 | }} 14 | -------------------------------------------------------------------------------- /c/benchmarks/blackscholes/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # Libraries 3 | LD_LIBRARY_PATH=-L../../target/release -L../../lib/composer_mkl 4 | 5 | OS=$(shell uname -s) 6 | 7 | ifeq ($(OS), Darwin) 8 | CC=gcc-7 9 | CPP=g++-7 10 | LDFLAGS= 11 | INTEL=-DMKL_ILP64 -m64 -I${MKLROOT}/include -L${MKLROOT}/lib -Wl,-rpath,${MKLROOT}/lib -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 12 | else ifeq ($(OS), Linux) 13 | CC=gcc-5 14 | CPP=g++-5 15 | LDFLAGS=-Wl,-rpath-link -Wno-format 16 | INTEL= -DMKL_ILP64 -m64 -I${MKLROOT}/compilers_and_libraries/linux/mkl/include -L${MKLROOT}/compilers_and_libraries/linux/mkl/lib/intel64 -I${MKLROOT}/include -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 17 | else 18 | $(error Unsupported platform: $(OS)) 19 | endif 20 | 21 | CFLAGS=-O3 -Wall -pedantic 22 | INCLUDE=-I../../composer/ 
-I../../lib/composer_mkl 23 | EXEC=bench 24 | 25 | .PHONY: all clean 26 | 27 | all: 28 | $(CC) $(CFLAGS) $(INCLUDE) $(LD_LIBRARY_PATH) blackscholes.c -o $(EXEC) -lcomposer -lcomposer_mkl -lm $(INTEL) 29 | 30 | clean: 31 | rm -rf *.o $(EXEC) $(VLIB) 32 | -------------------------------------------------------------------------------- /c/benchmarks/blackscholes/benchmark-breakdown.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | tasks=( mklcomposer ) 6 | threads=( 16 ) 7 | 8 | for task in "${tasks[@]}"; do 9 | rm -f $task.stdout $task.stderr 10 | git log | head -1 > $task.stderr 11 | git log | head -1 > $task.stdout 12 | done 13 | 14 | # For composer... 15 | export RUST_LOG=info 16 | export OMP_NUM_THREADS=1 17 | 18 | for i in {1..5}; do 19 | for task in "${tasks[@]}"; do 20 | for nthreads in "${threads[@]}"; do 21 | ./bench -m $task -s 30 -t $nthreads >> $task.stdout 2>> $task.stderr 22 | done 23 | done 24 | done 25 | -------------------------------------------------------------------------------- /c/benchmarks/blackscholes/benchmark-pieces.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | tasks=( mklcomposer ) 6 | pieces=( 256 512 1024 2048 4096 8192 16384 ) 7 | 8 | for task in "${tasks[@]}"; do 9 | rm -f $task.stdout $task.stderr 10 | git log | head -1 > $task.stderr 11 | git log | head -1 > $task.stdout 12 | done 13 | 14 | for i in {1..5}; do 15 | for task in "${tasks[@]}"; do 16 | for npieces in "${pieces[@]}"; do 17 | ./bench -m $task -s 30 -p $npieces -t 16 >> $task.stdout 2>> $task.stderr 18 | done 19 | done 20 | done 21 | -------------------------------------------------------------------------------- /c/benchmarks/blackscholes/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | tasks=( mkl mklcomposer ) 6 | threads=( 1 2 4 8 16 ) 
7 | runs=${1:-1} 8 | size=30 9 | 10 | for task in "${tasks[@]}"; do 11 | rm -f $task.stdout $task.stderr 12 | git log | head -1 > $task.stderr 13 | git log | head -1 > $task.stdout 14 | done 15 | 16 | for i in {1..$runs}; do 17 | for nthreads in "${threads[@]}"; do 18 | export OMP_NUM_THREADS=$nthreads 19 | ./bench -m mkl -s $size -t $nthreads >> mkl.stdout 2>> mkl.stderr 20 | done 21 | done 22 | 23 | # Set the number of threads in the environment variable to 1, to prevent 24 | # the MKL functions from launching N threads per task. 25 | export OMP_NUM_THREADS=1 26 | for i in {1..$runs}; do 27 | for nthreads in "${threads[@]}"; do 28 | ./bench -m mklcomposer -s $size -t $nthreads >> mklcomposer.stdout 2>> mklcomposer.stderr 29 | done 30 | done 31 | -------------------------------------------------------------------------------- /c/benchmarks/breakdown.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parser for log data for system overhead breakdown. 
3 | """ 4 | 5 | class ThreadTime(object): 6 | def __init__(self, thread): 7 | self.thread = thread 8 | self.split_time = None 9 | self.driver_time = None 10 | self.merge_time = None 11 | 12 | @property 13 | def task_time(self): 14 | return self.driver_time - self.split_time 15 | 16 | def this_thread(self, line): 17 | return line.find("thread {}".format(self.thread)) != -1 18 | 19 | def parse(self, line): 20 | if self.this_thread(line): 21 | self.parse_split_time(line) 22 | self.parse_driver_time(line) 23 | self.parse_merge_time(line) 24 | 25 | def parse_split_time(self, line): 26 | if self.split_time is not None: 27 | return 28 | if line.find("total split time: ") != -1: 29 | self.split_time = float(line.split("total split time: ")[1].strip()) 30 | 31 | def parse_driver_time(self, line): 32 | if self.driver_time is not None: 33 | return 34 | if line.find("driver time: ") != -1: 35 | self.driver_time = float(line.split("driver time: ")[1].split(' ')[0].strip()) 36 | 37 | def parse_merge_time(self, line): 38 | if self.merge_time is not None: 39 | return 40 | if line.find("merge time: ") != -1: 41 | self.merge_time = float(line.split("merge time: ")[1].strip()) 42 | 43 | def __str__(self): 44 | return "{},{},{}".format(self.split_time, self.task_time, self.merge_time) 45 | 46 | class Run(object): 47 | def __init__(self, threads, run_delimiter): 48 | self.threads = threads 49 | self.run_delimiter = run_delimiter 50 | self.unprotect = None 51 | self.planner = None 52 | self.final_merge = None 53 | self.thread_times = [ThreadTime(i) for i in xrange(threads)] 54 | 55 | def parse(self, line): 56 | self.parse_unprotect(line) 57 | self.parse_planner(line) 58 | for thread in self.thread_times: 59 | thread.parse(line) 60 | self.parse_final_merge(line) 61 | 62 | def parse_unprotect(self, line): 63 | if self.unprotect is not None: 64 | return 65 | if line.find("Unprotect memory: ") != -1: 66 | self.unprotect = float(line.split("Unprotect memory: ")[1].strip()) 67 | 68 | def 
parse_planner(self, line): 69 | if self.planner is not None: 70 | return 71 | if line.find("Planner time: ") != -1: 72 | self.planner = float(line.split("Planner time: ")[1].strip()) 73 | 74 | def parse_final_merge(self, line): 75 | if self.final_merge is not None: 76 | return 77 | if line.find("final merge time: ") != -1: 78 | self.final_merge = float(line.split("final merge time: ")[1].strip()) 79 | 80 | def finished(self, line): 81 | if line.find(self.run_delimiter) != -1: 82 | return True 83 | else: 84 | return False 85 | 86 | def __str__(self): 87 | average_split_time = max([t.split_time for t in self.thread_times]) 88 | average_task_time = max([t.task_time for t in self.thread_times]) 89 | average_merge_time = max([t.merge_time for t in self.thread_times]) + self.final_merge 90 | return "{:.5f},{:.5f},{:.5f},{:.5f},{:.5f}".format(self.unprotect, self.planner, average_split_time, average_task_time, average_merge_time) 91 | 92 | def parse(filename, threads, run_delimiter): 93 | current_run = Run(threads, run_delimiter) 94 | print "unprotect,planner,split,task,merge" 95 | with open(filename) as f: 96 | for line in f: 97 | if current_run.finished(line): 98 | print current_run 99 | current_run = Run(threads, run_delimiter) 100 | else: 101 | current_run.parse(line) 102 | 103 | # black scholes 104 | # parse("blackscholes/breakdown-results/mklcomposer.stderr", 16, "First put value") 105 | 106 | parse("nashville/breakdown-results/composer.stderr", 16, "image size:") 107 | -------------------------------------------------------------------------------- /c/benchmarks/get-data.sh: -------------------------------------------------------------------------------- 1 | # Gets the data for all C experiments and puts it in the proper place so the benchmark script runs. 
2 | rm -rf datasets 3 | wget https://www.spacetelescope.org/static/archives/images/publicationtiff40k/heic1502a.tif 4 | mkdir -p datasets/ 5 | mv heic1502a.tif datasets/heic1502a-40k.tif 6 | 7 | -------------------------------------------------------------------------------- /c/benchmarks/gotham/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # Libraries 3 | LD_LIBRARY_PATH=-L../../target/release -L../../lib/ImageMagick/ 4 | 5 | OS=$(shell uname -s) 6 | 7 | ifeq ($(OS), Darwin) 8 | CC=gcc-7 9 | CPP=g++-7 10 | LDFLAGS= 11 | MAGICK=$(shell pkg-config --cflags --libs MagickWand) 12 | else ifeq ($(OS), Linux) 13 | CC=gcc-5 14 | CPP=g++-5 15 | LDFLAGS=-Wl,-rpath-link -Wno-format 16 | MAGICK=$(shell pkg-config --cflags --libs MagickWand) 17 | else 18 | $(error Unsupported platform: $(OS)) 19 | endif 20 | 21 | CFLAGS=-O3 -Wall -pedantic -Wno-discarded-qualifiers 22 | INCLUDE=-I../../composer/ -I/usr/local/include/ImageMagick-7/MagickWand -I../../lib/ImageMagick 23 | EXEC=bench 24 | 25 | .PHONY: all clean 26 | 27 | all: 28 | $(CC) -fopenmp $(CFLAGS) $(INCLUDE) $(LD_LIBRARY_PATH) gotham.c gotham_composer.c -o $(EXEC) $(MAGICK) -lcomposer -lcomposer_imagemagick -lm 29 | 30 | clean: 31 | rm -rf *.o $(EXEC) 32 | -------------------------------------------------------------------------------- /c/benchmarks/gotham/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | tasks=( naive composer ) 6 | threads=( 1 2 4 8 16 ) 7 | runs=${1:-1} 8 | 9 | for task in "${tasks[@]}"; do 10 | rm -f $task.stderr $task.stdout 11 | git log | head -1 > $task.stderr 12 | git log | head -1 > $task.stdout 13 | done 14 | 15 | for i in {1..$runs}; do 16 | for task in "${tasks[@]}"; do 17 | for nthreads in "${threads[@]}"; do 18 | taskset -a -c 0-19 ./bench -m $task -i ../datasets/heic1502a-40k.tif -t $nthreads >> $task.stdout 2>> $task.stderr 19 | done 20 | done 21 | done 22 
| -------------------------------------------------------------------------------- /c/benchmarks/gotham/gotham.c: -------------------------------------------------------------------------------- 1 | // Andromeda: https://www.spacetelescope.org/images/heic1502a/ 2 | // 3 | // https://www.spacetelescope.org/static/archives/images/publicationtiff40k/heic1502a.tif 4 | 5 | #include 6 | 7 | #ifdef __linux__ 8 | #define _GNU_SOURCE 9 | #endif 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | #include "gotham.h" 26 | #include "gotham_composer.h" 27 | 28 | typedef enum { 29 | UNKNOWN = 0, 30 | NAIVE, 31 | COMPOSER, 32 | } exec_mode_t; 33 | 34 | // Number of threads. 35 | long threads = 1; 36 | // Mode to use 37 | exec_mode_t mode; 38 | // input filename. 39 | char input_filename[2048]; 40 | // TODO This should be autotuned/based on the input size. 41 | int piece_size = 20; 42 | // Should the result image be written to an output file? 43 | // 44 | // Used offline to check for correctness. 
45 | int write_out = 0; 46 | 47 | MagickWand *do_gotham(MagickWand *input_wand) { 48 | // modulate 120,10,100 49 | MagickModulateImage(input_wand, HUE, SATURATION, VALUE); 50 | 51 | // colorize 52 | PixelWand *colorize = NewPixelWand(); 53 | PixelWand *alpha = NewPixelWand(); 54 | PixelSetColor(colorize,"#222b6d"); 55 | PixelSetColor(alpha, "rgb(20%,20%,20%)"); 56 | MagickColorizeImage(input_wand, colorize, alpha); 57 | MagickColorizeImage(input_wand, colorize, alpha); 58 | MagickColorizeImage(input_wand, colorize, alpha); 59 | MagickColorizeImage(input_wand, colorize, alpha); 60 | MagickColorizeImage(input_wand, colorize, alpha); 61 | 62 | // gamma 0.5 63 | MagickGammaImage(input_wand, GAMMA); 64 | 65 | // contrast 66 | MagickContrastImage(input_wand, 1); 67 | // contrast 68 | MagickContrastImage(input_wand, 1); 69 | 70 | DestroyPixelWand(alpha); 71 | DestroyPixelWand(colorize); 72 | } 73 | 74 | MagickWand *gotham_simple(MagickWand *input_wand) { 75 | do_gotham(input_wand); 76 | 77 | return input_wand; 78 | } 79 | 80 | exec_mode_t get_mode(char *s) { 81 | if (strcmp("naive", s) == 0) { 82 | return NAIVE; 83 | } else if (strcmp("composer", s) == 0) { 84 | return COMPOSER; 85 | } else { 86 | return UNKNOWN; 87 | } 88 | } 89 | 90 | void print_usage(char **argv) { 91 | fprintf(stderr, "%s -i -m [-t -h -o ]\n", argv[0]); 92 | fprintf(stderr, "Available modes:\n"); 93 | fprintf(stderr, "\tnaive\n" 94 | "\tcomposer\n" 95 | ); 96 | } 97 | 98 | int power_of_two(long x) { 99 | return x && !(x & (x - 1)); 100 | } 101 | 102 | void parse_args(int argc, char **argv) { 103 | int opt; 104 | while ((opt = getopt(argc, argv, "i:m:t:h:o")) != -1) { 105 | switch (opt) { 106 | case 'i': 107 | sprintf(input_filename, "%s", optarg); 108 | break; 109 | case 'm': 110 | mode = get_mode(optarg); 111 | if (mode == UNKNOWN) { 112 | print_usage(argv); 113 | exit(EXIT_FAILURE); 114 | } 115 | break; 116 | case 't': 117 | threads = atol(optarg); 118 | if (!power_of_two(threads) || threads > 40) { 
119 | fprintf(stderr, "threads must be power-of-2 and < 16\n"); 120 | exit(EXIT_FAILURE); 121 | } 122 | break; 123 | case 'o': 124 | write_out = 1; 125 | break; 126 | case 'h': 127 | default: 128 | print_usage(argv); 129 | exit(EXIT_FAILURE); 130 | } 131 | } 132 | } 133 | 134 | int main(int argc,char **argv) { 135 | 136 | parse_args(argc, argv); 137 | if (mode == UNKNOWN || strlen(input_filename) == 0) { 138 | print_usage(argv); 139 | exit(EXIT_FAILURE); 140 | } 141 | 142 | struct stat s; 143 | if (stat(input_filename, &s) == -1) { 144 | perror("Input file error"); 145 | exit(EXIT_FAILURE); 146 | } 147 | 148 | // Need to call this before any of the other library functions. 149 | if (mode == COMPOSER) { 150 | composer_init(threads, piece_size); 151 | omp_set_num_threads(1); 152 | } else { 153 | omp_set_num_threads(threads); 154 | } 155 | 156 | printf("Input file: %s (%ld bytes) Piece Size: %d Threads: %ld Mode: %d\n", 157 | input_filename, s.st_size, piece_size, threads, mode); 158 | 159 | MagickWandGenesis(); 160 | 161 | MagickWand *wand = NewMagickWand(); 162 | 163 | printf("Reading image..."); 164 | fflush(stdout); 165 | MagickReadImage(wand, input_filename); 166 | printf("done.\n"); 167 | fflush(stdout); 168 | 169 | struct timeval start, end, diff; 170 | gettimeofday(&start, NULL); 171 | 172 | // Run function 173 | switch (mode) { 174 | case NAIVE: 175 | wand = gotham_simple(wand); 176 | break; 177 | case COMPOSER: 178 | wand = gotham_composer(wand); 179 | break; 180 | case UNKNOWN: 181 | default: 182 | fprintf(stderr, "unsupported case"); 183 | exit(EXIT_FAILURE); 184 | } 185 | gettimeofday(&end, NULL); 186 | 187 | timersub(&end, &start, &diff); 188 | double runtime = (double)diff.tv_sec + ((double)diff.tv_usec / 1000000.0); 189 | 190 | printf("%f seconds\n", runtime); 191 | fflush(stderr); 192 | 193 | if (write_out) { 194 | printf("Writing image..."); 195 | fflush(stdout); 196 | char output[256]; 197 | sprintf(output, "output-%d.jpg", mode); 198 | 
MagickWriteImage(wand, output); 199 | printf("done.\n"); 200 | fflush(stdout); 201 | } 202 | 203 | wand = DestroyMagickWand(wand); 204 | MagickWandTerminus(); 205 | } 206 | -------------------------------------------------------------------------------- /c/benchmarks/gotham/gotham.h: -------------------------------------------------------------------------------- 1 | #ifndef _NASHVILLE_H_ 2 | #define _NASHVILLE_H_ 3 | 4 | #define GAMMA (0.5) 5 | #define HUE (120) 6 | #define SATURATION (10) 7 | #define VALUE (100) 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /c/benchmarks/gotham/gotham_composer.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef __linux__ 4 | #define _GNU_SOURCE 5 | #endif 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | #include 19 | 20 | #include "gotham_composer.h" 21 | #include "gotham.h" 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | MagickWand *c_do_gotham(MagickWand *input_wand) { 28 | // modulate 120,10,100 29 | c_MagickModulateImage(input_wand, HUE, SATURATION, VALUE); 30 | 31 | // colorize 32 | PixelWand *colorize = c_NewPixelWand(); 33 | PixelWand *alpha = c_NewPixelWand(); 34 | c_PixelSetColor(colorize,"#222b6d"); 35 | c_PixelSetColor(alpha, "rgb(20%,20%,20%)"); 36 | c_MagickColorizeImage(input_wand, colorize, alpha); 37 | c_MagickColorizeImage(input_wand, colorize, alpha); 38 | c_MagickColorizeImage(input_wand, colorize, alpha); 39 | c_MagickColorizeImage(input_wand, colorize, alpha); 40 | c_MagickColorizeImage(input_wand, colorize, alpha); 41 | 42 | // gamma 0.5 43 | c_MagickGammaImage(input_wand, GAMMA); 44 | 45 | c_DestroyPixelWand(alpha); 46 | c_DestroyPixelWand(colorize); 47 | 48 | // contrast 49 | c_MagickContrastImage(input_wand, 1); 50 | // contrast 51 | c_MagickContrastImage(input_wand, 1); 52 | } 53 | 54 | MagickWand 
*gotham_composer(MagickWand *input_wand) { 55 | 56 | c_do_gotham(input_wand); 57 | 58 | // TODO we can do this automatically by adding a "mut" 59 | composer_emit(&input_wand, sizeof(input_wand), (intptr_t)WandSplit_merge); 60 | composer_execute(); 61 | 62 | return input_wand; 63 | } 64 | -------------------------------------------------------------------------------- /c/benchmarks/gotham/gotham_composer.h: -------------------------------------------------------------------------------- 1 | #ifndef _NASHVILLE_COMPOSER_H_ 2 | #define _NASHVILLE_COMPOSER_H_ 3 | 4 | #include 5 | 6 | MagickWand *gotham_composer(MagickWand *input_wand); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /c/benchmarks/haversine/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # Libraries 3 | LD_LIBRARY_PATH=-L../../target/release -L../../lib/composer_mkl 4 | 5 | OS=$(shell uname -s) 6 | 7 | ifeq ($(OS), Darwin) 8 | CC=gcc-7 9 | CPP=g++-7 10 | LDFLAGS= 11 | INTEL=-DMKL_ILP64 -m64 -I${MKLROOT}/include -L${MKLROOT}/lib -Wl,-rpath,${MKLROOT}/lib -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 12 | else ifeq ($(OS), Linux) 13 | CC=gcc-5 14 | CPP=g++-5 15 | LDFLAGS=-Wl,-rpath-link -Wno-format 16 | INTEL= -DMKL_ILP64 -m64 -I${MKLROOT}/include -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 17 | else 18 | $(error Unsupported platform: $(OS)) 19 | endif 20 | 21 | CFLAGS=-O3 -Wall -pedantic 22 | INCLUDE=-I../../composer/ -I../../lib/composer_mkl 23 | EXEC=bench 24 | 25 | .PHONY: all clean 26 | 27 | all: 28 | $(CC) -fopenmp $(CFLAGS) $(INCLUDE) $(LD_LIBRARY_PATH) haversine.c -o $(EXEC) -lcomposer -lcomposer_mkl -lm $(INTEL) 29 | 30 | 31 | clean: 32 | rm -rf *.o $(EXEC) $(VLIB) 33 | -------------------------------------------------------------------------------- /c/benchmarks/haversine/benchmark-pieces.sh: 
#!/bin/bash
#
# Haversine benchmark: sweeps thread counts for the mkl and mklcomposer
# modes, appending results to <task>.stdout / <task>.stderr.
#
# Usage: ./benchmark.sh [runs]   (default: 1 full sweep)

set -x

tasks=( mkl mklcomposer )
threads=( 1 2 4 8 16 )
runs=${1:-1}
size=30

# Start each task's log files fresh, tagged with the current git commit.
for task in "${tasks[@]}"; do
    rm -f $task.stdout $task.stderr
    git log | head -1 > $task.stderr
    git log | head -1 > $task.stdout
done

# BUG FIX: `for i in {1..$runs}` never expands $runs (brace expansion runs
# before parameter expansion), so both sweeps executed exactly once. seq
# expands at runtime and honors the requested run count.
for i in $(seq 1 "$runs"); do
    for nthreads in "${threads[@]}"; do
        export OMP_NUM_THREADS=$nthreads
        ./bench -m mkl -s $size -t $nthreads >> mkl.stdout 2>> mkl.stderr
    done
done

# Set the number of threads in the environment variable to 1, to prevent
# the MKL functions from launching N threads per task.
export OMP_NUM_THREADS=1
for i in $(seq 1 "$runs"); do
    for nthreads in "${threads[@]}"; do
        ./bench -m mklcomposer -s $size -t $nthreads >> mklcomposer.stdout 2>> mklcomposer.stderr
    done
done
$task.stdout 2>> $task.stderr 20 | done 21 | done 22 | done 23 | -------------------------------------------------------------------------------- /c/benchmarks/nashville/benchmark-pieces.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | tasks=( composer ) 6 | pieces=( 32 256 512 1024 2048 4096 8192 16384 ) 7 | 8 | for task in "${tasks[@]}"; do 9 | rm -f $task.stdout $task.stderr 10 | git log | head -1 > $task.stderr 11 | git log | head -1 > $task.stdout 12 | done 13 | 14 | for i in {1..5}; do 15 | for task in "${tasks[@]}"; do 16 | for npieces in "${pieces[@]}"; do 17 | RUST_LOG=info ./bench -m $task -i ~/heic1502a-40k.tif -p $npieces -t 8 >> $task.stdout 2>> $task.stderr 18 | done 19 | done 20 | done 21 | -------------------------------------------------------------------------------- /c/benchmarks/nashville/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | tasks=( naive composer ) 6 | threads=( 1 2 4 8 16 ) 7 | runs=${1:-1} 8 | 9 | for task in "${tasks[@]}"; do 10 | rm -f $task.stderr $task.stdout 11 | git log | head -1 > $task.stderr 12 | git log | head -1 > $task.stdout 13 | done 14 | 15 | for i in {1..$runs}; do 16 | for task in "${tasks[@]}"; do 17 | for nthreads in "${threads[@]}"; do 18 | /usr/bin/time ./bench -m $task -i ../datasets/heic1502a-40k.tif -t $nthreads >> $task.stdout 2>> $task.stderr 19 | done 20 | done 21 | done 22 | -------------------------------------------------------------------------------- /c/benchmarks/nashville/nashville.c: -------------------------------------------------------------------------------- 1 | // Andromeda: https://www.spacetelescope.org/images/heic1502a/ 2 | // 3 | // https://www.spacetelescope.org/static/archives/images/publicationtiff40k/heic1502a.tif 4 | 5 | #include 6 | 7 | #ifdef __linux__ 8 | #define _GNU_SOURCE 9 | #endif 10 | #include 11 | 12 | #include 13 
| #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | #include "nashville.h" 26 | #include "nashville_parallel.h" 27 | #include "nashville_composer.h" 28 | 29 | typedef enum { 30 | UNKNOWN = 0, 31 | NAIVE, 32 | COMPOSER, 33 | PARALLEL 34 | } exec_mode_t; 35 | 36 | // Number of threads. 37 | long threads = 1; 38 | // Mode to use 39 | exec_mode_t mode; 40 | // input filename. 41 | char input_filename[2048]; 42 | // TODO This should be autotuned/based on the input size. 43 | int pieces = 20; 44 | // Should the result be written to a file? 45 | int write_out = 0; 46 | 47 | // Actual work for the colortone function. 48 | void do_colortone(MagickWand *wand, 49 | const char *color, 50 | const char *compose_opt, 51 | int negate, 52 | MagickWand *colorized_wand, 53 | MagickWand *colorspace_wand) { 54 | 55 | // Colorize image. 56 | PixelWand *colorize = NewPixelWand(); 57 | PixelWand *alpha = NewPixelWand(); 58 | PixelSetColor(colorize, color); 59 | PixelSetColor(alpha, "#fff"); 60 | MagickColorizeImage(colorized_wand, colorize, alpha); 61 | 62 | // Convert to grayspace. 63 | MagickSetImageColorspace(colorspace_wand, GRAYColorspace); 64 | if (negate) { 65 | MagickNegateImage(colorspace_wand, 1); 66 | } 67 | 68 | MagickSetImageArtifact(wand, "compose:args", compose_opt); 69 | MagickCompositeImage(wand, colorspace_wand, BlendCompositeOp, 1, 0, 0); 70 | MagickCompositeImage(wand, colorized_wand, BlendCompositeOp, 1, 0, 0); 71 | 72 | // Cleanup. 
73 | colorize = DestroyPixelWand(colorize); 74 | alpha = DestroyPixelWand(alpha); 75 | 76 | } 77 | 78 | MagickWand *colortone_simple(MagickWand *input_wand, 79 | const char *color, 80 | const char *compose_opt, 81 | int negate) { 82 | 83 | MagickWand *wand = CloneMagickWand(input_wand); 84 | MagickWand *colorized_wand = CloneMagickWand(wand); 85 | MagickWand *colorspace_wand = CloneMagickWand(wand); 86 | 87 | do_colortone(wand, color, compose_opt, negate, colorized_wand, colorspace_wand); 88 | do_colortone(wand, color, compose_opt, negate, colorized_wand, colorspace_wand); 89 | MagickModulateImage(wand, HUE, SATURATION, VALUE); 90 | MagickGammaImage(wand, GAMMA); 91 | 92 | colorized_wand = DestroyMagickWand(colorized_wand); 93 | colorspace_wand = DestroyMagickWand(colorspace_wand); 94 | 95 | return wand; 96 | } 97 | 98 | exec_mode_t get_mode(char *s) { 99 | if (strcmp("naive", s) == 0) { 100 | return NAIVE; 101 | } else if (strcmp("composer", s) == 0) { 102 | return COMPOSER; 103 | } else if (strcmp("parallel", s) == 0) { 104 | return PARALLEL; 105 | } else { 106 | return UNKNOWN; 107 | } 108 | } 109 | 110 | void print_usage(char **argv) { 111 | fprintf(stderr, "%s -i -m [-t -h]\n", argv[0]); 112 | fprintf(stderr, "Available modes:\n"); 113 | fprintf(stderr, "\tnaive\n" 114 | "\tcomposer\n" 115 | ); 116 | } 117 | 118 | int power_of_two(long x) { 119 | return x && !(x & (x - 1)); 120 | } 121 | 122 | void parse_args(int argc, char **argv) { 123 | int opt; 124 | while ((opt = getopt(argc, argv, "i:m:p:t:h:o")) != -1) { 125 | switch (opt) { 126 | case 'i': 127 | sprintf(input_filename, "%s", optarg); 128 | break; 129 | case 'm': 130 | mode = get_mode(optarg); 131 | if (mode == UNKNOWN) { 132 | print_usage(argv); 133 | exit(EXIT_FAILURE); 134 | } 135 | break; 136 | case 't': 137 | threads = atol(optarg); 138 | break; 139 | case 'o': 140 | write_out = 1; 141 | break; 142 | case 'p': 143 | pieces = atol(optarg); 144 | if (pieces < 0) { 145 | fprintf(stderr, "pieces must 
be > 0\n"); 146 | exit(EXIT_FAILURE); 147 | } 148 | break; 149 | case 'h': 150 | default: 151 | print_usage(argv); 152 | exit(EXIT_FAILURE); 153 | } 154 | } 155 | } 156 | 157 | int main(int argc,char **argv) { 158 | 159 | parse_args(argc, argv); 160 | if (mode == UNKNOWN || strlen(input_filename) == 0) { 161 | print_usage(argv); 162 | exit(EXIT_FAILURE); 163 | } 164 | 165 | struct stat s; 166 | if (stat(input_filename, &s) == -1) { 167 | perror("Input file error"); 168 | exit(EXIT_FAILURE); 169 | } 170 | 171 | // Need to call this before any of the other library functions. 172 | if (mode == COMPOSER) { 173 | omp_set_num_threads(threads); 174 | composer_init(threads, pieces); 175 | } else { 176 | omp_set_num_threads(threads); 177 | } 178 | 179 | printf("Input file: %s (%ld bytes) Piece Size: %d Threads: %ld Mode: %d\n", 180 | input_filename, s.st_size, pieces, threads, mode); 181 | 182 | MagickWandGenesis(); 183 | 184 | MagickWand *wand = NewMagickWand(); 185 | 186 | printf("Reading image..."); 187 | fflush(stdout); 188 | MagickReadImage(wand, input_filename); 189 | printf("done.\n"); 190 | fflush(stdout); 191 | 192 | MagickWand *result; 193 | 194 | struct timeval start, end, diff; 195 | gettimeofday(&start, NULL); 196 | 197 | // Run function 198 | switch (mode) { 199 | case NAIVE: 200 | result = colortone_simple(wand, "#222b6d", "50,50", 1); 201 | break; 202 | case PARALLEL: 203 | result = colortone_parallel(wand, "#222b6d", "50,50", 1, threads); 204 | break; 205 | case COMPOSER: 206 | result = colortone_composer(wand, "#222b6d", "50,50", 1); 207 | break; 208 | case UNKNOWN: 209 | default: 210 | fprintf(stderr, "unsupported case"); 211 | exit(EXIT_FAILURE); 212 | } 213 | gettimeofday(&end, NULL); 214 | 215 | timersub(&end, &start, &diff); 216 | double runtime = (double)diff.tv_sec + ((double)diff.tv_usec / 1000000.0); 217 | 218 | printf("%f seconds\n", runtime); 219 | fflush(stderr); 220 | 221 | if (write_out) { 222 | printf("Writing image..."); 223 | 
fflush(stdout); 224 | char output[256]; 225 | sprintf(output, "output-%d.jpg", mode); 226 | // MagickWriteImage(result, output); 227 | printf("done.\n"); 228 | fflush(stdout); 229 | } 230 | 231 | wand = DestroyMagickWand(wand); 232 | result = DestroyMagickWand(result); 233 | MagickWandTerminus(); 234 | } 235 | -------------------------------------------------------------------------------- /c/benchmarks/nashville/nashville.h: -------------------------------------------------------------------------------- 1 | #ifndef _NASHVILLE_H_ 2 | #define _NASHVILLE_H_ 3 | 4 | #define GAMMA (0.7) 5 | #define HUE (100) 6 | #define SATURATION (150) 7 | #define VALUE (100) 8 | 9 | // Actual work for the colortone function, without Composer. 10 | void do_colortone(MagickWand *wand, 11 | const char *color, 12 | const char *compose_opt, 13 | int negate, 14 | MagickWand *colorized_wand, 15 | MagickWand *colorspace_wand); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /c/benchmarks/nashville/nashville_composer.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef __linux__ 4 | #define _GNU_SOURCE 5 | #endif 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | #include 19 | 20 | #include "nashville_composer.h" 21 | #include "nashville.h" 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | // Actual work for the colortone function. 28 | void c_do_colortone(MagickWand *wand, 29 | const char *color, 30 | const char *compose_opt, 31 | int negate, 32 | MagickWand *colorized_wand, 33 | MagickWand *colorspace_wand) { 34 | 35 | // Colorize image. 
36 | PixelWand *colorize = c_NewPixelWand(); 37 | PixelWand *alpha = c_NewPixelWand(); 38 | c_PixelSetColor(colorize, color); 39 | c_PixelSetColor(alpha, "#fff"); 40 | c_MagickColorizeImage(colorized_wand, colorize, alpha); 41 | 42 | // Convert to grayspace. 43 | c_MagickSetImageColorspace(colorspace_wand, GRAYColorspace); 44 | if (negate) { 45 | c_MagickNegateImage(colorspace_wand, 1); 46 | } 47 | 48 | c_MagickSetImageArtifact(wand, "compose:args", compose_opt); 49 | c_MagickCompositeImage(wand, colorspace_wand, BlendCompositeOp, 1, 0, 0); 50 | c_MagickCompositeImage(wand, colorized_wand, BlendCompositeOp, 1, 0, 0); 51 | 52 | // Cleanup. 53 | colorize = c_DestroyPixelWand(colorize); 54 | alpha = c_DestroyPixelWand(alpha); 55 | } 56 | 57 | MagickWand *colortone_composer(MagickWand *input_wand, 58 | const char *color, 59 | const char *compose_opt, 60 | int negate) { 61 | 62 | MagickWand *wand = c_CloneMagickWand(input_wand); 63 | MagickWand *colorized_wand = c_CloneMagickWand(wand); 64 | MagickWand *colorspace_wand = c_CloneMagickWand(wand); 65 | 66 | c_do_colortone(wand, color, compose_opt, negate, colorized_wand, colorspace_wand); 67 | c_do_colortone(wand, color, compose_opt, negate, colorized_wand, colorspace_wand); 68 | 69 | // WriteImage (wand, colorized_wand, colorspace_wand, before execution: need 70 | // the output split type of each variable. If the output split type is 71 | // broadcast, we can just return one of the partitions instead of the result 72 | // of a merge. TODO Justify the logic behind doing this (or why it makes 73 | // sense to define API in this way). 
74 | 75 | c_MagickModulateImage(wand, HUE, SATURATION, VALUE); 76 | c_MagickGammaImage(wand, GAMMA); 77 | 78 | colorized_wand = c_DestroyMagickWand(colorized_wand); 79 | colorspace_wand = c_DestroyMagickWand(colorspace_wand); 80 | 81 | // TODO we can do this automatically by adding a "mut" 82 | composer_emit(&wand, sizeof(wand), (intptr_t)WandSplit_merge); 83 | 84 | composer_execute(); 85 | return wand; 86 | } 87 | -------------------------------------------------------------------------------- /c/benchmarks/nashville/nashville_composer.h: -------------------------------------------------------------------------------- 1 | #ifndef _NASHVILLE_COMPOSER_H_ 2 | #define _NASHVILLE_COMPOSER_H_ 3 | 4 | #include 5 | 6 | MagickWand *colortone_composer(MagickWand *input_wand, const char *color, const char *compose_opt, int negate); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /c/benchmarks/nashville/nashville_parallel.c: -------------------------------------------------------------------------------- 1 | #ifdef __linux__ 2 | #define _GNU_SOURCE 3 | #endif 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | #include 17 | 18 | #include "nashville.h" 19 | 20 | typedef struct piece_ { 21 | MagickWand *piece; 22 | int index; 23 | } piece_t; 24 | 25 | int compare(const void *a, const void *b) { 26 | const piece_t *left = (const piece_t *)a; 27 | const piece_t *right = (const piece_t *)b; 28 | return left->index - right->index; 29 | } 30 | 31 | MagickWand *aggregate_seq(piece_t *pieces, int count) { 32 | MagickWand *results = NewMagickWand(); 33 | MagickResetIterator(results); 34 | 35 | for (int i = 0; i < count; i++) { 36 | MagickSetLastIterator(results); 37 | MagickAddImage(results, pieces[i].piece); 38 | } 39 | 40 | MagickResetIterator(results); 41 | MagickWand *final = MagickAppendImages(results, 1); 42 | DestroyMagickWand(results); 43 | 44 | 
return final; 45 | } 46 | 47 | MagickWand *aggregate_par(piece_t *pieces, int count, int threads) { 48 | 49 | // Holds aggregation state. 50 | MagickWand **results = (MagickWand **)malloc(sizeof(MagickWand *) * threads); 51 | for (int i = 0; i < threads; i++) { 52 | results[i] = NewMagickWand(); 53 | MagickResetIterator(results[i]); 54 | } 55 | 56 | int values_per_thread = count / threads; 57 | printf("values per piece: %d\n", values_per_thread); 58 | 59 | #pragma omp parallel for 60 | for (int i = 0; i < threads; i++) { 61 | int start = i * values_per_thread; 62 | int end = (i + 1) * values_per_thread; 63 | 64 | if (i == threads - 1) { 65 | end = count; 66 | } 67 | 68 | MagickWand *result = results[i]; 69 | 70 | // printf("thread %d: %d->%d\n", omp_get_thread_num(), start, end); 71 | for (int j = start; j < end; j++) { 72 | MagickSetLastIterator(result); 73 | MagickAddImage(result, pieces[j].piece); 74 | } 75 | 76 | MagickResetIterator(result); 77 | MagickWand *final = MagickAppendImages(result, 1); 78 | 79 | result = DestroyMagickWand(result); 80 | results[i] = final; 81 | } 82 | 83 | MagickWand *final_iterator = NewMagickWand(); 84 | MagickResetIterator(final_iterator); 85 | for (int i = 0; i < threads; i++) { 86 | MagickSetLastIterator(final_iterator); 87 | MagickAddImage(final_iterator, results[i]); 88 | } 89 | MagickResetIterator(final_iterator); 90 | MagickWand *final = MagickAppendImages(final_iterator, 1); 91 | 92 | for (int i = 0; i < threads; i++) { 93 | DestroyMagickWand(results[i]); 94 | } 95 | free(results); 96 | 97 | return final; 98 | } 99 | 100 | MagickWand *colortone_parallel(MagickWand *input_wand, const char *color, const char *compose_opt, int negate, int threads) { 101 | size_t width = MagickGetImageWidth(input_wand); 102 | size_t height = MagickGetImageHeight(input_wand); 103 | 104 | printf("Image is (%ld x %ld) pixels\n", width, height); 105 | 106 | // We want each chunk to be close to the L2 cache size. 
107 | const int l2_cache_size_bytes = 262144 * 3; 108 | // Number of rows to process per batch. 109 | size_t region_height = l2_cache_size_bytes / width; 110 | if (region_height == 0) { 111 | region_height = 1; 112 | } 113 | region_height = 199; 114 | 115 | // TODO this might shave off a few things. 116 | int num_regions = height / region_height; 117 | printf("Regions: %d\n", num_regions); 118 | 119 | struct timeval start, end, diff; 120 | gettimeofday(&start, NULL); 121 | 122 | piece_t *pieces = malloc(num_regions * sizeof(piece_t)); 123 | 124 | #pragma omp parallel for 125 | for (int i = 0; i < num_regions; i++) { 126 | /* 127 | printf("%d Looking at region (%ld -> %ld, %ld -> %ld)\n", i, 128 | 0l, 0l + width, 129 | region_height * i, region_height * i + region_height); 130 | */ 131 | MagickWand *wand = MagickGetImageRegion(input_wand, width, 132 | region_height, 0, region_height * i); 133 | 134 | MagickWand *colorized_wand = CloneMagickWand(wand); 135 | MagickWand *colorspace_wand = CloneMagickWand(wand); 136 | 137 | do_colortone(wand, color, compose_opt, negate, colorized_wand, colorspace_wand); 138 | do_colortone(wand, color, compose_opt, negate, colorized_wand, colorspace_wand); 139 | MagickModulateImage(wand, HUE, SATURATION, VALUE); 140 | MagickGammaImage(wand, GAMMA); 141 | 142 | colorized_wand = DestroyMagickWand(colorized_wand); 143 | colorspace_wand = DestroyMagickWand(colorspace_wand); 144 | 145 | pieces[i].index = i; 146 | pieces[i].piece = wand; 147 | } 148 | 149 | gettimeofday(&end, NULL); 150 | timersub(&end, &start, &diff); 151 | double runtime = (double)diff.tv_sec + ((double)diff.tv_usec / 1000000.0); 152 | printf("Processing runtime: %.3f seconds\n", runtime); 153 | fflush(stdout); 154 | 155 | gettimeofday(&start, NULL); 156 | 157 | // Sort pieces by their index. 
158 | qsort(pieces, num_regions, sizeof(piece_t), compare); 159 | 160 | gettimeofday(&end, NULL); 161 | timersub(&end, &start, &diff); 162 | runtime = (double)diff.tv_sec + ((double)diff.tv_usec / 1000000.0); 163 | printf("Sort runtime: %.3f seconds\n", runtime); 164 | fflush(stdout); 165 | 166 | gettimeofday(&start, NULL); 167 | 168 | MagickWand *final; 169 | if (num_regions / threads > 16) { 170 | printf("parallel aggregation\n"); 171 | final = aggregate_par(pieces, num_regions, threads); 172 | } else { 173 | printf("sequential aggregation\n"); 174 | final = aggregate_seq(pieces, num_regions); 175 | } 176 | 177 | free(pieces); 178 | 179 | gettimeofday(&end, NULL); 180 | timersub(&end, &start, &diff); 181 | runtime = (double)diff.tv_sec + ((double)diff.tv_usec / 1000000.0); 182 | printf("Total aggregation runtime: %.3f seconds\n", runtime); 183 | fflush(stdout); 184 | 185 | return final; 186 | } 187 | -------------------------------------------------------------------------------- /c/benchmarks/nashville/nashville_parallel.h: -------------------------------------------------------------------------------- 1 | #ifndef _NASHVILLE_PARALLEL_H_ 2 | #define _NASHVILLE_PARALLEL_H_ 3 | 4 | #include 5 | 6 | MagickWand *colortone_parallel(MagickWand *input_wand, const char *color, const char *compose_opt, int negate, int threads); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /c/benchmarks/nbody/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # Libraries 3 | LD_LIBRARY_PATH=-L../../target/release -L../../lib/composer_mkl 4 | 5 | OS=$(shell uname -s) 6 | 7 | ifeq ($(OS), Darwin) 8 | CC=gcc-7 9 | LDFLAGS= 10 | INTEL= -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 11 | else ifeq ($(OS), Linux) 12 | CC=gcc-5 13 | LDFLAGS=-Wl,-rpath-link -Wno-format 14 | INTEL= -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed 
-lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 15 | else 16 | $(error Unsupported platform: $(OS)) 17 | endif 18 | 19 | CFLAGS=-O3 -Wall -pedantic 20 | INCLUDE=-I../../composer/ -I../../lib/composer_mkl 21 | EXEC=bench 22 | 23 | .PHONY: all clean 24 | 25 | all: 26 | $(CC) -fopenmp $(CFLAGS) $(INCLUDE) $(LD_LIBRARY_PATH) nbody.c nbody_mkl.c nbody_composer.c -o $(EXEC) -lcomposer -lcomposer_mkl -lm $(INTEL) 27 | 28 | asm: 29 | $(CC) $(CFLAGS) $(INCLUDE) $(LD_LIBRARY_PATH) -fopenmp nbody.c -S 30 | 31 | clean: 32 | rm -rf *.o *.s $(EXEC) $(VLIB) 33 | -------------------------------------------------------------------------------- /c/benchmarks/nbody/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | tasks=( mkl mklcomposer ) 6 | threads=( 1 2 4 8 16 ) 7 | runs=${1:-1} 8 | 9 | for task in "${tasks[@]}"; do 10 | rm -f $task.stderr $task.stdout 11 | git log | head -1 > $task.stderr 12 | git log | head -1 > $task.stdout 13 | done 14 | 15 | for i in {1..$runs}; do 16 | for task in "${tasks[@]}"; do 17 | for nthreads in "${threads[@]}"; do 18 | ./bench -m $task -s 32768 -t $nthreads -i 3 -p 8192 >> $task.stdout 2>> $task.stderr 19 | done 20 | done 21 | done 22 | -------------------------------------------------------------------------------- /c/benchmarks/nbody/nbody.h: -------------------------------------------------------------------------------- 1 | #ifndef _NBODY_H_ 2 | #define _NBODY_H_ 3 | 4 | // Constants. 
5 | #define G ((double)(6.67384e-11)) 6 | #define dt ((double)(60 * 60 * 24 * 365.25)) 7 | #define r_ly ((double)(9.4607e15)) 8 | #define m_sol ((double)(1.9891e30)) 9 | 10 | typedef struct galaxy { 11 | MKL_INT n; 12 | double *m; 13 | double *x; 14 | double *y; 15 | double *z; 16 | double *vx; 17 | double *vy; 18 | double *vz; 19 | } galaxy_t; 20 | 21 | galaxy_t inputs(long n, int lazy); 22 | 23 | void set_delta(MKL_INT n, const double *x, double *out); 24 | void set_pm(MKL_INT n, const double *x, double *out); 25 | 26 | void print_vector(int n, const double *v); 27 | void print_matrix(int n, const double *v); 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /c/benchmarks/nbody/nbody_composer.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "nbody.h" 12 | #include "nbody_composer.h" 13 | 14 | #include 15 | 16 | /** Computes Sum(G * pm / r ** 2 * (dx / r)). 17 | * 18 | * Diagonal elements are not counted in the sum. 19 | * 20 | */ 21 | void composer_compute_force(MKL_INT n, 22 | double *dx, double *pm, double *r, 23 | double *tmp1, 24 | double *output, 25 | int first) { 26 | 27 | MKL_INT size = n * n; 28 | 29 | if (!first) { 30 | composer_execute(); 31 | } 32 | 33 | c_vdMuli(size, pm, G, tmp1); 34 | c_vdPowx(size, r, 2.0, output); 35 | c_vdDiv(size, tmp1, output, tmp1); 36 | c_vdDiv(size, dx, r, output); 37 | c_vdMul(size, tmp1, output, tmp1); 38 | 39 | memset(output, 0, sizeof(double) * n); 40 | 41 | #pragma omp parallel for 42 | for (MKL_INT i = 0; i < n; i++) { 43 | double sum = 0.0; 44 | for (MKL_INT j = 0; j < n; j++) { 45 | // Ignore diagonal elements. 
46 | if (i != j) { 47 | sum += tmp1[i*n + j]; 48 | } 49 | } 50 | output[i] += sum; 51 | } 52 | } 53 | 54 | void composer_move(MKL_INT n, 55 | double *m, double *x, double *y, double *z, double *vx, double *vy, double *vz, 56 | // Temporaries that have n * n space. 57 | double *dx, double *dy, double *dz, double *pm, double *r, double *tmp1, double *tmp2) { 58 | 59 | set_delta(n, x, dx); 60 | set_delta(n, y, dy); 61 | set_delta(n, z, dz); 62 | set_pm(n, m, pm); 63 | 64 | MKL_INT size = n * n; 65 | 66 | // r = sqrt(dx**2 + dy**2 + dz**2) 67 | c_vdPowx(size, dx, 2.0, tmp1); 68 | c_vdPowx(size, dy, 2.0, tmp2); 69 | c_vdAdd(size, tmp1, tmp2, tmp1); 70 | c_vdPowx(size, dz, 2.0, tmp2); 71 | c_vdAdd(size, tmp1, tmp2, tmp1); 72 | c_vdSqrt(size, tmp1, r); 73 | 74 | composer_compute_force(n, dx, pm, r, tmp1, tmp2, 1); 75 | c_vdDiv(n, tmp2, m, tmp1); 76 | c_vdMuli(n, tmp1, dt, tmp1); 77 | c_vdAdd(n, vx, tmp1, vx); 78 | 79 | c_vdMuli(n, vx, dt, tmp1); 80 | c_vdAdd(n, x, tmp1, x); 81 | 82 | composer_compute_force(n, dy, pm, r, tmp1, tmp2, 0); 83 | c_vdDiv(n, tmp2, m, tmp1); 84 | c_vdMuli(n, tmp1, dt, tmp1); 85 | c_vdAdd(n, vy, tmp1, vy); 86 | 87 | c_vdMuli(n, vy, dt, tmp1); 88 | c_vdAdd(n, y, tmp1, y); 89 | 90 | composer_compute_force(n, dz, pm, r, tmp1, tmp2, 0); 91 | c_vdDiv(n, tmp2, m, tmp1); 92 | c_vdMuli(n, tmp1, dt, tmp1); 93 | c_vdAdd(n, vz, tmp1, vz); 94 | 95 | c_vdMuli(n, vz, dt, tmp1); 96 | c_vdAdd(n, z, tmp1, z); 97 | } 98 | 99 | void run_mkl_composer(int iterations, MKL_INT n, 100 | double *m, 101 | double *x, double *y, double *z, double *vx, double *vy, double *vz) { 102 | 103 | vec_t dx = new_vec(n * n, 0); 104 | vec_t dy = new_vec(n * n, 0); 105 | vec_t dz = new_vec(n * n, 0); 106 | vec_t pm = new_vec(n * n, 0); 107 | vec_t r = new_vec(n * n, 0); 108 | vec_t tmp1 = new_vec(n * n, 0); 109 | vec_t tmp2 = new_vec(n * n, 0); 110 | 111 | for (int i = 0; i < iterations; i++) { 112 | printf("iteration %d\n", i); 113 | composer_move(n, m, x, y, z, vx, vy, vz, 114 | 
dx.data, dy.data, dz.data, pm.data, r.data, tmp1.data, tmp2.data); 115 | } 116 | } 117 | 118 | -------------------------------------------------------------------------------- /c/benchmarks/nbody/nbody_composer.h: -------------------------------------------------------------------------------- 1 | #ifndef _NBODY_COMPOSER_H_ 2 | #define _NBODY_COMPOSER_H_ 3 | 4 | #include 5 | 6 | void run_mkl_composer(int iterations, MKL_INT n, 7 | double *m, 8 | double *x, double *y, double *z, double *vx, double *vy, double *vz); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /c/benchmarks/nbody/nbody_mkl.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "nbody.h" 11 | #include "nbody_mkl.h" 12 | 13 | /** Computes Sum(G * pm / r ** 2 * (dx / r)). 14 | * 15 | * Diagonal elements are not counted in the sum. 16 | * 17 | */ 18 | void compute_force(MKL_INT n, 19 | double *dx, double *pm, double *r, 20 | double *tmp1, 21 | double *output) { 22 | 23 | MKL_INT size = n * n; 24 | 25 | vdMuli(size, pm, G, tmp1); 26 | vdPowx(size, r, 2.0, output); 27 | vdDiv(size, tmp1, output, tmp1); 28 | vdDiv(size, dx, r, output); 29 | vdMul(size, tmp1, output, tmp1); 30 | 31 | memset(output, 0, sizeof(double) * n); 32 | 33 | #pragma omp parallel for 34 | for (MKL_INT i = 0; i < n; i++) { 35 | double sum = 0.0; 36 | for (MKL_INT j = 0; j < n; j++) { 37 | // Ignore diagonal elements. 38 | if (i != j) { 39 | // Causes some imprecision compared to reference? 40 | sum += tmp1[i*n + j]; 41 | } 42 | } 43 | output[i] += sum; 44 | } 45 | } 46 | 47 | void move(MKL_INT n, 48 | double *m, double *x, double *y, double *z, double *vx, double *vy, double *vz, 49 | // Temporaries that have n * n space. 
50 | double *dx, double *dy, double *dz, double *pm, double *r, double *tmp1, double *tmp2) { 51 | 52 | set_delta(n, x, dx); 53 | set_delta(n, y, dy); 54 | set_delta(n, z, dz); 55 | set_pm(n, m, pm); 56 | 57 | MKL_INT size = n * n; 58 | 59 | // r = sqrt(dx**2 + dy**2 + dz**2) 60 | vdPowx(size, dx, 2.0, tmp1); 61 | vdPowx(size, dy, 2.0, tmp2); 62 | vdAdd(size, tmp1, tmp2, tmp1); 63 | vdPowx(size, dz, 2.0, tmp2); 64 | vdAdd(size, tmp1, tmp2, tmp1); 65 | vdSqrt(size, tmp1, r); 66 | 67 | compute_force(n, dx, pm, r, tmp1, tmp2); 68 | vdDiv(n, tmp2, m, tmp1); 69 | vdMuli(n, tmp1, dt, tmp1); 70 | vdAdd(n, vx, tmp1, vx); 71 | 72 | vdMuli(n, vx, dt, tmp1); 73 | vdAdd(n, x, tmp1, x); 74 | 75 | compute_force(n, dy, pm, r, tmp1, tmp2); 76 | vdDiv(n, tmp2, m, tmp1); 77 | vdMuli(n, tmp1, dt, tmp1); 78 | vdAdd(n, vy, tmp1, vy); 79 | 80 | vdMuli(n, vy, dt, tmp1); 81 | vdAdd(n, y, tmp1, y); 82 | 83 | compute_force(n, dz, pm, r, tmp1, tmp2); 84 | vdDiv(n, tmp2, m, tmp1); 85 | vdMuli(n, tmp1, dt, tmp1); 86 | vdAdd(n, vz, tmp1, vz); 87 | 88 | vdMuli(n, vz, dt, tmp1); 89 | vdAdd(n, z, tmp1, z); 90 | } 91 | 92 | void run_mkl(int iterations, MKL_INT n, 93 | double *m, 94 | double *x, double *y, double *z, double *vx, double *vy, double *vz) { 95 | 96 | vec_t dx = new_vec(n * n, 0); 97 | vec_t dy = new_vec(n * n, 0); 98 | vec_t dz = new_vec(n * n, 0); 99 | vec_t pm = new_vec(n * n, 0); 100 | vec_t r = new_vec(n * n, 0); 101 | vec_t tmp1 = new_vec(n * n, 0); 102 | vec_t tmp2 = new_vec(n * n, 0); 103 | 104 | for (int i = 0; i < iterations; i++) { 105 | printf("iteration %d\n", i); 106 | move(n, m, x, y, z, vx, vy, vz, 107 | dx.data, dy.data, dz.data, pm.data, r.data, tmp1.data, tmp2.data); 108 | } 109 | } 110 | 111 | -------------------------------------------------------------------------------- /c/benchmarks/nbody/nbody_mkl.h: -------------------------------------------------------------------------------- 1 | #ifndef _NBODY_MKL_H_ 2 | #define _NBODY_MKL_H_ 3 | 4 | #include 5 | 6 | void 
run_mkl(int iterations, MKL_INT n, 7 | double *m, 8 | double *x, double *y, double *z, double *vx, double *vy, double *vz); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /c/benchmarks/run-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Runs all the C-based benchmarks. Specifically, this runs: 4 | # - Black Scholes with MKL 5 | # - Haversine with MKL 6 | # - nBody with MKL 7 | # - Shallow Water with MKL 8 | # - Nashville with ImageMagick 9 | # - Gotham with ImageMagick 10 | 11 | set -x 12 | 13 | # Get the data for Nashville and Gotham 14 | ./get-data.sh 15 | 16 | rm -rf results/ 17 | mkdir results/ 18 | 19 | tasks=( blackscholes haversine nbody shallow_water gotham nashville ) 20 | 21 | # Write system information. 22 | git log | head -1 > results/CONFIG.txt 23 | uname -a >> results/CONFIG.txt 24 | lsb_release -d >> results/CONFIG.txt 25 | 26 | for task in "${tasks[@]}"; do 27 | echo "Executing $task" 28 | pushd $task 29 | make 30 | ./benchmark.sh 31 | popd 32 | mkdir results/$task 33 | mv $task/*.std* results/$task 34 | done 35 | -------------------------------------------------------------------------------- /c/benchmarks/shallow_water/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # Libraries 3 | LD_LIBRARY_PATH=-L../../target/release -L../../lib/composer_mkl 4 | 5 | OS=$(shell uname -s) 6 | 7 | ifeq ($(OS), Darwin) 8 | CC=gcc-7 9 | LDFLAGS= 10 | INTEL= -DMKL_ILP64 -m64 -I${MKLROOT}/include -L${MKLROOT}/lib -Wl,-rpath,${MKLROOT}/lib -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 11 | else ifeq ($(OS), Linux) 12 | CC=gcc-5 13 | LDFLAGS=-Wl,-rpath-link -Wno-format 14 | INTEL= -DMKL_ILP64 -m64 -I${MKLROOT}/include -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 15 | else 16 | $(error Unsupported 
platform: $(OS)) 17 | endif 18 | 19 | CFLAGS=-O3 -Wall -pedantic 20 | INCLUDE=-I../../composer/ -I../../lib/composer_mkl 21 | EXEC=bench 22 | 23 | .PHONY: all clean 24 | 25 | all: 26 | $(CC) -fopenmp $(CFLAGS) $(INCLUDE) $(LD_LIBRARY_PATH) shallow_water_mkl.c shallow_water_composer.c shallow_water.c -o $(EXEC) -lcomposer -lcomposer_mkl -lm $(INTEL) 27 | 28 | asm: 29 | $(CC) $(CFLAGS) $(INCLUDE) $(LD_LIBRARY_PATH) -fopenmp shallow_water_mkl.c shallow_water_composer.c shallow_water.c -S 30 | 31 | clean: 32 | rm -rf *.o *.s $(EXEC) $(VLIB) 33 | -------------------------------------------------------------------------------- /c/benchmarks/shallow_water/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Shallow Water Benchmark 3 | 4 | This benchmark is based on the reference solution provided [here](https://github.com/mrocklin/ShallowWater/blob/master/shallowwater_simple.py). The actual workload simulates the flow of a disturbed fluid based on the equations described [here](http://en.wikipedia.org/wiki/Shallow_water_equations). 5 | 6 | Since thie benchmark is more complex than Haversine and Black Scholes, it is divided into several files: 7 | 8 | * `shallow_water.c` is the driver and contains `main()` and utilities for creating inputs, etc. 9 | * `shallow_water_mkl.c` implements the workload using MKL. 10 | * `shallow_water_composer.c` implements the workload using Composer. This basically just adds the `c_` prefix to all the MKL functions that are supported (note that if we were using C++, we could've just copied and pasted the MKL file and used namespaces). 
11 | -------------------------------------------------------------------------------- /c/benchmarks/shallow_water/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | tasks=( mkl mklcomposer ) 6 | threads=( 1 2 4 8 16 ) 7 | runs=${1:-1} 8 | 9 | for task in "${tasks[@]}"; do 10 | rm -f $task.stderr $task.stdout 11 | git log | head -1 > $task.stderr 12 | git log | head -1 > $task.stdout 13 | done 14 | 15 | for i in {1..$runs}; do 16 | for task in "${tasks[@]}"; do 17 | for nthreads in "${threads[@]}"; do 18 | ./bench -m $task -s 16384 -t $nthreads -i 10 >> $task.stdout 2>> $task.stderr 19 | done 20 | done 21 | done 22 | -------------------------------------------------------------------------------- /c/benchmarks/shallow_water/shallow_water.h: -------------------------------------------------------------------------------- 1 | #ifndef _SHALLOW_WATER_H_ 2 | #define _SHALLOW_WATER_H_ 3 | 4 | #include 5 | 6 | // Inputs to the simulation. 7 | typedef struct input { 8 | MKL_INT n; 9 | double *u; 10 | double *v; 11 | double *eta; 12 | double g; 13 | double b; 14 | double dt; 15 | double grid_spacing; 16 | } input_t; 17 | 18 | /** Initialize inputs. 19 | * 20 | * The inputs are initialized to be consistent with shallowwater_reference.py. 21 | * 22 | */ 23 | input_t inputs(long n, int lazy); 24 | 25 | /** Prints an n * n matrix to stdout. */ 26 | void print_matrix(int n, const double *v); 27 | 28 | /** Shifts the input vector along the given axis by amount. 29 | * 30 | * If axis is 0, rolls along columns. If axis is 1, rolls along rows. 31 | * The input should be an n * n matrix. 
32 | */ 33 | void roll( 34 | // Inputs 35 | MKL_INT n, const double *restrict input, int axis, int amount, 36 | // Output 37 | double *restrict output); 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /c/benchmarks/shallow_water/shallow_water_composer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "mkl_vml_functions.h" 5 | #include "mkl.h" 6 | 7 | #include 8 | #include 9 | 10 | #include "shallow_water.h" 11 | #include "shallow_water_composer.h" 12 | 13 | #include 14 | 15 | void c_spatialDerivative( 16 | // Inputs 17 | MKL_INT n, const double *restrict input, int axis, double grid_spacing, 18 | // Temporaries 19 | double *restrict tmp1, double *restrict tmp2, 20 | // Outputs 21 | double *output) { 22 | 23 | MKL_INT size = n * n; 24 | 25 | composer_execute(); 26 | roll(n, input, axis, -1, tmp1); 27 | roll(n, input, axis, 1, tmp2); 28 | 29 | c_vdSub(size, tmp1, tmp2, output); 30 | c_vdDivi(size, output, grid_spacing * 2.0, output); 31 | } 32 | 33 | void c_d_dx( 34 | // Inputs 35 | MKL_INT n, const double *restrict input, double grid_spacing, 36 | // Temporaries 37 | double *restrict tmp1, double *restrict tmp2, 38 | // Output 39 | double *output) { 40 | c_spatialDerivative(n, input, 1, grid_spacing, tmp1, tmp2, output); 41 | } 42 | 43 | void c_d_dy( 44 | // Inputs 45 | MKL_INT n, const double *restrict input, double grid_spacing, 46 | // Temporaries 47 | double *restrict tmp1, double *restrict tmp2, 48 | // Output 49 | double *output) { 50 | c_spatialDerivative(n, input, 0, grid_spacing, tmp1, tmp2, output); 51 | } 52 | 53 | void c_d_dt( 54 | MKL_INT n, double *eta, double *u, double *v, 55 | double g, double b, double grid_spacing, 56 | // Outputs 57 | double *du_dt, double *dv_dt, double *deta_dt, 58 | // Temporaries 59 | double *tmp1, double *tmp2, double *tmp3, double *tmp4) { 60 | 61 | MKL_INT size = n * n; 62 | 63 | // STAGE 1: 
64 | 65 | // du_dt = -g*d_dx(eta) - u*b 66 | c_d_dx(n, eta, grid_spacing, tmp2, tmp3, tmp1); 67 | c_vdMuli(size, tmp1, -g, tmp1); 68 | c_vdMuli(size, u, b, tmp2); 69 | c_vdSub(size, tmp1, tmp2, du_dt); 70 | 71 | // STAGE 2: 72 | // dv_dt = -g*d_dy(eta) - v*b 73 | c_d_dy(n, eta, grid_spacing, tmp2, tmp3, tmp1); 74 | c_vdMuli(size, tmp1, -g, tmp1); 75 | c_vdMuli(size, v, b, tmp2); 76 | c_vdSub(size, tmp1, tmp2, dv_dt); 77 | 78 | // STAGE 3 (not pipelined) 79 | 80 | // tmp1 = -d_dx(u * eta) 81 | c_vdMul(size, u, eta, tmp4); 82 | c_d_dx(n, tmp4, grid_spacing, tmp2, tmp3, tmp1); 83 | c_vdMuli(size, tmp1, -1, tmp1); 84 | 85 | // STAGE 4 (not pipelined) 86 | 87 | // deta_dt = d_dy(v * eta) 88 | c_vdMul(size, v, eta, tmp4); 89 | c_d_dy(n, tmp4, grid_spacing, tmp2, tmp3, deta_dt); 90 | 91 | // deta_dt = -d_dx(u*eta) - d_dy(v*eta) 92 | c_vdSub(size, tmp1, deta_dt, deta_dt); 93 | } 94 | 95 | void c_evolveEuler( 96 | // Inputs and Outputs 97 | MKL_INT n, double *eta, double *u, double *v, 98 | double g, double b, double dt, double grid_spacing, 99 | // Temporaries 100 | double *du_dt, double *dv_dt, double *deta_dt, 101 | double *tmp1, double *tmp2, double *tmp3, double *tmp4) { 102 | 103 | c_d_dt( 104 | n, eta, u, v, 105 | g, b, grid_spacing, 106 | du_dt, dv_dt, deta_dt, 107 | tmp1, tmp2, tmp3, tmp4); 108 | 109 | MKL_INT size = n * n; 110 | 111 | // eta = eta + deta_dt + dt 112 | c_vdMuli(size, deta_dt, dt, tmp1); 113 | c_vdAdd(size, eta, tmp1, eta); 114 | 115 | // u = u + du_dt * dt 116 | c_vdMuli(size, du_dt, dt, tmp1); 117 | c_vdAdd(size, u, tmp1, u); 118 | 119 | // v = v + dv_dt * dt 120 | c_vdMuli(size, dv_dt, dt, tmp1); 121 | c_vdAdd(size, v, tmp1, v); 122 | } 123 | 124 | void run_mkl_composer( 125 | int iterations, 126 | MKL_INT n, 127 | double *eta, // Lazy 128 | double *u, // Lazy 129 | double *v, // Lazy 130 | double g, 131 | double b, 132 | double dt, 133 | double grid_spacing) { 134 | 135 | long size = n * n; 136 | 137 | // Generate outputs and temporaries. 
138 | // 139 | // We mark these as lazy. 140 | vec_t du_dt = new_vec(size, 1); 141 | vec_t dv_dt = new_vec(size, 1); 142 | vec_t deta_dt = new_vec(size, 1); 143 | 144 | vec_t tmp1 = new_vec(size, 1); 145 | vec_t tmp2 = new_vec(size, 1); 146 | vec_t tmp3 = new_vec(size, 1); 147 | vec_t tmp4 = new_vec(size, 1); 148 | 149 | // TODO Benchmark with and without temporaries. 150 | // 151 | // Marking all of these as temporaries is not right because they are only 152 | // temporaries for the full program -- not temporaries in any particular 153 | // stage. 154 | /* 155 | composer_register_temporary(&du_dt.data, sizeof(double*)); 156 | composer_register_temporary(&dv_dt.data, sizeof(double*)); 157 | composer_register_temporary(&deta_dt.data, sizeof(double*)); 158 | composer_register_temporary(&tmp1.data, sizeof(double*)); 159 | composer_register_temporary(&tmp2.data, sizeof(double*)); 160 | composer_register_temporary(&tmp3.data, sizeof(double*)); 161 | */ 162 | 163 | double time = 0; 164 | 165 | for (int i = 0; i < iterations; i++) { 166 | fprintf(stderr, "iteration %d\n", i); 167 | c_evolveEuler(n, eta, u, v, g, b, dt, grid_spacing, 168 | du_dt.data, dv_dt.data, deta_dt.data, 169 | tmp1.data, tmp2.data, tmp3.data, tmp4.data); 170 | time += dt; 171 | 172 | // Force execution at the end of an iteration. 173 | composer_execute(); 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /c/benchmarks/shallow_water/shallow_water_composer.h: -------------------------------------------------------------------------------- 1 | #ifndef _SHALLOW_WATER_COMPOSER_H_ 2 | #define _SHALLOW_WATER_COMPOSER_H_ 3 | 4 | /** Run the shallow water simulation with MKL and Composer. 
*/ 5 | void run_mkl_composer( 6 | int iterations, 7 | MKL_INT n, 8 | double *eta, 9 | double *u, 10 | double *v, 11 | double g, 12 | double b, 13 | double dt, 14 | double grid_spacing); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /c/benchmarks/shallow_water/shallow_water_mkl.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "mkl_vml_functions.h" 5 | #include "mkl.h" 6 | 7 | #include 8 | #include 9 | 10 | #include "shallow_water.h" 11 | 12 | void spatialDerivative( 13 | // Inputs 14 | MKL_INT n, const double *restrict input, int axis, double grid_spacing, 15 | // Temporaries 16 | double *restrict tmp1, double *restrict tmp2, 17 | // Outputs 18 | double *output) { 19 | 20 | MKL_INT size = n * n; 21 | 22 | roll(n, input, axis, -1, tmp1); 23 | roll(n, input, axis, 1, tmp2); 24 | 25 | vdSub(size, tmp1, tmp2, output); 26 | vdDivi(size, output, grid_spacing * 2.0, output); 27 | } 28 | 29 | void d_dx( 30 | // Inputs 31 | MKL_INT n, const double *restrict input, double grid_spacing, 32 | // Temporaries 33 | double *restrict tmp1, double *restrict tmp2, 34 | // Output 35 | double *output) { 36 | spatialDerivative(n, input, 1, grid_spacing, tmp1, tmp2, output); 37 | } 38 | 39 | void d_dy( 40 | // Inputs 41 | MKL_INT n, const double *restrict input, double grid_spacing, 42 | // Temporaries 43 | double *restrict tmp1, double *restrict tmp2, 44 | // Output 45 | double *output) { 46 | spatialDerivative(n, input, 0, grid_spacing, tmp1, tmp2, output); 47 | } 48 | 49 | void d_dt( 50 | MKL_INT n, double *eta, double *u, double *v, 51 | double g, double b, double grid_spacing, 52 | // Outputs 53 | double *du_dt, double *dv_dt, double *deta_dt, 54 | // Temporaries 55 | double *tmp1, double *tmp2, double *tmp3, double *tmp4) { 56 | 57 | MKL_INT size = n * n; 58 | 59 | // STAGE 1: 60 | 61 | // du_dt = -g*d_dx(eta) - u*b 62 | d_dx(n, eta, grid_spacing, 
tmp2, tmp3, tmp1); 63 | vdMuli(size, tmp1, -g, tmp1); 64 | vdMuli(size, u, b, tmp2); 65 | vdSub(size, tmp1, tmp2, du_dt); 66 | 67 | // STAGE 2: 68 | // dv_dt = -g*d_dy(eta) - v*b 69 | d_dy(n, eta, grid_spacing, tmp2, tmp3, tmp1); 70 | vdMuli(size, tmp1, -g, tmp1); 71 | vdMuli(size, v, b, tmp2); 72 | vdSub(size, tmp1, tmp2, dv_dt); 73 | 74 | // STAGE 3 (not pipelined) 75 | 76 | // tmp1 = -d_dx(u * eta) 77 | vdMul(size, u, eta, tmp4); 78 | d_dx(n, tmp4, grid_spacing, tmp2, tmp3, tmp1); 79 | vdMuli(size, tmp1, -1, tmp1); 80 | 81 | // deta_dt = d_dy(v * eta) 82 | vdMul(size, v, eta, tmp4); 83 | d_dy(n, tmp4, grid_spacing, tmp2, tmp3, deta_dt); 84 | 85 | // deta_dt = -d_dx(u*eta) - d_dy(v*eta) 86 | vdSub(size, tmp1, deta_dt, deta_dt); 87 | } 88 | 89 | void evolveEuler( 90 | // Inputs and Outputs 91 | MKL_INT n, double *eta, double *u, double *v, 92 | double g, double b, double dt, double grid_spacing, 93 | // Temporaries 94 | double *du_dt, double *dv_dt, double *deta_dt, 95 | double *tmp1, double *tmp2, double *tmp3, double *tmp4) { 96 | 97 | d_dt( 98 | n, eta, u, v, 99 | g, b, grid_spacing, 100 | du_dt, dv_dt, deta_dt, 101 | tmp1, tmp2, tmp3, tmp4); 102 | 103 | MKL_INT size = n * n; 104 | 105 | // eta = eta + deta_dt + dt 106 | vdMuli(size, deta_dt, dt, tmp1); 107 | vdAdd(size, eta, tmp1, eta); 108 | 109 | // u = u + du_dt * dt 110 | vdMuli(size, du_dt, dt, tmp1); 111 | vdAdd(size, u, tmp1, u); 112 | 113 | // v = v + dv_dt * dt 114 | vdMuli(size, dv_dt, dt, tmp1); 115 | vdAdd(size, v, tmp1, v); 116 | } 117 | 118 | void run_mkl( 119 | int iterations, 120 | MKL_INT n, 121 | double *eta, 122 | double *u, 123 | double *v, 124 | double g, 125 | double b, 126 | double dt, 127 | double grid_spacing) { 128 | 129 | long size = n * n; 130 | 131 | // Generate outputs and temporaries. 
132 | vec_t du_dt = new_vec(size, 0); 133 | vec_t dv_dt = new_vec(size, 0); 134 | vec_t deta_dt = new_vec(size, 0); 135 | 136 | vec_t tmp1 = new_vec(size, 0); 137 | vec_t tmp2 = new_vec(size, 0); 138 | vec_t tmp3 = new_vec(size, 0); 139 | vec_t tmp4 = new_vec(size, 0); 140 | 141 | double time = 0; 142 | 143 | for (int i = 0; i < iterations; i++) { 144 | fprintf(stderr, "iteration %d\n", i); 145 | evolveEuler(n, eta, u, v, g, b, dt, grid_spacing, 146 | du_dt.data, dv_dt.data, deta_dt.data, 147 | tmp1.data, tmp2.data, tmp3.data, tmp4.data); 148 | time += dt; 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /c/benchmarks/shallow_water/shallow_water_mkl.h: -------------------------------------------------------------------------------- 1 | #ifndef _SHALLOW_WATER_MKL_H_ 2 | #define _SHALLOW_WATER_MKL_H_ 3 | 4 | /** Run the shallow water simulation with MKL. */ 5 | void run_mkl( 6 | int iterations, 7 | MKL_INT n, 8 | double *eta, 9 | double *u, 10 | double *v, 11 | double g, 12 | double b, 13 | double dt, 14 | double grid_spacing); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /c/composer/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "composer" 3 | version = "0.1.0" 4 | authors = ["Shoumik Palkar "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | libc = "0.2" 9 | byteorder = "1.3.1" 10 | serde = { version = "1.0", features = ["derive"] } 11 | serde_json = "1.0" 12 | lazy_static = "1.2.0" 13 | fnv = "1.0.6" 14 | env_logger = "0.6.0" 15 | log = "0.4.6" 16 | crossbeam-utils = "0.6" 17 | 18 | [build-dependencies] 19 | cbindgen = "0.8.0" 20 | 21 | [lib] 22 | crate-type = ["cdylib", "rlib"] 23 | -------------------------------------------------------------------------------- /c/composer/build.rs: -------------------------------------------------------------------------------- 1 | 2 | use cbindgen; 
3 | use std::env; 4 | 5 | fn main() { 6 | let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); 7 | cbindgen::Builder::new() 8 | .with_crate(crate_dir) 9 | .with_language(cbindgen::Language::C) 10 | .with_include_guard("_COMPOSER_H_") 11 | .generate() 12 | .expect("Unable to generate bindings") 13 | .write_to_file("composer.h"); 14 | } 15 | -------------------------------------------------------------------------------- /c/composer/src/error.rs: -------------------------------------------------------------------------------- 1 | //! Errors in Composer. 2 | 3 | use std::error; 4 | use std::fmt; 5 | 6 | /// Macro for creating an error. 7 | #[macro_export] 8 | macro_rules! composer_err { 9 | ( $($arg:tt)* ) => ({ 10 | ::std::result::Result::Err($crate::Error::new(format!($($arg)*))) 11 | }) 12 | } 13 | 14 | /// Errors produced by the annotation system. 15 | #[derive(Debug, Clone)] 16 | pub struct Error(String); 17 | 18 | /// A custom result type for functions in this library. 19 | pub type Result = std::result::Result; 20 | 21 | impl Error { 22 | pub fn new>(description: T) -> Error { 23 | Error(description.into()) 24 | } 25 | } 26 | 27 | impl fmt::Display for Error { 28 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 29 | write!(f, "{}", self.0) 30 | } 31 | } 32 | 33 | impl error::Error for Error { 34 | fn cause(&self) -> Option<&error::Error> { 35 | None 36 | } 37 | } 38 | 39 | impl From for Error { 40 | fn from(e: String) -> Error { 41 | Error(e) 42 | } 43 | } 44 | 45 | impl From for std::io::Error { 46 | fn from(e: Error) -> std::io::Error { 47 | std::io::Error::new(std::io::ErrorKind::Other, e.to_string()) 48 | } 49 | } 50 | 51 | impl<'a> From<&'a str> for Error { 52 | fn from(e: &'a str) -> Error { 53 | Error(String::from(e)) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /c/composer/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! 
The composer runtime and its associated components. 2 | //! 3 | //! Most of the interesting stuff is in the `runtime` module. This contains both the FFI functions 4 | //! that the wrapper functions call, as well as the runtime itself, which launches parallel tasks. 5 | //! 6 | pub mod runtime; 7 | pub mod util; 8 | 9 | #[macro_use] 10 | mod error; 11 | 12 | pub use error::*; 13 | 14 | use log::*; 15 | 16 | use std::collections::HashMap; 17 | use std::fmt; 18 | 19 | use serde::{Serialize, Deserialize}; 20 | use serde_json; 21 | 22 | // TODO change the initializer/next to actual function pointer types. 23 | type FunctionPointer = usize; 24 | 25 | /// A C declaration. 26 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] 27 | pub struct CDecl { 28 | pub return_type: String, 29 | pub func_name: String, 30 | pub arguments: Vec<(String, Option)>, 31 | } 32 | 33 | impl CDecl { 34 | pub fn new(return_type: T, func_name: T, arguments: Vec<(T, Option)>) -> CDecl 35 | where T: Into { 36 | CDecl { 37 | return_type: return_type.into(), 38 | func_name: func_name.into(), 39 | arguments: arguments.into_iter() 40 | .map(|(ty, name)| (ty.into(), name.map(|n| n.into()))) 41 | .collect(), 42 | } 43 | } 44 | 45 | pub fn is_void(&self) -> bool { 46 | return self.return_type == "void" 47 | } 48 | 49 | pub fn strip_type_qualifiers(s: &str) -> String { 50 | s.replace("const ", "") 51 | } 52 | } 53 | 54 | impl fmt::Display for CDecl { 55 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 56 | let arguments = self.arguments.iter().map(|(ty, name)| { 57 | format!("{}{}", &ty, 58 | name.as_ref() 59 | .map(|n| format!(" {}", n)) 60 | .unwrap_or("".to_string()) 61 | ) 62 | }); 63 | let arguments = util::join("", ", ", arguments); 64 | write!(f, "{} {}({})", self.return_type, self.func_name, arguments) 65 | } 66 | } 67 | 68 | /// Split type information that is known only at runtime. 
69 | /// 70 | /// This includes information such as function pointer values and the sizes of types, which can 71 | /// only be known when the header files produced by the annotator tool are compiled. 72 | #[derive(Debug,Clone,PartialEq,Eq,Hash,Serialize,Deserialize)] 73 | pub struct SplitTypeRuntimeInfo { 74 | /// Function pointer to initializer. 75 | initializer: usize, 76 | /// Function pointer to retrieve next value. 77 | next: usize, 78 | /// Size of the value that the splitter splits. 79 | data_size: usize, 80 | } 81 | 82 | /// A split type. 83 | /// 84 | /// Split types can either be braodcast (copy the value to each worker), generic (i.e., they can 85 | /// take on any type) or named. 86 | #[derive(Debug, Clone,PartialEq,Eq,Hash,Serialize,Deserialize)] 87 | pub enum SplitType { 88 | Broadcast { 89 | runtime: Option, 90 | }, 91 | Generic { 92 | name: String, 93 | }, 94 | Named { 95 | name: String, 96 | arguments: usize, 97 | runtime: Option, 98 | }, 99 | } 100 | 101 | impl SplitType { 102 | /// Returns the string name of this split type. 103 | pub fn name(&self) -> Option<&str> { 104 | match *self { 105 | SplitType::Broadcast { .. } => None, 106 | SplitType::Generic { ref name } => Some(name), 107 | SplitType::Named { ref name, .. } => Some(name), 108 | } 109 | } 110 | 111 | pub fn is_broadcast(&self) -> bool { 112 | match *self { 113 | SplitType::Broadcast { .. } => true, 114 | _ => false, 115 | } 116 | } 117 | 118 | pub fn is_named(&self) -> bool { 119 | match *self { 120 | SplitType::Named { .. } => true, 121 | _ => false, 122 | } 123 | } 124 | 125 | /// Returns the runtime information about the split type. 126 | /// 127 | /// Panics if this type is generic or if the runtime information is not present. 128 | pub fn runtime_info(&self) -> &SplitTypeRuntimeInfo { 129 | match *self { 130 | SplitType::Broadcast { ref runtime, .. } => runtime.as_ref().unwrap(), 131 | SplitType::Named { ref runtime, .. 
} => runtime.as_ref().unwrap(), 132 | SplitType::Generic { .. } => { 133 | panic!("Attempted to retrieve runtime information from generic type.") 134 | } 135 | } 136 | } 137 | } 138 | 139 | /// A parameter within the context of an annotation. 140 | /// 141 | /// This struct defines the split type along with the arguments that are fed to the type to instantiate 142 | /// it. Arguments are indices into the function argument list (e.g., `[1,2,4]` means that the first, 143 | /// second, and fourth arguments should be passed to the split type initializer). Generic split 144 | /// types should not have any arguments. 145 | #[derive(Debug, Clone, PartialEq,Serialize,Deserialize)] 146 | pub struct SplitTypeParameter { 147 | pub ty: SplitType, 148 | pub arguments: Vec, 149 | } 150 | 151 | /// A self-contained annotation over a C function. 152 | #[derive(Debug, Clone, PartialEq,Serialize,Deserialize)] 153 | pub struct Annotation { 154 | pub function: CDecl, 155 | pub params: Vec, 156 | pub return_param: Option, 157 | pub defaults: HashMap, 158 | } 159 | 160 | /// Entry points for parsing and creating annotations from strings. 161 | impl Annotation { 162 | /// Parses an annotation from a JSON string. 163 | pub fn from_json(s: &str) -> Result { 164 | Ok(serde_json::from_str(s).unwrap()) 165 | } 166 | 167 | } 168 | 169 | /// Methods for runtime instantiation of annotations. 170 | /// 171 | /// These methods are called from the generated C code (via FFI). 172 | impl Annotation { 173 | /// Sets the runtime information for an argument. 174 | fn set_type_runtime_info(&mut self, 175 | index: usize, 176 | rt: SplitTypeRuntimeInfo) -> Result<()> { 177 | let param = self.params.get_mut(index).unwrap(); 178 | match param.ty { 179 | SplitType::Named { ref mut runtime, .. } | SplitType::Broadcast { ref mut runtime, .. } => { 180 | *runtime = Some(rt); 181 | } 182 | SplitType::Generic { .. 
} => { 183 | info!("instantiated {:?} with no runtime info.", self); 184 | } 185 | } 186 | Ok(()) 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /c/composer/src/util.rs: -------------------------------------------------------------------------------- 1 | //! Shared utility functions. 2 | 3 | /// Joins a an iterator of strings using a delimiter. 4 | pub fn join<'a>(start: impl Into, 5 | sep: impl Into<&'a str>, 6 | strings: impl std::iter::Iterator) -> String { 7 | 8 | let sep = sep.into(); 9 | strings.enumerate().fold(start.into(), |mut buf, (i, val)| { 10 | if i > 0 { 11 | buf.push_str(sep); 12 | } 13 | buf.push_str(&val); 14 | buf 15 | }) 16 | } 17 | -------------------------------------------------------------------------------- /c/lib/ImageMagick/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS=-fPIC -g -O3 -Wall 2 | LDFLAGS=-shared 3 | LD_LIBRARY_PATH=../../target/release 4 | 5 | OS=$(shell uname -s) 6 | 7 | ifeq ($(OS), Darwin) 8 | CC=gcc-7 9 | LIB=dylib 10 | MAGICK=$(shell pkg-config --cflags --libs MagickWand) 11 | else ifeq ($(OS), Linux) 12 | CC=gcc-5 13 | LIB=so 14 | MAGICK=$(shell pkg-config --cflags --libs MagickWand) 15 | else 16 | $(error Unsupported platform: $(OS)) 17 | endif 18 | 19 | INCLUDE=-I../../composer/ -I/usr/local/include/ImageMagick-7/MagickWand 20 | 21 | .PHONY: all annotate clean 22 | 23 | all: annotate 24 | $(CC) $(INCLUDE) $(MAGICK) -fopenmp -L$(LD_LIBRARY_PATH) $(CFLAGS) $(LDFLAGS) splitters.c -o libcomposer_imagemagick.$(LIB) -lcomposer $(INTEL) 25 | 26 | annotate: 27 | rm -rf generated 28 | annotate -i MagickWand imagemagick.annotation 29 | 30 | clean: 31 | rm -rf a.out generated libcomposer_imagemagick.$(LIB) *.dSYM 32 | -------------------------------------------------------------------------------- /c/lib/ImageMagick/imagemagick.annotation: -------------------------------------------------------------------------------- 
1 | @sa(wand: WandSplit()) -> WandSplit() { 2 | MagickWand *CloneMagickWand(MagickWand *); 3 | MagickWand *DestroyMagickWand(MagickWand *); 4 | } 5 | 6 | @sa() -> broadcast { 7 | PixelWand *NewPixelWand(); 8 | } 9 | 10 | @sa(wand: broadcast) -> broadcast { 11 | PixelWand *DestroyPixelWand(PixelWand *); 12 | } 13 | 14 | @sa(wand: broadcast, color: broadcast) { 15 | void PixelSetColor(PixelWand *, const char *); 16 | } 17 | 18 | @sa(wand: WandSplit(), colorize: broadcast, alpha: broadcast) -> broadcast { 19 | MagickBooleanType MagickColorizeImage(MagickWand *, PixelWand *, PixelWand *); 20 | } 21 | 22 | @sa(wand: WandSplit(), colorspace: broadcast) -> broadcast { 23 | MagickBooleanType MagickSetImageColorspace(MagickWand *, const ColorspaceType); 24 | } 25 | 26 | @sa(wand: WandSplit(), flag: broadcast) -> broadcast { 27 | MagickBooleanType MagickNegateImage(MagickWand *, const MagickBooleanType); 28 | MagickBooleanType MagickContrastImage(MagickWand *, const MagickBooleanType); 29 | } 30 | 31 | @sa(wand: WandSplit(), key: broadcast, value: broadcast) -> broadcast { 32 | MagickBooleanType MagickSetImageArtifact(MagickWand *, const char*, const char*); 33 | } 34 | 35 | @sa(wand: WandSplit(), hue: broadcast, saturation: broadcast, value: broadcast) -> broadcast { 36 | MagickBooleanType MagickModulateImage(MagickWand *, const double, const double, const double); 37 | } 38 | 39 | @sa(wand: WandSplit(), gamma: broadcast) -> broadcast { 40 | MagickBooleanType MagickGammaImage(MagickWand *, const double); 41 | } 42 | 43 | @sa(wand: WandSplit(), other: WandSplit(), kind: broadcast, 44 | clip: broadcast, x_offset: broadcast, y_offset: broadcast) -> broadcast { 45 | 46 | MagickBooleanType MagickCompositeImage(MagickWand *, 47 | const MagickWand *,const CompositeOperator, 48 | const MagickBooleanType, const ssize_t, const ssize_t); 49 | } 50 | -------------------------------------------------------------------------------- /c/lib/ImageMagick/splitters.c: 
-------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #define DEBUG 0 8 | 9 | #define DBG(fmt, ...) \ 10 | do { if (DEBUG) fprintf(stderr, "%s:%d:%s(): " fmt "\n", __FILE__, \ 11 | __LINE__, __func__, __VA_ARGS__); } while (0) 12 | 13 | 14 | struct WandSplit { 15 | MagickWand *wand; 16 | size_t width; 17 | size_t height; 18 | }; 19 | 20 | void* WandSplit_new(MagickWand **wand_to_split, struct WandSplit_init_args *_, int64_t *items) { 21 | struct WandSplit *splitter = (struct WandSplit *)malloc(sizeof(struct WandSplit)); 22 | splitter->wand = *wand_to_split; 23 | 24 | // We'll split the image by row, since there are nice methods for reconstructing an image 25 | // in this way that are builtin. 26 | splitter->width = MagickGetImageWidth(splitter->wand); 27 | splitter->height = MagickGetImageHeight(splitter->wand); 28 | *items = splitter->height; 29 | DBG("items: %ld", splitter->height); 30 | return (void *)splitter; 31 | } 32 | 33 | SplitterStatus WandSplit_next(const void *s, 34 | int64_t start, 35 | int64_t end, 36 | MagickWand **out) { 37 | 38 | const struct WandSplit *splitter = (const struct WandSplit *)s; 39 | DBG("start: %ld end: %ld height: %ld", start, end, splitter->height); 40 | 41 | if (splitter->height <= start) { 42 | DBG("finished got range (%ld %ld)", start, end); 43 | return SplitterFinished; 44 | } else { 45 | size_t region_height = (end - start); 46 | if (splitter->height < end) { 47 | DBG("clipping region height by %ld", end - splitter->height); 48 | region_height = splitter->height - start; 49 | } 50 | DBG("range: %ld, %ld", start, start + region_height); 51 | 52 | MagickWand *wand = MagickGetImageRegion(splitter->wand, splitter->width, region_height, 0, start); 53 | *out = wand; 54 | return SplitterContinue; 55 | } 56 | } 57 | 58 | MagickWand *aggregate_seq(MagickWand **pieces, int64_t count) { 59 | MagickWand *results = NewMagickWand(); 60 | 
MagickResetIterator(results); 61 | 62 | DBG("consturcted results image %p", results); 63 | 64 | for (int i = 0; i < count; i++) { 65 | DBG("adding image %d", i); 66 | fflush(stderr); 67 | MagickSetLastIterator(results); 68 | MagickAddImage(results, pieces[i]); 69 | } 70 | 71 | MagickResetIterator(results); 72 | MagickWand *final = MagickAppendImages(results, 1); 73 | DestroyMagickWand(results); 74 | 75 | return final; 76 | } 77 | 78 | MagickWand *aggregate_par(MagickWand **pieces, int count, int threads) { 79 | // Holds aggregation state. 80 | MagickWand **results = (MagickWand **)malloc(sizeof(MagickWand *) * threads); 81 | for (int i = 0; i < threads; i++) { 82 | results[i] = NewMagickWand(); 83 | MagickResetIterator(results[i]); 84 | } 85 | 86 | int values_per_thread = count / threads; 87 | printf("values per piece: %d\n", values_per_thread); 88 | 89 | #pragma omp parallel for 90 | for (int i = 0; i < threads; i++) { 91 | int start = i * values_per_thread; 92 | int end = (i + 1) * values_per_thread; 93 | 94 | if (i == threads - 1) { 95 | end = count; 96 | } 97 | 98 | MagickWand *result = results[i]; 99 | 100 | // printf("thread %d: %d->%d\n", omp_get_thread_num(), start, end); 101 | for (int j = start; j < end; j++) { 102 | MagickSetLastIterator(result); 103 | MagickAddImage(result, pieces[j]); 104 | } 105 | 106 | MagickResetIterator(result); 107 | MagickWand *final = MagickAppendImages(result, 1); 108 | 109 | result = DestroyMagickWand(result); 110 | results[i] = final; 111 | } 112 | 113 | MagickWand *final_iterator = NewMagickWand(); 114 | MagickResetIterator(final_iterator); 115 | for (int i = 0; i < threads; i++) { 116 | MagickSetLastIterator(final_iterator); 117 | MagickAddImage(final_iterator, results[i]); 118 | } 119 | MagickResetIterator(final_iterator); 120 | MagickWand *final = MagickAppendImages(final_iterator, 1); 121 | 122 | for (int i = 0; i < threads; i++) { 123 | DestroyMagickWand(results[i]); 124 | } 125 | free(results); 126 | 127 | return 
final; 128 | } 129 | 130 | void *WandSplit_merge(const void *s, int64_t length, int64_t threads) { 131 | MagickWand *final; 132 | MagickWand **pieces = (MagickWand **)s; 133 | 134 | if (length == 1) { 135 | DBG("only one item: returning it %d", 0); 136 | return ((MagickWand **)s)[0]; 137 | } 138 | 139 | MagickWand *results = NewMagickWand(); 140 | MagickResetIterator(results); 141 | for (int i = 0; i < length; i++) { 142 | MagickSetLastIterator(results); 143 | MagickAddImage(results, pieces[i]); 144 | } 145 | MagickResetIterator(results); 146 | final = MagickAppendImages(results, 1); 147 | DestroyMagickWand(results); 148 | // DBG("aggregate_seq: %p", aggregate_seq); 149 | // final = aggregate_seq(pieces, length); 150 | 151 | return (void *)final; 152 | } 153 | -------------------------------------------------------------------------------- /c/lib/ImageMagick/splitters.h: -------------------------------------------------------------------------------- 1 | #ifndef _MAGICK_SPLITTER_H_ 2 | #define _MAGICK_SPLITTER_H_ 3 | 4 | void *WandSplit_merge(const void *s, int64_t length, int64_t threads); 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /c/lib/composer_mkl/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS=-fPIC -g -O3 -Wall 2 | LDFLAGS=-shared 3 | INCLUDE=-I../../composer -I. 
4 | LD_LIBRARY_PATH=../../target/release 5 | 6 | OS=$(shell uname -s) 7 | 8 | ifeq ($(OS), Darwin) 9 | CC=gcc-7 10 | LIB=dylib 11 | INTEL= -L${MKLROOT}/lib -Wl,-rpath,${MKLROOT}/lib -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 12 | else ifeq ($(OS), Linux) 13 | CC=gcc-5 14 | LIB=so 15 | INTEL= -I${MKLROOT}/compilers_and_libraries/linux/mkl/include -L${MKLROOT}/compilers_and_libraries/linux/mkl/lib/intel64 -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl 16 | else 17 | $(error Unsupported platform: $(OS)) 18 | endif 19 | 20 | 21 | .PHONY: all annotate clean 22 | 23 | all: annotate 24 | $(CC) $(INCLUDE) -L$(LD_LIBRARY_PATH) $(CFLAGS) $(LDFLAGS) vec.c splitters.c mkl_extensions.c -o libcomposer_mkl.$(LIB) -lcomposer $(INTEL) 25 | 26 | annotate: 27 | rm -rf generated 28 | ../../target/release/annotate -i mkl,mkl_extensions mkl.annotation 29 | 30 | clean: 31 | rm -rf a.out generated libcomposer_mkl.$(LIB) *.dSYM 32 | -------------------------------------------------------------------------------- /c/lib/composer_mkl/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Controlling Threads 3 | 4 | * Make sure the `OMP_NUM_THREADS` variable is set to 1 when running with composer! 
5 | -------------------------------------------------------------------------------- /c/lib/composer_mkl/mkl.annotation: -------------------------------------------------------------------------------- 1 | @sa (n: SizeSplit(), a: RegularSplit(n), b: RegularSplit(n), out: RegularSplit(n)) { 2 | void vdAdd(MKL_INT n, double *a, double *b, double *out); 3 | void vdDiv(MKL_INT n, double *a, double *b, double *out); 4 | void vdMul(MKL_INT n, double *a, double *b, double *out); 5 | void vdSub(MKL_INT n, double *a, double *b, double *out); 6 | } 7 | 8 | @sa (n: SizeSplit(), a: RegularSplit(n), b: RegularSplit(n)) { 9 | void vdAsin(MKL_INT n, double *a, double *out); 10 | void vdCos(MKL_INT n, double *a, double *out); 11 | void vdErf(MKL_INT n, double *a, double *out); 12 | void vdExp(MKL_INT n, double *a, double *out); 13 | void vdLog1p(MKL_INT n, double *a, double *out); 14 | void vdSin(MKL_INT n, double *a, double *out); 15 | void vdSqrt(MKL_INT n, double *a, double *out); 16 | } 17 | 18 | @sa (n: SizeSplit(), a: RegularSplit(n), b: broadcast, out: RegularSplit(n)) { 19 | void vdAddi(MKL_INT n, double *a, double b, double *out); 20 | void vdDivi(MKL_INT n, double *a, double b, double *out); 21 | void vdMuli(MKL_INT n, double *a, double b, double *out); 22 | void vdSubi(MKL_INT n, double *a, double b, double *out); 23 | void vdPowx(MKL_INT n, double *a, double b, double *out); 24 | } 25 | 26 | @sa (n: SizeSplit(), a: broadcast, b: RegularSplit(n), out: RegularSplit(n)) { 27 | void vdSubvi(MKL_INT n, double a, double *b, double *out); 28 | } 29 | -------------------------------------------------------------------------------- /c/lib/composer_mkl/mkl_extensions.c: -------------------------------------------------------------------------------- 1 | 2 | #define INFINITE_PIECES (-1) 3 | 4 | #include "mkl.h" 5 | 6 | #include "mkl_extensions.h" 7 | #include 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | // Extensions to MKL, since it doesn't support 
immediates 14 | // Unary Operators with Immediate values 15 | 16 | void vdAddi(MKL_INT length, double *a, double b, double *result) { 17 | for (size_t i = 0; i < length; i++) { 18 | result[i] = a[i] + b; 19 | } 20 | } 21 | 22 | void vdSubi(MKL_INT length, double *a, double b, double *result) { 23 | for (size_t i = 0; i < length; i++) { 24 | result[i] = a[i] - b; 25 | } 26 | } 27 | 28 | void vdMuli(MKL_INT length, double *a, double b, double *result) { 29 | for (size_t i = 0; i < length; i++) { 30 | result[i] = a[i] * b; 31 | } 32 | } 33 | 34 | void vdDivi(MKL_INT length, double *a, double b, double *result) { 35 | for (size_t i = 0; i < length; i++) { 36 | result[i] = a[i] / b; 37 | } 38 | } 39 | 40 | void vdSubvi(MKL_INT length, double a, double *b, double *result) { 41 | for (size_t i = 0; i < length; i++) { 42 | result[i] = a - b[i]; 43 | } 44 | } 45 | 46 | void vdDivvi(MKL_INT length, double a, double *b, double *result) { 47 | for (size_t i = 0; i < length; i++) { 48 | result[i] = a / b[i]; 49 | } 50 | } 51 | 52 | #ifdef __cplusplus 53 | } 54 | #endif 55 | -------------------------------------------------------------------------------- /c/lib/composer_mkl/mkl_extensions.h: -------------------------------------------------------------------------------- 1 | #ifndef _MKL_EXTENSIONS_H_ 2 | #define _MKL_EXTENSIONS_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void vdAddi(MKL_INT length, double *a, double b, double *result); 9 | void vdSubi(MKL_INT length, double *a, double b, double *result); 10 | void vdMuli(MKL_INT length, double *a, double b, double *result); 11 | void vdDivi(MKL_INT length, double *a, double b, double *result); 12 | void vdSubvi(MKL_INT length, double a, double *b, double *result); 13 | void vdDivvi(MKL_INT length, double a, double *b, double *result); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- 
/c/lib/composer_mkl/splitters.c: -------------------------------------------------------------------------------- 1 | 2 | #include "generated/generated.h" 3 | #include 4 | 5 | #define DEBUG 0 6 | 7 | #define DBG(fmt, ...) \ 8 | do { if (DEBUG) fprintf(stderr, "%s:%d:%s(): " fmt "\n", __FILE__, \ 9 | __LINE__, __func__, __VA_ARGS__); } while (0) 10 | 11 | struct RegularSplit { 12 | double *base; 13 | int size; 14 | }; 15 | 16 | struct SizeSplit { 17 | size_t size; 18 | }; 19 | 20 | void* RegularSplit_new(double **item_to_split, struct RegularSplit_init_args *a, int64_t *items) { 21 | struct RegularSplit *splitter = (struct RegularSplit *)malloc(sizeof(struct RegularSplit)); 22 | splitter->base = *item_to_split; 23 | splitter->size = a->_0; 24 | DBG("base=%p, size=%d\n", item_to_split, a->_0); 25 | 26 | *items = splitter->size; 27 | return (void *)splitter; 28 | } 29 | 30 | SplitterStatus RegularSplit_next(const void *s, 31 | int64_t start, 32 | int64_t end, 33 | double **out) { 34 | 35 | const struct RegularSplit *splitter = (const struct RegularSplit *)s; 36 | DBG("start=%lld, end=%lld, size=%d", start, end, splitter->size); 37 | if (splitter->size < start) { 38 | return SplitterFinished; 39 | } else { 40 | *out = splitter->base + start; 41 | return SplitterContinue; 42 | } 43 | } 44 | 45 | void* SizeSplit_new(MKL_INT *item_to_split, struct SizeSplit_init_args *_unused, int64_t *items) { 46 | struct SizeSplit *splitter = (struct SizeSplit *)malloc(sizeof(struct SizeSplit)); 47 | splitter->size = *item_to_split; 48 | *items = splitter->size; 49 | return (void *)splitter; 50 | } 51 | 52 | SplitterStatus SizeSplit_next(const void *s, 53 | int64_t start, 54 | int64_t end, 55 | MKL_INT *out) { 56 | 57 | struct SizeSplit *splitter = (struct SizeSplit *)s; 58 | DBG("start=%lld, end=%lld, size=%zu", start, end, splitter->size); 59 | if (splitter->size < start) { 60 | return SplitterFinished; 61 | } else if (splitter->size < end) { 62 | *out = (splitter->size - start); 63 | 
return SplitterContinue; 64 | } else { 65 | *out = (end - start); 66 | return SplitterContinue; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /c/lib/composer_mkl/vec.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef struct { 11 | double *data; 12 | size_t length; 13 | } vec_t; 14 | 15 | vec_t new_vec(size_t length, int lazy) { 16 | vec_t result; 17 | result.data = (double *)composer_malloc(sizeof(double) * length, lazy); 18 | result.length = length; 19 | return result; 20 | } 21 | 22 | vec_t new_vec_nolazy(size_t length) { 23 | vec_t result; 24 | result.data = (double *)malloc(sizeof(double) * length); 25 | assert(result.data); 26 | result.length = length; 27 | return result; 28 | } 29 | 30 | // Initialize a vector where the value is val. 31 | vec_t vvals(size_t length, double val, int lazy) { 32 | vec_t result; 33 | size_t size = sizeof(double) * length; 34 | result.data = (double *)composer_malloc(size, 0); 35 | result.length = length; 36 | for (int i = 0; i < length; i++) { 37 | result.data[i] = val; 38 | } 39 | 40 | if (lazy) { 41 | composer_tolazy(result.data); 42 | } 43 | 44 | return result; 45 | } 46 | 47 | #ifdef __cplusplus 48 | } 49 | #endif 50 | -------------------------------------------------------------------------------- /c/lib/composer_mkl/vec.h: -------------------------------------------------------------------------------- 1 | #ifndef _VEC_H_ 2 | #define _VEC_H_ 3 | 4 | /** A small convinience library for vectors used with composer. 
*/ 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef struct { 11 | double *data; 12 | size_t length; 13 | } vec_t; 14 | 15 | 16 | vec_t new_vec(size_t length, int lazy); 17 | vec_t new_vec_nolazy(size_t length); 18 | vec_t vvals(size_t length, double val, int lazy); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /python/benchmarks/birth_analysis/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | # File to use. babynames.txt is for testing. babynames-xlarge.txt is for benchmark. 8 | filename="../datasets/birth_analysis/_data/babynames-xlarge.txt" 9 | runs=${1:-1} 10 | 11 | tasks=( composer naive ) 12 | threads=( 1 2 4 8 16 ) 13 | 14 | for task in "${tasks[@]}"; do 15 | rm -f $task.stdout $task.stderr 16 | git log | head -1 > $task.stderr 17 | git log | head -1 > $task.stdout 18 | done 19 | 20 | for i in {1..$runs}; do 21 | python birth_analysis.py -f $filename >> naive.stdout 2>> naive.stderr 22 | for nthreads in "${threads[@]}"; do 23 | python birth_analysis_composer.py -f $filename -t $nthreads >> composer.stdout 2>> composer.stderr 24 | done 25 | done 26 | -------------------------------------------------------------------------------- /python/benchmarks/birth_analysis/birth_analysis.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | import pandas as pd 4 | import sys 5 | import time 6 | 7 | def get_top1000(group): 8 | return group.sort_values(by='births', ascending=False)[0:1000] 9 | 10 | def analyze(top1000): 11 | start1 = time.time() 12 | all_names = pd.Series(top1000.name.unique()) 13 | lesley_like = all_names[all_names.str.lower().str.contains('lesl')] 14 | filtered = top1000[top1000.name.isin(lesley_like)] 15 | table = filtered.pivot_table('births', 
index='year', 16 | columns='sex', aggfunc='sum') 17 | 18 | table = table.div(table.sum(1), axis=0) 19 | end1 = time.time() 20 | print("Analysis:", end1 - start1) 21 | return table 22 | 23 | def run(filename): 24 | years = range(1880, 2011) 25 | pieces = [] 26 | columns = ['year', 'sex', 'name', 'births'] 27 | 28 | sys.stdout.write("Reading data...") 29 | sys.stdout.flush() 30 | names = pd.read_csv(filename, names=columns) 31 | print("done.") 32 | sys.stdout.flush() 33 | 34 | print("Size of names:", len(names)) 35 | 36 | e2e_start = time.time() 37 | 38 | # Time preprocessing step 39 | start0 = time.time() 40 | grouped = names.groupby(['year', 'sex']) 41 | end0 = time.time() 42 | print("GroupBy:", end0 - start0) 43 | start0 = end0 44 | 45 | top1000 = grouped.apply(get_top1000) 46 | top1000.reset_index(inplace=True, drop=True) 47 | 48 | end0 = time.time() 49 | print("Apply:", end0-start0) 50 | print("Elements in top1000:", len(top1000)) 51 | 52 | result = analyze(top1000) 53 | 54 | e2e_end = time.time() 55 | print("Total time:", e2e_end - e2e_start) 56 | 57 | print(top1000['births'].sum()) 58 | 59 | def main(): 60 | parser = argparse.ArgumentParser( 61 | description="Birth Analysis." 
62 | ) 63 | parser.add_argument('-f', "--filename", type=str, default="babynames.txt", help="Input file") 64 | args = parser.parse_args() 65 | 66 | filename = args.filename 67 | 68 | print("File:", filename) 69 | mi = run(filename) 70 | 71 | 72 | if __name__ == "__main__": 73 | main() 74 | -------------------------------------------------------------------------------- /python/benchmarks/birth_analysis/birth_analysis_composer.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | sys.path.append("../../lib/") 5 | sys.path.append("../../pycomposer/") 6 | 7 | import argparse 8 | import composer_pandas as pd 9 | import time 10 | 11 | def analyze(top1000): 12 | start1 = time.time() 13 | all_names = pd.Series(top1000.name.unique()) 14 | lesley_like = all_names[all_names.str.lower().str.contains('lesl')] 15 | filtered = top1000[top1000.name.isin(lesley_like)] 16 | table = filtered.pivot_table('births', index='year', 17 | columns='sex', aggfunc='sum') 18 | 19 | table = table.div(table.sum(1), axis=0) 20 | end1 = time.time() 21 | print("Analysis:", end1 - start1) 22 | return table 23 | 24 | def get_top1000(group): 25 | return group.sort_values(by='births', ascending=False)[0:1000] 26 | 27 | def run(filename, threads): 28 | years = range(1880, 2011) 29 | columns = ['year', 'sex', 'name', 'births'] 30 | 31 | sys.stdout.write("Reading data...") 32 | sys.stdout.flush() 33 | names = pd.read_csv(filename, names=columns) 34 | print("done") 35 | 36 | print("Size of names:", len(names)) 37 | 38 | e2e_start = time.time() 39 | 40 | start0 = time.time() 41 | grouped = pd.dfgroupby(names, ['year', 'sex']) 42 | top1000 = pd.gbapply(grouped, get_top1000) 43 | pd.evaluate(workers=threads) 44 | top1000 = top1000.value 45 | top1000.reset_index(inplace=True, drop=True) 46 | print(len(top1000)) 47 | 48 | """ 49 | grouped: Dag Operation 50 | GBApply Takes a DAG operation and stores it in its type. 
The operation must be a GroupBy 51 | GBApply has type ApplySplit. It's combiner: 52 | 1. Combines the results of the dataFrame 53 | 2. Resets index 54 | 3. Gets the keys from the DAG operation 55 | 4. Calls groupBy again 56 | 5. Calls apply again. 57 | """ 58 | 59 | localreduce_start = time.time() 60 | top1000 = top1000.groupby(['year', 'sex']).apply(get_top1000) 61 | localreduce_end = time.time() 62 | print("Local reduction:", localreduce_end - localreduce_start) 63 | top1000.reset_index(inplace=True, drop=True) 64 | end0 = time.time() 65 | 66 | print("Apply:", end0-start0) 67 | print("Elements in top1000:", len(top1000)) 68 | 69 | result = analyze(top1000) 70 | 71 | e2e_end = time.time() 72 | print("Total time:", e2e_end - e2e_start) 73 | 74 | print(top1000['births'].sum()) 75 | 76 | def main(): 77 | parser = argparse.ArgumentParser( 78 | description="Birth Analysis with Composer." 79 | ) 80 | parser.add_argument('-f', "--filename", type=str, default="babynames.txt", help="Input file") 81 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 82 | args = parser.parse_args() 83 | 84 | filename = args.filename 85 | threads = args.threads 86 | 87 | print("File:", filename) 88 | print("Threads:", threads) 89 | mi = run(filename, threads) 90 | 91 | 92 | if __name__ == "__main__": 93 | main() 94 | -------------------------------------------------------------------------------- /python/benchmarks/blackscholes/benchmark-batch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | size=30 8 | 9 | tasks=( composerbatch ) 10 | batchsizes=( 512 2048 4096 8192 16384 32768 8388608 16777216 33554432 ) 11 | 12 | for task in "${tasks[@]}"; do 13 | rm -f $task.stdout $task.stderr 14 | git log | head -1 > $task.stderr 15 | git log | head -1 > $task.stdout 16 | done 17 | 18 | for i in {1..5}; do 19 | for batchsize in "${batchsizes[@]}"; do 
20 | taskset -a -c 0-9,20-29 python blackscholes.py -m composer -s $size -t 16 -p $batchsize >> $task.stdout 2>> $task.stderr 21 | done 22 | done 23 | -------------------------------------------------------------------------------- /python/benchmarks/blackscholes/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | tasks=( naive numba composer ) 8 | size=30 9 | runs=${1:-1} 10 | threads=( 1 2 4 8 16 ) 11 | 12 | for task in "${tasks[@]}"; do 13 | rm -f $task.stdout $task.stderr 14 | git log | head -1 > $task.stderr 15 | git log | head -1 > $task.stdout 16 | done 17 | 18 | for i in {1..$runs}; do 19 | for nthreads in "${threads[@]}"; do 20 | NUMBA_NUM_THREADS=$nthreads python blackscholes_numba.py -s $size >> numba.stdout 2>> numba.stderr 21 | done 22 | done 23 | 24 | for i in {1..$runs}; do 25 | for nthreads in "${threads[@]}"; do 26 | python blackscholes.py -m composer -s $size -t $nthreads >> composer.stdout 2>> composer.stderr 27 | done 28 | done 29 | 30 | for i in {1..$runs}; do 31 | for nthreads in "${threads[@]}"; do 32 | python blackscholes.py -m naive -s $size -t $nthreads >> naive.stdout 2>> naive.stderr 33 | done 34 | done 35 | -------------------------------------------------------------------------------- /python/benchmarks/blackscholes/blackscholes.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | sys.path.append("../../lib/") 4 | sys.path.append("../../pycomposer/") 5 | 6 | import argparse 7 | import math 8 | import scipy.special as ss 9 | import time 10 | 11 | def get_data(size, composer): 12 | if composer: 13 | import composer_numpy as np 14 | else: 15 | import numpy as np 16 | 17 | price = np.ones(size, dtype="float64") * 4.0 18 | strike = np.ones(size, dtype="float64") * 4.0 19 | t = np.ones(size, dtype="float64") * 4.0 20 | rate = np.ones(size, dtype="float64") * 4.0 21 
| vol = np.ones(size, dtype="float64") * 4.0 22 | 23 | return price, strike, t, rate, vol 24 | 25 | def bs(price, strike, t, rate, vol, composer, threads, piece_size): 26 | 27 | if composer: 28 | import composer_numpy as np 29 | else: 30 | import numpy as np 31 | 32 | c05 = 3.0 33 | c10 = 1.5 34 | invsqrt2 = 1.0 / math.sqrt(2.0) 35 | 36 | start = time.time() 37 | 38 | tmp = np.ones(len(price), dtype="float64") 39 | vol_sqrt = np.ones(len(price), dtype="float64") 40 | rsig = np.ones(len(price), dtype="float64") 41 | d1 = np.ones(len(price), dtype="float64") 42 | d2 = np.ones(len(price), dtype="float64") 43 | 44 | # Outputs 45 | call = np.ones(len(price), dtype="float64") 46 | put = np.ones(len(price), dtype="float64") 47 | end = time.time() 48 | print("Allocation:", end - start) 49 | 50 | start = time.time() 51 | 52 | np.multiply(vol, vol, out=rsig) 53 | np.multiply(rsig, c05, out=rsig) 54 | np.add(rsig, rate, out=rsig) 55 | 56 | np.sqrt(t, out=vol_sqrt) 57 | np.multiply(vol_sqrt, vol, out=vol_sqrt) 58 | 59 | np.multiply(rsig, t, out=tmp) 60 | np.divide(price, strike, out=d1) 61 | np.log2(d1, out=d1) 62 | np.add(d1, tmp, out=d1) 63 | 64 | np.divide(d1, vol_sqrt, out=d1) 65 | np.subtract(d1, vol_sqrt, out=d2) 66 | 67 | # d1 = c05 + c05 * erf(d1 * invsqrt2) 68 | np.multiply(d1, invsqrt2, out=d1) 69 | 70 | if composer: 71 | np.erf(d1, out=d1) 72 | else: 73 | ss.erf(d1, out=d1) 74 | 75 | np.multiply(d1, c05, out=d1) 76 | np.add(d1, c05, out=d1) 77 | 78 | # d2 = c05 + c05 * erf(d2 * invsqrt2) 79 | np.multiply(d2, invsqrt2, out=d2) 80 | 81 | if composer: 82 | np.erf(d2, out=d2) 83 | else: 84 | ss.erf(d2, out=d2) 85 | 86 | np.multiply(d2, c05, out=d2) 87 | np.add(d2, c05, out=d2) 88 | 89 | # Reuse existing buffers 90 | e_rt = vol_sqrt 91 | tmp2 = rsig 92 | 93 | # e_rt = exp(-rate * t) 94 | np.multiply(rate, -1.0, out=e_rt) 95 | np.multiply(e_rt, t, out=e_rt) 96 | np.exp(e_rt, out=e_rt) 97 | 98 | # call = price * d1 - e_rt * strike * d2 99 | # 100 | # tmp = price * d1 101 | 
# tmp2 = e_rt * strike * d2 102 | # call = tmp - tmp2 103 | np.multiply(price, d1, out=tmp) 104 | np.multiply(e_rt, strike, out=tmp2) 105 | np.multiply(tmp2, d2, out=tmp2) 106 | np.subtract(tmp, tmp2, out=call) 107 | 108 | # put = e_rt * strike * (c10 - d2) - price * (c10 - d1) 109 | # tmp = e_rt * strike 110 | # tmp2 = (c10 - d2) 111 | # put = tmp - tmp2 112 | # tmp = c10 - d1 113 | # tmp = price * tmp 114 | # put = put - tmp 115 | np.multiply(e_rt, strike, out=tmp) 116 | np.subtract(c10, d2, out=tmp2) 117 | np.multiply(tmp, tmp2, out=put) 118 | np.subtract(c10, d1, out=tmp) 119 | np.multiply(price, tmp, out=tmp) 120 | np.subtract(put, tmp, out=put) 121 | 122 | end = time.time() 123 | print("Build time:", end - start) 124 | 125 | if composer: 126 | np.evaluate(workers=threads, batch_size=piece_size) 127 | 128 | end = time.time() 129 | print("Runtime:", end - start) 130 | 131 | return call, put 132 | 133 | def run(): 134 | parser = argparse.ArgumentParser( 135 | description="Chained Adds pipelining test on a single thread." 
136 | ) 137 | parser.add_argument('-s', "--size", type=int, default=27, help="Size of each array") 138 | parser.add_argument('-p', "--piece_size", type=int, default=16384, help="Size of each piece.") 139 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 140 | parser.add_argument('-v', "--verbosity", type=str, default="none", help="Log level (debug|info|warning|error|critical|none)") 141 | parser.add_argument('-m', "--mode", type=str, required=False, help="Mode (composer|naive)") 142 | args = parser.parse_args() 143 | 144 | size = (1 << args.size) 145 | piece_size = args.piece_size 146 | threads = args.threads 147 | loglevel = args.verbosity 148 | mode = args.mode.strip().lower() 149 | 150 | assert threads >= 1 151 | 152 | print("Size:", size) 153 | print("Piece Size:", piece_size) 154 | print("Threads:", threads) 155 | print("Log Level", loglevel) 156 | print("Mode:", mode) 157 | 158 | if mode == "composer": 159 | composer = True 160 | elif mode == "naive": 161 | composer = False 162 | else: 163 | raise ValueError("invalid mode", mode) 164 | 165 | sys.stdout.write("Generating data...") 166 | sys.stdout.flush() 167 | a, b, c, d, e = get_data(size, composer) 168 | print("done.") 169 | 170 | call, put = bs(a, b, c, d, e, composer, threads, piece_size) 171 | print("Call:", call) 172 | print("Put:", put) 173 | 174 | if __name__ == "__main__": 175 | run() 176 | -------------------------------------------------------------------------------- /python/benchmarks/blackscholes/blackscholes_numba.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | import argparse 5 | import scipy.special as ss 6 | import time 7 | 8 | import numpy as np 9 | from numba import njit, jit 10 | from numba import vectorize, float64 11 | 12 | def get_data(size): 13 | price = np.ones(size, dtype="float64") * 4.0 14 | strike = np.ones(size, dtype="float64") * 4.0 15 | t = np.ones(size, dtype="float64") * 
4.0 16 | rate = np.ones(size, dtype="float64") * 4.0 17 | vol = np.ones(size, dtype="float64") * 4.0 18 | 19 | return price, strike, t, rate, vol 20 | 21 | @njit(parallel=True) 22 | def bs(price, strike, t, rate, vol): 23 | """ 24 | This is the cookie-cutter implementation. 25 | """ 26 | c05 = 3.0 27 | c10 = 1.5 28 | invsqrt2 = 1.0 / np.sqrt(2.0) 29 | 30 | c05 = np.float64(3.0) 31 | c10 = np.float64(1.5) 32 | 33 | rsig = rate + (vol**2) * c05 34 | vol_sqrt = vol * np.sqrt(t) 35 | 36 | d1 = (np.log(price / strike) + rsig * t) / vol_sqrt 37 | d2 = d1 - vol_sqrt 38 | 39 | d1 = c05 + c05 * np.exp(d1 * invsqrt2) 40 | d2 = c05 + c05 * np.exp(d2 * invsqrt2) 41 | 42 | e_rt = np.exp((-rate) * t) 43 | 44 | call = price * d1 - e_rt * strike * d2 45 | put = e_rt * strike * (c10 - d2) - price * (c10 - d1) 46 | return call, put 47 | 48 | 49 | def run(): 50 | parser = argparse.ArgumentParser( 51 | description="Chained Adds pipelining test on a single thread." 52 | ) 53 | parser.add_argument('-s', "--size", type=int, default=27, help="Size of each array") 54 | parser.add_argument('-p', "--piece_size", type=int, default=16384, help="Size of each piece.") 55 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 56 | parser.add_argument('-v', "--verbosity", type=str, default="none", help="Log level (debug|info|warning|error|critical|none)") 57 | args = parser.parse_args() 58 | 59 | size = (1 << args.size) 60 | piece_size = args.piece_size 61 | threads = args.threads 62 | loglevel = args.verbosity 63 | 64 | assert threads >= 1 65 | 66 | print("Size:", size) 67 | print("Piece Size:", piece_size) 68 | print("Threads:", threads) 69 | print("Log Level", loglevel) 70 | 71 | sys.stdout.write("Generating data...") 72 | sys.stdout.flush() 73 | a, b, c, d, e = get_data(size) 74 | print("done") 75 | 76 | 77 | start = time.time() 78 | call, put = bs(a, b, c, d, e) 79 | end = time.time() 80 | print(end-start) 81 | 82 | if __name__ == "__main__": 83 | run() 84 | 
85 | -------------------------------------------------------------------------------- /python/benchmarks/crime_index/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | size=29 8 | tasks=( naive composer ) 9 | threads=( 1 2 4 8 16) 10 | runs=${1:-1} 11 | 12 | for task in "${tasks[@]}"; do 13 | rm -f $task.stdout $task.stderr 14 | git log | head -1 > $task.stderr 15 | git log | head -1 > $task.stdout 16 | done 17 | 18 | for task in "${tasks[@]}"; do 19 | for i in {1..$runs}; do 20 | for nthreads in "${threads[@]}"; do 21 | python crime_index.py -m $task -t $nthreads -s $size >> $task.stdout 2>> $task.stderr 22 | done 23 | done 24 | done 25 | -------------------------------------------------------------------------------- /python/benchmarks/crime_index/crime_index.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import argparse 4 | import sys 5 | 6 | sys.path.append("../../lib") 7 | sys.path.append("../../pycomposer") 8 | 9 | import numpy as np 10 | import time 11 | 12 | import composer_pandas as pd 13 | 14 | def gen_data(size): 15 | total_population = np.ones(size, dtype="float64") * 500000 16 | adult_population = np.ones(size, dtype="float64") * 250000 17 | num_robberies = np.ones(size, dtype="float64") * 1000 18 | return pd.Series(total_population), pd.Series(adult_population), pd.Series(num_robberies) 19 | 20 | def crime_index_composer(total_population, adult_population, num_robberies, threads): 21 | # Get all city information with total population greater than 500,000 22 | big_cities = pd.greater_than(total_population, 500000.0) 23 | big_cities.dontsend = True 24 | big_cities = pd.mask(total_population, big_cities, 0.0) 25 | big_cities.dontsend = True 26 | 27 | double_pop = pd.multiply(adult_population, 2.0) 28 | double_pop.dontsend = True 29 | double_pop = pd.add(big_cities, 
double_pop) 30 | double_pop.dontsend = True 31 | multiplied = pd.multiply(num_robberies, 2000.0) 32 | multiplied.dontsend = True 33 | double_pop = pd.subtract(double_pop, multiplied) 34 | double_pop.dontsend = True 35 | crime_index = pd.divide(double_pop, 100000.0) 36 | crime_index.dontsend = True 37 | 38 | 39 | gt = pd.greater_than(crime_index, 0.02) 40 | gt.dontsend = True 41 | crime_index = pd.mask(crime_index, gt, 0.032) 42 | crime_index.dontsend = True 43 | lt = pd.less_than(crime_index, 0.01) 44 | crime_index = pd.mask(crime_index, lt, 0.005) 45 | crime_index.dontsend = True 46 | 47 | result = pd.pandasum(crime_index) 48 | pd.evaluate(workers=threads) 49 | return result.value 50 | 51 | def crime_index_pandas(total_population, adult_population, num_robberies): 52 | print(len(total_population)) 53 | big_cities = total_population > 500000 54 | big_cities = total_population.mask(big_cities, 0.0) 55 | double_pop = adult_population * 2 + big_cities - (num_robberies * 2000.0) 56 | crime_index = double_pop / 100000 57 | crime_index = crime_index.mask(crime_index > 0.02, 0.032) 58 | crime_index = crime_index.mask(crime_index < 0.01, 0.005) 59 | return crime_index.sum() 60 | 61 | def run(): 62 | parser = argparse.ArgumentParser(description="Crime Index") 63 | parser.add_argument('-s', "--size", type=int, default=26, help="Size of each array") 64 | parser.add_argument('-p', "--piece_size", type=int, default=16384*2, help="Size of each piece.") 65 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 66 | parser.add_argument('-m', "--mode", type=str, required=True, help="Mode (composer|naive)") 67 | args = parser.parse_args() 68 | 69 | size = (1 << args.size) 70 | piece_size = args.piece_size 71 | threads = args.threads 72 | mode = args.mode.strip().lower() 73 | 74 | assert mode == "composer" or mode == "naive" 75 | assert threads >= 1 76 | 77 | print("Size:", size) 78 | print("Piece Size:", piece_size) 79 | print("Threads:", threads) 
80 | print("Mode:", mode) 81 | 82 | sys.stdout.write("Generating data...") 83 | sys.stdout.flush() 84 | inputs = gen_data(size) 85 | print("done.") 86 | 87 | start = time.time() 88 | if mode == "composer": 89 | result = crime_index_composer(inputs[0], inputs[1], inputs[2], threads) 90 | elif mode == "naive": 91 | result = crime_index_pandas(*inputs) 92 | end = time.time() 93 | 94 | print(end - start, "seconds") 95 | print(result) 96 | 97 | if __name__ == "__main__": 98 | run() 99 | 100 | -------------------------------------------------------------------------------- /python/benchmarks/data_cleaning/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | size=29 8 | tasks=( naive composer ) 9 | threads=( 1 2 4 8 16 ) 10 | runs=${1:-1} 11 | 12 | for task in "${tasks[@]}"; do 13 | rm -f $task.stdout $task.stderr 14 | git log | head -1 > $task.stderr 15 | git log | head -1 > $task.stdout 16 | done 17 | 18 | for task in "${tasks[@]}"; do 19 | for i in {1..$runs}; do 20 | for nthreads in "${threads[@]}"; do 21 | python data_cleaning.py -m $task -t $nthreads -s $size >> $task.stdout 2>> $task.stderr 22 | done 23 | done 24 | done 25 | -------------------------------------------------------------------------------- /python/benchmarks/data_cleaning/data_cleaning.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # The usual preamble 4 | import numpy as np 5 | import time 6 | import argparse 7 | 8 | import sys 9 | 10 | sys.path.append("../../lib") 11 | sys.path.append("../../pycomposer/") 12 | 13 | import composer_pandas as pd 14 | 15 | def gen_data(size): 16 | values = ["1234567" for _ in range(size)] 17 | return pd.Series(data=values) 18 | 19 | def datacleaning_pandas(requests): 20 | requests = requests.str.slice(0, 5) 21 | zero_zips = requests == "00000" 22 | requests = 
requests.mask(zero_zips, np.nan) 23 | requests = requests.unique() 24 | return requests 25 | 26 | def datacleaning_composer(requests, threads): 27 | # Fix requests with extra digits 28 | requests = pd.series_str_slice(requests, 0, 5) 29 | requests.dontsend = True 30 | 31 | # Fix requests with 00000 zipcodes 32 | zero_zips = pd.equal(requests, "00000") 33 | zero_zips.dontsend = True 34 | requests = pd.mask(requests, zero_zips, np.nan) 35 | requests.dontsend = True 36 | requests = pd.unique(requests) 37 | pd.evaluate(workers=threads) 38 | requests = requests.value 39 | return requests 40 | 41 | def run(): 42 | parser = argparse.ArgumentParser( 43 | description="Data Cleaning" 44 | ) 45 | parser.add_argument('-s', "--size", type=int, default=26, help="Size of each array") 46 | parser.add_argument('-p', "--piece_size", type=int, default=16384*2, help="Size of each piece.") 47 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 48 | parser.add_argument('-v', "--verbosity", type=str, default="none", help="Log level (debug|info|warning|error|critical|none)") 49 | parser.add_argument('-m', "--mode", type=str, required=True, help="Mode (composer|naive)") 50 | args = parser.parse_args() 51 | 52 | size = (1 << args.size) 53 | piece_size = args.piece_size 54 | threads = args.threads 55 | loglevel = args.verbosity 56 | mode = args.mode.strip().lower() 57 | 58 | assert mode == "composer" or mode == "naive" 59 | assert threads >= 1 60 | 61 | print("Size:", size) 62 | print("Piece Size:", piece_size) 63 | print("Threads:", threads) 64 | print("Log Level", loglevel) 65 | print("Mode:", mode) 66 | 67 | sys.stdout.write("Generating data...") 68 | sys.stdout.flush() 69 | inputs = gen_data(size) 70 | print("done.") 71 | 72 | start = time.time() 73 | if mode == "composer": 74 | result = datacleaning_composer(inputs, threads) 75 | elif mode == "naive": 76 | result = datacleaning_pandas(inputs) 77 | end = time.time() 78 | print(end - start, "seconds") 
79 | print(result) 80 | 81 | if __name__ == "__main__": 82 | run() 83 | 84 | -------------------------------------------------------------------------------- /python/benchmarks/datasets/birth_analysis/babynames.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weld-project/split-annotations/d835cc54476336e7f4355d87e820595aeddcc442/python/benchmarks/datasets/birth_analysis/babynames.txt.gz -------------------------------------------------------------------------------- /python/benchmarks/datasets/birth_analysis/replicate-csv: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import argparse 4 | import csv 5 | import random 6 | 7 | def transformCSV_birth_analysis(inputFile, outputFile, replicationFactor): 8 | years = [str(i) for i in range(0, 1000 * replicationFactor)] 9 | gender = [str(i) for i in range(0, 4)] 10 | with open(inputFile, 'r') as f1, open(outputFile, 'w') as f2: 11 | lineId = 0 12 | numCols = 0 13 | count = 0 14 | for line in csv.reader(f1, delimiter=','): 15 | if lineId == 0: 16 | numCols = len(line) 17 | if len(line) != numCols: 18 | continue 19 | line_ele_list = [str(line_ele) for line_ele in line] 20 | 21 | for factor in range(0, replicationFactor): 22 | count += 1 23 | new_year = years[count % len(years)] 24 | # This is a hack. We need to get to the bottom of pandas ordering on sorts 25 | # which is causing grizzly to include "Leslyn" when pandas does not 26 | # for birth analysis (sort on groupmerger). 
27 | num_diff = str(int(line_ele_list[2]) + count) 28 | new_gender = gender[count % len(gender)] 29 | new_line_list = [new_year, line_ele_list[0], line_ele_list[1], num_diff] 30 | f2.write(','.join(new_line_list)) 31 | f2.write('\n') 32 | lineId += 1 33 | 34 | def transformCSV(inputFile, outputFile, replicationFactor): 35 | with open(inputFile, 'r') as f1, open(outputFile, 'w') as f2: 36 | lineId = 0 37 | numCols = 0 38 | for line in csv.reader(f1, delimiter=','): 39 | if lineId == 0: 40 | numCols = len(line) 41 | f2.write(','.join([str(line_ele) for line_ele in line])) 42 | f2.write('\n') 43 | else: 44 | if len(line) != numCols: 45 | continue 46 | for i in xrange(replicationFactor): 47 | f2.write(','.join([str(line_ele) for line_ele in line])) 48 | f2.write('\n') 49 | lineId += 1 50 | 51 | if __name__ == '__main__': 52 | parser = argparse.ArgumentParser( 53 | description=("Produce plot of data dumped in provided data file") 54 | ) 55 | parser.add_argument('-i', "--inputFile", required=True, 56 | help="Input CSV file") 57 | parser.add_argument('-o', "--outputFile", required=True, 58 | help="Output CSV file") 59 | parser.add_argument('-r', "--replicationFactor", default=1, type=int, 60 | help="Number of times to replicate input row in output file") 61 | 62 | cmdLineArgs = parser.parse_args() 63 | optDict = vars(cmdLineArgs) 64 | if optDict["inputFile"].split("/")[-1].startswith("yob"): 65 | transformCSV_birth_analysis(optDict["inputFile"], optDict["outputFile"], 66 | optDict["replicationFactor"]) 67 | else: 68 | transformCSV(optDict["inputFile"], optDict["outputFile"], 69 | optDict["replicationFactor"]) 70 | -------------------------------------------------------------------------------- /python/benchmarks/datasets/movielens/replicate-csv: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import argparse 4 | import csv 5 | import random 6 | 7 | def transformCSV(inputFile, outputFile, replicationFactor): 8 
| with open(inputFile, 'r') as f1, open(outputFile, 'w') as f2: 9 | lineId = 0 10 | numCols = 0 11 | for line in f1: 12 | line = line.split("::") 13 | if lineId == 0: 14 | numCols = len(line) 15 | f2.write('::'.join([str(line_ele) for line_ele in line])) 16 | else: 17 | if len(line) != numCols: 18 | continue 19 | for i in xrange(replicationFactor): 20 | f2.write('::'.join([str(line_ele) for line_ele in line])) 21 | lineId += 1 22 | 23 | if __name__ == '__main__': 24 | parser = argparse.ArgumentParser( 25 | description=("Produce plot of data dumped in provided data file") 26 | ) 27 | parser.add_argument('-i', "--inputFile", required=True, 28 | help="Input CSV file") 29 | parser.add_argument('-o', "--outputFile", required=True, 30 | help="Output CSV file") 31 | parser.add_argument('-r', "--replicationFactor", default=1, type=int, 32 | help="Number of times to replicate input row in output file") 33 | 34 | cmdLineArgs = parser.parse_args() 35 | optDict = vars(cmdLineArgs) 36 | transformCSV(optDict["inputFile"], optDict["outputFile"], optDict["replicationFactor"]) 37 | -------------------------------------------------------------------------------- /python/benchmarks/get-data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | cd datasets 6 | TOP=`pwd` 7 | 8 | # MovieLens dataset for the MovieLens workload. 
9 | cd movielens 10 | wget http://files.grouplens.org/datasets/movielens/ml-1m.zip 11 | unzip ml-1m.zip 12 | rm -rf _data 13 | mkdir -p _data 14 | mv ml-1m _data 15 | rm ml-1m.zip 16 | ./replicate-csv -i _data/ml-1m/movies.dat -o _data/ml-1m/movies-large.dat -r 7 17 | ./replicate-csv -i _data/ml-1m/ratings.dat -o _data/ml-1m/ratings-large.dat -r 7 18 | ./replicate-csv -i _data/ml-1m/users.dat -o _data/ml-1m/users-large.dat -r 7 19 | 20 | # Birth Analysis 21 | cd $TOP 22 | cd birth_analysis 23 | gunzip -k babynames.txt.gz 24 | rm -rf _data 25 | mkdir -p _data 26 | mv babynames.txt _data 27 | ./replicate-csv -i _data/babynames.txt -o _data/babynames-xlarge.txt -r 80 28 | -------------------------------------------------------------------------------- /python/benchmarks/haversine/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | size=30 8 | runs=${1:-1} 9 | tasks=( numba naive composer ) 10 | threads=( 1 2 4 8 16 ) 11 | 12 | for task in "${tasks[@]}"; do 13 | rm -f $task.stdout $task.stderr 14 | git log | head -1 > $task.stderr 15 | git log | head -1 > $task.stdout 16 | done 17 | 18 | for i in {1..$runs}; do 19 | for nthreads in "${threads[@]}"; do 20 | NUMBA_NUM_THREADS=$nthreads python haversine_numba.py -s $size >> numba.stdout 2>> numba.stderr 21 | done 22 | done 23 | 24 | for i in {1..$runs}; do 25 | for nthreads in "${threads[@]}"; do 26 | python haversine.py -m composer -s $size -t $nthreads >> composer.stdout 2>> composer.stderr 27 | done 28 | done 29 | 30 | for i in {1..$runs}; do 31 | for nthreads in "${threads[@]}"; do 32 | python haversine.py -m naive -s $size >> naive.stdout 2>> naive.stderr 33 | done 34 | done 35 | -------------------------------------------------------------------------------- /python/benchmarks/haversine/haversine.py: -------------------------------------------------------------------------------- 1 | 2 | import 
sys 3 | sys.path.append("../../lib/") 4 | sys.path.append("../../pycomposer/") 5 | 6 | import argparse 7 | import math 8 | import time 9 | 10 | def get_data(size, composer): 11 | if composer: 12 | import composer_numpy as np 13 | else: 14 | import numpy as np 15 | 16 | lats = np.ones(size, dtype="float64") * 0.0698132 17 | lons = np.ones(size, dtype="float64") * 0.0698132 18 | return lats, lons 19 | 20 | def haversine(lat2, lon2, composer, threads): 21 | if composer: 22 | import composer_numpy as np 23 | else: 24 | import numpy as np 25 | 26 | lat1 = 0.70984286 27 | lon1 = 1.23892197 28 | MILES_CONST = 3959.0 29 | 30 | start = time.time() 31 | a = np.zeros(len(lat2), dtype="float64") 32 | dlat = np.zeros(len(lat2), dtype="float64") 33 | dlon = np.zeros(len(lat2), dtype="float64") 34 | end = time.time() 35 | print("Allocation time:", end-start) 36 | 37 | start = time.time() 38 | np.subtract(lat2, lat1, out=dlat) 39 | np.subtract(lon2, lon1, out=dlon) 40 | 41 | # dlat = sin(dlat / 2.0) ** 2.0 42 | np.divide(dlat, 2.0, out=dlat) 43 | np.sin(dlat, out=dlat) 44 | np.multiply(dlat, dlat, out=dlat) 45 | 46 | # a = cos(lat1) * cos(lat2) 47 | lat1_cos = math.cos(lat1) 48 | np.cos(lat2, out=a) 49 | np.multiply(a, lat1_cos, out=a) 50 | 51 | # a = a + sin(dlon / 2.0) ** 2.0 52 | np.divide(dlon, 2.0, out=dlon) 53 | np.sin(dlon, out=dlon) 54 | np.multiply(dlon, dlon, out=dlon) 55 | np.multiply(a, dlon, out=a) 56 | np.add(dlat, a, out=a) 57 | 58 | c = a 59 | np.sqrt(a, out=a) 60 | np.arcsin(a, out=a) 61 | np.multiply(a, 2.0, out=c) 62 | 63 | mi = c 64 | np.multiply(c, MILES_CONST, out=mi) 65 | 66 | if composer: 67 | np.evaluate(workers=threads) 68 | 69 | end = time.time() 70 | print("Runtime:", end-start) 71 | 72 | return mi 73 | 74 | def run(): 75 | parser = argparse.ArgumentParser( 76 | description="Haversine distance computation." 
77 | ) 78 | parser.add_argument('-s', "--size", type=int, default=26, help="Size of each array") 79 | parser.add_argument('-p', "--piece_size", type=int, default=16384, help="Size of each piece.") 80 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 81 | parser.add_argument('-v', "--verbosity", type=str, default="none", help="Log level (debug|info|warning|error|critical|none)") 82 | parser.add_argument('-m', "--mode", type=str, required=True, help="Mode (composer|naive)") 83 | args = parser.parse_args() 84 | 85 | size = (1 << args.size) 86 | piece_size = args.piece_size 87 | threads = args.threads 88 | loglevel = args.verbosity 89 | mode = args.mode.strip().lower() 90 | 91 | print("Size:", size) 92 | print("Piece Size:", piece_size) 93 | print("Threads:", threads) 94 | print("Log Level", loglevel) 95 | print("Mode:", mode) 96 | 97 | if mode == "composer": 98 | composer = True 99 | elif mode == "naive": 100 | composer = False 101 | else: 102 | raise ValueError("unknown mode", mode) 103 | 104 | sys.stdout.write("Generating data...") 105 | sys.stdout.flush() 106 | lats, lons = get_data(size, composer) 107 | print("done.") 108 | 109 | 110 | mi = haversine(lats, lons, composer, threads) 111 | print(mi) 112 | 113 | if __name__ == "__main__": 114 | run() 115 | 116 | -------------------------------------------------------------------------------- /python/benchmarks/haversine/haversine_numba.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | import math 4 | import sys 5 | import time 6 | 7 | import numpy as np 8 | from numba import njit 9 | 10 | def get_data(size): 11 | lats = np.ones(size, dtype="float64") * 0.0698132 12 | lons = np.ones(size, dtype="float64") * 0.0698132 13 | return lats, lons 14 | 15 | 16 | @njit(parallel=True) 17 | def haversine(lat2, lon2): 18 | lat1 = 0.70984286 19 | lon1 = 1.23892197 20 | miles_constant = 3959.0 21 | dlat = lat2 - lat1 22 | dlon = 
lon2 - lon1 23 | a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2 24 | c = 2.0 * np.arcsin(np.sqrt(a)) 25 | mi = miles_constant * c 26 | return mi 27 | 28 | def run(): 29 | parser = argparse.ArgumentParser( 30 | description="Haversine distance computation." 31 | ) 32 | parser.add_argument('-s', "--size", type=int, default=26, help="Size of each array") 33 | parser.add_argument('-p', "--piece_size", type=int, default=16384, help="Size of each piece.") 34 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 35 | parser.add_argument('-v', "--verbosity", type=str, default="none", help="Log level (debug|info|warning|error|critical|none)") 36 | args = parser.parse_args() 37 | 38 | size = (1 << args.size) 39 | piece_size = args.piece_size 40 | threads = args.threads 41 | loglevel = args.verbosity 42 | 43 | print("Size:", size) 44 | print("Piece Size:", piece_size) 45 | print("Threads:", threads) 46 | print("Log Level", loglevel) 47 | 48 | sys.stdout.write("Generating data...") 49 | sys.stdout.flush() 50 | lats, lons = get_data(size) 51 | print("done.") 52 | 53 | 54 | start = time.time() 55 | mi = haversine(lats, lons) 56 | end = time.time() 57 | print("Runtime:", end - start) 58 | print(mi) 59 | 60 | if __name__ == "__main__": 61 | run() 62 | 63 | -------------------------------------------------------------------------------- /python/benchmarks/movielens/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | runs=${1:-1} 8 | tasks=( naive composer ) 9 | threads=( 1 2 4 8 16 ) 10 | 11 | for task in "${tasks[@]}"; do 12 | rm -f $task.stdout $task.stderr 13 | git log | head -1 > $task.stderr 14 | git log | head -1 > $task.stdout 15 | done 16 | 17 | for i in {1..$runs}; do 18 | for nthreads in "${threads[@]}"; do 19 | python movielens_composer.py -t $nthreads >> composer.stdout 2>> 
composer.stderr 20 | done 21 | done 22 | 23 | for i in {1..$runs}; do 24 | python movielens.py >> naive.stdout 2>> naive.stderr 25 | done 26 | -------------------------------------------------------------------------------- /python/benchmarks/movielens/movielens.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import sys 3 | import time 4 | 5 | import warnings 6 | warnings.filterwarnings('ignore') 7 | 8 | # Make display smaller 9 | pd.options.display.max_rows = 10 10 | 11 | sys.stdout.write("Reading data...") 12 | sys.stdout.flush() 13 | unames = ['user_id', 'gender', 'age', 'occupation', 'zip'] 14 | users = pd.read_table('../datasets/movielens/_data/ml-1m/users-large.dat', sep='::', header=None, 15 | names=unames) 16 | 17 | rnames = ['user_id', 'movie_id', 'rating', 'timestamp'] 18 | ratings = pd.read_table('../datasets/movielens/_data/ml-1m/ratings-large.dat', sep='::', header=None, 19 | names=rnames) 20 | 21 | mnames = ['movie_id', 'title', 'genres'] 22 | movies = pd.read_table('../datasets/movielens/_data/ml-1m/movies-large.dat', sep='::', header=None, 23 | names=mnames) 24 | print("Done") 25 | 26 | e2e_start = time.time() 27 | 28 | start = time.time() 29 | data = pd.merge(ratings, users) 30 | end = time.time() 31 | print("Merge 1:", end - start) 32 | start = end 33 | data = pd.merge(data, movies) 34 | end = time.time() 35 | print("Merge 2:", end - start) 36 | start = end 37 | print(len(data)) 38 | data = data[data['age'] > 45] 39 | print(len(data)) 40 | end = time.time() 41 | print("Filter:", end - start) 42 | start = end 43 | 44 | mean_ratings = data.pivot_table('rating', index='title', columns='gender', 45 | aggfunc='mean') 46 | end = time.time() 47 | print("Pivot:", end - start) 48 | start = end 49 | 50 | ratings_by_title = data.groupby('title').size() 51 | end = time.time() 52 | print("GroupBy size:", end - start) 53 | start = end 54 | 55 | active_titles = 
ratings_by_title.index[ratings_by_title >= 250] 56 | mean_ratings = mean_ratings.loc[active_titles] 57 | mean_ratings['diff'] = mean_ratings['M'] - mean_ratings['F'] 58 | sorted_by_diff = mean_ratings.sort_values(by='diff') 59 | end = time.time() 60 | print("Diff:", end - start) 61 | start = end 62 | 63 | rating_std_by_title = data.groupby('title')['rating'].std() 64 | end = time.time() 65 | print("GroupBy std:", end - start) 66 | start = end 67 | 68 | rating_std_by_title = rating_std_by_title.loc[active_titles] 69 | rating_std_by_title = rating_std_by_title.sort_values(ascending=False)[:10] 70 | end = time.time() 71 | print("Sort:", end - start) 72 | start = end 73 | 74 | e2e_end = time.time() 75 | 76 | print(sorted_by_diff.head()) 77 | print(rating_std_by_title.head()) 78 | 79 | print("Total:", e2e_end - e2e_start) 80 | -------------------------------------------------------------------------------- /python/benchmarks/movielens/movielens_composer.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | import sys 4 | 5 | sys.path.append("../../lib/") 6 | sys.path.append("../../pycomposer/") 7 | 8 | import composer_pandas as pd 9 | import time 10 | 11 | import warnings 12 | warnings.filterwarnings('ignore') 13 | 14 | def run(threads): 15 | # Make display smaller 16 | pd.options.display.max_rows = 10 17 | 18 | sys.stdout.write("Reading data...") 19 | sys.stdout.flush() 20 | unames = ['user_id', 'gender', 'age', 'occupation', 'zip'] 21 | users = pd.read_table('../datasets/movielens/_data/ml-1m/users-large.dat', sep='::', header=None, 22 | names=unames) 23 | 24 | rnames = ['user_id', 'movie_id', 'rating', 'timestamp'] 25 | ratings = pd.read_table('../datasets/movielens/_data/ml-1m/ratings-large.dat', sep='::', header=None, 26 | names=rnames) 27 | 28 | mnames = ['movie_id', 'title', 'genres'] 29 | movies = pd.read_table('../datasets/movielens/_data/ml-1m/movies-large.dat', sep='::', header=None, 30 | 
names=mnames) 31 | print("Done") 32 | 33 | e2e_start = time.time() 34 | 35 | start = time.time() 36 | tmp = pd.merge(ratings, users) 37 | tmp.dontsend = True 38 | data = pd.merge(tmp, movies) 39 | data.dontsend = True 40 | data = pd.filter(data, 'age', 45) 41 | pd.evaluate(workers=threads) 42 | data = data.value 43 | end = time.time() 44 | print("Merge 2:", end - start) 45 | start = end 46 | 47 | mean_ratings = data.pivot_table('rating', index='title', columns='gender', 48 | aggfunc='mean') 49 | end = time.time() 50 | print("Pivot:", end - start) 51 | start = end 52 | 53 | """ 54 | ratings_by_title = pd.dfgroupby(data, 'title') 55 | ratings_by_title = pd.gbsize(ratings_by_title) 56 | pd.evaluate(workers=threads) 57 | ratings_by_title = ratings_by_title.value 58 | """ 59 | ratings_by_title = data.groupby('title').size() 60 | end = time.time() 61 | 62 | print("GroupBy size:", end - start) 63 | start = end 64 | 65 | active_titles = ratings_by_title.index[ratings_by_title >= 250] 66 | mean_ratings = mean_ratings.loc[active_titles] 67 | mean_ratings['diff'] = mean_ratings['M'] - mean_ratings['F'] 68 | sorted_by_diff = mean_ratings.sort_values(by='diff') 69 | end = time.time() 70 | print("Diff:", end - start) 71 | start = end 72 | 73 | rating_std_by_title = data.groupby('title')['rating'].std() 74 | end = time.time() 75 | print("GroupBy std:", end - start) 76 | start = end 77 | 78 | rating_std_by_title = rating_std_by_title.loc[active_titles] 79 | rating_std_by_title = rating_std_by_title.sort_values(ascending=False)[:10] 80 | end = time.time() 81 | print("Sort:", end - start) 82 | start = end 83 | 84 | e2e_end = time.time() 85 | 86 | print(sorted_by_diff.head()) 87 | print(rating_std_by_title.head()) 88 | 89 | print("Total:", e2e_end - e2e_start) 90 | 91 | def main(): 92 | parser = argparse.ArgumentParser( 93 | description="MovieLens with Composer." 
94 | ) 95 | parser.add_argument('-t', "--threads", type=int, default=16, help="Number of threads.") 96 | args = parser.parse_args() 97 | 98 | threads = args.threads 99 | 100 | print("Threads:", threads) 101 | mi = run(threads) 102 | 103 | 104 | if __name__ == "__main__": 105 | main() 106 | -------------------------------------------------------------------------------- /python/benchmarks/nbody/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | runs=${1:-1} 8 | size=15 9 | iterations=5 10 | tasks=( numba naive composer bohrium ) 11 | threads=( 1 2 4 8 16 ) 12 | 13 | for task in "${tasks[@]}"; do 14 | rm -f $task.stdout $task.stderr 15 | git log | head -1 > $task.stderr 16 | git log | head -1 > $task.stdout 17 | done 18 | 19 | for i in {1..$runs}; do 20 | for nthreads in "${threads[@]}"; do 21 | NUMBA_NUM_THREADS=$nthreads python nbody_numba.py -s $size -i $iterations >> numba.stdout 2>> numba.stderr 22 | done 23 | done 24 | 25 | for i in {1..$runs}; do 26 | for nthreads in "${threads[@]}"; do 27 | python nbody.py -m composer -s $size -i $iterations -t $nthreads >> composer.stdout 2>> composer.stderr 28 | done 29 | done 30 | 31 | for i in {1..$runs}; do 32 | python nbody.py -m naive -s $size -i $iterations -t $nthreads >> naive.stdout 2>> naive.stderr 33 | done 34 | 35 | for i in {1..$runs}; do 36 | for nthreads in "${threads[@]}"; do 37 | OMP_NUM_THREADS=$nthreads python nbody_boh.py -s $size -i $iterations -t $nthreads >> bohrium.stdout 2>> bohrium.stderr 38 | done 39 | done 40 | -------------------------------------------------------------------------------- /python/benchmarks/nbody/nbody_boh.py: -------------------------------------------------------------------------------- 1 | """ 2 | NBody in N^2 complexity 3 | 4 | Note that we are using only Newtonian forces and do not consider relativity 5 | Neither do we consider collisions between 
stars 6 | Thus some of our stars will accelerate to speeds beyond c 7 | This is done to keep the simulation simple enough for teaching purposes 8 | 9 | All the work is done in the calc_force, move and random_galaxy functions. 10 | To vectorize the code these are the functions to transform. 11 | 12 | https://benchpress.readthedocs.io/autodoc_benchmarks/nbody_nice.html 13 | """ 14 | 15 | import argparse 16 | import sys 17 | import time 18 | 19 | import bohrium as np 20 | import numpy 21 | 22 | def fill_diagonal(a, val): 23 | """ Set diagonal of 2D matrix a to val in-place. """ 24 | d, _ = a.shape 25 | a.shape = d * d 26 | a[::d + 1] = val 27 | a.shape = (d, d) 28 | 29 | def random_galaxy(N): 30 | """ Generate a galaxy of random bodies """ 31 | m = np.array((numpy.arange(0.0, 1.0, step=1.0 / N) + np.float64(10)) * np.float64(m_sol/10)) 32 | x = np.array((numpy.arange(0.0, 1.0, step=1.0 / N) - np.float64(0.5)) * np.float64(r_ly/100)) 33 | y = np.array((numpy.arange(0.0, 1.0, step=1.0 / N) - np.float64(0.5)) * np.float64(r_ly/100)) 34 | z = np.array((numpy.arange(0.0, 1.0, step=1.0 / N) - np.float64(0.5)) * np.float64(r_ly/100)) 35 | vx = np.zeros(N, dtype=np.float64) 36 | vy = np.zeros(N, dtype=np.float64) 37 | vz = np.zeros(N, dtype=np.float64) 38 | 39 | assert len(m) == N 40 | return m, x, y, z, vx, vy, vz 41 | 42 | def move(m, x, y, z, vx, vy, vz, dt, temporaries): 43 | """ Move the bodies. 44 | 45 | first find forces and change velocity and then move positions. 
46 | """ 47 | 48 | start = time.time() 49 | 50 | dx = x - x[:,None] 51 | dy = numpy.subtract(y, y[:,None]) 52 | dz = numpy.subtract(z, z[:,None]) 53 | pm = numpy.multiply(m, m[:,None]) 54 | 55 | end = time.time() 56 | print("Step 0:", end - start) 57 | 58 | start = end 59 | 60 | r = np.sqrt(dx ** 2 + dy ** 2 + dz ** 2) 61 | tmp = G * pm / r ** 2 62 | Fx = tmp * (dx / r) 63 | Fy = tmp * (dy / r) 64 | Fz = tmp * (dz / r) 65 | 66 | end = time.time() 67 | print("Step 1:", end - start) 68 | start = end 69 | 70 | fill_diagonal(Fx, 0.0) 71 | fill_diagonal(Fy, 0.0) 72 | fill_diagonal(Fz, 0.0) 73 | end = time.time() 74 | print("Step 2:", end - start) 75 | start = end 76 | 77 | mdt = m / dt 78 | 79 | # Update state. 80 | vx += np.add.reduce(Fx, axis=1) / mdt 81 | vy += np.add.reduce(Fy, axis=1) / mdt 82 | vz += np.add.reduce(Fz, axis=1) / mdt 83 | x += vx * dt 84 | y += vy * dt 85 | z += vz * dt 86 | 87 | end = time.time() 88 | print("Step 3:", end - start) 89 | start = end 90 | return Fx, Fy, Fz 91 | 92 | def simulate(m, x, y, z, vx, vy, vz, timesteps): 93 | 94 | temporaries = ( 95 | np.ones((size, size), dtype="float64"), 96 | np.ones((size, size), dtype="float64"), 97 | np.ones((size, size), dtype="float64"), 98 | np.ones((size, size), dtype="float64") 99 | ) 100 | 101 | 102 | start = time.time() 103 | for i in range(timesteps): 104 | ret = move(m, x, y, z, vx, vy, vz, dt, temporaries) 105 | np.flush() 106 | print(x, y, z) 107 | end = time.time() 108 | print("Simulation time:", end - start) 109 | 110 | ####################################################################3 111 | # ENTRY POINT 112 | ####################################################################3 113 | 114 | parser = argparse.ArgumentParser( 115 | description="N-Body benchmark." 
116 | ) 117 | parser.add_argument('-s', "--size", type=int, default=10, help="Size of each array") 118 | parser.add_argument('-i', "--iterations", type=int, default=1, help="Iterations of simulation") 119 | parser.add_argument('-p', "--piece_size", type=int, default=16384, help="Size of each piece.") 120 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 121 | parser.add_argument('-v', "--verbosity", type=str, default="none",\ 122 | help="Log level (debug|info|warning|error|critical|none)") 123 | args = parser.parse_args() 124 | 125 | size = (1 << args.size) 126 | iterations = args.iterations 127 | piece_size = args.piece_size 128 | threads = args.threads 129 | loglevel = args.verbosity 130 | 131 | assert threads >= 1 132 | 133 | print("Size:", size) 134 | print("Piece Size:", piece_size) 135 | print("Threads:", threads) 136 | print("Log Level", loglevel) 137 | 138 | # Constants 139 | G = np.float64(6.67384e-11) # m/(kg*s^2) 140 | dt = np.float64(60*60*24*365.25) # Years in seconds 141 | r_ly = np.float64(9.4607e15) # Lightyear in m 142 | m_sol = np.float64(1.9891e30) # Solar mass in kg 143 | 144 | np.seterr(divide='ignore', invalid='ignore') 145 | 146 | sys.stdout.write("Generating data...") 147 | sys.stdout.flush() 148 | m, x, y, z, vx, vy, vz = random_galaxy(size) 149 | print("done.") 150 | 151 | simulate(m, x, y, z, vx, vy, vz, iterations) 152 | print(x) 153 | -------------------------------------------------------------------------------- /python/benchmarks/nbody/nbody_numba.py: -------------------------------------------------------------------------------- 1 | """ 2 | NBody in N^2 complexity 3 | 4 | Note that we are using only Newtonian forces and do not consider relativity 5 | Neither do we consider collisions between stars 6 | Thus some of our stars will accelerate to speeds beyond c 7 | This is done to keep the simulation simple enough for teaching purposes 8 | 9 | All the work is done in the calc_force, move and 
random_galaxy functions. 10 | To vectorize the code these are the functions to transform. 11 | 12 | https://benchpress.readthedocs.io/autodoc_benchmarks/nbody_nice.html 13 | """ 14 | 15 | import argparse 16 | import sys 17 | import time 18 | 19 | import numpy as np 20 | from numba import njit 21 | 22 | def fill_diagonal(a, val): 23 | """ Set diagonal of 2D matrix a to val in-place. """ 24 | d, _ = a.shape 25 | a.shape = d * d 26 | a[::d + 1] = val 27 | a.shape = (d, d) 28 | 29 | def random_galaxy(N): 30 | """ Generate a galaxy of random bodies """ 31 | m = (np.arange(0.0, 1.0, step=1.0 / N) + np.float64(10)) * np.float64(m_sol/10) 32 | x = (np.arange(0.0, 1.0, step=1.0 / N) - np.float64(0.5)) * np.float64(r_ly/100) 33 | y = (np.arange(0.0, 1.0, step=1.0 / N) - np.float64(0.5)) * np.float64(r_ly/100) 34 | z = (np.arange(0.0, 1.0, step=1.0 / N) - np.float64(0.5)) * np.float64(r_ly/100) 35 | vx = np.zeros(N, dtype=np.float64) 36 | vy = np.zeros(N, dtype=np.float64) 37 | vz = np.zeros(N, dtype=np.float64) 38 | 39 | assert len(m) == N 40 | return m, x, y, z, vx, vy, vz 41 | 42 | @njit(parallel=True) 43 | def calc_force1(pm, m, x, y, z, dx, dy, dz): 44 | """Calculate forces between bodies 45 | 46 | F = ((G m_a m_b)/r^2)/((x_b-x_a)/r) 47 | 48 | """ 49 | r = np.sqrt(dx ** 2 + dy ** 2 + dz ** 2) 50 | tmp = G * pm / r ** 2 51 | Fx = tmp * (dx / r) 52 | Fy = tmp * (dy / r) 53 | Fz = tmp * (dz / r) 54 | return Fx, Fy, Fz 55 | 56 | 57 | @njit(parallel=True) 58 | def calc_force2(m, Fx, Fy, Fz, x, y, z, vx, vy, vz, dt): 59 | vx += Fx / m * dt 60 | vy += Fy / m * dt 61 | vz += Fz / m * dt 62 | x += vx * dt 63 | y += vy * dt 64 | z += vz * dt 65 | 66 | 67 | def move(m, x, y, z, vx, vy, vz, dt, temporaries): 68 | """ Move the bodies. 69 | 70 | first find forces and change velocity and then move positions. 
71 | """ 72 | dx, dy, dz, pm = temporaries 73 | 74 | start = time.time() 75 | np.subtract(x, x[:,None], out=dx) 76 | np.subtract(y, y[:,None], out=dy) 77 | np.subtract(z, z[:,None], out=dz) 78 | np.multiply(m, m[:,None], out=pm) 79 | end = time.time() 80 | print("Step 0:", end - start) 81 | start = end 82 | 83 | Fx, Fy, Fz = calc_force1(pm, m, x, y, z, dx, dy, dz) 84 | end = time.time() 85 | print("Step 1:", end - start) 86 | start = end 87 | 88 | fill_diagonal(Fx, 0.0) 89 | fill_diagonal(Fy, 0.0) 90 | fill_diagonal(Fz, 0.0) 91 | end = time.time() 92 | print("Step 2:", end - start) 93 | start = end 94 | 95 | Fx2 = Fx[:,0] 96 | np.add.reduce(Fx, axis=1, out=Fx2) 97 | Fy2 = Fy[:,0] 98 | np.add.reduce(Fy, axis=1, out=Fy2) 99 | Fz2 = Fz[:,0] 100 | np.add.reduce(Fz, axis=1, out=Fz2) 101 | 102 | end = time.time() 103 | print("Step 3:", end - start) 104 | start = end 105 | 106 | calc_force2(m, Fx2, Fy2, Fz2, x, y, z, vx, vy, vz, dt) 107 | end = time.time() 108 | print("Step 4:", end - start) 109 | start = end 110 | 111 | def simulate(m, x, y, z, vx, vy, vz, timesteps): 112 | 113 | temporaries = ( 114 | np.ones((size, size), dtype="float64"), 115 | np.ones((size, size), dtype="float64"), 116 | np.ones((size, size), dtype="float64"), 117 | np.ones((size, size), dtype="float64") 118 | ) 119 | 120 | 121 | start = time.time() 122 | for i in range(timesteps): 123 | ret = move(m, x, y, z, vx, vy, vz, dt, temporaries) 124 | end = time.time() 125 | print("Simulation time:", end - start) 126 | 127 | ####################################################################3 128 | # ENTRY POINT 129 | ####################################################################3 130 | 131 | parser = argparse.ArgumentParser( 132 | description="N-Body benchmark." 
133 | ) 134 | parser.add_argument('-s', "--size", type=int, default=10, help="Size of each array") 135 | parser.add_argument('-i', "--iterations", type=int, default=1, help="Iterations of simulation") 136 | parser.add_argument('-p', "--piece_size", type=int, default=16384, help="Size of each piece.") 137 | parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.") 138 | parser.add_argument('-v', "--verbosity", type=str, default="none",\ 139 | help="Log level (debug|info|warning|error|critical|none)") 140 | args = parser.parse_args() 141 | 142 | size = (1 << args.size) 143 | iterations = args.iterations 144 | piece_size = args.piece_size 145 | threads = args.threads 146 | loglevel = args.verbosity 147 | 148 | assert threads >= 1 149 | 150 | print("Size:", size) 151 | print("Piece Size:", piece_size) 152 | print("Threads:", threads) 153 | print("Log Level", loglevel) 154 | 155 | # Constants 156 | G = np.float64(6.67384e-11) # m/(kg*s^2) 157 | dt = np.float64(60*60*24*365.25) # Years in seconds 158 | r_ly = np.float64(9.4607e15) # Lightyear in m 159 | m_sol = np.float64(1.9891e30) # Solar mass in kg 160 | 161 | np.seterr(divide='ignore', invalid='ignore') 162 | 163 | sys.stdout.write("Generating data...") 164 | sys.stdout.flush() 165 | m, x, y, z, vx, vy, vz = random_galaxy(size) 166 | print("done.") 167 | 168 | simulate(m, x, y, z, vx, vy, vz, iterations) 169 | print(x) 170 | -------------------------------------------------------------------------------- /python/benchmarks/requirements.txt: -------------------------------------------------------------------------------- 1 | atomicwrites==1.3.0 2 | attrs==19.1.0 3 | blis==0.2.4 4 | bohrium==0.10.2.post29 5 | bohrium-api==0.10.2.post29 6 | certifi==2019.3.9 7 | chardet==3.0.4 8 | cloudpickle==0.8.1 9 | cymem==2.0.2 10 | idna==2.8 11 | joblib==0.13.2 12 | jsonschema==2.6.0 13 | llvmlite==0.28.0 14 | more-itertools==7.0.0 15 | murmurhash==1.0.2 16 | numba==0.43.1 17 | numpy==1.16.2 18 | 
pandas==0.24.2 19 | pathlib2==2.3.3 20 | plac==0.9.6 21 | pluggy==0.9.0 22 | preshed==2.0.1 23 | py==1.8.0 24 | pyarrow==0.13.0 25 | pytest==4.4.0 26 | python-dateutil==2.8.0 27 | pytz==2019.1 28 | requests==2.21.0 29 | scipy==1.2.1 30 | sharedmem==0.3.5 31 | six==1.12.0 32 | spacy==2.1.3 33 | srsly==0.0.5 34 | thinc==7.0.4 35 | tqdm==4.31.1 36 | urllib3==1.24.2 37 | wasabi==0.2.1 38 | -------------------------------------------------------------------------------- /python/benchmarks/run-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | # Name of the environment 6 | rm -rf benchmarks 7 | ./setup-env.sh 8 | 9 | # Get the data 10 | ./get-data.sh 11 | 12 | rm -rf results/ 13 | mkdir results/ 14 | 15 | tasks=( blackscholes birth_analysis crime_index data_cleaning haversine movielens nbody shallow_water speechtag ) 16 | 17 | # Write system information. 18 | git log | head -1 > results/CONFIG.txt 19 | uname -a >> results/CONFIG.txt 20 | lsb_release -d >> results/CONFIG.txt 21 | 22 | for task in "${tasks[@]}"; do 23 | echo "Executing $task" 24 | pushd $task 25 | ./benchmark.sh 26 | popd 27 | mkdir results/$task 28 | mv $task/*.std* results/$task 29 | done 30 | -------------------------------------------------------------------------------- /python/benchmarks/setup-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | virtualenv -p python3.5 benchmarks 4 | source benchmarks/bin/activate 5 | 6 | # Install everything 7 | pip install -r requirements.txt 8 | -------------------------------------------------------------------------------- /python/benchmarks/shallow_water/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | source ../benchmarks/bin/activate 6 | 7 | size=14 8 | runs=${1:-1} 9 | iterations=10 10 | tasks=( numba composer naive bohrium ) 11 | threads=( 
1 2 4 8 16 ) 12 | 13 | for task in "${tasks[@]}"; do 14 | rm -f $task.stdout $task.stderr 15 | git log | head -1 > $task.stderr 16 | git log | head -1 > $task.stdout 17 | done 18 | 19 | for i in {1..$runs}; do 20 | for nthreads in "${threads[@]}"; do 21 | NUMBA_NUM_THREADS=$nthreads python shallow_water_numba.py -s $size -i $iterations >> numba.stdout 2>> numba.stderr 22 | done 23 | done 24 | 25 | for i in {1..$runs}; do 26 | for nthreads in "${threads[@]}"; do 27 | OMP_NUM_THREADS=$nthreads python shallow_water.py -m bohrium -s $size -i $iterations -t $nthreads >> bohrium.stdout 2>> bohrium.stderr 28 | done 29 | done 30 | 31 | unset OMP_NUM_THREADS 32 | 33 | for i in {1..$runs}; do 34 | for nthreads in "${threads[@]}"; do 35 | python shallow_water.py -m composer -s $size -i $iterations -t $nthreads >> composer.stdout 2>> composer.stderr 36 | done 37 | done 38 | 39 | for i in {1..$runs}; do 40 | python shallow_water.py -m naive -s $size -i $iterations -t 1 >> naive.stdout 2>> naive.stderr 41 | done 42 | -------------------------------------------------------------------------------- /python/benchmarks/shallow_water/shallow_water_numba.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | from numba import njit 4 | 5 | import argparse 6 | import sys 7 | import time 8 | 9 | @njit(parallel=True) 10 | def spatial_derivative(A, axis=0): 11 | """ 12 | Compute derivative of array A using balanced finite differences 13 | Axis specifies direction of spatial derivative (d/dx or d/dy) 14 | 15 | dA[i] = A[i+1] - A[i-1] / 2 16 | ... or with grid spacing included ... 17 | dA[i]/dx = A[i+1] - A[i-1] / 2dx 18 | 19 | Used By: 20 | d_dx 21 | d_dy 22 | """ 23 | return (np.roll(A, -1) - np.roll(A, 1)) / (grid_spacing*2.) 
@njit(parallel=True)
def d_dx(A):
    """Spatial derivative of A in the x direction."""
    return spatial_derivative(A, 1)

@njit(parallel=True)
def d_dy(A):
    """Spatial derivative of A in the y direction."""
    return spatial_derivative(A, 0)

@njit(parallel=True)
def d_dt(eta, u, v, g, b=0):
    """Advance the shallow-water state (eta, u, v) by one Euler step.

    http://en.wikipedia.org/wiki/Shallow_water_equations#Non-conservative_form
    """
    du_dt = -g*d_dx(eta) - b*u
    dv_dt = -g*d_dy(eta) - b*v

    # Our definition of eta already includes the mean-depth term, so H is
    # zero here rather than eta.mean().
    H = 0
    deta_dt = -d_dx(u * (H+eta)) - d_dy(v * (H+eta))

    # NOTE: `dt` is the module-level global; numba captures it at compile
    # time. Deliberately out-of-place so the caller's arrays are untouched.
    eta = eta + deta_dt * dt
    u = u + du_dt * dt
    v = v + dv_dt * dt
    return eta, u, v


def evolveEuler(eta, u, v, g, dt):
    """Generate the Euler-method evolution of the state.

    x_{n+1} = x_{n} + dx/dt * d_t

    Yields an infinite sequence of (eta, u, v, elapsed_time) tuples,
    beginning with the initial conditions themselves.
    """
    elapsed = 0
    # Initial conditions are the first state in the sequence.
    yield eta, u, v, elapsed
    while True:
        eta, u, v = d_dt(eta, u, v, g)
        elapsed += dt
        yield eta, u, v, elapsed

def simulate(eta, u, v, g, dt, iterations):
    """Run `iterations` Euler steps, timing the loop and printing progress."""
    trajectory = evolveEuler(eta, u, v, g, dt)

    start = time.time()

    # Consume the initial state first, then one state per iteration.
    eta, u, v, elapsed = next(trajectory)
    for _ in range(iterations):
        eta, u, v, elapsed = next(trajectory)
        print(eta[0][0])

    end = time.time()
    print("total time:", end - start)

    print("Final State:")
    print(eta[0][0])


####################################################################
# ENTRY POINT
####################################################################

parser = argparse.ArgumentParser(
    description="Shallow Water benchmark."
)
parser.add_argument('-s', "--size", type=int, default=10, help="Size of each array")
parser.add_argument('-i', "--iterations", type=int, default=1, help="Iterations of simulation")
parser.add_argument('-p', "--piece_size", type=int, default=16384, help="Size of each piece.")
parser.add_argument('-t', "--threads", type=int, default=1, help="Number of threads.")
parser.add_argument('-v', "--verbosity", type=str, default="none",
                    help="Log level (debug|info|warning|error|critical|none)")
args = parser.parse_args()

size = (1 << args.size)
iterations = args.iterations
piece_size = args.piece_size
threads = args.threads
loglevel = args.verbosity

assert threads >= 1

print("Size:", size)
print("Piece Size:", piece_size)
print("Threads:", threads)
print("Log Level", loglevel)
print("Mode:", "Numba")

sys.stdout.write("Generating data...")
sys.stdout.flush()

# Initial Conditions
n = size

# velocity in x direction
u = np.zeros((n, n))
# velocity in y direction
v = np.zeros((n, n))
# pressure deviation (like height)
eta = np.ones((n, n))

# Give eta a linear gradient across rows.
for i in range(n):
    eta[i] *= 0.1 * i

# Constants. G, r_ly and m_sol are unused here; presumably kept for parity
# with the other benchmarks. Note dt is immediately overwritten below.
G = np.float64(6.67384e-11)       # m/(kg*s^2)
dt = np.float64(60*60*24*365.25)  # Years in seconds
r_ly = np.float64(9.4607e15)      # Lightyear in m
m_sol = np.float64(1.9891e30)     # Solar mass in kg
b = np.float64(0.0)

box_size = 1.
grid_spacing = 1.0 * box_size / n
g = 1.
dt = grid_spacing / 100.
print("done.")

simulate(eta, u, v, g, dt, iterations)
--------------------------------------------------------------------------------
/python/benchmarks/speechtag/benchmark.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Benchmark the speech-tagging implementations (composer, then plain spacy)
# across thread counts. First argument: number of runs (default 1).

set -x

source ../benchmarks/bin/activate

tasks=( composer )
threads=( 1 2 4 8 16 )
runs=${1:-1}

for task in "${tasks[@]}"; do
    rm -f $task.stdout $task.stderr
    git log | head -1 > $task.stderr
    git log | head -1 > $task.stdout
done

# BUG FIX: bash performs brace expansion before variable expansion, so
# `for i in {1..$runs}` iterated once over the literal string "{1..$runs}"
# and the requested run count was ignored. Use seq instead.
for i in $(seq 1 "$runs"); do
    for task in "${tasks[@]}"; do
        for nthreads in "${threads[@]}"; do
            python speechtag_composer.py -n $nthreads >> $task.stdout 2>> $task.stderr
        done
    done
done

tasks=( spacy )
threads=( 1 )

for task in "${tasks[@]}"; do
    rm -f $task.stdout $task.stderr
    git log | head -1 > $task.stderr
    git log | head -1 > $task.stdout
done

for i in $(seq 1 "$runs"); do
    for task in "${tasks[@]}"; do
        for nthreads in "${threads[@]}"; do
            python speechtag.py >> $task.stdout 2>> $task.stderr
        done
    done
done
--------------------------------------------------------------------------------
/python/benchmarks/speechtag/speechtag.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf8
"""Example of multi-processing with Joblib. Here, we're exporting
part-of-speech-tagged, true-cased, (very roughly) sentence-separated text, with
each "sentence" on a newline, and spaces between tokens. Data is loaded from
the IMDB movie reviews dataset and will be loaded automatically via Thinc's
built-in dataset loader.
8 | 9 | Compatible with: spaCy v2.0.0+ 10 | Last tested with: v2.1.0 11 | Prerequisites: pip install joblib 12 | 13 | 14 | Adapted from https://github.com/explosion/spaCy/blob/master/examples/pipeline/multi_processing.py 15 | """ 16 | 17 | import plac 18 | import sys 19 | import spacy 20 | from spacy.util import minibatch 21 | import thinc.extra.datasets 22 | import time 23 | 24 | @plac.annotations( 25 | model=("Model name (needs tagger)", "positional", None, str), 26 | n_jobs=("Number of workers", "option", "n", int), 27 | batch_size=("Batch-size for each process", "option", "b", int), 28 | limit=("Limit of entries from the dataset", "option", "l", int), 29 | ) 30 | def main(model="en_core_web_sm", n_jobs=4, batch_size=1000, limit=10000): 31 | nlp = spacy.load(model) # load spaCy model 32 | print("Loaded model '%s'" % model) 33 | 34 | # load and pre-process the IMDB dataset 35 | sys.stdout.write("Loading IMDB data...") 36 | data, _ = thinc.extra.datasets.imdb() 37 | print("done.") 38 | texts, _ = zip(*data[-limit:]) 39 | 40 | start = time.time() 41 | process(nlp, texts) 42 | end = time.time() 43 | print("Total:", end - start) 44 | 45 | def process(nlp, texts): 46 | print(nlp.pipe_names) 47 | for doc in nlp.pipe(texts): 48 | sentence = " ".join(represent_word(w) for w in doc if not w.is_space) 49 | sentence += "\n" 50 | 51 | def represent_word(word): 52 | text = word.text 53 | # True-case, i.e. try to normalize sentence-initial capitals. 54 | # Only do this if the lower-cased form is more probable. 
55 | if ( 56 | text.istitle() 57 | and is_sent_begin(word) 58 | and word.prob < word.doc.vocab[text.lower()].prob 59 | ): 60 | text = text.lower() 61 | return text + "|" + word.tag_ 62 | 63 | def is_sent_begin(word): 64 | if word.i == 0: 65 | return True 66 | elif word.i >= 2 and word.nbor(-1).text in (".", "!", "?", "..."): 67 | return True 68 | else: 69 | return False 70 | 71 | if __name__ == "__main__": 72 | plac.call(main) 73 | -------------------------------------------------------------------------------- /python/benchmarks/speechtag/speechtag_composer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf8 3 | """Example of multi-processing with Joblib. Here, we're exporting 4 | part-of-speech-tagged, true-cased, (very roughly) sentence-separated text, with 5 | each "sentence" on a newline, and spaces between tokens. Data is loaded from 6 | the IMDB movie reviews dataset and will be loaded automatically via Thinc's 7 | built-in dataset loader. 
8 | 9 | Compatible with: spaCy v2.0.0+ 10 | Last tested with: v2.1.0 11 | Prerequisites: pip install joblib 12 | 13 | 14 | Adapted from https://github.com/explosion/spaCy/blob/master/examples/pipeline/multi_processing.py 15 | """ 16 | 17 | import plac 18 | import sys 19 | import spacy 20 | from spacy.util import minibatch 21 | import thinc.extra.datasets 22 | import time 23 | 24 | sys.path.append("../../pycomposer/") 25 | 26 | from pycomposer import * 27 | 28 | @plac.annotations( 29 | model=("Model name (needs tagger)", "positional", None, str), 30 | n_jobs=("Number of workers", "option", "n", int), 31 | batch_size=("Batch-size for each process", "option", "b", int), 32 | limit=("Limit of entries from the dataset", "option", "l", int), 33 | ) 34 | def main(model="en_core_web_sm", n_jobs=4, batch_size=1000, limit=10000): 35 | nlp = spacy.load(model) # load spaCy model 36 | print("Loaded model '%s'" % model) 37 | 38 | # load and pre-process the IMDB dataset 39 | sys.stdout.write("Loading IMDB data...") 40 | data, _ = thinc.extra.datasets.imdb() 41 | print("done.") 42 | texts, _ = zip(*data[-limit:]) 43 | 44 | start = time.time() 45 | process(nlp, texts) 46 | evaluate(workers=n_jobs, batch_size=batch_size) 47 | end = time.time() 48 | print("Total:", end - start) 49 | 50 | class TextBatchSplit(SplitType): 51 | def combine(self, values): 52 | """ No need to combine text batches""" 53 | pass 54 | 55 | def split(self, start, end, texts): 56 | return minibatch(texts, size=(end-start)) 57 | 58 | @sa((Broadcast(), TextBatchSplit()), {}, Broadcast()) 59 | def process(nlp, texts): 60 | print(nlp.pipe_names) 61 | for doc in nlp.pipe(texts): 62 | sentence = " ".join(represent_word(w) for w in doc if not w.is_space) 63 | sentence += "\n" 64 | 65 | def represent_word(word): 66 | text = word.text 67 | # True-case, i.e. try to normalize sentence-initial capitals. 68 | # Only do this if the lower-cased form is more probable. 
69 | if ( 70 | text.istitle() 71 | and is_sent_begin(word) 72 | and word.prob < word.doc.vocab[text.lower()].prob 73 | ): 74 | text = text.lower() 75 | return text + "|" + word.tag_ 76 | 77 | def is_sent_begin(word): 78 | if word.i == 0: 79 | return True 80 | elif word.i >= 2 and word.nbor(-1).text in (".", "!", "?", "..."): 81 | return True 82 | else: 83 | return False 84 | 85 | if __name__ == "__main__": 86 | plac.call(main) 87 | -------------------------------------------------------------------------------- /python/benchmarks/weld-python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weld-project/split-annotations/d835cc54476336e7f4355d87e820595aeddcc442/python/benchmarks/weld-python/__init__.py -------------------------------------------------------------------------------- /python/benchmarks/weld-python/benchmark-weld.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | size=14 6 | 7 | tasks=( weld ) 8 | threads=( 1 2 4 8 16 32 ) 9 | 10 | for task in "${tasks[@]}"; do 11 | rm -f $task.stdout $task.stderr 12 | git log | head -1 > $task.stderr 13 | git log | head -1 > $task.stdout 14 | done 15 | 16 | # Weld doesn't seem to free memory properly in this setup, so just run it ten times and add up... 17 | for i in {1..5}; do 18 | for task in "${tasks[@]}"; do 19 | for nthreads in "${threads[@]}"; do 20 | python shallow_water_weld.py -s $size -i 1 -t $nthreads >> $task.stdout 2>> $task.stderr 21 | done 22 | done 23 | done 24 | -------------------------------------------------------------------------------- /python/benchmarks/weld-python/bindings.py: -------------------------------------------------------------------------------- 1 | # 2 | # Implements a wrapper around the Weld API. 
3 | # 4 | 5 | from ctypes import * 6 | 7 | import os 8 | import platform 9 | import copy 10 | 11 | import pkg_resources 12 | 13 | system = platform.system() 14 | if system == 'Linux': 15 | lib_file = "../weld/target/release/libweld.so" 16 | elif system == 'Windows': 17 | lib_file = "libweld.dll" 18 | elif system == 'Darwin': 19 | lib_file = "libweld.dylib" 20 | else: 21 | raise OSError("Unsupported platform {}", system) 22 | 23 | lib_file = pkg_resources.resource_filename(__name__, lib_file) 24 | 25 | weld = CDLL(lib_file, mode=RTLD_GLOBAL) 26 | 27 | # Used for some type checking carried out by ctypes 28 | 29 | class c_weld_module(c_void_p): 30 | pass 31 | 32 | class c_weld_conf(c_void_p): 33 | pass 34 | 35 | class c_weld_err(c_void_p): 36 | pass 37 | 38 | class c_weld_value(c_void_p): 39 | pass 40 | 41 | class WeldModule(c_void_p): 42 | 43 | def __init__(self, code, conf, err): 44 | weld_module_compile = weld.weld_module_compile 45 | weld_module_compile.argtypes = [ 46 | c_char_p, c_weld_conf, c_weld_err] 47 | weld_module_compile.restype = c_weld_module 48 | 49 | code = c_char_p(code) 50 | self.module = weld_module_compile(code, conf.conf, err.error) 51 | 52 | def run(self, conf, arg, err): 53 | weld_module_run = weld.weld_module_run 54 | # module, conf, arg, &err 55 | weld_module_run.argtypes = [ 56 | c_weld_module, c_weld_conf, c_weld_value, c_weld_err] 57 | weld_module_run.restype = c_weld_value 58 | ret = weld_module_run(self.module, conf.conf, arg.val, err.error) 59 | return WeldValue(ret, assign=True) 60 | 61 | def __del__(self): 62 | weld_module_free = weld.weld_module_free 63 | weld_module_free.argtypes = [c_weld_module] 64 | weld_module_free.restype = None 65 | weld_module_free(self.module) 66 | 67 | 68 | class WeldValue(c_void_p): 69 | 70 | def __init__(self, value, assign=False): 71 | if assign is False: 72 | weld_value_new = weld.weld_value_new 73 | weld_value_new.argtypes = [c_void_p] 74 | weld_value_new.restype = c_weld_value 75 | self.val = 
weld_value_new(value) 76 | else: 77 | self.val = value 78 | self.freed = False 79 | 80 | def _check(self): 81 | if self.freed: 82 | raise ValueError("Attempted to use freed WeldValue") 83 | 84 | def data(self): 85 | self._check() 86 | weld_value_data = weld.weld_value_data 87 | weld_value_data.argtypes = [c_weld_value] 88 | weld_value_data.restype = c_void_p 89 | return weld_value_data(self.val) 90 | 91 | def memory_usage(self): 92 | self._check() 93 | weld_value_memory_usage = weld.weld_value_memory_usage 94 | weld_value_memory_usage.argtypes = [c_weld_value] 95 | weld_value_memory_usage.restype = c_int64 96 | return weld_value_memory_usage(self.val) 97 | 98 | def free(self): 99 | self._check() 100 | weld_value_free = weld.weld_value_free 101 | weld_value_free.argtypes = [c_weld_value] 102 | weld_value_free.restype = None 103 | self.freed = True 104 | return weld_value_free(self.val) 105 | 106 | 107 | class WeldConf(c_void_p): 108 | 109 | def __init__(self): 110 | weld_conf_new = weld.weld_conf_new 111 | weld_conf_new.argtypes = [] 112 | weld_conf_new.restype = c_weld_conf 113 | self.conf = weld_conf_new() 114 | 115 | def get(self, key): 116 | key = c_char_p(key) 117 | weld_conf_get = weld.weld_conf_get 118 | weld_conf_get.argtypes = [c_weld_conf, c_char_p] 119 | weld_conf_get.restype = c_char_p 120 | val = weld_conf_get(self.conf, key) 121 | return copy.copy(val) 122 | 123 | def set(self, key, value): 124 | key = c_char_p(key) 125 | value = c_char_p(value) 126 | weld_conf_set = weld.weld_conf_set 127 | weld_conf_set.argtypes = [c_weld_conf, c_char_p, c_char_p] 128 | weld_conf_set.restype = None 129 | weld_conf_set(self.conf, key, value) 130 | 131 | def __del__(self): 132 | weld_conf_free = weld.weld_conf_free 133 | weld_conf_free.argtypes = [c_weld_conf] 134 | weld_conf_free.restype = None 135 | weld_conf_free(self.conf) 136 | 137 | 138 | class WeldError(c_void_p): 139 | 140 | def __init__(self): 141 | weld_error_new = weld.weld_error_new 142 | 
weld_error_new.argtypes = [] 143 | weld_error_new.restype = c_weld_err 144 | self.error = weld_error_new() 145 | 146 | def code(self): 147 | weld_error_code = weld.weld_error_code 148 | weld_error_code.argtypes = [c_weld_err] 149 | weld_error_code.restype = c_uint64 150 | return weld_error_code(self.error) 151 | 152 | def message(self): 153 | weld_error_message = weld.weld_error_message 154 | weld_error_message.argtypes = [c_weld_err] 155 | weld_error_message.restype = c_char_p 156 | val = weld_error_message(self.error) 157 | return copy.copy(val) 158 | 159 | def __del__(self): 160 | weld_error_free = weld.weld_error_free 161 | weld_error_free.argtypes = [c_weld_err] 162 | weld_error_free.restype = None 163 | weld_error_free(self.error) 164 | 165 | WeldLogLevelOff = 0 166 | WeldLogLevelError = 1 167 | WeldLogLevelWarn = 2 168 | WeldLogLevelInfo = 3 169 | WeldLogLevelDebug = 4 170 | WeldLogLevelTrace = 5 171 | 172 | def weld_set_log_level(log_level): 173 | """ 174 | Sets the log_level for Weld: 175 | 0 = No Logs, 176 | 1 = Error, 177 | 2 = Warn, 178 | 3 = Info, 179 | 4 = Debug, 180 | 5 = Trace. 181 | """ 182 | weld.weld_set_log_level(log_level) 183 | -------------------------------------------------------------------------------- /python/benchmarks/weld-python/compiled.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | 4 | from bindings import * 5 | # from bindings_latest import * 6 | import weldtypes 7 | 8 | import ctypes 9 | import time 10 | 11 | import numpy as np 12 | 13 | # Global num threads setting. 14 | THREADS = [ "1" ] 15 | 16 | class WeldEncoder(object): 17 | """ 18 | An abstract class that must be overwridden by libraries. This class 19 | is used to marshall objects from Python types to Weld types. 
20 | """ 21 | def encode(obj): 22 | """ 23 | """ 24 | raise NotImplementedError 25 | 26 | def py_to_weld_type(self, obj): 27 | raise NotImplementedError 28 | 29 | 30 | class WeldDecoder(object): 31 | """ 32 | An abstract class that must be overwridden by libraries. This class 33 | is used to marshall objects from Weld types to Python types. 34 | """ 35 | def decode(obj, restype): 36 | """ 37 | Decodes obj, assuming object is of type `restype`. obj's Python 38 | type is ctypes.POINTER(restype.ctype_class). 39 | """ 40 | raise NotImplementedError 41 | 42 | # Returns a wrapped ctypes Structure 43 | def args_factory(arg_names, arg_types): 44 | class Args(ctypes.Structure): 45 | _fields_ = list(zip(arg_names, arg_types)) 46 | return Args 47 | 48 | def compile(program, arg_types, restype, decoder, verbose=False): 49 | """Compiles a program and returns a function for calling it. 50 | 51 | Parameters 52 | ---------- 53 | 54 | program : a string representing a Weld program. 55 | arg_types : a tuple of (type, encoder) 56 | decoder : a decoder for the returned value. 57 | """ 58 | 59 | start = time.time() 60 | 61 | conf = WeldConf() 62 | err = WeldError() 63 | module = WeldModule(program, conf, err) 64 | if err.code() != 0: 65 | raise ValueError("Could not compile function: {}".format(err.message())) 66 | end = time.time() 67 | 68 | if verbose: 69 | print("Weld compile time:", end - start) 70 | 71 | def func(*args): 72 | # Field names. 73 | names = [] 74 | # C type of each argument. 75 | arg_c_types = [] 76 | # Encoded version of each argument. 
77 | encoded = [] 78 | 79 | for (i, (arg, arg_type)) in enumerate(zip(args, arg_types)): 80 | names.append("_{}".format(i)) 81 | if isinstance(arg_type, WeldEncoder): 82 | arg_c_types.append(arg_type.py_to_weld_type(arg).ctype_class) 83 | encoded.append(arg_type.encode(arg)) 84 | else: 85 | # Primitive type with a builtin encoder 86 | assert isinstance(arg, arg_type) 87 | ctype = weldtypes.encoder(arg_type) 88 | arg_c_types.append(ctype) 89 | encoded.append(ctype(arg)) 90 | 91 | Args = args_factory(names, arg_c_types) 92 | raw_args = Args() 93 | 94 | for name, value in zip(names, encoded): 95 | setattr(raw_args, name, value) 96 | 97 | raw_args_pointer = ctypes.cast(ctypes.byref(raw_args), ctypes.c_void_p) 98 | weld_input = WeldValue(raw_args_pointer) 99 | conf = WeldConf() 100 | 101 | # 100GB Memory limit 102 | conf.set("weld.memory.limit", "100000000000") 103 | conf.set("weld.threads", THREADS[0]) 104 | 105 | err = WeldError() 106 | 107 | result = module.run(conf, weld_input, err) 108 | if err.code() != 0: 109 | raise ValueError("Error while running function: {}".format(err.message())) 110 | 111 | pointer_type = POINTER(restype.ctype_class) 112 | data = ctypes.cast(result.data(), pointer_type) 113 | result = decoder.decode(data, restype) 114 | 115 | return result 116 | 117 | return func 118 | -------------------------------------------------------------------------------- /python/benchmarks/weld-python/encoders.py: -------------------------------------------------------------------------------- 1 | 2 | from bindings import * 3 | from compiled import WeldEncoder, WeldDecoder 4 | from weldtypes import * 5 | 6 | import numpy as np 7 | import ctypes 8 | 9 | 10 | def dtype_to_weld_type(dtype): 11 | if dtype == 'int32': 12 | return WeldInt() 13 | elif dtype == 'int64': 14 | return WeldLong() 15 | elif dtype == 'float32': 16 | return WeldFloat() 17 | elif dtype == 'float64': 18 | return WeldDouble() 19 | else: 20 | raise ValueError("unsupported dtype {}".format(dtype)) 
class NumpyArrayEncoder(WeldEncoder):
    """Encodes a NumPy ndarray as a Weld vec[T] argument."""

    def _check(self, obj):
        """
        Checks whether this NumPy array is supported by Weld.
        """
        assert isinstance(obj, np.ndarray)

    def encode(self, obj):
        """Marshal `obj` into the ctypes vec struct Weld expects.

        Returns an instance of WeldVec(elem).ctype_class with `ptr` pointing
        at the array's existing buffer (no copy) and `size` the total number
        of elements.
        """
        self._check(obj)
        elem_type = dtype_to_weld_type(obj.dtype)
        c_class = WeldVec(elem_type).ctype_class
        elem_class = elem_type.ctype_class
        ptr = obj.ctypes.data_as(POINTER(elem_class))
        # obj.size gives the correct value for multi-dimensional arrays.
        size = ctypes.c_int64(obj.size)
        return c_class(ptr=ptr, size=size)

    def py_to_weld_type(self, obj):
        """Return the WeldType (vec of the dtype's scalar type) for `obj`."""
        self._check(obj)
        return WeldVec(dtype_to_weld_type(obj.dtype))


class NumpyArrayDecoder(WeldDecoder):
    """Decodes Weld scalar and vec[T] results into NumPy values."""

    def decode(self, obj, restype):
        """Decode a Weld result.

        obj : pointer to the raw Weld result.
        restype : the WeldType describing the result.
        """
        # Scalars: read a single value out of the result pointer.
        # This stuff is same as grizzly.
        if restype == WeldInt():
            data = WeldValue(obj).data()
            result = ctypes.cast(data, ctypes.POINTER(c_int)).contents.value
            return np.int32(result)
        elif restype == WeldLong():
            data = WeldValue(obj).data()
            result = ctypes.cast(data, ctypes.POINTER(c_long)).contents.value
            return np.int64(result)
        elif restype == WeldFloat():
            data = WeldValue(obj).data()
            result = ctypes.cast(data, ctypes.POINTER(c_float)).contents.value
            return np.float32(result)
        elif restype == WeldDouble():
            data = WeldValue(obj).data()
            result = ctypes.cast(data, ctypes.POINTER(c_double)).contents.value
            return np.float64(result)

        # is a WeldVec() - depending on the types, need to make minor changes.
        assert isinstance(restype, WeldVec)
        obj = obj.contents
        size = obj.size
        data = obj.ptr
        dtype = restype.elemType.ctype_class

        # Only the element *width* matters for the cast below, so 32-bit
        # types share c_float's array layout and 64-bit types share
        # c_double's; np.frombuffer reinterprets the bytes with `dtype`.
        if restype == WeldVec(WeldInt()) or restype == WeldVec(WeldFloat()):
            # these have same sizes.
            ArrayType = ctypes.c_float * size
        elif restype == WeldVec(WeldLong()) or restype == WeldVec(WeldDouble()):
            ArrayType = ctypes.c_double * size
        else:
            # Bug fix: previously fell through with ArrayType unbound,
            # producing a NameError instead of a meaningful error.
            raise TypeError("unsupported vector type {}".format(restype))

        array_pointer = ctypes.cast(data, ctypes.POINTER(ArrayType))
        result = np.frombuffer(array_pointer.contents, dtype=dtype, count=size)
        return result


class StructDecoder(WeldDecoder):
    """Decodes a Weld struct result by delegating each field to a decoder."""

    def __init__(self, types, decoders):
        """
        types : one WeldType per struct field.
        decoders : one WeldDecoder per struct field (same order as types).
        """
        for decoder in decoders:
            assert isinstance(decoder, WeldDecoder)
        for ty in types:
            assert isinstance(ty, WeldType)
        assert len(types) == len(decoders)
        self.decoders = decoders
        self.types = types

    def decode(self, obj, restype):
        """Decode each field of the struct and return them as a tuple."""
        decoded = []
        result_struct = ctypes.cast(obj, ctypes.POINTER(restype.ctype_class)).contents
        for (ty, decoder, (name, field_type)) in zip(self.types, self.decoders, result_struct._fields_):
            value = getattr(result_struct, name)
            decoded.append(decoder.decode(ctypes.pointer(value), ty))
        return tuple(decoded)


class ScalarDecoder(WeldDecoder):
    """Decodes a single scalar result. Currently only supports i64."""

    def decode(self, obj, restype):
        assert isinstance(restype, WeldLong)
        result = obj.contents.value
        return result
# --------------------------------------------------------------------------
# file: python/benchmarks/weld-python/test.py
# --------------------------------------------------------------------------

code = "|a: vec[i64], b: vec[i64]| {a, result(for(zip(a, b), appender, |b, i, e| merge(b, e.$0 + e.$1)))}"

from compiled import *
from encoders import *

import numpy as np

myfunc = compile(code, (NumpyArrayEncoder(), NumpyArrayEncoder()), WeldVec(WeldLong()), NumpyArrayDecoder())

a = np.ones(5, dtype=np.int64)
b = np.ones(5, dtype=np.int64)

print(myfunc(a, b))
# --------------------------------------------------------------------------
# file: python/benchmarks/weld-python/weldtypes.py
# --------------------------------------------------------------------------
from ctypes import *


def encoder(ty):
    """Return the ctypes class used to pass a primitive Python `ty` to Weld."""
    if ty == int:
        return c_long
    elif ty == float:
        return c_double
    elif ty == str:
        return c_char_p
    raise ValueError("no builtin encoder for type {}".format(ty))


class WeldType(object):
    """Base class for Weld types.

    Equality and hashing are defined on the type's string form, so two
    instances that print the same compare equal (e.g. WeldInt() == WeldInt()).
    """

    def __str__(self):
        return "type"

    def __hash__(self):
        return hash(str(self))

    def __eq__(self, other):
        # NOTE(review): compares hashes rather than strings; a hash collision
        # between distinct types would make them compare equal.
        return hash(other) == hash(self)

    @property
    def ctype_class(self):
        """The ctypes class used to represent this type in C."""
        raise NotImplementedError


class WeldChar(WeldType):
    def __str__(self):
        return "i8"

    @property
    def ctype_class(self):
        # NOTE(review): c_wchar_p is a wide-character *pointer*, not an 8-bit
        # char -- confirm this is intended for i8.
        return c_wchar_p


class WeldBit(WeldType):
    def __str__(self):
        return "bool"

    @property
    def ctype_class(self):
        return c_bool


class WeldInt(WeldType):

    def __str__(self):
        return "i32"

    @property
    def ctype_class(self):
        return c_int


class WeldLong(WeldType):

    def __str__(self):
        return "i64"

    @property
    def ctype_class(self):
        # NOTE(review): c_long is platform-dependent (32-bit on Windows);
        # i64 strictly corresponds to c_int64.
        return c_long


class WeldFloat(WeldType):

    def __str__(self):
        return "f32"

    @property
    def ctype_class(self):
        return c_float


class WeldDouble(WeldType):

    def __str__(self):
        return "f64"

    @property
    def ctype_class(self):
        return c_double


class WeldVec(WeldType):
    # Kind of a hack, but ctypes requires that the class instance returned is
    # the same object. Every time we create a new Vec instance (templatized by
    # type), we cache it here.
    _singletons = {}

    def __init__(self, elemType):
        self.elemType = elemType

    def __str__(self):
        return "vec[%s]" % str(self.elemType)

    @property
    def ctype_class(self):
        def vec_factory(elemType):
            class Vec(Structure):
                _fields_ = [
                    ("ptr", POINTER(elemType.ctype_class)),
                    ("size", c_long),
                ]
            return Vec

        if self.elemType not in WeldVec._singletons:
            WeldVec._singletons[self.elemType] = vec_factory(self.elemType)
        return WeldVec._singletons[self.elemType]


class WeldStruct(WeldType):
    # Cache of generated ctypes Structure classes, keyed by the ordered tuple
    # of field types (see WeldVec._singletons for why caching is required).
    _singletons = {}

    def __init__(self, field_types):
        assert False not in [isinstance(e, WeldType) for e in field_types]
        self.field_types = field_types

    def __str__(self):
        return "{" + ",".join([str(f) for f in self.field_types]) + "}"

    @property
    def ctype_class(self):
        def struct_factory(field_types):
            class Struct(Structure):
                _fields_ = [("_" + str(i), t.ctype_class) for i, t in enumerate(field_types)]
            return Struct

        # Bug fixes: the lookup previously consulted WeldVec._singletons (a
        # typo, so the cache always missed and a fresh class was built on
        # every access), and it keyed on frozenset(field_types), which
        # collapses duplicate field types and ignores field order -- giving
        # {i32,f32} and {f32,i32} the same cached layout. An ordered tuple
        # preserves both.
        key = tuple(self.field_types)
        if key not in WeldStruct._singletons:
            WeldStruct._singletons[key] = struct_factory(self.field_types)
        return WeldStruct._singletons[key]
# --------------------------------------------------------------------------
# file: python/lib/composer_numpy/__init__.py
# --------------------------------------------------------------------------

# Fall back to NumPy if we don't support something.
from numpy import *

from .annotated import *
from pycomposer import evaluate
# --------------------------------------------------------------------------
# file: python/lib/composer_numpy/annotated.py
# --------------------------------------------------------------------------
from pycomposer import *
import time

import sharedmem
import numpy as np
import scipy.special as ss

from copy import deepcopy as dc


class NdArraySplit(SplitType):
    """Split type for NumPy ndarrays.

    Partitions 1-D arrays (and the rows of 2-D arrays) by [start:end); when
    `slice_col` is set, 2-D arrays are partitioned by column instead.
    """

    def __init__(self):
        # Split 2-D arrays along columns instead of rows.
        self.slice_col = False
        # Whether combine() should concatenate results back together.
        self.merge = False

    def combine(self, values):
        if self.merge:
            return np.concatenate(values)
        # Otherwise nothing is returned -- presumably results were written in
        # place via the mutable `out` kwarg; confirm against callers.

    def split(self, start, end, value):
        if isinstance(value, np.ndarray):
            shape = value.shape
            ndims = len(value.shape)
            if ndims == 1:
                if start >= shape[0]:
                    return STOP_ITERATION
                return value[start:min(end, shape[0])]
            elif ndims == 2:
                # Column vectors are broadcast whole.
                if shape[1] == 1:
                    return value
                if self.slice_col:
                    return value[:,start:end]
                else:
                    return value[start:end,:]
            else:
                # Bug fix: this previously *returned* the exception object
                # instead of raising it.
                raise NotImplementedError("ndarray with dim > 2 not supported")
        else:
            # Scalar.
            return value

    def elements(self, value):
        if isinstance(value, np.ndarray):
            if len(value.shape) == 2 and value.shape[1] == 1:
                return value.shape[0]
            return value.shape[-1]
        # Non-arrays have no element count (treated as broadcast values).

    def __str__(self):
        return "NdArraySplit"


_args = (NdArraySplit(), NdArraySplit())
_kwargs = { 'out' : mut(NdArraySplit()), 'axis': Broadcast() }
_ret = NdArraySplit()


# Binary ops.
add = sa(dc(_args), dc(_kwargs), dc(_ret))(np.add)
subtract = sa(dc(_args), dc(_kwargs), dc(_ret))(np.subtract)
multiply = sa(dc(_args), dc(_kwargs), dc(_ret))(np.multiply)
divide = sa(dc(_args), dc(_kwargs), dc(_ret))(np.divide)
power = sa(dc(_args), dc(_kwargs), dc(_ret))(np.power)

_args = (NdArraySplit(),)

# Unary ops.
log = sa(dc(_args), dc(_kwargs), dc(_ret))(np.log)
log2 = sa(dc(_args), dc(_kwargs), dc(_ret))(np.log2)
exp = sa(dc(_args), dc(_kwargs), dc(_ret))(np.exp)
sin = sa(dc(_args), dc(_kwargs), dc(_ret))(np.sin)
arcsin = sa(dc(_args), dc(_kwargs), dc(_ret))(np.arcsin)
cos = sa(dc(_args), dc(_kwargs), dc(_ret))(np.cos)
sqrt = sa(dc(_args), dc(_kwargs), dc(_ret))(np.sqrt)
erf = sa(dc(_args), dc(_kwargs), dc(_ret))(ss.erf)

# addreduce = np.add.reduce
addreduce = sa(dc(_args), dc(_kwargs), dc(_ret))(np.add.reduce)

_args = (NdArraySplit(), Broadcast())
_kwargs = { 'axis': Broadcast() }
# roll = sa(dc(_args), dc(_kwargs), dc(NdArraySplit()))(np.roll)


def ones(shape, dtype=None, order='C'):
    """Like np.ones, but allocates the result in shared memory so worker
    processes can write into it."""
    result = sharedmem.empty(shape)
    result[:] = np.ones(shape, dtype, order)[:]
    return result


def zeros(shape, dtype=None, order='C'):
    """Like np.zeros, but allocates the result in shared memory so worker
    processes can write into it."""
    result = sharedmem.empty(shape)
    result[:] = np.zeros(shape, dtype, order)[:]
    return result
# --------------------------------------------------------------------------
# file: python/lib/composer_pandas/__init__.py
# --------------------------------------------------------------------------

# Fall back to pandas if we don't support something.
from pandas import *

from .annotated import *
from pycomposer import evaluate
# --------------------------------------------------------------------------
# file: python/lib/composer_pandas/annotated.py
# --------------------------------------------------------------------------
"""
Annotations for Pandas functions.

Note: For convenience, we just write a wrapper function that calls the Pandas function, and then
use those functions instead. We could equivalently just replace methods on the DataFrame class too and
split `self` instead of the DataFrame passed in here.
"""

from pycomposer import *
import time

import numpy as np
import pandas as pd

from copy import deepcopy as dc


class UniqueSplit(SplitType):
    """ For the result of Unique """

    def combine(self, values):
        # De-duplicate again across pieces: each piece was only unique
        # within itself.
        if len(values) > 0:
            return np.unique(np.concatenate(values))
        else:
            return np.array([])

    def split(self, start, end, value):
        # Unique results are combine-only. (Signature aligned with the other
        # SplitTypes; it previously took only `values`.)
        raise ValueError


class DataFrameSplit(SplitType):
    """ Row-range splits over DataFrames/Series; constants pass through. """

    def combine(self, values):
        # Only concatenate when at least one piece produced a result;
        # otherwise the wrapped operation returned nothing.
        do_combine = False
        for val in values:
            if val is not None:
                do_combine = True

        if do_combine and len(values) > 0:
            return pd.concat(values)

    def split(self, start, end, value):
        if not isinstance(value, pd.DataFrame) and not isinstance(value, pd.Series):
            # Assume this is a constant (str, int, etc.).
            return value
        return value[start:end]

    def elements(self, value):
        if not isinstance(value, pd.DataFrame) and not isinstance(value, pd.Series):
            return None
        return len(value)


class SumSplit(SplitType):
    """ Result of a sum: pieces combine by addition. """

    def combine(self, values):
        return sum(values)

    def split(self, start, end, value):
        raise ValueError("can't split sum values")


class GroupBySplit(SplitType):
    """ Result of a groupby: opaque; neither split nor combined. """

    def combine(self, values):
        return None

    def split(self, start, end, value):
        raise ValueError("can't split groupby values")


class SizeSplit(SplitType):
    """ Result of GroupBy.size(): pieces are concatenated. """

    def combine(self, values):
        return pd.concat(values)

    def split(self, start, end, value):
        raise ValueError("can't split size values")


def dfgroupby(df, keys):
    return df.groupby(keys)

def merge(left, right):
    return pd.merge(left, right)

def gbapply(grouped, func):
    return grouped.apply(func)

def gbsize(grouped):
    return grouped.size()

def filter(df, column, target):
    # NOTE: shadows the builtin `filter`; keeps rows where df[column] > target.
    return df[df[column] > target]

@sa((DataFrameSplit(), DataFrameSplit()), {}, DataFrameSplit())
def divide(series, value):
    result = (series / value)
    return result

@sa((DataFrameSplit(), DataFrameSplit()), {}, DataFrameSplit())
def multiply(series, value):
    result = (series * value)
    return result

@sa((DataFrameSplit(), DataFrameSplit()), {}, DataFrameSplit())
def subtract(series, value):
    result = (series - value)
    return result

@sa((DataFrameSplit(), DataFrameSplit()), {}, DataFrameSplit())
def add(series, value):
    result = (series + value)
    return result

@sa((DataFrameSplit(), DataFrameSplit()), {}, DataFrameSplit())
def equal(series, value):
    result = (series == value)
    return result

@sa((DataFrameSplit(), DataFrameSplit()), {}, DataFrameSplit())
def greater_than(series, value):
    # NOTE(review): despite the name this computes >= (greater-or-equal);
    # existing benchmark callers may rely on it -- confirm before changing.
    result = (series >= value)
    return result

@sa((DataFrameSplit(), DataFrameSplit()), {}, DataFrameSplit())
def less_than(series, value):
    result = (series < value)
    return result

@sa((DataFrameSplit(),), {}, SumSplit())
def pandasum(series):
    result = series.sum()
    return result

@sa((DataFrameSplit(),), {}, UniqueSplit())
def unique(series):
    result = series.unique()
    return result

@sa((DataFrameSplit(),), {}, DataFrameSplit())
def series_str(series):
    result = series.str
    return result

@sa((DataFrameSplit(), DataFrameSplit(), Broadcast()), {}, DataFrameSplit())
def mask(series, cond, val):
    result = series.mask(cond, val)
    return result

@sa((DataFrameSplit(), Broadcast(), Broadcast()), {}, DataFrameSplit())
def series_str_slice(series, start, end):
    result = series.str.slice(start, end)
    return result

@sa((DataFrameSplit(),), {}, DataFrameSplit())
def pandanot(series):
    return ~series

@sa((DataFrameSplit(), Broadcast()), {}, DataFrameSplit())
def series_str_contains(series, target):
    result = series.str.contains(target)
    return result

dfgroupby = sa((DataFrameSplit(), Broadcast()), {}, GroupBySplit())(dfgroupby)
merge = sa((DataFrameSplit(), Broadcast()), {}, DataFrameSplit())(merge)
filter = sa((DataFrameSplit(), Broadcast(), Broadcast()), {}, DataFrameSplit())(filter)

# Return split type should be ApplySplit(subclass of DataFrameSplit), and it
# should take the first argument as a parameter. The parameter is guaranteed to
# be a dag.Operation. The combiner can then use the `by` arguments to groupby
# in the combiner again, and then apply again.
162 | gbapply = sa((GroupBySplit(), Broadcast()), {}, DataFrameSplit())(gbapply) 163 | gbsize = sa((GroupBySplit(), Broadcast()), {}, SizeSplit())(gbsize) 164 | -------------------------------------------------------------------------------- /python/pycomposer/pycomposer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .composer import sa, evaluate, mut 3 | from .split_types import SplitType, Broadcast 4 | from .vm.driver import STOP_ITERATION 5 | 6 | # Import the generics. 7 | from .split_types import A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z 8 | -------------------------------------------------------------------------------- /python/pycomposer/pycomposer/annotation.py: -------------------------------------------------------------------------------- 1 | 2 | import functools 3 | 4 | from inspect import signature, Parameter, Signature 5 | 6 | from .split_types import Broadcast 7 | 8 | class Mut(object): 9 | """ Marker that marks values in an annotation as mutable. """ 10 | 11 | __slots__ = [ "value" ] 12 | def __init__(self, value): 13 | self.value = value 14 | 15 | # Constructor for mutables. 16 | mut = lambda x: Mut(x) 17 | 18 | class Annotation(object): 19 | """ An annotation on a function. 20 | 21 | Annotations map arguments (by index for regular arguments and by name for 22 | keyword arguments) to their split type. 23 | 24 | """ 25 | 26 | __slots__ = [ "mutables", "arg_types", "return_type", "kwarg_types" ] 27 | 28 | def __init__(self, func, types, kwtypes, return_type): 29 | """ Initialize an annotation for a function invocation with the given 30 | arguments. 31 | 32 | Parameters 33 | __________ 34 | 35 | func : the function that was invoked. 36 | types : the split types of the non-keyword arguments and return type. 37 | kwtypes : the split types of the keyword arguments. 
38 | 39 | """ 40 | 41 | try: 42 | sig = signature(func) 43 | args = [(name, param) for (name, param) in sig.parameters.items()\ 44 | if param.kind == Parameter.POSITIONAL_OR_KEYWORD] 45 | 46 | num_required_types = 0 47 | for (name, param) in args: 48 | if param.default is Parameter.empty: 49 | num_required_types += 1 50 | 51 | if len(types) != num_required_types: 52 | raise ValueError("invalid number of arguments in annotation (expected {}, got {})".format(len(args), len(types))) 53 | 54 | # Make sure there's no extraneous args. 55 | kwargs = set([name for (name, param) in args if param.default is not Parameter.empty]) 56 | 57 | for name in kwargs: 58 | if name not in kwtypes: 59 | kwtypes[name] = Broadcast() 60 | 61 | for name in kwtypes: 62 | assert(name in kwargs) 63 | 64 | except ValueError as e: 65 | pass 66 | # print("WARN: Continuing without verification of annotation") 67 | 68 | # The mutable values. These are indices for positionals and string 69 | # names for keyword args. 70 | self.mutables = set() 71 | 72 | # The argument types. 73 | self.arg_types = [] 74 | 75 | for (i, ty) in enumerate(types): 76 | if isinstance(ty, Mut): 77 | self.arg_types.append(ty.value) 78 | self.mutables.add(i) 79 | else: 80 | self.arg_types.append(ty) 81 | 82 | # The return type. This can be None if the function doesn't return anything. 83 | self.return_type = return_type 84 | 85 | # Dictionary of kwarg types. 86 | self.kwarg_types = dict() 87 | for (key, value) in kwtypes.items(): 88 | if isinstance(value, Mut): 89 | self.kwarg_types[key] = value.value 90 | self.mutables.add(key) 91 | else: 92 | self.kwarg_types[key] = value 93 | 94 | 95 | def types(self): 96 | """ Iterate over the split types in this annotation. 
""" 97 | for ty in self.arg_types: 98 | yield ty 99 | for ty in self.kwarg_types.values(): 100 | yield ty 101 | yield self.return_type 102 | 103 | def __str__(self): 104 | if len(self.arg_types) > 0: 105 | args = ", ".join([str(t) for t in self.arg_types]) 106 | else: 107 | args = ", " if len(self.kwarg_types) > 0 else "" 108 | 109 | if len(self.kwarg_types) > 0: 110 | args += ", " 111 | args += ", ".join(["{}={}".format(k, v) for (k,v) in self.kwarg_types.items()]) 112 | 113 | return "({}) -> {}".format(args, self.return_type) 114 | -------------------------------------------------------------------------------- /python/pycomposer/pycomposer/composer.py: -------------------------------------------------------------------------------- 1 | 2 | from .annotation import Annotation, mut 3 | from .dag import LogicalPlan, evaluate_dag 4 | from .split_types import * 5 | from .vm.driver import DEFAULT_BATCH_SIZE 6 | 7 | import functools 8 | 9 | import copy 10 | 11 | # The task graph. 12 | _DAG = LogicalPlan() 13 | 14 | class sa(object): 15 | """ A splitability annotation.""" 16 | 17 | def __init__(self, types, kwtypes, return_type): 18 | """ A splitability annotation. 19 | 20 | Parameters 21 | ---------- 22 | 23 | postypes : a tuple of split types for each positional argument. The number of elements in the tuple must match the number 24 | of positional arguments in the funciton. 25 | 26 | kwtypes : a dictionary of split types for each keyword argument. Providing 27 | split types for keyword arguments is optional. If a keyword argument does 28 | not have a split type, its split type will default to "broadcast." 29 | 30 | return_type : split type of the value returned by this function. 
31 | 32 | """ 33 | self.types = types 34 | self.kwtypes = kwtypes 35 | self.return_type = return_type 36 | 37 | def __call__(self, func): 38 | annotation = Annotation(func, self.types, self.kwtypes, self.return_type) 39 | 40 | @functools.wraps(func) 41 | def _decorated(*args, **kwargs): 42 | return _DAG.register(func, args, kwargs, annotation) 43 | 44 | return _decorated 45 | 46 | def evaluate(workers=1, batch_size=DEFAULT_BATCH_SIZE, profile=False): 47 | evaluate_dag(_DAG, workers, batch_size, profile) 48 | -------------------------------------------------------------------------------- /python/pycomposer/pycomposer/unevaluated.py: -------------------------------------------------------------------------------- 1 | """ A singleton representing an unevaluated computation. """ 2 | 3 | class _Unevaluated: 4 | """ An unevaluated value. 5 | 6 | Users should access the UNEVALUATED singleton instead of 7 | making instances of this directly. 8 | 9 | """ 10 | __slots__ = [] 11 | 12 | UNEVALUATED = _Unevaluated() 13 | 14 | 15 | -------------------------------------------------------------------------------- /python/pycomposer/pycomposer/vm/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .program import Program 3 | from .driver import Driver, STOP_ITERATION 4 | -------------------------------------------------------------------------------- /python/pycomposer/pycomposer/vm/instruction.py: -------------------------------------------------------------------------------- 1 | 2 | from abc import ABC, abstractmethod 3 | import types 4 | 5 | from .driver import STOP_ITERATION 6 | 7 | class Instruction(ABC): 8 | """ 9 | An instruction that updates an operation in a lazy DAG. 10 | """ 11 | 12 | @abstractmethod 13 | def evaluate(self, thread, start, end, values, context): 14 | """ 15 | Evaluates an instruction. 
16 | 17 | Parameters 18 | ---------- 19 | 20 | thread : the thread that is currently executing 21 | start : the start index of the current split value. 22 | end : the end index of the current split value 23 | values : a global value map holding the inputs. 24 | context : map holding execution state (arg ID -> value). 25 | 26 | """ 27 | pass 28 | 29 | class Split(Instruction): 30 | """ 31 | An instruction that splits the inputs to an operation. 32 | """ 33 | 34 | def __init__(self, target, ty): 35 | """ 36 | A Split instruction takes an argument and split type and applies 37 | the splitter on the argument. 38 | 39 | Parameters 40 | ---------- 41 | 42 | target : the arg ID that will be split. 43 | ty : the split type. 44 | """ 45 | self.target = target 46 | self.ty = ty 47 | self.splitter = None 48 | 49 | def __str__(self): 50 | return "v{} = split {}:{}".format(self.target, self.target, self.ty) 51 | 52 | def evaluate(self, thread, start, end, values, context): 53 | """ Returns values from the split. """ 54 | 55 | if self.splitter is None: 56 | # First time - check if the splitter is actually a generator. 57 | result = self.ty.split(start, end, values[self.target]) 58 | if isinstance(result, types.GeneratorType): 59 | self.splitter = result 60 | result = next(self.splitter) 61 | else: 62 | self.splitter = self.ty.split 63 | else: 64 | if isinstance(self.splitter, types.GeneratorType): 65 | result = next(self.splitter) 66 | else: 67 | result = self.splitter(start, end, values[self.target]) 68 | 69 | if isinstance(result, str) and result == STOP_ITERATION: 70 | return STOP_ITERATION 71 | else: 72 | context[self.target].append(result) 73 | 74 | class Call(Instruction): 75 | """ An instruction that calls an SA-enabled function. """ 76 | def __init__(self, target, func, args, kwargs, ty): 77 | self.target = target 78 | # Function to call. 79 | self.func = func 80 | # Arguments: list of targets. 
81 | self.args = args 82 | # Keyword arguments: Maps { name -> target } 83 | self.kwargs = kwargs 84 | # Return split type. 85 | self.ty = ty 86 | 87 | def __str__(self): 88 | args = ", ".join(map(lambda a: "v" + str(a), self.args)) 89 | kwargs = list(map(lambda v: "{}=v{}".format(v[0], v[1]), self.kwargs.items())) 90 | arguments = ", ".join([args] + kwargs) 91 | return "v{} = call {}({}):{}".format(self.target, self.func.__name__, arguments, str(self.ty)) 92 | 93 | def get_args(self, context): 94 | return [ context[target][-1] for target in self.args ] 95 | 96 | def get_kwargs(self, context): 97 | return dict([ (name, context[target][-1]) for (name, target) in self.kwargs.items() ]) 98 | 99 | def evaluate(self, _thread, _start, _end, _values, context): 100 | """ 101 | Evaluates a function call by gathering arguments and calling the 102 | function. 103 | 104 | """ 105 | args = self.get_args(context) 106 | kwargs = self.get_kwargs(context) 107 | context[self.target].append(self.func(*args, **kwargs)) 108 | -------------------------------------------------------------------------------- /python/pycomposer/pycomposer/vm/program.py: -------------------------------------------------------------------------------- 1 | 2 | from .driver import STOP_ITERATION 3 | from .instruction import Split 4 | 5 | class Program: 6 | """ 7 | A Composer Virtual Machine Program. 8 | 9 | A program stores a sequence of instructions to execute. 10 | 11 | """ 12 | 13 | __slots__ = ["ssa_counter", "insts", "registered", "index"] 14 | 15 | def __init__(self): 16 | # Counter for registering instructions. 17 | self.ssa_counter = 0 18 | # Instruction list. 19 | self.insts = [] 20 | # Registered values. Maps SSA value to real value. 21 | self.registered = {} 22 | 23 | def get(self, value): 24 | """ 25 | Get the SSA value for a value, or None if the value is not registered. 
26 | 27 | value : The value to lookup 28 | 29 | """ 30 | for num, val in self.registered.items(): 31 | if value is val: 32 | return num 33 | 34 | def set_range_end(self, range_end): 35 | for inst in self.insts: 36 | if isinstance(inst, Split): 37 | inst.ty.range_end = range_end 38 | 39 | def step(self, thread, piece_start, piece_end, values, context): 40 | """ 41 | Step the program and return whether are still items to process. 42 | """ 43 | for task in self.insts: 44 | result = task.evaluate(thread, piece_start, piece_end, values, context) 45 | if isinstance(result, str) and result == STOP_ITERATION: 46 | return False 47 | return True 48 | 49 | def elements(self, values): 50 | """Returns the number of elements that this program will process. 51 | 52 | This quantity is retrieved by querying the Split instructions in the program. 53 | 54 | """ 55 | elements = None 56 | for inst in self.insts: 57 | if isinstance(inst, Split): 58 | e = inst.ty.elements(values[inst.target]) 59 | if e is None: 60 | continue 61 | if elements is not None: 62 | assert(elements == e, inst) 63 | else: 64 | elements = e 65 | return elements 66 | 67 | def __str__(self): 68 | return "\n".join([str(i) for i in self.insts]) 69 | -------------------------------------------------------------------------------- /python/pycomposer/pycomposer/vm/vm.py: -------------------------------------------------------------------------------- 1 | 2 | from .program import Program 3 | 4 | class VM: 5 | """ 6 | A Composer virtual machine, which holds a program and its associated data. 7 | """ 8 | def __init__(self): 9 | # Counter for argument IDs 10 | self.ssa_counter = 0 11 | # Program 12 | self.program = Program() 13 | # Values, mapping argID -> values 14 | self.values = dict() 15 | 16 | def get(self, value): 17 | """ 18 | Get the SSA value for a value, or None if the value is not registered. 
19 | 20 | value : The value to lookup 21 | 22 | """ 23 | for num, val in self.values.items(): 24 | if value is val: 25 | return num 26 | 27 | def register_value(self, value): 28 | """ 29 | Register a counter to a value. 30 | """ 31 | arg_id = self.ssa_counter 32 | self.ssa_counter += 1 33 | self.values[arg_id] = value 34 | return arg_id 35 | 36 | -------------------------------------------------------------------------------- /python/pycomposer/requirements.txt: -------------------------------------------------------------------------------- 1 | cloudpickle 2 | 3 | -------------------------------------------------------------------------------- /python/pycomposer/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from distutils.core import setup 4 | 5 | setup(name='pycomposer', 6 | version='0.1.0', 7 | description='Python Composer library', 8 | author='Shoumik Palkar', 9 | author_email='shoumik@cs.stanford.edu', 10 | url='https://www.github.com/sppalkia/annotator', 11 | packages=['cloudpickle'], 12 | ) 13 | --------------------------------------------------------------------------------