├── benchmarks ├── .gitignore ├── benchmark_types.ml ├── README.md ├── util.ml ├── Makefile ├── benchmark.ml ├── benchmark_strymonas.ml ├── benchmark_seq.ml ├── benchmark_iter.ml ├── benchmark_c.ml ├── benchmark_batteries.ml ├── benchmark_abstract.ml └── benchmark_streaming.ml ├── examples ├── gnuradio-fm │ ├── baseline-pi │ │ ├── .gitignore │ │ ├── env_armv6l.cmake │ │ ├── Makefile │ │ ├── CMakeLists.txt │ │ ├── utils.h │ │ ├── fmradio.hpp │ │ ├── utils.c │ │ ├── main.cpp │ │ └── fmradio.cpp │ ├── baseline │ │ ├── .gitignore │ │ ├── env_intelmac.cmake │ │ ├── Makefile │ │ ├── CMakeLists.txt │ │ ├── utils.h │ │ ├── fmradio.hpp │ │ ├── utils.c │ │ ├── main.cpp │ │ └── fmradio.cpp │ ├── parameters.ml │ ├── utils.h │ ├── Makefile │ ├── atan_test.c │ ├── utils.c │ ├── main.ml │ ├── README.md │ ├── fir.ml │ ├── bench_main.c │ ├── play_main.c │ └── stream_gnuradio_sdr_fn.ml ├── .gitignore ├── streamit-fm │ ├── streamit │ │ ├── .gitignore │ │ ├── memory_profile.c │ │ ├── benchmarks.h │ │ ├── README.md │ │ ├── Makefile │ │ ├── utils.h │ │ ├── memory.sh │ │ ├── bench.sh │ │ ├── utils.c │ │ ├── main.c │ │ ├── fmref.patch │ │ └── benchmarks_gen.c │ ├── main.ml │ ├── parameters.ml │ ├── README.md │ ├── Makefile │ ├── stream_streamit_sdr_fn.ml │ ├── util.ml │ ├── sit_emulator.ml │ ├── fir.ml │ ├── test_c.ml │ └── test.ml ├── amradio │ └── Makefile ├── sliding-window │ └── Makefile ├── run-length-encoding │ ├── Makefile │ ├── apples.c │ └── apples.ml ├── Makefile ├── TryFirst │ ├── Makefile │ ├── 0README.dr │ └── simple_c.ml └── Makefile.common ├── lib ├── backends_pure.ml ├── META ├── META.default ├── META.pure ├── backends │ ├── Trx │ │ ├── trx_code.ml │ │ └── trx_code_native.ml │ └── C │ │ ├── offshoringIR.mli │ │ └── c_ast.mli ├── cde_top.mli ├── 0README.dr ├── backends.ml ├── cde_ex.mli ├── stream_cooked.mli ├── Makefile ├── cde.mli ├── pk_coll.ml └── stream_raw.mli ├── .merlin ├── .gitignore ├── test └── Makefile ├── strymonas-pure.opam ├── Makefile ├── LICENSE └── README.md /benchmarks/.gitignore: -------------------------------------------------------------------------------- 1 | *.txt 2 | *.dat -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline-pi/.gitignore: -------------------------------------------------------------------------------- 1 | build -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline/.gitignore: -------------------------------------------------------------------------------- 1 | build -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | *.pcap 2 | *.pcm 3 | *.csv 4 | *.tmp -------------------------------------------------------------------------------- /examples/streamit-fm/streamit/.gitignore: -------------------------------------------------------------------------------- 1 | fmref.c 2 | benchmarks_base.c -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline/env_intelmac.cmake: -------------------------------------------------------------------------------- 1 | # for macOS 2 | add_compile_options(-Ofast -march=native) 3 | -------------------------------------------------------------------------------- /benchmarks/benchmark_types.ml: -------------------------------------------------------------------------------- 1 | type 'b benchmark_options = { 2 | repetitions : int; 3 | final_f : ('b code -> unit code); 4 | } 5 | -------------------------------------------------------------------------------- /lib/backends_pure.ml: -------------------------------------------------------------------------------- 1 | (* Convinient functions for selecting a backend *) 2 | 3 | (* C backend *) 4 | module C = Pk_cde.Make(C_cde) 5 | -------------------------------------------------------------------------------- /examples/amradio/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile.common 2 | 3 | .PHONY: all 4 | all: 5 | ${TOP} ${INCLUDES} ${LIB} am_radio.ml 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /examples/sliding-window/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile.common 2 | 3 | .PHONY: all 4 | all: 5 | ${TOP} ${INCLUDES} ${LIB} sliding_window.ml 6 | 7 | -------------------------------------------------------------------------------- /.merlin: -------------------------------------------------------------------------------- 1 | EXT meta 2 | S . 3 | S lib 4 | S lib/backends/C 5 | S lib/backends/Trx 6 | B . 7 | B lib 8 | B lib/backends/C 9 | B lib/backends/Trx 10 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline-pi/env_armv6l.cmake: -------------------------------------------------------------------------------- 1 | # for Raspberry Pi 2 | add_compile_options(-Ofast -mfpu=vfp -mfloat-abi=hard -march=armv6zk -mtune=arm1176jzf-s) 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### OCaml ### 2 | *.annot 3 | *.cmo 4 | *.cma 5 | *.cmi 6 | *.a 7 | *.o 8 | *.cmx 9 | *.cmxs 10 | *.cmxa 11 | 12 | ### Misc. ### 13 | *.out 14 | depend 15 | *.eps 16 | 17 | -------------------------------------------------------------------------------- /examples/streamit-fm/streamit/memory_profile.c: -------------------------------------------------------------------------------- 1 | #include "benchmarks.h" 2 | 3 | #if defined(BENCHF1) 4 | int main(void) 5 | { 6 | BENCHF1(); 7 | } 8 | 9 | #else 10 | int main(void) 11 | { 12 | } 13 | #endif -------------------------------------------------------------------------------- /lib/META: -------------------------------------------------------------------------------- 1 | version = "2.1.1" 2 | description = "stream fusion, to completeness" 3 | requires = "" 4 | archive(byte) = "stream.cma" 5 | archive(native) = "stream.cmxa" 6 | plugin(byte) = "stream.cma" 7 | #plugin(native) = "stream.cmxs" 8 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/parameters.ml: -------------------------------------------------------------------------------- 1 | let gain = 1. 2 | let samplingRate = 3_072_000. 3 | let cutoff = 75_000. 4 | let trWidth = 5 | let numberOfTaps = 65 in 6 | 53. *. samplingRate /. (22. *. float numberOfTaps) 7 | let maxDev = 75_000. 8 | -------------------------------------------------------------------------------- /lib/META.default: -------------------------------------------------------------------------------- 1 | version = "2.1.1" 2 | description = "stream fusion, to completeness" 3 | requires = "" 4 | archive(byte) = "stream.cma" 5 | archive(native) = "stream.cmxa" 6 | plugin(byte) = "stream.cma" 7 | #plugin(native) = "stream.cmxs" 8 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline-pi/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: build-pi clean 2 | 3 | build-pi: clean 4 | cd build && \ 5 | cmake .. -DCMAKE_TOOLCHAIN_FILE=../env_armv6l.cmake && \ 6 | cmake --build . 7 | 8 | clean: 9 | rm -rf build/ 10 | mkdir build 11 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: build build-pi clean 2 | 3 | build: clean 4 | cd build && \ 5 | cmake .. -DCMAKE_TOOLCHAIN_FILE=../env_intelmac.cmake && \ 6 | cmake --build . -j4 7 | 8 | clean: 9 | rm -rf build/ 10 | mkdir build 11 | -------------------------------------------------------------------------------- /examples/streamit-fm/streamit/benchmarks.h: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_BENCHMARKS_H 2 | #define INCLUDED_BENCHMARKS_H 3 | 4 | #include 5 | #include 6 | #include 7 | // #include 8 | 9 | float fmradio(void); 10 | 11 | #endif -------------------------------------------------------------------------------- /lib/META.pure: -------------------------------------------------------------------------------- 1 | version = "2.1.1" 2 | description = "stream fusion, to completeness" 3 | requires = "" 4 | archive(byte) = "stream_ocaml.cma" 5 | archive(native) = "stream_ocaml.cmxa" 6 | plugin(byte) = "stream_ocaml.cma" 7 | #plugin(native) = "stream_ocaml.cmxs" 8 | -------------------------------------------------------------------------------- /examples/run-length-encoding/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile.common 2 | 3 | .PHONY: all 4 | all: rle apples 5 | 6 | .PHONY: rle 7 | rle: 8 | ${TOP} ${INCLUDES} ${LIB} rle.ml 9 | 10 | apples: 11 | ${TOP} ${INCLUDES} ${LIB} apples.ml 12 | $(CC) -W -Wall -o apples apples.c && ./apples 13 | 14 | clean:: 15 | rm -f apples 16 | 17 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | # Run all the examples 2 | 3 | .PHONY: all run-length-encoding sliding-window 4 | 5 | all: tryfirst run-length-encoding sliding-window 6 | 7 | tryfirst: 8 | cd TryFirst && $(MAKE) 9 | 10 | run-length-encoding: 11 | cd run-length-encoding && $(MAKE) 12 | 13 | sliding-window: 14 | cd sliding-window && $(MAKE) 15 | 16 | -------------------------------------------------------------------------------- /lib/backends/Trx/trx_code.ml: -------------------------------------------------------------------------------- 1 | (* An implementation of the Cde signature, using the 2 | MetaOCaml code type and code generation facilities 3 | MetaOCaml bytecode 4 | *) 5 | 6 | include Trx_code_common 7 | 8 | let ident = "MetaOCaml" 9 | 10 | (* Use byte-code, for debugging *) 11 | let run_capture_output = run_capture_output_gen Runcode.run 12 | 13 | let run = Runcode.run 14 | -------------------------------------------------------------------------------- /lib/backends/Trx/trx_code_native.ml: -------------------------------------------------------------------------------- 1 | (* An implementation of the Cde signature, using the 2 | MetaOCaml code type and code generation facilities 3 | MetaOCaml native 4 | *) 5 | 6 | include Trx_code_common 7 | 8 | let ident = "MetaOCaml native" 9 | 10 | (* Use byte-code, for debugging *) 11 | let run_capture_output = run_capture_output_gen Runnative.run 12 | 13 | let run = Runnative.run 14 | -------------------------------------------------------------------------------- /examples/TryFirst/Makefile: -------------------------------------------------------------------------------- 1 | # Test the simplest examples 2 | include ../Makefile.common 3 | 4 | # Order matters! 5 | 6 | .PHONY: all 7 | all: trytop c 8 | 9 | trytop: 10 | ${TOP} ${INCLUDES} ${LIB} simple.ml 11 | echo '#use "simple.ml";;' | ${TOP} 12 | 13 | # We want to demonstrate pure OCaml, so we manually pick up only relevant 14 | # pure OCaml modules 15 | c: 16 | $(OCAML) ${INCLUDES} ${LIBPURE} simple_c.ml 17 | -------------------------------------------------------------------------------- /examples/streamit-fm/streamit/README.md: -------------------------------------------------------------------------------- 1 | - First of all, change CC in bench.sh and memory.sh accodingly. 2 | - benchmarks_base.c is based on http://groups.csail.mit.edu/cag/streamit/apps/benchmarks/fm/c/fmref.c. Apply fmref.patch fot it and rename it to benchmarks_base.c: 3 | ``` 4 | $ patch -u fmref.c fmref.patch 5 | $ mv fmref.c benchmarks_base.c 6 | ``` 7 | 8 | 9 | `make` and `make bench` invoke the time bench: bench.sh. 10 | `make memory` invokes the memory bench: memory.sh. 11 | 12 | -------------------------------------------------------------------------------- /test/Makefile: -------------------------------------------------------------------------------- 1 | OCAMLC=metaocamlc 2 | TOP=metaocaml 3 | OCAMLOPT=metaocamlopt 4 | 5 | .SUFFIXES: .ml .mli .cmo .cmi .cmx .tex .pdf 6 | 7 | 8 | LIBDIR=../lib 9 | LIB=${LIBDIR}/stream.cma 10 | LIBOPT=${LIBDIR}/stream.cmxa 11 | INCLUDES=-I ../lib -I ../lib/backends/Trx -I ../lib/backends/C 12 | # Order matters! 13 | 14 | test: standard 15 | 16 | .PHONY: standard 17 | standard: 18 | ${TOP} ${INCLUDES} ${LIB} test_stream.ml 19 | 20 | .PHONY: window 21 | window: 22 | ${TOP} ${INCLUDES} ${LIB} test_window.ml 23 | 24 | -------------------------------------------------------------------------------- /examples/streamit-fm/streamit/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: bench memory clean display_options diff_base 2 | bench: 3 | sh bench.sh 4 | cat result_f1.csv > result_both.csv 5 | cat result_f2.csv | tail -n +2 >> result_both.csv 6 | 7 | memory: 8 | sh memory.sh 9 | 10 | clean: 11 | rm -rf *.dSYM 12 | rm -rf .ipynb_checkpoints 13 | rm *.bc *.i *.s *.out *.o \ 14 | *.csv *.tmp *.eps 15 | 16 | display_options: 17 | $(CC) $(CFLAGS) -Q --help=warnings 18 | 19 | diff_base: 20 | cd misc && diff -u fmref.c ../benchmarks_base.c >| fmref.patch || : 21 | -------------------------------------------------------------------------------- /examples/Makefile.common: -------------------------------------------------------------------------------- 1 | # The common part of Makefile, meant to be included into Makefile 2 | # into the subdirectories 3 | # (therefore, relative links are one deeper) 4 | 5 | # Many examples use OCaml back-end, and hence use MetaOCaml 6 | TOP=metaocaml 7 | 8 | OCAML=ocaml 9 | 10 | MOCAMLC=metaocamlc 11 | MOCAMLOPT=metaocamlopt 12 | 13 | .SUFFIXES: .ml .mli .cmo .cmi .cmx 14 | 15 | LIBDIR="../../lib" 16 | LIB=${LIBDIR}/stream.cma 17 | LIBOPT=${LIBDIR}/stream.cmxa 18 | LIBPURE=${LIBDIR}/stream_ocaml.cma 19 | INCLUDES=-I ../../lib -I ../../lib/backends/Trx -I ../../lib/backends/C 20 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline-pi/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.8) 2 | set(CMAKE_C_STANDARD 11) 3 | set(CMAKE_CXX_STANDARD 17) 4 | 5 | project(gr-fmradio_gnuradio LANGUAGES C CXX) 6 | 7 | find_package(Gnuradio "3.8.2.0" COMPONENTS 8 | blocks 9 | fft 10 | filter 11 | analog 12 | # audio 13 | ) 14 | 15 | 16 | add_executable(gr-fmradio_gnuradio utils.c fmradio.cpp main.cpp) 17 | target_link_libraries(gr-fmradio_gnuradio 18 | gnuradio::gnuradio-blocks 19 | gnuradio::gnuradio-filter 20 | gnuradio::gnuradio-analog 21 | # gnuradio::gnuradio-audio 22 | ) 23 | 24 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.8) 2 | set(CMAKE_C_STANDARD 11) 3 | set(CMAKE_CXX_STANDARD 17) 4 | 5 | project(gr-fmradio_gnuradio LANGUAGES C CXX) 6 | 7 | find_package(Gnuradio "3.10.6.0" COMPONENTS 8 | blocks 9 | fft 10 | filter 11 | analog 12 | audio 13 | soapy 14 | ) 15 | 16 | add_executable(gr-fmradio_gnuradio utils.c fmradio.cpp main.cpp) 17 | target_link_libraries(gr-fmradio_gnuradio 18 | gnuradio::gnuradio-blocks 19 | gnuradio::gnuradio-filter 20 | gnuradio::gnuradio-analog 21 | gnuradio::gnuradio-audio 22 | gnuradio::gnuradio-soapy 23 | ) 24 | 25 | -------------------------------------------------------------------------------- /examples/streamit-fm/streamit/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_UTILS_H 2 | #define INCLUDED_UTILS_H 3 | 4 | #include 5 | 6 | // Return the current CPU time 7 | static inline double time_in_mili(void) { 8 | struct timespec ts; 9 | clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); 10 | return ts.tv_sec*1000 + ((double) ts.tv_nsec)/1000000; 11 | } 12 | 13 | 14 | double mean(double arr[], int n); 15 | double var_raw(double arr[], int n); 16 | double var(double arr[], int n); 17 | double unbiased_var(double arr[], int n); 18 | double standard_deviation(double arr[], int n); 19 | double mean_error(double arr[], int n); // confidence == 95% 20 | 21 | #endif -------------------------------------------------------------------------------- /examples/gnuradio-fm/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_UTILS_H 2 | #define INCLUDED_UTILS_H 3 | 4 | #include 5 | 6 | // Return the current CPU time 7 | static inline double time_in_mili(void) { 8 | struct timespec ts; 9 | clock_gettime(CLOCK_MONOTONIC, &ts); 10 | return ts.tv_sec*1000 + ((double) ts.tv_nsec)/1000000; 11 | } 12 | 13 | 14 | double mean(double arr[], int n); 15 | double var_raw(double arr[], int n); 16 | double var(double arr[], int n); 17 | double unbiased_var(double arr[], int n); 18 | double standard_deviation(double arr[], int n); 19 | double mean_error(double arr[], int n); // confidence == 95% 20 | 21 | 22 | void null_func(float const x); 23 | #endif 24 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_UTILS_H 2 | #define INCLUDED_UTILS_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | #include 9 | static inline double time_in_mili(void) { 10 | struct timespec ts; 11 | clock_gettime(CLOCK_MONOTONIC, &ts); 12 | return (ts.tv_sec)*1000 + (ts.tv_nsec)/1000000; 13 | } 14 | 15 | double mean(double arr[], int n); 16 | double var_raw(double arr[], int n); 17 | double var(double arr[], int n); 18 | double unbiased_var(double arr[], int n); 19 | double standard_deviation(double arr[], int n); 20 | double mean_error(double arr[], int n); // confidence == 95% 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | 25 | #endif -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline-pi/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_UTILS_H 2 | #define INCLUDED_UTILS_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | #include 9 | static inline double time_in_mili(void) { 10 | struct timespec ts; 11 | clock_gettime(CLOCK_MONOTONIC, &ts); 12 | return (ts.tv_sec)*1000 + (ts.tv_nsec)/1000000; 13 | } 14 | 15 | double mean(double arr[], int n); 16 | double var_raw(double arr[], int n); 17 | double var(double arr[], int n); 18 | double unbiased_var(double arr[], int n); 19 | double standard_deviation(double arr[], int n); 20 | double mean_error(double arr[], int n); // confidence == 95% 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | 25 | #endif -------------------------------------------------------------------------------- /examples/TryFirst/0README.dr: -------------------------------------------------------------------------------- 1 | The first examples of using Strymonas, with detailed explanations 2 | (in lieu of the user guide) 3 | They demonstrate the most common features of the library 4 | 5 | 6 | Using strymonas from the BER MetaOCaml top-level: 7 | generating OCaml code and running it. Implementing the simplest 8 | example code (from the paper) 9 | For the example that does not need MetaOCaml, see simple.ml below 10 | simple.ml 11 | 12 | Using strymonas with the BER MetaOCaml native compiler. 13 | Generating OCaml code, natively compiling it and linking back 14 | into the still running generator to run it 15 | 16 | Using strymonas to generate and run C code 17 | (Using plain OCaml: MetaOCaml is no longer necessary here) 18 | simple_c.ml 19 | 20 | 21 | -------------------------------------------------------------------------------- /lib/cde_top.mli: -------------------------------------------------------------------------------- 1 | (* Abstract interface for code generation 2 | 3 | Building top-level functions and printing/running them 4 | *) 5 | 6 | include module type of Cde_ex 7 | 8 | val ident : string (* identifier of the backend *) 9 | 10 | (* Building top-level procedures *) 11 | 12 | type 'a proc_t 13 | 14 | val nullary_proc : 'a stm -> 'a proc_t 15 | val arg_base : ?name:string -> 'a tbase -> ('a exp -> 'b proc_t) -> 16 | ('a -> 'b) proc_t 17 | val arg_array : ?name:string -> ?mutble:bool -> 18 | int exp -> (* length *) 19 | 'a tbase -> ('a arr -> 'b proc_t) -> ('a array -> 'b) proc_t 20 | 21 | val pp_proc : ?name:string -> Format.formatter -> 'a proc_t -> unit 22 | 23 | (* The following is often used in debugging and testing *) 24 | val print_code : ?name:string -> 'a stm -> unit 25 | val run : 'a stm -> 'a 26 | val run_capture_output : unit stm -> Scanf.Scanning.in_channel 27 | 28 | -------------------------------------------------------------------------------- /strymonas-pure.opam: -------------------------------------------------------------------------------- 1 | 2 | opam-version: "2.0" 3 | name: "strymonas-pure" 4 | version: "2.1.1" 5 | synopsis: "Stream fusion, to completeness" 6 | maintainer: "tomoaki.kobayashi.t3@alumni.tohoku.ac.jp" 7 | authors: [ 8 | "Oleg Kiselyov" 9 | "Tomoaki Kobayashi" 10 | "Aggelos Biboudis" 11 | "Nick Palladinos" 12 | "Yannis Smaragdakis" 13 | ] 14 | license: "MIT" 15 | homepage: "https://strymonas.github.io/" 16 | bug-reports: "https://github.com/strymonas/strymonas-ocaml/issues" 17 | dev-repo: "git+https://github.com/strymonas/strymonas-ocaml.git" 18 | depends: [ 19 | "ocaml" {>= "4.14.1"} 20 | "ocamlfind" {build} 21 | ] 22 | available: [sys-ocaml-version = "4.14.1" | (arch != "arm32" & arch != "x86_32")] 23 | build: [ 24 | [make "-C" "lib/" "depend-pure"] 25 | [make "-C" "lib/" "pure"] 26 | ] 27 | install:[make "-C" "lib/" "install-pure"] 28 | remove:[make "-C" "lib/" "uninstall-pure"] 29 | url { 30 | src: "https://github.com/strymonas/strymonas-ocaml/tree/main/lib" 31 | } -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all 2 | all: check-compiler lib 3 | 4 | .PHONY: check-compiler 5 | check-compiler: 6 | @test $$(opam switch show) = "4.14.1+BER" \ 7 | || (echo 1>&2 "OCaml must be 4.14.1+BER"; exit 1) 8 | 9 | .PHONY: lib 10 | lib: 11 | cd lib && $(MAKE) 12 | 13 | .PHONY: clean 14 | clean:: 15 | cd lib && $(MAKE) clean 16 | 17 | .PHONY: install uninstall reinstall 18 | install: 19 | cd lib && $(MAKE) install 20 | 21 | uninstall: 22 | cd lib && $(MAKE) uninstall 23 | 24 | reinstall: 25 | $(MAKE) uninstall 26 | $(MAKE) install 27 | 28 | .PHONY: install-pure uninstall-pure reinstall-pure 29 | install-pure: 30 | cd lib && $(MAKE) install-pure 31 | 32 | uninstall-pure: 33 | cd lib && $(MAKE) uninstall-pure 34 | 35 | reinstall-pure: 36 | $(MAKE) uninstall-pure 37 | $(MAKE) install-pure 38 | 39 | .PHONY: test 40 | test: 41 | cd test && $(MAKE) test 42 | 43 | .PHONY: bench 44 | bench: 45 | cd benchmarks && $(MAKE) 46 | 47 | clean:: 48 | cd benchmarks && $(MAKE) clean 49 | 50 | -------------------------------------------------------------------------------- /examples/streamit-fm/main.ml: -------------------------------------------------------------------------------- 1 | (* the main file for the fmradio bench: run it to generate code 2 | on the standard output 3 | *) 4 | 5 | module C = Backends.C 6 | module F32 = C.F32 7 | module Raw = Stream_raw_fn.Make(C) 8 | open Stream_cooked_fn.Make(C) 9 | open Stream_streamit_sdr_fn.Make(C)(Raw) 10 | 11 | open Parameters 12 | 13 | let ( let- ) c k = c k 14 | let numIters = C.int 1_000_000 15 | 16 | (* accumulate sum in out *) 17 | let () = 18 | C.pp_proc ~name:"fmradio" Format.std_formatter @@ 19 | C.nullary_proc @@ 20 | let open C in 21 | let- out = newref F32.(lit 0.) in 22 | begin 23 | get_floats 24 | |> fir_filter (Fir.lowPassFilter samplingRate cutoffFrequency numberOfTaps) 25 | ~decimation:4 26 | |> fmDemodulator samplingRate maxAmplitude bandwidth 27 | |> fir_filter 28 | (Fir.equalizer samplingRate bands eqCutoff eqGain numberOfTaps) 29 | |> take numIters 30 | |> iter F32.(fun e -> out := dref out +. e) 31 | end @. 32 | (ret (dref out)) 33 | 34 | 35 | -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # Benchmarking strymonas 2 | 3 | Benchmarks rely on MetaOCaml since we generate OCaml code. 4 | Benckmarks also rely on MetaOCaml for infrastructure: to run OCaml code. 5 | To benchmark against streaming, batteries, etc. libraries, we obviously 6 | need to install those libraries. 7 | 8 | `make bench` : run the strymonas benhcmarks. The result is in the file 9 | bench_result.txt and also bench_staged.dat 10 | 11 | `make bench-base` : run the baseline (that is, handwritten code) benchmarks. 12 | The results are added to bench_result.txt and also separated into 13 | bench_baseline.dat 14 | 15 | For external libraries 16 | `make bench-streaming` 17 | `make bench-batteries` 18 | 19 | `make benchmark_c` 20 | Generate code for C benchmarks 21 | C benchmarks are taken and tested in a separate directory: ../../C/bench/, which corresponds to https://github.com/strymonas/strymonas-c/tree/main/bench 22 | 23 | The handwritten code for the benchmarks (the baseline) is in 24 | `benchmark_baseline.ml` 25 | 26 | The strymonas benchmarks (and also benchmarks for other libraries) are in 27 | `benchmark_abstract.ml` 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Oleg Kiselyov, Aggelos Biboudis, Tomoaki Kobayashi, Nick Palladinos, Yannis Smaragdakis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /examples/streamit-fm/streamit/memory.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -euo pipefail 3 | # set -euox pipefail 4 | 5 | CC="gcc-13" 6 | 7 | CFLAG_COMMON="-W -Wall -Wno-tautological-compare -Wno-unused-but-set-variable -pipe -g" 8 | CFLAGS1="${CFLAG_COMMON} -O3 -march=native -mfpmath=both -fno-math-errno -lm" 9 | CFLAGS2="${CFLAG_COMMON} -O3 -march=native -mfpmath=both -ffast-math -lm" 10 | 11 | OBJS="benchmarks_gen.c memory_profile.c" 12 | OBJSBASE="benchmarks_base.c memory_profile.c" 13 | 14 | benchs1=(\ 15 | "fmradio") 16 | 17 | run() { 18 | local OPT=$1 19 | local CFLAGS=$2 20 | for bench in ${benchs1[@]}; do 21 | local SOUTPUT=${bench}_${OPT}_so.out 22 | ($CC $CFLAGS -o ${SOUTPUT} -DBENCHF1=$bench $OBJS) \ 23 | && valgrind --tool=massif --stacks=yes --massif-out-file=massif.${SOUTPUT} ./${SOUTPUT} 24 | if [ "$bench" = "fmradio" ]; then 25 | local BOUTPUT=${bench}_${OPT}_bo.out 26 | ($CC $CFLAGS -o ${BOUTPUT} -DBENCHF1=$bench -DBASELINE $OBJSBASE) \ 27 | && valgrind --tool=massif --stacks=yes --massif-out-file=massif.${BOUTPUT} ./${BOUTPUT} 28 | fi 29 | done 30 | } 31 | 32 | # =============================================== 33 | echo "Start" 34 | run "F1" "$CFLAGS1" 35 | run "F2" "$CFLAGS2" 36 | echo "Completed" 37 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline-pi/fmradio.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FMRADIO_HPP 2 | #define FMRADIO_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | // #include 12 | 13 | 14 | 15 | using namespace gr; 16 | 17 | 18 | 19 | class fmradio { 20 | 21 | private: 22 | blocks::vector_source::sptr blocks_vector_source_0; 23 | blocks::file_source::sptr blocks_file_source_0; 24 | filter::fir_filter_ccf::sptr low_pass_filter_0; 25 | analog::quadrature_demod_cf::sptr fm_demodulator_0; 26 | blocks::null_sink::sptr null_sink_0; 27 | // audio::sink::sptr audio_sink_0; 28 | 29 | // Variables: 30 | double gain = 1; 31 | int samp_rate = 3072000; 32 | double cutoff = 75000; 33 | double tr_width = 53 * samp_rate / (22.0 * 65); 34 | int decimation = 64; 35 | int samp_rate2 = samp_rate/decimation; 36 | double max_dev = 75000; 37 | 38 | public: 39 | top_block_sptr tb; 40 | fmradio(const std::vector&); 41 | fmradio(); 42 | ~fmradio(); 43 | }; 44 | 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline/fmradio.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FMRADIO_HPP 2 | #define FMRADIO_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | // #include 12 | 13 | 14 | 15 | using namespace gr; 16 | 17 | 18 | 19 | class fmradio { 20 | 21 | private: 22 | blocks::vector_source::sptr blocks_vector_source_0; 23 | blocks::file_source::sptr blocks_file_source_0; 24 | filter::fir_filter_ccf::sptr low_pass_filter_0; 25 | analog::quadrature_demod_cf::sptr fm_demodulator_0; 26 | blocks::null_sink::sptr null_sink_0; 27 | // audio::sink::sptr audio_sink_0; 28 | 29 | // Variables: 30 | double gain = 1; 31 | int samp_rate = 3072000; 32 | double cutoff = 75000; 33 | double tr_width = 53 * samp_rate / (22.0 * 65); 34 | int decimation = 64; 35 | int samp_rate2 = samp_rate/decimation; 36 | double max_dev = 75000; 37 | 38 | public: 39 | top_block_sptr tb; 40 | fmradio(const std::vector&); 41 | fmradio(); 42 | ~fmradio(); 43 | }; 44 | 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /examples/streamit-fm/streamit/bench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -euo pipefail 3 | # set -euox pipefail 4 | 5 | WARMUP="5" 6 | REPS="20" # must be >= 2 due to mean_error 7 | 8 | CC="gcc-13" 9 | DBGFLAG="-save-temps" 10 | 11 | CFLAG_COMMON="-W -Wall -Wno-tautological-compare -Wno-unused-but-set-variable -DWARMUP=$WARMUP -DREPS=$REPS -pipe" 12 | CFLAGS1="${CFLAG_COMMON} -O3 -march=native -mfpmath=both -fno-math-errno -lm -DF1" 13 | CFLAGS2="${CFLAG_COMMON} -O3 -march=native -mfpmath=both -ffast-math -lm" 14 | 15 | OBJS="utils.c benchmarks_gen.c main.c" 16 | OBJSBASE="utils.c benchmarks_base.c main.c" 17 | 18 | benchs1=(\ 19 | "fmradio") 20 | 21 | run() { 22 | local OPT=$1 23 | local OUTPUT=$2 24 | local CFLAGS=$3 25 | ($CC $CFLAGS -o t.out $OBJS) && ./t.out >| $OUTPUT 26 | for bench in ${benchs1[@]}; do 27 | ($CC $CFLAGS -o ${bench}.out -DBENCHF1=$bench $OBJS) && ./${bench}.out >> $OUTPUT 28 | if [ "$bench" = "fmradio" ]; then 29 | ($CC $CFLAGS -o ${bench}.out -DBENCHF1=$bench -DBASELINE $OBJSBASE) && ./${bench}.out >> $OUTPUT 30 | fi 31 | done 32 | echo $OUTPUT 33 | } 34 | 35 | # =============================================== 36 | echo "The number of repetition is changed by REPS" 37 | run "F1" "result_f1.csv" "$CFLAGS1" 38 | run "F2" "result_f2.csv" "$CFLAGS2" 39 | echo "Completed" 40 | -------------------------------------------------------------------------------- /examples/streamit-fm/parameters.ml: -------------------------------------------------------------------------------- 1 | (* Parameters of FM Radio and StreamIt benchmark, taken from StreamIt code *) 2 | 3 | (* In US, FMRadio is 87-108 MHz; a channel is 200KHz wide 4 | https://en.wikipedia.org/wiki/Frequency_modulation 5 | *) 6 | 7 | let samplingRate = 250_000_000. (* 250 MHz *) 8 | let cutoffFrequency = 108_000_000. (* 108 MHz *) 9 | (* XXX https://www.mathworks.com/help/signal/ref/fir1.html#bulla52-n: 10 | "The order must be even because odd-order symmetric FIR filters must have zero gain at the Nyquist frequency." 11 | i.e. taps must be odd as gnuradio in https://github.com/gnuradio/gnuradio/blob/b2c9623cbd548bd86250759007b80b61bd4a2a06/gr-filter/lib/firdes.cc#L710. *) 12 | let numberOfTaps = 64 13 | 14 | let maxAmplitude = 27_000. 15 | let bandwidth = 10_000. (* 10 KHz *) 16 | 17 | (* Equalization, after demodulation *) 18 | let bands = 11 19 | let low = 55. 20 | let high = 1760. 21 | 22 | let eqCutoff = 23 | List.init bands (fun i -> 24 | exp (float i *. (log high -. log low) /. float (bands - 1) 25 | +. log low) 26 | ) 27 | 28 | let eqGain = 29 | List.init bands (fun i -> 30 | if i=0 then 0. 31 | else 32 | let t = (float (i - 1) -. (float (bands - 2) /. 2.)) /. 5. in 33 | if t > 0. then 2. -. t else 2. +. t 34 | ) 35 | 36 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/Makefile: -------------------------------------------------------------------------------- 1 | OCAMLC=ocamlc 2 | OCAMLOPT=ocamlopt 3 | OCAMLDEP=ocamldep 4 | OCAMLTOP=ocaml 5 | 6 | .SUFFIXES: .ml .mli .cmo .cmi .cmx .tex .pdf 7 | 8 | LIBDIR=../../lib 9 | LIB=${LIBDIR}/stream_ocaml.cma 10 | INCLUDES=-I ${LIBDIR} -I ${LIBDIR}/backends/Trx -I ${LIBDIR}/backends/C 11 | 12 | INTF= 13 | OBJS=parameters.cmo fir.cmo stream_gnuradio_sdr_fn.cmo 14 | 15 | # ======================================== 16 | .PHONY: all bench clean 17 | all: bench 18 | 19 | UNAME_OS=$(shell uname -s) 20 | ifeq ($(UNAME_OS),Linux) 21 | PBCOPY=xsel --clipboard --input 22 | else ifeq ($(UNAME_OS),Darwin) 23 | PBCOPY=pbcopy 24 | endif 25 | bench: ${OBJS} 26 | echo "#ifndef CMPLXF" >| /tmp/generated.c 27 | echo "#define CMPLXF(x, y) __builtin_complex ((float) (x), (float) (y))" >> /tmp/generated.c 28 | echo "#endif" >> /tmp/generated.c 29 | $(OCAMLTOP) $(INCLUDES) $(LIB) $^ main.ml | tail -n +2 >> /tmp/generated.c 30 | 31 | 32 | .mli.cmi: 33 | $(OCAMLC) -c $(INCLUDES) $< 34 | .ml.cmo: 35 | $(OCAMLC) -c $(INCLUDES) $< 36 | .ml.cmx: 37 | $(OCAMLOPT) -c $(INCLUDES) $< 38 | 39 | # depend: $(wildcard *.mli *.ml) 40 | # $(OCAMLDEP) $(INCLUDES) $^ > depend 41 | 42 | clean:: 43 | rm -f *.cm[ixoa] *.cmxa *.[oa] *.out depend 44 | 45 | test_atan: 46 | $(CC) -W -Wall -O3 -ffast-math -lm atan_test.c && ./a.out 47 | 48 | -include depend 49 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/atan_test.c: -------------------------------------------------------------------------------- 1 | /* Test of fast atanf */ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "fast_atanf.h" 9 | 10 | int main(void) { 11 | float abserr_x=0, abserr_y=0; 12 | float maxabs = 0.0f; 13 | float relerr_x=0, relerr_y=0; 14 | float maxrel = 0.0f; 15 | float errsum = 0.0f; 16 | int n = 0; 17 | 18 | for(float x=-1.0; x <= 1.0; x += 1./16.) 19 | for(float y=-1.0; y <= 1.0; y += 1./16.) 20 | { 21 | float const exact = atan2f(y,x); 22 | // float const apprx = fast_atan2f(y,x); 23 | float const apprx = faster_atan2f(y,x); 24 | float const abserr = fabs(exact - apprx); 25 | float const relerr = abserr / (exact + 1e-7); 26 | 27 | if(abserr > maxabs) { 28 | abserr_x = x; 29 | abserr_y = y; 30 | maxabs = abserr; 31 | } 32 | 33 | if(relerr > maxrel) { 34 | relerr_x = x; 35 | relerr_y = y; 36 | maxrel = relerr; 37 | } 38 | 39 | errsum += abserr; 40 | n += 1; 41 | } 42 | 43 | printf("\nMax abs error %g detected at x=%g, y=%g\n", 44 | maxabs,abserr_x,abserr_y); 45 | printf("\nMax rel error %g detected at x=%g, y=%g\n", 46 | maxrel,relerr_x,relerr_y); 47 | printf("\nAverage error %g over %d samples",errsum/(float)n, n); 48 | } 49 | -------------------------------------------------------------------------------- /examples/run-length-encoding/apples.c: -------------------------------------------------------------------------------- 1 | /* Generated C code and the driver */ 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | void rll(int64_t const n_1,int64_t * const a_2); 8 | 9 | int main(void) { 10 | int64_t arr[] = {41,41,41,41,42,42,42,43,43,41}; 11 | rll(sizeof(arr)/sizeof(arr[0]),arr); 12 | printf("Done\n"); 13 | } 14 | 15 | /* Generated code */ 16 | void rll(int64_t const n_1,int64_t * const a_2){ 17 | int64_t x_3 = 0; 18 | int64_t x_4 = 0; 19 | bool x_5 = false; 20 | int64_t x_6 = 0; 21 | while (x_6 <= n_1) 22 | { 23 | int64_t const t_7 = x_6; 24 | x_6++; 25 | if (t_7 < n_1) 26 | { 27 | int64_t const t_9 = a_2[t_7]; 28 | if (x_5) 29 | { 30 | int64_t const t_10 = x_4; 31 | x_4 = t_9; 32 | if (!(t_10 == t_9)) 33 | { 34 | int64_t const t_11 = x_3; 35 | x_3 = 0; 36 | printf("%ld\n",t_10); 37 | printf("%ld\n",t_11 + 1); 38 | } 39 | else 40 | x_3++; 41 | } 42 | else { 43 | x_4 = t_9; 44 | x_5 = true; 45 | } 46 | } 47 | else {if (x_5) 48 | { 49 | int64_t const t_8 = x_3; 50 | x_3 = 0; 51 | printf("%ld\n",x_4); 52 | printf("%ld\n",t_8 + 1); 53 | }} 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /examples/streamit-fm/README.md: -------------------------------------------------------------------------------- 1 | # FM Reception about StreamIt 2 | 3 | 4 | ## Bench 5 | `make bench` generates a C function coreesponding to the following strymonas pipeline to stdout: 6 | ```:ocaml 7 | get_floats 8 | |> fir_filter (Fir.lowPassFilter samplingRate cutoffFrequency numberOfTaps) 9 | ~decimation:4 10 | |> fmDemodulator samplingRate maxAmplitude bandwidth 11 | |> fir_filter 12 | (Fir.equalizer samplingRate bands eqCutoff eqGain numberOfTaps) 13 | |> take numIters 14 | |> iter F32.(fun e -> out := dref out +. e) 15 | ``` 16 | You can actually benchmark the newly generated code by replacing the code that had already been generated in [streamit/benchmarks_gen.c](streamit/benchmarks_gen.c). In this benchmark, you compare the generated code with http://groups.csail.mit.edu/cag/streamit/apps/benchmarks/fm/c/fmref.c, which is a hand-written reference C implementation of a StreamIt's FM Radio pipeline. 17 | 18 | **Requirements for the bench**: 19 | ``` 20 | $ cd streamit 21 | $ wget http://groups.csail.mit.edu/cag/streamit/apps/benchmarks/fm/c/fmref.c 22 | $ patch -u fmref.c fmref.patch 23 | $ mv fmref.c benchmarks_base.c 24 | $ make bench 25 | ``` 26 | 27 | 28 | ## Test 29 | `make test` (resp. `make test_c`) tests the application correctoness by StreamIt interpreter [sit_emulator.ml](sit_emulator.ml) while invoking [test.ml](test.ml) (resp. [test_c.ml](test_c.ml)). -------------------------------------------------------------------------------- /lib/0README.dr: -------------------------------------------------------------------------------- 1 | Strymonas library 2 | 3 | Basic (foundational) interface 4 | 5 | stream_raw.mli Interface 6 | stream_raw_fn.ml Implementation: functor of Cde 7 | 8 | Back-ends 9 | cde.mli Code generation interface, used in strymonas 10 | cde_ex.mli Extended interface, for user actions 11 | cde_top.mli Making and running top-level functions 12 | 13 | pk_cde.ml Partially-known cde 14 | pk_coll.ml Partially-known collections 15 | 16 | backends.ml The convenience for selecting a backend 17 | backends/Trx/trx_code_common.ml Instance for MetaOCaml code 18 | backends/Trx/trx_code.ml Instance for MetaOCaml code (bytecode) 19 | backends/Trx/trx_code_native.ml native code 20 | 21 | backends/C/c_cde.ml Instance for C 22 | The following is the offshoring back-end, shared with MetaOCaml 23 | backends/C/c_ast.mli AST for C 24 | backends/C/c_pp.ml Pretty-print the C AST into C code 25 | backends/C/offshoringIR.mli Offshoring IR 26 | backends/C/offshoringIR.ml 27 | backends/C/offshoringIR_pp.ml 28 | 29 | 30 | High-level interface 31 | stream_cooked.mli Interface 32 | stream_cooked_fn.ml Implementation: functor of Cde 33 | 34 | window_fn.ml Windowing 35 | 36 | Makefile How to make it 37 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline/utils.c: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | #include 3 | 4 | double mean(double arr[], int n) { 5 | double result = 0.; 6 | int i; 7 | for(i=0; i t_distribution 39 | double t_distribution_table[30] = { 40 | 12.706, 41 | 4.303, 42 | 3.182, 43 | 2.776, 44 | 2.571, 45 | 2.447, 46 | 2.365, 47 | 2.306, 48 | 2.262, 49 | 2.228, 50 | 2.201, 51 | 2.179, 52 | 2.160, 53 | 2.145, 54 | 2.131, 55 | 2.120, 56 | 2.110, 57 | 2.101, 58 | 2.093, 59 | 2.086, 60 | 2.080, 61 | 2.074, 62 | 2.069, 63 | 2.064, 64 | 2.060, 65 | 2.056, 66 | 2.052, 67 | 2.048, 68 | 2.045, 69 | 2.0 70 | }; 71 | 72 | // confidence == 95% 73 | double mean_error(double arr[], int n) { 74 | const double t = t_distribution_table[(n - 1)-1]; 75 | return t * (sqrt(unbiased_var(arr,n))) / (sqrt(n)); 76 | } 77 | -------------------------------------------------------------------------------- /examples/streamit-fm/streamit/utils.c: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | #include 3 | 4 | double mean(double arr[], int n) { 5 | double result = 0.; 6 | int i; 7 | for(i=0; i t_distribution 39 | double t_distribution_table[30] = { 40 | 12.706, 41 | 4.303, 42 | 3.182, 43 | 2.776, 44 | 2.571, 45 | 2.447, 46 | 2.365, 47 | 2.306, 48 | 2.262, 49 | 2.228, 50 | 2.201, 51 | 2.179, 52 | 2.160, 53 | 2.145, 54 | 2.131, 55 | 2.120, 56 | 2.110, 57 | 2.101, 58 | 2.093, 59 | 2.086, 60 | 2.080, 61 | 2.074, 62 | 2.069, 63 | 2.064, 64 | 2.060, 65 | 2.056, 66 | 2.052, 67 | 2.048, 68 | 2.045, 69 | 2.0 70 | }; 71 | 72 | // confidence == 95% 73 | double mean_error(double arr[], int n) { 74 | const double t = t_distribution_table[(n - 1)-1]; 75 | return t * (sqrt(unbiased_var(arr,n))) / (sqrt(n)); 76 | } 77 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline-pi/utils.c: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | #include 3 | 4 | double mean(double arr[], int n) { 5 | double result = 0.; 6 | int i; 7 | for(i=0; i t_distribution 39 | double t_distribution_table[30] = { 40 | 12.706, 41 | 4.303, 42 | 3.182, 43 | 2.776, 44 | 2.571, 45 | 2.447, 46 | 2.365, 47 | 2.306, 48 | 2.262, 49 | 2.228, 50 | 2.201, 51 | 2.179, 52 | 2.160, 53 | 2.145, 54 | 2.131, 55 | 2.120, 56 | 2.110, 57 | 2.101, 58 | 2.093, 59 | 2.086, 60 | 2.080, 61 | 2.074, 62 | 2.069, 63 | 2.064, 64 | 2.060, 65 | 2.056, 66 | 2.052, 67 | 2.048, 68 | 2.045, 69 | 2.0 70 | }; 71 | 72 | // confidence == 95% 73 | double mean_error(double arr[], int n) { 74 | const double t = t_distribution_table[(n - 1)-1]; 75 | return t * (sqrt(unbiased_var(arr,n))) / (sqrt(n)); 76 | } 77 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/utils.c: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | #include 3 | 4 | double mean(double arr[], int n) { 5 | double result = 0.; 6 | int i; 7 | for(i=0; i t_distribution 39 | double t_distribution_table[30] = { 40 | 12.706, 41 | 4.303, 42 | 3.182, 43 | 2.776, 44 | 2.571, 45 | 2.447, 46 | 2.365, 47 | 2.306, 48 | 2.262, 49 | 2.228, 50 | 2.201, 51 | 2.179, 52 | 2.160, 53 | 2.145, 54 | 2.131, 55 | 2.120, 56 | 2.110, 57 | 2.101, 58 | 2.093, 59 | 2.086, 60 | 2.080, 61 | 2.074, 62 | 2.069, 63 | 2.064, 64 | 2.060, 65 | 2.056, 66 | 2.052, 67 | 2.048, 68 | 2.045, 69 | 2.0 70 | }; 71 | 72 | // confidence == 95% 73 | double mean_error(double arr[], int n) { 74 | const double t = t_distribution_table[(n - 1)-1]; 75 | return t * (sqrt(unbiased_var(arr,n))) / (sqrt(n)); 76 | } 77 | 78 | 79 | void null_func(float const x) { 80 | return; 81 | } -------------------------------------------------------------------------------- /examples/streamit-fm/Makefile: -------------------------------------------------------------------------------- 1 | OCAMLC=ocamlc 2 | OCAMLOPT=ocamlopt 3 | OCAMLDEP=ocamldep 4 | METAOCAMLC=metaocamlc 5 | METAOCAMLOPT=metaocamlopt 6 | METAOCAMLTOP=metaocaml 7 | 8 | .SUFFIXES: .ml .mli .cmo .cmi .cmx .tex .pdf 9 | 10 | LIBDIR=../../lib 11 | LIB=${LIBDIR}/stream.cma 12 | LIBOPT=${LIBDIR}/stream.cmxa 13 | LIBPURE=${LIBDIR}/stream_ocaml.cma 14 | LIBPUREOPT=${LIBDIR}/stream_ocaml.cmxa 15 | INCLUDES=-I ${LIBDIR} -I ${LIBDIR}/backends/Trx -I ${LIBDIR}/backends/C 16 | 17 | INTF= 18 | OBJS=util.cmo parameters.cmo fir.cmo stream_streamit_sdr_fn.cmo 19 | # ======================================== 20 | .PHONY: all bench test test_c clean 21 | all: bench 22 | 23 | UNAME_OS=$(shell uname -s) 24 | ifeq ($(UNAME_OS),Linux) 25 | PBCOPY=xsel --clipboard --input 26 | else ifeq ($(UNAME_OS),Darwin) 27 | PBCOPY=pbcopy 28 | endif 29 | # Only main requires MetaOCaml 30 | bench: ${OBJS} 31 | $(METAOCAMLTOP) $(INCLUDES) $(LIB) $^ main.ml | tail -n +3 #| $(PBCOPY) 32 | 33 | test: ${OBJS} sit_emulator.cmo sit_experiments.cmo 34 | $(METAOCAMLC) -o test.out $(INCLUDES) $(LIB) $^ test.ml && ./test.out 35 | # Pure OCaml 36 | test_c: $(OBJS:.cmo=.cmx) sit_emulator.cmx sit_experiments.cmx 37 | $(OCAMLOPT) -o test_c.out $(INCLUDES) $(LIBPUREOPT) $^ test_c.ml && ./test_c.out 38 | 39 | # None of the OBJS requuire MetaOCaml 40 | 41 | .mli.cmi: 42 | $(OCAMLC) -c $(INCLUDES) $< 43 | .ml.cmo: 44 | $(OCAMLC) -c $(INCLUDES) $< 45 | .ml.cmx: 46 | $(OCAMLOPT) -c $(INCLUDES) $< 47 | 48 | depend: $(wildcard *.mli *.ml) 49 | $(OCAMLDEP) $(INCLUDES) $^ > depend 50 | 51 | clean:: 52 | rm -f *.cm[ixoa] *.cmxa *.[oa] *.out depend 53 | 54 | -include depend 55 | -------------------------------------------------------------------------------- /examples/streamit-fm/streamit/main.c: -------------------------------------------------------------------------------- 1 | #if defined(BASELINE) 2 | #if defined(F1) 3 | #define ADDSUFFRAW(name) #name"BaselineF1" 4 | #else 5 | #define ADDSUFFRAW(name) #name"BaselineF2" 6 | #endif 7 | #define ADDSUFF(name) ADDSUFFRAW(name) 8 | #else 9 | #if defined(F1) 10 | #define ADDSUFFRAW(name) #name"StrymonasF1" 11 | #else 12 | #define ADDSUFFRAW(name) #name"StrymonasF2" 13 | #endif 14 | #define ADDSUFF(name) ADDSUFFRAW(name) 15 | #endif 16 | #include "benchmarks.h" 17 | #include "utils.h" 18 | #include 19 | #include 20 | #include 21 | 22 | #ifndef WARMUP 23 | #define WARMUP 5 24 | #endif 25 | #ifndef REPS 26 | #define REPS 20 27 | #endif 28 | 29 | #define fmradio_def 1 30 | 31 | #define ADDDEF(name) name##_def 32 | #define STRCMP(benchf, bench) (ADDDEF(benchf) == ADDDEF(bench)) 33 | 34 | #define RUN1() \ 35 | { int64_t i;\ 36 | for (i=0; i F32.lit 0.5 *. (ch1 +. ch2)) 42 | 43 | (** pop = 1, push = 1, peek = 2 *) 44 | let fmDemodulator : float -> float -> float -> F32.t cstream -> F32.t cstream = 45 | fun sampRate max bandwidth st -> 46 | let gain = F32.lit (max *. (sampRate /. (bandwidth *. Float.pi))) in 47 | let (module Win) = Window.make_window F32.tbase 2 1 in 48 | st 49 | |> Win.make_stream 50 | |> map_raw (Win.reduce F32.( *. )) (* mixing of the 2 signals *) 51 | |> map_raw C.(fun e k -> letl F32.(gain *. atan.invoke e) k) 52 | end 53 | -------------------------------------------------------------------------------- /examples/TryFirst/simple_c.ml: -------------------------------------------------------------------------------- 1 | (* The first, simplest and commonest examples of using Strymonas, 2 | with detailed explanations 3 | 4 | This example is for generating C. It does not require MetaOCaml 5 | *) 6 | 7 | (* Evaluate the following if working at the top level of OCaml 8 | 9 | #directory "../../lib";; 10 | #directory "../../lib/backends/C";; 11 | #load "stream_ocaml.cma";; 12 | *) 13 | 14 | (* C backend *) 15 | 16 | (* The following module is quoted from the module Backends. So we could have 17 | just said 18 | module C = Backends.C 19 | 20 | However, Backends module also contains MetaOCaml backends and hence needs 21 | MetaOCaml to compile it. We want the present file to be compilable by 22 | OCaml only. 23 | *) 24 | 25 | module C = Pk_cde.Make(C_cde) 26 | 27 | (* Again, Open the strymonas library: higher-level interface, which see 28 | ../../lib/stream_cooked.mli 29 | *) 30 | open Stream_cooked_fn.Make(C) 31 | 32 | (* The first example (also the first example in the paper) as before. 33 | But now C stands for the C backend 34 | See the paper (OCaml 22 paper) for explanation 35 | *) 36 | let ex1 = iota C.(int 1) |> map C.(fun e -> e * e) 37 | (* val ex1 : int cstream = *) 38 | 39 | (* The following sum is for illustration only: strymonas already provides this 40 | combinator, under the name sum_int, defined as below. 41 | *) 42 | let sum = fold C.(+) C.(int 0) 43 | (* val sum : int cstream -> int cde = *) 44 | 45 | let ex2 = ex1 |> filter C.(fun e -> e mod (int 17) > int 7) 46 | |> take C.(int 10) |> sum 47 | 48 | (* The generated code *) 49 | let _ = C.print_code ~name:"fn" ex2 50 | 51 | (* 52 | int fn(){ 53 | int x_1 = 0; 54 | int x_2 = 10; 55 | int x_3 = 1; 56 | while (x_2 > 0) 57 | { 58 | int const t_4 = x_3; 59 | x_3++; 60 | int const t_5 = t_4 * t_4; 61 | if ((t_5 % 17) > 7) 62 | { 63 | x_2--; 64 | x_1 = x_1 + t_5; 65 | } 66 | } 67 | return x_1; 68 | } 69 | *) 70 | 71 | (* We can compile it, and run capturing its output *) 72 | 73 | let[@warning "-8"] 853 = C.run ex2 74 | 75 | let () = print_endline "All done" 76 | ;; 77 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline/main.cpp: -------------------------------------------------------------------------------- 1 | /* disable the logging settings in ~/.gnuradio/config.conf */ 2 | #include "fmradio.hpp" 3 | #include "utils.h" 4 | 5 | #include 6 | #include 7 | #include 8 | gr_complex *load_file(const char* file_name, size_t * arrlen) { 9 | size_t desired_size = *arrlen; 10 | 11 | FILE *fp = fopen(file_name, "rb"); 12 | 13 | if ( desired_size == 0 ) { 14 | desired_size = std::filesystem::file_size(file_name) / sizeof(gr_complex); 15 | } 16 | 17 | assert ( desired_size > 0 ); 18 | gr_complex *arr = 19 | (gr_complex *)malloc(sizeof(gr_complex) * desired_size); 20 | size_t const real_size = fread(arr, sizeof(gr_complex), desired_size, fp); 21 | if ( *arrlen > 0 ) 22 | { 23 | if (desired_size != real_size) 24 | perror("read error"), exit(1); 25 | } 26 | else *arrlen = real_size; 27 | 28 | fclose(fp); 29 | return arr; 30 | } 31 | 32 | #define WARMUP 5 33 | #define REPS 20 34 | double results[REPS]; 35 | int main () { 36 | size_t arr_size = 0; 37 | gr_complex *arr = load_file("./sps3072000_c32_30s.pcm",&arr_size); 38 | std::vector source(arr, arr + arr_size); 39 | printf("\nTesting on the array of size %lu\n",arr_size); 40 | 41 | fmradio *top_block; 42 | int i; 43 | for (i=0; itb->start(); 47 | top_block->tb->wait(); 48 | delete top_block; 49 | } 50 | for (i=0; itb->start(); 55 | top_block->tb->wait(); 56 | double end = time_in_mili(); 57 | results[i] = (end - start); 58 | delete top_block; 59 | } 60 | 61 | printf("%-40s %10s %10s %5s %7s\n", "Benchmark", "Mean", "Mean-Error", "Sdev", "Unit"); 62 | printf("%-40s %10.1lf %10.1lf %5.1lf ms\n",\ 63 | "gr-fmradio_gnuradio",\ 64 | mean(results, REPS),\ 65 | mean_error(results, REPS),\ 66 | standard_deviation(results, REPS)); 67 | 68 | return 0; 69 | } 70 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline-pi/main.cpp: -------------------------------------------------------------------------------- 1 | /* disable the logging settings in ~/.gnuradio/config.conf */ 2 | #include "fmradio.hpp" 3 | #include "utils.h" 4 | 5 | #include 6 | #include 7 | #include 8 | gr_complex *load_file(const char* file_name, size_t * arrlen) { 9 | size_t desired_size = *arrlen; 10 | 11 | FILE *fp = fopen(file_name, "rb"); 12 | 13 | if ( desired_size == 0 ) { 14 | desired_size = boost::filesystem::file_size(file_name) / sizeof(gr_complex); 15 | } 16 | 17 | assert ( desired_size > 0 ); 18 | gr_complex *arr = 19 | (gr_complex *)malloc(sizeof(gr_complex) * desired_size); 20 | size_t const real_size = fread(arr, sizeof(gr_complex), desired_size, fp); 21 | if ( *arrlen > 0 ) 22 | { 23 | if (desired_size != real_size) 24 | perror("read error"), exit(1); 25 | } 26 | else *arrlen = real_size; 27 | 28 | fclose(fp); 29 | return arr; 30 | } 31 | 32 | #define WARMUP 5 33 | #define REPS 20 34 | double results[REPS]; 35 | int main () { 36 | // FOR BENCH: 37 | size_t arr_size = 0; 38 | gr_complex *arr = load_file("./sps3072000_c32_3s.pcm",&arr_size); 39 | std::vector source(arr, arr + arr_size); 40 | printf("\nTesting on the array of size %lu\n",arr_size); 41 | 42 | fmradio *top_block; 43 | int i; 44 | for (i=0; itb->start(); 48 | top_block->tb->wait(); 49 | delete top_block; 50 | } 51 | for (i=0; itb->start(); 56 | top_block->tb->wait(); 57 | double end = time_in_mili(); 58 | results[i] = (end - start); 59 | delete top_block; 60 | } 61 | 62 | printf("%-40s %10s %10s %5s %7s\n", "Benchmark", "Mean", "Mean-Error", "Sdev", "Unit"); 63 | printf("%-40s %10.1lf %10.1lf %5.1lf ms\n",\ 64 | "gr-fmradio_gnuradio",\ 65 | mean(results, REPS),\ 66 | mean_error(results, REPS),\ 67 | standard_deviation(results, REPS)); 68 | 69 | return 0; 70 | } 71 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/main.ml: -------------------------------------------------------------------------------- 1 | (* for the reference *) 2 | module C = Pk_cde.Make(C_cde) 3 | module F32 = C.F32 4 | module Raw = Stream_raw_fn.Make(C) 5 | open Stream_cooked_fn.Make(C) 6 | open Stream_gnuradio_sdr_fn.Make(C)(Raw) 7 | 8 | open Parameters 9 | 10 | let ( let- ) c k = c k 11 | 12 | (* Main module for benchmark: array input (could be mmmaped array), 13 | dummy output 14 | *) 15 | let () = 16 | C.pp_proc ~name:"gr_fmradio" Format.std_formatter @@ 17 | C.arg_base C.tint @@ fun n -> 18 | C.arg_array n C32.tbase @@ fun arr -> 19 | C.nullary_proc @@ begin 20 | Raw.pull_array C.(array_len arr) C.(array_get arr) 21 | |> fir_filter_ccf (Fir.lowPassFilter gain samplingRate cutoff trWidth) 22 | ~decimation:64 23 | |> demod_quad_fm ((samplingRate /. 64.) /. (2. *. Float.pi *. maxDev)) 24 | |> iter null_func.invoke 25 | end 26 | 27 | let () = 28 | C.pp_proc ~name:"gr_fmradio_fread" Format.std_formatter @@ 29 | C.nullary_proc @@ begin 30 | let open C in 31 | init_read.invoke () @. 32 | begin 33 | file_read_c32 1024 34 | |> fir_filter_ccf (Fir.lowPassFilter gain samplingRate cutoff trWidth) 35 | ~decimation:64 36 | |> demod_quad_fm ((samplingRate /. 64.) /. (2. *. Float.pi *. maxDev)) 37 | |> iter null_func.invoke 38 | end 39 | end 40 | 41 | 42 | (* Main module for playback *) 43 | let () = 44 | C.pp_proc ~name:"gr_fmradio" Format.std_formatter @@ 45 | C.arg_base C.tint @@ fun n -> 46 | C.arg_array n C32.tbase @@ fun arr -> 47 | C.nullary_proc @@ begin 48 | Raw.pull_array C.(array_len arr) C.(array_get arr) 49 | |> fir_filter_ccf (Fir.lowPassFilter gain samplingRate cutoff trWidth) 50 | ~decimation:64 51 | |> demod_quad_fm ((samplingRate /. 64.) /. (2. *. Float.pi *. maxDev)) 52 | |> iter write_f32_le.invoke 53 | end 54 | 55 | let () = 56 | C.pp_proc ~name:"gr_fmradio_fread" Format.std_formatter @@ 57 | C.nullary_proc @@ begin 58 | file_read_ci8_in_c32 1024 59 | |> fir_filter_ccf (Fir.lowPassFilter gain samplingRate cutoff trWidth) 60 | ~decimation:64 61 | |> demod_quad_fm ((samplingRate /. 64.) /. (2. *. Float.pi *. maxDev)) 62 | |> iter write_f32_le.invoke 63 | end 64 | -------------------------------------------------------------------------------- /lib/backends.ml: -------------------------------------------------------------------------------- 1 | (* Convinient functions for selecting a backend *) 2 | 3 | (* MetaOCaml backend 4 | 5 | This backend implements the `least-common denominator' of backends: 6 | user actions implemented with this backend would work as they are 7 | if we switch to any other backend, like C. 8 | 9 | Beside implementing the backend interface, the module offers procedures 10 | to actually extract the generated Ocaml code, so it could be saved 11 | or run. 12 | *) 13 | 14 | module MetaOCaml = struct 15 | include Pk_cde.Make(Trx_code) 16 | 17 | (* return the generated code *) 18 | let to_code : 'a stm -> 'a code = dyn_stm 19 | (* Historically used in benchmarks and early tests *) 20 | let one_arg_fun : ('a arr -> 'b stm) -> ('a array -> 'b) code 21 | = fun f -> 22 | . 23 | .~(dyn_stm @@ f (inj_global ..,..))>. 24 | let two_arg_fun : ('a arr * 'b arr -> 'c stm) -> 25 | ('a array * 'b array -> 'c) code 26 | = fun f -> 27 | . 28 | .~(dyn_stm @@ f ((inj_global ..,..), 29 | (inj_global ..,..)))>. 30 | end 31 | 32 | (* The following is an extended version of MetaOCaml backend for OCaml-specific 33 | user actions. It permits arbitrary MetaOCaml quotations in user actions. 34 | This backend can no longer be swapped for C backend. 35 | *) 36 | module MetaOCamlExt = struct 37 | include MetaOCaml 38 | 39 | let of_code : 'a code -> 'a exp = inj 40 | 41 | (* Convenience: Lists and Pairs *) 42 | let nil : unit -> 'a list exp = fun () -> inj_global Trx_code.nil 43 | let cons : 'a exp -> 'a list exp -> 'a list exp = fun x y -> 44 | inj2 Trx_code.cons x y 45 | 46 | let pair : 'a exp -> 'b exp -> ('a * 'b) exp = fun x y -> 47 | inj2 Trx_code.pair x y 48 | let cde_app1 : ('a -> 'b) code -> 'a exp -> 'b exp = fun cde -> 49 | inj1 ((Trx_code.make_ff1 cde).invoke) 50 | let fst : ('a * 'b) exp -> 'a exp = fun x -> cde_app1 .. x 51 | let snd : ('a * 'b) exp -> 'b exp = fun x -> cde_app1 .. x 52 | end 53 | 54 | 55 | (* C backend *) 56 | module C = Pk_cde.Make(C_cde) 57 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/README.md: -------------------------------------------------------------------------------- 1 | # FM Reception about GNU Radio 2 | 3 | - [baseline](baseline) is a GNU Radio FM reception bench for x86-64 4 | - [baseline-pi](baseline-pi) is a GNU Radio FM reception bench for Raspberry Pi Zero 5 | 6 | These benchmarks are compared with strymons-generated C codes about the same FM reception bench as follows: 7 | 1. First of all, prepare signal file sources for the bench while your PC is connected to HackRF One: 8 | ``` 9 | e.g.: 10 | $ hackrf_transfer -r sps3072000_s8_30s.pcm -f 82500000 -s 3072000 -g 30 -l 40 -a 0 -n 92160000 11 | $ ffmpeg -f s8 -i sps3072000_s8_30s.pcm -f f32le sps3072000_c32_30s.pcm 12 | $ head --byte=122880000 sps3072000_c32_30s.pcm > sps3072000_c32_5s.pcm 13 | $ head --byte=73728000 sps3072000_c32_30s.pcm > sps3072000_c32_3s.pcm 14 | $ head --byte=49152000 sps3072000_c32_30s.pcm > sps3072000_c32_2s.pcm 15 | $ cp sps3072000_* baseline 16 | $ cp sps3072000_* baseline-pi 17 | ``` 18 | 2. Select the strymons pipelines in [main.ml](main.ml) for use, then comment out all the other pipelines. 19 | 3. `make bench` generates the selected FM reception C code to `/tmp/generated.c`, which is included in [bench_main.c](bench_main.c) (and [play_main.c](play_main.c), which is used for playback test). 20 | 4. Adjust the commented-out sources in [bench_main.c](bench_main.c) depending on your purpose. 21 | 5. Compile the code by `gcc -Ofast -march=native -W -Wall utils.c bench_main.c`, for example, and invoke `./a.out` to get a strymonas result. 22 | 6. In baseline(-pi), adjust the commented-out sources in the related files, then invoke `make` and `./build/gr-fmradio_gnuradio` to get a GNU Radio result. 23 | 24 | As for the playback test, see the code and comments in [play_main.c](play_main.c) for more details (especially, about pipeing from hackrf_transfer to ffplay or aplay, for example). 25 | 26 | 27 | ## Playback Test on Headless Raspberry Pi 28 | Use PulseAudio. 29 | 30 | - Remote Raspberry Pi: 31 | 1. `default-server = ` in `~/.config/pulse/client.conf` 32 | 2. `pulseaudio --start` 33 | 3. test `aplay /usr/share/sounds/alsa/Rear_Center.wav` 34 | - Local machine (the macOS case): 35 | 1. `brew services start pulseaudio` 36 | 37 | 38 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/fir.ml: -------------------------------------------------------------------------------- 1 | (* Finite Impulse Response filters (FIR) 2 | For benchmarking and comparsion, we use exactly the filters 3 | used by GNU Radio 4 | *) 5 | 6 | type weights = float array (* represented by coefficients *) 7 | 8 | 9 | (* ===== basic operators ===== *) 10 | let scale : float -> weights -> weights = 11 | fun c -> Array.map (fun e -> c *. e) 12 | let scale_down : float -> weights -> weights = 13 | fun c -> Array.map (fun e -> e /. c) 14 | let norm : weights -> weights = fun w -> 15 | let sum = Array.fold_left (+.) 0. w in 16 | Array.map (fun e -> e /. sum) w 17 | 18 | (* ===== fusion operators for weights ===== *) 19 | (* horizontal (parallel) fusion *) 20 | let add : weights -> weights -> weights = Array.map2 ( +. ) 21 | let sub : weights -> weights -> weights = Array.map2 ( -. ) 22 | 23 | (* ===== weights ===== *) 24 | (* https://github.com/gnuradio/gnuradio/blob/main/gr-filter/lib/fir_filter.cc#L102 25 | ** https://github.com/gnuradio/gnuradio/blob/main/gr-filter/lib/firdes.cc#L77 26 | ** Using Float.pi instead of the literal number, as in GNU Radio, doesn't seem 27 | ** to have any effect *) 28 | let lowPassFilter : ?is_normalized:bool -> float -> float -> float -> float -> weights = 29 | fun ?(is_normalized=true) gain freq cutoff transition_width -> 30 | let taps = 31 | let t = int_of_float @@ 53. *. freq /. (22. *. transition_width) in 32 | if t mod 2 = 0 then t+1 else t 33 | in 34 | let m = float taps -. 1. in (* dim *) 35 | let tau = 2. *. Float.pi in 36 | let w = tau *. cutoff /. freq in (* normalized angular frequency *) 37 | let coeff = Array.init taps @@ fun i -> 38 | let t = float i -. m /. 2. in 39 | (if (t = 0.) then w /. Float.pi else sin (w *. t) /. (Float.pi *. t)) 40 | *. (0.54 -. 0.46 *. cos (tau *. float i /. m)) (* hamming window *) 41 | in 42 | if is_normalized then 43 | (* https://github.com/gnuradio/gnuradio/blob/main/gr-filter/lib/firdes.cc#L62 *) 44 | (* XXX reverse for not analitical one? https://github.com/gnuradio/gnuradio/blob/main/gr-filter/lib/fir_filter.cc#L30 *) 45 | coeff 46 | |> scale gain 47 | |> norm 48 | else 49 | coeff 50 | |> scale gain 51 | 52 | (* ===== for debugging ===== *) 53 | let print : weights -> unit = 54 | Array.iter (fun e -> print_float e; print_newline ()) 55 | -------------------------------------------------------------------------------- /benchmarks/util.ml: -------------------------------------------------------------------------------- 1 | (* 2 | open Oml.Statistics.Distributions 3 | open Oml.Statistics.Descriptive 4 | *) 5 | 6 | 7 | let mean : float array -> float = fun arr -> 8 | (Array.fold_left (+.) 0.0 arr) /. (float @@ Array.length arr) 9 | 10 | 11 | let var : float array -> float = fun arr -> 12 | let m = mean arr in 13 | (Array.fold_left (fun acc x -> 14 | let x' = x -. m in acc +. x'*.x') 0.0 arr) /. (float @@ Array.length arr) 15 | 16 | 17 | let unbiased_var : float array -> float = fun arr -> 18 | ((float @@ Array.length arr) *. var arr) /. (sqrt @@ float @@ Array.length arr - 1) 19 | 20 | 21 | (* approximation under N ~ infinity *) 22 | let standard_deviation : float array -> float = fun arr -> sqrt(var arr) 23 | 24 | 25 | let t_distribution_table deg_free = 26 | try 27 | List.assoc deg_free [ 28 | ( 1, 12.706); 29 | ( 2, 4.303); 30 | ( 3, 3.182); 31 | ( 4, 2.776); 32 | ( 5, 2.571); 33 | ( 6, 2.447); 34 | ( 7, 2.365); 35 | ( 8, 2.306); 36 | ( 9, 2.262); 37 | (10, 2.228); 38 | (11, 2.201); 39 | (12, 2.179); 40 | (13, 2.160); 41 | (14, 2.145); 42 | (15, 2.131); 43 | (16, 2.120); 44 | (17, 2.110); 45 | (18, 2.101); 46 | (19, 2.093); 47 | (20, 2.086); 48 | (21, 2.080); 49 | (22, 2.074); 50 | (23, 2.069); 51 | (24, 2.064); 52 | (25, 2.060); 53 | (26, 2.056); 54 | (27, 2.052); 55 | (28, 2.048); 56 | (29, 2.045); 57 | (30, 2.042) 58 | ] 59 | with Not_found -> 60 | failwith "Not supported sample size" 61 | 62 | 63 | let mean_error confidence arr = 64 | let sample_size = Array.length arr in 65 | (* `a_2` means `a/2` where `a` is a significance level. *) 66 | (* `t` is the upper `a_2` point in t-distribution *) 67 | (* let a_2 = (1.0 -. confidence) /. 2.0 in 68 | let t = (student_quantile ~degrees_of_freedom:(float sample_size -. 1.0) 69 | (1.0 -. a_2)) in *) 70 | (* a value for 99.5% CI where a sample size is 30 (i.e. 29 degrees of freedom) *) 71 | (* let t = 3.038 in *) 72 | (* a value for 95% CI where a sample size is 30 (i.e. 29 degrees of freedom) *) 73 | (* let t = 2.045 in *) 74 | let t = t_distribution_table (sample_size - 1) in 75 | t *. (sqrt @@ unbiased_var arr) /. (sqrt @@ float sample_size) 76 | 77 | 78 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline-pi/fmradio.cpp: -------------------------------------------------------------------------------- 1 | #include "fmradio.hpp" 2 | 3 | using namespace gr; 4 | 5 | fmradio::fmradio (const std::vector& source) { 6 | this->tb = make_top_block("FM Radio"); 7 | 8 | // Blocks: 9 | this->blocks_vector_source_0 = blocks::vector_source::make(source); 10 | this->low_pass_filter_0 = filter::fir_filter_ccf::make( 11 | decimation, 12 | filter::firdes::low_pass( 13 | gain, 14 | samp_rate, 15 | cutoff, 16 | tr_width, 17 | filter::firdes::win_type::WIN_HAMMING, 18 | 6.76 19 | ) 20 | ); 21 | this->fm_demodulator_0 = analog::quadrature_demod_cf::make( 22 | (samp_rate2/(2*M_PI*max_dev)) 23 | ); 24 | this->null_sink_0 = blocks::null_sink::make(4); 25 | // this->audio_sink_0 = audio::sink::make(48000); 26 | 27 | // Connections: 28 | this->tb->hier_block2::connect(this->blocks_vector_source_0, 0, this->low_pass_filter_0, 0); 29 | this->tb->hier_block2::connect(this->low_pass_filter_0, 0, this->fm_demodulator_0, 0); 30 | this->tb->hier_block2::connect(this->fm_demodulator_0, 0, this->null_sink_0, 0); 31 | // this->tb->hier_block2::connect(this->fm_demodulator_0, 0, this->audio_sink_0, 0); 32 | } 33 | 34 | fmradio::fmradio () { 35 | this->tb = make_top_block("FM Radio"); 36 | 37 | // Blocks: 38 | this->blocks_file_source_0 = blocks::file_source::make(sizeof(gr_complex), "./sps3072000_c32_3s.pcm", false, 0, 0); 39 | this->low_pass_filter_0 = filter::fir_filter_ccf::make( 40 | decimation, 41 | filter::firdes::low_pass( 42 | gain, 43 | samp_rate, 44 | cutoff, 45 | tr_width, 46 | filter::firdes::win_type::WIN_HAMMING, 47 | 6.76 48 | ) 49 | ); 50 | this->fm_demodulator_0 = analog::quadrature_demod_cf::make( 51 | (samp_rate2/(2*M_PI*max_dev)) 52 | ); 53 | this->null_sink_0 = blocks::null_sink::make(4); 54 | // this->audio_sink_0 = audio::sink::make(48000); 55 | 56 | // Connections: 57 | this->tb->hier_block2::connect(this->blocks_file_source_0, 0, this->low_pass_filter_0, 0); 58 | this->tb->hier_block2::connect(this->low_pass_filter_0, 0, this->fm_demodulator_0, 0); 59 | this->tb->hier_block2::connect(this->fm_demodulator_0, 0, this->null_sink_0, 0); 60 | // this->tb->hier_block2::connect(this->fm_demodulator_0, 0, this->audio_sink_0, 0); 61 | } 62 | 63 | fmradio::~fmradio () { 64 | } 65 | 66 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/baseline/fmradio.cpp: -------------------------------------------------------------------------------- 1 | #include "fmradio.hpp" 2 | 3 | using namespace gr; 4 | 5 | fmradio::fmradio (const std::vector& source) { 6 | this->tb = make_top_block("FM Radio", false); 7 | 8 | // Blocks: 9 | this->blocks_vector_source_0 = blocks::vector_source::make(source); 10 | this->low_pass_filter_0 = filter::fir_filter_ccf::make( 11 | decimation, 12 | filter::firdes::low_pass( 13 | gain, 14 | samp_rate, 15 | cutoff, 16 | tr_width, 17 | fft::window::win_type::WIN_HAMMING, 18 | 6.76 19 | ) 20 | ); 21 | this->fm_demodulator_0 = analog::quadrature_demod_cf::make( 22 | (samp_rate2/(2*M_PI*max_dev)) 23 | ); 24 | this->null_sink_0 = blocks::null_sink::make(4); 25 | // this->audio_sink_0 = audio::sink::make(48000); 26 | 27 | // Connections: 28 | this->tb->hier_block2::connect(this->blocks_vector_source_0, 0, this->low_pass_filter_0, 0); 29 | this->tb->hier_block2::connect(this->low_pass_filter_0, 0, this->fm_demodulator_0, 0); 30 | this->tb->hier_block2::connect(this->fm_demodulator_0, 0, this->null_sink_0, 0); 31 | // this->tb->hier_block2::connect(this->fm_demodulator_0, 0, this->audio_sink_0, 0); 32 | } 33 | 34 | fmradio::fmradio () { 35 | this->tb = make_top_block("FM Radio", false); 36 | 37 | // Blocks: 38 | this->blocks_file_source_0 = blocks::file_source::make(sizeof(gr_complex), "./sps3072000_c32_30s.pcm", false, 0, 0); 39 | this->low_pass_filter_0 = filter::fir_filter_ccf::make( 40 | decimation, 41 | filter::firdes::low_pass( 42 | gain, 43 | samp_rate, 44 | cutoff, 45 | tr_width, 46 | fft::window::win_type::WIN_HAMMING, 47 | 6.76 48 | ) 49 | ); 50 | this->fm_demodulator_0 = analog::quadrature_demod_cf::make( 51 | (samp_rate2/(2*M_PI*max_dev)) 52 | ); 53 | this->null_sink_0 = blocks::null_sink::make(4); 54 | // this->audio_sink_0 = audio::sink::make(48000); 55 | 56 | // Connections: 57 | this->tb->hier_block2::connect(this->blocks_file_source_0, 0, this->low_pass_filter_0, 0); 58 | this->tb->hier_block2::connect(this->low_pass_filter_0, 0, this->fm_demodulator_0, 0); 59 | this->tb->hier_block2::connect(this->fm_demodulator_0, 0, this->null_sink_0, 0); 60 | // this->tb->hier_block2::connect(this->fm_demodulator_0, 0, this->audio_sink_0, 0); 61 | } 62 | 63 | fmradio::~fmradio () { 64 | } 65 | 66 | -------------------------------------------------------------------------------- /examples/streamit-fm/util.ml: -------------------------------------------------------------------------------- 1 | let reduce : ('a -> 'a -> 'a) -> 'a list -> 'a = fun f -> function 2 | | h::t -> List.fold_left f h t 3 | | _ -> failwith "reduce: empty list" 4 | 5 | let drop_last : 'a list -> 'a list = fun l -> 6 | List.filteri (fun i _ -> i < List.length l - 1) l 7 | 8 | (* compare two lists, which are should be essnetially equal *) 9 | let cnter = ref 0 10 | let check_identity_list ?name ?(threshold_ave_rel_err=1e-12) ?(threshold_max_rel_err=1e-6) 11 | (temp_list: float list) (real_list: float list) 12 | : unit = 13 | let threshold_for_rel_err = 0.001 in 14 | Printf.printf "=== Check %s ===\n" (match name with Some x -> x | None -> incr cnter; string_of_int !cnter); 15 | let abs_err_list = 16 | List.map2 (fun ti ri -> Float.abs (ti -. ri)) temp_list real_list in 17 | let rel_err_list = 18 | List.combine abs_err_list real_list 19 | |> List.filter (fun (abs_err, ri) -> Float.abs ri >= threshold_for_rel_err) 20 | |> List.map (fun (abs_err, ri) -> abs_err /. Float.abs ri) 21 | in 22 | let ave_abs_err = 23 | let n = List.length real_list in 24 | if n>0 then 25 | List.fold_left ( +. ) 0. abs_err_list /. float n 26 | else 27 | 0. 28 | in 29 | let ave_rel_err = 30 | let n = List.length rel_err_list in 31 | if n>0 then 32 | List.fold_left ( +. ) 0. rel_err_list /. float n 33 | else 34 | 0. 35 | in 36 | let (_, max_abs_err, i_max, actual, expected) = 37 | let mymax (cnt, max_ae, i, a, e) x y = 38 | let xy = Float.abs (x -. y) in 39 | if xy > max_ae then 40 | (cnt+1, xy, cnt, x, y) 41 | else 42 | (cnt+1, max_ae, i, a, e) 43 | in 44 | List.fold_left2 mymax (0, 0., 0, 0., 0.) temp_list real_list 45 | in 46 | let (_, max_rel_err, i_max2, actual2, expected2) = 47 | let mymax (cnt, max_re, i, a, e) x y = 48 | if Float.abs y >= threshold_for_rel_err then 49 | let xy_y = Float.abs (x -. y) /. Float.abs y in 50 | if xy_y > max_re then 51 | (cnt+1, xy_y, cnt, x, y) 52 | else 53 | (cnt+1, max_re, i, a, e) 54 | else 55 | (cnt+1, max_re, i, a, e) 56 | in 57 | List.fold_left2 mymax (0, 0., 0, 0., 0.) temp_list real_list 58 | in 59 | Printf.printf "ave_abs_err: %.20f\n" ave_abs_err; 60 | Printf.printf "ave_rel_err: %.20f\n" ave_rel_err; 61 | Printf.printf "max abs err: %g = %g - %g, where i = %d\n" max_abs_err actual expected i_max; 62 | Printf.printf "max rel err: %g\n" max_rel_err; 63 | assert (ave_rel_err < threshold_ave_rel_err); 64 | assert (max_rel_err < threshold_max_rel_err) 65 | -------------------------------------------------------------------------------- /lib/cde_ex.mli: -------------------------------------------------------------------------------- 1 | (* Abstract interface for code generation 2 | 3 | Extended interface that supports user-supplied mapping, etc. actions 4 | *) 5 | 6 | include module type of Cde 7 | 8 | val ( @. ) : unit stm -> 'a stm -> 'a stm 9 | val seqs : unit stm list -> unit stm 10 | 11 | (* A constant of a base type: 0 or its equivalent *) 12 | val tbase_zero : 'a tbase -> 'a exp 13 | 14 | (* Integers *) 15 | val imax : int exp -> int exp -> int exp 16 | val ( * ) : int exp -> int exp -> int exp 17 | val ( / ) : int exp -> int exp -> int exp 18 | val ( ~-) : int exp -> int exp 19 | val logand : int exp -> int exp -> int exp 20 | 21 | (* Simple i/o, useful for debugging. Newline at the end *) 22 | val print_int : int exp -> unit stm 23 | 24 | (* Foreign function interface *) 25 | 26 | (* foreign function type : backends provide the constructors *) 27 | type 'sg ff = private {invoke : 'sg} 28 | 29 | (* Numbers of various sorts *) 30 | module type num = sig 31 | type t 32 | type num_t 33 | val tbase : t tbase 34 | val to_t : num_t -> t (* for the sake of Partial Eval *) 35 | val of_t : t -> num_t 36 | val lit : num_t -> t exp 37 | val neg : t exp -> t exp 38 | val ( +. ) : t exp -> t exp -> t exp 39 | val ( -. ) : t exp -> t exp -> t exp 40 | val ( *. ) : t exp -> t exp -> t exp 41 | val ( /. ) : t exp -> t exp -> t exp 42 | val equal : t exp -> t exp -> bool exp 43 | val ( < ) : t exp -> t exp -> bool exp 44 | val ( > ) : t exp -> t exp -> bool exp 45 | val print : t exp -> unit stm 46 | end 47 | 48 | (* Floating point numbers of various sorts *) 49 | module type flonum = sig 50 | include num 51 | val rem : t exp -> t exp -> t exp 52 | val truncate : t exp -> int exp 53 | val of_int : int exp -> t exp 54 | val sin : (t exp -> t exp) ff 55 | val cos : (t exp -> t exp) ff 56 | val atan : (t exp -> t exp) ff (* more can be added *) 57 | end 58 | 59 | (* Complex numbers *) 60 | module type cmplxnum = sig 61 | include num 62 | type float_t 63 | val conj : t exp -> t exp 64 | val norm2 : t exp -> float_t exp (* Euclidian norm, squared *) 65 | val arg : t exp -> float_t exp (* -pi to pi *) 66 | val real : t exp -> float_t exp 67 | val imag : t exp -> float_t exp 68 | val complex : float_t exp -> float_t exp -> t exp 69 | val scale : float_t exp -> t exp -> t exp 70 | end 71 | 72 | module F64 : (flonum with type num_t = float) 73 | module F32 : (flonum with type num_t = float) 74 | module C32 : (cmplxnum with type num_t = Complex.t and type float_t = F32.t) 75 | 76 | module I64 : sig 77 | include num with type num_t = int 78 | val of_int : int exp -> t exp 79 | end 80 | 81 | 82 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # strymonas-ocaml 2 | Strymonas is a code-generation–based library (embedded DSL) for fast, bulk, single-thread in-memory stream processing. This repository focuses on strymonas for OCaml and BER MetaOCaml, which generates C and OCaml code. 3 | 4 | Check https://strymonas.github.io/ for the latest situation of the project. 5 | 6 | 7 | ## Installing 8 | 9 | We offer `strymonas-pure`, a version that does not depend on MetaOCaml, in OPAM: 10 | 11 | ```bash 12 | $ opam update 13 | $ opam switch create 4.14.1 14 | $ eval $(opam env --switch=4.14.1) 15 | $ opam install strymonas-pure 16 | $ rlwrap ocaml 17 | ``` 18 | 19 | ```ocaml 20 | #require "strymonas-pure";; 21 | module C = Backends_pure.C;; 22 | module C32 = C.C32;; 23 | module F32 = C.F32;; 24 | module Raw = Stream_raw_fn.Make (C);; 25 | module Cook = Stream_cooked_fn.Make_ex (C) (Raw);; 26 | open Cook;; 27 | 28 | let pipeline = 29 | Raw.infinite C32.(fun yield -> yield @@ lit {re=0.; im=7.}) |> 30 | map F32.(fun e -> C32.imag e *. lit 5.) |> 31 | filter C.(fun e -> not F32.(equal e (lit 0.))) |> 32 | take C.(int 5) |> 33 | map F32.(fun e -> e *. lit 2.) |> 34 | fold F32.( +. ) F32.(lit 0.);; 35 | 36 | let _ = Format.asprintf "%a" (C.pp_proc ~name:"calculate") @@ 37 | C.nullary_proc pipeline;; 38 | # - : string = "\nfloat calculate(){\n float x_3 = 0.;\n int x_4 = 5;\n while (x_4 > 0)\n {\n x_4--;\n x_3 = x_3 + 70.;\n }\n return x_3;\n}\n" 39 | 40 | let _ = C.run pipeline;; 41 | # - : F32.t = 350. 42 | ``` 43 | 44 | ## Building 45 | There are several options depending on your purpose. Especially, the easiest way is as follows: 46 | ``` 47 | $ opam switch create 4.14.1+BER 48 | $ eval $(opam env) 49 | $ make lib 50 | $ make test 51 | ``` 52 | 53 | You can see [lib/0README.dr](lib/0README.dr) before exploring the source code to deepen your understandings of the library design. 54 | 55 | 56 | ## Example 57 | There are many examples (and some of them include benchmarks as mentioned below): 58 | - [examples/TryFirst](examples/TryFirst): literally, "TryFirst"! 59 | - [examples/sliding-window](examples/sliding-window): a base of [lib/window_fn.ml](lib/window_fn.ml) 60 | - [examples/amradio](examples/amradio): an SDR application 61 | - [examples/streamit-fm](examples/streamit-fm): an SDR benchmark/application related to [StreamIt](https://groups.csail.mit.edu/cag/streamit/) 62 | - [examples/gnuradio-fm](examples/gnuradio-fm): an SDR benchmark/application related to [GNU Radio](https://www.gnuradio.org/) with [HackRF One](https://greatscottgadgets.com/hackrf/one/) 63 | 64 | ## Benchmarking 65 | Micro-benchmarks are in [benchmarks](benchmarks). See [benchmarks/README.md](benchmarks/README.md) for more details. 66 | 67 | There are also macro-benchmarks. They can be found in the following directories (see the relevant README.md files for more details): 68 | - [examples/streamit-fm](examples/streamit-fm) 69 | - [examples/gnuradio-fm](examples/gnuradio-fm) 70 | -------------------------------------------------------------------------------- /examples/streamit-fm/sit_emulator.ml: -------------------------------------------------------------------------------- 1 | (* StreamIt emulator *) 2 | 3 | (* This is a simplest StreamIt interpreter, with no regard to efficiency and, 4 | specifically, fusion. 5 | 6 | The purpose is to be able to step-wise verify StreamIt and Strymonas 7 | pipelines. 8 | 9 | Since StreamIt filters are written in imperative style, we retain this 10 | style. The goal is to be able to run StreamIt code almost as it is. 11 | *) 12 | 13 | (* A pair of input and output streams, as lists *) 14 | module type iostream = sig 15 | type i (* input stream element type *) 16 | type o (* output stream element type *) 17 | exception Finished (* thrown when the input stream is finished *) 18 | val push : o -> unit 19 | val pop : unit -> i (* the latter two may throw the exception *) 20 | val peek : int -> i 21 | end 22 | 23 | type ('i,'o) iostream = (module iostream with type i = 'i and type o = 'o) 24 | 25 | type ('i,'o) filter = 26 | {prework: ('i,'o) iostream -> unit; 27 | work: ('i,'o) iostream -> unit; 28 | } 29 | 30 | (* A very naive, but clearly correct, implementation of iostream 31 | Essentially, the executable specification 32 | *) 33 | module IOList(S: sig type i type o val il : i list end) = struct 34 | type i = S.i 35 | type o = S.o 36 | let input : i list ref = ref S.il 37 | let output : o list ref = ref [] (* in reverse order *) 38 | exception Finished (* thrown when the input stream is finished *) 39 | 40 | let get_output : unit -> o list = fun () -> List.rev !output 41 | 42 | let push : o -> unit = fun x -> output := x :: !output 43 | (* the latter two may throw the exception *) 44 | let pop : unit -> i = fun () -> match !input with 45 | | [] -> raise Finished 46 | | h :: t -> input := t; h 47 | 48 | let peek : int -> i = fun x -> match List.nth !input x with 49 | | r -> r 50 | | exception (Failure _) -> raise Finished 51 | end 52 | 53 | (* Execute a StreamIt filter on a given input *) 54 | let run_filter : type i o. (i,o) filter -> i list -> o list = fun fl il -> 55 | let module IOS = 56 | IOList(struct type nonrec i = i type nonrec o = o let il = il end) in 57 | fl.prework (module IOS); 58 | begin try while (true) do fl.work (module IOS) done 59 | with IOS.Finished -> () 60 | end; 61 | IOS.get_output () 62 | 63 | (* run several filters in (split duplicate; join roundrobin;) fashion. 64 | All the filters should have the same window and the same production rate. 65 | *) 66 | 67 | (* All lists have to have the same length *) 68 | let rec interleave : 'a list list -> 'a list = function 69 | | [] -> [] 70 | | [] :: t -> if List.for_all ((=) []) t then [] else 71 | failwith "interleave: lists should have the same length" 72 | | (h::t) :: tt -> h :: interleave (tt @ [t]) 73 | 74 | (* 75 | let _ = interleave [[1;2];[3;4];[5;6]] 76 | let _ = interleave [[1;2];[3;4;10];[5;6]] 77 | *) 78 | 79 | let duplicate_rr : type i o. (i,o) filter list -> i list -> o list = 80 | fun fls il -> 81 | List.map (fun fl -> run_filter fl il) fls |> interleave 82 | 83 | let duplicate_rr' : type i o. (i list -> o list) list -> i list -> o list = 84 | fun fls' il -> 85 | List.map (fun fl' -> fl' il) fls' |> interleave 86 | ;; 87 | 88 | -------------------------------------------------------------------------------- /benchmarks/Makefile: -------------------------------------------------------------------------------- 1 | # Benchmarks rely on MetaOCaml since we generate OCaml code 2 | # Benckmarks also rely on MetaOCaml for infrastructure (run OCaml code) 3 | # To benchmark against streaming, batteries, etc. libraries, we obviously 4 | # need to install those libraries 5 | OCAML=metaocaml 6 | OCAMLC=metaocamlc 7 | OCAMLOPT=metaocamlopt 8 | OCAMLDEP=ocamldep 9 | OCAMLFND=ocamlfind -toolchain metaocaml ocamlopt -package streaming,batteries,iter 10 | 11 | .SUFFIXES: .ml .mli .cmo .cmi .cmx 12 | 13 | COMMON_OBJS=util.cmx benchmark_types.cmx benchmark.cmx benchmark_abstract.cmx 14 | OBJS=$(COMMON_OBJS) benchmark_strymonas.cmx 15 | OBJS_BASE=$(COMMON_OBJS) benchmark_baseline.cmx 16 | OBJS_STREAMING=$(COMMON_OBJS) benchmark_streaming.cmx 17 | OBJS_BATTERIES=$(COMMON_OBJS) benchmark_batteries.cmx 18 | # OBJS_SEQ=$(COMMON_OBJS) benchmark_seq.cmx 19 | OBJS_ITER=$(COMMON_OBJS) benchmark_iter.cmx 20 | INCLUDES=-I ../lib -I ../lib/backends/Trx 21 | 22 | .PHONY: all clean # intf 23 | all: lib lib-base lib-streaming lib-batteries lib-seq lib-iter 24 | 25 | .PHONY: lib lib-base lib-streaming lib-batteries lib-seq lib-iter 26 | lib: 27 | $(OCAMLOPT) -o bench.out $(INCLUDES) lifts.cmx stream.cmxa $(OBJS:.cmx=.ml) 28 | 29 | lib-base: 30 | $(OCAMLOPT) -o bench-base.out $(INCLUDES) lifts.cmx stream.cmxa $(OBJS_BASE:.cmx=.ml) 31 | 32 | # External libraries 33 | lib-streaming: 34 | $(OCAMLFND) -o bench-streaming.out $(INCLUDES) -linkpkg lifts.cmx stream.cmxa $(OBJS_STREAMING:.cmx=.ml) 35 | 36 | lib-batteries: 37 | $(OCAMLFND) -o bench-batteries.out $(INCLUDES) -linkpkg lifts.cmx stream.cmxa $(OBJS_BATTERIES:.cmx=.ml) 38 | 39 | # lib-seq: 40 | # $(OCAMLFND) -o bench-seq.out $(INCLUDES) -linkpkg lifts.cmx stream.cmxa $(OBJS_SEQ:.cmx=.ml) 41 | 42 | lib-iter: 43 | $(OCAMLFND) -o bench-iter.out $(INCLUDES) -linkpkg lifts.cmx stream.cmxa $(OBJS_ITER:.cmx=.ml) 44 | 45 | 46 | .PHONY: bench bench-base bench2 bench-streaming bench-batteries bench-seq bench-iter 47 | bench: lib 48 | ./bench.out >| bench_result.txt && \ 49 | cat ./bench_result.txt | grep staged >| bench_staged.dat 50 | 51 | # be carefull to ">>" in the first line 52 | bench-base: lib-base 53 | ./bench-base.out >> bench_result.txt && \ 54 | cat ./bench_result.txt | grep baseline >| bench_baseline.dat 55 | 56 | bench2: 57 | ./bench.out >| bench_v2.txt 58 | ./bench-base.out >| bench_base.txt 59 | ./bench-streaming.out >| bench_streaming.txt 60 | 61 | bench-streaming: lib-streaming 62 | ./bench-streaming.out >> bench_result.txt && \ 63 | cat ./bench_result.txt | grep source >| bench_source.dat 64 | 65 | # be carefull to ">>" in the first line 66 | bench-batteries: lib-batteries 67 | ./bench-batteries.out >> bench_result.txt && \ 68 | cat ./bench_result.txt | grep batteries >| bench_batteries.dat 69 | 70 | # bench-seq: lib-seq 71 | # ./bench-seq.out >> bench_result.txt && \ 72 | # cat ./bench_result.txt | grep seq >| bench_seq.dat 73 | 74 | bench-iter: lib-iter 75 | ./bench-iter.out >> bench_result.txt && \ 76 | cat ./bench_result.txt | grep iter >| bench_iter.dat 77 | 78 | 79 | #Generate code for C benchmarks 80 | # C benchmarks are taken and tested in a separate directory: 81 | # ../../C/bench/ 82 | 83 | .PHONY: benchmark_c 84 | benchmark_c: 85 | $(OCAML) $(INCLUDES) -I ../lib/backends/C stream.cma benchmark_c.ml 86 | 87 | 88 | clean: 89 | rm -f *.cm[ixo] *.[oa] *.txt *.dat *.eps *.out *.tmp 90 | -------------------------------------------------------------------------------- /lib/stream_cooked.mli: -------------------------------------------------------------------------------- 1 | (* More convenient interface for the stream library *) 2 | 3 | module Raw : (module type of Stream_raw) 4 | module C : (module type of Cde_ex) 5 | open C 6 | 7 | type 'a stream = 'a Raw.stream 8 | type 'a cstream = 'a exp stream (* Abstract type of streams, carrying 9 | base-type values 10 | Raw streams are more general, and 11 | permit collections such as tuples. 12 | Such collections sometimes require 13 | the descriptor argument, see 14 | Pk_coll module 15 | *) 16 | 17 | (* Producers *) 18 | val of_arr : 'a arr -> 'a cstream 19 | (* non-empty! And should be immutable *) 20 | val of_static_arr : 'a tbase -> ('b -> 'a exp) -> 'b array -> 'a cstream 21 | val of_int_array : int array -> int cstream (* Specializations, *) 22 | val of_float_array : float array -> F64.t cstream (* for tests/benchmarks *) 23 | val iota : int exp -> int cstream 24 | val from_to : ?step:int -> int exp -> int exp -> int cstream 25 | (* We don't provide unfold here: use the Raw interface *) 26 | 27 | 28 | module Desc : (Pk_coll.desc with type 'a exp = 'a exp and 29 | type 'a stm = 'a stm and 30 | type 'a mut = 'a mut) 31 | 32 | (* Consumers *) 33 | val fold_ : ('z,_) Desc.desc -> 34 | ('z -> 'a -> 'z) -> 'z -> ('z -> 'w stm) -> 'a stream -> 'w stm 35 | 36 | (* fold_ may be confusing to new users, who may think that 'z and 'a 37 | may be of base types such as int 38 | (they actually may, but it is rarely useful). 39 | They could be tuples of base types -- which is useful. 40 | Anyway, to avoid confusion we offer a sugared, if less general, 41 | specialization of the above 42 | *) 43 | val fold : ('z exp -> 'a exp -> 'z exp) -> 'z exp -> 'a cstream -> 'z stm 44 | val iter : ('a -> unit stm) -> 'a stream -> unit stm 45 | 46 | (* Specializations of the above, convenient 47 | especially for testing and benchmarks *) 48 | val find_first : ('a exp -> bool exp) -> 'a exp -> 'a cstream -> 'a stm 49 | val sum_int : int cstream -> int stm 50 | val sum_int_long : int cstream -> I64.t stm 51 | val average_int : int cstream -> F64.t stm 52 | val count : 'a stream -> int stm 53 | 54 | 55 | (* Transformers *) 56 | (* Most transformers are for cstream, for the sake of new users 57 | Use Raw functions with more general types 58 | *) 59 | val map : ('a exp -> 'b exp) -> 'a cstream -> 'b cstream 60 | val flat_map : ('a exp -> 'b stream) -> 'a cstream -> 'b stream 61 | val filter : ('a exp -> bool exp) -> 'a cstream -> 'a cstream 62 | val take : int exp -> 'a stream -> 'a stream 63 | val map_accum : ('z exp -> 'a exp -> 64 | ('z exp -> 'b exp -> unit stm) -> unit stm) -> 65 | 'z exp -> 'a cstream -> 'b cstream 66 | 67 | val drop : int exp -> 'a stream -> 'a stream 68 | val drop_while : ('a exp -> bool exp) -> 'a cstream -> 'a cstream 69 | val take_while : ('a exp -> bool exp) -> 'a cstream -> 'a cstream 70 | 71 | val zip_with : ('a exp -> 'b exp -> 'c exp) -> 72 | ('a cstream -> 'b cstream -> 'c cstream) 73 | 74 | (* inclusive scan: same as scanl1 in Haskell *) 75 | val scan : ('z exp -> 'a exp -> 'z exp) -> 'z exp -> 'a cstream -> 'z cstream 76 | 77 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | OCAMLC=ocamlc 2 | OCAMLOPT=ocamlopt 3 | OCAMLDEP=ocamldep 4 | # Some facilities require MetaOCaml 5 | METAOCAMLC=metaocamlc 6 | METAOCAMLOPT=metaocamlopt 7 | 8 | .SUFFIXES: .ml .mli .cmo .cmi .cmx .tex .pdf 9 | 10 | LIB=stream.cma 11 | LIBOPT=stream.cmxa 12 | # Pure OCaml, not MetaOCaml 13 | LIBPURE=stream_ocaml.cma 14 | LIBPUREOPT=stream_ocaml.cmxa 15 | 16 | # Trx backend needs MetaOCaml; the others are pure OCaml 17 | BACKC=backends/C 18 | BACKTRX=backends/Trx 19 | BACKENDC=\ 20 | $(BACKC)/c_ast.cmi \ 21 | $(BACKC)/c_pp.cmo \ 22 | $(BACKC)/offshoringIR.cmi \ 23 | $(BACKC)/offshoringIR.cmo \ 24 | $(BACKC)/offshoringIR_pp.cmo \ 25 | $(BACKC)/c_cde.cmo 26 | BACKENDS=\ 27 | $(BACKENDC) \ 28 | $(BACKTRX)/trx_code_common.cmo 29 | 30 | # Order matters! 31 | INTF=cde.cmi cde_ex.cmi cde_top.cmi stream_raw.cmi stream_cooked.cmi 32 | # OBJS are pure OCaml objects 33 | OBJS=pk_cde.cmo pk_coll.cmo stream_raw_fn.cmo stream_cooked_fn.cmo window_fn.cmo 34 | INCLUDES=-I $(BACKC) -I $(BACKTRX) 35 | 36 | .PHONY: all 37 | all: lib opt pure 38 | lib: $(LIB) 39 | opt: $(LIBOPT) 40 | pure: $(LIBPURE) $(LIBPUREOPT) 41 | 42 | .PHONY: intf #Just the interfaces for testing examples 43 | intf: $(INTF) 44 | 45 | $(LIB): $(INTF) $(OBJS) $(BACKENDS) $(BACKTRX)/trx_code.cmo backends.cmo 46 | $(OCAMLC) -a -o $@ $(filter-out %.cmi,$^) 47 | 48 | $(LIBOPT): $(INTF) $(BACKENDS:.cmo=.cmx) $(OBJS:.cmo=.cmx) \ 49 | $(BACKTRX)/trx_code_native.cmx backends.cmx 50 | $(OCAMLOPT) -a -o $@ $(filter-out %.cmi,$^) 51 | 52 | $(LIBPURE): $(INTF) $(BACKENDC) $(OBJS) backends_pure.cmo 53 | $(OCAMLC) -a -o $@ $(filter-out %.cmi,$^) 54 | 55 | $(LIBPUREOPT): $(INTF) $(BACKENDC:.cmo=.cmx) $(OBJS:.cmo=.cmx) backends_pure.cmx 56 | $(OCAMLOPT) -a -o $@ $(filter-out %.cmi,$^) 57 | 58 | 59 | .mli.cmi: 60 | $(OCAMLC) -c $(INCLUDES) $< 61 | .ml.cmo: 62 | $(OCAMLC) -c $(INCLUDES) $< 63 | .ml.cmx: 64 | $(OCAMLOPT) -c $(INCLUDES) $< 65 | 66 | 67 | $(BACKTRX)/trx_code_common.cmo: $(BACKTRX)/trx_code_common.ml 68 | $(METAOCAMLC) -c $< 69 | 70 | $(BACKTRX)/trx_code.cmo: $(BACKTRX)/trx_code.ml 71 | $(METAOCAMLC) -c -I $(BACKTRX) $< 72 | 73 | $(BACKTRX)/trx_code_common.cmx: $(BACKTRX)/trx_code_common.ml 74 | $(METAOCAMLOPT) -c $< 75 | 76 | $(BACKTRX)/trx_code_native.cmx: $(BACKTRX)/trx_code_native.ml 77 | $(METAOCAMLOPT) -c -I $(BACKTRX) $< 78 | 79 | $(BACKC)/offshoringIR_pp.cmo: $(BACKC)/offshoringIR_pp.ml $(BACKC)/c_ast.cmi 80 | $(OCAMLC) -c $(INCLUDES) -no-alias-deps $(BACKC)/offshoringIR_pp.ml 81 | 82 | $(BACKC)/offshoringIR_pp.cmx: $(BACKC)/offshoringIR_pp.ml $(BACKC)/c_ast.cmi 83 | $(OCAMLOPT) -c $(INCLUDES) -no-alias-deps $(BACKC)/offshoringIR_pp.ml 84 | 85 | backends.cmo: backends.ml 86 | $(METAOCAMLC) -c $(INCLUDES) $< 87 | backends.cmx: backends.ml 88 | $(METAOCAMLOPT) -c $(INCLUDES) $< 89 | 90 | depend: $(wildcard *.mli *.ml $(BACKC)/*.{ml,mli} $(BACKTRX)/*.{ml,mli}) 91 | $(OCAMLDEP) $(INCLUDES) $^ > depend 92 | 93 | depend-pure: $(filter-out backends.ml, $(wildcard *.mli *.ml $(BACKC)/*.{ml,mli} $(BACKTRX)/*.{ml,mli})) 94 | echo $^ 95 | $(OCAMLDEP) $(INCLUDES) $^ > depend 96 | 97 | clean:: 98 | rm -f *.cm[ixoa] *.cmxa *.[oa] depend 99 | 100 | clean:: 101 | cd $(BACKC) && rm -f *.cm[ixoa] *.cmxa *.[oa] 102 | 103 | clean:: 104 | cd $(BACKTRX) && rm -f *.cm[ixoa] *.cmxa *.[oa] 105 | 106 | .PHONY: install uninstall 107 | install: 108 | ocamlfind install strymonas META \ 109 | $(LIB) $(LIBOPT) \ 110 | $(BACKENDS:.cmo=.cmi) $(BACKENDS:.cmo=.cmx) \ 111 | $(OBJS:.cmo=.cmi) $(OBJS:.cmo=.cmx) \ 112 | backends.cmi backends.cmx 113 | 114 | uninstall: 115 | ocamlfind remove strymonas 116 | 117 | 118 | .PHONY: install-pure uninstall-pure 119 | install-pure: $(LIBPURE) $(LIBPUREOPT) 120 | cp META.pure META 121 | ocamlfind install strymonas-pure META \ 122 | $(LIBPURE) $(LIBPUREOPT) \ 123 | $(BACKENDC:.cmo=.cmi) $(BACKENDC:.cmo=.cmx) \ 124 | $(OBJS:.cmo=.cmi) $(OBJS:.cmo=.cmx) \ 125 | backends_pure.cmi backends_pure.cmx 126 | cp META.default META 127 | 128 | uninstall-pure: 129 | ocamlfind remove strymonas-pure 130 | 131 | -include depend 132 | -------------------------------------------------------------------------------- /benchmarks/benchmark.ml: -------------------------------------------------------------------------------- 1 | open Benchmark_types 2 | open Util 3 | 4 | let perfS : string -> 'a code -> ('a code -> 'b code) -> 'b benchmark_options -> 'b code = 5 | fun benchmark init f options -> 6 | let reps = options.repetitions in 7 | let final = options.final_f in 8 | assert (reps>1); 9 | 10 | . .~(f ..) in 11 | let x = .~init in 12 | let ret = ref (fn x) in 13 | let measurements = Array.make reps 0.0 in 14 | (* for i = 0 to reps-1 do 15 | ignore (Sys.opaque_identity (fn x)) (* warming up *) 16 | done; *) 17 | for i = 0 to reps-1 do 18 | Gc.compact(); 19 | (* XXX Sys.time is getrusage, may be not so good: 20 | https://github.com/ocaml/ocaml/blob/b4c5d7a55d9ec25693ba741a613d81f2c3ef66bc/runtime/sys.c#L512 *) 21 | let start_time = Sys.time () in 22 | let _ = Sys.opaque_identity (fn x) in 23 | let end_time = Sys.time () in 24 | let elapsed_time = (end_time -. start_time) *. 1000.0 in 25 | measurements.(i) <- elapsed_time 26 | done; 27 | Printf.printf "%-30s %10.1f %10.1f %5.1f ms/op\n%!" 28 | benchmark (mean measurements) (mean_error 0.95 measurements) (standard_deviation measurements); 29 | 30 | .~(final ..); 31 | !ret>.;; 32 | 33 | let perfS2 : string -> 'a code -> 'a code -> (('a code* 'a code) -> 'b code) -> 'b benchmark_options -> 'b code = 34 | fun benchmark init1 init2 f options -> 35 | let reps = options.repetitions in 36 | let final = options.final_f in 37 | assert (reps>1); 38 | 39 | ..,..) ) in 40 | let x = .~init1 in 41 | let y = .~init2 in 42 | let ret = ref (fn (x,y)) in 43 | let measurements = Array.make reps 0.0 in 44 | (* for i = 0 to reps-1 do 45 | ignore (Sys.opaque_identity (fn (x,y))) (* warming up *) 46 | done; *) 47 | for i = 0 to reps-1 do 48 | Gc.compact(); 49 | let start_time = Sys.time () in 50 | let _ = Sys.opaque_identity (fn (x,y)) in 51 | let end_time = Sys.time () in 52 | let elapsed_time = (end_time -. start_time) *. 1000.0 in 53 | measurements.(i) <- elapsed_time 54 | done; 55 | Printf.printf "%-30s %10.1f %10.1f %5.1f ms/op\n%!" 56 | benchmark (mean measurements) (mean_error 0.95 measurements) (standard_deviation measurements); 57 | 58 | .~(final ..); 59 | !ret>.;; 60 | 61 | let write_code : string -> 'a code -> unit = fun file_name c -> 62 | (* ===== make sure the code is closed ===== *) 63 | (* let start_time = Sys.time () in *) 64 | let cde = close_code c in 65 | (* let end_time = Sys.time () in *) 66 | (* Printf.printf "closing code took: %5.1f ms\n%!" ((end_time -. start_time) *. 1000.0); *) 67 | let cout = open_out file_name in 68 | let ppf = Format.formatter_of_out_channel cout in 69 | (* ===== print code ===== *) 70 | (* let start_time = Sys.time () in *) 71 | let () = (format_code ppf cde; Format.fprintf ppf "%!") in 72 | (* let end_time = Sys.time () in *) 73 | (* Printf.printf "emitting code took: %5.1f ms\n%!" ((end_time -. start_time) *. 1000.0); *) 74 | close_out cout 75 | 76 | (* Use Ctrl-z to interrupt running *) 77 | let run_natively : ?compiler:string -> ?save:bool -> 'a code -> unit = 78 | fun ?(compiler:string="ocamlopt -O2 -unsafe -nodynlink util.cmx") ?(save:bool=false) c -> 79 | let fname = Filename.(concat (get_temp_dir_name()) "gen.ml") in 80 | write_code fname c; 81 | (* let start_time = Sys.time () in *) 82 | let retc = Sys.command (compiler ^ " " ^ fname) in 83 | (* let end_time = Sys.time () in *) 84 | (* Printf.printf "compilation took: %5.1f ms\n%!" ((end_time -. start_time) *. 1000.0); *) 85 | if retc = 0 then begin 86 | ignore (Sys.command "./a.out"); 87 | begin 88 | if save then 89 | print_endline fname 90 | else 91 | Sys.remove fname 92 | end; 93 | Sys.remove "./a.out" 94 | end;; 95 | 96 | let run_script : ?compiler:string -> 'a code array -> unit = 97 | fun ?compiler arr -> 98 | Printf.printf "%-30s %10s %10s %5s %7s\n%!" "Benchmark" "Mean" "Mean-Error" "Sdev" "Unit"; 99 | Array.iter (fun c -> run_natively ?compiler c) arr 100 | ;; 101 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/bench_main.c: -------------------------------------------------------------------------------- 1 | /* The main module for benchmarking Strymonas for GNU Radio 2 | processing 3 | 4 | Probably this file should be moved somewhere else... 5 | 6 | We assume that the generated file "/tmp/generated.c" 7 | */ 8 | /* 9 | (Use gcc-13 in macOS) 10 | gcc -Ofast -march=native -W -Wall utils.c bench_main.c 11 | gcc -Ofast -mfpu=vfp -mfloat-abi=hard -march=armv6zk -mtune=arm1176jzf-s -W -Wall utils.c bench_main.c 12 | */ 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | // Read an array of short complex numbers from a given file. 20 | // arrlen on input should point to the desired size, or 0 21 | // In the latter case, we read the whole file, and store the array size 22 | // in that location. 23 | static float complex *load_file(const char* file_name, size_t * arrlen) { 24 | size_t desired_size = *arrlen; 25 | 26 | FILE *fp = fopen(file_name, "rb"); 27 | assert( fp != NULL ); 28 | 29 | if ( desired_size == 0 ) { 30 | struct stat stat; 31 | assert( fstat(fileno(fp),&stat) == 0); 32 | desired_size = stat.st_size / sizeof(float complex); 33 | } 34 | 35 | assert ( desired_size > 0 ); 36 | float complex *arr = 37 | (float complex *)malloc(sizeof(float complex) * desired_size); 38 | size_t const real_size = fread(arr, sizeof(float complex), desired_size, fp); 39 | if ( *arrlen > 0 ) 40 | { 41 | if (desired_size != real_size) 42 | perror("read error"), exit(1); 43 | } 44 | else *arrlen = real_size; 45 | 46 | fclose(fp); 47 | return arr; 48 | } 49 | 50 | #include 51 | #include 52 | #include 53 | 54 | // map the file as a short complex array 55 | // Incidentally, if we set MAP_POPULATE as the flag, the behevior is the 56 | // same as load_file 57 | static float complex *map_file(const char* file_name, size_t * arrlen) { 58 | size_t desired_size = *arrlen; 59 | 60 | int fd = open(file_name, O_RDONLY); 61 | assert( fd >= 0 ); 62 | 63 | if ( desired_size == 0 ) { 64 | struct stat stat; 65 | assert( fstat(fd,&stat) == 0); 66 | desired_size = stat.st_size / sizeof(float complex); 67 | } 68 | 69 | assert ( desired_size > 0 ); 70 | 71 | float complex * arr = 72 | (float complex *)mmap(NULL,desired_size*sizeof(float complex), 73 | PROT_READ,MAP_PRIVATE,fd,0); 74 | assert (arr != MAP_FAILED); 75 | close(fd); 76 | 77 | if ( *arrlen == 0 ) 78 | *arrlen = desired_size; 79 | return arr; 80 | } 81 | 82 | static FILE *global_fp; 83 | static void stream_file(const char* file_name) { 84 | global_fp = fopen(file_name, "rb"); 85 | assert( global_fp != NULL ); 86 | } 87 | 88 | // prototype of the generated code 89 | void gr_fmradio(int const n_1,float complex * const a_2); 90 | void gr_fmradio_fread(void); 91 | 92 | #include "utils.h" 93 | #define WARMUP 5 94 | #define REPS 20 95 | double results[REPS]; 96 | int main(void) { 97 | size_t arr_size = 0; 98 | // float complex *arr = load_file("sps3072000_c32_30s.pcm",&arr_size); 99 | float complex *arr = map_file("sps3072000_c32_30s.pcm",&arr_size); 100 | printf("\nTesting on the array of size %lu\n",arr_size); 101 | // stream_file("sps3072000_c32_30s.pcm"); 102 | 103 | // Take time 104 | int i; 105 | printf("%-40s %10s %10s %5s %7s\n", "Benchmark", "Mean", "Mean-Error", "Sdev", "Unit"); 106 | for (i=0; i 129 | #include "fast_atanf.h" 130 | 131 | static inline void init_read(void) { 132 | fseek(global_fp, 0, SEEK_SET); 133 | } 134 | 135 | static inline int read_c32(float complex *arr, int const arr_size) { 136 | return fread(arr,sizeof(float complex),arr_size,global_fp); 137 | } 138 | 139 | #include "/tmp/generated.c" 140 | 141 | -------------------------------------------------------------------------------- /examples/streamit-fm/fir.ml: -------------------------------------------------------------------------------- 1 | (* Finite Impulse Response filters (FIR) 2 | For benchmarking and comparsion, we use exactly the filters 3 | used by StreamIt 4 | *) 5 | 6 | type weights = float array (* represented by coefficients *) 7 | 8 | 9 | (* ===== basic operators ===== *) 10 | let scale : float -> weights -> weights = 11 | fun c -> Array.map (fun e -> c *. e) 12 | let scale_down : float -> weights -> weights = 13 | fun c -> Array.map (fun e -> e /. c) 14 | let norm : weights -> weights = fun w -> 15 | let sum = Array.fold_left (+.) 0. w in 16 | Array.map (fun e -> e /. sum) w 17 | 18 | (* ===== fusion operators for weights ===== *) 19 | (* horizontal (parallel) fusion *) 20 | let add : weights -> weights -> weights = Array.map2 ( +. ) 21 | let sub : weights -> weights -> weights = Array.map2 ( -. ) 22 | let adds : weights list -> weights = Util.reduce add 23 | let subs : weights list -> weights = Util.reduce sub 24 | 25 | (* vertical (serial) fusion, 26 | Compute convolution of the weights coefficients *) 27 | let seq : weights -> weights -> weights = fun w1 w2 -> 28 | let l1 = Array.length w1 in 29 | let l2 = Array.length w2 in 30 | let l3 = l1 + l2 - 1 in 31 | let new_weights = Array.make l3 0. in 32 | for i=0 to l3 - 1 do 33 | for j=0 to l1 - 1 do 34 | if ((i - j) >= 0 && (i - j) < l2) then 35 | new_weights.(i) <- new_weights.(i) +. w1.(j) *. w2.(i - j) 36 | done 37 | done; 38 | new_weights 39 | 40 | (* ===== weights ===== *) 41 | let filterBank : 'a list -> ('a -> weights) -> weights list = 42 | fun params weights_gen -> 43 | List.map weights_gen params 44 | 45 | let lowPassFilter : float -> float -> int -> weights = fun rate cutoff taps -> 46 | let coeff = Array.make taps 0. in 47 | let m = float taps -. 1. in (* order *) 48 | let w = 2. *. Float.pi *. cutoff /. rate in 49 | (* idk well, but this branching is required in 50 | ** realFIRFilter of the OFDM implementation *) 51 | if cutoff = 0. then 52 | for i = 0 to taps-1 do 53 | coeff.(i) <- (0.54 -. 0.46 *. cos (2. *. Float.pi 54 | *. float i 55 | /. m)) (* hamming window *) 56 | done 57 | else 58 | for i = 0 to taps-1 do 59 | let t = float i -. m /. 2. in 60 | if (t = 0.) then 61 | (* XXX why does not `* (0.54 -. 0.46 ...)`? 62 | ** https://github.com/gnuradio/gnuradio/blob/b2c9623cbd548bd86250759007b80b61bd4a2a06/gr-filter/lib/firdes.cc#L98 *) 63 | coeff.(i) <- w /. Float.pi 64 | else 65 | coeff.(i) <- sin (w *. t) 66 | /. Float.pi 67 | /. t 68 | *. (0.54 -. 0.46 *. cos (2. *. Float.pi 69 | *. float i 70 | /. m)) (* hamming window *) 71 | done; 72 | coeff 73 | 74 | (* Since convolutional filters are linear, 75 | filter coeff1 signal - filter coeff2 signal = 76 | filter (coeff1-coeff2) signal) 77 | *) 78 | let bandPassFilter : float -> float -> float -> int -> weights = 79 | fun rate cutoff_l cutoff_h taps -> 80 | let lph = lowPassFilter rate cutoff_h taps in 81 | let lpl = lowPassFilter rate cutoff_l taps in 82 | sub lph lpl 83 | 84 | (* Equalizer 85 | The idea is to split the signal into N bands and amplify by 86 | a band-specific gain and then re-combine. 87 | In Math: 88 | sum_i{ gain_i * filter bandpass_i signal} 89 | Because convolutional filter is linear: 90 | a*filter c1 signal + b*filter c2 signal = filter (a*c1 + b*c2) signal 91 | we can perform the equalization on filter coefficients instead 92 | *) 93 | let equalizer : float -> int -> float list -> float list -> int -> weights = 94 | fun rate bandsNum cutoffs eqGains taps -> (* XXX bandsNum??? *) 95 | assert (List.length cutoffs = List.length eqGains); 96 | (* (cutoffs[0],cutoffs[1]), (cutoffs[1],cutoffs[2]), ... *) 97 | let bands = List.combine (Util.drop_last cutoffs) (List.tl cutoffs) in 98 | let params = List.combine (List.tl eqGains) bands in 99 | filterBank params (fun (g, (pl,ph)) -> 100 | bandPassFilter rate pl ph taps |> scale g) 101 | |> adds 102 | 103 | (* ===== for debugging ===== *) 104 | let print : weights -> unit = 105 | Array.iter (fun e -> print_float e; print_newline ()) 106 | -------------------------------------------------------------------------------- /examples/streamit-fm/streamit/fmref.patch: -------------------------------------------------------------------------------- 1 | --- fmref.c 2023-09-29 20:12:28.000000000 +0900 2 | +++ ../benchmarks_base.c 2023-10-18 16:34:51.000000000 +0900 3 | @@ -1,7 +1,12 @@ 4 | +/* Handwritten C codes for benchmarks */ 5 | + 6 | +#include "benchmarks.h" 7 | + 8 | +// *** This is modifided fmref.c *** 9 | /* 10 | * fmref.c: C reference implementation of FM Radio 11 | * David Maze 12 | - * $Id: fmref.c,v 1.15 2003/11/05 18:13:10 dmaze Exp $ 13 | + * $Id: fmref.c,v 1.15 2003-11-05 18:13:10 dmaze Exp $ 14 | */ 15 | 16 | #ifdef raw 17 | @@ -23,8 +28,6 @@ 18 | /* Must be at least NUM_TAPS+1: */ 19 | #define IN_BUFFER_LEN 10000 20 | 21 | -void begin(void); 22 | - 23 | typedef struct FloatBuffer 24 | { 25 | float buff[IN_BUFFER_LEN]; 26 | @@ -52,8 +55,9 @@ 27 | 28 | #define EQUALIZER_BANDS 10 29 | float eq_cutoffs[EQUALIZER_BANDS + 1] = 30 | - { 55.000004, 77.78174, 110.00001, 155.56354, 220.00002, 311.12695, 31 | - 440.00003, 622.25415, 880.00006, 1244.5078, 1760.0001 }; 32 | + { 55.000000000000014, 77.781745930520245, 110.00000000000004, 155.56349186104052, 33 | + 219.99999999999989, 311.12698372208104, 439.99999999999983, 622.25396744416219, 34 | + 879.99999999999977, 1244.5079348883232, 1759.9999999999995}; 35 | typedef struct EqualizerData 36 | { 37 | LPFData lpf[EQUALIZER_BANDS + 1]; 38 | @@ -63,34 +67,20 @@ 39 | void init_equalizer(EqualizerData *data); 40 | void run_equalizer(FloatBuffer *fbin, FloatBuffer *fbout, EqualizerData *data); 41 | 42 | -void write_floats(FloatBuffer *fb); 43 | +// void write_floats(FloatBuffer *fb); 44 | +// void write_floats_to_var(FloatBuffer *fb); 45 | +void sum_floats(FloatBuffer *fb); 46 | 47 | /* Globals: */ 48 | static int numiters = -1; 49 | +static float out = 0.0; 50 | 51 | -#ifndef raw 52 | -int main(int argc, char **argv) 53 | +float fmradio(void) 54 | { 55 | - int option; 56 | - 57 | - while ((option = getopt(argc, argv, "i:")) != -1) 58 | - { 59 | - switch(option) 60 | - { 61 | - case 'i': 62 | - numiters = atoi(optarg); 63 | - } 64 | - } 65 | + numiters = 1000000; 66 | + out = 0.0; 67 | 68 | - begin(); 69 | - return 0; 70 | -} 71 | -#endif 72 | 73 | - 74 | - 75 | -void begin(void) 76 | -{ 77 | int i; 78 | FloatBuffer fb1, fb2, fb3, fb4; 79 | LPFData lpf_data; 80 | @@ -120,7 +110,7 @@ 81 | } 82 | 83 | /* Main loop: */ 84 | - while (numiters == -1 || numiters-- > 0) 85 | + while (numiters-- > 0) 86 | { 87 | /* The low-pass filter will need NUM_TAPS+1 items; read them if we 88 | * need to. */ 89 | @@ -129,8 +119,13 @@ 90 | run_lpf(&fb1, &fb2, &lpf_data); 91 | run_demod(&fb2, &fb3); 92 | run_equalizer(&fb3, &fb4, &eq_data); 93 | - write_floats(&fb4); 94 | + // write_floats(&fb4);// just for debugging 95 | + // write_floats_to_var(&fb4); 96 | + sum_floats(&fb4); 97 | } 98 | + // printf("%f", out); 99 | + 100 | + return out; 101 | } 102 | 103 | void fb_compact(FloatBuffer *fb) 104 | @@ -160,6 +155,7 @@ 105 | return 0; 106 | } 107 | 108 | +// XXX 0,1,2,3,... 109 | void get_floats(FloatBuffer *fb) 110 | { 111 | static int x = 0; 112 | @@ -220,7 +216,7 @@ 113 | float temp, gain; 114 | gain = MAX_AMPLITUDE * SAMPLING_RATE / (BANDWIDTH * M_PI); 115 | temp = fbin->buff[fbin->rpos] * fbin->buff[fbin->rpos + 1]; 116 | - temp = gain * atan(temp); 117 | + temp = gain * atanf(temp); 118 | fbin->rpos++; 119 | fb_ensure_writable(fbout, 1); 120 | fbout->buff[fbout->rlen++] = temp; 121 | @@ -275,14 +271,26 @@ 122 | fbout->buff[fbout->rlen++] = sum; 123 | } 124 | 125 | -void write_floats(FloatBuffer *fb) 126 | +// void write_floats(FloatBuffer *fb) 127 | +// { 128 | +// /* printf() any data that's available: */ 129 | +// #ifdef raw 130 | +// while (fb->rpos < fb->rlen) 131 | +// print_float(fb->buff[fb->rpos++]); 132 | +// #else 133 | +// while (fb->rpos < fb->rlen) 134 | +// printf("%f\n", fb->buff[fb->rpos++]); 135 | +// #endif 136 | +// } 137 | + 138 | +// void write_floats_to_var(FloatBuffer *fb) 139 | +// { 140 | +// while (fb->rpos < fb->rlen) 141 | +// out = fb->buff[fb->rpos++]; 142 | +// } 143 | + 144 | +void sum_floats(FloatBuffer *fb) 145 | { 146 | - /* printf() any data that's available: */ 147 | -#ifdef raw 148 | - while (fb->rpos < fb->rlen) 149 | - print_float(fb->buff[fb->rpos++]); 150 | -#else 151 | while (fb->rpos < fb->rlen) 152 | - printf("%f\n", fb->buff[fb->rpos++]); 153 | -#endif 154 | + out += fb->buff[fb->rpos++]; 155 | } 156 | -------------------------------------------------------------------------------- /benchmarks/benchmark_strymonas.ml: -------------------------------------------------------------------------------- 1 | module type cde = sig 2 | include module type of Cde_ex 3 | val to_code1 : ('a arr -> 'b stm) -> ('a array code -> 'b code) 4 | val to_code2 : ('a arr * 'b arr -> 'c stm) -> 5 | ('a array code * 'b array code -> 'c code) 6 | end 7 | 8 | module Benchmark_stream(C:cde) = struct 9 | module S = struct 10 | include Stream_cooked_fn.Make(C) 11 | type 'a exp = 'a C.exp 12 | type 'a stm = 'a C.stm 13 | type 'a arr = 'a C.arr 14 | type 'a stream = 'a cstream 15 | 16 | type byte = int (* element of an outer stream *) 17 | let byte_max = 255 18 | 19 | let encode : bool cstream -> byte cstream = fun st -> 20 | Raw.initializing_ref C.(int 0) @@ fun zeros_count -> 21 | st |> Raw.map_raw ~linear:false (fun el k -> 22 | let open C in 23 | letl (dref zeros_count) @@ fun zeros -> 24 | if_ el (seq (zeros_count := int 0) (k zeros)) @@ 25 | seq (zeros_count := zeros + int 1) @@ 26 | if1 (dref zeros_count = int byte_max) @@ 27 | seq (zeros_count := int 0) (k (int byte_max)) 28 | ) 29 | 30 | (* advanced and more interesting use of flat_map *) 31 | let decode : byte cstream -> bool cstream = fun st -> 32 | st |> flat_map (fun el -> 33 | Raw.pull_array C.(el + int 1) @@ fun i k -> 34 | let open C in 35 | if_ (i < el) (k (bool false)) 36 | (if1 (i < int byte_max) (k (bool true))) 37 | ) 38 | end 39 | 40 | open S 41 | open Benchmark_types 42 | open Benchmark 43 | open Benchmark_abstract.Benchmark(C)(S) 44 | 45 | 46 | (* Arrays used for benchmarking *) 47 | let v = .< Array.init 100_000_000 (fun i -> i mod 10) >.;; 48 | let vHi = .< Array.init 10_000_000 (fun i -> i mod 10) >.;; 49 | let vLo = .< Array.init 10 (fun i -> i mod 10) >.;; 50 | let vFaZ = .< Array.init 10_000 (fun i -> i) >.;; 51 | let vZaF = .< Array.init 10_000_000 (fun i -> i) >.;; 52 | 53 | let options = { 54 | repetitions = 20; 55 | final_f = (fun _ -> .<()>.); 56 | } 57 | 58 | let pr_int = {options with 59 | final_f = fun x -> ..} 60 | 61 | let check_int n = {options with 62 | final_f = fun x -> ..} 63 | 64 | let script =[| 65 | perfS "sum_staged" v sum options; 66 | perfS "sumOfSquares_staged" v sumOfSquares options; 67 | perfS "sumOfSquaresEven_staged" v sumOfSquaresEven options; 68 | perfS "mapsMegamorphic_staged" v maps options; 69 | perfS "filtersMegamorphic_staged" v filters options; 70 | perfS2 "cart_staged" vHi vLo cart options; 71 | perfS2 "dotProduct_staged" vHi vHi dotProduct options; 72 | perfS2 "flatMapAfterZip_staged" vFaZ vFaZ 73 | flatMap_after_zipWith options; 74 | perfS2 "zipAfterFlatMap_staged" vZaF vZaF 75 | zipWith_after_flatMap options; 76 | perfS2 "flatMapTake_staged" vHi vLo 77 | flat_map_take options; 78 | perfS2 "zipFilterFilter_staged" v vHi 79 | zip_filter_filter options; 80 | perfS2 "zipFlatMapFlatMap_staged" v vLo 81 | zip_flat_flat options; 82 | perfS2 "runLengthDecoding_staged" v v 83 | decoding options; 84 | |];; 85 | 86 | let test = 87 | let v = genlet v in 88 | let vHi = genlet vHi in 89 | let vLo = genlet vLo in 90 | let vFaZ = genlet vFaZ in 91 | let vZaF = genlet vZaF in 92 | .< 93 | ignore (Sys.command "date"); 94 | assert (.~(sum v) == 450000000); 95 | assert (.~(sumOfSquares v) == 2850000000); 96 | assert (.~(sumOfSquaresEven v) == 1200000000); 97 | assert (.~(maps v) == 2268000000000); 98 | assert (.~(filters v) == 170000000); 99 | assert (.~(cart (vHi, vLo)) == 2025000000); 100 | assert (.~(dotProduct (vHi, vHi)) == 285000000); 101 | assert (.~(flatMap_after_zipWith (vFaZ, vFaZ)) == 1499850000000); 102 | assert (.~(zipWith_after_flatMap (vZaF, vZaF)) == 99999990000000); 103 | assert (.~(flat_map_take (vHi, vLo)) == 405000000); 104 | assert (.~(zip_filter_filter (v, vHi)) == 64000000); 105 | assert (.~(zip_flat_flat (v, vLo)) == 3250000000); 106 | assert (.~(decoding (v, v)) == 100000000); 107 | print_endline "All done" 108 | >. 109 | end;; 110 | 111 | (* 112 | module M = Benchmark_stream(Benchmark_abstract.CodeBasic) 113 | *) 114 | module M = Benchmark_stream(Benchmark_abstract.CodePV) 115 | 116 | let main () = 117 | match Sys.argv with 118 | | [|_;"test"|] -> 119 | Benchmark.run_natively M.test 120 | (* ~save:true *) 121 | | _ -> 122 | Benchmark.run_script M.script 123 | 124 | let _ = main () 125 | -------------------------------------------------------------------------------- /benchmarks/benchmark_seq.ml: -------------------------------------------------------------------------------- 1 | module Seqb = struct 2 | open Seq 3 | 4 | type 'a cde = 'a code 5 | type 'a stream_raw = 'a t 6 | type 'a stream = 'a t cde 7 | 8 | let lift_tr1 : (('a -> 'b ) -> 'a stream_raw -> 'c stream_raw) cde 9 | -> ('a cde -> 'b cde) -> 'a stream -> 'c stream = 10 | fun tr f st -> .<.~tr (fun x -> .~(f ..)) .~st>. 11 | 12 | let lift_tr2 : (('a -> 'b -> 'c) -> ('a stream_raw -> 'b stream_raw -> 'c stream_raw) )cde 13 | -> ('a cde -> 'b cde -> 'c cde) -> 'a stream -> 'b stream -> 'c stream = 14 | fun tr f st1 st2 -> .<.~tr (fun x y -> .~(f .. ..)) .~st1 .~st2>. 15 | 16 | 17 | let of_arr : 'a array cde -> 'a stream = fun x -> .. 18 | 19 | let fold : ('z cde -> 'a cde -> 'z cde) -> 'z cde -> 'a stream -> 'z cde = 20 | fun f z st -> . .~(f .. ..)) .~z .~st>. 21 | 22 | let map : ('a cde -> 'b cde) -> 'a stream -> 'b stream = 23 | fun f st -> lift_tr1 .. f st 24 | 25 | let flat_map : ('a cde -> 'b stream) -> 'a stream -> 'b stream = 26 | fun f st -> lift_tr1 .. f st 27 | 28 | let filter : ('a cde -> bool cde) -> 'a stream -> 'a stream = 29 | fun f st -> lift_tr1 .. f st 30 | 31 | let take : int cde -> 'a stream -> 'a stream = 32 | fun n st -> .. 33 | 34 | let zip_with : ('a cde -> 'b cde -> 'c cde) -> ('a stream -> 'b stream -> 'c stream) = 35 | fun f st1 st2 -> lift_tr2 .. f st1 st2 36 | 37 | type byte = int 38 | let byte_max = 255 39 | let decode = fun st -> 40 | st |> flat_map (fun el -> .< 41 | unfold (fun i -> 42 | if i < .~el then Some (false, i + 1) 43 | else ( 44 | if i > .~el then None 45 | else ( 46 | if i < byte_max then Some (true, i + 1) 47 | else failwith "wrong sequence" 48 | ) 49 | ) 50 | ) 0>.) 51 | end 52 | 53 | 54 | module Benchmark_seq = struct 55 | open Benchmark_types 56 | open Benchmark 57 | open Benchmark_abstract.Benchmark(Benchmark_abstract.CodeBasic)(Seqb) 58 | 59 | (* Arrays used for benchmarking *) 60 | let v = .< Array.init 100_000_000 (fun i -> i mod 10) >.;; 61 | let vHi = .< Array.init 10_000_000 (fun i -> i mod 10) >.;; 62 | let vLo = .< Array.init 10 (fun i -> i mod 10) >.;; 63 | let vFaZ = .< Array.init 10_000 (fun i -> i) >.;; 64 | let vZaF = .< Array.init 10_000_000 (fun i -> i) >.;; 65 | 66 | let options = { 67 | repetitions = 20; 68 | final_f = (fun _ -> .<()>.); 69 | } 70 | 71 | let pr_int = {options with 72 | final_f = fun x -> ..} 73 | 74 | let check_int n = {options with 75 | final_f = fun x -> ..} 76 | 77 | let script =[| 78 | perfS "sum_seq" v sum options; 79 | perfS "sumOfSquares_seq" v sumOfSquares options; 80 | perfS "sumOfSquaresEven_seq" v sumOfSquaresEven options; 81 | perfS "mapsMegamorphic_seq" v maps options; 82 | perfS "filtersMegamorphic_seq" v filters options; 83 | perfS2 "cart_seq" vHi vLo cart options; 84 | perfS2 "dotProduct_seq" vHi vHi dotProduct options; 85 | perfS2 "flatMapAfterZip_seq" vFaZ vFaZ flatMap_after_zipWith options; 86 | perfS2 "zipAfterFlatMap_seq" vZaF vZaF zipWith_after_flatMap options; 87 | perfS2 "flatMapTake_seq" vHi vLo flat_map_take options; 88 | perfS2 "zipFilterFilter_seq" v vHi zip_filter_filter options; 89 | perfS2 "zipFlatMapFlatMap_seq" v vLo zip_flat_flat options; 90 | perfS2 "runLengthDecoding_seq" v v decoding options; 91 | |];; 92 | 93 | let test = .< 94 | print_endline "Last checked: Sep 9, 2022"; 95 | assert (.~(sum v) == 450000000); 96 | assert (.~(sumOfSquares v) == 2850000000); 97 | assert (.~(sumOfSquaresEven v) == 1200000000); 98 | assert (.~(maps v) == 2268000000000); 99 | assert (.~(filters v) == 170000000); 100 | assert (.~(cart (vHi, vLo)) == 2025000000); 101 | assert (.~(dotProduct (vHi, vHi)) == 285000000); 102 | assert (.~(flatMap_after_zipWith (vFaZ, vFaZ)) == 1499850000000); 103 | assert (.~(zipWith_after_flatMap (vZaF, vZaF)) == 99999990000000); 104 | assert (.~(flat_map_take (vHi, vLo)) == 405000000); 105 | assert (.~(zip_filter_filter (v, vHi)) == 64000000); 106 | assert (.~(zip_flat_flat (v, vLo)) == 3250000000); 107 | assert (.~(decoding (v, v)) == 100000000); 108 | print_endline "All done" 109 | >. 110 | end 111 | 112 | module M = Benchmark_seq 113 | 114 | let main () = 115 | let compiler = "ocamlfind ocamlopt -O2 -unsafe -nodynlink -linkpkg util.cmx" in 116 | match Sys.argv with 117 | | [|_;"test"|] -> 118 | Benchmark.run_natively M.test 119 | ~compiler 120 | (* ~save:true *) 121 | | _ -> 122 | Benchmark.run_script M.script 123 | ~compiler 124 | 125 | let _ = main () 126 | -------------------------------------------------------------------------------- /benchmarks/benchmark_iter.ml: -------------------------------------------------------------------------------- 1 | module Iterb = struct 2 | open Iter 3 | 4 | type 'a cde = 'a code 5 | type 'a stream_raw = 'a t 6 | type 'a stream = 'a t cde 7 | 8 | let lift_tr1 : (('a -> 'b ) -> 'a stream_raw -> 'c stream_raw) cde 9 | -> ('a cde -> 'b cde) -> 'a stream -> 'c stream = 10 | fun tr f st -> .<.~tr (fun x -> .~(f ..)) .~st>. 11 | 12 | let lift_tr2 : (('a -> 'b -> 'c) -> ('a stream_raw -> 'b stream_raw -> 'c stream_raw) )cde 13 | -> ('a cde -> 'b cde -> 'c cde) -> 'a stream -> 'b stream -> 'c stream = 14 | fun tr f st1 st2 -> .<.~tr (fun x y -> .~(f .. ..)) .~st1 .~st2>. 15 | 16 | 17 | let of_arr : 'a array cde -> 'a stream = fun x -> .. 18 | 19 | let fold : ('z cde -> 'a cde -> 'z cde) -> 'z cde -> 'a stream -> 'z cde = 20 | fun f z st -> . .~(f .. ..)) .~z .~st>. 21 | 22 | let map : ('a cde -> 'b cde) -> 'a stream -> 'b stream = 23 | fun f st -> lift_tr1 .. f st 24 | 25 | let flat_map : ('a cde -> 'b stream) -> 'a stream -> 'b stream = 26 | fun f st -> lift_tr1 .. f st 27 | 28 | let filter : ('a cde -> bool cde) -> 'a stream -> 'a stream = 29 | fun f st -> lift_tr1 .. f st 30 | 31 | let take : int cde -> 'a stream -> 'a stream = 32 | fun n st -> .. 33 | 34 | let zip_with : ('a cde -> 'b cde -> 'c cde) -> ('a stream -> 'b stream -> 'c stream) = 35 | fun f st1 st2 -> failwith "unusable" 36 | 37 | type byte = int 38 | let byte_max = 255 39 | let decode = fun st -> 40 | st |> flat_map (fun el -> .< 41 | unfoldr (fun i -> 42 | if i < .~el then Some (false, i + 1) 43 | else ( 44 | if i > .~el then None 45 | else ( 46 | if i < byte_max then Some (true, i + 1) 47 | else failwith "wrong sequence" 48 | ) 49 | ) 50 | ) 0>.) 51 | end 52 | 53 | 54 | module Benchmark_iter = struct 55 | open Benchmark_types 56 | open Benchmark 57 | open Benchmark_abstract.Benchmark(Benchmark_abstract.CodeBasic)(Iterb) 58 | 59 | (* Arrays used for benchmarking *) 60 | let v = .< Array.init 100_000_000 (fun i -> i mod 10) >.;; 61 | let vHi = .< Array.init 10_000_000 (fun i -> i mod 10) >.;; 62 | let vLo = .< Array.init 10 (fun i -> i mod 10) >.;; 63 | let vFaZ = .< Array.init 10_000 (fun i -> i) >.;; 64 | let vZaF = .< Array.init 10_000_000 (fun i -> i) >.;; 65 | 66 | let options = { 67 | repetitions = 20; 68 | final_f = (fun _ -> .<()>.); 69 | } 70 | 71 | let pr_int = {options with 72 | final_f = fun x -> ..} 73 | 74 | let check_int n = {options with 75 | final_f = fun x -> ..} 76 | 77 | let script =[| 78 | perfS "sum_iter" v sum options; 79 | perfS "sumOfSquares_iter" v sumOfSquares options; 80 | perfS "sumOfSquaresEven_iter" v sumOfSquaresEven options; 81 | perfS "mapsMegamorphic_iter" v maps options; 82 | perfS "filtersMegamorphic_iter" v filters options; 83 | perfS2 "cart_iter" vHi vLo cart options; 84 | (* perfS2 "dotProduct_iter" vHi vHi dotProduct options; 85 | perfS2 "flatMapAfterZip_iter" vFaZ vFaZ flatMap_after_zipWith options; 86 | perfS2 "zipAfterFlatMap_iter" vZaF vZaF zipWith_after_flatMap options; *) 87 | perfS2 "flatMapTake_iter" vHi vLo flat_map_take options; 88 | (* perfS2 "zipFilterFilter_iter" v vHi zip_filter_filter options; 89 | perfS2 "zipFlatMapFlatMap_iter" v vLo zip_flat_flat options; 90 | perfS2 "runLengthDecoding_iter" v v decoding options; *) 91 | |];; 92 | 93 | let test = .< 94 | print_endline "Last checked: Sep 9, 2022"; 95 | assert (.~(sum v) == 450000000); 96 | assert (.~(sumOfSquares v) == 2850000000); 97 | assert (.~(sumOfSquaresEven v) == 1200000000); 98 | assert (.~(maps v) == 2268000000000); 99 | assert (.~(filters v) == 170000000); 100 | assert (.~(cart (vHi, vLo)) == 2025000000); 101 | (* assert (.~(dotProduct (vHi, vHi)) == 285000000); 102 | assert (.~(flatMap_after_zipWith (vFaZ, vFaZ)) == 1499850000000); 103 | assert (.~(zipWith_after_flatMap (vZaF, vZaF)) == 99999990000000); *) 104 | assert (.~(flat_map_take (vHi, vLo)) == 405000000); 105 | (* assert (.~(zip_filter_filter (v, vHi)) == 64000000); 106 | assert (.~(zip_flat_flat (v, vLo)) == 3250000000); 107 | assert (.~(decoding (v, v)) == 100000000); *) 108 | print_endline "All done" 109 | >. 110 | end 111 | 112 | module M = Benchmark_iter 113 | 114 | let main () = 115 | let compiler = "ocamlfind ocamlopt -O2 -unsafe -nodynlink -package iter -linkpkg util.cmx" in 116 | match Sys.argv with 117 | | [|_;"test"|] -> 118 | Benchmark.run_natively M.test 119 | ~compiler 120 | (* ~save:true *) 121 | | _ -> 122 | Benchmark.run_script M.script 123 | ~compiler 124 | 125 | let _ = main () 126 | -------------------------------------------------------------------------------- /examples/streamit-fm/test_c.ml: -------------------------------------------------------------------------------- 1 | module C = Pk_cde.Make(C_cde) 2 | module F32 = C.F32 3 | module Raw = Stream_raw_fn.Make(C) 4 | open Stream_cooked_fn.Make(C) 5 | open Stream_streamit_sdr_fn.Make(C)(Raw) 6 | 7 | let check_identity_list = 8 | Util.check_identity_list ~threshold_ave_rel_err:1e-6 9 | ~threshold_max_rel_err:1e-6 10 | 11 | let check_list ?name reader exp v = 12 | (* print_endline msg; *) 13 | let c = C.run_capture_output exp in 14 | let rec loop acc = 15 | match reader c :: acc with 16 | | exception End_of_file -> Scanf.Scanning.close_in c; List.rev acc 17 | | acc -> loop acc 18 | in 19 | let r = loop [] in 20 | check_identity_list ?name r v 21 | 22 | (* `Scanf.bscanf c "%h\n" Fun.id` doesn't work bacause of the last new line *) 23 | let reader_single c = Scanf.bscanf c "%s\n" (fun e -> Scanf.sscanf e "%.17f" Fun.id) 24 | (* let reader_single c = Scanf.bscanf c "%s\n" (fun e -> Scanf.sscanf e "%h" Fun.id) *) 25 | let check_float_list ?name = check_list ?name reader_single 26 | 27 | 28 | open Parameters 29 | 30 | let numLen = 10_000 (* For the non-native OCaml *) 31 | (* let numLen = 100_000 *) (* XXX stack overflow *) 32 | 33 | (* Verify each operator *) 34 | let lp1' = 35 | get_floats 36 | |> take C.(int numLen) 37 | |> fir_filter (Fir.lowPassFilter samplingRate cutoffFrequency numberOfTaps) 38 | ~decimation:4 39 | |> iter F32.print 40 | let lp1 = Sit_experiments.(Sit_emulator.run_filter lp1 (get_floats numLen)) 41 | let () = check_float_list ~name:"lp1" lp1' lp1 42 | 43 | let dem' = 44 | get_floats 45 | |> take C.(int numLen) 46 | |> fmDemodulator samplingRate maxAmplitude bandwidth 47 | |> iter F32.print 48 | let dem = Sit_experiments.(Sit_emulator.run_filter dem (get_floats numLen)) 49 | let () = check_float_list ~name:"dem" dem' dem 50 | 51 | let bp1' = 52 | get_floats 53 | |> take C.(int numLen) 54 | |> fir_filter (Fir.bandPassFilter samplingRate low high numberOfTaps) 55 | |> iter F32.print 56 | let bp1 = Sit_experiments.(run_band_pass_filter ~rate:samplingRate ~low ~high 57 | ~taps:numberOfTaps (get_floats numLen)) 58 | let () = check_float_list ~name:"bp1" bp1' bp1 59 | 60 | (* decreasing num_len: get stack overflow otherwise *) 61 | let numLen = 10_000 62 | 63 | let eq' = 64 | get_floats 65 | |> take C.(int numLen) 66 | |> fir_filter (Fir.equalizer samplingRate bands eqCutoff eqGain numberOfTaps) 67 | |> iter F32.print 68 | let eq = Sit_experiments.(run_equalizer ~rate:samplingRate ~bands ~cutoffs:eqCutoff 69 | ~gains:eqGain ~taps:numberOfTaps (get_floats numLen)) 70 | let () = check_float_list ~name:"eq" eq' eq 71 | 72 | 73 | (* Verify combined operators *) 74 | let lp1_dem' = 75 | get_floats 76 | |> take C.(int numLen) 77 | |> fir_filter (Fir.lowPassFilter samplingRate cutoffFrequency numberOfTaps) 78 | ~decimation:4 79 | |> fmDemodulator samplingRate maxAmplitude bandwidth 80 | |> iter F32.print 81 | let lp1_dem = Sit_experiments.(Sit_emulator.run_filter lp1 (get_floats numLen) |> Sit_emulator.run_filter dem) 82 | let () = check_float_list ~name:"lp1_dem" lp1_dem' lp1_dem 83 | 84 | let fmradio' = 85 | get_floats 86 | |> take C.(int numLen) 87 | |> fir_filter (Fir.lowPassFilter samplingRate cutoffFrequency numberOfTaps) 88 | ~decimation:4 89 | |> fmDemodulator samplingRate maxAmplitude bandwidth 90 | |> fir_filter (Fir.equalizer samplingRate bands eqCutoff eqGain numberOfTaps) 91 | |> iter F32.print 92 | let fmradio = Sit_experiments.( 93 | Sit_emulator.run_filter lp1 (get_floats numLen) 94 | |> Sit_emulator.run_filter dem 95 | |> run_equalizer ~rate:samplingRate ~bands ~cutoffs:eqCutoff 96 | ~gains:eqGain ~taps:numberOfTaps 97 | ) 98 | let () = check_float_list ~name:"fmradio" fmradio' fmradio 99 | 100 | (* These are using 32-bit floats on C side (sit_experiments is OCaml and 101 | hence uses 64-bit floats) 102 | === Check lp1 === 103 | ave_abs_err: 0.00064825302581439034 104 | ave_rel_err: 0.00000012615939924911 105 | max abs err: 0.0057231 = 9368.08 - 9368.08, where i = 1868 106 | max rel err: 6.62518e-07 107 | === Check dem === 108 | ave_abs_err: 9.31369323043873720280 109 | ave_rel_err: 0.00000002760118218441 110 | max abs err: 34.1985 = 3.37496e+08 - 3.37496e+08, where i = 239 111 | max rel err: 1.0133e-07 112 | === Check bp1 === 113 | ave_abs_err: 0.00000023432995727028 114 | ave_rel_err: 0.00000010098027081450 115 | max abs err: 1.93291e-06 = 4.28914 - 4.28914, where i = 9190 116 | max rel err: 5.68127e-07 117 | === Check eq === 118 | ave_abs_err: 0.00000034109758354081 119 | ave_rel_err: 0.00000010387030184281 120 | max abs err: 2.89345e-06 = 6.11653 - 6.11653, where i = 9310 121 | max rel err: 5.2574e-07 122 | === Check lp1_dem === 123 | ave_abs_err: 9.25971163687060538905 124 | ave_rel_err: 0.00000002743621641624 125 | max abs err: 31.5895 = 3.37498e+08 - 3.37498e+08, where i = 65 126 | max rel err: 9.35989e-08 127 | === Check fmradio === 128 | ave_abs_err: 0.01671405524958003569 129 | ave_rel_err: 0.00000007563443211207 130 | max abs err: 0.0734369 = 220984 - 220984, where i = 34 131 | max rel err: 3.32318e-07 132 | *) 133 | 134 | let () = print_endline "filter test in C: All done" 135 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/play_main.c: -------------------------------------------------------------------------------- 1 | /* The main module for playback of Strymonas for GNU Radio 2 | processing 3 | 4 | The input still comes from the file (should be switched to 5 | HackRF later, then pipe from the HackRF application, e.g. 6 | $ hackrf_transfer -r - -f 82500000 -s 3072000 -g 30 -l 40 -a 0 7 | ) 8 | The output goes on stdout. Pipe to the play application, e.g. 9 | - $ aplay -t raw -f FLOAT_LE -r48000 10 | - $ aplay -t raw -f S16_LE -r48000 11 | - $ ffplay -autoexit -nodisp -f f32le -ar 48000 -ac 1 -i - 12 | - $ ffplay -autoexit -nodisp -f s16le -ar 48000 -ac 1 -i - 13 | 14 | Probably this file should be moved somewhere else... 15 | 16 | We assume that the generated file is "/tmp/generated.c" 17 | It should use "write_f32_le" for outputing. 18 | (in the future, consider "write_s16_le": makes the output smaller. 19 | But one need to set up a proper gain to hear something.) 20 | 21 | */ 22 | /* 23 | (Use gcc-13 in macOS) 24 | $ gcc -Ofast -march=native -W -Wall play_main.c 25 | $ ./a.out | ffplay -autoexit -nodisp -f f32le -ar 48000 -ac 1 -i - 26 | $ hackrf_transfer -r - -f 77100000 -s 3072000 -g 30 -l 40 -a 0 | ./a.out | ffplay -autoexit -nodisp -f f32le -ar 48000 -ac 1 -i - 27 | 28 | $ gcc -Ofast -mfpu=vfp -mfloat-abi=hard -march=armv6zk -mtune=arm1176jzf-s -W -Wall play_main.c 29 | $ ./a.out | aplay -t raw -f FLOAT_LE -r 48000 -c 1 -i - 30 | $ hackrf_transfer -r - -f 77100000 -s 3072000 -g 30 -l 40 -a 0 | ./a.out | aplay -t raw -f FLOAT_LE -r 48000 -c 1 -i - 31 | */ 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | // Read an array of short complex numbers from a given file. 39 | // arrlen on input should point to the desired size, or 0 40 | // In the latter case, we read the whole file, and store the array size 41 | // in that location. 42 | static float complex *load_file(const char* file_name, size_t * arrlen) { 43 | size_t desired_size = *arrlen; 44 | 45 | FILE *fp = fopen(file_name, "rb"); 46 | assert( fp != NULL ); 47 | 48 | if ( desired_size == 0 ) { 49 | struct stat stat; 50 | assert( fstat(fileno(fp),&stat) == 0); 51 | desired_size = stat.st_size / sizeof(float complex); 52 | } 53 | 54 | assert ( desired_size > 0 ); 55 | float complex *arr = 56 | (float complex *)malloc(sizeof(float complex) * desired_size); 57 | size_t const real_size = fread(arr, sizeof(float complex), desired_size, fp); 58 | if ( *arrlen > 0 ) 59 | { 60 | if (desired_size != real_size) 61 | perror("read error"), exit(1); 62 | } 63 | else *arrlen = real_size; 64 | 65 | fclose(fp); 66 | return arr; 67 | } 68 | 69 | #include 70 | #include 71 | #include 72 | 73 | // map the file as a short complex array 74 | // Incidentally, if we set MAP_POPULATE as the flag, the behevior is the 75 | // same as load_file 76 | static float complex *map_file(const char* file_name, size_t * arrlen) { 77 | size_t desired_size = *arrlen; 78 | 79 | int fd = open(file_name, O_RDONLY); 80 | assert( fd >= 0 ); 81 | 82 | if ( desired_size == 0 ) { 83 | struct stat stat; 84 | assert( fstat(fd,&stat) == 0); 85 | desired_size = stat.st_size / sizeof(float complex); 86 | } 87 | 88 | assert ( desired_size > 0 ); 89 | 90 | float complex * arr = 91 | (float complex *)mmap(NULL,desired_size*sizeof(float complex), 92 | PROT_READ,MAP_PRIVATE,fd,0); 93 | assert (arr != MAP_FAILED); 94 | close(fd); 95 | 96 | if ( *arrlen == 0 ) 97 | *arrlen = desired_size; 98 | return arr; 99 | } 100 | 101 | static FILE *global_fp; 102 | 103 | // prototype of the generated code 104 | void gr_fmradio(int const n_1,float complex * const a_2); 105 | void gr_fmradio_fread(void); 106 | 107 | int main(void) { 108 | size_t arr_size = 0; 109 | // float complex *arr = load_file("sps3072000_c32_30s.pcm",&arr_size); 110 | float complex *arr = map_file("sps3072000_c32_30s.pcm",&arr_size); 111 | fprintf(stderr,"\nTesting on the array of size %lu\n",arr_size); 112 | // Could do forking here, if really desired. OTH, leave it to shell 113 | gr_fmradio(arr_size,arr); 114 | // free(arr); if mapped, needs munpmap. Just leave it 115 | 116 | // global_fp = stdin; 117 | // gr_fmradio_fread(); 118 | } 119 | 120 | #include 121 | #include 122 | #include "fast_atanf.h" 123 | #ifndef CMPLXF 124 | #define CMPLXF(x, y) __builtin_complex ((float) (x), (float) (y)) 125 | #endif 126 | 127 | static inline void init_read(void) { 128 | fseek(global_fp, 0, SEEK_SET); 129 | } 130 | 131 | static inline int read_c32(float complex *arr, int const arr_size) { 132 | return fread(arr,sizeof(float complex),arr_size,global_fp); 133 | } 134 | 135 | static inline int read_ci8_in_c32(float complex *arr, int const arr_size) { 136 | int8_t buff[arr_size*2]; 137 | int const num = fread(buff, sizeof(int8_t), arr_size*2, global_fp)/2; 138 | int i; 139 | for (i = 0; i < arr_size; i += 1) 140 | { 141 | arr[i] = CMPLXF((float)buff[i*2] / 127., (float)buff[i*2 + 1] / 127.); 142 | } 143 | 144 | return num; 145 | } 146 | 147 | // write on the stdout 148 | static inline void write_s16_le(float const x) { 149 | int16_t const y = (int16_t)(x * 127); 150 | fwrite(&y, sizeof(y), 1, stdout); 151 | } 152 | 153 | static inline void write_f32_le(float const x) { 154 | fwrite(&x,sizeof(x),1,stdout); 155 | } 156 | 157 | #include "/tmp/generated.c" 158 | 159 | -------------------------------------------------------------------------------- /benchmarks/benchmark_c.ml: -------------------------------------------------------------------------------- 1 | (* Generating code for C benchmarks *) 2 | 3 | (* 4 | #directory "../lib";; 5 | #directory "../lib/backends/Trx";; 6 | #directory "../lib/backends/C";; 7 | #load "stream.cma";; 8 | *) 9 | 10 | module type cde_ex = module type of Cde_ex 11 | 12 | module CCodePV = struct 13 | include Pk_cde.Make(C_cde) 14 | 15 | let print_one_array : string -> Format.formatter -> 16 | 'a tbase -> ('a arr -> 'b stm) -> unit = fun name ppf tp body -> 17 | (arg_base tint @@ fun n -> 18 | arg_array n tp @@ fun a -> body a |> nullary_proc) 19 | |> pp_proc ~name ppf; 20 | Format.fprintf ppf "@." 21 | let print_two_array : string -> Format.formatter -> 'a tbase * 'b tbase -> 22 | ('a arr * 'b arr -> 'c stm) -> unit = fun name ppf (tp1,tp2) body -> 23 | (arg_base tint @@ fun n1 -> arg_array n1 tp1 @@ fun a1 -> 24 | arg_base tint @@ fun n2 -> arg_array n2 tp2 @@ fun a2 -> 25 | body (a1,a2) |> nullary_proc) 26 | |> pp_proc ~name ppf; 27 | Format.fprintf ppf "@." 28 | end 29 | 30 | module C = CCodePV 31 | module Raw = Stream_raw_fn.Make(C) 32 | open Stream_cooked_fn.Make_ex(C)(Raw) 33 | 34 | type byte = int (* element of an outer stream *) 35 | let byte_max = 255 36 | 37 | let encode : bool cstream -> byte cstream = fun st -> 38 | Raw.initializing_ref C.(int 0) @@ fun zeros_count -> 39 | st |> Raw.map_raw ~linear:false (fun el k -> 40 | let open C in 41 | letl (dref zeros_count) @@ fun zeros -> 42 | if_ el (seq (zeros_count := int 0) (k zeros)) @@ 43 | seq (zeros_count := zeros + int 1) @@ 44 | if1 (dref zeros_count = int byte_max) @@ 45 | seq (zeros_count := int 0) (k (int byte_max)) 46 | ) 47 | 48 | (* advanced and more interesting use of flat_map *) 49 | let decode : byte cstream -> bool cstream = fun st -> 50 | st |> flat_map (fun el -> 51 | Raw.pull_array C.(el + int 1) @@ fun i k -> 52 | let open C in 53 | if_ (i < el) (k (bool false)) 54 | (if1 (i < int byte_max) (k (bool true)))) 55 | 56 | let generate ppf = 57 | let () = C.print_one_array "sum" ppf C_cde.tint @@ fun arr -> 58 | of_arr arr 59 | |> sum_int_long 60 | in 61 | let () = C.print_one_array "sum_squares" ppf C_cde.tint @@ fun arr -> 62 | of_arr arr 63 | |> map C.(fun x -> x * x) 64 | |> sum_int_long 65 | in 66 | let () = C.print_one_array "maps" ppf C_cde.tint @@ fun arr -> 67 | of_arr arr 68 | |> map C.(fun x -> x * int 1) 69 | |> map C.(fun x -> x * int 2) 70 | |> map C.(fun x -> x * int 3) 71 | |> map C.(fun x -> x * int 4) 72 | |> map C.(fun x -> x * int 5) 73 | |> map C.(fun x -> x * int 6) 74 | |> map C.(fun x -> x * int 7) 75 | |> sum_int_long 76 | in 77 | let () = C.print_one_array "filters" ppf C_cde.tint @@ fun arr -> 78 | of_arr arr 79 | |> filter C.(fun x -> x > int 1) 80 | |> filter C.(fun x -> x > int 2) 81 | |> filter C.(fun x -> x > int 3) 82 | |> filter C.(fun x -> x > int 4) 83 | |> filter C.(fun x -> x > int 5) 84 | |> filter C.(fun x -> x > int 6) 85 | |> filter C.(fun x -> x > int 7) 86 | |> sum_int_long 87 | in 88 | let () = C.print_one_array "sum_squares_even" ppf C_cde.tint @@ fun arr -> 89 | of_arr arr 90 | |> filter C.(fun x -> x mod (int 2) = int 0) 91 | |> map C.(fun x -> x * x) 92 | |> sum_int_long 93 | in 94 | let () = C.print_two_array "cart" ppf (C_cde.tint,C_cde.tint) @@ 95 | fun (arr1,arr2) -> 96 | of_arr arr1 97 | |> flat_map (fun x -> of_arr arr2 |> map C.(fun y -> x * y)) 98 | |> sum_int_long 99 | in 100 | let () = C.print_two_array "dot_product" ppf (C_cde.tint,C_cde.tint) @@ 101 | fun (arr1,arr2)-> 102 | zip_with C.( * ) (of_arr arr1) (of_arr arr2) 103 | |> sum_int_long 104 | in 105 | let () = C.print_two_array "flatmap_after_zipwith" ppf 106 | (C_cde.tint,C_cde.tint) @@ 107 | fun (arr1,arr2) -> 108 | zip_with C.( + ) (of_arr arr1) (of_arr arr1) 109 | |> flat_map (fun x -> of_arr arr2|> map C.(fun el -> el + x)) 110 | |> sum_int_long 111 | in 112 | let () = C.print_two_array "zipwith_after_flatmap" ppf 113 | (C_cde.tint,C_cde.tint) @@ fun (arr1,arr2) -> 114 | of_arr arr1 115 | |> flat_map (fun x -> of_arr arr2 |> map C.(fun y -> y + x)) 116 | |> zip_with C.( + ) (of_arr arr1) 117 | |> sum_int_long 118 | in 119 | let () = C.print_two_array "flat_map_take" ppf (C_cde.tint,C_cde.tint) @@ 120 | fun (arr1,arr2)-> 121 | of_arr arr1 122 | |> flat_map (fun x -> of_arr arr2 |> map C.(fun y -> x * y)) 123 | |> take (C.int 20_000_000) 124 | |> sum_int_long 125 | in 126 | let () = C.print_two_array "zip_filter_filter" ppf 127 | (C_cde.tint,C_cde.tint) @@ fun (arr1,arr2) -> 128 | zip_with C.( + ) 129 | (of_arr arr1 |> filter C.(fun x -> x > int 7)) 130 | (of_arr arr2 |> filter C.(fun x -> x > int 5)) 131 | |> sum_int_long 132 | in 133 | let () = C.print_two_array "zip_flat_flat" ppf (C_cde.tint,C_cde.tint) @@ 134 | fun (arr1,arr2) -> 135 | zip_with C.( + ) 136 | (of_arr arr1 |> 137 | flat_map (fun x -> of_arr arr2 |> map C.(fun y -> x * y))) 138 | (of_arr arr2 |> 139 | flat_map (fun x -> of_arr arr1 |> map C.(fun y -> x - y))) 140 | |> take (C.int 200_000_000) 141 | |> sum_int_long 142 | in 143 | let () = C.print_two_array "decoding" ppf (C_cde.tint,C_cde.tint) @@ 144 | fun (arr1,arr2) -> 145 | zip_with C.(||) (of_arr arr1 |> decode) (of_arr arr2 |> decode) 146 | |> map C.(fun x -> cond x (int 1) (int 0)) 147 | |> sum_int_long 148 | in 149 | () 150 | 151 | let () = generate Format.std_formatter 152 | 153 | let () = 154 | let code_file = "/tmp/bench.c" in 155 | let c = open_out code_file in 156 | let ppf = Format.formatter_of_out_channel c in 157 | generate ppf; 158 | close_out c; 159 | Printf.printf "\nGenerated %s\n" code_file 160 | 161 | ;; 162 | -------------------------------------------------------------------------------- /examples/gnuradio-fm/stream_gnuradio_sdr_fn.ml: -------------------------------------------------------------------------------- 1 | module type cde_ex = module type of Backends.C 2 | module type stream_raw = module type of Stream_raw 3 | 4 | module Make(C: cde_ex)(Raw: stream_raw with 5 | type 'a exp = 'a C.exp and 6 | type 'a stm = 'a C.stm and 7 | type 'a mut = 'a C.mut and 8 | type 'a arr = 'a C.arr and 9 | type 'a tbase = 'a C.tbase) = struct 10 | type 'a exp = 'a C.exp 11 | type 'a stm = 'a C.stm 12 | type 'a mut = 'a C.mut 13 | type 'a arr = 'a C.arr 14 | type 'a tbase = 'a C.tbase 15 | type 'a stream = 'a Raw.stream 16 | type 'a cstream = 'a exp stream 17 | open Raw 18 | 19 | module F32 = C.F32 20 | module C32 = C.C32 21 | module Cook = Stream_cooked_fn.Make_ex(C)(Raw) 22 | module Window = Window_fn.Make(C)(Raw) 23 | 24 | let ( let- ) c k = c k 25 | 26 | (* Left-to-right function composition *) 27 | let (>>) f g = fun x -> f x |> g 28 | 29 | (* useful FFI functions. Should probably be moved to somewhere else *) 30 | let fast_atan2f : (F32.t C.exp -> F32.t C.exp -> F32.t C.exp) C.ff = 31 | let ff = 32 | let open C_cde in 33 | let module I = OffshoringIR in 34 | F32.{invoke = binary_op (tbase,tbase,I.OP.name "fast_atan2f",tbase)} 35 | in 36 | C.{invoke = inj2 ff.invoke} 37 | 38 | let faster_atan2f : (F32.t C.exp -> F32.t C.exp -> F32.t C.exp) C.ff = 39 | let ff = 40 | let open C_cde in 41 | let module I = OffshoringIR in 42 | F32.{invoke = binary_op (tbase,tbase,I.OP.name "faster_atan2f",tbase)} 43 | in 44 | C.{invoke = inj2 ff.invoke} 45 | 46 | let write_s16_le : (F32.t C.exp -> unit C.stm) C.ff = 47 | let ff = 48 | let open C_cde in 49 | let module I = OffshoringIR in 50 | {invoke = fun (_,e) -> stmt_app (I.OP.name "write_s16_le") [e]} 51 | in 52 | {invoke = C.dyn >> ff.invoke >> C.inj_stm } 53 | 54 | let write_f32_le : (F32.t C.exp -> unit C.stm) C.ff = 55 | let ff = 56 | let open C_cde in 57 | let module I = OffshoringIR in 58 | {invoke = fun (_,e) -> stmt_app (I.OP.name "write_f32_le") [e]} 59 | in 60 | {invoke = C.dyn >> ff.invoke >> C.inj_stm } 61 | 62 | let null_func : (F32.t C.exp -> unit C.stm) C.ff = 63 | let ff = 64 | let open C_cde in 65 | let module I = OffshoringIR in 66 | {invoke = fun (_,e) -> stmt_app (I.OP.name "null_func") [e]} 67 | in 68 | {invoke = C.dyn >> ff.invoke >> C.inj_stm } 69 | 70 | let init_read : (unit -> unit C.stm) C.ff = 71 | let ff = 72 | let open C_cde in 73 | let module I = OffshoringIR in 74 | {invoke = fun () -> stmt_app (I.OP.name "init_read") []} 75 | in 76 | {invoke = fun () -> C.inj_stm @@ ff.invoke ()} 77 | 78 | let read_c32 : (C32.t C.arr -> int C.exp) C.ff = 79 | let ff = 80 | let open C_cde in 81 | let module I = OffshoringIR in 82 | {invoke = fun (_,(_,n),a) -> exp_app tint (I.OP.name "read_c32") [LocalVar a;n]} 83 | in 84 | {invoke = (fun (_,arr) -> arr) >> ff.invoke >> C.inj} 85 | 86 | let read_ci8_in_c32 : (C32.t C.arr -> int C.exp) C.ff = 87 | let ff = 88 | let open C_cde in 89 | let module I = OffshoringIR in 90 | {invoke = fun (_,(_,n),a) -> exp_app tint (I.OP.name "read_ci8_in_c32") [LocalVar a;n]} 91 | in 92 | {invoke = (fun (_,arr) -> arr) >> ff.invoke >> C.inj} 93 | 94 | let file_read_c32 n = 95 | let dummy_exp = C.int 0 in 96 | let- buff = Raw.initializing_uarr C32.tbase n in 97 | let- size = Raw.initializing_ref C.(int 1) in 98 | Raw.infinite C.(fun k -> (size := read_c32.invoke buff) @. k dummy_exp) 99 | |> Raw.guard C.(Raw.GExp (dref size > int 0)) 100 | |> Raw.flat_map_raw C.(fun _ -> 101 | Raw.pull_array (dref size) (array_get buff) 102 | ) 103 | 104 | let file_read_ci8_in_c32 n = 105 | let dummy_exp = C.int 0 in 106 | let- buff = Raw.initializing_uarr C32.tbase n in 107 | let- size = Raw.initializing_ref C.(int 1) in 108 | Raw.infinite C.(fun k -> (size := read_ci8_in_c32.invoke buff) @. k dummy_exp) 109 | |> Raw.guard C.(Raw.GExp (dref size > int 0)) 110 | |> Raw.flat_map_raw C.(fun _ -> 111 | Raw.pull_array (dref size) (array_get buff) 112 | ) 113 | 114 | 115 | (* Transformers *) 116 | (** fir_filter_ABC: A=input, B=output, C=weights *) 117 | let fir_filter_fff ?(decimation=1) : 118 | float array -> F32.t cstream -> F32.t cstream = 119 | fun weights st -> 120 | let ntaps = Array.length weights in 121 | let (module Win) = Window.make_window F32.tbase ntaps decimation in 122 | st 123 | |> Win.make_stream ~padding:true 124 | |> map_raw F32.(Win.dot tbase lit weights ( +. ) ( *. )) 125 | 126 | let fir_filter_ccf ?(decimation=1) : 127 | float array -> C32.t cstream -> C32.t cstream = 128 | fun weights st -> 129 | let ntaps = Array.length weights in 130 | let (module Win) = Window.make_window C32.tbase ntaps decimation in 131 | st 132 | |> Win.make_stream ~padding:true 133 | |> map_raw F32.(Win.dot tbase lit weights C32.( +. ) C32.scale) 134 | 135 | let monauralize : (F32.t exp * F32.t exp) stream -> F32.t cstream = 136 | map_raw' F32.(fun (ch1,ch2) -> F32.lit 0.5 *. (ch1 +. ch2)) 137 | 138 | (** The Quadrature Demod blocks: 139 | - https://github.com/gnuradio/gnuradio/blob/master/gr-analog/lib/quadrature_demod_cf_impl.cc#L42 140 | - https://wiki.gnuradio.org/index.php/Quadrature_Demod 141 | 142 | FM demodulation of quadrature-demodulated signal (I/Q components) *) 143 | let demod_quad_fm (g : float) : C32.t cstream -> F32.t cstream = 144 | let (module Win) = Window.make_window C32.tbase 2 1 in 145 | Win.make_stream 146 | >> map_raw C32.(Win.reduce (fun t0 tprev -> t0 *. conj tprev)) 147 | >> Cook.map F32.(fun e -> 148 | lit g *. fast_atan2f.invoke C32.(imag e) C32.(real e)) 149 | 150 | let demod_quad_fm_faster (g : float) : C32.t cstream -> F32.t cstream = 151 | let (module Win) = Window.make_window C32.tbase 2 1 in 152 | Win.make_stream 153 | >> map_raw C32.(Win.reduce (fun t0 tprev -> t0 *. conj tprev)) 154 | >> Cook.map F32.(fun e -> 155 | lit g *. faster_atan2f.invoke C32.(imag e) C32.(real e)) 156 | end 157 | -------------------------------------------------------------------------------- /examples/streamit-fm/test.ml: -------------------------------------------------------------------------------- 1 | module C = Backends.MetaOCamlExt 2 | module F32 = C.F32 3 | module Raw = Stream_raw_fn.Make(C) 4 | open Stream_cooked_fn.Make(C) 5 | open Stream_streamit_sdr_fn.Make(C)(Raw) 6 | 7 | let check_identity_list = Util.check_identity_list 8 | 9 | let collect = fold C.(fun z x -> cons x z) C.(nil ()) 10 | 11 | open Parameters 12 | 13 | let numLen = 10_000 (* For the non-native OCaml *) 14 | (* let numLen = 100_000 *) (* XXX stack overflow *) 15 | 16 | (* Synthetic input stream used by StreamIt in its benchmarks *) 17 | let get_floats : F32.t cstream = 18 | iota C.(int 0) |> map F32.of_int 19 | 20 | (* Verify each operator *) 21 | let lp1' = 22 | get_floats 23 | |> take C.(int numLen) 24 | |> fir_filter (Fir.lowPassFilter samplingRate cutoffFrequency numberOfTaps) 25 | ~decimation:4 26 | |> collect 27 | |> C.run 28 | |> List.rev 29 | let lp1 = Sit_experiments.(Sit_emulator.run_filter lp1 (get_floats numLen)) 30 | let () = check_identity_list ~name:"lp1" lp1' lp1 31 | 32 | 33 | (* gain = 214859173.174: 34 | string_of_float limits the significant figures to 12 *) 35 | let dem' = 36 | get_floats 37 | |> take C.(int numLen) 38 | |> fmDemodulator samplingRate maxAmplitude bandwidth 39 | |> collect 40 | |> C.run 41 | |> List.rev 42 | (* gain = 214859173.1740587056... *) 43 | let dem = Sit_experiments.(Sit_emulator.run_filter dem (get_floats numLen)) 44 | let () = check_identity_list ~name:"dem" dem' dem 45 | 46 | let bp1' = 47 | get_floats 48 | |> take C.(int numLen) 49 | |> fir_filter (Fir.bandPassFilter samplingRate low high numberOfTaps) 50 | |> collect 51 | |> C.run 52 | |> List.rev 53 | let bp1 = Sit_experiments.(run_band_pass_filter ~rate:samplingRate ~low ~high 54 | ~taps:numberOfTaps (get_floats numLen)) 55 | let () = check_identity_list ~name:"bp1" bp1' bp1 56 | 57 | 58 | (* decreasing num_len: get stack overflow otherwise *) 59 | let numLen = 10_000 60 | let eq' = 61 | get_floats 62 | |> take C.(int numLen) 63 | |> fir_filter (Fir.equalizer samplingRate bands eqCutoff eqGain numberOfTaps) 64 | |> collect 65 | |> C.run 66 | |> List.rev 67 | let eq = Sit_experiments.(run_equalizer ~rate:samplingRate ~bands ~cutoffs:eqCutoff 68 | ~gains:eqGain ~taps:numberOfTaps (get_floats numLen)) 69 | let () = check_identity_list ~name:"eq" eq' eq 70 | 71 | 72 | (* Verify combined operators *) 73 | let lp1_dem' = 74 | get_floats 75 | |> take C.(int numLen) 76 | |> fir_filter (Fir.lowPassFilter samplingRate cutoffFrequency numberOfTaps) 77 | ~decimation:4 78 | |> fmDemodulator samplingRate maxAmplitude bandwidth 79 | |> collect 80 | |> C.run 81 | |> List.rev 82 | let lp1_dem = Sit_experiments.(Sit_emulator.run_filter lp1 (get_floats numLen) |> Sit_emulator.run_filter dem) 83 | let () = check_identity_list ~name:"lp1_dem" lp1_dem' lp1_dem 84 | 85 | let fmradio' = 86 | get_floats 87 | |> take C.(int numLen) 88 | |> fir_filter (Fir.lowPassFilter samplingRate cutoffFrequency numberOfTaps) 89 | ~decimation:4 90 | |> fmDemodulator samplingRate maxAmplitude bandwidth 91 | |> fir_filter (Fir.equalizer samplingRate bands eqCutoff eqGain numberOfTaps) 92 | |> collect 93 | |> C.run 94 | |> List.rev 95 | let fmradio = Sit_experiments.( 96 | Sit_emulator.run_filter lp1 (get_floats numLen) 97 | |> Sit_emulator.run_filter dem 98 | |> run_equalizer ~rate:samplingRate ~bands ~cutoffs:eqCutoff 99 | ~gains:eqGain ~taps:numberOfTaps 100 | ) 101 | let () = check_identity_list ~name:"fmradio" fmradio' fmradio 102 | 103 | (* 104 | === Check lp1 === 105 | ave_abs_err: 0.00000002404169125911 106 | ave_rel_err: 0.00000000000048102121 107 | max err: 4.81232e-08 = 99925 - 99925 at pt 19986, rel err 4.81593e-13 108 | === Check dem === 109 | ave_abs_err: 0.00009222083312799757 110 | ave_rel_err: 0.00000000000027325132 111 | max err: 9.2268e-05 = 3.37468e+08 - 3.37468e+08 at pt 81, rel err 2.73413e-13 112 | === Check bp1 === 113 | ave_abs_err: 0.00000000000069858133 114 | ave_rel_err: 0.00000000000003003906 115 | max err: 1.45661e-12 = 46.4401 - 46.4401 at pt 99813, rel err 3.13654e-14 116 | === Check eq === 117 | ave_abs_err: 0.00000000000002526018 118 | ave_rel_err: 0.00000000000000771608 119 | max err: 5.50671e-14 = 6.43475 - 6.43475 at pt 9796, rel err 8.55776e-15 120 | === Check lp1_dem === 121 | ave_abs_err: 0.00009222032439486730 122 | ave_rel_err: 0.00000000000027324596 123 | max err: 9.2268e-05 = 3.37478e+08 - 3.37478e+08 at pt 13, rel err 2.73404e-13 124 | === Check fmradio === 125 | ave_abs_err: 0.00000005868002334261 126 | ave_rel_err: 0.00000000000026553876 127 | max err: 5.89353e-08 = 220985 - 220985 at pt 1408, rel err 2.66694e-13 128 | 129 | With the new bandpass filter 130 | === Check bp1 === 131 | ave_abs_err: 0.00000000001599116183 132 | ave_rel_err: 0.00000000000068761710 133 | max err: 3.19957e-11 = 46.4777 - 46.4777 at pt 99894, rel err 6.8841e-13 134 | 135 | With new equalizer 136 | ave_abs_err: 0.00000000000021023768 137 | ave_rel_err: 0.00000000000006422420 138 | max err: 4.21885e-13 = 6.51332 - 6.51332 at pt 9916, rel err 6.47726e-14 139 | === Check fmradio === 140 | ave_abs_err: 0.00000007457992695117 141 | ave_rel_err: 0.00000000000033748898 142 | max err: 7.48259e-08 = 220985 - 220985 at pt 772, rel err 3.38602e-13 143 | 144 | *) 145 | 146 | let lpfs' = 147 | let open Fir in 148 | get_floats 149 | |> take C.(int numLen) 150 | |> fir_filter Fir.( 151 | seq (lowPassFilter samplingRate 138000000. numberOfTaps) 152 | (lowPassFilter samplingRate 108000000. numberOfTaps)) 153 | |> collect 154 | |> C.run 155 | |> List.rev 156 | 157 | let lpfs = 158 | let open Fir in 159 | get_floats 160 | |> take C.(int numLen) 161 | |> fir_filter (Fir.lowPassFilter samplingRate 138000000. numberOfTaps) 162 | |> fir_filter (Fir.lowPassFilter samplingRate 108000000. numberOfTaps) 163 | |> collect 164 | |> C.run 165 | |> List.rev 166 | 167 | let () = check_identity_list ~name:"lpfs" lpfs' lpfs 168 | (* 169 | === Check lpfs === 170 | ave_abs_err: 0.00000000000232766359 171 | ave_rel_err: 0.00000000000000045942 172 | max abs err: 2.36469e-11 = 8979.45 - 8979.45, where i = 8916 173 | max rel err: 2.63344e-15 174 | *) 175 | 176 | let () = print_endline "filter test in (non-native) OCaml: All done" 177 | -------------------------------------------------------------------------------- /lib/cde.mli: -------------------------------------------------------------------------------- 1 | (* Abstract interface for code generation 2 | 3 | Which code generation facilities we really need for this library 4 | 5 | This interface concerns the code generation used in strymonas 6 | itself. The `semantic actions' (user supplied code for mapping, etc) 7 | generally uses a richer interface: see exp_ex 8 | 9 | The object/target language is considered statement-based, 10 | distinguising expressions and statements (like C and many such 11 | languages do). 12 | For the justification of the design, see Generating C 13 | *) 14 | 15 | type 'a exp (* Abstract type of expressions *) 16 | 17 | type 'a tbase (* Base types *) 18 | val tbool : bool tbase (* More can/will be added later *) 19 | val tint : int tbase (* corresponds to C int, the `normal' 20 | or `fastest' int type. At least 21 | 32-bit (usually 32-bit) 22 | *) 23 | (* Backends may provide more base types 24 | unit is not a base type! 25 | In fact, there are no expressions 26 | of unit type 27 | *) 28 | 29 | type 'a stm (* Statements. May produce values: 30 | see return in C 31 | *) 32 | 33 | (* Local let, without movement. Only in statement context, like in C *) 34 | val letl : 'a exp -> (('a exp -> 'w stm) -> 'w stm) 35 | val glet : 'a exp -> 'a exp (* possibly let-insertion, at some 36 | higher place. That is, movement 37 | is OK 38 | *) 39 | 40 | val seq : unit stm -> 'a stm -> 'a stm 41 | val unit : unit stm 42 | 43 | val ret : 'a exp -> 'a stm (* Like return in C *) 44 | 45 | (* Booleans *) 46 | val bool : bool -> bool exp 47 | val not : bool exp -> bool exp 48 | val (&&) : bool exp -> bool exp -> bool exp (* shortcut eval *) 49 | val (||) : bool exp -> bool exp -> bool exp 50 | 51 | (* Integers *) 52 | val int : int -> int exp 53 | val ( + ) : int exp -> int exp -> int exp 54 | val ( - ) : int exp -> int exp -> int exp 55 | val (mod) : int exp -> int exp -> int exp 56 | val ( =) : int exp -> int exp -> bool exp 57 | val ( <) : int exp -> int exp -> bool exp 58 | val ( >) : int exp -> int exp -> bool exp 59 | val (<=) : int exp -> int exp -> bool exp 60 | val (>=) : int exp -> int exp -> bool exp 61 | 62 | (* It turns out that Stdlib.min is slow! It was responsible for 63 | the big slowdown in one of the benchmarks. It seems OCaml does not 64 | inline min and generates a function call instead. 65 | *) 66 | val imin : int exp -> int exp -> int exp 67 | 68 | 69 | (* Control operators *) 70 | (* Separate if_ expression from if-statement *) 71 | val cond : bool exp -> 'a exp -> 'a exp -> 'a exp 72 | val if_ : bool exp -> unit stm -> unit stm -> unit stm 73 | val if1 : bool exp -> unit stm -> unit stm 74 | 75 | val for_ : int exp -> (* exact lower bound *) 76 | upe:int exp -> (* least upper bound, *exclusive* *) 77 | ?guard:bool exp -> (* possibly a guard, terminate when false *) 78 | ?step:int exp -> (* step *) 79 | (int exp -> unit stm) -> unit stm 80 | 81 | val while_ : bool exp -> unit stm -> unit stm 82 | 83 | (* Loop with continue: execute the body, which may request to be 84 | re-executed (like C `continue') 85 | *) 86 | val cloop : 87 | ('a -> unit stm) -> (* cloop's continuation *) 88 | bool exp option -> (* possibly a guard. 89 | It is true at the beginning *) 90 | (('a -> unit stm) -> unit stm) -> (* body, in CPS, which may exit 91 | without calling its continuation, 92 | in which case the loop is re-executed 93 | provided the guard is still true. 94 | *) 95 | unit stm 96 | 97 | 98 | (* Mutable variables *) 99 | type 'a mut (* Mutable variables are NOT expressions *) 100 | val newref : 'a exp -> ('a mut -> 'w stm) -> 'w stm 101 | (* Create an *uninitialized* reference cell. The first argument is 102 | not an initial value: it is just a hint to get the type. 103 | Therefore, the first argument may be unusable as the value. 104 | A backend may still use some other value of the same type as 105 | the initial value. 106 | *) 107 | val newuref : 'a exp -> ('a mut -> 'w stm) -> 'w stm 108 | val dref : 'a mut -> 'a exp 109 | val (:=) : 'a mut -> 'a exp -> unit stm 110 | val incr : int mut -> unit stm 111 | val decr : int mut -> unit stm 112 | 113 | (* Arrays *) 114 | type 'a arr (* Arrays are not expressions *) 115 | (* XXX Later consider constant arrays, mutable arrays and mutable 116 | extensible arrays 117 | *) 118 | val array_get' : 'a arr -> int exp -> 'a exp 119 | (* It makes sense to combine it with letl *) 120 | val array_get : 'a arr -> int exp -> ('a exp -> 'w stm) -> 'w stm 121 | val array_len : 'a arr -> int exp 122 | val array_set : 'a arr -> int exp -> 'a exp -> unit stm 123 | (* initialized non-empty array, immediately bound to a variable. 124 | Must be of a base type 125 | *) 126 | val new_array : 'a tbase -> 'a exp array -> ('a arr -> 'w stm) -> 'w stm 127 | (* Array of the statically known size with statically known elements 128 | It should not be modified. If possible, it is allocated in the DATA 129 | segment 130 | *) 131 | val new_static_array : 'a tbase -> ('b -> 'a exp) -> 'b array -> 132 | ('a arr -> 'w stm) -> 'w stm 133 | (* new uninitialized array, of the base type 134 | *) 135 | val new_uarray : 'a tbase -> int -> ('a arr -> 'w stm) -> 'w stm 136 | 137 | (* Inquiries. They are best effort *) 138 | (* Is value statically known: known at code-generation time *) 139 | (* It may always return false *) 140 | val is_static : 'a exp -> bool 141 | (* Is value dependent on something introduced by the library itself? 142 | For example, dependent on letl identifier, etc. 143 | It may always return true. 144 | *) 145 | val is_fully_dynamic : 'a exp -> bool 146 | -------------------------------------------------------------------------------- /lib/pk_coll.ml: -------------------------------------------------------------------------------- 1 | (* Strymonas also allows streams that carry not mere target 2 | code values but also composite values such as tuples, whose 3 | structure is known at code-generation time. Such values should be 4 | accompanied by a descriptor. 5 | 6 | *) 7 | 8 | module type cde = module type of Cde 9 | 10 | module type desc = sig 11 | type 'a exp 12 | type 'a stm 13 | type 'a mut 14 | (* The two arguments of (coll,collref) desc are: 15 | coll: the type of the collection 16 | collref : the type to the mutable reference collection 17 | Currently we do not provide mapping between collections. 18 | If we do provide it, we need an index type: see below for an example. 19 | *) 20 | type (_,_) desc = 21 | | Single : ('a exp, 'a mut) desc 22 | | Tuple : ('a exp * 'b exp, 'a mut * 'b mut) desc 23 | (* can be extended and generalized to triples, etc. and pairs *) 24 | end 25 | 26 | module Desc(C:cde) = struct 27 | type 'a exp = 'a C.exp 28 | type 'a stm = 'a C.stm 29 | type 'a mut = 'a C.mut 30 | type (_,_) desc = 31 | | Single : ('a exp, 'a mut) desc 32 | | Tuple : ('a exp * 'b exp, 'a mut * 'b mut) desc 33 | let newref : type c cref w. 34 | (c,cref) desc -> c -> (cref -> w stm) -> w stm = 35 | fun d c k -> 36 | match d with 37 | | Single -> C.newref c k 38 | | Tuple -> 39 | let (x,y) = c in 40 | C.newref x @@ fun xr -> 41 | C.newref y @@ fun yr -> 42 | k (xr,yr) 43 | let dref : type c cref. (c,cref) desc -> cref -> c = fun d cr -> 44 | match d with 45 | | Single -> C.dref cr 46 | | Tuple -> 47 | let (xr,yr) = cr in 48 | C.(dref xr,dref yr) 49 | let set : type c cref. (c,cref) desc -> cref -> c -> unit stm = fun d cr c -> 50 | match d with 51 | | Single -> C.(cr := c) 52 | | Tuple -> 53 | let (xr,yr) = cr and (x,y) = c in 54 | C.(seq (xr := x) (yr := y)) 55 | 56 | (* As for array, this interface prefer multiple host arrays to arrays of structs in SQL term. 57 | ** - https://docs.oracle.com/en/database/oracle/oracle-database/21/lnpcc/host-arrays.html#GUID-861203A7-309A-4A33-A6B8-89B10B36E0C4 58 | ** - https://stackoverflow.com/questions/17924705/structure-of-arrays-vs-array-of-structures 59 | *) 60 | end 61 | 62 | 63 | (* 64 | module type coll = sig 65 | type idx (* representation of coll's content *) 66 | module C : cde 67 | type 'a cde = 'a C.cde 68 | type coll 69 | type collref 70 | val newref : coll -> (collref -> 'w cde) -> 'w cde 71 | val dref : collref -> coll 72 | val dset : collref -> coll -> unit cde 73 | end 74 | 75 | type ('i,'c) desc = (module (coll with type idx='i and type coll='c)) 76 | 77 | module Single(C:cde)(S:sig type idx end) : 78 | (coll with module C=C and type idx=S.idx and type coll=S.idx C.cde) = struct 79 | type idx = S.idx 80 | module C = C 81 | type 'a cde = 'a C.cde 82 | type coll = idx cde 83 | type collref = idx ref cde 84 | let newref : coll -> (collref -> 'w cde) -> 'w cde = C.newref 85 | let dref : collref -> coll = C.dref 86 | let dset : collref -> coll -> unit cde = C.(:=) 87 | end 88 | 89 | module Tuple(C:cde)(S:sig type i1 type i2 end) : 90 | (coll with module C=C and type idx=S.i1 * S.i2 and 91 | type coll= S.i1 C.cde * S.i2 C.cde) = struct 92 | type idx = S.i1 * S.i2 93 | module C = C 94 | type 'a cde = 'a C.cde 95 | type coll = S.i1 cde * S.i2 cde 96 | type collref = S.i1 ref C.cde * S.i2 ref C.cde 97 | let newref : coll -> (collref -> 'w cde) -> 'w cde = fun (x,y) k -> 98 | C.newref x @@ fun xr -> 99 | C.newref y @@ fun yr -> 100 | k (xr,yr) 101 | let dref : collref -> coll = fun (xr,yr) -> (C.dref xr, C.dref yr) 102 | let dset : collref -> coll -> unit cde = fun (xr,yr) (x,y) -> 103 | C.(seq (xr := x) (yr := y)) 104 | end 105 | 106 | module Desc(C:cde) = struct 107 | let single : type i. unit -> (i,i C.cde) desc = fun () -> 108 | let module M = Single(C)(struct type idx=i end) in 109 | (module M) 110 | let tuple : type ix1 ix2. unit -> (ix1*ix2,ix1 C.cde * ix2 C.cde) desc = 111 | fun () -> 112 | let module M = Tuple(C)(struct type i1=ix1 type i2=ix2 end) in 113 | (module M) 114 | end 115 | 116 | *) 117 | 118 | (* A different approach, reminiscent of HList 119 | The descriptor is bundled with the value 120 | type 'a single 121 | module type coll = sig 122 | type 'a el 123 | type (_,_) desc = 124 | | Unit : (unit,unit) desc 125 | | One : ('a single,'a el) desc 126 | | Tuple : ('a,'a1) desc -> ('b * 'a, 'b el * 'a1) desc 127 | type 'a coll = Coll : ('a,'e) desc * 'e -> 'a coll 128 | val unit : unit coll 129 | val single : 'a el -> 'a single coll 130 | val from_single : 'a single coll -> 'a el 131 | val ascribe : ('a,'e) desc -> 'e -> 'a coll 132 | end 133 | 134 | module Coll(E: sig type 'a t end) = struct 135 | type 'a el = 'a E.t 136 | type (_,_) desc = 137 | | Unit : (unit,unit) desc 138 | | One : ('a single,'a el) desc 139 | | Tuple : ('a,'a1) desc -> ('b * 'a, 'b el * 'a1) desc 140 | type 'a coll = Coll : ('a,'e) desc * 'e -> 'a coll 141 | let unit : unit coll = Coll (Unit,()) 142 | let single : 'a el -> 'a single coll = fun x -> Coll (One,x) 143 | let from_single : type a. a single coll -> a el = function Coll (One,x) -> x 144 | let ascribe : ('a,'e) desc -> 'e -> 'a coll = fun d x -> Coll (d,x) 145 | end 146 | 147 | (* Mapping, between element representations. Element type _index_ 148 | should be preserved though (which means that the length is also 149 | preserved) 150 | *) 151 | module CMAP(S:coll)(T:coll) = struct 152 | type ftor = {f: 'a. 'a S.el -> 'a T.el} 153 | type 'w ftor_cps = {fcps: 'a. 'a S.el -> ('a T.el -> 'w) -> 'w} 154 | let rec map : type a. ftor -> a S.coll -> a T.coll = fun {f} -> function 155 | | S.Coll (Unit,_) -> T.Coll (Unit,()) 156 | | S.Coll (One,x) -> T.Coll (One,f x) 157 | | S.Coll (Tuple d,(h,t)) -> 158 | let T.Coll (d',t') = map {f} (S.Coll (d,t)) in 159 | T.Coll (Tuple d', (f h, t')) 160 | let rec map_cps : type a w. 161 | w ftor_cps -> a S.coll -> (a T.coll -> w) -> w = fun {fcps} sc k -> 162 | match sc with 163 | | S.Coll (Unit,_) -> k @@ T.Coll (Unit,()) 164 | | S.Coll (One,x) -> fcps x @@ fun x' -> k @@ T.Coll (One,x') 165 | | S.Coll (Tuple d,(h,t)) -> 166 | fcps h @@ fun h' -> 167 | map_cps {fcps} (S.Coll (d,t)) @@ function T.Coll (d',t') -> 168 | k @@ T.Coll (Tuple d', (h', t')) 169 | end 170 | *) 171 | 172 | 173 | -------------------------------------------------------------------------------- /lib/stream_raw.mli: -------------------------------------------------------------------------------- 1 | (* Stream lower-level interface *) 2 | 3 | type 'a exp (* Abstract type of code expressions *) 4 | type 'a stm (* Abstract type of statements *) 5 | type 'a mut (* Mutable variables *) 6 | type 'a arr (* Arrays *) 7 | type 'a tbase (* Base types *) 8 | type 'a stream (* Here, 'a is not necessarily the code type! *) 9 | 10 | (* Producer of values. When evaluated repeatedly, may produce different values. 11 | This is the primitive of initalizers and loops. 12 | The received continuation must be invoked exactly once. 13 | FYI, the continuation can be thought of as a kind of the yield keyword in generators 14 | from the point of view of the interface (although it is not correct). 15 | *) 16 | type 'a emit = ('a -> unit stm) -> unit stm 17 | 18 | (* The termination condition: true to continue, false to terminate the stream. 19 | It should be cheap to evaluate. 20 | It may depend on the stream state (introduced by initalizing_ref etc. 21 | below), but it must not change state. Furthermore, it must satisfy 22 | one of the following side-conditions: 23 | (1) the state used by the termination condition should not be modified 24 | by map_raw, filter or flat_map operations further down the pipeline. 25 | The simplest way to ensure that is to make all reference cells used by the 26 | termination condition private and not available to further operations. 27 | (for an example, see the implementation of from_to on stream_cooked_fn.ml). 28 | To say it differently, if map/filter/flatmap need state, it should allocate 29 | (using initalizing_ref, etc.) private state for itself and not interfere 30 | in other operations state. 31 | (2) At times, it is necessary for the termination condition and 32 | the mapping function to share state and for the mapping to modify it. 33 | (example: take_while in stream_cooked_fn.ml). In that case, the following 34 | conditions must be preserved: 35 | (a) if the termination condition evaluates to false, no further changes 36 | in the state should turn it to true. 37 | (b) if the mapping function changes the state in such a way that 38 | a termination condition in a guard somewhere (either up or down the 39 | pipeline) becomes false, the mapping function must not produce 40 | an stream item (it should `skip': avoid invoking its continuation). 41 | It is easiest to ensure the preservation of these conditions if we 42 | allocate a fresh mutable boolean variable, initialized to true. 43 | If a mapping/filtering function wants to signal the termination, 44 | it should assign it false (and `skip'). That is, assignments 45 | to such mutable varioable should be `monotone': any new value should be 46 | ANDed with the existing value in this mutable cell. 47 | *) 48 | type goon = 49 | | GTrue (* constant true *) 50 | | GExp of bool exp (* should be cheap to evaluate *) 51 | | GRef of bool mut (* a boolean flag *) 52 | 53 | (* Producers *) 54 | (* The index expression of pull array should do let-insertion! 55 | pull_array len idx: 0 <= idx < len *) 56 | val pull_array : int exp -> (int exp -> 'a emit) -> 'a stream 57 | 58 | (* Initializers: sort-of let-expressions. They introduce stream state. 59 | They are also a sort of a flat_map 60 | *) 61 | (* The following introduce local stream variables, so to speak, 62 | both mutable and immutable. 63 | Nested streams may have to be `closure converted' (in complicated 64 | zips), therefore, all local variables, in particular, 65 | variables whose bound expressions depend on the current element of 66 | the outer stream, have to be declared using initializing... below. 67 | The bound/initializing expression is evaluated only once, 68 | when the stream starts (that is, before the first element is emitted). 69 | *) 70 | 71 | (* Essentially let-insertion. The initializing expression may be stateful *) 72 | val initializing : 'z exp -> ('z exp -> 'a stream) -> 'a stream 73 | (* Mutable state with the given initial value *) 74 | val initializing_ref : 'z exp -> ('z mut -> 'a stream) -> 'a stream 75 | (* A *non-empty* array with a statically known, and 76 | preferably rather small size. It is generally mutable. 77 | For immutable (parameters), consider initializing_static_arr 78 | *) 79 | val initializing_arr : 'z tbase -> 'z exp array -> 80 | ('z arr -> 'a stream) -> 'a stream 81 | (* A *non-empty* array with a statically known content. 82 | It should not be mutated *) 83 | val initializing_static_arr : 'z tbase -> ('b -> 'z exp) -> 'b array -> 84 | ('z arr -> 'a stream) -> 'a stream 85 | (* An uninitialized array of the given size. The first argument is 86 | is the type descriptor. 87 | *) 88 | val initializing_uarr : 'z tbase -> int -> 89 | ('z arr -> 'a stream) -> 'a stream 90 | 91 | 92 | (* Create an infinite stream: run step in an infinite loop *) 93 | val infinite : 'a emit -> 'a stream 94 | 95 | (* Consumer: the inverse of [infinite] *) 96 | val iter : ('a -> unit stm) -> 'a stream -> unit stm 97 | 98 | 99 | (* Transformers *) 100 | (* map_raw is assured to be applied in order. So, it is actually 101 | an accumulating map_filter. 102 | It could maintain its own state. It must not, however, change the state 103 | used by guard. 104 | *) 105 | (* The optional argument, ?linear, tells if the transformer is 106 | linear -- that is, if the continuation ('b -> unit cde) is invoked 107 | exactly once. 108 | By default it is true. 109 | If the continuation ends up not being invoked in some cases 110 | (that is, map_raw behaves like map_option), be sure specify 111 | ~linear:false! 112 | On no occasion should continuation be invoked multiple times! 113 | (because we expect goon to be evaluated once each time 114 | before calling the continuation) 115 | *) 116 | val map_raw : ?linear:bool -> ('a -> 'b emit) -> 'a stream -> 'b stream 117 | val map_raw' : ('a -> 'b) -> 'a stream -> 'b stream 118 | 119 | (* Essentially, take_while *) 120 | val guard : goon -> 'a stream -> 'a stream 121 | 122 | (* Although filter_raw is a particular form of map_raw, it is worth 123 | providing on its own: the filter predicates fuse better. 124 | Also, filter is rather common. 125 | *) 126 | val filter_raw : ('a -> bool exp) -> 'a stream -> 'a stream 127 | 128 | val flat_map_raw : ('a exp -> 'b stream) -> 'a exp stream -> 'b stream 129 | 130 | val zip_raw : 'a stream -> 'b stream -> ('a * 'b) stream 131 | -------------------------------------------------------------------------------- /benchmarks/benchmark_batteries.ml: -------------------------------------------------------------------------------- 1 | module Batteries = struct 2 | open BatEnum 3 | type 'a cde = 'a code 4 | type 'a stream_raw = 'a t 5 | type 'a stream = 'a t cde 6 | 7 | let lift_tr1 : (('a -> 'b ) -> 'a stream_raw -> 'c stream_raw) cde 8 | -> ('a cde -> 'b cde) -> 'a stream -> 'c stream = 9 | fun tr f st -> .<.~tr (fun x -> .~(f ..)) .~st>. 10 | 11 | (* let lift_tr2 : (('a -> 'b -> 'c) -> ('a stream_raw -> 'b stream_raw -> 'c stream_raw) )cde 12 | -> ('a cde -> 'b cde -> 'c cde) -> 'a stream -> 'b stream -> 'c stream = 13 | fun tr f st1 st2 -> .<.~tr (fun x y -> .~(f .. ..)) .~st1 .~st2>. *) 14 | 15 | 16 | let of_arr : 'a array cde -> 'a stream = fun x -> .. 17 | 18 | let fold : ('z cde -> 'a cde -> 'z cde) -> 'z cde -> 'a stream -> 'z cde = 19 | fun f z st -> . .~(f .. ..)) .~z .~st>. 20 | 21 | let map : ('a cde -> 'b cde) -> 'a stream -> 'b stream = 22 | fun f st -> lift_tr1 .. f st 23 | 24 | let flat_map : ('a cde -> 'b stream) -> 'a stream -> 'b stream = 25 | fun f st -> lift_tr1 .. f st 26 | 27 | let filter : ('a cde -> bool cde) -> 'a stream -> 'a stream = 28 | fun f st -> lift_tr1 .. f st 29 | 30 | let take : int cde -> 'a stream -> 'a stream = 31 | fun n st -> .. 32 | 33 | let zip_with : ('a cde -> 'b cde -> 'c cde) -> ('a stream -> 'b stream -> 'c stream) = 34 | fun f st1 st2 -> 35 | . .~(f .. ..)) (.~st1 |> BatStream.of_enum) (.~st2 |> BatStream.of_enum) 36 | |> BatStream.enum>. 37 | 38 | type byte = int 39 | let byte_max = 255 40 | let decode = fun st -> 41 | st |> flat_map (fun el -> .< 42 | unfold 0 (fun i -> 43 | if i < .~el then Some (false, i + 1) 44 | else ( 45 | if i > .~el then None 46 | else ( 47 | if i < byte_max then Some (true, i + 1) 48 | else failwith "wrong sequence" 49 | ) 50 | ) 51 | )>.) 52 | end 53 | 54 | 55 | module Benchmark_batteries = struct 56 | open Benchmark_types 57 | open Benchmark 58 | open Benchmark_abstract.Benchmark(Benchmark_abstract.CodeBasic)(Batteries) 59 | 60 | (* let maps_array = fun src -> .< 61 | .~src 62 | |> BatArray.map(fun x -> x * 1) 63 | |> BatArray.map(fun x -> x * 2) 64 | |> BatArray.map(fun x -> x * 3) 65 | |> BatArray.map(fun x -> x * 4) 66 | |> BatArray.map(fun x -> x * 5) 67 | |> BatArray.map(fun x -> x * 6) 68 | |> BatArray.map(fun x -> x * 7) 69 | |> BatArray.fold_left (fun z a -> z + a) 0 >. 70 | 71 | let filters_array = fun src -> .< 72 | .~src 73 | |> BatArray.filter(fun x -> x > 1) 74 | |> BatArray.filter(fun x -> x > 2) 75 | |> BatArray.filter(fun x -> x > 3) 76 | |> BatArray.filter(fun x -> x > 4) 77 | |> BatArray.filter(fun x -> x > 5) 78 | |> BatArray.filter(fun x -> x > 6) 79 | |> BatArray.filter(fun x -> x > 7) 80 | |> BatArray.fold_left (fun z a -> z + a) 0 >. *) 81 | 82 | (* faster than normal dotProduct *) 83 | (* let dotProduct_enum = fun (src1, src2) -> .< 84 | BatEnum.fold2 (fun z x y -> z + x * y) 0 85 | (.~src1 |> BatArray.enum) 86 | (.~src2 |> BatArray.enum) >. *) 87 | 88 | 89 | (* Arrays used for benchmarking *) 90 | let v = .< Array.init 100_000_000 (fun i -> i mod 10) >.;; 91 | let vHi = .< Array.init 10_000_000 (fun i -> i mod 10) >.;; 92 | let vLo = .< Array.init 10 (fun i -> i mod 10) >.;; 93 | let vFaZ = .< Array.init 10_000 (fun i -> i) >.;; 94 | let vZaF = .< Array.init 10_000_000 (fun i -> i) >.;; 95 | 96 | let options = { 97 | repetitions = 20; 98 | final_f = (fun _ -> .<()>.); 99 | } 100 | 101 | let pr_int = {options with 102 | final_f = fun x -> ..} 103 | 104 | let check_int n = {options with 105 | final_f = fun x -> ..} 106 | 107 | let script =[| 108 | perfS "sum_batteries" v sum options; 109 | perfS "sumOfSquares_batteries" v sumOfSquares options; 110 | perfS "sumOfSquaresEven_batteries" v sumOfSquaresEven options; 111 | perfS "mapsMegamorphic_batteries" v maps options; 112 | (* perfS "mapsMegamorphicArray_batteries" v maps_array options; *) 113 | perfS "filtersMegamorphic_batteries" v filters options; 114 | (* perfS "filtersMegamorphicArray_batteries" v filters_array options; *) 115 | perfS2 "cart_batteries" vHi vLo cart options; 116 | (* perfS2 "dotProduct_enum_batteries" vHi vHi dotProduct_enum options; *) 117 | perfS2 "dotProduct_batteries" vHi vHi dotProduct options; 118 | perfS2 "flatMapAfterZip_batteries" vFaZ vFaZ flatMap_after_zipWith options; 119 | perfS2 "zipAfterFlatMap_batteries" vZaF vZaF zipWith_after_flatMap options; 120 | perfS2 "flatMapTake_batteries" vHi vLo flat_map_take options; 121 | perfS2 "zipFilterFilter_batteries" v vHi zip_filter_filter options; 122 | perfS2 "zipFlatMapFlatMap_batteries" v vLo zip_flat_flat options; 123 | perfS2 "runLengthDecoding_batteries" v v decoding options; 124 | |];; 125 | 126 | (* too slow *) 127 | let test = .< 128 | print_endline "Not checked yet"; 129 | assert (.~(sum v) == 450000000); 130 | assert (.~(sumOfSquares v) == 2850000000); 131 | assert (.~(sumOfSquaresEven v) == 1200000000); 132 | assert (.~(maps v) == 2268000000000); 133 | assert (.~(filters v) == 170000000); 134 | assert (.~(cart (vHi, vLo)) == 2025000000); 135 | assert (.~(dotProduct (vHi, vHi)) == 285000000); 136 | assert (.~(flatMap_after_zipWith (vFaZ, vFaZ)) == 1499850000000); 137 | assert (.~(zipWith_after_flatMap (vZaF, vZaF)) == 99999990000000); 138 | assert (.~(flat_map_take (vHi, vLo)) == 405000000); 139 | assert (.~(zip_filter_filter (v, vHi)) == 64000000); 140 | assert (.~(zip_flat_flat (v, vLo)) == 3250000000); 141 | assert (.~(decoding (v, v)) == 100000000); 142 | print_endline "All done" 143 | >. 144 | end 145 | 146 | module M = Benchmark_batteries 147 | 148 | let main () = 149 | let compiler = "ocamlfind ocamlopt -O2 -unsafe -nodynlink -package batteries -linkpkg util.cmx" in 150 | match Sys.argv with 151 | | [|_;"test"|] -> 152 | Benchmark.run_natively M.test 153 | ~compiler 154 | (* ~save:true *) 155 | | _ -> 156 | Benchmark.run_script M.script 157 | ~compiler 158 | 159 | let _ = main () 160 | -------------------------------------------------------------------------------- /lib/backends/C/offshoringIR.mli: -------------------------------------------------------------------------------- 1 | (* Offshoring IR 2 | 3 | (AST of the) simple imperative statement-oriented language: 4 | the target of MetaOCaml offshoring or tagless-final combinators 5 | The language is designed to be pretty-printable to C (or Fortran 6 | or other such language) -- in full, and easily 7 | 8 | For more details, see `Generating C' 9 | *) 10 | 11 | type float32 = private float 12 | 13 | val float32_of_float : float -> float32 14 | 15 | (* For-loop with a step, like the for-loop in C *) 16 | val forloop : 17 | int -> (* lower bound *) 18 | upe:int -> (* upper bound, exclusive, like in C *) 19 | step:int -> 20 | (int -> unit) -> (* body *) 21 | unit 22 | 23 | 24 | type numtyp = I32 | I64 | F32 | F64 | C32 | C64 (* Numeric types *) 25 | 26 | type typ = .. 27 | type typ += 28 | | TVoid (* No values of that type *) 29 | | TNum of numtyp 30 | | TBool 31 | | TChar 32 | | TArray1 of typ (* Usual array or Bigarray.Array1 *) 33 | | TArray2 of typ (* Bigarray.Array2 *) 34 | | TLenArray1 of int * typ (* Array of a known length *) 35 | | TRef of typ 36 | | TString 37 | 38 | type varname = private string 39 | (* Operations. Some are specified explicitly, for portability 40 | They follow the operations in C and C standard libraries. 41 | Many other languages (say, OCaml) support the same operations 42 | Many are indexed by type: cf WASM, where there is i32.add, i64.add, 43 | f32.add, f64.add 44 | *) 45 | module OP : sig 46 | type t = 47 | | ADD of numtyp | SUB of numtyp 48 | | MUL of numtyp | DIV of numtyp | MOD of numtyp 49 | | BAND of numtyp | BOR of numtyp | XOR of numtyp (* bitwise operations *) 50 | | SHL of numtyp | SHR of numtyp 51 | | EQ of numtyp | NE of numtyp 52 | | LT of numtyp | GT of numtyp | LE of numtyp | GE of numtyp 53 | | AND | OR (* these are used mostly internally. In IR, we create 54 | a special node for shortcut applications *) 55 | | NEG of numtyp | NOT | BNOT of numtyp 56 | | ASSIGN of typ 57 | | INCR of numtyp | DECR of numtyp (* increment/decrement *) 58 | | CAST of {from: numtyp; onto: numtyp} 59 | | Assert 60 | | DEREF of typ | REF of typ (* typ = type of content *) 61 | | Array1_get of typ | Array1_set of typ (* typ = array element type *) 62 | | Other of varname 63 | val name : string -> t 64 | end 65 | 66 | 67 | (* Numeric constants should be serialized, not to lose precision,etc 68 | The serialization format is C or OCaml numerals (the common part) 69 | *) 70 | type constant_t = (* no constants of void type! *) 71 | | Const_num of numtyp * string (* appropriately serialized *) 72 | | Const_bool of bool 73 | | Const_char of char 74 | | Const_string of string 75 | 76 | type attribute = .. 77 | type attribute += A_static | A_align of int 78 | 79 | (* A sequence, with easy concatenation *) 80 | 81 | module Sq : sig 82 | type 'a t 83 | val empty : 'a t 84 | val one : 'a -> 'a t 85 | val (@) : 'a t -> 'a t -> 'a t 86 | val iter : ('a -> unit) -> 'a t -> unit 87 | val all : ('a -> bool) -> 'a t -> bool 88 | val fold_right : ('a -> 'z -> 'z) -> 'z -> 'a t -> 'z 89 | val concat : 'a t list -> 'a t 90 | end 91 | 92 | type mutble = Mut | Cnst 93 | 94 | (* As the data type declaration make it clear, all let-bindings 95 | are `straightened-out': 96 | There are no nested let-bindings. 97 | Sequences are also straightened-out 98 | *) 99 | (* exp is a simple expression, _without_ any local bindings *) 100 | type exp = 101 | | Const of constant_t (* Constant/literal: int, bool,...*) 102 | | Array of exp list (* immediate array *) 103 | | LocalVar of varname (* Locally-bound variable *) 104 | | MutVar of varname (* Reference to a mutable var *) 105 | | GlobalVar of varname (* Global var,... *) 106 | | FunCall of OP.t * exp list (* Calls only to known identifiers *) 107 | | Cond of exp * exps * exps (* Conditional expression *) 108 | | And of exp * exps (* && *) 109 | | Or of exp * exps (* || *) 110 | (* Many contexts in C permit a comma-separated 111 | non-empty sequence of expressions: 112 | still with no local bindings. 113 | Such sequences are normally imperative, and we use them in 114 | if branches or while-tests, which may be executed 0 or more than 1 time. 115 | When we offshore e1 && e2, the local bindings in e1 may be lifted out; 116 | but not in e2. That's why the result of offshoring is denoted by 117 | And of exp * exps. Likewise, in while e do ... done, the local bindings 118 | from e cannot be lifted out. 119 | *) 120 | and exps = exp list 121 | (* A basic element of a flow-chart of sorts *) 122 | and stmt = 123 | | Exp of exp 124 | | If of exp * block * block option 125 | | While of exps * block 126 | (* Like in OCaml (but unlike C), init, upe, step are evaluated only once, 127 | in unspecified order 128 | upe is the exclusive upper-bound, like in C 129 | *) 130 | | For of {id: varname; ty:typ; 131 | guard: exp option; 132 | lwb: exp; upe: exp; step: exp; body: block} 133 | (* Invariant: In Seq (b1,b2), b1 is not an Exp statement with empty bindings 134 | Also, b1 and b2 are not Unit 135 | *) 136 | | Seq of block * block 137 | and block = 138 | | Unit 139 | | Block of binding Sq.t * stmt 140 | (* (Some v, exp) corresponds to let v = exp in ... 141 | (None, exp) corresponds to let () = exp in ... 142 | By the grammar, exp has no internal bindings. 143 | The expression to bind is always simple: not a sequence 144 | *) 145 | and binding = bv_desc option * exp 146 | and bv_desc = {id: varname; ty: typ; mut: mutble; attrs: attribute list} 147 | 148 | 149 | (* Complete procedure 150 | The typ may be TVoid (in which case the it is the procedure). 151 | Otherwise, the block is not Unit and its last element is 152 | Exp 153 | *) 154 | type args_t = (varname * typ) list 155 | type proc_t = args_t * typ * block 156 | 157 | (* Utilities *) 158 | 159 | (* New local varname with a given stem *) 160 | val genvarname : string -> varname 161 | 162 | val local_name : string -> varname 163 | 164 | val of_exp : ?bindings:binding Sq.t -> exp -> block 165 | val dummy_binding : binding -> bool 166 | 167 | (* Sequencing: enfocing the invariant of Seq (b1,b2) *) 168 | val seq : block -> block -> block 169 | -------------------------------------------------------------------------------- /examples/streamit-fm/streamit/benchmarks_gen.c: -------------------------------------------------------------------------------- 1 | /* Genarated C codes for benchmark 2 | from examples/streamit-fm/main.ml 3 | Last Update: Oct. 18, 2023 */ 4 | 5 | #include "benchmarks.h" 6 | float fmradio(){ 7 | float x_1 = 0.; 8 | int x_2 = 1000000; 9 | int x_3 = 127; 10 | float a_4[128] = 11 | {0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0., 12 | 0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0., 13 | 0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0., 14 | 0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0., 15 | 0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0., 16 | 0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0., 17 | 0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.};; 18 | int x_5 = 0; 19 | bool x_6 = false; 20 | float x_7 = 0.; 21 | int x_8 = 62; 22 | float a_9[126] = 23 | {0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0., 24 | 0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0., 25 | 0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0., 26 | 0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0., 27 | 0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0., 28 | 0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0., 29 | 0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.};; 30 | int x_10 = 0; 31 | int x_11 = 0; 32 | while (x_2 > 0) 33 | { 34 | int const t_12 = x_11; 35 | x_11++; 36 | float const t_13 = (float)t_12; 37 | if (x_10 < 63) 38 | { 39 | x_8--; 40 | if (x_8 < 0) 41 | 42 | x_8 = 62; 43 | (a_9[x_8]) = t_13; 44 | (a_9[x_8 + 63]) = t_13; 45 | x_10++; 46 | } 47 | else { 48 | static float a_14[64] = 49 | {-0.0005074295,0.00076760163,-0.00096094337, 50 | 0.0010377073,-0.00092009215,0.00052436007, 51 | 0.00020035879,-0.0012276311,0.002419481, 52 | -0.0035185256,0.0041778507,-0.0040306991, 53 | 0.0027903138,-0.00035912517,-0.0030809149, 54 | 0.0070260762,-0.010679383,0.013059247, 55 | -0.013179701,0.010275472,-0.0040308254, 56 | -0.0052350907,0.016469188,-0.027898367, 57 | 0.037156224,-0.041484997,0.037921171, 58 | -0.023267954,-0.0066606012,0.060466273, 59 | -0.16922387,0.62179246,0.62179246, 60 | -0.16922387,0.060466273,-0.0066606012, 61 | -0.023267954,0.037921171,-0.041484997, 62 | 0.037156224,-0.027898367,0.016469188, 63 | -0.0052350907,-0.0040308254,0.010275472, 64 | -0.013179701,0.013059247,-0.010679383, 65 | 0.0070260762,-0.0030809149,-0.00035912517, 66 | 0.0027903138,-0.0040306991,0.0041778507, 67 | -0.0035185256,0.002419481,-0.0012276311, 68 | 0.00020035879,0.00052436007,-0.00092009215, 69 | 0.0010377073,-0.00096094337,0.00076760163, 70 | -0.0005074295};; 71 | float x_15 = (a_14[0]) * t_13; 72 | for (int i_22 = 0; i_22 < 63; i_22 += 1) 73 | x_15 = x_15 + ((a_14[i_22 + 1]) * (a_9[x_8 + i_22])); 74 | if (x_6) 75 | { 76 | float const t_16 = x_15 * x_7; 77 | float const t_17 = 2.1485917e+08 * atanf(t_16); 78 | if (x_5 < 63) 79 | { 80 | (a_4[x_3]) = t_17; 81 | x_5++; 82 | x_3--; 83 | } 84 | else { 85 | if (x_3 < 0) 86 | { 87 | for (int i_21 = 0; i_21 < 63; i_21 += 1) 88 | (a_4[i_21 + 65]) = a_4[i_21]; 89 | x_3 = 64; 90 | } 91 | static float a_18[64] = 92 | {1.5361169e-06,1.5800085e-06,1.7112469e-06, 93 | 1.9285278e-06,2.2296919e-06,2.611746e-06, 94 | 3.0708931e-06,3.60257e-06,4.2014927e-06, 95 | 4.8617087e-06,5.5766566e-06, 96 | 6.3392309e-06,7.1418528e-06, 97 | 7.9765455e-06,8.8350134e-06, 98 | 9.7087246e-06,1.0588996e-05, 99 | 1.1467079e-05,1.2334246e-05,1.318188e-05, 100 | 1.4001557e-05,1.4785129e-05,1.552481e-05, 101 | 1.6213247e-05,1.68436e-05,1.7409604e-05, 102 | 1.7905633e-05,1.8326757e-05, 103 | 1.8668791e-05,1.8928336e-05, 104 | 1.9102813e-05,1.9190487e-05, 105 | 1.9190487e-05,1.9102813e-05, 106 | 1.8928336e-05,1.8668791e-05, 107 | 1.8326757e-05,1.7905633e-05, 108 | 1.7409604e-05,1.68436e-05,1.6213247e-05, 109 | 1.552481e-05,1.4785129e-05,1.4001557e-05, 110 | 1.318188e-05,1.2334246e-05,1.1467079e-05, 111 | 1.0588996e-05,9.7087246e-06, 112 | 8.8350134e-06,7.9765455e-06, 113 | 7.1418528e-06,6.3392309e-06, 114 | 5.5766566e-06,4.8617087e-06, 115 | 4.2014927e-06,3.60257e-06,3.0708931e-06, 116 | 2.611746e-06,2.2296919e-06,1.9285278e-06, 117 | 1.7112469e-06,1.5800085e-06,1.5361169e-06};; 118 | float x_19 = (a_18[0]) * t_17; 119 | for (int i_20 = 1; i_20 < 64; i_20 += 1) 120 | x_19 = x_19 + ((a_18[i_20]) * (a_4[x_3 + i_20])); 121 | x_2--; 122 | x_1 = x_1 + x_19; 123 | (a_4[x_3]) = t_17; 124 | x_3--; 125 | } 126 | x_7 = x_15; 127 | } 128 | else { 129 | x_7 = x_15; 130 | x_6 = true; 131 | } 132 | x_8--; 133 | if (x_8 < 0) 134 | 135 | x_8 = 62; 136 | (a_9[x_8]) = t_13; 137 | (a_9[x_8 + 63]) = t_13; 138 | x_10 = x_10 + -4; 139 | } 140 | } 141 | return x_1; 142 | } 143 | -------------------------------------------------------------------------------- /benchmarks/benchmark_abstract.ml: -------------------------------------------------------------------------------- 1 | (* abstraction for streaming, batteries, and v1 *) 2 | module type cde = sig 3 | type 'a exp 4 | type 'a stm 5 | type 'a arr 6 | 7 | val int : int -> int exp 8 | val ( + ) : int exp -> int exp -> int exp 9 | val ( - ) : int exp -> int exp -> int exp 10 | val ( * ) : int exp -> int exp -> int exp 11 | val (mod) : int exp -> int exp -> int exp 12 | 13 | val ( = ) : int exp -> int exp -> bool exp 14 | val ( > ) : int exp -> int exp -> bool exp 15 | 16 | val cond : bool exp -> 'a exp -> 'a exp -> 'a exp 17 | val ( || ) : bool exp -> bool exp -> bool exp 18 | 19 | val to_code1 : ('a arr -> 'b stm) -> ('a array code -> 'b code) 20 | val to_code2 : ('a arr * 'b arr -> 'c stm) -> 21 | ('a array code * 'b array code -> 'c code) 22 | end 23 | 24 | module CodeBasic = struct 25 | include Trx_code_native 26 | 27 | let to_code1 : ('a arr -> 'b stm) -> ('a array code -> 'b code) 28 | = fun f x -> f x 29 | let to_code2 : ('a arr * 'b arr -> 'c stm) -> 30 | ('a array code * 'b array code -> 'c code) 31 | = fun f (x,y) -> f (x,y) 32 | end 33 | 34 | module CodePV = struct 35 | include Pk_cde.Make(Trx_code_native) 36 | let injarr : 'a array code -> 'a arr = fun x -> 37 | (inj_global .., x) 38 | 39 | let to_code1 : ('a arr -> 'b stm) -> ('a array code -> 'b code) 40 | = fun f x -> dyn_stm @@ f (injarr x) 41 | let to_code2 : ('a arr * 'b arr -> 'c stm) -> 42 | ('a array code * 'b array code -> 'c code) 43 | = fun f (x,y) -> dyn_stm @@ f (injarr x, injarr y) 44 | end 45 | 46 | 47 | module type lib = sig 48 | type 'a exp 49 | type 'a stm 50 | type 'a arr 51 | type 'a stream 52 | 53 | (* Producers *) 54 | val of_arr : 'a arr -> 'a stream 55 | 56 | (* Consumers *) 57 | val fold : ('z exp -> 'a exp -> 'z exp) -> 'z exp -> 'a stream -> 'z stm 58 | 59 | (* Transformers *) 60 | val map : ('a exp -> 'b exp) -> 'a stream -> 'b stream 61 | val flat_map : ('a exp -> 'b stream) -> 'a stream -> 'b stream 62 | val filter : ('a exp -> bool exp) -> 'a stream -> 'a stream 63 | val take : int exp -> 'a stream -> 'a stream 64 | val zip_with : ('a exp -> 'b exp -> 'c exp) -> ('a stream -> 'b stream -> 'c stream) 65 | 66 | 67 | type byte 68 | val byte_max: byte 69 | val decode : byte stream -> bool stream 70 | end 71 | 72 | 73 | (* ===Main Functor=== *) 74 | module Benchmark(C : cde)(L : lib with type 'a exp = 'a C.exp 75 | and type 'a stm = 'a C.stm and type 'a arr = 'a C.arr) = struct 76 | open L 77 | type 'a exp = 'a C.exp 78 | type 'a stm = 'a C.stm 79 | type 'a arr = 'a C.arr 80 | let sum_int : int stream -> int stm = fold C.( + ) (C.int 0) 81 | 82 | 83 | let sum : int array code -> int code 84 | = C.to_code1 @@ fun arr -> 85 | of_arr arr 86 | |> sum_int 87 | 88 | let sumOfSquares : int array code -> int code 89 | = C.to_code1 @@ fun arr -> 90 | of_arr arr 91 | |> map C.(fun x -> x * x) 92 | |> sum_int 93 | 94 | let maps : int array code -> int code 95 | = C.to_code1 @@ fun arr -> 96 | of_arr arr 97 | |> map C.(fun x -> x * int 1) 98 | |> map C.(fun x -> x * int 2) 99 | |> map C.(fun x -> x * int 3) 100 | |> map C.(fun x -> x * int 4) 101 | |> map C.(fun x -> x * int 5) 102 | |> map C.(fun x -> x * int 6) 103 | |> map C.(fun x -> x * int 7) 104 | |> sum_int 105 | 106 | let filters : int array code -> int code 107 | = C.to_code1 @@ fun arr -> 108 | of_arr arr 109 | |> filter C.(fun x -> x > int 1) 110 | |> filter C.(fun x -> x > int 2) 111 | |> filter C.(fun x -> x > int 3) 112 | |> filter C.(fun x -> x > int 4) 113 | |> filter C.(fun x -> x > int 5) 114 | |> filter C.(fun x -> x > int 6) 115 | |> filter C.(fun x -> x > int 7) 116 | |> sum_int 117 | 118 | let sumOfSquaresEven : int array code -> int code 119 | = C.to_code1 @@ fun arr -> 120 | of_arr arr 121 | |> filter C.(fun x -> x mod (int 2) = int 0) 122 | |> map C.(fun x -> x * x) 123 | |> sum_int 124 | 125 | let cart : (int array code * int array code) -> int code 126 | = C.to_code2 @@ fun (arr1, arr2) -> 127 | of_arr arr1 128 | |> flat_map (fun x -> of_arr arr2 |> map C.(fun y -> x * y)) 129 | |> sum_int 130 | 131 | let dotProduct : (int array code * int array code) -> int code 132 | = C.to_code2 @@ fun (arr1, arr2) -> 133 | zip_with C.( * ) (of_arr arr1) (of_arr arr2) 134 | |> sum_int 135 | 136 | let flatMap_after_zipWith : (int array code * int array code) -> int code 137 | = C.to_code2 @@ fun (arr1, arr2) -> 138 | zip_with C.( + ) (of_arr arr1) (of_arr arr1) 139 | |> flat_map (fun x -> of_arr arr2|> map C.(fun el -> el + x)) 140 | |> sum_int 141 | 142 | 143 | let zipWith_after_flatMap : (int array code * int array code) -> int code 144 | = C.to_code2 @@ fun (arr1, arr2) -> 145 | of_arr arr1 146 | |> flat_map (fun x -> of_arr arr2 |> map C.(fun y -> y + x)) 147 | |> zip_with C.( + ) (of_arr arr1) 148 | |> sum_int 149 | 150 | 151 | let flat_map_take : (int array code * int array code) -> int code 152 | = C.to_code2 @@ fun (arr1, arr2) -> 153 | of_arr arr1 154 | |> flat_map (fun x -> of_arr arr2 |> map C.(fun y -> x * y)) 155 | |> take (C.int 20_000_000) 156 | |> sum_int 157 | 158 | 159 | (* The following two benchmarks are from 160 | https://github.com/epfldata/staged-rewritten-streams 161 | but slightly modified (compexified) 162 | Also, zip_flat_flat code from the thesis had a mistake 163 | *) 164 | let zip_filter_filter :(int array code * int array code) -> int code 165 | = C.to_code2 @@ fun (arr1, arr2) -> 166 | zip_with C.( + ) 167 | (of_arr arr1 |> filter C.(fun x -> x > int 7)) 168 | (of_arr arr2 |> filter C.(fun x -> x > int 5)) 169 | |> sum_int 170 | 171 | (* Take more values, so that arr1 is scanned in full at least once. 172 | This also checks better the code, more control paths are taken. 173 | *) 174 | let zip_flat_flat :(int array code * int array code) -> int code 175 | = C.to_code2 @@ fun (arr1, arr2) -> 176 | zip_with C.( + ) 177 | (of_arr arr1 |> 178 | flat_map (fun x -> of_arr arr2 |> map C.(fun y -> x * y))) 179 | (of_arr arr2 |> 180 | flat_map (fun x -> of_arr arr1 |> map C.(fun y -> x - y))) 181 | |> take (C.int 200_000_000) 182 | |> sum_int 183 | 184 | 185 | (* XXX Needed benchmarks: 186 | zip 187 | (zip (stream |> filter) (stream |> filter)) 188 | (zip (stream |> filter) (stream |> filter)) 189 | 190 | And another one, with fllat_map instead of filter. 191 | 192 | And another one, 193 | zip (stream |> filter) 194 | (zip (stream |> filter) (stream |> filter)) 195 | 196 | (perhaps, make 4 or 5 chain of zippers?) 197 | *) 198 | 199 | 200 | let decoding :(byte array code * byte array code) -> int code 201 | = C.to_code2 @@ fun (arr1,arr2) -> 202 | zip_with C.(||) (of_arr arr1 |> decode) (of_arr arr2 |> decode) 203 | |> map C.(fun x -> cond x (int 1) (int 0)) 204 | |> sum_int 205 | end 206 | -------------------------------------------------------------------------------- /benchmarks/benchmark_streaming.ml: -------------------------------------------------------------------------------- 1 | (* 2 | #require "streaming";; 3 | open Streaming;; 4 | let st = Source.array [|1;2;3;4;5;6;7;8;9;10|] in 5 | Stream.from (Source.zip st st) 6 | |> Stream.fold (fun (z1,z2) (x1,x2) -> (z1+x1, z2+x2) ) (0,0) 7 | 8 | - https://odis-labs.github.io/streaming/streaming/index.html#what's-the-difference-between-sources-and-streams? 9 | "In general, streams offer better performance than sources for 10 | the most common operations (including concatenation) and offer integration with 11 | sinks and flows. On the other hand, sources are easier to create, and support zipping." 12 | 13 | - https://odis-labs.github.io/streaming/streaming/Streaming/Stream/index.html 14 | "Streams are built to be compatible with sources, sinks and flows. 15 | To create a stream that produces all elements from a source use 16 | Stream.from, to consume a stream with a sink use Stream.into and 17 | to transform stream elements with a flow use Stream.via. 18 | For more sophisticated pipelines that might have source leftovers, 19 | run can be used." 20 | 21 | In conclusion, streaming cannot zip a nested stream. 22 | *) 23 | 24 | module Streaming_intf = struct 25 | open Streaming.Stream 26 | type 'a cde = 'a code 27 | type 'a stream_raw = 'a t 28 | type 'a stream = 'a t cde 29 | 30 | let lift_tr1 : (('a -> 'b ) -> 'a stream_raw -> 'c stream_raw) cde 31 | -> ('a cde -> 'b cde) -> 'a stream -> 'c stream = 32 | fun tr f st -> .<.~tr (fun x -> .~(f ..)) .~st>. 33 | 34 | let lift_tr2 : (('a -> 'b -> 'c) -> ('a stream_raw -> 'b stream_raw -> 'c stream_raw) )cde 35 | -> ('a cde -> 'b cde -> 'c cde) -> 'a stream -> 'b stream -> 'c stream = 36 | fun tr f st1 st2 -> .<.~tr (fun x y -> .~(f .. ..)) .~st1 .~st2>. 37 | 38 | 39 | let of_arr : 'a array cde -> 'a stream = fun x -> .. 40 | 41 | let fold : ('z cde -> 'a cde -> 'z cde) -> 'z cde -> 'a stream -> 'z cde = 42 | fun f z st -> . .~(f .. ..)) .~z .~st>. 43 | 44 | let map : ('a cde -> 'b cde) -> 'a stream -> 'b stream = 45 | fun f st -> lift_tr1 .. f st 46 | 47 | let flat_map : ('a cde -> 'b stream) -> 'a stream -> 'b stream = 48 | fun f st -> lift_tr1 .. f st 49 | 50 | let filter : ('a cde -> bool cde) -> 'a stream -> 'a stream = 51 | fun f st -> lift_tr1 .. f st 52 | 53 | let take : int cde -> 'a stream -> 'a stream = 54 | fun n st -> .. 55 | 56 | let zip_with : ('a cde -> 'b cde -> 'c cde) -> ('a stream -> 'b stream -> 'c stream) = 57 | fun f st1 st2 -> failwith "unusable" 58 | 59 | 60 | type byte = int 61 | let byte_max = 255 62 | let decode = fun st -> failwith "unusable" 63 | end 64 | 65 | 66 | module Benchmark_streaming = struct 67 | open Benchmark_types 68 | open Benchmark 69 | 70 | open Streaming_intf 71 | 72 | module C = Benchmark_abstract.CodeBasic 73 | open Benchmark_abstract.Benchmark(C)(Streaming_intf) 74 | 75 | open Streaming 76 | let of_arr arr = .. 77 | let map f st = . .~(f ..)) .~st)>. 78 | let filter f st = . .~(f ..)) .~st>. 79 | let zip_with f st1 st2 = 80 | . .~(f .. ..)) .~st1 .~st2)>. 81 | 82 | let dotProduct : (int array code * int array code) -> int code 83 | = C.to_code2 @@ fun (arr1, arr2) -> 84 | zip_with C.( * ) (of_arr arr1) (of_arr arr2) 85 | |> sum_int 86 | 87 | let flatMap_after_zipWith : (int array code * int array code) -> int code 88 | = C.to_code2 @@ fun (arr1, arr2) -> 89 | zip_with C.( + ) (of_arr arr1) (of_arr arr1) 90 | |> flat_map (fun x -> of_arr arr2 |> map C.(fun el -> el + x)) 91 | |> sum_int 92 | 93 | let zip_filter_filter :(int array code * int array code) -> int code 94 | = C.to_code2 @@ fun (arr1, arr2) -> 95 | zip_with C.( + ) 96 | (of_arr arr1 |> filter C.(fun x -> x > int 7)) 97 | (of_arr arr2 |> filter C.(fun x -> x > int 5)) 98 | |> sum_int 99 | 100 | 101 | (* Arrays used for benchmarking *) 102 | let v = .< Array.init 100_000_000 (fun i -> i mod 10) >.;; 103 | let vHi = .< Array.init 10_000_000 (fun i -> i mod 10) >.;; 104 | let vLo = .< Array.init 10 (fun i -> i mod 10) >.;; 105 | let vFaZ = .< Array.init 10_000 (fun i -> i) >.;; 106 | let vZaF = .< Array.init 10_000_000 (fun i -> i) >.;; 107 | 108 | let options = { 109 | repetitions = 20; 110 | final_f = (fun _ -> .<()>.); 111 | } 112 | 113 | let pr_int = {options with 114 | final_f = fun x -> ..} 115 | 116 | let check_int n = {options with 117 | final_f = fun x -> ..} 118 | 119 | let script =[| 120 | perfS "sum_streaming" v sum options; 121 | perfS "sumOfSquares_streaming" v sumOfSquares options; 122 | perfS "sumOfSquaresEven_streaming" v sumOfSquaresEven options; 123 | perfS "mapsMegamorphic_streaming" v maps options; 124 | perfS "filtersMegamorphic_streaming" v filters options; 125 | perfS2 "cart_streaming" vHi vLo cart options; 126 | perfS2 "dotProduct_streaming" vHi vHi dotProduct options; 127 | perfS2 "flatMapAfterZip_streaming" vFaZ vFaZ 128 | flatMap_after_zipWith options; 129 | (* perfS2 "zipAfterFlatMap_streaming" vZaF vZaF 130 | zipWith_after_flatMap options; *) 131 | perfS2 "flatMapTake_streaming" vHi vLo 132 | flat_map_take options; 133 | perfS2 "zipFilterFilter_streaming" v vHi 134 | zip_filter_filter options; 135 | (* perfS2 "zipFlatMapFlatMap_streaming" v vLo 136 | zip_flat_flat options; *) 137 | (* perfS2 "runLengthDecoding_streaming" v v 138 | decoding options; *) 139 | |];; 140 | 141 | let test = .< 142 | print_endline "Last checked: Jun 2, 2022"; 143 | assert (.~(sum v) == 450000000); 144 | assert (.~(sumOfSquares v) == 2850000000); 145 | assert (.~(sumOfSquaresEven v) == 1200000000); 146 | assert (.~(maps v) == 2268000000000); 147 | assert (.~(filters v) == 170000000); 148 | assert (.~(cart (vHi, vLo)) == 2025000000); 149 | assert (.~(dotProduct (vHi, vHi)) == 285000000); 150 | assert (.~(flatMap_after_zipWith (vFaZ, vFaZ)) == 1499850000000); 151 | (* assert (.~(zipWith_after_flatMap (vZaF, vZaF)) == 99999990000000); *) 152 | assert (.~(flat_map_take (vHi, vLo)) == 405000000); 153 | assert (.~(zip_filter_filter (v, vHi)) == 64000000); 154 | (* assert (.~(zip_flat_flat (v, vLo)) == 3250000000); *) 155 | (* assert (.~(decoding (v, v)) == 100000000); *) 156 | print_endline "All done" 157 | >. 158 | end;; 159 | 160 | module M = Benchmark_streaming 161 | 162 | let main () = 163 | let compiler = "ocamlfind ocamlopt -O2 -unsafe -nodynlink -package streaming -linkpkg util.cmx" in 164 | match Sys.argv with 165 | | [|_;"test"|] -> 166 | Benchmark.run_natively M.test 167 | ~compiler 168 | (* ~save:true *) 169 | | _ -> 170 | Benchmark.run_script M.script 171 | ~compiler 172 | 173 | let _ = main () 174 | -------------------------------------------------------------------------------- /lib/backends/C/c_ast.mli: -------------------------------------------------------------------------------- 1 | (* Abstract Syntax Tree for C 2 | 3 | Unlike IR used by MetaOCaml, it is designed to reflect a bigger subset 4 | of C, including GOTO and possibly inline assembly. Also, it is designed 5 | to be straightforwardly pretty-printed to C. 6 | 7 | It is loosely based on the `abstract syntax for FrontC' by Hugues Cassé 8 | (version 2.1, 4.7.99) but with many simplifications and changes 9 | 10 | There are two biggest simplifications. First, type specification: We do not 11 | follow C bizzare conventions, and do not support composite 12 | declarations like 13 | int x, *z, * const z; 14 | Each declaration should introduce only one variable. 15 | 16 | Second, we reject C's madness and do not consider assignment, 17 | pre/post increment/decrement and modification (e.g., +=) as expressions. 18 | They are statements, period. We thus do not support things like *x++ = j++ 19 | etc. 20 | 21 | We also support C99: mixing of declarations and statements. 22 | Declarations don't have to be collected at the beginning of the block. 23 | 24 | Our data structure representation is also more precise, reflecting 25 | the fact that comma-expressions support a restricted set of statements. 26 | *) 27 | 28 | type label = string 29 | type varname = string 30 | type fieldname = string 31 | 32 | (* C types, or subset thereof that we are using *) 33 | (* C11 has boolean data types. From 34 | * https://stackoverflow.com/questions/1921539/using-boolean-values-in-c 35 | * ``For the datatype, #include , and use true, false and bool. 36 | * Or do not include it, and use _Bool, 1 and 0 instead.'' 37 | *) 38 | 39 | type ctype = {typ: typ; specs: spec list} 40 | and typ = 41 | | Tvoid 42 | | Tbool 43 | | Tchar 44 | | Tshort 45 | | Tint 46 | | Tlong 47 | | Tint64 48 | | Tuint (* unsigned int *) 49 | | Tulong (* unsigned long *) 50 | | Tfloat 51 | | Tdouble 52 | | Tnamed of string (* names typedef'd types, plus the 53 | catch-all for other types 54 | *) 55 | | Tptr of ctype 56 | | Tarray of int * ctype 57 | (* Tfun -- don't support for now *) 58 | and spec = (* specifiers *) 59 | | S_const 60 | | S_volatile 61 | | S_restrict 62 | | S_static 63 | | S_extern 64 | | S_inline 65 | 66 | (* The constants carry textual representation, to spare the 67 | pretty-printer having to deal with the precision of OCaml's integers, 68 | etc. 69 | *) 70 | type constant = 71 | | Const_num of string (* textual representation *) 72 | | Const_char of char (* may need escaping *) 73 | | Const_string of string (* may need escaping *) 74 | 75 | type binary_operator = 76 | | ADD | SUB | MUL | DIV | MOD 77 | | AND | OR 78 | | BAND | BOR | XOR | SHL | SHR 79 | | EQ | NE | LT | GT | LE | GE 80 | 81 | type unary_operator = 82 | | MINUS | PLUS 83 | | NOT (* ! *) 84 | | BNOT (* ~ *) 85 | | MEMOF (* * *) 86 | | ADDROF (* & *) 87 | 88 | type binary_modifier = 89 | | ASSIGN 90 | | ADD_ASSIGN | SUB_ASSIGN | MUL_ASSIGN | DIV_ASSIGN | MOD_ASSIGN 91 | | BAND_ASSIGN | BOR_ASSIGN | XOR_ASSIGN | SHL_ASSIGN | SHR_ASSIGN 92 | 93 | type unary_modifier = 94 | | POSINCR | POSDECR (* no pre-increment/pre-decrement *) 95 | 96 | (* If an expression does not have Calls, it is pure! *) 97 | type expression = 98 | | Nothing (* for return; etc. *) 99 | | Const of constant 100 | | Var of varname 101 | | Unary of unary_operator * expression 102 | | Label_addr of label (* GCC's && Label *) 103 | | Binary of binary_operator * expression * expression 104 | | Cond of expression * expression * expression 105 | | Cast of ctype * expression 106 | (* A CAST can actually be a constructor expression *) 107 | 108 | | Call of expression * expression list 109 | | Comma of simple_statement list * expression 110 | (* 111 | | EXPR_SIZEOF of expression 112 | | TYPE_SIZEOF of specifier * decl_type 113 | | EXPR_ALIGNOF of expression 114 | | TYPE_ALIGNOF of specifier * decl_type 115 | *) 116 | | Index of expression * expression 117 | | Memberof of expression * fieldname 118 | | Memberofptr of expression * fieldname 119 | 120 | and init_expression = 121 | | Init_none 122 | | Init_single of expression 123 | | Init_compound of (init_designator * init_expression) list 124 | 125 | and init_designator = 126 | | Indes_none 127 | | Indes_field of string * init_designator 128 | | Indes_index of expression * init_designator 129 | | Indes_index_range of expression * expression 130 | 131 | and simple_statement = 132 | | NOP 133 | | CALL of expression * expression list (* procedure call *) 134 | | UNMOD of unary_modifier * expression 135 | | BIMOD of binary_modifier * expression * expression 136 | and let_or_statement = 137 | | LET of definition 138 | | STMT of simple_statement 139 | and statement = 140 | | SIMPLE of let_or_statement 141 | (* The motivation for this is to restrict the scope of bindings 142 | in the containing expressions 143 | *) 144 | | BLOCK of block 145 | | IF of expression * block * block 146 | | WHILE of expression * block 147 | | DOWHILE of expression * block 148 | | FOR of let_or_statement * expression * simple_statement * block 149 | | BREAK 150 | | CONTINUE 151 | | RETURN of expression 152 | | SWITCH of expression * block 153 | | CASE of expression * block 154 | | CASERANGE of expression * expression * block 155 | | DEFAULT of block 156 | | LABEL of label (* label:; *) 157 | | GOTO of label 158 | | COMPGOTO of expression (* GCC's "goto *exp" *) 159 | (* 160 | | ASM of attribute list * (* typically only volatile and const *) 161 | string list * (* template *) 162 | (string * expression) list * (* list of constraints and expressions for 163 | * outputs *) 164 | (string * expression) list * (* same for inputs *) 165 | string list * (* clobbered registers *) 166 | *) 167 | 168 | (* Before: 169 | A block contains a list of local label declarations ( GCC's ({ __label__ 170 | l1, l2; ... }) ) , a list of definitions and a list of statements. 171 | We don't use GCC labels anyway, so we drop them. 172 | Also, we now support the mixture of definitions and statements 173 | *) 174 | and block = statement list 175 | 176 | and typedname = varname * ctype (* const int * x *) 177 | 178 | and definition = typedname * init_expression (* local/global variable *) 179 | 180 | (* 181 | * Declarations: at the top level 182 | *) 183 | type declaration = 184 | | FUNDEF of ctype * varname * typedname list * block 185 | | DECDEF of definition (* global variable *) 186 | | PROTO of typedname * varname * typedname list (* function prototype *) 187 | | TYPEDEF of typedname 188 | (* 189 | | ONLYTYPEDEF of specifier 190 | | GLOBASM of string 191 | | PRAGMA of expression 192 | | INCLUDE of string 193 | | IFDEF of string 194 | | ENDIF 195 | *) 196 | 197 | -------------------------------------------------------------------------------- /examples/run-length-encoding/apples.ml: -------------------------------------------------------------------------------- 1 | (* Run-Length Encodings 2 | 3 | Another example of run-length encoding: solving the problem from 4 | http://okmij.org/ftp/Algorithms/grasping-all-apples-at-once.html 5 | 6 | Specifically, the problem is converting the input like 7 | "aaaabbbcca" 8 | to the output like 9 | [("a",4), ("b", 3), ("c", 2), ("a", 1)] 10 | 11 | The characteristic of the problem is stream with explicit termination, 12 | and stream look-ahead. 13 | 14 | Since strymonas is made mostly for numeric computations, we slightly 15 | adjust the example to use integers instead of characters, and 16 | arrays instead of strings. 17 | *) 18 | 19 | (* 20 | #directory "../../lib";; 21 | #directory "../../lib/backends/Trx";; 22 | #directory "../../lib/backends/C";; 23 | #load "stream.cma";; 24 | *) 25 | 26 | (* The identity function that makes CPS convenient *) 27 | let (let-) c k = c k 28 | 29 | module type cde_ex = module type of Cde_ex 30 | 31 | (* This could have been in the library: and probably will be 32 | at some point 33 | *) 34 | module TerminatedStream(C:cde_ex) = struct 35 | open Stream_cooked_fn.Make(C) 36 | open Raw 37 | 38 | (* A stream with an explicit terminator. The type of the stream, 39 | 'a cde option (statically-visible option) means that we will 40 | potentially duplicate code: for the regular case and for the 41 | terminated-stream case. 42 | *) 43 | type 'a term_stream = 'a option stream 44 | 45 | let of_arr_term : 'a arr -> 'a exp term_stream = 46 | fun arr -> 47 | let- len = initializing C.(array_len arr) in 48 | let- i = initializing_ref C.(int 0) in 49 | infinite C.(fun k -> 50 | let- iv = letl (dref i) in 51 | incr i @. 52 | if_ (iv < len) ( 53 | let- v = array_get arr iv in 54 | k (Some v) 55 | ) ( 56 | k None 57 | ) 58 | ) 59 | |> guard (GExp C.(dref i <= len)) 60 | end 61 | 62 | module LookAhead(C:cde_ex) = struct 63 | open Stream_cooked_fn.Make(C) 64 | open TerminatedStream(C) 65 | open Raw 66 | 67 | (* A stream with look-ahead: a stream whose elements are tuples 68 | of the current element and possibly the next element. 69 | Again, with the statically visible option we opt into code 70 | duplication and specialization 71 | *) 72 | type 'a look_ahead_stream = ('a * 'a option) stream 73 | 74 | let look_ahead : 'a tbase -> 'a exp term_stream -> 'a exp look_ahead_stream = 75 | fun tbase st -> 76 | let- prev = initializing_ref C.(tbase_zero tbase) in 77 | let- saw_prev = initializing_ref C.(bool false) in 78 | st |> map_raw ~linear:false C.(function 79 | | Some e -> fun k -> 80 | if_ (dref saw_prev) ( 81 | let- pv = letl (dref prev) in 82 | (prev := e) @. 83 | k (pv, Some e) 84 | ) ( 85 | (prev := e) @. 86 | (saw_prev := bool true) 87 | ) 88 | | None -> fun k -> (* stream is terminated *) 89 | if1 (dref saw_prev) ( 90 | k (dref prev,None) 91 | ) 92 | ) 93 | end 94 | 95 | (* Solving the problem, as described on the above page *) 96 | module RLL(C:cde_ex) = struct 97 | open Stream_cooked_fn.Make(C) 98 | open TerminatedStream(C) 99 | open LookAhead(C) 100 | 101 | (* computing group breaks: annotating each element with a boolean: 102 | true if this element is the last in its group (and so the next 103 | element (if any) will start a new group) 104 | *) 105 | type 'a annot = 'a * bool Raw.exp 106 | let group : 'a look_ahead_stream -> 'a annot stream = 107 | Raw.map_raw' @@ function 108 | | (x,Some next) -> C.(x, (not (x = next))) 109 | | (x,_) -> C.(x,bool true) 110 | 111 | (* counting the group elements *) 112 | let count : 'a annot stream -> ('a * int Raw.exp) stream = fun st -> 113 | let- cnt = Raw.initializing_ref C.(int 0) in 114 | st |> Raw.map_raw ~linear:false @@ fun (x,break) k -> 115 | let open C in 116 | if_ break ( 117 | let- cv = letl (dref cnt) in 118 | (cnt := int 0) @. 119 | k (x, cv + int 1) 120 | ) ( 121 | incr cnt 122 | ) 123 | 124 | let rll arr = arr |> of_arr_term |> look_ahead C.tint |> group |> count 125 | 126 | let rll_print arr = 127 | arr |> rll |> 128 | iter C.(fun (x,cnt) -> 129 | print_int x @. print_int cnt) 130 | end 131 | 132 | module CCaml = Backends.MetaOCaml 133 | 134 | module M = RLL(CCaml) 135 | let f = CCaml.one_arg_fun M.rll_print 136 | 137 | (* 138 | val f : (int array -> unit) code = .< 139 | fun arg1_9 -> 140 | let t_10 = Stdlib.Array.length arg1_9 in 141 | let v_11 = Stdlib.ref 0 in 142 | let v_12 = Stdlib.ref 0 in 143 | let v_13 = Stdlib.ref false in 144 | let v_14 = Stdlib.ref 0 in 145 | while (! v_14) <= t_10 do 146 | let t_15 = ! v_14 in 147 | Stdlib.incr v_14; 148 | if t_15 < t_10 149 | then 150 | (let el_17 = Stdlib.Array.get arg1_9 t_15 in 151 | if ! v_13 152 | then 153 | let t_18 = ! v_12 in 154 | (v_12 := el_17; 155 | if Stdlib.not (t_18 = el_17) 156 | then 157 | (let t_19 = ! v_11 in 158 | v_11 := 0; 159 | (Stdlib.Format.print_int t_18; Stdlib.Format.force_newline ()); 160 | Stdlib.Format.print_int (t_19 + 1); 161 | Stdlib.Format.force_newline ()) 162 | else Stdlib.incr v_11) 163 | else (v_12 := el_17; v_13) := true) 164 | else 165 | if ! v_13 166 | then 167 | (let t_16 = ! v_11 in 168 | v_11 := 0; 169 | (Stdlib.Format.print_int (! v_12); Stdlib.Format.force_newline ()); 170 | Stdlib.Format.print_int (t_16 + 1); 171 | Stdlib.Format.force_newline ()) 172 | done>. 173 | *) 174 | 175 | let _ = Runcode.run f [|41;41;41;41;42;42;42;43;43;41|] 176 | (* 177 | 41 178 | 4 179 | 42 180 | 3 181 | 43 182 | 2 183 | 41 184 | 1 185 | *) 186 | 187 | let _ = Runcode.run f [||] 188 | 189 | let _ = Runcode.run f [|41;41;41;41;42;42;42;43;43|] 190 | (* 191 | 41 192 | 4 193 | 42 194 | 3 195 | 43 196 | 2 197 | *) 198 | 199 | let _ = Runcode.run f [|41|] 200 | (* 201 | 41 202 | 1 203 | *) 204 | 205 | (* Generate C code *) 206 | module CC = Backends.C 207 | module M = RLL(CC) 208 | let _ = 209 | let open CC in 210 | pp_proc ~name:"rll" Format.std_formatter @@ 211 | arg_base ~name:"n" tint @@ fun n -> 212 | arg_array ~name:"a" n tint @@ fun a -> 213 | nullary_proc (M.rll_print a) 214 | 215 | (* 216 | void rll(int64_t const n_1,int64_t * const a_2){ 217 | int64_t x_3 = 0; 218 | int64_t x_4 = 0; 219 | bool x_5 = false; 220 | int64_t x_6 = 0; 221 | while (x_6 <= n_1) 222 | { 223 | int64_t const t_7 = x_6; 224 | x_6++; 225 | if (t_7 < n_1) 226 | { 227 | int64_t const t_9 = a_2[t_7]; 228 | if (x_5) 229 | { 230 | int64_t const t_10 = x_4; 231 | x_4 = t_9; 232 | if (!(t_10 == t_9)) 233 | { 234 | int64_t const t_11 = x_3; 235 | x_3 = 0; 236 | printf("%ld\n",t_10); 237 | printf("%ld\n",t_11 + 1); 238 | } 239 | else 240 | x_3++; 241 | } 242 | else { 243 | x_4 = t_9; 244 | x_5 = true; 245 | } 246 | } 247 | else {if (x_5) 248 | { 249 | int64_t const t_8 = x_3; 250 | x_3 = 0; 251 | printf("%ld\n",x_4); 252 | printf("%ld\n",t_8 + 1); 253 | }} 254 | } 255 | } 256 | *) 257 | 258 | 259 | let _ = print_endline "\nAll done" 260 | --------------------------------------------------------------------------------