├── .coveragerc
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── c_test_environment
    ├── .gitignore
    ├── Makefile
    ├── c_index_strings.py
    ├── clang_datalog_tests.py
    ├── clang_myrial_tests.py
    ├── clang_processor.py
    ├── compare.py
    ├── convert2bin.h
    ├── convert2bin.py
    ├── counters_util.cc
    ├── counters_util.h
    ├── dates.cc
    ├── dates.h
    ├── dates_test.cc
    ├── generate_test_relations.py
    ├── grappa_detect_new_files.sh
    ├── grappa_ingest.py
    ├── grappalang_myrial_tests.py
    ├── grappalang_tests.py
    ├── hash.h
    ├── io_util.cc
    ├── io_util.h
    ├── old_query.cc
    ├── old_query2.cc
    ├── osutils.py
    ├── query.cc
    ├── radish_utils.cc
    ├── radish_utils.h
    ├── rdf-strings2ints.rb
    ├── repr_myrial_tests.py
    ├── run_query.py
    ├── strings.cc
    ├── strings.h
    ├── testqueries
    │   ├── aggregate_count.sql
    │   ├── aggregate_count_group_one.sql
    │   ├── aggregate_count_group_one_notgroup_filtered_one.sql
    │   ├── aggregate_count_group_one_notgroup_one.sql
    │   ├── aggregate_double.sql
    │   ├── aggregate_max.sql
    │   ├── aggregate_min.sql
    │   ├── aggregate_of_binop.sql
    │   ├── aggregate_of_binop_double.sql
    │   ├── aggregate_of_binop_no_key_unionall_double.sql
    │   ├── aggregate_string.sql
    │   ├── aggregate_sum.sql
    │   ├── apply.sql
    │   ├── apply_and_self_join.sql
    │   ├── argmax.myl
    │   ├── argmax_all_uda.sql
    │   ├── argmax_uda.sql
    │   ├── builtin_and_UDA.sql
    │   ├── common_index_allowed.sql
    │   ├── common_index_disallowed.sql
    │   ├── countstar_string.sql
    │   ├── directed_squares.sql
    │   ├── directed_triangles.sql
    │   ├── expr_singleton.sql
    │   ├── few_col_store.sql
    │   ├── groupby_string_key.sql
    │   ├── groupby_string_multi_key.sql
    │   ├── head_scalar_op.sql
    │   ├── idivide.sql
    │   ├── join.sql
    │   ├── join_of_aggregate_of_join.sql
    │   ├── join_of_two_unionalls.sql
    │   ├── join_string_key.sql
    │   ├── join_string_val.sql
    │   ├── join_swap_indexing.sql
    │   ├── join_then_aggregate.sql
    │   ├── join_two_types.sql
    │   ├── like_begin.sql
    │   ├── like_begin_end.sql
    │   ├── like_end.sql
    │   ├── like_middle.sql
    │   ├── matrix_mult.sql
    │   ├── more_col_store.sql
    │   ├── multi_builtin.sql
    │   ├── project_string.sql
    │   ├── q2.sql
    │   ├── scan.sql
    │   ├── select.sql
    │   ├── select_conjunction.sql
    │   ├── select_double.sql
    │   ├── select_string.sql
    │   ├── select_string_literal.sql
    │   ├── select_then_join.sql
    │   ├── self_join.sql
    │   ├── self_three_path.sql
    │   ├── singleton_constant.sql
    │   ├── store.sql
    │   ├── string_join.sql
    │   ├── swap.sql
    │   ├── test_join_of_two_aggregates.sql
    │   ├── three_path.sql
    │   ├── three_way_three_key_hash_join.sql
    │   ├── two_hop.sql
    │   ├── two_join_switch.sql
    │   ├── two_key_hash_join.sql
    │   ├── two_key_hash_join_swap.sql
    │   ├── two_path.sql
    │   ├── two_var_select.sql
    │   ├── unionall.sql
    │   ├── unionall_3.sql
    │   ├── unionall_apply_and_self_join.sql
    │   ├── unionall_of_join.sql
    │   ├── unionall_then_aggregate.sql
    │   ├── unionall_then_join.sql
    │   ├── while.sql
    │   ├── while_repeat_groupby.sql
    │   ├── while_repeat_join.sql
    │   ├── while_union_all.sql
    │   └── zero_store.sql
    ├── testquery.py
    ├── timing.h
    └── verifier.py
├── docs
    └── index.md
├── examples
    ├── .gitignore
    ├── Makefile
    ├── README.md
    ├── bad_column_name.myl
    ├── cast.myl
    ├── catalog.py
    ├── chained.myl
    ├── clog.py
    ├── clog.sh
    ├── connected_components.myl
    ├── crossmatch_2d.myl
    ├── deadcode.myl
    ├── deadcode2.myl
    ├── dept.csv
    ├── emp.csv
    ├── example_queries.py
    ├── grappa_test_query.py
    ├── grappalog.py
    ├── grappalog.sh
    ├── groupby1.myl
    ├── groupby2.myl
    ├── groupby3.myl
    ├── helloworld.py
    ├── iteration.myl
    ├── join.myl
    ├── join.sql
    ├── kmeans.myl
    ├── language_demo.myl
    ├── lineage.myl
    ├── load_options.csv
    ├── load_opts.myl
    ├── naivebayes
    │   ├── .gitignore
    │   ├── buckets.myl
    │   ├── cat_scheme
    │   ├── catalog.py
    │   ├── create_scheme.py
    │   ├── generate_parse.py
    │   ├── naivebayes_classify.myl
    │   ├── naivebayes_train.myl
    │   ├── nb_classify.myl
    │   ├── nb_train.myl
    │   ├── prepare_test.sh
    │   └── prepare_training.sh
    ├── noschema.myl
    ├── openmp_examples
    │   ├── A.h
    │   ├── DictOut.hpp
    │   ├── Makefile
    │   ├── igor_omp_tri.rb
    │   └── triangle_with_tuples.cpp
    ├── pagerank.myl
    ├── pagerank_dead.myl
    ├── pairwise_distances.myl
    ├── rdfsimple.myl
    ├── reachable.myl
    ├── samplescan.myl
    ├── seaflow.myl
    ├── seaflow2.myl
    ├── sigma-clipping-v0.myl
    ├── sigma-clipping.myl
    ├── sigma_clipping_points.txt
    ├── sp2bench.py
    ├── sp2bench
    │   ├── catalog.py
    │   ├── q1.dlg
    │   ├── q1.myl
    │   ├── q2.myl
    │   ├── q3.myl
    │   ├── q4.myl
    │   ├── sp2bench_rdf.py
    │   ├── sp2bench_rdf_brackets.py
    │   └── sp2bench_rdf_long.py
    ├── standalone.myl
    ├── tipsy.myl
    ├── uda.myl
    └── worker_id.myl
├── raco
    ├── __init__.py
    ├── algebra.py
    ├── backends
    │   ├── __init__.py
    │   ├── backend_common.py
    │   ├── cpp
    │   │   ├── __init__.py
    │   │   ├── c_templates
    │   │   │   ├── ascii_scan.cpp
    │   │   │   ├── base_query.cpp
    │   │   │   ├── clang_group_timing.cpp
    │   │   │   ├── clang_pipeline_timing.cpp
    │   │   │   ├── groupby
    │   │   │   │   ├── 0key_declaration.cpp
    │   │   │   │   ├── 0key_materialize.cpp
    │   │   │   │   ├── 0key_scan.cpp
    │   │   │   │   ├── 1key_declaration.cpp
    │   │   │   │   ├── 1key_materialize.cpp
    │   │   │   │   ├── 1key_scan.cpp
    │   │   │   │   ├── 2key_declaration.cpp
    │   │   │   │   ├── 2key_materialize.cpp
    │   │   │   │   └── 2key_scan.cpp
    │   │   │   ├── hashjoin
    │   │   │   │   ├── hash_declaration.cpp
    │   │   │   │   ├── insert_materialize.cpp
    │   │   │   │   └── lookup.cpp
    │   │   │   ├── materialized_tuple_ref_additional.cpp
    │   │   │   ├── memory_scan.cpp
    │   │   │   ├── relation_declaration.cpp
    │   │   │   └── string_index_lookup.cpp
    │   │   ├── cbase_templates
    │   │   │   ├── assignment.cpp
    │   │   │   ├── group_timing.cpp
    │   │   │   ├── materialized_tuple_create_one.cpp
    │   │   │   ├── materialized_tuple_create_two.cpp
    │   │   │   ├── materialized_tuple_ref.cpp
    │   │   │   ├── output_stream_close.cpp
    │   │   │   ├── output_stream_decl.cpp
    │   │   │   ├── output_stream_open.cpp
    │   │   │   ├── output_stream_write.cpp
    │   │   │   ├── pipeline_timing.cpp
    │   │   │   ├── select.cpp
    │   │   │   ├── tuple_declaration.cpp
    │   │   │   ├── tuple_type_convert.cpp
    │   │   │   └── write_count.cpp
    │   │   ├── cpp.py
    │   │   ├── cppcommon.py
    │   │   └── operator_at_a_time_c_templates
    │   │   │   ├── ascii_scan.template
    │   │   │   ├── base_query.template
    │   │   │   ├── binary_scan.template
    │   │   │   ├── emit_joined_tuple.template
    │   │   │   ├── filtering_nestedloop_hashjoin_chain.template
    │   │   │   ├── filtering_nestedloop_join.template
    │   │   │   ├── filtering_nestedloop_join_chain.template
    │   │   │   ├── filteringhashjoin.template
    │   │   │   ├── hashjoin.template
    │   │   │   ├── join_simple_hash_twopass.template
    │   │   │   ├── precount_select.template
    │   │   │   ├── scan.template
    │   │   │   └── select_simple_twopass.template
    │   ├── logical.py
    │   ├── myria
    │   │   ├── __init__.py
    │   │   ├── catalog.py
    │   │   ├── connection.py
    │   │   ├── errors.py
    │   │   ├── myria.py
    │   │   └── tests
    │   │   │   ├── test_error.py
    │   │   │   └── test_myria_execution.py
    │   ├── radish
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── grappa_templates
    │   │   │   ├── base_query.cpp
    │   │   │   ├── define_cl_arg.cpp
    │   │   │   ├── define_metric.cpp
    │   │   │   ├── file_scan.cpp
    │   │   │   ├── gce_app_metric.cpp
    │   │   │   ├── global_array_memory_scan.cpp
    │   │   │   ├── global_array_relation_declaration.cpp
    │   │   │   ├── graph_file_scan.cpp
    │   │   │   ├── grappa_group_timing.cpp
    │   │   │   ├── grappa_pipeline_timing.cpp
    │   │   │   ├── groupby
    │   │   │   │   ├── 0key_output.cpp
    │   │   │   │   ├── combine_definition.cpp
    │   │   │   │   ├── init_definition.cpp
    │   │   │   │   ├── multi_uda_0key_output.cpp
    │   │   │   │   ├── multi_uda_0key_update.cpp
    │   │   │   │   ├── multi_uda_scan.cpp
    │   │   │   │   ├── nkey_update.cpp
    │   │   │   │   ├── one_built_in_0key_output.cpp
    │   │   │   │   ├── one_built_in_0key_update.cpp
    │   │   │   │   ├── one_built_in_scan.cpp
    │   │   │   │   ├── scan.cpp
    │   │   │   │   ├── update_definition.cpp
    │   │   │   │   ├── withkey_decl.cpp
    │   │   │   │   ├── withkey_init.cpp
    │   │   │   │   └── withoutkey_init.cpp
    │   │   │   ├── hashjoin
    │   │   │   │   ├── hash_declaration.cpp
    │   │   │   │   ├── hash_init.cpp
    │   │   │   │   ├── insert_materialize.cpp
    │   │   │   │   └── lookup.cpp
    │   │   │   ├── input_relation_declarations.cpp
    │   │   │   ├── iterators
    │   │   │   │   ├── 0key_groupby_source.cpp
    │   │   │   │   ├── apply.cpp
    │   │   │   │   ├── broadcast_stream.cpp
    │   │   │   │   ├── hashjoin_sink.cpp
    │   │   │   │   ├── hashjoin_source.cpp
    │   │   │   │   ├── instantiate_operator.cpp
    │   │   │   │   ├── instantiate_sink.cpp
    │   │   │   │   ├── multikey_groupby_sink.cpp
    │   │   │   │   ├── multikey_groupby_source.cpp
    │   │   │   │   ├── partition_groupby
    │   │   │   │   │   └── multikey_groupby_sink.cpp
    │   │   │   │   ├── select.cpp
    │   │   │   │   ├── sink_declaration.cpp
    │   │   │   │   └── withkey_init.cpp
    │   │   │   ├── partition_groupby
    │   │   │   │   └── nkey_update.cpp
    │   │   │   ├── shuffle.cpp
    │   │   │   ├── shufflehashjoin
    │   │   │   │   ├── delete.cpp
    │   │   │   │   ├── hash_init.cpp
    │   │   │   │   ├── materialize.cpp
    │   │   │   │   ├── reduce.cpp
    │   │   │   │   └── result_scan.cpp
    │   │   │   ├── spawn.cpp
    │   │   │   ├── string_index_lookup.cpp
    │   │   │   ├── symmetric_array_file_scan.cpp
    │   │   │   ├── symmetric_array_memory_scan.cpp
    │   │   │   ├── symmetric_array_relation_declaration.cpp
    │   │   │   ├── symmetric_array_relation_materialize.cpp
    │   │   │   ├── symmetric_array_temprelation_declaration.cpp
    │   │   │   ├── symmetric_array_temprelation_init.cpp
    │   │   │   ├── symmetric_array_temprelation_materialize.cpp
    │   │   │   ├── symmetric_array_temprelation_materializer_done.cpp
    │   │   │   ├── symmetric_array_temprelation_recycle.cpp
    │   │   │   ├── symmetrichashjoin
    │   │   │   │   ├── hash_declaration.cpp
    │   │   │   │   ├── hash_init.cpp
    │   │   │   │   └── hash_insert_lookup.cpp
    │   │   │   ├── sync_declaration.cpp
    │   │   │   └── wait_statement.cpp
    │   │   └── radish.py
    │   ├── sparql
    │   │   ├── __init__.py
    │   │   └── sparql.py
    │   └── sql
    │   │   ├── __init__.py
    │   │   ├── catalog.py
    │   │   ├── test_case.py
    │   │   └── test_sql.py
    ├── catalog.py
    ├── catalog_tests
    │   ├── default_cardinality_relation.py
    │   ├── set_cardinality_relation.py
    │   └── test_catalog.py
    ├── clangtestdb.py
    ├── clib
    │   ├── algorithms.h
    │   ├── boolean.cc
    │   ├── boolean.h
    │   ├── testboolean.cc
    │   └── testboolean.o
    ├── compile.py
    ├── cpp_datalog_utils.py
    ├── datalog
    │   ├── __init__.py
    │   ├── datalog_test.py
    │   ├── grammar.py
    │   ├── model.py
    │   └── query_tests.py
    ├── datastructure
    │   ├── UnionFind.py
    │   ├── __init__.py
    │   └── test_union_find.py
    ├── dbconn.py
    ├── expression
    │   ├── __init__.py
    │   ├── aggregate.py
    │   ├── boolean.py
    │   ├── expression.py
    │   ├── expressions_library.py
    │   ├── function.py
    │   ├── statevar.py
    │   ├── udf.py
    │   ├── util.py
    │   └── visitor.py
    ├── fake_data.py
    ├── fakedb.py
    ├── from_repr.py
    ├── myrial
    │   ├── __init__.py
    │   ├── cfg.py
    │   ├── cfg_test.py
    │   ├── cli_test.py
    │   ├── emitarg.py
    │   ├── empty_aggregate_tests.py
    │   ├── exceptions.py
    │   ├── filescan_tests.py
    │   ├── groupby.py
    │   ├── interpreter.py
    │   ├── keywords.py
    │   ├── kmeans_test.py
    │   ├── multiway.py
    │   ├── myrial_test.py
    │   ├── optimizer_tests.py
    │   ├── pagerank_test.py
    │   ├── parser.py
    │   ├── query_tests.py
    │   ├── reachable_tests.py
    │   ├── sample_test.py
    │   ├── scanner.py
    │   ├── setop_tests.py
    │   ├── sigma_clipping_test.py
    │   └── type_tests.py
    ├── nary_join_rules_test.py
    ├── operator_test.py
    ├── pipelines.py
    ├── platform_tests.py
    ├── python
    │   ├── __init__.py
    │   ├── convert.py
    │   ├── exceptions.py
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── boolean_test.py
    │   │   ├── convert_tests.py
    │   │   ├── decompile_function_test.py
    │   │   ├── decompile_lambda_test.py
    │   │   ├── functions_test.py
    │   │   ├── operators_test.py
    │   │   ├── projection_test.py
    │   │   ├── python_test.py
    │   │   ├── syntax_test.py
    │   │   └── udf_test.py
    │   └── util
    │   │   ├── __init__.py
    │   │   ├── decompile.py
    │   │   └── visitor.py
    ├── relation_key.py
    ├── replace_with_repr.py
    ├── representation.py
    ├── rules.py
    ├── scheme.py
    ├── sparql_tests.py
    ├── sqllite_test.py
    ├── test_style.py
    ├── test_utility.py
    ├── tests.py
    ├── types.py
    ├── utility.py
    └── viz.py
├── requirements-dev.txt
├── scripts
    ├── myrial
    ├── see-rules
    └── simple_raco_execution.py
└── setup.py


/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | source = raco


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | *.swp
 3 | *~
 4 | output.json
 5 | .DS_Store
 6 | parser.out
 7 | parsetab.py
 8 | build
 9 | raco.egg-info
10 | .coverage
11 | .noseids
12 | .idea/
13 | *.dot
14 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |   - "2.7"
 4 | addons:
 5 |   apt:
 6 |     packages:
 7 |     - clang-3.6
 8 |     - lldb-3.6
 9 |     - libc++-dev
10 |     - libc++abi-dev
11 | install:
12 |   - pip install pip --upgrade
13 |   - pip install setuptools --upgrade
14 |   - pip install -r requirements-dev.txt
15 |   - python setup.py install
16 |   - pip install coveralls
17 |   - export CXX=clang++
18 |   - export CXXFLAGS="-stdlib=libc++"
19 |   - export LDFLAGS=-lc++abi
20 | script: nosetests --with-coverage --cover-package=raco --logging-level=WARN
21 | after_success:
22 |   - coveralls
23 | sudo: false
24 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2012-2016 University of Washington
 2 | All rights reserved.
 3 |  
 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 5 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 6 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 7 | Neither the name of the University of Washington nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
 8 |  
 9 | THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF WASHINGTON AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF WASHINGTON OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
10 | 


--------------------------------------------------------------------------------
/c_test_environment/.gitignore:
--------------------------------------------------------------------------------
 1 | *.exe
 2 | *.o
 3 | *.a
 4 | R1
 5 | R2
 6 | R3
 7 | S1
 8 | S2
 9 | S3
10 | T1
11 | T2
12 | T3
13 | I1
14 | I2
15 | I3
16 | D1
17 | D2
18 | D3
19 | *.dot
20 | *.cpp
21 | sp2b.100t*
22 | sp2bench_1m*
23 | tmp/
24 | *.out
25 | *.db
26 | Index
27 | Str
28 | *.ttl
29 | edges
30 | *.ps
31 | *.pdf
32 | log*.rb
33 | *.cpp.*
34 | test.txt
35 | *store
36 | importTestData.sql
37 | 


--------------------------------------------------------------------------------
/c_test_environment/Makefile:
--------------------------------------------------------------------------------
 1 | # WARNING: name environment source files .h or .cc, but not .cpp, which is reserved (.gitignore laziness)
 2 | 
 3 | CXX ?= g++
 4 | CXXFLAGS += -ggdb -std=c++11 -O3
 5 | #-O3 #-m64 -Wno-deprecated -fPIC
 6 | 
 7 | ifneq ($(shell uname), Darwin)
 8 | LIBS = -lrt
 9 | endif
10 | 
11 | INCL = 
12 | COMP = $(CXX) $(CXXFLAGS) -c $< $(INCL)
13 | LINK = $(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
14 | COMPLINK = $(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS) $@.cc
15 | 
16 | 
17 | # for 'triangle'
18 | CSWITCHES = -O -DLINUX -I/usr/X11R6/include -L/usr/X11R6/lib
19 | TRILIBDEFS = -DTRILIBRARY
20 | 
21 | #all: 
22 | 
23 | %.convert: %.convert.cpp convert2bin.h
24 | 	$(CXX) $(CXXFLAGS) -o $@ $<
25 | 
26 | %.exe: %.o io_util.o counters_util.o strings.o
27 | 	$(LINK) 
28 | 
29 | clean:
30 | 	rm -f *.o *.exe
31 | 
32 | io_util.o : io_util.cc io_util.h
33 | 	$(COMP) 
34 | 
35 | counters_util.o : counters_util.cc counters_util.h
36 | 	$(COMP) 
37 | 
38 | strings.o : strings.cc strings.h
39 | 	$(COMP) 
40 | 
41 | dates.o : dates.cc dates.h
42 | 	$(COMP)
43 | 
44 | radish_utils.o : radish_utils.cc radish_utils.h
45 | 	$(COMP) 
46 | 
47 | %.o : %.cpp 
48 | 	$(COMP) 
49 | 
50 | dates_test : dates.o dates_test.cc
51 | 	$(LINK)
52 | 
53 | run_dates_test : dates_test
54 | 	./dates_test
55 | 
56 | libracoc.a: strings.o radish_utils.o dates.o
57 | 	ar rcs $@ $^
58 | 


--------------------------------------------------------------------------------
/c_test_environment/c_index_strings.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | import sys
 3 | 
 4 | #TODO take a schema as input
 5 | 
 6 | 
 7 | class WordIndexer:
 8 |     def __init__(self, indexf):
 9 |         self.words = {}
10 |         self.count = 0
11 |         self.indexfw = open(indexf, 'w')
12 | 
13 |     def add_word(self, w):
14 |         if w in self.words:
15 |             return self.words[w]
16 |         else:
17 |             self.indexfw.write(w+'\n')
18 |             t = self.count
19 |             self.count += 1
20 |             self.words[w] = t
21 |             return t
22 | 
23 |     def close(self):
24 |         self.indexfw.close()
25 | 
26 | 
27 | def indexing(inputf, delim_in):
28 |     intfile = inputf + '.i'
29 |     indexf = inputf + '.index'
30 |     delim_out = ' '
31 | 
32 |     wi = WordIndexer(indexf)
33 |     with open(inputf, 'r') as ins:
34 |         reader = csv.reader(ins, delimiter=delim_in)
35 |         with open(intfile, 'w') as outs:
36 |             writer = csv.writer(outs, delimiter=delim_out)
37 |             for row in reader:
38 |                 cols = [wi.add_word(w) for w in row]
39 |                 writer.writerow(cols)
40 | 
41 |     wi.close()
42 |     return intfile, indexf
43 | 
44 | 
if __name__ == '__main__':
    # Command line: inputfile [delim]; the delimiter defaults to one space.
    argc = len(sys.argv)
    if argc < 2:
        raise Exception("usage: %s inputfile [delim]" % sys.argv[0])

    # The delimiter is only taken from the command line when exactly two
    # arguments were given.
    delim = sys.argv[2] if argc == 3 else ' '

    indexing(sys.argv[1], delim_in=delim)
55 | 
56 | 
57 | 
58 | 


--------------------------------------------------------------------------------
/c_test_environment/clang_datalog_tests.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from testquery import checkquery, checkstore
 3 | from testquery import ClangRunner
 4 | from generate_test_relations import generate_default
 5 | from generate_test_relations import need_generate
 6 | import raco.backends.cpp as clang
 7 | import raco.backends.cpp.cppcommon as cppcommon
 8 | from raco.platform_tests import DatalogPlatformTest
 9 | 
10 | import sys
11 | sys.path.append('./examples')
12 | from osutils import Chdir
13 | from raco.cpp_datalog_utils import emitCode
14 | import os
15 | 
16 | 
class DatalogClangTest(unittest.TestCase, DatalogPlatformTest):
    """Datalog tests executed through the C++ (CCAlgebra) backend.

    Each check regenerates ``<name>.cpp`` inside ``c_test_environment`` and
    then hands the query name to the verification helper together with a
    ``ClangRunner``.
    """

    @staticmethod
    def _discard_stale_source(name):
        """Delete ``<name>.cpp`` from the current directory if it exists."""
        stale = "%s.cpp" % name
        if os.path.exists(stale):
            os.remove(stale)

    def check(self, query, name):
        """Emit code for ``query`` and verify its output with checkquery."""
        with Chdir("c_test_environment"):
            self._discard_stale_source(name)
            emitCode(query, name, clang.CCAlgebra)
            checkquery(name, ClangRunner())

    def check_file(self, query, name):
        """Emit code with EMIT_FILE output and verify it with checkstore."""
        with Chdir("c_test_environment"):
            self._discard_stale_source(name)
            emitCode(query, name, clang.CCAlgebra,
                     emit_print=cppcommon.EMIT_FILE)
            checkstore(name, ClangRunner())

    def setUp(self):
        """Generate the default test relations when need_generate() says so."""
        with Chdir("c_test_environment"):
            if need_generate():
                generate_default()
34 | 
35 | 
36 | if __name__ == '__main__':
37 |     unittest.main()
38 | 


--------------------------------------------------------------------------------
/c_test_environment/clang_myrial_tests.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from testquery import checkquery
 3 | from testquery import ClangRunner
 4 | from generate_test_relations import generate_default
 5 | from generate_test_relations import need_generate
 6 | from raco.backends.cpp import CCAlgebra
 7 | from raco.platform_tests import MyriaLPlatformTestHarness, MyriaLPlatformTests
 8 | from raco.compile import compile
 9 | 
10 | import sys
11 | sys.path.append('./examples')
12 | from osutils import Chdir
13 | import os
14 | 
15 | import raco.viz as viz
16 | 
17 | #import logging
18 | #logging.basicConfig(level=logging.DEBUG)
19 | 
20 | 
class MyriaLClangTest(MyriaLPlatformTestHarness, MyriaLPlatformTests):
    """MyriaL tests executed through the C++ (CCAlgebra) backend."""

    def check(self, query, name, **kwargs):
        """Plan ``query``, dump the plan, generate C++ and verify the result.

        Writes ``<name>.physical.dot`` and ``<name>.cpp`` into
        ``c_test_environment`` and then runs ``checkquery`` from inside that
        directory.
        """
        env_dir = "c_test_environment"

        kwargs['target_alg'] = CCAlgebra()
        plan = self.get_physical_plan(query, **kwargs)

        # Render the dot text before opening the file, so a rendering
        # failure does not leave an empty .dot file behind.
        dot_text = viz.operator_to_dot(plan)
        dot_path = os.path.join(env_dir, "%s.physical.dot" % (name))
        with open(dot_path, 'w') as dot_file:
            dot_file.write(dot_text)

        # generate code in the target language
        source = compile(plan)

        source_path = os.path.join(env_dir, "{name}.cpp".format(name=name))
        if os.path.exists(source_path):
            os.remove(source_path)
        with open(source_path, 'w') as source_file:
            source_file.write(source)

        with Chdir(env_dir):
            checkquery(name, ClangRunner())

    def setUp(self):
        """Run the harness setUp, then generate test relations if needed."""
        super(MyriaLClangTest, self).setUp()
        with Chdir("c_test_environment"):
            if need_generate():
                generate_default()
46 | 
47 | 
48 | if __name__ == '__main__':
49 |     unittest.main()
50 | 


--------------------------------------------------------------------------------
/c_test_environment/clang_processor.py:
--------------------------------------------------------------------------------
 1 | import raco.myrial.parser as parser
 2 | import raco.myrial.interpreter as interpreter
 3 | import raco.compile
 4 | 
 5 | 
 6 | class ClangProcessor:
 7 |     def __init__(self, catalog):
 8 |         self.parser = parser.Parser()
 9 |         self.processor = interpreter.StatementProcessor(catalog)
10 | 
11 |     def get_plan(self, query, **kwargs):
12 |         """Get the MyriaL query plan for a query"""
13 |         statements = self.parser.parse(query)
14 |         self.processor.evaluate(statements)
15 |         if kwargs.get('logical', False):
16 |             return self.processor.get_logical_plan(**kwargs)
17 |         else:
18 |             return self.processor.get_physical_plan(**kwargs)
19 | 
20 |     def get_physical_plan(self, query, **kwargs):
21 |         """Get the physical plan for a MyriaL query"""
22 |         kwargs['logical'] = False
23 |         return self.get_plan(query, **kwargs)
24 | 
25 |     def get_source_code(self, query, **kwargs):
26 |         plan = self.get_physical_plan(query, kwargs)
27 | 
28 |         # generate code in the target language
29 |         return raco.compile.compile(plan)
30 | 
31 |     def write_source_code(self, query, basename, **kwargs):
32 |         code = self.get_source_code(query, kwargs)
33 |         with open(basename+'.cpp', 'w') as f:
34 |             f.write(code)
35 | 


--------------------------------------------------------------------------------
/c_test_environment/compare.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | 
 3 | with open('sql.ans', 'r') as f1:
 4 |     with open('code.ans', 'r') as f2:
 5 |         f1csv = csv.reader(f1, delimiter=',')
 6 |         f2csv = csv.reader(f2, delimiter=',')
 7 | 
 8 |         f1results = {}
 9 | 
10 |         for row in f1csv:
11 |             t = tuple(row)    
12 |             v = 0
13 |             if t in f1results:
14 |                 v = f1results[t]
15 | 
16 |             f1results[t] = v+1
17 | 
18 |         for row in f2csv:
19 |             t = tuple(row)
20 |             
21 |             if t in f1results:
22 |                 if f1results[t]==1:
23 |                     del f1results[t]
24 |                 else:
25 |                     v = f1results[t]
26 |                     f1results[t] = v-1
27 |             else:
28 |                 print t,"is not in sql"
29 |                 assert False
30 | 
31 |         for t in f1results:
32 |             print t,"is not in code"
33 |             assert False
34 | 
35 | print "success!"
36 | 
37 |             
38 | 
39 | 
40 | 


--------------------------------------------------------------------------------
/c_test_environment/convert2bin.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <sstream>
 3 | #include <fstream>
 4 | #include <cstdint>
 5 | #include <string>
 6 | #include <iostream>
 7 | #include <cstdlib>
 8 | 
 9 | void CHECK(bool cond, std::string s) {
10 |   if (!cond) {
11 |     std::cerr << "assertion failed: " << s << std::endl;
12 |     exit(1);
13 |   }
14 | }
15 | 
16 | 
// Converts the delimited text file `fn` into the binary file `fn + ".bin"`,
// one record per input line, and prints a summary to stdout.
//
// Template parameter Tuple must provide, as used below:
//   static fromIStream(std::istream&, char)  - parse one line into a tuple
//   static fieldsSize()                      - number of bytes written per tuple
//   static numFields()                       - column count (reported only)
//   a member f0                              - the record bytes are read
//                                              starting at its address
//
// Parameters:
//   fn     - path of the input text file
//   delim  - field delimiter handed to Tuple::fromIStream
//   burn   - accepted but not used anywhere in this function
//   add_id - when non-zero, each record is preceded by its 0-based line
//            number written as an int64_t
//
// Exits through CHECK if either file cannot be opened.
template< typename Tuple >
void convert2bin_withTuple( std::string fn, char delim=' ', uint64_t burn=0, int add_id=0) {
  std::ifstream infile(fn, std::ifstream::in);
  CHECK( infile.is_open(),  fn + " failed to open");

  std::string outpath = fn+".bin";
  std::ofstream outfile(outpath, std::ios_base::out | std::ios_base::binary );
  CHECK( outfile.is_open(),  outpath + " failed to open");

  int64_t linenum = 0;
  while( infile.good() ) {
    std::string line;
    std::getline( infile, line );
    // An empty line ends the conversion: this is how EOF is detected, and it
    // also means a blank line in the middle of the input stops processing.
    if (line.length() == 0) break; // takes care of EOF

    std::istringstream iss(line);
    auto t = Tuple::fromIStream(iss, delim);

    // add a sequential id to the data
    if (add_id) {
      outfile.write((char*)&linenum, sizeof(int64_t));
    }

    // Raw copy of fieldsSize() bytes beginning at f0.
    // NOTE(review): assumes the tuple's fields are stored contiguously
    // starting at f0 - confirm against the generated tuple definition.
    outfile.write((char*) &(t.f0), Tuple::fieldsSize()); 
    linenum++;
  }
  infile.close();
  outfile.close();
  // Summary of what was written.
  std::cout << "binary: " << outpath << std::endl;
  std::cout << "rows: " << linenum << std::endl;
  std::cout << "cols: " << Tuple::numFields() << std::endl;
  std::cout << "tuple size: " << Tuple::fieldsSize() << std::endl;
  if (add_id) {
    std::cout << "  + 1 column for id" << std::endl;
  }
}
53 | 


--------------------------------------------------------------------------------
/c_test_environment/convert2bin.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | import argparse
 3 | import sys
 4 | 
 5 | from raco.catalog import FromFileCatalog
 6 | from raco.backends.cpp.cppcommon import StagedTupleRef
 7 | 
 8 | """
 9 | given a schema, creates a C++ program to convert csv data to a binary format
10 | """
11 | 
# C++ source skeleton of the generated converter program.  It is rendered
# with str.format(): {definition} receives the tuple class definition and
# {typ} its type name, which is why literal C++ braces are doubled.
template = """
#include <tuple>
#include <string>
#include <cstring>
#include <cstdint>
#include <sstream>
#include <vector>
#include <cstdlib>
#include <iostream>
#include "convert2bin.h"
#include "radish_utils.h"
#include "strings.h"

{definition}

int main(int argc, char * const argv[]) {{
    if (argc < 4) {{
        std::cerr << "usage: " << argv[0] << " [file] [delim char] [burns] [add_id?]" << std::endl;
        exit(1);
    }}

    // add_id is optional: argv[4] is NULL when it is omitted, so it is only
    // parsed when it was actually supplied.
    int add_id = (argc > 4) ? atoi(argv[4]) : 0;
    convert2bin_withTuple<{typ}>(argv[1], argv[2][0], atoi(argv[3]), add_id);
}}
"""
36 | 
37 | 
def generate_tuple_class(rel_key, cat):
    """Write and build the csv-to-binary converter for one relation.

    Looks up the scheme of ``rel_key`` in ``cat``, renders the converter
    program into ``<rel>.convert.cpp`` (``<rel>`` being the third
    ``:``-separated component of ``rel_key``) and runs ``make <rel>.convert``.

    Returns the name of the generated ``.cpp`` file.  Raises
    ``subprocess.CalledProcessError`` when make exits with a non-zero status.
    """
    tupleref = StagedTupleRef(None, cat.get_scheme(rel_key))
    definition = tupleref.generateDefinition()

    outfnbase = rel_key.split(':')[2]
    cpp_name = "{0}.convert.cpp".format(outfnbase)
    with open(cpp_name, 'w') as outf:
        source = template.format(definition=definition,
                                 typ=tupleref.getTupleTypename())
        outf.write(source)

    make_target = "{fn}.convert".format(fn=outfnbase)
    subprocess.check_output(["make", make_target])
    return cpp_name
49 | 
50 | 
def generate_tuple_class_from_file(name, catpath):
    """Build converters for one relation, or for a whole catalog file.

    The catalog is loaded from ``catpath``.  The shape of the result depends
    on ``name``:

    * ``name`` given -- ``(catalog, rel_key, cpp_name)`` for the relation
      ``public:adhoc:<name>``;
    * ``name`` is None -- ``(catalog, [(rel_key, cpp_name), ...])`` with one
      entry per key in the catalog.
    """
    cat = FromFileCatalog.load_from_file(catpath)

    if name is None:
        converters = [(key, generate_tuple_class(key, cat))
                      for key in cat.get_keys()]
        return cat, converters

    rel_key = "public:adhoc:{0}".format(name)
    return cat, rel_key, generate_tuple_class(rel_key, cat)
59 | 
60 | 
if __name__ == "__main__":
    # Command line: -c CATALOG is required, -n RELATION is optional.
    arg_parser = argparse.ArgumentParser(prog=sys.argv[0])
    arg_parser.add_argument(
        "-n",
        dest="name",
        help="name of relation [optional]. If not specified then will convert whole catalog")
    arg_parser.add_argument(
        "-c",
        dest="catpath",
        help="path of catalog file, see FromFileCatalog for format",
        required=True)

    cli = arg_parser.parse_args(sys.argv[1:])
    generate_tuple_class_from_file(cli.name, cli.catpath)
69 |    
70 | 


--------------------------------------------------------------------------------
/c_test_environment/counters_util.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // How to use the counters:
 4 | // 1) malloc space for as many counters as you need
 5 | //    int numCounters = 7;
 6 | //    int *counters = mallocCounterMemory(numCounters);
 7 | // 2) Call getCounters(), specifying the particular address for storing the
 8 | //    counter values. Increment currCounter so that you don't overwrite values.
 9 | //    int currCounter = 0;
10 | //    getCounters(counters, currCounter);
11 | //    currCounter = currCounter + 1; // 1
12 | // 3) Print the difference between the counters
13 | //    printDiffCounters(counters, numCounters);
14 | // 4) Free the memory storing the counter values
15 | //    freeCounterMemory(counters);
16 | 
// Allocates storage for numCounters counter slots (step 1 of the usage
// notes at the top of this header).
int * mallocCounterMemory(int numCounters);
// Releases storage for counters; presumably the counterpart of
// mallocCounterMemory -- implementation not visible here, TODO confirm.
void freeCounterMemory(int *counters);
// Stores the current counter values in slot currCounter of counters; the
// caller advances currCounter so earlier readings are not overwritten.
void getCounters(int *counters, int currCounter);
// Prints the difference between the recorded counters; sz is the number of
// counters, as passed in the usage notes (numCounters).
void printDiffCounters(int *counters, int sz);
// Prints the recorded counters; presumably the raw values rather than
// differences -- TODO confirm against counters_util.cc.
void printCounters(int *counters, int sz); 
22 | 
23 | 


--------------------------------------------------------------------------------
/c_test_environment/dates.cc:
--------------------------------------------------------------------------------
 1 | #include "dates.h"
 2 | #include <ctime>
 3 | 
 4 | namespace dates {
 5 |   const uint32_t date_format_len = 11;
 6 | 
 7 |   uint64_t year(std::string date) {
 8 |     return std::stoi(date.substr(0, 4));
 9 |   }
10 |   
11 |   uint64_t month(std::string date) {
12 |     return std::stoi(date.substr(5, 2));
13 |   }
14 | 
15 |   uint64_t day(std::string date) {
16 |     return std::stoi(date.substr(8, 2));
17 |   }
18 | 
19 |   namespace impl {
20 |     std::string mkstrdate(const tm* timeptr) {
21 |       char r[date_format_len];
22 |       strftime(r, date_format_len, "%Y-%m-%d", timeptr);
23 |       return std::string(r);
24 |     }
25 |   }
26 | 
27 |   std::string add(std::string t, int64_t days) {
28 |     tm tc_ = tm(); // initialize fields to 0
29 |     tc_.tm_year = year(t)-1900;   // years since 1900
30 |     tc_.tm_mon = month(t)-1;     // months since january
31 |     tc_.tm_mday = day(t);       // day of the month
32 | 
33 |     tc_.tm_mday += days;
34 |     
35 |     // fix up the struct tm
36 |     std::mktime(&tc_);
37 | 
38 |     return impl::mkstrdate(&tc_);
39 |   }
40 | }
41 | 
42 |     
43 | 


--------------------------------------------------------------------------------
/c_test_environment/dates.h:
--------------------------------------------------------------------------------
 1 | #include <cstdint>
 2 | #include <string>
 3 | #include <array>
 4 | 
 5 | // date format is a string YYYY-MM-DD
 6 | //                         0123456789
 7 | // "1998-01-01"
 8 | 
 9 | namespace dates {
10 | 
11 |   uint64_t year(std::string date); 
12 |   uint64_t month(std::string date);
13 |   uint64_t day(std::string date);
14 | 
15 | // for passing an array
16 | // TODO implicit conversion to avoid this code, https://github.com/uwescience/raco/issues/454
17 |   template <size_t N>
18 |   uint64_t year(std::array<char, N> date) {
19 |     year(std::string(date.data()));
20 |   }
21 |   template <size_t N>
22 |   uint64_t month(std::array<char, N> date) {
23 |     month(std::string(date.data()));
24 |   }
25 |   template <size_t N>
26 |   uint64_t day(std::array<char, N> date) {
27 |     day(std::string(date.data()));
28 |   }
29 | 
30 |   std::string add(std::string t, int64_t days);
31 | }
32 | 
33 |     
34 | 


--------------------------------------------------------------------------------
/c_test_environment/dates_test.cc:
--------------------------------------------------------------------------------
 1 | #include "dates.h"
 2 | #include <iostream>
 3 | #include <string>
 4 | 
 5 | int main() {
 6 |   // test string manipulations from TPC-H Q1
 7 |   std::string d = "1998-12-01";
 8 |   std::string ct = dates::add(d, -60);
 9 |   
10 |   std::cout << d << " " << ct << std::endl;
11 | 
12 |   std::string in = "1998-01-01";
13 |   std::cout << in << " <= " << ct << " | " << (in <= ct) << std::endl;
14 | 
15 |   std::string in2 = "1998-11-29";
16 |   std::cout << in2 << " <= " << ct << " | " << (in2 <= ct) << std::endl;
17 | }
18 | 
19 | 


--------------------------------------------------------------------------------
/c_test_environment/grappa_detect_new_files.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Re-runs Grappa's ./configure (Make generator, Release mode) from inside
# $GRAPPA_HOME. Any arguments given to this script are forwarded to configure.
#
# Only configure when we actually entered $GRAPPA_HOME: with the variable
# unset, or the directory missing, ./configure would otherwise be resolved
# relative to whatever directory the caller happens to be in.
if [ -n "$GRAPPA_HOME" ] && pushd "$GRAPPA_HOME"; then
  #./configure --gen=Make --mode=Release --cc=/sampa/share/distcc/gcc-4.7.2/bin/gcc --third-party=/sampa/share/grappa-third-party
  ./configure --gen=Make --mode=Release --cxx="$(which g++)" --third-party=/sampa/share/grappa-third-party/gcc-4.8.2 "$@"
  popd
else
  echo "GRAPPA_HOME must be set to an existing Grappa directory" >&2
  false
fi
6 | 


--------------------------------------------------------------------------------
/c_test_environment/grappalang_tests.py:
--------------------------------------------------------------------------------
 1 | from nose.plugins.skip import SkipTest
 2 | import os
 3 | from osutils import Chdir
 4 | import sys
 5 | import unittest
 6 | 
 7 | from raco.backends.radish import GrappaAlgebra
 8 | 
 9 | from generate_test_relations import generate_default, need_generate
10 | from raco.platform_tests import DatalogPlatformTest
11 | from testquery import checkquery, checkstore, GrappalangRunner
12 | 
13 | sys.path.append('./examples')
14 | from raco.cpp_datalog_utils import emitCode
15 | 
16 | 
# Runs the shared DatalogPlatformTest queries through the Grappa backend.
# Code is emitted for each query, but the result is not verified yet: check()
# and check_file() both end in SkipTest, and setUp() returns immediately.
class DatalogGrappaTest(unittest.TestCase, DatalogPlatformTest):
    def check(self, query, name):
        # Emit Grappa code for `query` into c_test_environment as
        # 'grappa_<name>', then report the test as skipped.
        with Chdir("c_test_environment") as d:
            emitCode(query, 'grappa_%s' % name, GrappaAlgebra)
            # TODO actually be able to check the query
            raise SkipTest(query)
            # Unreachable until the SkipTest above is removed.
            checkquery(name, GrappalangRunner(binary_input=False))

    def check_file(self, query, name):
        # TODO implement this function
        raise SkipTest(query)

    def setUp(self):
        # TODO instead of returning, we should do something with GRAPPA_HOME
        return
        # Unreachable until the early return above is removed: would generate
        # the default test relations under $GRAPPA_HOME when they are missing.
        with Chdir("c_test_environment") as d:
            targetpath = os.path.join(os.environ.copy()['GRAPPA_HOME'], 'build/Make+Release/applications/join')
            if need_generate(targetpath):
              generate_default(targetpath)
36 | 
37 | 
38 | if __name__ == '__main__':
39 |     unittest.main()
40 | 


--------------------------------------------------------------------------------
/c_test_environment/io_util.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <string>
 3 | #include <sstream>
 4 | #include <sstream>
 5 | #include <vector>
 6 | #include <fstream>
 7 | 
 8 | // How to use the I/O utilities:
 9 | // 1) Inhale a particular file. Right now, the file is expected to be space separated.
10 | //    char *filePath = "/scratch/tmp/...";
11 | //    struct relationInfo relInfo;
12 | //    struct relationInfo *ptr = binary_inhale(filePath, &relInfo);
13 | //    OR
14 | //    struct relationInfo *ptr = inhale(filePath, &relInfo);
15 | // 2) Manipulate the relation as you see fit.
16 | //    ...
17 | // 3) Free the memory for the relation
18 | //    free(relInfo.relation);
19 | 
20 | double timer();
21 |   
class RangeIter;
// Iterator type returned by RangeIterable::begin() / end(). Dereferencing
// yields a uint64_t. The member functions are defined outside this header.
class RangeIter {
  private:
    uint64_t num;
    uint64_t next;
  public:
    // NOTE(review): asEnd presumably selects the past-the-end position for a
    // range of `num` values -- confirm against the definition.
    RangeIter(uint64_t num, bool asEnd=false);
    
    // Value at the current position.
    uint64_t operator*();

    // Pre-increment; returns a reference so calls can be chained.
    RangeIter& operator++();

    // Named inequality test; presumably what the free operator!= / operator==
    // declared later in this header are built on -- TODO confirm.
    bool notequal(const RangeIter& o) const;
};
36 | 
// Object exposing begin() / end() that return RangeIter, which is the shape a
// range-based for loop requires. Constructed from a single count `num`
// (relationInfo::range() passes its tuple count).
class RangeIterable {
  private:
    uint64_t num;
  public:
    RangeIterable(uint64_t num);

    // NOTE(review): presumably iterates 0 .. num-1; the definitions are not
    // in this header -- confirm.
    RangeIter begin();
    RangeIter end();
};
46 | 
47 |   
// Description of a relation filled in by inhale() / binary_inhale().
// NOTE(review): uint64 / int64 are not standard typedefs (the rest of this
// header uses uint64_t); they must be defined before this header is included
// -- confirm where.
struct relationInfo {
  uint64 tuples;    // presumably the number of tuples (rows) -- TODO confirm
  uint64 fields;    // presumably the number of fields per tuple -- TODO confirm
  int64 *relation;  // the relation's data; freed by the caller per the usage notes above

  // Iterable constructed from the tuple count.
  RangeIterable range() {
    return RangeIterable(tuples);
  }
};
57 |       
58 | bool operator!=(const RangeIter& o1, const RangeIter& o2);
59 | bool operator==(const RangeIter& o1, const RangeIter& o2);
60 | 
61 | struct relationInfo *inhale(const char *path, struct relationInfo *relInfo);
62 | struct relationInfo *binary_inhale(const char *path, struct relationInfo *relInfo);
63 | 
64 | void printrelation(struct relationInfo *R);
65 | 
66 | 
// Reads the text file at `path` line by line and converts every line into a T
// by handing a std::istringstream over that line to T::fromIStream.
// Returns the tuples in file order. A file that cannot be opened produces an
// empty vector, because the first getline fails immediately.
template<typename T>
std::vector<T> tuplesFromAscii(const char *path) {
  const std::string file_name(path);
  std::ifstream input(file_name, std::ifstream::in);

  std::vector<T> parsed;
  for (std::string row; std::getline(input, row); ) {
    std::istringstream fields(row);
    parsed.push_back(T::fromIStream(fields));
  }

  // returned by value; copy elision / move keeps this cheap
  return parsed;
}
83 | 
84 | void write_count(const char* path, uint64_t count);
85 |     
86 | 
87 | #define ZAPPA
88 | 
89 | 


--------------------------------------------------------------------------------
/c_test_environment/osutils.py:
--------------------------------------------------------------------------------
 1 | import errno
 2 | import os
 3 | 
 4 | 
 5 | class Chdir:         
 6 |   """
 7 |   Safe cd that is really a pushd then popd on leaving the scope
 8 |   """
 9 |   def __init__( self, newPath ):  
10 |     self._newPath = newPath
11 | 
12 |   def __enter__( self ):
13 |     self._savedPath = os.getcwd()
14 |     os.chdir(self._newPath)
15 | 
16 |   def __exit__( self, x, y, z ):
17 |     os.chdir( self._savedPath )
18 | 
def mkdir_p(path):
    """Create `path` together with any missing parent directories.

    Succeeds silently when `path` already exists as a directory. Any other
    ``OSError`` is re-raised, including the case where `path` exists but is
    not a directory.
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        already_a_directory = (exc.errno == errno.EEXIST
                               and os.path.isdir(path))
        if not already_a_directory:
            raise
26 | 


--------------------------------------------------------------------------------
/c_test_environment/radish_utils.cc:
--------------------------------------------------------------------------------
 1 | #include "radish_utils.h"
 2 | 
 3 | uint64_t identity_hash( int64_t k ) {
 4 |   return k;
 5 | }
 6 | 
 7 | uint64_t linear_hash( int64_t k) {
 8 |   return (73251599 * k + 110802387) % 98764321261;
 9 | }
10 | 
// Single file-local pairhash instance shared by every pair_hash call.
static pairhash ph;
// Hashes a pair of 64-bit keys by calling pairhash's templated call operator,
// explicitly instantiated for <int64_t, int64_t>. pairhash itself is declared
// outside this file (presumably via radish_utils.h -- TODO confirm).
uint64_t pair_hash( std::pair<int64_t, int64_t> k ) {
  return ph.operator()<int64_t, int64_t>(k);
}
15 | 


--------------------------------------------------------------------------------
/c_test_environment/repr_myrial_tests.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from generate_test_relations import generate_default
 3 | from generate_test_relations import need_generate
 4 | from raco.backends.myria import MyriaLeftDeepTreeAlgebra
 5 | from raco.platform_tests import MyriaLPlatformTestHarness, MyriaLPlatformTests
 6 | from raco.from_repr import plan_from_repr
 7 | 
 8 | import sys
 9 | sys.path.append('./examples')
10 | from osutils import Chdir
11 | 
12 | 
class MyriaLReprTest(MyriaLPlatformTestHarness, MyriaLPlatformTests):
    """Checks that physical plans survive a repr() / plan_from_repr() round trip."""

    def check(self, query, name, **kwargs):
        """Plan `query` for MyriaLeftDeepTreeAlgebra and compare the plan with
        the one rebuilt from its own repr(). `name` is not used."""
        options = dict(kwargs, target_alg=MyriaLeftDeepTreeAlgebra())
        physical = self.get_physical_plan(query, **options)
        rebuilt = plan_from_repr(repr(physical))
        assert physical == rebuilt

    def setUp(self):
        """Run the harness setUp, then generate the default test relations
        inside c_test_environment when need_generate() reports they are missing."""
        super(MyriaLReprTest, self).setUp()
        with Chdir("c_test_environment"):
            if need_generate():
                generate_default()
24 | 
25 | 
26 | if __name__ == '__main__':
27 |     unittest.main()
28 | 


--------------------------------------------------------------------------------
/c_test_environment/run_query.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | """ Executes cpp runner and retrieves the results """
 4 | 
 5 | import argparse
 6 | import os
 7 | import sys
 8 | import subprocess
 9 | sys.path.append('./c_test_environment')
10 | from testquery import ClangRunner, GrappalangRunner
11 | import osutils
12 | 
13 | 
def parse_options(args):
    """Parse the command-line arguments in `args` (without the program name).

    Returns an ``argparse.Namespace`` with ``platform`` ('grappa' or 'cpp'),
    ``file``, and the optional ``query`` and ``catalog`` (``None`` when not
    given).
    """
    arg_parser = argparse.ArgumentParser()

    arg_parser.add_argument(
        'platform', metavar='P', type=str,
        choices=['grappa', 'cpp'],
        help='Type of platform to use: cpp or grappa')
    arg_parser.add_argument(
        'file', help='File containing platform source program')

    for flag, description in (('--query', 'File containing myrial query'),
                              ('--catalog', 'File containing catalog')):
        arg_parser.add_argument(flag, help=description)

    return arg_parser.parse_args(args)
26 | 
27 | 
28 | from raco.backends.cpp import CCAlgebra
29 | from raco.backends.radish import GrappaAlgebra
30 | from raco.catalog import FromFileCatalog
31 | from raco.backends.cpp.cppcommon import EMIT_FILE
32 | from clang_processor import ClangProcessor
33 | 
34 | 
def main(args):
    """Optionally generate, then run, a platform program.

    `args` is the argument list without the program name (see parse_options).
    A ``logs`` directory is created in the current directory and its absolute
    path is passed to the runner.

    When ``--query`` is given, the query text is read and source code for it
    is written under the name given by ``file`` before running; ``--catalog``
    is then required and an ``Exception`` is raised without it.

    For the ``cpp`` platform a failing runner is reported on stdout, with the
    captured output, and the ``subprocess.CalledProcessError`` is re-raised.
    """
    opt = parse_options(args)
    osutils.mkdir_p("logs")
    abspath = os.path.abspath("logs")
    name = opt.file

    if opt.query:
        if opt.catalog is None:
            raise Exception("--query also requires a --catalog")

        with open(opt.query, 'r') as f:
            qt = f.read()

        target_alg = CCAlgebra(emit_print=EMIT_FILE)
        if opt.platform == 'grappa':
            target_alg = GrappaAlgebra(emit_print=EMIT_FILE)
        ClangProcessor(FromFileCatalog.load_from_file(opt.catalog))\
            .write_source_code(qt, name, target_alg=target_alg)

    if opt.platform == 'grappa':
        runner = GrappalangRunner()
        runner.run(name, abspath)
    elif opt.platform == 'cpp':
        try:
            runner = ClangRunner()
            runner.run(name, abspath)
        except subprocess.CalledProcessError as e:
            # Single-argument print(...) behaves the same under Python 2 and
            # Python 3, unlike the print statement.
            print('cpp runner for %s failed' % (name))
            print(e.output)
            raise
67 | if __name__ == "__main__":
68 |     main(sys.argv[1:])
69 | 


--------------------------------------------------------------------------------
/c_test_environment/strings.cc:
--------------------------------------------------------------------------------
 1 | #include "strings.h"
 2 | 
 3 | #include <stdexcept>
 4 | #include <fstream>
 5 | 
 6 |   
 7 | 
// Number of strings held by the index.
size_t StringIndex::size() const {
  return strings.size();
}
11 | 
12 | StringIndex::StringIndex(const std::map<std::string, int64_t>& mapping) : strings(), indices() {
13 |   // mapping stores the strings in sorted order
14 |   // so this forms sorted vectors
15 |   for (auto p : mapping) {
16 |     strings.push_back(p.first);
17 |     indices.push_back(p.second);
18 |   }
19 | }
20 | 
// This integer represents strings not in the database
const int64_t DB_NON_EXISTANT_STRING = -1;
// Returns the id stored for s, or DB_NON_EXISTANT_STRING when s is not in
// the index.
int64_t StringIndex::string_lookup(const std::string& s) const {
  // TODO: use trie structure instead of binary search
  
  // Unlike std::binary_search, QueryUtils::binary_search is used here as
  // returning an iterator, with end() meaning "not found". It searches
  // `strings`, which the mapping constructor fills in sorted order.
  auto ifound = QueryUtils::binary_search( this->strings.begin(), this->strings.end(), s);
  if (ifound == this->strings.end()) {
    return DB_NON_EXISTANT_STRING;
  } else {
    // strings and indices are parallel, so the position of the match in
    // `strings` is also the position of its id in `indices`.
    auto ind = (ifound - this->strings.begin());
    return this->indices[ind];
  }
}
34 | 
// Creates an empty index: no strings, no ids.
StringIndex::StringIndex() : strings(), indices() {}
36 | 
37 | StringIndex build_string_index(const std::string& indexfn) {
38 |   std::map<std::string, int64_t> str2int;
39 |   std::ifstream file( indexfn );
40 |   std::string line;
41 |   int64_t ln = 0;
42 |   while (getline( file, line )) {
43 |     str2int[line] = ln++;
44 |   }
45 | 
46 |   return StringIndex(str2int);
47 | }
48 |   
// Translates a SQL LIKE pattern into a std::regex (default ECMAScript
// grammar).
//   '%'            -> ".*"  (any run of characters, possibly empty)
//   anything else  -> matches itself literally
// Regex metacharacters in the pattern are backslash-escaped. Previously they
// were copied through unchanged, so "a.b" also matched "axb" and a pattern
// such as "f(%" threw std::regex_error.
// NOTE(review): the LIKE single-character wildcard '_' is still treated as a
// literal underscore, as before -- confirm whether callers rely on that.
std::regex compile_like_pattern(const std::string& pattern) {
  static const std::string regex_specials("^$\\.*+?()[]{}|");

  std::string translated;
  for (auto c=pattern.begin(); c!=pattern.end(); ++c) {
    if (*c == '%') {
      translated += ".*";
    } else {
      if (regex_specials.find(*c) != std::string::npos) {
        translated += '\\';
      }
      translated += *c;
    }
  }
  return std::regex(translated);
}
61 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/aggregate_count.sql:
--------------------------------------------------------------------------------
1 | select COUNT(a) from R1;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/aggregate_count_group_one.sql:
--------------------------------------------------------------------------------
1 | select b, COUNT(a) from R2 group by b;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/aggregate_count_group_one_notgroup_filtered_one.sql:
--------------------------------------------------------------------------------
1 | select b, COUNT(a) from R3 where c < 5 group by b;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/aggregate_count_group_one_notgroup_one.sql:
--------------------------------------------------------------------------------
1 | select b, COUNT(a) from R3 group by b;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/aggregate_double.sql:
--------------------------------------------------------------------------------
1 | select a, SUM(b) from D2 group by a;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/aggregate_max.sql:
--------------------------------------------------------------------------------
1 | select MAX(a) from T2;


--------------------------------------------------------------------------------
/c_test_environment/testqueries/aggregate_min.sql:
--------------------------------------------------------------------------------
1 | select MIN(a) from T2;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/aggregate_of_binop.sql:
--------------------------------------------------------------------------------
1 | select SUM(a+b) from R2;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/aggregate_of_binop_double.sql:
--------------------------------------------------------------------------------
1 | select a, MAX(b-c) from D3 group by a;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/aggregate_of_binop_no_key_unionall_double.sql:
--------------------------------------------------------------------------------
1 | select MAX(b-c) from D3
2 | UNION ALL
3 | select MIN(c-b) from D3;
4 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/aggregate_string.sql:
--------------------------------------------------------------------------------
1 | select a, COUNT(b) from C3 group by a;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/aggregate_sum.sql:
--------------------------------------------------------------------------------
1 | select SUM(a) from R1;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/apply.sql:
--------------------------------------------------------------------------------
1 | select b from (select a, b from T2);
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/apply_and_self_join.sql:
--------------------------------------------------------------------------------
1 | select X.a, Y.b from (select a as a, c as b from T3 where b < 4) X,
2 |                      (select a as a, c as b from T3 where b < 4) Y
3 | where X.b=Y.a;
4 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/argmax.myl:
--------------------------------------------------------------------------------
 1 | -- arbitrarily favor new value in case of tie
 2 | def pickval(value, arg, _value, _arg):
 3 |     case when value >= _value then arg
 4 |         else _arg end;
 5 | 
 6 | uda ArgMax(outcome, lprob) {
 7 |    -- init
 8 |    [0 as _outcome, 0 as _lprob];
 9 |    
10 |    -- update
11 |    [pickval(lprob, outcome, _lprob, _outcome),
12 |     pickval(lprob, lprob, _lprob, _lprob)];
13 | 
14 |    -- output
15 |    [_lprob, _outcome];
16 | };
17 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/argmax_all_uda.sql:
--------------------------------------------------------------------------------
1 | select b, max(c) from I3;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/argmax_uda.sql:
--------------------------------------------------------------------------------
1 | -- for each a, compute max c and the corresponding b (argmax)
2 | select a, b, max(c) from I3 group by a;
3 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/builtin_and_UDA.sql:
--------------------------------------------------------------------------------
1 | select a, b, MAX(c), SUM(b) from I3 group by a; -- seems wrong


--------------------------------------------------------------------------------
/c_test_environment/testqueries/common_index_allowed.sql:
--------------------------------------------------------------------------------
1 | select t.a, t.b, r1.b, r2.b from T2 t, R2 r1, R2 r2
2 | where t.a=r1.a
3 | and r1.a=r2.a;
4 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/common_index_disallowed.sql:
--------------------------------------------------------------------------------
1 | select t.a, t.b, r1.b, r2.a from T2 t, R2 r1, R2 r2
2 | where t.a=r1.a
3 | and r1.a=r2.b;
4 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/countstar_string.sql:
--------------------------------------------------------------------------------
1 | select COUNT(b) from C3;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/directed_squares.sql:
--------------------------------------------------------------------------------
1 | select r.a, s.a, t.a, z.a from R2 r, S2 s, T2 t, R3 z where r.b=s.a and s.b=t.a and t.b=z.a and z.b=r.a;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/directed_triangles.sql:
--------------------------------------------------------------------------------
1 | select r.a, s.a, t.a from R2 r, S2 s, T2 t where r.b=s.a and s.b=t.a and t.b=r.a;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/expr_singleton.sql:
--------------------------------------------------------------------------------
1 | select (1 - 0.85)/1000;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/few_col_store.sql:
--------------------------------------------------------------------------------
1 | select a from R2
2 | where b=3;
3 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/groupby_string_key.sql:
--------------------------------------------------------------------------------
1 | select sum(C2.a), C2.b from C2
2 | group by C2.b; 
3 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/groupby_string_multi_key.sql:
--------------------------------------------------------------------------------
-- sum of a for every distinct (b, c) key pair
select sum(C3.a), C3.b, C3.c from C3
group by C3.b, C3.c;
3 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/head_scalar_op.sql:
--------------------------------------------------------------------------------
1 | select a+b from R2;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/idivide.sql:
--------------------------------------------------------------------------------
1 | select a/b from T2 where b!=0;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/join.sql:
--------------------------------------------------------------------------------
1 | select t3.a, r3.c from T3 t3, R3 r3 where t3.b=r3.b and r3.a=r3.c;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/join_of_aggregate_of_join.sql:
--------------------------------------------------------------------------------
1 | select a.rsum, s.b from S2 s,
2 |     (select SUM(r.a) as rsum, t.b as tc from R2 r, T2 t 
3 |         where r.b = t.a group by t.b) a
4 | where
5 | a.tc = s.a;
6 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/join_of_two_unionalls.sql:
--------------------------------------------------------------------------------
 1 | --unionall
 2 | select A1.a from 
 3 | (select a, b from T2
 4 | union all 
 5 | select a, b from R2) A1,
 6 | (select a, b from T2
 7 | union all 
 8 | select a, b from R2) A2
 9 | where A1.a=A2.a;
10 | 
11 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/join_string_key.sql:
--------------------------------------------------------------------------------
1 | select r1.a, r2.a from C3 r1, C3 r2 where r1.b=r2.c;
2 | 
3 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/join_string_val.sql:
--------------------------------------------------------------------------------
1 | select C2.b, T2.b from C2, T2 where C2.a=T2.a;
2 | 
3 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/join_swap_indexing.sql:
--------------------------------------------------------------------------------
1 | select t3.a, s3.b, r3.b from T3 t3, R3 r3, S3 s3 where s3.c=r3.a and r3.c=t3.c;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/join_then_aggregate.sql:
--------------------------------------------------------------------------------
1 | select SUM(R2.a), R2.b from R2, S2, T2 where R2.b=S2.a and S2.a=T2.a group by R2.b;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/join_two_types.sql:
--------------------------------------------------------------------------------
1 | select * from C3, R3 where C3.a = R3.c;


--------------------------------------------------------------------------------
/c_test_environment/testqueries/like_begin.sql:
--------------------------------------------------------------------------------
1 | select * from C2 where b like "A%" or b like 'cof%';
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/like_begin_end.sql:
--------------------------------------------------------------------------------
1 | select * from C2 where b like "A%B" or b like "co%fe";
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/like_end.sql:
--------------------------------------------------------------------------------
1 | select * from C2 where b like "%A" or b like '%ee';


--------------------------------------------------------------------------------
/c_test_environment/testqueries/like_middle.sql:
--------------------------------------------------------------------------------
1 | select * from C2 where b like "%A%" or b like "%fe%";
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/matrix_mult.sql:
--------------------------------------------------------------------------------
1 | select t1.a as src, t2.b as dst, count(t1.a) from T2 t1, T2 t2
2 | where t1.b = t2.a
3 | group by t1.a, t2.b;
4 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/more_col_store.sql:
--------------------------------------------------------------------------------
1 | select r.a, s.a, t.a from R2 r, S2 s, T2 t
2 | where r.b = s.a
3 | and s.b = t.a
4 | and t.b = r.a;


--------------------------------------------------------------------------------
/c_test_environment/testqueries/multi_builtin.sql:
--------------------------------------------------------------------------------
1 | select c, MAX(a), SUM(b) from I3 group by c;


--------------------------------------------------------------------------------
/c_test_environment/testqueries/project_string.sql:
--------------------------------------------------------------------------------
1 | select b from C3;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/q2.sql:
--------------------------------------------------------------------------------
 1 | select T1.a as inproc, T2.c as author, T3.c as booktitle, T4.c as title, T5.c as proc, T6.c as ee, T7.c as page, T8.c as url, T9.c as yr
 2 | from R3 T1,
 3 |             R3 T2,
 4 |             R3 T3,
 5 |             R3 T4,
 6 |             R3 T5,
 7 |             R3 T6,
 8 |             R3 T7,
 9 |             R3 T8,
10 |             R3 T9 
11 | WHERE T1.a=T2.a
12 | and T2.a=T3.a
13 | and T3.a=T4.a
14 | and T4.a=T5.a
15 | and T5.a=T6.a
16 | and T6.a=T7.a
17 | and T7.a=T8.a
18 | and T8.a=T9.a
19 | and T1.b = 1 and T1.c > 5
20 | and T2.b = 1
21 | and T3.b = 1
22 | and T4.b = 1
23 | and T5.b = 1
24 | and T6.b = 1
25 | and T7.b = 1
26 | and T8.b = 1
27 | and T9.b = 1;
28 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/scan.sql:
--------------------------------------------------------------------------------
1 | select * from T1;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/select.sql:
--------------------------------------------------------------------------------
1 | select * from T1 where a > 5;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/select_conjunction.sql:
--------------------------------------------------------------------------------
1 | select * from T1 where a > 0 and a < 10;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/select_double.sql:
--------------------------------------------------------------------------------
1 | select a,c from D3 where b < 6.4;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/select_string.sql:
--------------------------------------------------------------------------------
1 | select b from C3 where b="coffee";
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/select_string_literal.sql:
--------------------------------------------------------------------------------
1 | select * from C3 where b = "coffee";


--------------------------------------------------------------------------------
/c_test_environment/testqueries/select_then_join.sql:
--------------------------------------------------------------------------------
1 | select t.a,t.b,t.c from T3 t, R2 r where t.a<t.b and r.b<9 and t.c=r.a;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/self_join.sql:
--------------------------------------------------------------------------------
1 | select x.a, x.b from R2 x, R2 y where x.a=y.a;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/self_three_path.sql:
--------------------------------------------------------------------------------
1 | select r.a, s.a, t.a from R2 r, R2 s, R2 t where r.b=s.a and s.b=t.a;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/singleton_constant.sql:
--------------------------------------------------------------------------------
1 | select 0.85;


--------------------------------------------------------------------------------
/c_test_environment/testqueries/store.sql:
--------------------------------------------------------------------------------
1 | select * from R2;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/string_join.sql:
--------------------------------------------------------------------------------
1 | select C2.b, T2.b from C2, T2 where C2.a=T2.a;
2 | 
3 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/swap.sql:
--------------------------------------------------------------------------------
1 | select b,a from R2;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/test_join_of_two_aggregates.sql:
--------------------------------------------------------------------------------
1 | select agg1.a, agg2.a
2 |             from (select a, MIN(b) as mb from D2 group by a) agg1,
3 |             (select a, MIN(b) as mb from D3 group by a) agg2
4 |         where agg1.mb = agg2.mb;
5 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/three_path.sql:
--------------------------------------------------------------------------------
1 | select r.a, s.a, t.a from R2 r, S2 s, T2 t where r.b=s.a and s.b=t.a;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/three_way_three_key_hash_join.sql:
--------------------------------------------------------------------------------
1 | select R3.c, T3.c, S3.c from R3, T3, S3 where R3.a=T3.a and R3.b=T3.b and R3.c=T3.c
2 | and R3.a=S3.a and R3.b=S3.b and R3.c=S3.c;
3 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/two_hop.sql:
--------------------------------------------------------------------------------
1 | select r.a, s.b from R2 r, S2 s where r.b=s.a;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/two_join_switch.sql:
--------------------------------------------------------------------------------
1 | select R3.a, T3.c from R3,S3,T3 where R3.c=S3.b and S3.a=T3.a and R3.a>1 and S3.a>2 and T3.a>3;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/two_key_hash_join.sql:
--------------------------------------------------------------------------------
1 | select R3.c, T3.c from R3, T3 where R3.a=T3.a and R3.b=T3.b;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/two_key_hash_join_swap.sql:
--------------------------------------------------------------------------------
1 | select R3.c, T3.c from R3, T3 where R3.a=T3.b and R3.b=T3.a;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/two_path.sql:
--------------------------------------------------------------------------------
1 | select r.a, r.b, s.b from R2 r, S2 s where r.b=s.a;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/two_var_select.sql:
--------------------------------------------------------------------------------
1 | select a, b from T2 where a<9 and b<9;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/unionall.sql:
--------------------------------------------------------------------------------
1 | -- unionall
2 | select a from T1 
3 | union all
4 | select a from R1;
5 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/unionall_3.sql:
--------------------------------------------------------------------------------
1 | select * from T1
2 | union all
3 | select * from R1
4 | union all
5 | select * from S1;
6 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/unionall_apply_and_self_join.sql:
--------------------------------------------------------------------------------
1 | --unionall
2 | select X.a, Y.a, Y.b from (select t.a as a, t.b as b from T2 t, R1 r where t.b < 4  and t.a=r.a
3 |                       union all
4 |                       select r.a as a, r.b as b from R2 r, T1 t where r.a=t.a) X,
5 |                      (select t.a as a, t.b as b from T2 t, R1 r where t.b < 4  and t.a=r.a
6 |                       union all
7 |                       select r.a as a, r.b as b from R2 r, T1 t where r.a=t.a) Y
8 | where X.b=Y.a;
9 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/unionall_of_join.sql:
--------------------------------------------------------------------------------
1 | --unionall
2 | select a, b from T2
3 | union all
4 | select r.a as a, t.b as b from R2 r, T2 t where r.b=t.a;
5 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/unionall_then_aggregate.sql:
--------------------------------------------------------------------------------
1 | select SUM(A2.a), A2.b from 
2 | (select * from R2
3 |  union all
4 |  select * from S2) A2
5 | group by A2.b;
6 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/unionall_then_join.sql:
--------------------------------------------------------------------------------
1 | --unionall
2 | select A.a from 
3 |     (select a, b from T2
4 |     union all
5 |     select a, b from R2) A,
6 |     S1 s
7 | where s.a=A.a;
8 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/while.sql:
--------------------------------------------------------------------------------
1 | select 0;
2 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/while_repeat_groupby.sql:
--------------------------------------------------------------------------------
1 | select SUM(I.a) as a, I.c as b, SUM(I.b) as c from
2 |   (select SUM(T3.a) as a, T3.c as b, SUM(T3.b) as c from T3 group by T3.c) as I
3 |   group by I.c;
4 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/while_repeat_join.sql:
--------------------------------------------------------------------------------
1 | -- iteration 2
2 | select s1.b, s1.c, s1.a
3 |     -- iteration 1
4 |     from (select s1.b as a, s1.c as b, s1.a as c from T3 s1, T3 s2
5 |     where s1.a=s2.b) as s1,
6 |          (select s1.b as a, s1.c as b, s1.a as c from T3 s1, T3 s2
7 |     where s1.a=s2.b) as s2
8 |     where s1.a=s2.b;
9 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/while_union_all.sql:
--------------------------------------------------------------------------------
 1 | select 1234
 2 | UNION ALL
 3 | select 1234
 4 | UNION ALL
 5 | select 1234
 6 | UNION ALL
 7 | select 1234
 8 | UNION ALL
 9 | select 1234
10 | UNION ALL
11 | select 1234
12 | UNION ALL
13 | select 1234
14 | UNION ALL
15 | select 1234;
16 | 


--------------------------------------------------------------------------------
/c_test_environment/testqueries/zero_store.sql:
--------------------------------------------------------------------------------
1 | select r.a from R2 r
2 | where r.b = 11;
3 | 


--------------------------------------------------------------------------------
/c_test_environment/timing.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | 
 4 | #if defined(__MTA__)
 5 | #include <sys/mta_task.h>
 6 | #include <machine/runtime.h>
 7 | #elif defined(__MACH__)
 8 | #include <mach/mach_time.h>
 9 | #else
10 | #include <time.h>
11 | #endif
12 | 
13 | 
14 | #define BILLION 1000000000
15 | 
16 | 
17 | /// Wall-clock time in seconds from a per-platform monotonic source
18 | /// (MTA cycle counter, Mach absolute time, or POSIX CLOCK_MONOTONIC).
19 | /// Only the difference between two calls is meaningful.
20 | inline double walltime(void) {
21 | #if defined(__MTA__)
22 |         return((double)mta_get_clock(0) / mta_clock_freq());
23 | #elif defined(__MACH__)
24 |         // Query the timebase only once; a zero denom marks "not yet
25 |         // initialized" because the static starts out zero-filled.
26 |         static mach_timebase_info_data_t info;
27 |         if (info.denom == 0) {
28 |                 mach_timebase_info(&info);
29 |         }
30 |         // Scale in floating point: ticks * numer can overflow 64 bits.
31 |         return 1.0e-9 * (double)mach_absolute_time()
32 |                * (double)info.numer / (double)info.denom;
33 | #else
34 |         // The former CLKID macros were never used (and tested the
35 |         // non-existent CLOCK_REALTIME_ID); CLOCK_MONOTONIC is the clock read.
36 |         struct timespec tp;
37 |         clock_gettime(CLOCK_MONOTONIC, &tp);
38 |         return (double)tp.tv_sec + (double)tp.tv_nsec / BILLION;
39 | #endif
40 | }
41 | 


--------------------------------------------------------------------------------
/examples/.gitignore:
--------------------------------------------------------------------------------
 1 | *.cpp
 2 | *.cpp.orig
 3 | *.dot
 4 | build/
 5 | log*.rb
 6 | *.logical.pdf
 7 | *.physical*.pdf
 8 | *.logical.ps
 9 | *.physical.ps
10 | 
11 | 


--------------------------------------------------------------------------------
/examples/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | # Render Graphviz .dot files as images.
 3 | %.png: %.dot
 4 | 	dot -Tpng $< -o $@
 5 | 
 6 | %.ps: %.dot
 7 | 	dot -Tps $< -o $@
 8 | 
 9 | # Convert to PDF, then crop the page.  Every path is derived from the target
10 | # ($(basename ...) strips only the suffix and keeps the directory), so the
11 | # rule also works for targets that are not in the current directory.
12 | %.pdf: %.ps
13 | 	ps2pdf $< $@
14 | 	pdfcrop $@ $(basename $@)-crop.pdf
15 | 	mv $(basename $@)-crop.pdf $@
16 | 


--------------------------------------------------------------------------------
/examples/bad_column_name.myl:
--------------------------------------------------------------------------------
1 | T = empty(x:int);
2 | A = [from T emit SafeDiv(x, 3) AS SafeDiv];
3 | store(A, OUTPUT);
4 | 


--------------------------------------------------------------------------------
/examples/cast.myl:
--------------------------------------------------------------------------------
1 | Emp = SCAN(public:adhoc:employee);
2 | Groups = [FROM Emp EMIT id + 3, string(salary)];
3 | Store(Groups, OUTPUT);
4 | 


--------------------------------------------------------------------------------
/examples/catalog.py:
--------------------------------------------------------------------------------
 1 | # Schemas corresponding to Myrial examples
 2 | 
 3 | {
 4 |     'public:adhoc:edges': [('src','LONG_TYPE'), ('dst', 'LONG_TYPE')],
 5 |     'public:adhoc:vertices': [('id','LONG_TYPE')],
 6 |     'public:adhoc:points': [('id','LONG_TYPE'), ('x','DOUBLE_TYPE'), ('y', 'DOUBLE_TYPE')],
 7 |     'public:adhoc:sc_points': [('id', 'LONG_TYPE'), ('v', 'DOUBLE_TYPE')],
 8 |     'public:adhoc:employee' : [('id', 'LONG_TYPE'), ('dept_id', 'LONG_TYPE'), ('name', 'STRING_TYPE'),
 9 |                                 ('salary','LONG_TYPE')],
10 |     'public:adhoc:departments' : [('id', 'LONG_TYPE'), ('name','STRING_TYPE')],
11 |     'armbrustlab:seaflow:all_data' : [('Cruise', 'LONG_TYPE'),
12 |                                       ('Day', 'LONG_TYPE'),
13 |                                       ('File_Id', 'LONG_TYPE'),
14 |                                       ('chl_small', 'DOUBLE_TYPE'),
15 |                                       ('pe', 'DOUBLE_TYPE')],
16 |     'public:adhoc:nodes_jstor' : [('paper_id', 'LONG_TYPE'), ('year','LONG_TYPE')],
17 |     'public:adhoc:links_jstor' : [('p1', 'LONG_TYPE'), ('p2','LONG_TYPE')],
18 |     'dhalperi:lineage:top_papers_jstor' : [('paper_id', 'LONG_TYPE')],
19 |     'public:adhoc:sp2bench' : [('subject', 'STRING_TYPE'), ('predicate','STRING_TYPE'), ('object','STRING_TYPE')],
20 | }
21 | 


--------------------------------------------------------------------------------
/examples/clog.py:
--------------------------------------------------------------------------------
 1 | from raco.cpp_datalog_utils import emitCode
 2 | from raco.backends.cpp import CCAlgebra
 3 | import sys
 4 | 
 5 | import logging
 6 | logging.basicConfig(level=logging.DEBUG)
 7 | LOG = logging.getLogger(__name__)
 8 |   
 9 | if __name__ == "__main__":
10 |   query = sys.argv[1]
11 |   print query
12 |   name = sys.argv[2]
13 |   print name
14 | 
15 |   plan = ""
16 |   if len(sys.argv) > 3:
17 |       plan = sys.argv[3]
18 | 
19 |   lst = []
20 |   alg = CCAlgebra
21 |   if plan: lst.append(plan)
22 |   if name: lst.append(name)
23 |   emitCode(query, "_".join(lst), alg, plan)
24 | 
25 | 


--------------------------------------------------------------------------------
/examples/clog.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Generate query code with clog.py, then build and run it in
 3 | # ../c_test_environment.
 4 | # usage: clog.sh QUERY NAME
 5 | query=$1
 6 | name=$2
 7 | 
 8 | cdir=`cd ..; pwd`
 9 | cappsrcdir=$cdir/c_test_environment
10 | cbuilddir=$cdir/c_test_environment
11 | 
12 | # pushd/popd are bash builtins, hence the bash shebang above.
13 | pushd "$cbuilddir" || exit 1
14 | if [ ! -f R1 ]; then
15 |   echo "GENERATING TEST DATA (first time)"
16 |   python generate_test_relations.py
17 | fi
18 | popd
19 | 
20 | echo "GENERATING QUERY CODE"
21 | PYTHONPATH=.. python clog.py "$query" "$name" 2> log.rb
22 | mv "$name.cpp" "$cappsrcdir"
23 | 
24 | echo "COMPILING QUERY CODE"
25 | # stop here if the build directory is missing instead of running make elsewhere
26 | cd "$cbuilddir" || exit 1
27 | make "$name.exe"; echo "RUNNING QUERY CODE"; "./$name.exe"
28 | 


--------------------------------------------------------------------------------
/examples/connected_components.myl:
--------------------------------------------------------------------------------
 1 | E = scan(TwitterK); -- edges
 2 | V = select distinct E.$0 from E; -- vertices
 3 | CC = [from V emit V.$0 as node_id, V.$0 as component_id]; -- initial node IDs and component IDs
 4 | do
 5 |   new_CC = [from E, CC where E.$0 = CC.$0 emit E.$1, CC.$1] + CC; -- join CC with the graph to propagate component IDs
 6 |   new_CC = [from new_CC emit new_CC.$0, MIN(new_CC.$1)]; -- for each vertex, only keep the minimum component ID
 7 |   delta = diff(CC, new_CC);
 8 |   CC = new_CC;
 9 | while [from delta emit count(*) > 0]; -- while we have update
10 | store(CC, CC);
11 | 
12 | 


--------------------------------------------------------------------------------
/examples/deadcode.myl:
--------------------------------------------------------------------------------
 1 | -- Begin dead code block
 2 | X = [3.14159 AS y, 3 AS id, 4 AS x];
 3 | Y = SCAN(public:adhoc:points);
 4 | Z = SCAN(public:adhoc:points);
 5 | 
 6 | X = [FROM X,Y WHERE X.y == Y.y EMIT X.id, Y.x, X.y];
 7 | X = DISTINCT(X);
 8 | X = UNIONALL(X, Y);
 9 | -- End dead code block
10 | 
11 | X = SCAN(public:adhoc:points);
12 | Q = UNIONALL(X, Z);
13 | STORE(Q, OUTPUT);
14 | 


--------------------------------------------------------------------------------
/examples/deadcode2.myl:
--------------------------------------------------------------------------------
1 | x = [0 as val, 1 as exp];
2 | do
3 |   x = [from x emit val+1 as val, 2*exp as exp];
4 | while [from x emit val < 5];
5 | -- with no store, this should be the empty program
6 | 


--------------------------------------------------------------------------------
/examples/dept.csv:
--------------------------------------------------------------------------------
1 | 1, "accounting", 5
2 | 2, "human resources", 2
3 | 3, "engineering", 2
4 | 4, "sales", 7


--------------------------------------------------------------------------------
/examples/emp.csv:
--------------------------------------------------------------------------------
1 | 1, 2, "Bill Howe", 25000
2 | 2, 1, "Dan Halperin", 90000
3 | 3, 1, "Andrew Whitaker", 5000
4 | 4, 2, "Shumo Chu", 5000
5 | 5, 1, "Victor Almeida", 25000
6 | 6, 3, "Dan Suciu", 90000
7 | 7, 1, "Magdalena Balazinska", 25000


--------------------------------------------------------------------------------
/examples/grappa_test_query.py:
--------------------------------------------------------------------------------
 1 | from raco import RACompiler
 2 | 
 3 | import logging
 4 | logging.basicConfig(level=logging.DEBUG)
 5 | LOG = logging.getLogger(__name__)
 6 | 
 7 | def comment(s):
 8 |   return "/*\n%s\n*/\n" % str(s)
 9 | 
10 | def testEmit(query, name):
11 |     LOG.info("compiling %s: %s", name, query)
12 | 
13 |     # Create a compiler object
14 |     dlog = RACompiler()
15 | 
16 |     # parse the query
17 |     dlog.fromDatalog(query)
18 |     #print dlog.parsed
19 |     LOG.info("logical: %s",dlog.logicalplan)
20 | 
21 |     dlog.optimize(target=GrappaAlgebra)
22 | 
23 |     LOG.info("physical: %s",dlog.physicalplan[0][1])
24 | 
25 |     # generate code in the target language
26 |     code = ""
27 |     code += comment("Query " + query)
28 |     code += dlog.compile()
29 | 
30 |     with open(name+'.cpp', 'w') as f:
31 |         f.write(code)
32 | 
33 | 
34 | queries = [
35 | ("A(s1) :- T1(s1)", "scan"),
36 | ("A(s1) :- T1(s1), s1>10", "select"),
37 | ("A(s1) :- T1(s1), s1>0, s1<10", "select_conjunction"),
38 | ("A(s1,s2) :- T2(s1,s2), s1>10, s2>10", "two_var_select"),
39 | ("A(s1,o2) :- T3(s1,p1,o1), R3(o2,p1,o2)", "join"),
40 | ("A(a,b,c) :- R2(a,b), S2(b,c)", "two_path"),
41 | ("A(a,c) :- R2(a,b), S2(b,c)", "two_hop"),
42 | ("A(a,b,c) :- R2(a,b), S2(b,c), T2(c,d)", "three_path"),
43 | ("A(a,b,c) :- R2(a,b), S2(b,c), T2(c,a)", "directed_triangles"),
44 | ("A(a,b,c,d) :- R2(a,b), S2(b,c), T2(c,d), Z2(d,a)", "directed_squares"),
45 | ("A(s1,s2,s3) :- T3(s1,s2,s3), R2(s3,s4), s1<s2, s4<100", "select_then_join"),
46 | ("A(a,b) :- R2(a,b), S2(a,b)", "two_match"),
47 | ("""A(s1,s2) :- T2(s1,s2)
48 |     A(s1,s2) :- R2(s1,s2)""", "union"),
49 | #("A(a,b,c) :- R(a,b), S(b,c), T(c,a), a<b, b<c", "increasing_triangles"),
50 | #("A(s1,s2,s3) :- T(s1,s2,s3), R(s3,s4), s1<s4", "equi_and_range"),
51 | #("A(s1,s2,s3) :- T(s1,s2),R(s3,s4), s1<s3", "range_join"),
52 | #("A(a,b,c,d,e):-X(a,b),Y(a,c),Z(a,d,e),T(a,b),K(b,a)", "complex_joins"),
53 | ]
54 | 
55 | for q in queries:
56 |     query, name = q
57 |     testEmit(query, 'grappa_'+name)
58 | 
59 | 


--------------------------------------------------------------------------------
/examples/grappalog.py:
--------------------------------------------------------------------------------
 1 | from raco.cpp_datalog_utils import emitCode
 2 | from raco.backends.radish import GrappaAlgebra
 3 | import sys
 4 | 
 5 | import logging
 6 | logging.basicConfig(level=logging.DEBUG)
 7 | LOG = logging.getLogger(__name__)
 8 | 
 9 | if __name__ == "__main__":
10 |   query = sys.argv[1]
11 |   print query
12 |   name = sys.argv[2]
13 |   print name
14 | 
15 |   plan = ""
16 |   if len(sys.argv) > 3:
17 |       plan = sys.argv[3]
18 | 
19 |   lst = []
20 |   alg = GrappaAlgebra
21 |   prefix = "grappa"
22 |   lst.append(prefix)
23 |   if plan: lst.append(plan)
24 |   if name: lst.append(name)
25 |   emitCode(query, "_".join(lst), alg, plan)
26 | 
27 | 


--------------------------------------------------------------------------------
/examples/grappalog.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | query=$1
 3 | name=$2
 4 | plan=$3
 5 | 
 6 | # usage: grappalog.sh QUERY NAME [PLAN]   (reads GRAPPA_HOME; needs bash for pushd/popd)
 7 | cappbuilddir=`cd ../c_test_environment; pwd`
 8 | gdir=$GRAPPA_HOME
 9 | gappsrcdir=$gdir/applications/join
10 | gbuilddir=$gdir/build/Make+Release
11 | gappbuilddir=$gbuilddir/applications/join
12 | 
13 | pushd $gappbuilddir || exit 1
14 | if [ ! -f R1 ]; then
15 |   echo "GENERATING TEST DATA (first time)"
16 |   python $cappbuilddir/generate_test_relations.py
17 | fi
18 | popd
19 | 
20 | echo "GENERATING QUERY CODE"
21 | PYTHONPATH=.. python grappalog.py "$query" $name $plan 2> log.rb
22 | # get file name assuming it is most recent cpp file (newest first, extension stripped)
23 | fullname=`ls -t *cpp | head -n1 | cut -d'.' -f1`
24 | cp $fullname.cpp $gappsrcdir
25 | 
26 | echo "COMPILING QUERY CODE"
27 | #TODO: make this not so new target dependent. Easy way is have a set of default targets that can be recycled
28 | cd $gdir; ./configure --gen=Make --mode=Release --cc=/sampa/share/distcc/gcc-4.7.2/bin/gcc --third-party=/sampa/share/grappa-third-party
29 | cd $gbuilddir; bin/distcc_make -j 24; cd $gappbuilddir; ../../bin/distcc_make -j24 $fullname.exe; echo "RUNNING QUERY CODE"; ../../bin/grappa_srun --ppn=4 --nnode=4 -f -- $fullname.exe
30 | #cd $gappbuilddir; ../../bin/distcc_make $fullname.exe; echo "RUNNING QUERY CODE"; ../../bin/grappa_srun --ppn=4 --nnode=4 -f -- $fullname.exe
31 | 
32 | 


--------------------------------------------------------------------------------
/examples/groupby1.myl:
--------------------------------------------------------------------------------
1 | 
2 | Emp = SCAN(public:adhoc:employee);
3 | Groups = [FROM Emp EMIT COUNT(salary), Emp.id];
4 | Store(Groups, OUTPUT, [$1]);
5 | 


--------------------------------------------------------------------------------
/examples/groupby2.myl:
--------------------------------------------------------------------------------
1 | 
2 | Emp = SCAN(public:adhoc:employee);
3 | Groups = [FROM Emp EMIT COUNT(*)];
4 | Store(Groups, OUTPUT);
5 | 


--------------------------------------------------------------------------------
/examples/groupby3.myl:
--------------------------------------------------------------------------------
1 | 
2 | Emp = SCAN(public:adhoc:employee);
3 | Groups = [FROM Emp EMIT id, AVG(salary), id];
4 | Store(Groups, OUTPUT);
5 | 


--------------------------------------------------------------------------------
/examples/helloworld.py:
--------------------------------------------------------------------------------
 1 | from raco.compile import compile, optimize
 2 | from raco.expression.boolean import EQ, AND, OR
 3 | from raco.expression import NamedAttributeRef, StringLiteral, NumericLiteral
 4 | import raco.scheme
 5 | import raco.catalog
 6 | 
 7 | # declare the schema for each relation
 8 | sch = raco.scheme.Scheme([("subject", int), ("predicate", int), ("object", int)])
 9 | 
10 | # Create a relation object.  We can add formats here as needed.
11 | trialdat = raco.catalog.ASCIIFile("trial.dat", sch)
12 | print sch
13 | 
14 | # Now write the RA expression
15 | # NOTE(review): Scan, Select, Join and CCAlgebra are used below but this script never imports them -- confirm the intended imports.
16 | # Scan just takes a pointer to a relation object
17 | R = Scan(trialdat, sch)  #TODO: is this supposed to pass sch?
18 | print R.scheme()
19 | 
20 | 
21 | # Select
22 | # EQ(x,y) means x=y, GT(x,y) means x>y, etc.
23 | sR = Select(EQ(NamedAttributeRef("predicate"), NumericLiteral(1133564893)), R)
24 | sS = Select(EQ(NamedAttributeRef("predicate"), NumericLiteral(77645021)), R)
25 | #sT = Select(EQ(NamedAttributeRef("predicate"), NumericLiteral(77645021)), R)
26 | sT = Select(EQ(NamedAttributeRef("object"), NumericLiteral(1018848684)), R)
27 | 
28 | # Join([(w,x),(y,z)], R, S) means "JOIN R, S ON (R.w = S.x AND R.y = S.z)"
29 | sRsS = Join([("object","subject")], sR, sS)
30 | sRsSsT = Join([("object","subject")], sRsS, sT)
31 | 
32 | # optimize applies a set of rules to translate a source
33 | # expression to a target expression
34 | result = optimize([("Ans", sT)], CCAlgebra)
35 | 
36 | # compile generates the linear code from the expression tree
37 | print compile(result)
38 | 


--------------------------------------------------------------------------------
/examples/iteration.myl:
--------------------------------------------------------------------------------
1 | -- Invariant: val = 2^exp
2 | x = [1 as val, 0 as exp];
3 | do
4 |   x = [from x emit val*2 as val, exp+1 as exp];
5 | while [from x emit exp < 5];
6 | store(x, powersOfTwo);
7 | 


--------------------------------------------------------------------------------
/examples/join.myl:
--------------------------------------------------------------------------------
1 | out = [FROM SCAN(public:adhoc:departments) AS D, SCAN(public:adhoc:employee) AS E
2 |        WHERE E.dept_id == D.id AND E.salary > 5000
3 |        EMIT E.name AS emp_name, D.name AS dept_name];
4 | STORE(out, OUTPUT);
5 | 


--------------------------------------------------------------------------------
/examples/join.sql:
--------------------------------------------------------------------------------
1 | emp = scan(public:adhoc:employee);
2 | dept = scan(public:adhoc:departments);
3 | out = select emp.name as emp_name, dept.name as dept_name
4 |       from dept, emp
5 |       where emp.dept_id == dept.id AND emp.salary > 5000;
6 | store(out, OUTPUT);
7 | 


--------------------------------------------------------------------------------
/examples/kmeans.myl:
--------------------------------------------------------------------------------
 1 | 
 2 | DEF EuclideanDistance(x0, y0, x1, y1):
 3 |     sqrt(pow(x0 - x1, 2) + pow(y0 - y1, 2));
 4 | 
 5 | -- Load some points; assume each point has a unique ID
 6 | Point = SCAN(public:adhoc:points);
 7 | 
 8 | -- Create some initial cluster centers from the first K points
 9 | -- TODO: We should choose these at random somehow...
10 | -- TODO: The cluster count should be expressable as a constant
11 | Centroid = [FROM LIMIT(Point, 3) AS K EMIT id AS cluster_id, x AS x,y AS y];
12 | 
13 | 
14 | -- Assign each point to the first cluster
15 | FirstCluster = LIMIT(Centroid, 1);
16 | Kmeans = [FROM Point EMIT Point.id AS id,
17 |           *FirstCluster.cluster_id AS cluster_id];
18 | 
19 | DO
20 |   -- Calculate distance from each point to each centroid; arguments follow EuclideanDistance(x0, y0, x1, y1)
21 |   Distance = [FROM Point, Centroid
22 |               EMIT Point.id AS id,
23 |                    Centroid.cluster_id AS cluster_id,
24 |                    EuclideanDistance(Point.x, Point.y, Centroid.x, Centroid.y) AS distance];
25 |                                            
26 |   -- Choose closest cluster for each point
27 |   Closest = [FROM Distance EMIT id, MIN(distance) AS distance];
28 |   NewKmeans = [FROM Closest, Distance
29 |                WHERE Closest.id == Distance.id AND
30 |                      ABS(Closest.distance - Distance.distance) < .000001
31 |                EMIT Closest.id AS id, MIN(Distance.cluster_id) AS cluster_id];
32 | 
33 |   -- Compute delta from the previous iteration
34 |   Delta = DIFF(NewKmeans, Kmeans);
35 |   Continue = [FROM Delta EMIT COUNT(id) > 0];
36 | 
37 |   Kmeans = NewKmeans;
38 | 
39 |   -- Update centroids
40 |   PointsInCentroid = [FROM Centroid, Kmeans, Point
41 |                       WHERE Centroid.cluster_id == Kmeans.cluster_id AND
42 |                             Point.id == Kmeans.id
43 |                       EMIT Centroid.cluster_id AS cluster_id, Point.x AS x,
44 |                            Point.y AS y];
45 | 
46 |   Centroid = [FROM PointsInCentroid EMIT cluster_id, avg(x) AS x, avg(y) AS y];
47 | 
48 | WHILE Continue;
49 | 
50 | STORE(Kmeans, OUTPUT);
51 | 


--------------------------------------------------------------------------------
/examples/language_demo.myl:
--------------------------------------------------------------------------------
 1 | T1 = scan(TwitterK);
 2 | T2 = [from T1 emit $0 == "foo bar" as x];
 3 | 
 4 | -- wrong:
 5 | T2 = [from T1 emit $0 == 'foo bar' as x];
 6 | 
 7 | def triangleArea(a,b): (a*b)/2;
 8 | R = [from Foo emit triangleArea(x,y) as area];
 9 | 
10 | apply RunningMean(value) {
11 |       [0 as c, 0 as s];
12 |       [c + 1 as c, s + value as s];
13 |       s / c;
14 | };
15 | 
16 | -- number of allowed standard deviations
17 | N = [2];
18 | 
19 | --this is a comment
20 | 
21 | newBad = empty(id:int, v:float);
22 | 
23 | bc = [from emp emit emp.*];
24 | 
25 | out = [from emp where $0 * 2 == $1 emit *];
26 | out = [from emp where $0 // $1 <> $1 emit *];
27 | 
28 | -- Unicode math operators ≤, ≥, ≠
29 | out = [from emp where $0 ≤ $1 and $0 ≠ $1 and $1 ≥ $0 emit *];
30 | 
31 | do
32 |     mean = [from Good emit avg(v) as val];
33 |     -- foo bar
34 |     NewBad = [from Good where abs(Good.v - *mean) > *N * *std emit *];
35 |     continue = diff(Good, NewBad);
36 | while continue;
37 | 
38 | store(Good, OUTPUT);
39 | 
40 | -- comment
41 | T3 = [from T1 emit sin(a)/4 + b as x];
42 | store(T2, JustX);


--------------------------------------------------------------------------------
/examples/load_options.csv:
--------------------------------------------------------------------------------
1 | this file uses the pipe character ("|") as field delimiter and the tilde ("~") as the quote character
2 | the percent character ("%") is used to escape the field delimiter
3 | 1|foo|~abc|def~|1.0
4 | 2|bar|ghi%|jkl|2.0
5 | 


--------------------------------------------------------------------------------
/examples/load_opts.myl:
--------------------------------------------------------------------------------
1 | t = load("https://s3-us-west-2.amazonaws.com/myria/public-adhoc-TwitterK.csv", csv(schema(column0:int, column1:int), skip=1));
2 | store(t, TwitterK2);
3 | 


--------------------------------------------------------------------------------
/examples/naivebayes/.gitignore:
--------------------------------------------------------------------------------
1 | msd_catalog*py
2 | 


--------------------------------------------------------------------------------
/examples/naivebayes/buckets.myl:
--------------------------------------------------------------------------------
 1 | input = SCAN(testdata);
 2 | 
 3 | discrete = select id, 
 4 | x0/10 as x0, 
 5 | x1/10 as x1, 
 6 | x2/10 as x2, 
 7 | x3/10 as x3, 
 8 | x4/10 as x4 
 9 | from input;
10 | 
11 | store(discrete, OUTPUT);
12 | 


--------------------------------------------------------------------------------
/examples/naivebayes/cat_scheme:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | from raco.catalog import FromFileCatalog
4 | import sys
5 | # usage: cat_scheme CATALOG_FILE_1 CATALOG_FILE_2 -- loads both files and passes them to print_cat
6 | c1 = FromFileCatalog.load_from_file(sys.argv[1])
7 | c2 = FromFileCatalog.load_from_file(sys.argv[2])
8 | FromFileCatalog.print_cat(c1, c2)
9 | 


--------------------------------------------------------------------------------
/examples/naivebayes/catalog.py:
--------------------------------------------------------------------------------
 1 | {'public:adhoc:conditionals': [('index', 'LONG_TYPE'), ('lp', 'DOUBLE_TYPE'), ('outcome', 'LONG_TYPE'), ('value', 'LONG_TYPE')],
 2 | 'public:adhoc:testdata': [('id', 'LONG_TYPE'), ('x0', 'DOUBLE_TYPE'),
 3 | ('x1', 'DOUBLE_TYPE'),
 4 | ('x2', 'DOUBLE_TYPE'),
 5 | ('x3', 'DOUBLE_TYPE'),
 6 | ('x4', 'DOUBLE_TYPE')],
 7 | 'public:adhoc:trainingdata': [('id', 'LONG_TYPE'), ('x0', 'DOUBLE_TYPE'),
 8 | ('x1', 'DOUBLE_TYPE'),
 9 | ('x2', 'DOUBLE_TYPE'),
10 | ('x3', 'DOUBLE_TYPE'),
11 | ('x4', 'DOUBLE_TYPE'),
12 | ('y', 'LONG_TYPE')]
13 | }
14 | 


--------------------------------------------------------------------------------
/examples/naivebayes/create_scheme.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | import argparse
 4 | 
 5 | AVG_COLS = 12
 6 | COV_COLS = 78
 7 | 
 8 | if __name__ == "__main__":
 9 |     parser = argparse.ArgumentParser()
10 |     parser.add_argument("-a", dest='avg_cols', type=int, required=True, help='number of timbre average columns')
11 |     parser.add_argument("-c", dest='cov_cols', type=int, required=True, help='number of timbre covariance columns')
12 |     parser.add_argument("--no-id", dest='id', action='store_false', default=True, help='include an id [default=true]')
13 |     parser.add_argument("--no-y", dest='y', action='store_false', default=True, help='include an id [default=true]')
14 |     parser.add_argument("--input", dest='inputtype', help="test or train", required=True)
15 | 
16 |     opt = parser.parse_args(sys.argv[1:])
17 | 
18 |     assert opt.avg_cols <= AVG_COLS
19 |     assert opt.cov_cols <= COV_COLS
20 | 
21 |     sch = []
22 |     if opt.id:
23 |       sch.append(('id', 'LONG_TYPE',))
24 | 
25 |     if opt.y:
26 |       sch.append(('y', 'LONG_TYPE',))
27 | 
28 |     for i in range(opt.avg_cols):
29 |         sch.append(('x{0}'.format(i), 'DOUBLE_TYPE',))
30 | 
31 |     for i in range(opt.cov_cols):
32 |         sch.append(('x{0}'.format(i+opt.avg_cols), 'DOUBLE_TYPE'))
33 | 
34 |     cat = {}
35 | 
36 |     if opt.inputtype == 'train':
37 |       cat['public:adhoc:trainingdata'] = sch
38 |     else:
39 |       cat['public:adhoc:testdata'] = sch
40 | 
41 |     print cat
42 | 


--------------------------------------------------------------------------------
/examples/naivebayes/generate_parse.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | nfeat = int(sys.argv[1])
 4 | 
 5 | y = int(sys.argv[2])
 6 | 
 7 | if y==1:
 8 |   parse_template = "input_sp{i} = select INT(input.x{i}/{bwidth}) as value, {i} as index, y from input;"
 9 | else:
10 |   parse_template = "input_sp{i} = select id, INT(input.x{i}/{bwidth}) as value, {i} as index from input;"
11 | 
12 | if y==1:
13 |   print "input = SCAN(trainingdata);"
14 | else:
15 |   print "input = SCAN(testdata);"
16 | 
17 | bwidth = 10
18 | for i in range(nfeat):
19 |   print parse_template.format(i=i, bwidth=bwidth)
20 | 
21 | inputs = []
22 | for i in range(nfeat):
23 |   inputs.append("input_sp%d" % i)
24 | print "input_sp = UNIONALL(%s);" % ', '.join(inputs)
25 | 


--------------------------------------------------------------------------------
/examples/naivebayes/naivebayes_classify.myl:
--------------------------------------------------------------------------------
 1 | -- arbitrarily favor new value in case of tie
 2 | def pickval(value, arg, _value, _arg):
 3 |     case when value >= _value then arg
 4 |         else _arg end;
 5 | 
 6 | uda ArgMax(outcome, lprob) {
 7 |    -- init
 8 |    [0 as _outcome, 0 as _lprob];
 9 |    
10 |    -- update
11 |    [pickval(lprob, outcome, _lprob, _outcome),
12 |     pickval(lprob, lprob, _lprob, _lprob)];
13 | 
14 |    -- output
15 |    [_lprob, _outcome];
16 | };
17 | 
18 | 
19 | CondP = SCAN(conditionals);
20 | 
21 | -- calculate probability of outcomes
22 | Poe = select input_sp.id as inputId, 
23 |       sum(CondP.lp) as lprob, 
24 |       CondP.outcome as outcome 
25 | from CondP, input_sp
26 | where
27 | CondP.index=input_sp.index
28 | and CondP.value=input_sp.value;
29 | -- double join!
30 | --group by CondP.outcome, input_sp.id;
31 | 
32 | -- select the max probability outcome
33 | classes = select inputId, ArgMax(outcome, lprob) from Poe;
34 | 
35 | store(classes, classified);
36 | 


--------------------------------------------------------------------------------
/examples/naivebayes/naivebayes_train.myl:
--------------------------------------------------------------------------------
 1 | freq_o = select y as outcome, count(y) as freq from input_sp;
 2 | 
 3 | freq_e_o = select y as outcome, index, value, count(y) as freq from input_sp;
 4 | 
 5 | condp = select freq_e_o.index, 
 6 |         -LOG(float(freq_e_o.freq) / freq_o.freq) as lp, 
 7 |         freq_e_o.outcome as outcome,
 8 |         freq_e_o.value as value
 9 | from freq_o, freq_e_o
10 | where freq_e_o.outcome = freq_o.outcome;
11 | 
12 | STORE(condp, conditionals);
13 | 


--------------------------------------------------------------------------------
/examples/naivebayes/nb_classify.myl:
--------------------------------------------------------------------------------
 1 | -- arbitrarily favor new value in case of tie
 2 | def pickval(value, arg, _value, _arg):
 3 |     case when value >= _value then arg
 4 |         else _arg end;
 5 | 
 6 | uda ArgMax(outcome, lprob) {
 7 |    -- init
 8 |    [0 as _outcome, 0 as _lprob];
 9 |    
10 |    -- update
11 |    [pickval(lprob, outcome, _lprob, _outcome),
12 |     pickval(lprob, lprob, _lprob, _lprob)];
13 | 
14 |    -- output
15 |    [_lprob, _outcome];
16 | };
17 | 
18 | 
19 | input = SCAN(testdata);
20 | CondP = SCAN(conditionals);
21 | 
22 | -- an alternation operation like an unpivot
23 | -- may be unnecessary if input already comes in a sparse format
24 | input_sp0 = select input.id as id, input.x0 as value, 0 as index from input;
25 | input_sp1 = select input.id as id, input.x1 as value, 1 as index from input;
26 | input_sp2 = select input.id as id, input.x2 as value, 2 as index from input;
27 | input_sp3 = select input.id as id, input.x3 as value, 3 as index from input;
28 | input_sp4 = select input.id as id, input.x4 as value, 4 as index from input;
29 | input_sp = UNIONALL(input_sp0, input_sp1, input_sp2, input_sp3, input_sp4);
30 | 
31 | -- calculate probability of outcomes
32 | Poe = select input_sp.id as inputId, 
33 |       sum(CondP.lp) as lprob, 
34 |       CondP.outcome as outcome 
35 | from CondP, input_sp
36 | where
37 | CondP.index=input_sp.index
38 | and CondP.value=input_sp.value;
39 | --group by CondP.outcome, input_sp.id;
40 | 
41 | -- select the max probability outcome
42 | classes = select inputId, ArgMax(outcome, lprob) from Poe;
43 | 
44 | store(classes, OUTPUT);
45 | 


--------------------------------------------------------------------------------
/examples/naivebayes/nb_train.myl:
--------------------------------------------------------------------------------
 1 | input = SCAN(trainingdata);
 2 | 
 3 | -- an alternation operation like an unpivot
 4 | -- may be unnecessary if input already comes in a sparse format
 5 | --input_sp0 = select input.id as id, input.x0 as value, 0 as index, y from input;
 6 | --input_sp1 = select input.id as id, input.x1 as value, 1 as index, y from input;
 7 | --input_sp2 = select input.id as id, input.x2 as value, 2 as index, y from input;
 8 | --input_sp3 = select input.id as id, input.x3 as value, 3 as index, y from input;
 9 | --input_sp4 = select input.id as id, input.x4 as value, 4 as index, y from input;
10 | input_sp0 = select input.x0 as value, 0 as index, y from input;
11 | input_sp1 = select input.x1 as value, 1 as index, y from input;
12 | input_sp2 = select input.x2 as value, 2 as index, y from input;
13 | input_sp3 = select input.x3 as value, 3 as index, y from input;
14 | input_sp4 = select input.x4 as value, 4 as index, y from input;
15 | input_sp01 = UNIONALL(input_sp0, input_sp1);
16 | input_sp02 = UNIONALL(input_sp01, input_sp2);
17 | input_sp03 = UNIONALL(input_sp02, input_sp3);
18 | input_sp = UNIONALL(input_sp03, input_sp4);
19 | 
20 | freq_o = select y as outcome, count(y) as freq from input_sp;
21 | 
22 | freq_e_o = select y as outcome, index, value, count(y) as freq from input_sp;
23 | 
24 | condp = select freq_e_o.index, 
25 |         -LOG(float(freq_e_o.freq) / freq_o.freq) as lp, 
26 |         freq_e_o.outcome as outcome,
27 |         freq_e_o.value as value
28 | from freq_o, freq_e_o
29 | where freq_e_o.outcome = freq_o.outcome;
30 | 
31 | STORE(condp, OUTPUT);
32 | 


--------------------------------------------------------------------------------
/examples/naivebayes/prepare_test.sh:
--------------------------------------------------------------------------------
 1 | set -o errexit
 2 | # Prepares the naive Bayes classification run: catalogs, converted input data, generated query, and the Grappa build.
 3 | # created by msd_train.myl:
 4 | # conditionals (scheme)
 5 | # conditionals.bin (data)
 6 | 
 7 | add_id=1
 8 | catalog=msd_catalog_test.py
 9 | catalog_wid=msd_catalog_test_wid.py
10 | catalog_all=msd_catalog_test_all.py
11 | navg=4
12 | ncov=4
13 | rel=testdata
14 | input='test'
15 | dataset=/sampa/home/bdmyers/escience/datasets/millionsong/YearPredictionMSD_test.txt
16 | #dataset=/sampa/home/bdmyers/escience/datasets/millionsong/YearPredictionMSD_test_small.txt
17 | binfile=/sampa/home/bdmyers/escience/datasets/millionsong/YearPredictionMSD_test_8attr.txt
18 | #binfile=/sampa/home/bdmyers/escience/datasets/millionsong/YearPredictionMSD_test_small_6attr.txt
19 | queryin=naivebayes_classify.myl
20 | query=msd_classify.myl
21 | convert_home=../../c_test_environment
22 | 
23 | # without id
24 | python create_scheme.py -a $navg -c $ncov --input=$input --no-id --no-y > $catalog
25 | pushd $convert_home
26 | python convert2bin.py -n $rel -c ../examples/naivebayes/$catalog
27 | ./$rel.convert $dataset 0 $add_id
28 | popd
29 | mv $dataset.bin $binfile.bin
30 | 
31 | # add in id now that we added it
32 | python create_scheme.py -a $navg -c $ncov --input=$input --no-y > $catalog_wid
33 | ./cat_scheme $GRAPPA_HOME/build/Make+Release/applications/join/conditionals $catalog_wid > $catalog_all
34 | python generate_parse.py $(($navg + $ncov)) 0 > tmp.myl
35 | cat tmp.myl $queryin > $query
36 | ../../scripts/myrial --emit=console -c --catalog=$catalog_all $query
37 | 
38 | # Copy $codef (presumably written by the myrial step above) into the Grappa tree and build it.
39 | codef=`basename $query .myl`.cpp 
40 | exef=grappa_`basename $query .myl`.exe
41 | scp $codef pal:~/grappa-nb/applications/join/grappa_$codef
42 | cp $codef $GRAPPA_HOME/applications/join/grappa_$codef
43 | pushd $GRAPPA_HOME/build/Make+Release/applications/join
44 | make -j $exef
45 | popd
46 | # Print the flags naming the input and output files (presumably to be passed to $exef).
47 | echo "--input_file_conditionals=$GRAPPA_HOME/build/Make+Release/applications/join/conditionals --output_file=$GRAPPA_HOME/build/Make+Release/applications/join/classified --input_file_testdata=$binfile --relations=/"
48 | 


--------------------------------------------------------------------------------
/examples/naivebayes/prepare_training.sh:
--------------------------------------------------------------------------------
 1 | set -o errexit
 2 | # Prepares the naive Bayes training run: catalogs, converted input data, generated query, and the Grappa build.
 3 | add_id=1
 4 | catalog=msd_catalog_train.py
 5 | catalog_wid=msd_catalog_train_wid.py
 6 | #NOTE: convert does not actually pick this order; it picks the first navg+ncov features
 7 | navg=4
 8 | ncov=4
 9 | rel=trainingdata
10 | input='train'
11 | dataset=/sampa/home/bdmyers/escience/datasets/millionsong/YearPredictionMSD_train.txt
12 | #dataset=/sampa/home/bdmyers/escience/datasets/millionsong/YearPredictionMSD_train_small.txt
13 | binfile=/sampa/home/bdmyers/escience/datasets/millionsong/YearPredictionMSD_train_8attr.txt
14 | #binfile=/sampa/home/bdmyers/escience/datasets/millionsong/YearPredictionMSD_train_small_6attr.txt
15 | queryin=naivebayes_train.myl
16 | query=msd_train.myl
17 | convert_home=../../c_test_environment
18 | 
19 | # without id
20 | python create_scheme.py -a $navg -c $ncov --input=$input --no-id > $catalog
21 | pushd $convert_home
22 | python convert2bin.py -n $rel -c ../examples/naivebayes/$catalog
23 | ./$rel.convert $dataset 0 $add_id
24 | popd
25 | mv $dataset.bin $binfile.bin
26 | 
27 | # add in id now that we added it
28 | python create_scheme.py -a $navg -c $ncov --input=$input > $catalog_wid
29 | python generate_parse.py $(($navg + $ncov )) 1 > tmp.myl
30 | cat tmp.myl $queryin > $query
31 | ../../scripts/myrial -c --emit=file --catalog=$catalog_wid $query
32 | 
33 | # Copy $codef (presumably written by the myrial step above) into the Grappa tree and build it.
34 | codef=`basename $query .myl`.cpp 
35 | exef=grappa_`basename $query .myl`.exe
36 | cp $codef $GRAPPA_HOME/applications/join/grappa_$codef
37 | pushd $GRAPPA_HOME/build/Make+Release/applications/join
38 | make -j $exef
39 | popd
40 | # Print the flags naming the input and output files (presumably to be passed to $exef).
41 | echo "--input_file_trainingdata=$binfile --output_file=$GRAPPA_HOME/build/Make+Release/applications/join/conditionals --relations=/"
42 | 


--------------------------------------------------------------------------------
/examples/noschema.myl:
--------------------------------------------------------------------------------
1 | -- Scan of a table that has no schema in the catalog
2 | T1 = SCAN(foo:bar:baz);
3 | T2 = [FROM T1 EMIT x=$3];
4 | T3 = [FROM T2 EMIT y=MIN(x)];
5 | STORE (T3, bang:baz:bar);
6 | 


--------------------------------------------------------------------------------
/examples/openmp_examples/A.h:
--------------------------------------------------------------------------------
 1 | #ifndef ____A__
 2 | #define ____A__
 3 | 
 4 | #include <vector>
 5 | #include <iostream>
 6 | #include <fstream>
 7 | #include <map>
 8 | #include <stdlib.h>
 9 | //#include <unordered_set>
10 | 
11 | using namespace std;
12 | 
13 | void query(const char* fname,int num_threads);
14 | 
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/examples/openmp_examples/Makefile:
--------------------------------------------------------------------------------
1 | # Build the OpenMP example from triangle_with_tuples.cpp.
2 | triangle_parallel: triangle_with_tuples.cpp
3 | 	gcc -O3 -fopenmp triangle_with_tuples.cpp -lstdc++ -lrt -o triangle_parallel
4 | 
5 | # clean produces no file, so keep it runnable even if a file named "clean" exists.
6 | .PHONY: clean
7 | clean:
8 | 	rm -f triangle_parallel
9 | 


--------------------------------------------------------------------------------
/examples/openmp_examples/igor_omp_tri.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require 'igor'
 3 | 
 4 | $datasets="/sampa/home/bdmyers/graph_datasets"
 5 | 
 6 | Igor do
 7 |   
 8 |   database 'join.omp.db', :triangles
 9 | 
10 |   command 'srun -p grappa ./triangles_parallel %{fin} %{ppn}'
11 | 
12 |   sbatch_flags << "--time=60"
13 |   
14 |   params {
15 |     nnode       1
16 |     ppn         2
17 |     fin         ""
18 |     tag         'none'
19 |   }
20 | 
21 |  run {
22 |     fin "#{$datasets}/berkstan/web-BerkStan.txt"
23 |  }
24 |    
25 |   
26 |   expect :triangles_runtime
27 |  
28 |   #$filtered = results{|t| t.select(:id, :nnode, :ppn, :tree, :run_at, :search_runtime) }
29 |     
30 |   interact # enter interactive mode
31 | end
32 | 


--------------------------------------------------------------------------------
/examples/pagerank.myl:
--------------------------------------------------------------------------------
 1 | 
 2 | -- Simplified PageRank; assumes that all nodes have out degree > 0
 3 | 
 4 | alpha = [.85];
 5 | epsilon = [.0001];
 6 | 
 7 | Edge = SCAN(public:adhoc:edges);
 8 | Vertex = SCAN(public:adhoc:vertices);
 9 | 
10 | N = [FROM Vertex EMIT COUNT(id) AS val];
11 | min_rank = [(1 - *alpha) / *N];
12 | 
13 | OutDegree = [FROM Edge EMIT Edge.src AS id, COUNT(Edge.dst) AS cnt];
14 | PageRank = [FROM Vertex EMIT Vertex.id AS id, 1.0 / *N AS rank];
15 | 
16 | DO
17 |     -- Calculate each node's outbound page rank contribution
18 |     PrOut = [FROM PageRank, OutDegree WHERE PageRank.id == OutDegree.id
19 |              EMIT PageRank.id AS id, PageRank.rank / OutDegree.cnt AS out_rank];
20 | 
21 |     -- Compute the inbound summands for each node
22 |     Summand = [FROM Vertex, Edge, PrOut
23 |                 WHERE Edge.dst == Vertex.id AND Edge.src == PrOut.id
24 |                 EMIT Vertex.id AS id, PrOut.out_rank AS summand];
25 |     
26 |     -- Sum up the summands; adjust by alpha
27 |     NewPageRank = [FROM Summand EMIT id AS id,
28 |                    *min_rank + *alpha * SUM(Summand.summand) AS rank];
29 |     Delta = [FROM NewPageRank, PageRank WHERE NewPageRank.id == PageRank.id
30 |              EMIT ABS(NewPageRank.rank - PageRank.rank) AS val];
31 |     Continue = [FROM Delta EMIT MAX(Delta.val) > *epsilon];
32 |     PageRank = NewPageRank;
33 | WHILE Continue;
34 | 
35 | STORE(PageRank, OUTPUT);
36 | 


--------------------------------------------------------------------------------
/examples/pagerank_dead.myl:
--------------------------------------------------------------------------------
 1 | 
 2 | -- PageRank augmented with dead code; for use as an optimization test
 3 | 
 4 | alpha = [.85];
 5 | epsilon = [.0001];
 6 | 
 7 | D0 = [3.14159 AS pi, 2.71828 AS e]; -- dead code
 8 | 
 9 | Edge = SCAN(public:adhoc:edges);
10 | Vertex = SCAN(public:adhoc:vertices);
11 | 
12 | N = [FROM Vertex EMIT COUNT(id) AS val];
13 | min_rank = [(1 - *alpha) / *N];
14 | 
15 | OutDegree = [FROM Edge EMIT Edge.src AS id, COUNT(Edge.dst) AS cnt];
16 | PageRank = [FROM Vertex EMIT Vertex.id AS id, 1.0 / *N AS rank];
17 | 
18 | DO
19 |     D1 = [FROM Vertex EMIT COUNT(id) AS val]; -- dead code
20 | 
21 |     -- Calculate each node's outbound page rank contribution
22 |     PrOut = [FROM PageRank, OutDegree WHERE PageRank.id == OutDegree.id
23 |              EMIT PageRank.id AS id, PageRank.rank / OutDegree.cnt AS out_rank];
24 | 
25 |     -- Compute the inbound summands for each node
26 |     Summand = [FROM Vertex, Edge, PrOut
27 |                 WHERE Edge.dst == Vertex.id AND Edge.src == PrOut.id
28 |                 EMIT Vertex.id AS id, PrOut.out_rank AS summand];
29 |     
30 |     -- Sum up the summands; adjust by alpha
31 |     NewPageRank = [FROM Summand EMIT id AS id,
32 |                    *min_rank + *alpha * SUM(Summand.summand) AS rank];
33 |     Delta = [FROM NewPageRank, PageRank WHERE NewPageRank.id == PageRank.id
34 |              EMIT ABS(NewPageRank.rank - PageRank.rank) AS val];
35 |     Continue = [FROM Delta EMIT MAX(Delta.val) > *epsilon];
36 |     Continue = [FROM Delta EMIT MAX(Delta.val) > *epsilon]; -- duplicate line
37 |     PageRank = NewPageRank;
38 | WHILE Continue;
39 | 
40 | STORE(PageRank, OUTPUT);
41 | 


--------------------------------------------------------------------------------
/examples/pairwise_distances.myl:
--------------------------------------------------------------------------------
 1 | const partition: 0.5;
 2 | const epsilon: 0.0000106;
 3 | 
 4 | def mod(x, n): x - int(x/n)*n;
 5 | def cell(v): int((v - mod(v, partition)) * (1/partition));
 6 | def is_ghost(xoffset, yoffset, zoffset):
 7 |   case when xoffset = 0 and
 8 |             yoffset = 0 and
 9 |             zoffset = 0 then 0 else 1 end;
10 | def is_replicated(x, y, z, xoffset, yoffset, zoffset):
11 |   is_ghost(xoffset, yoffset, zoffset) = 0 or
12 |   cell(x + epsilon*xoffset) != cell(x) or
13 |   cell(y + epsilon*yoffset) != cell(y) or
14 |   cell(z + epsilon*zoffset) != cell(z);
15 | def distance(x1, x2, y1, y2, z1, z2): sqrt((x1-x2)*(x1-x2) +
16 |                                            (y1-y2)*(y1-y2) +
17 |                                            (z1-z2)*(z1-z2));
18 | 
19 | points = load("https://s3-us-west-2.amazonaws.com/uwdb/sampleData/sampleCrossmatch/points.txt",
20 |               csv(schema(id:int,
21 |                          x:float,
22 |                          y:float,
23 |                          z:float), skip=0));
24 | permutations = load("https://s3-us-west-2.amazonaws.com/myria/permutations",
25 |                     csv(schema(xoffset:int,
26 |                                yoffset:int,
27 |                                zoffset:int), skip=0));
28 | 
29 | -- Partition into a grid with edges of size partition
30 | -- Replicate any point that falls within epsilon of a partition boundary
31 | 
32 | partitions = [from points, permutations
33 |               where is_replicated(x, y, z, xoffset, yoffset, zoffset)
34 |               emit id, x, y, z,
35 |                    cell(x) + xoffset as px,
36 |                    cell(y) + yoffset as py,
37 |                    cell(z) + zoffset as pz,
38 |                    is_ghost(xoffset, yoffset, zoffset) as ghost];
39 | 
40 | -------------------------------------------
41 | 
42 | -- Cross product on partition + ghost cells; no shuffle required
43 | local = [from partitions left,
44 |               partitions right
45 |          where left.px = right.px and
46 |                left.py = right.py and
47 |                left.pz = right.pz
48 |          emit *];
49 | 
50 | -- Calculate distances within each local pair and filter outliers
51 | distances = [from local
52 |              where id < id1 and
53 |                    ghost = 0 and
54 |                    distance(x, x1, y, y1, z, z1) <= epsilon
55 |              emit id as id1,
56 |                   id1 as id2,
57 |                   distance(x, x1, y, y1, z, z1) as distance];
58 | 
59 | store(distances, distances);


--------------------------------------------------------------------------------
/examples/rdfsimple.myl:
--------------------------------------------------------------------------------
1 | R1 = scan(public:adhoc:sp2bench);
2 | R2 = scan(public:adhoc:sp2bench);
3 | r = [FROM R1, R2 
4 | WHERE R1.subject = "<http://dbpedia.org/resource/Oscar_Cristi>"
5 |   AND R1.object = R2.subject
6 | EMIT R1.subject, R2.object, 4+1
7 | ];
8 | store(r, predicates);
9 | 


--------------------------------------------------------------------------------
/examples/reachable.myl:
--------------------------------------------------------------------------------
 1 | Edge = SCAN(public:adhoc:edges);
 2 | Source = [1 AS addr];
 3 | Reachable = Source;
 4 | Delta = Source;
 5 | 
 6 | DO
 7 |     NewlyReachable = DISTINCT([FROM Delta, Edge
 8 |                               WHERE Delta.addr == Edge.src
 9 |                               EMIT Edge.dst AS addr]);
10 |     Delta = DIFF(NewlyReachable, Reachable);
11 |     Reachable = UNIONALL(Delta, Reachable);
12 | WHILE [FROM COUNTALL(Delta) AS size EMIT *size > 0];
13 | 
14 | STORE(Reachable, OUTPUT);
15 | 


--------------------------------------------------------------------------------
/examples/samplescan.myl:
--------------------------------------------------------------------------------
 1 | -- Sample from relation with-replacement
 2 | T1 = samplescan(public:adhoc:employee, 1, WR);
 3 | T2 = samplescan(public:adhoc:employee, 1.5%, WR);
 4 | 
 5 | -- Sample from relation without-replacement
 6 | T3 = samplescan(public:adhoc:employee, 1, WoR);
 7 | T4 = samplescan(public:adhoc:employee, .5%, WoR);
 8 | 
 9 | -- Uses With-Replacement sampling if no sample type specified
10 | T5 = samplescan(public:adhoc:employee, 1);
11 | T6 = samplescan(public:adhoc:employee, 1%);
12 | 
13 | T = unionall(T1, T2, T3, T4, T5, T6);
14 | Store(T, samplescanquery);
15 | 


--------------------------------------------------------------------------------
/examples/seaflow.myl:
--------------------------------------------------------------------------------
1 |  DEF transform(x): pow(10, x/pow(2,16)*3.5);
2 |  AllData = SCAN(armbrustlab:seaflow:all_data);
3 |  AllDataLinear = SELECT Cruise, Day, File_Id
4 |                       , transform(fsc_small) as fsc_small
5 |                       -- fsc_perp is measured differently, defer for later
6 |                       , transform(chl_small) as chl_small
7 |                       , transform(pe) as pe
8 |                  FROM AllData;
9 |  STORE(AllDataLinear, armbrustlab:seaflow:all_data_linear);


--------------------------------------------------------------------------------
/examples/seaflow2.myl:
--------------------------------------------------------------------------------
 1 | DEF transform(x): pow(10, x/pow(2,16)*3.5);
 2 | 
 3 | AllData = SCAN(armbrustlab:seaflow:all_data);
 4 | AllDataLinear = SELECT Cruise, Day, File_Id
 5 |                      , pow(10, fsc_small/pow(2,16)*3.5) as fsc_small
 6 |                      -- fsc_perp is measured differently, defer for later
 7 |                      , pow(10, chl_small/pow(2,16)*3.5) as chl_small
 8 |                      , pow(10, pe/pow(2,16)*3.5) as pe
 9 |                  FROM AllData;
10 | STORE(AllDataLinear, armbrustlab:seaflow:all_data_linear);
11 | 


--------------------------------------------------------------------------------
/examples/sigma-clipping-v0.myl:
--------------------------------------------------------------------------------
 1 | -- Simple and slow implementation of sigma clipping; this query is not
 2 | -- incremental, so it re-scans points on every iteration.
 3 | 
 4 | Good = scan(sc_points);
 5 | 
 6 | -- number of allowed standard deviations
 7 | const N: 2;
 8 | 
 9 | do
10 |     stats = [from Good emit avg(v) AS mean, stdev(v) as std];
11 |     NewBad = [from Good, stats where abs(v - mean) > N * std emit Good.*];
12 |     Good = diff(Good, NewBad);
13 |     continue = [from NewBad emit count(NewBad.v) > 0];
14 | while continue;
15 | 
16 | store(Good, sc_points_clipped);
17 | 


--------------------------------------------------------------------------------
/examples/sigma-clipping.myl:
--------------------------------------------------------------------------------
 1 | Points = SCAN(public:adhoc:sc_points);
 2 | 
 3 | aggs = [from Points emit sum(v) as _sum, sum(v*v) as sumsq, count(v) as cnt];
 4 | newBad = empty(id:int, v:float);
 5 | 
 6 | bounds = [from Points emit min(v) as lower, max(v) as upper];
 7 | 
 8 | -- number of allowed standard deviations
 9 | const Nstd: 2;
10 | 
11 | do
12 |   -- Incrementally update aggs and stats
13 |   new_aggs = [from newBad emit sum(v) as _sum, sum(v*v) as sumsq,
14 |                                count(v) as cnt];
15 |   aggs = [from aggs, new_aggs
16 |           emit aggs._sum - new_aggs._sum as _sum,
17 |                aggs.sumsq - new_aggs.sumsq as sumsq,
18 |                aggs.cnt - new_aggs.cnt as cnt];
19 | 
20 |   stats = [from aggs
21 |            emit _sum/cnt as mean,
22 |                 SQRT(1.0/(cnt*(cnt-1)) * (cnt * sumsq - _sum * _sum)) as std];
23 | 
24 |   -- Compute the new bounds
25 |   newBounds = [from stats emit mean - Nstd * std as lower,
26 |                                mean + Nstd * std as upper];
27 | 
28 |   newBad = [from Points, bounds, newBounds
29 |             where (newBounds.upper < v
30 |                    and v <= bounds.upper)
31 |                or (newBounds.lower > v
32 |                    and v >= bounds.lower)
33 |             emit Points.*];
34 | 
35 |   bounds = newBounds;
36 |   continue = [from newBad emit count(v) > 0];
37 | while continue;
38 | 
39 | output = [from Points, bounds
40 |           where Points.v > bounds.lower
41 |                 and Points.v < bounds.upper
42 |           emit Points.*];
43 | store(output, sc_points_clipped);
44 | 


--------------------------------------------------------------------------------
/examples/sigma_clipping_points.txt:
--------------------------------------------------------------------------------
 1 | 25.0
 2 | 27.2
 3 | 23.4
 4 | 25.1
 5 | 26.3
 6 | 24.9
 7 | 23.5
 8 | 22.7
 9 | 108.2
10 | 26.2
11 | 25.3
12 | 24.7
13 | 25.01
14 | 26.1
15 | 22.8
16 | 2.2
17 | 24.8
18 | 25.05
19 | 25.15
20 | 


--------------------------------------------------------------------------------
/examples/sp2bench.py:
--------------------------------------------------------------------------------
 1 | import test_query
 2 | import sys
 3 | 
 4 | if __name__ == "__main__":
 5 |   queryfile = sys.argv[1]  # path of the query template to read
 6 |   tr = 'sp2bench_1m'  # referenced by name from the template through locals() below (e.g. %(tr)s)
 7 |   with open(queryfile, 'r') as f:
 8 |       query = f.read() % locals()  # fills %(name)s placeholders from this script's variables; do not rename them
 9 | 
10 |   fname = test_query.testEmit(query, queryfile, test_query.CCAlgebra)  # fname is not used afterwards
11 |   
12 |   
13 | 


--------------------------------------------------------------------------------
/examples/sp2bench/catalog.py:
--------------------------------------------------------------------------------
1 | # Schemas corresponding to Myrial examples
2 | 
3 | {
4 |     'public:adhoc:sp2bench' : [('subject', 'STRING_TYPE'), ('predicate','STRING_TYPE'), ('object','STRING_TYPE')],
5 | }
6 | 


--------------------------------------------------------------------------------
/examples/sp2bench/q1.dlg:
--------------------------------------------------------------------------------
1 | A(yr) :- %(tr)s(journal, 'rdf:type', 'bench:Journal'),
2 |                  %(tr)s(journal, 'dc:title', 'Journal 1 (1940)'),
3 |                  %(tr)s(journal, 'dcterms:issued', yr)
4 | 


--------------------------------------------------------------------------------
/examples/sp2bench/q1.myl:
--------------------------------------------------------------------------------
 1 | Triples = scan(public:adhoc:sp2bench);
 2 | 
 3 | Q1 = SELECT
 4 |     T3.object AS yr
 5 | FROM 
 6 |      Triples T1
 7 |    , Triples T2     
 8 |    , Triples T3     
 9 | WHERE 
10 |       T1.subject=T3.subject
11 |   AND T1.subject=T2.subject
12 |   AND T1.predicate="<http://www.w3.org/1999/02/22-rdf-syntax-ns#/type>"
13 |   AND T2.predicate="<http://purl.org/dc/elements/1.1/title>"
14 |   AND T3.predicate="<http://purl.org/dc/terms/issued>"
15 |   AND T1.object="<http://localhost/vocabulary/bench/Journal>"
16 |   AND T2.object='"Journal 1 (1940)"^^xsd:string';
17 | 
18 | store(Q1, Q1);
19 | 


--------------------------------------------------------------------------------
/examples/sp2bench/q2.myl:
--------------------------------------------------------------------------------
 1 | Triples = scan(public:adhoc:sp2bench);
 2 | 
 3 | -- NOTE(review): JOIN ... ON, LEFT JOIN and ORDER BY are written as in SQL;
 4 | -- confirm the Myrial parser accepts them before relying on this query.
 5 | Q2 = SELECT
 6 |     T1.subject AS inproc,
 7 |     T2.object AS author,
 8 |     T3.object AS booktitle,
 9 |     T4.object AS title,
10 |     T5.object AS proc,
11 |     T6.object AS ee,
12 |     T7.object AS page,
13 |     T8.object AS URL,
14 |     T9.object AS yr,
15 |     AB.object AS abstract
16 | FROM
17 |     Triples T1
18 |     JOIN Triples T2     ON T1.subject=T2.subject
19 |     JOIN Triples T3     ON T1.subject=T3.subject
20 |     JOIN Triples T4     ON T1.subject=T4.subject
21 |     JOIN Triples T5     ON T1.subject=T5.subject
22 |     JOIN Triples T6     ON T1.subject=T6.subject
23 |     JOIN Triples T7     ON T1.subject=T7.subject
24 |     JOIN Triples T8     ON T1.subject=T8.subject
25 |     JOIN Triples T9     ON T1.subject=T9.subject
26 |     LEFT JOIN (
27 |         SELECT *
28 |         FROM
29 |             Triples T10
30 |         WHERE
31 |             T10.predicate='bench:abstract'
32 |     ) AB ON T1.subject=AB.subject
33 | WHERE
34 |     T1.predicate='rdf:type'
35 |     AND T2.predicate='dc:creator'
36 |     AND T3.predicate='bench:booktitle'
37 |     AND T4.predicate='dc:title'
38 |     AND T5.predicate='dcterms:partOf'
39 |     AND T6.predicate='rdfs:seeAlso'
40 |     AND T7.predicate='swrc:pages'
41 |     AND T8.predicate='foaf:homepage'
42 |     AND T9.predicate='dcterms:issued'
43 |     AND T1.object='bench:Inproceedings'
44 | ORDER BY T9.object;
45 | 
46 | store(Q2, Q2);
47 | 


--------------------------------------------------------------------------------
/examples/sp2bench/q3.myl:
--------------------------------------------------------------------------------
 1 | Triples = scan(public:adhoc:sp2bench);
 2 | 
 3 | Q3 = SELECT
 4 |     T1.subject AS article
 5 | FROM
 6 |     Triples T1,
 7 |     Triples T2
 8 | WHERE
 9 |     T1.subject=T2.subject 
10 |     AND T1.predicate="<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>"
11 |     AND T2.predicate="<http://swrc.ontoware.org/ontology#pages>"
12 |     AND T2.object="<http://localhost/vocabulary/bench/Article>";
13 | 
14 | store(Q3, Q3);
15 | 


--------------------------------------------------------------------------------
/examples/sp2bench/q4.myl:
--------------------------------------------------------------------------------
 1 | Triples = scan(public:adhoc:sp2bench);
 2 | 
 3 | Q4 = SELECT
 4 |     T1.subject AS article
 5 | FROM
 6 |     Triples T1,
 7 |     Triples T2
 8 | WHERE
 9 |     T1.subject=T2.subject
10 |     AND T1.predicate="rdf:type"
11 |     AND T2.predicate="swrc:month"
12 |     AND T2.object="bench:Article";
13 | 
14 | store(Q4, Q4);
15 | 


--------------------------------------------------------------------------------
/examples/standalone.myl:
--------------------------------------------------------------------------------
1 | Emp = load("./examples/emp.csv", csv(schema(id:int, dept_id:int, name:string, salary:int)));
2 | Dept = load("./examples/dept.csv", csv(schema(id:int, name:string, manager:int)));
3 | 
4 | out = [from Emp, Dept
5 |        where Emp.dept_id == Dept.id AND Emp.salary > 5000
6 |        emit Emp.name as emp_name, Dept.name as dept_name];
7 | dump(out);
8 | 


--------------------------------------------------------------------------------
/examples/tipsy.myl:
--------------------------------------------------------------------------------
1 | t = load("https://s3-us-west-2.amazonaws.com/uwdb/sampleData/sampleTipsy/cosmo8.33PLK.256g3bwK1C52.000970", tipsy(group="amiga"));
2 | --t = load("baz", tipsy());
3 | store(t, t);


--------------------------------------------------------------------------------
/examples/uda.myl:
--------------------------------------------------------------------------------
 1 | -- test with user-defined aggregate
 2 | uda LogicalAvg(x) {
 3 |   [0 as _sum, 0 as _count];
 4 |   [_sum + x, _count + 1];
 5 |   _sum / _count;
 6 | };
 7 | uda LocalAvg(x) {
 8 |   [0 as _sum, 0 as _count];
 9 |   [_sum + x, _count + 1];
10 | };
11 | uda RemoteAvg(_local_sum, _local_count) {
12 |   [0 as _sum, 0 as _count];
13 |   [_sum + _local_sum, _count + _local_count];
14 |   [_sum/_count];
15 | };
16 | uda* LogicalAvg {LocalAvg, RemoteAvg};
17 | 
18 | out = [FROM SCAN(public:adhoc:employee) AS X EMIT dept_id,
19 |        LogicalAvg(salary) + LogicalAvg($0)];
20 | STORE(out, OUTPUT);
21 | 


--------------------------------------------------------------------------------
/examples/worker_id.myl:
--------------------------------------------------------------------------------
1 | X = [FROM SCAN(public:adhoc:employee) AS X EMIT X.id, WORKER_ID()];
2 | STORE(X, OUTPUT);
3 | 


--------------------------------------------------------------------------------
/raco/__init__.py:
--------------------------------------------------------------------------------
 1 | from raco.datalog.grammar import parse
 2 | from raco.compile import optimize
 3 | 
 4 | import logging
 5 | LOG = logging.getLogger(__name__)
 6 | 
 7 | 
 8 | class RACompiler(object):
 9 | 
10 |     """Thin wrapper interface for lower level functions parse, optimize,
11 |     compile"""
12 | 
13 |     def fromDatalog(self, program):
14 |         """Parse datalog and convert to RA"""
15 |         self.physicalplan = None
16 |         self.source = program
17 |         self.parsed = parse(program)
18 |         LOG.debug("parser output: %s", self.parsed)
19 |         self.logicalplan = self.parsed.toRA()
20 | 
21 |     def optimize(self, target, **kwargs):
22 |         """Convert logical plan to physical plan"""
23 |         self.physicalplan = optimize(self.logicalplan, target, **kwargs)
24 | 


--------------------------------------------------------------------------------
/raco/backends/__init__.py:
--------------------------------------------------------------------------------
1 | # everything in backend_common made public
2 | from backend_common import *
3 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/__init__.py:
--------------------------------------------------------------------------------
1 | # everything in cpp.py made public
2 | from cpp import *
3 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/ascii_scan.cpp:
--------------------------------------------------------------------------------
1 | auto {{resultsym}} = tuplesFromAscii<{{result_type}}>("{{name}}");
2 | 
3 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/base_query.cpp:
--------------------------------------------------------------------------------
  1 | // Precount_select: Use buckets to track the number of matches
  2 | // Use buckets to copy into the result array
  3 | #include <cstdio>
  4 | #include <cstdlib>     // for exit()
  5 | #include <fcntl.h>      // for open()
  6 | #include <unistd.h>     // for close()
  7 | #include <sys/stat.h>   // for fstat()
  8 | #include <ctype.h>      // for isdigit()
  9 | #include <cstring>
 10 | #include <errno.h>
 11 | #include <algorithm>
 12 | #include <sys/types.h>
 13 | #include <sys/stat.h>
 14 | #include <sys/file.h>
 15 | 
 16 | #ifdef __MTA__
 17 | #include <machine/runtime.h>
 18 | #include <luc/luc_common.h>
 19 | #include <snapshot/client.h>
 20 | #include <sys/mta_task.h>
 21 | 
 22 | 
 23 | typedef int int64;
 24 | typedef unsigned uint64;
 25 | #else
 26 | #include <sys/time.h>
 27 | 
 28 | #include <iomanip>
 29 | #include <cstdint>
 30 | #include <iostream>
 31 | #include <fstream>
 32 | typedef int64_t int64;
 33 | typedef uint64_t uint64;
 34 | 
 35 | #include <unordered_map>
 36 | #include <vector>
 37 | #include <limits>
 38 | #endif
 39 | 
 40 | #include "io_util.h"
 41 | #include "hash.h"
 42 | #include "radish_utils.h"
 43 | #include "strings.h"
 44 | #include "timing.h"
 45 | 
 46 | // ------------------------------------------------------------------
 47 | 
 48 | 
 49 | {{declarations}}
 50 | 
 51 | StringIndex string_index;
 52 | void init( ) {
 53 | }
 54 | 
 55 | 
 56 | void query(struct relationInfo *resultInfo)
 57 | {
 58 |   printf("\nstarting Query stdout\n");fflush(stdout);
 59 | 
 60 |   double start = timer();
 61 | 
 62 |   uint64 resultcount = 0;
 63 |   struct relationInfo {{resultsym}}_val;
 64 |   struct relationInfo *{{resultsym}} = &{{resultsym}}_val;
 65 | 
 66 | 
 67 |   // -----------------------------------------------------------
 68 |   // Fill in query here
 69 |   // -----------------------------------------------------------
 70 |   {{initialized}}
 71 | 
 72 | 
 73 |  {{queryexec}}
 74 | 
 75 |   {{cleanups}}
 76 | 
 77 |   // return final result
 78 |   resultInfo->tuples = {{resultsym}}->tuples;
 79 |   resultInfo->fields = {{resultsym}}->fields;
 80 |   resultInfo->relation = {{resultsym}}->relation;
 81 | 
 82 | }
 83 | 
 84 | 
 85 | 
 86 | int main(int argc, char **argv) {
 87 | 
 88 |   struct relationInfo resultInfo;
 89 | 
 90 |   init();
 91 | 
 92 |     printf("post-init stdout\n");fflush(stdout);
 93 | 
 94 |   // Execute the query
 95 |   query(&resultInfo);
 96 | 
 97 |     printf("post-query stdout\n");fflush(stdout);
 98 | 
 99 | #ifdef ZAPPA
100 | //  printrelation(&resultInfo);
101 | #endif
102 | //  free(resultInfo.relation);
103 | 
104 |     printf("exiting stdout\n");fflush(stdout);
105 | 
106 | }
107 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/clang_group_timing.cpp:
--------------------------------------------------------------------------------
1 | {% extends "group_timing.cpp" %}
2 | {% block printcode %}
3 | std::cout << "pipeline group {{ident}}: "
4 |           << runtime_{{ident}}
5 |           << " s" << std::endl;
6 | {% endblock %}
7 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/clang_pipeline_timing.cpp:
--------------------------------------------------------------------------------
 1 | {% extends "pipeline_timing.cpp" %}
 2 | 
 3 | {% block printstart %}
 4 | std::cout {{ super() }} << std::endl;
 5 | {% endblock %}
 6 | 
 7 | {% block printruntime %}
 8 | std::cout {{ super() }} << std::endl;
 9 | {% endblock %}
10 | 
11 | {% block printend %}
12 | std::cout {{ super() }} << std::endl;
13 | {% endblock %}
14 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/groupby/0key_declaration.cpp:
--------------------------------------------------------------------------------
1 | {{valtype}} {{hashname}} = {{initial_value}};
2 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/groupby/0key_materialize.cpp:
--------------------------------------------------------------------------------
1 | {{op}}_insert({{hashname}}, {{val}});
2 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/groupby/0key_scan.cpp:
--------------------------------------------------------------------------------
1 | {
2 |     {{output_tuple_type}} {{output_tuple_name}}({{hashname}});
3 |     {{inner_code}}
4 | }
5 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/groupby/1key_declaration.cpp:
--------------------------------------------------------------------------------
1 | std::unordered_map<{{keytype}},{{valtype}}> {{hashname}};
2 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/groupby/1key_materialize.cpp:
--------------------------------------------------------------------------------
1 | {{op}}_insert({{hashname}}, {{key1val}}, {{val}});
2 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/groupby/1key_scan.cpp:
--------------------------------------------------------------------------------
1 | for (auto it={{hashname}}.begin(); it!={{hashname}}.end(); it++) {
2 |     {{output_tuple_type}} {{output_tuple_name}}(it->first, it->second);
3 |     {{inner_code}}
4 | }
5 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/groupby/2key_declaration.cpp:
--------------------------------------------------------------------------------
1 | std::unordered_map<std::pair<{{keytypes}}>, {{valtype}}, pairhash> {{hashname}};
2 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/groupby/2key_materialize.cpp:
--------------------------------------------------------------------------------
1 | {{op}}_insert({{hashname}}, {{key1val}}, {{key2val}}, {{val}});
2 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/groupby/2key_scan.cpp:
--------------------------------------------------------------------------------
1 | for (auto it={{hashname}}.begin(); it!={{hashname}}.end(); it++) {
2 |     {{output_tuple_type}} {{output_tuple_name}}(it->first.first, it->first.second, it->second);
3 |     {{inner_code}}
4 | }
5 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/hashjoin/hash_declaration.cpp:
--------------------------------------------------------------------------------
1 | std::unordered_map<{{keytype}}, std::vector<{{in_tuple_type}}> > {{hashname}};
2 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/hashjoin/insert_materialize.cpp:
--------------------------------------------------------------------------------
1 | insert({{hashname}}, {{keyval}}, {{in_tuple_name}});
2 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/hashjoin/lookup.cpp:
--------------------------------------------------------------------------------
1 | for (auto {{right_tuple_name}} : lookup({{hashname}}, {{keyval}})) {
2 |     auto {{out_tuple_name}} = {{append_func_name}}({{keyname}}, {{right_tuple_name}});
3 |     {{inner_plan_compiled}}
4 | }
5 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/materialized_tuple_ref_additional.cpp:
--------------------------------------------------------------------------------
 1 | public:
 2 |     static {{tupletypename}} fromRelationInfo(relationInfo * rel, int row) {
 3 |         // DOESN'T WORK WITH SCHEMAS WITH STRINGS
 4 |       {{tupletypename}} _t;
 5 |       {% for ft in fieldtypes %}
 6 |          _t.f{{loop.index-1}} = *({{ft}}*)(&(rel->relation[row*rel->fields+{{loop.index-1}}]));
 7 |       {% endfor %}
 8 |       return _t;
 9 |     }
10 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/memory_scan.cpp:
--------------------------------------------------------------------------------
1 | for (auto {{tuple_name}} : {{inputsym}}) {
2 |     {{inner_plan_compiled}}
3 | } // end scan over {{inputsym}}
4 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/relation_declaration.cpp:
--------------------------------------------------------------------------------
1 | std::vector<{{tuple_type}}> {{resultsym}};
2 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/c_templates/string_index_lookup.cpp:
--------------------------------------------------------------------------------
1 | auto {{name}} = string_index.string_lookup({{st}});
2 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/cbase_templates/assignment.cpp:
--------------------------------------------------------------------------------
1 | {{dst_set_func}} = {{src_expr_compiled}};
2 | 
3 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/cbase_templates/group_timing.cpp:
--------------------------------------------------------------------------------
1 | {% block precode %}{% endblock %}
2 | auto start_{{ident}} = walltime();
3 | {{inner_code}}
4 | auto end_{{ident}} = walltime();
5 | {% block postcode %}{% endblock %}
6 | auto runtime_{{ident}} = end_{{ident}} - start_{{ident}};
7 | {% block printcode %}{% endblock %}
8 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/cbase_templates/materialized_tuple_create_one.cpp:
--------------------------------------------------------------------------------
1 | static {{result_type}} {{convert_func_name}}(const {{type1}}& t1) {
2 |     {{result_type}} t;
3 |     {% for i in range(type1numfields) %}
4 |         t.f{{i}} = t1.f{{i}};
5 |     {% endfor %}
6 | 
7 |     return t;
8 | }
9 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/cbase_templates/materialized_tuple_create_two.cpp:
--------------------------------------------------------------------------------
 1 | static {{result_type}} {{append_func_name}}(const {{type1}}& t1, const {{type2}}& t2) {
 2 |     {{result_type}} t;
 3 |     {% for i in range(type1numfields) %}
 4 |         t.f{{i}} = t1.f{{i}};
 5 |     {% endfor %}
 6 | 
 7 |     {% for i in range(type2numfields) %}
 8 |         t.f{{i+type1numfields}} = t2.f{{i}};
 9 |     {% endfor %}
10 | 
11 |     return t;
12 | }
13 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/cbase_templates/output_stream_close.cpp:
--------------------------------------------------------------------------------
1 | {{output_stream_symbol}}.close();
2 | 
3 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/cbase_templates/output_stream_decl.cpp:
--------------------------------------------------------------------------------
1 | std::ofstream {{output_stream_symbol}};
2 | 
3 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/cbase_templates/output_stream_open.cpp:
--------------------------------------------------------------------------------
1 | {{output_stream_symbol}}.open("{{filename}}");
2 | 
3 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/cbase_templates/output_stream_write.cpp:
--------------------------------------------------------------------------------
1 | {{output_stream_symbol}} << "{{stringval}}" << std::endl;
2 | 
3 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/cbase_templates/pipeline_timing.cpp:
--------------------------------------------------------------------------------
 1 | auto start_{{ident}} = walltime();
 2 | {% block printstart %} << "timestamp {{ident}} start " << std::setprecision(15) << start_{{ident}}{% endblock %}
 3 | 
 4 | {{inner_code}}
 5 | auto end_{{ident}} = walltime();
 6 | auto runtime_{{ident}} = end_{{ident}} - start_{{ident}};
 7 | {% block printruntime %} << "pipeline {{ident}}: " << runtime_{{ident}} << " s"{% endblock %}
 8 | 
 9 | {% block printend %} << "timestamp {{ident}} end " << std::setprecision(15) << end_{{ident}}{% endblock %}
10 | 
11 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/cbase_templates/select.cpp:
--------------------------------------------------------------------------------
1 | if ({{conditioncode}}) {
2 |   {{inner_code_compiled}}
3 | }
4 | 
5 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/cbase_templates/tuple_declaration.cpp:
--------------------------------------------------------------------------------
1 | {{dst_type_name}} {{dst_name}};
2 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/cbase_templates/tuple_type_convert.cpp:
--------------------------------------------------------------------------------
1 | {{result_type}} {{result_name}} = {{convert_func_name}}({{input_tuple_name}});
2 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/cbase_templates/write_count.cpp:
--------------------------------------------------------------------------------
1 | write_count("{{filename}}", {{count_symbol}});
2 | 
3 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/operator_at_a_time_c_templates/ascii_scan.template:
--------------------------------------------------------------------------------
 1 | /*
 2 | =====================================
 3 |   Scan(%(name)s)
 4 | =====================================
 5 | */
 6 | 
 7 | printf("%(resultsym)s = Scan(%(name)s)\n");
 8 | 
 9 | struct relationInfo %(resultsym)s_val;
10 | 
11 | #ifdef __MTA__
12 |   //binary_inhale("%(name)s", &%(resultsym)s_val);
13 |   inhale("%(name)s", &%(resultsym)s_val);
14 | #else
15 |   inhale("%(name)s", &%(resultsym)s_val);
16 | #endif // __MTA__
17 | 
18 | struct relationInfo *%(resultsym)s = &%(resultsym)s_val;
19 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/operator_at_a_time_c_templates/binary_scan.template:
--------------------------------------------------------------------------------
 1 | /*
 2 | =====================================
 3 |   Scan(%(name)s)
 4 | =====================================
 5 | */
 6 | 
 7 | printf("%(resultsym)s = Scan(%(name)s)\n");
 8 | 
 9 | struct relationInfo %(resultsym)s_val;
10 | 
11 | #ifdef __MTA__
12 |   binary_inhale("%(name)s", &%(resultsym)s_val);
13 |   //inhale("%(name)s", &%(resultsym)s_val);
14 | #else
15 |   inhale("%(name)s", &%(resultsym)s_val);
16 | #endif // __MTA__
17 | 
18 | struct relationInfo *%(resultsym)s = &%(resultsym)s_val;


--------------------------------------------------------------------------------
/raco/backends/cpp/operator_at_a_time_c_templates/emit_joined_tuple.template:
--------------------------------------------------------------------------------
1 | 
2 | printf("joined tuple: %d, %d, %d, %d\n", join1_leftrow, join1_rightrow, join2_leftrow, join2_rightrow);
3 | resultcount++;
4 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/operator_at_a_time_c_templates/filtering_nestedloop_hashjoin_chain.template:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/operator_at_a_time_c_templates/filtering_nestedloop_join.template:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 |       // Join %(depth)s: scan the right input for rows that join with one left row
 4 |       for (uint64 join%(depth)s_rightrow = 0; join%(depth)s_rightrow < join%(depth)s_right->tuples; join%(depth)s_rightrow++) {
 5 |         if (%(right_condition)s) { // filter on join%(depth)s.right
 6 |           // left row for this level; check_condition below reads it under this exact name
 7 |           uint64 join%(depth)s_leftrow = %(left_row_variable)s;
 8 |           if (check_condition(join%(depth)s_left
 9 |                              , join%(depth)s_right
10 |                              , join%(depth)s_leftrow
11 |                              , join%(depth)s_rightrow
12 |                              , join%(depth)s_leftattribute
13 |                              , join%(depth)s_rightattribute)) {
14 | 
15 |              %(inner_plan_compiled)s
16 | 
17 | 
18 |           } // Join %(depth)s condition
19 |         } // filter on join%(depth)s.right
20 |       } // loop over join%(depth)s.right
21 | 
22 | 
23 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/operator_at_a_time_c_templates/filtering_nestedloop_join_chain.template:
--------------------------------------------------------------------------------
 1 | { // Begin Filtering_NestedLoop_Join_Chain
 2 | 
 3 | 
 4 | 
 5 |   printf("V2 = Join(%(inner_plan)s,V1) \n");
 6 |   // Assume left-deep plan
 7 | 
 8 |   // leaves of the tree
 9 |   %(relation_decls)s
10 | 
11 |   // Join 1
12 |   %(join_decls)s
13 | 
14 |   double start = timer();
15 | 
16 |   getCounters(counters, currCounter);
17 |   currCounter = currCounter + 1; // 1
18 | 
19 | #pragma mta trace "running join"
20 |   // Left Root
21 |   for (uint64 join%(depth)s_leftrow = 0; join%(depth)s_leftrow < join%(depth)s_left->tuples; join%(depth)s_leftrow++) {
22 |     if (%(left_root_condition)s) { // filter on join%(depth)s.left
23 |       // Join %(depth)s
24 |       for (uint64 join%(depth)s_rightrow = 0; join%(depth)s_rightrow < join%(depth)s_right->tuples; join%(depth)s_rightrow++) {
25 |         if (%(right_condition)s) { // filter on join%(depth)s.right
26 |           if (check_condition(join%(depth)s_left, join%(depth)s_right
27 |                              , join%(depth)s_leftrow, join%(depth)s_rightrow, join%(depth)s_leftattribute, join%(depth)s_rightattribute)) {
28 |              %(inner_plan_compiled)s
29 |           } // Join 1 condition
30 |         } // filter on join1.right
31 |       } // loop over join1.right
32 |     } // filter on join1.left 
33 |   } // loop over join1.left
34 | 
35 | } // End Filtering_NestedLoop_Join_Chain
36 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/operator_at_a_time_c_templates/scan.template:
--------------------------------------------------------------------------------
 1 | /*
 2 | =====================================
 3 |   Scan(%(name)s)
 4 | =====================================
 5 | */
 6 | 
 7 | printf("%(resultsym)s = Scan(%(name)s)\n");
 8 | 
 9 | struct relationInfo %(resultsym)s_val;
10 | 
11 | #ifdef __MTA__
12 |   //binary_inhale("%(name)s", &%(resultsym)s_val);
13 |   inhale("%(name)s", &%(resultsym)s_val);
14 | #else
15 |   inhale("%(name)s", &%(resultsym)s_val);
16 | #endif // __MTA__
17 | 
18 | struct relationInfo *%(resultsym)s = &%(resultsym)s_val;
19 | 


--------------------------------------------------------------------------------
/raco/backends/cpp/operator_at_a_time_c_templates/select_simple_twopass.template:
--------------------------------------------------------------------------------
 1 | /*
 2 | ============================================
 3 | ============ TwoPass Select ================
 4 | ============================================
 5 | */
 6 | struct relationInfo %(resultsym)s_val;
 7 | struct relationInfo *%(resultsym)s = &%(resultsym)s_val;
 8 | 
 9 | {
10 |   printf("\n%(resultsym)s = TwoPassSelect(%(condition)s, %(inputsym)s)\n");
11 | 
12 |   uint64 *relation = %(inputsym)s->relation;
13 |   uint64 tuples = %(inputsym)s->tuples;
14 |   uint64 fields = %(inputsym)s->fields;
15 | 
16 |   uint64 matches = 0;
17 | 
18 |   int numCounters = 4;
19 |   int currCounter = 0;
20 |   int *counters = mallocCounterMemory(numCounters);
21 | 
22 |   double start = timer();
23 | 
24 |   getCounters(counters, currCounter);
25 |   currCounter = currCounter + 1; // 1
26 | 
27 |   // Pass 1: count the matching tuples so the result can be sized exactly
28 |   #pragma mta trace "1st pass to compute size for TwoPassSelect(%(condition)s, %(inputsym)s))"
29 |   for (uint64 i = 0; i < tuples*fields; i += fields) {
30 |       if (%(condition)s) {
31 |           matches++;
32 |       }
33 |   }
34 |   printf("\tfinished first pass\n");
35 | 
36 |   getCounters(counters, currCounter);
37 |   currCounter = currCounter + 1; // 2
38 | 
39 |   uint64 *%(resultsym)s_result;
40 |   // allocate space for the result: matches tuples of fields uint64 values each
41 |   %(resultsym)s_result = (uint64 *) malloc(matches*fields*sizeof(uint64));
42 | 
43 |   // check success; a zero-size request may return NULL, which is not a failure
44 |   if (!%(resultsym)s_result && matches*fields > 0) {
45 |     #pragma mta trace "Memory Allocation FAILURE in TwoPassSelect(%(condition)s, %(inputsym)s))"
46 |     printf("Memory Allocation FAILURE in TwoPassSelect(%(condition)s, %(inputsym)s)\n");
47 |     exit(5);
48 |   }
49 | 
50 |   // Pass 2: copy every matching tuple, field by field, into the result buffer
51 |   uint64 current_result = 0;
52 |   #pragma mta trace "begin 2nd pass"
53 |   #pragma mta assert nodep
54 |   for (uint64 i = 0; i < tuples*fields; i+=fields) {
55 |     if (%(condition)s) {
56 |       for( uint64 j = 0; j < fields; j = j + 1 ) {
57 |         %(resultsym)s_result[current_result + j] = relation[i + j];
58 |       }
59 |       current_result+=fields;
60 |     }
61 |   }
62 |   getCounters(counters, currCounter);
63 |   currCounter = currCounter + 1; // 3
64 | 
65 |   double finish = timer();
66 |   printf("\t%%f seconds\n", finish - start);
67 |   printf("\t%%lu tuples in result\n", matches);
68 |   if (matches) {
69 |     %(resultsym)s->tuples = matches;
70 |     %(resultsym)s->fields = fields;
71 |     %(resultsym)s->relation = %(resultsym)s_result;
72 |   } else {
73 |     free(%(resultsym)s_result); // nothing matched: drop the empty allocation (free(NULL) is a no-op)
74 |     %(resultsym)s->tuples = 0;
75 |     %(resultsym)s->fields = fields;
76 |     %(resultsym)s->relation = NULL;
77 |   }
78 | 
79 |   printDiffCounters(counters, numCounters);
80 |   freeCounterMemory(counters);
81 | 
82 | /*
83 | ============ End TwoPass Select ================
84 | */
85 | }
86 | 


--------------------------------------------------------------------------------
/raco/backends/logical.py:
--------------------------------------------------------------------------------
 1 | import raco.rules as rules
 2 | from raco.backends import Algebra
 3 | 
 4 | 
 5 | class OptLogicalAlgebra(Algebra):
 6 | 
 7 |     @staticmethod
 8 |     def opt_rules(**kwargs):
 9 |         return [rules.RemoveTrivialSequences(),
10 |                 rules.SimpleGroupBy(),
11 |                 rules.SplitSelects(),
12 |                 rules.PushSelects(),
13 |                 rules.MergeSelects(),
14 |                 rules.ProjectToDistinctColumnSelect(),
15 |                 rules.JoinToProjectingJoin(),
16 |                 rules.PushApply(),
17 |                 rules.RemoveUnusedColumns(),
18 |                 rules.PushApply(),
19 |                 rules.RemoveUnusedColumns(),
20 |                 rules.PushApply(),
21 |                 rules.DeDupBroadcastInputs()]
22 | 


--------------------------------------------------------------------------------
/raco/backends/myria/__init__.py:
--------------------------------------------------------------------------------
1 | # everything in myria made public
2 | from myria import *
3 | 


--------------------------------------------------------------------------------
/raco/backends/myria/errors.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | 
 3 | 
 4 | class MyriaError(Exception):
 5 |     def __init__(self, err=None):
 6 |         if isinstance(err, requests.Response):
 7 |             msg = 'Error {} ({})'.format(err.status_code, err.reason)
 8 |             if err.text:
 9 |                 msg = '{}: {}'.format(msg, err.text)
10 |             Exception.__init__(self, msg)
11 |         else:
12 |             Exception.__init__(self, err)
13 | 


--------------------------------------------------------------------------------
/raco/backends/myria/tests/test_error.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from raco.backends.myria.errors import MyriaError
 3 | 
 4 | 
 5 | class TestError(unittest.TestCase):
 6 |     def test_error(self):
 7 |         with self.assertRaises(MyriaError):
 8 |             raise MyriaError
 9 | 
10 | if __name__ == '__main__':
11 |     unittest.main()
12 | 


--------------------------------------------------------------------------------
/raco/backends/radish/README.md:
--------------------------------------------------------------------------------
1 | # Radish
2 | 
3 | Backend that compiles Myria Algebra to C++ for execution on Grappa.
4 | 
5 | * Depends on ../clang templates and routines 
6 | 


--------------------------------------------------------------------------------
/raco/backends/radish/__init__.py:
--------------------------------------------------------------------------------
1 | # everything in radish made public
2 | from radish import *
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/base_query.cpp:
--------------------------------------------------------------------------------
 1 | // grappa
 2 | #include <Grappa.hpp>
 3 | #include <Collective.hpp>
 4 | #include <GlobalCompletionEvent.hpp>
 5 | #include <Metrics.hpp>
 6 | 
 7 | using namespace Grappa;
 8 | 
 9 | // stl
10 | #include <vector>
11 | #include <iomanip>
12 | #include <cstring>
13 | #include <limits>
14 | 
15 | // query library
16 | #include "relation_io.hpp"
17 | #include "MatchesDHT.hpp"
18 | #include "DoubleDHT.hpp"
19 | #include "MapReduce.hpp"
20 | //#include "HashJoin.hpp"
21 | #include "DHT_symmetric.hpp"
22 | #include "Aggregates.hpp"
23 | #include "Iterators.hpp"
24 | #include "radish_utils.h"
25 | #include "stats.h"
26 | #include "strings.h"
27 | #include "dates.h"
28 | #include "relation.hpp"
29 | #include "pipeline.hpp"
30 | #include "TemporaryTable.hpp"
31 | #include "dowhile.hpp"
32 | //FIXME: prefer to include this only for Iterator codes
33 | #include "Operators.hpp"
34 | 
35 | DEFINE_uint64( nt, 30, "hack: number of tuples");
36 | DEFINE_bool( jsonsplits, false, "interpret input file F as F/part-*,"
37 |                              "and containing json records");
38 | 
39 | template <typename T>
40 | struct counter {
41 |   T count;
42 |   static GlobalAddress<counter<T>> create(T init) {
43 |     auto res = symmetric_global_alloc<counter<T>>();
44 |     on_all_cores([res, init] {
45 |         res->count = init;
46 |         });           
47 |     return res;
48 |   }
49 | } GRAPPA_BLOCK_ALIGNED;
50 | 
51 | template <typename T>
52 | T get_count(GlobalAddress<counter<T>> p) {
53 |   return p->count;                           
54 | }
55 | 
56 | {{declarations}}
57 | 
58 | StringIndex string_index;
59 | void init( ) {
60 | }
61 | 
62 | void query() {
63 |     double start, end;
64 |     double saved_scan_runtime = 0, saved_init_runtime = 0;
65 |     start = walltime();
66 | 
67 |      {{initialized}}
68 | 
69 |     end = walltime();
70 |     init_runtime += (end-start);
71 |     saved_init_runtime += (end-start);
72 | 
73 |     {{queryexec}}
74 | 
75 |     // since reset the stats after scan, need to set these again
76 |     scan_runtime = saved_scan_runtime;
77 |     init_runtime = saved_init_runtime;
78 | }
79 | 
80 | 
81 | int main(int argc, char** argv) {
82 |     // two-argument init is the runtime initializer (presumably Grappa::init), not the no-arg init() defined in this file
83 |     init(&argc, &argv);
84 |     run([] {
85 |         init();
86 |         // wall-clock time around the whole generated query
87 |         const double t_begin = Grappa::walltime();
88 |         query();
89 |         const double t_end = Grappa::walltime();
90 |         query_runtime = t_end - t_begin;
91 |         on_all_cores([] { emit_count = result.size(); });
92 |         Metrics::merge_and_print();
93 |     });
94 | 
95 |     finalize();
96 |     return 0;
97 | }
98 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/define_cl_arg.cpp:
--------------------------------------------------------------------------------
1 | DEFINE_{{type}}({{name}}, {{default_value}}, "{{description}}");
2 | 
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/define_metric.cpp:
--------------------------------------------------------------------------------
1 | GRAPPA_DEFINE_METRIC({% block type %}{% endblock %}, {% block name %}{% endblock %}, {% block init %}{% endblock %});


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/file_scan.cpp:
--------------------------------------------------------------------------------
 1 | {
 2 |     if (FLAGS_bin) {
 3 |         BinaryRelationFileReader<{{result_type}}> reader;
 4 |         {{resultsym}} = reader.read( FLAGS_input_file_{{name}} + ".bin" );
 5 |     } else if (FLAGS_jsonsplits) {
 6 |         SplitsRelationFileReader<JSONRowParser<{{result_type}},&schema_{{resultsym}}>, {{result_type}}> reader;
 7 |         {{resultsym}} = reader.read( FLAGS_input_file_{{name}} );
 8 |     } else {
 9 |         {{resultsym}}.data = readTuples<{{result_type}}>( FLAGS_input_file_{{name}}, FLAGS_nt);
10 |         {{resultsym}}.numtuples = FLAGS_nt;
11 |         auto l_{{resultsym}} = {{resultsym}};
12 |         on_all_cores([=]{ {{resultsym}} = l_{{resultsym}}; });
13 |     }
14 | }
15 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/gce_app_metric.cpp:
--------------------------------------------------------------------------------
 1 | {% extends "define_metric.cpp" %}
 2 | 
 3 | {% block type %}CallbackMetric<int64_t>{% endblock %}
 4 | 
 5 | {% block name %}app_{{pipeline_id}}_gce_incomplete{% endblock %}
 6 | 
 7 | {% block init %}[] {
 8 |   return {{global_syncname}}.incomplete();
 9 | }{% endblock %}
10 | 
11 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/global_array_memory_scan.cpp:
--------------------------------------------------------------------------------
1 | forall<&{{global_syncname}}>( {{inputsym}}.data, {{inputsym}}.numtuples, [=](int64_t i, {{tuple_type}}& {{tuple_name}}) {
2 | {{inner_code}}
3 | });
4 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/global_array_relation_declaration.cpp:
--------------------------------------------------------------------------------
1 | Relation<{{tuple_type}}> {{resultsym}};
2 | 
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/graph_file_scan.cpp:
--------------------------------------------------------------------------------
 1 | {
 2 |     tuple_graph tg;
 3 |     tg = readTuples( "{{name}}" );
 4 | 
 5 |     FullEmpty<GlobalAddress<Graph<Vertex>>> f1;
 6 |     privateTask( [&f1,tg] {
 7 |       f1.writeXF( Graph<Vertex>::create(tg, /*directed=*/true) );
 8 |     });
 9 |     auto l_{{resultsym}}_index = f1.readFE();
10 | 
11 |     on_all_cores([=] {
12 |       {{resultsym}}_index = l_{{resultsym}}_index;
13 |     });
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/grappa_group_timing.cpp:
--------------------------------------------------------------------------------
 1 | {% extends "group_timing.cpp" %}
 2 | {% block printcode %}
 3 | {{timer_metric}} += runtime_{{ident}};
 4 | VLOG(1) << "pipeline group {{ident}}: " << runtime_{{ident}} << " s";
 5 | {% endblock %}
 6 | 
 7 | {% block precode %}Grappa::Metrics::reset();
 8 | {{tracing_on}}{% endblock %}
 9 | 
10 | {% block postcode %}{{tracing_off}}{% endblock %}
11 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/grappa_pipeline_timing.cpp:
--------------------------------------------------------------------------------
1 | {% extends "pipeline_timing.cpp" %}
2 | 
3 | {% block printstart %}VLOG(1) {{ super() }};{% endblock %}
4 | 
5 | {% block printruntime %}VLOG(1) {{ super() }};{% endblock %}
6 | 
7 | {% block printend %}VLOG(1) {{ super() }};{% endblock %}


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/groupby/0key_output.cpp:
--------------------------------------------------------------------------------
 1 | {{comment}}
 2 | auto {{output_tuple_name}}_tmp = reduce<{% block templateargs %}{% endblock %}>({{hashname}});
 3 | 
 4 | {% block output %}{% endblock %}
 5 | 
 6 | {{inner_code}}
 7 | 
 8 | // putting a wait here satisfies the invariant that inner code depends
 9 | // on global synchronization by the pipeline source
10 | {{pipeline_sync}}.wait();
11 | 
12 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/groupby/combine_definition.cpp:
--------------------------------------------------------------------------------
1 | {{state_type}} {{name}}_combine(const {{state_type}}& state0, const {{state_type}}& state1) {  // builds a new state from state0 and state1
2 |     {% for c in combine_updates %}
3 |     {{ c }}
4 |     {% endfor %}
5 |     return {{state_type}}(std::make_tuple({{ combine_state_vars|join(',') }}));  // pack the listed state variables into the result
6 | }
7 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/groupby/init_definition.cpp:
--------------------------------------------------------------------------------
1 | {{state_type}} {{name}}_init() {  // builds the initial aggregate state
2 |     {% for u in init_updates %}
3 |     {{u}}
4 |     {% endfor %}
5 |     // the statements emitted above are expected to define the variables packed below
6 |     return {{state_type}}( std::make_tuple({{ init_state_vars|join(',') }}) );
7 | }


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/groupby/multi_uda_0key_output.cpp:
--------------------------------------------------------------------------------
 1 | {% extends '0key_output.cpp' %}
 2 | 
 3 | {% block templateargs %}
 4 | {{state_type}}, &{{combine_func}}
 5 | {% endblock %}
 6 | 
 7 | {% block output %}
 8 | {{output_tuple_type}} {{output_tuple_name}};
 9 | {{ assignmentcode }}
10 | {% endblock %}
11 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/groupby/multi_uda_0key_update.cpp:
--------------------------------------------------------------------------------
1 | auto {{hashname}}_local_ptr = {{hashname}}.localize();
2 | *{{hashname}}_local_ptr = {{update_func}}(*{{hashname}}_local_ptr, {{update_val}});
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/groupby/multi_uda_scan.cpp:
--------------------------------------------------------------------------------
1 | {% extends 'scan.cpp' %}
2 | 
3 | {# depends on materialized_tuple_ref constructor of std::tuple #}
4 | {% block initializer %}std::tuple_cat({{ super() }}, {{mapping_var_name}}.second.to_tuple()){% endblock %}
5 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/groupby/nkey_update.cpp:
--------------------------------------------------------------------------------
1 | {{comment}}
2 | {{hashname}}->update<&{{pipeline_sync}}, {{input_type}}, &{{update_func}},&{{init_func}}>(std::make_tuple({{ keygets|join(',') }}), {{update_val}});
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/groupby/one_built_in_0key_output.cpp:
--------------------------------------------------------------------------------
 1 | {% extends '0key_output.cpp' %}
 2 | 
 3 | {% block templateargs %}{{state_type}}, counter<{{state_type}}>, &{{combine_func}}, &get_count<{{state_type}}>{% endblock %}
 4 | 
 5 | {% block output %}
 6 | {{output_tuple_type}} {{output_tuple_name}};
 7 | {{output_tuple_set_func}} = {{output_tuple_name}}_tmp;
 8 | {% endblock %}
 9 | 
10 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/groupby/one_built_in_0key_update.cpp:
--------------------------------------------------------------------------------
1 | {{hashname}}->count = {{update_func}}({{hashname}}->count, {{update_val}});
2 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/groupby/one_built_in_scan.cpp:
--------------------------------------------------------------------------------
1 | {% extends 'scan.cpp' %}
2 | 
3 | {# depends on materialized_tuple_ref constructor of std::tuple #}
4 | {% block initializer %}std::tuple_cat({{ super() }}, std::make_tuple({{mapping_var_name}}.second)){% endblock %}


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/groupby/scan.cpp:
--------------------------------------------------------------------------------
1 | {{comment}}
2 | {{hashname}}->forall_entries<&{{pipeline_sync}}>([=](std::pair<const {{keytype}},{{emit_type}}>&{{mapping_var_name}}) {
3 |     {{output_tuple_type}} {{output_tuple_name}}({% block initializer %}{{mapping_var_name}}.first{% endblock %});
4 |     {{inner_code}}
5 | });
6 | 
7 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/groupby/update_definition.cpp:
--------------------------------------------------------------------------------
1 | {{state_type}} {{name}}_update(const {{state_type}}& state, const {{input_type}}& {{input_tuple_name}}) {  // builds a new state from the current state and one input tuple
2 |     {% for u in update_updates %}
3 |     {{ u }}
4 |     {% endfor %}
5 |     return {{state_type}}(std::make_tuple({{ update_state_vars|join(',') }}));  // pack the listed state variables into the result
6 | }


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/groupby/withkey_decl.cpp:
--------------------------------------------------------------------------------
1 | decltype(DHT_symmetric<{{keytype}},{{valtype}},hash_tuple::hash<{{keytype}}>>::create_DHT_symmetric( )) {{hashname}};
2 | 
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/groupby/withkey_init.cpp:
--------------------------------------------------------------------------------
1 | auto l_{{hashname}} = DHT_symmetric<{{keytype}},{{valtype}},hash_tuple::hash<{{keytype}}>>::create_DHT_symmetric( );
2 | on_all_cores([=] {
3 |   {{hashname}} = l_{{hashname}};
4 | });
5 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/groupby/withoutkey_init.cpp:
--------------------------------------------------------------------------------
1 | auto {{hashname}} = {{initializer}};
2 | on_all_cores([=] {
3 |    *({{hashname}}.localize()) = {{func_name}}_init();
4 | });
5 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/hashjoin/hash_declaration.cpp:
--------------------------------------------------------------------------------
1 | typedef MatchesDHT<{{keytype}}, {{in_tuple_type}}, hash_tuple::hash<{{keytype}}>> DHT_{{in_tuple_type}}_{{hashname}};
2 | DHT_{{in_tuple_type}}_{{hashname}} {{hashname}};
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/hashjoin/hash_init.cpp:
--------------------------------------------------------------------------------
1 | {{hashname}}.init_global_DHT( &{{hashname}}, cores()*16*1024 );
2 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/hashjoin/insert_materialize.cpp:
--------------------------------------------------------------------------------
1 | {{comment}}
2 | {{hashname}}.insert_async<&{{pipeline_sync}}>({{keyval}}, {{keyname}});
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/hashjoin/lookup.cpp:
--------------------------------------------------------------------------------
1 | {{comment}}
2 | {{hashname}}.lookup_iter<&{{pipeline_sync}}>( {{keyval}}, [=]({{right_tuple_type}}& {{right_tuple_name}}) {
3 |   join_coarse_result_count++;
4 |   {{out_tuple_type}} {{out_tuple_name}} = {{append_func_name}}({{keyname}}, {{right_tuple_name}});
5 |   {{inner_plan_compiled}}
6 | });
7 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/input_relation_declarations.cpp:
--------------------------------------------------------------------------------
1 | DEFINE_string(input_file_{{name}}, "{{name}}", "Input file");
2 | std::vector<std::string> schema_{{resultsym}} = { {% for c in colnames %}"{{c}}",{% endfor %} };
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/iterators/0key_groupby_source.cpp:
--------------------------------------------------------------------------------
1 | class {{class_symbol}} : public ZeroKeyAggregateSource<{{produce_type}}, {{state_type}}, &{{combine_func}}> {
2 |     using ZeroKeyAggregateSource<{{produce_type}}, {{state_type}}, &{{combine_func}}>::ZeroKeyAggregateSource;  // inherit the base-class constructors; arguments spelled exactly as in the base specifier
3 |     protected:
4 |         void mktuple({{produce_type}}& {{produce_tuple_name}}, {{state_type}}& {{state_tuple_name}}) {  // fills the produced tuple from the state using the generated assignments
5 |             {{assignment_code}}
6 |         }
7 | 
8 | };


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/iterators/apply.cpp:
--------------------------------------------------------------------------------
1 | class {{class_symbol}} : public Apply<{{consume_type}}, {{produce_type}}> {
2 |     using Apply<{{consume_type}}, {{produce_type}}>::Apply;  // inherit the base-class constructors
3 |     protected:
4 |       void apply({{produce_type}}& {{produce_tuple_name}}, {{consume_type}}& {{consume_tuple_name}}) {  // runs the generated statements with the produced and consumed tuples in scope
5 |         {{statements}}
6 |       }
7 | };


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/iterators/broadcast_stream.cpp:
--------------------------------------------------------------------------------
1 | class {{class_symbol}} : public BroadcastTupleStream<{{left_type}}, {{right_type}}, {{output_type}}> {
2 |     using BroadcastTupleStream<{{left_type}}, {{right_type}}, {{output_type}}>::BroadcastTupleStream;  // inherit the base-class constructors
3 |     protected:
4 |         void mktuple({{output_type}}& {{output_name}}, {{left_type}}& l, {{right_type}}& r) {  // assigns the output tuple from the append function applied to l and r
5 |               {{output_name}} = {{append_func_name}}(l, r);
6 |         }
7 | };


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/iterators/hashjoin_sink.cpp:
--------------------------------------------------------------------------------
 1 | class {{class_symbol}} : public HashJoinSink{{side}}<{{keytype}},
 2 |                                                      {{left_tuple_type}},
 3 |                                                      {{right_tuple_type}},
 4 |                                                      hash_tuple::hash<{{keytype}}>, &{{pipeline_sync}}> {
 5 |     using HashJoinSink{{side}}<{{keytype}},
 6 |                                                      {{left_tuple_type}},
 7 |                                                      {{right_tuple_type}},
 8 |                                                      hash_tuple::hash<{{keytype}}>, &{{pipeline_sync}}>::HashJoinSink{{side}};  // inherit the base-class constructors
 9 |     protected:
10 |       {{keytype}} mktuple({% if side == 'Right' %}
11 |                                       {{right_tuple_type}}
12 |                                       {% else %}
13 |                                       {{left_tuple_type}}
14 |                                       {% endif %}
15 |                                       &{{input_tuple_name}}) {
16 |             // returns the generated key expression; the parameter type is the right tuple type when side is 'Right', otherwise the left tuple type
17 |             return {{keyval}};
18 |       }
19 | };
20 | 
21 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/iterators/hashjoin_source.cpp:
--------------------------------------------------------------------------------
 1 | class {{class_symbol}} : public HashJoinSource<{{keytype}},
 2 |                                                 {{left_tuple_type}},
 3 |                                                 {{right_tuple_type}},
 4 |                                                 hash_tuple::hash<{{keytype}}>,
 5 | {{out_tuple_type}}> {
 6 |     // the using-declaration below inherits the base-class constructors
 7 |     using HashJoinSource<{{keytype}},
 8 |                                                 {{left_tuple_type}},
 9 |                                                 {{right_tuple_type}},
10 |                                                 hash_tuple::hash<{{keytype}}>, {{out_tuple_type}}>::HashJoinSource;
11 |     // mktuple returns the output tuple produced by the append function for one left/right pair
12 |     protected:
13 |         {{out_tuple_type}} mktuple({{left_tuple_type}}& {{left_name}}, {{right_tuple_type}}& {{right_name}}) {
14 |             return {{append_func_name}}({{left_name}}, {{right_name}});
15 |         }
16 | };


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/iterators/instantiate_operator.cpp:
--------------------------------------------------------------------------------
1 | Operator<{{produce_type}}> * {{symbol}} = new {{call_constructor}};
2 | 
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/iterators/instantiate_sink.cpp:
--------------------------------------------------------------------------------
1 | {{symbol}} = new {{call_constructor}};
2 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/iterators/multikey_groupby_sink.cpp:
--------------------------------------------------------------------------------
1 | class {{class_symbol}} : public AggregateSink<{{consume_type}}, {{keytype}}, {{state_type}}, &{{pipeline_sync}}> {
2 | using AggregateSink<{{consume_type}}, {{keytype}}, {{state_type}}, &{{pipeline_sync}}>::AggregateSink;  // inherit the base-class constructors
3 | protected:
4 |     {{keytype}} mktuple({{consume_type}}& {{consume_tuple_name}}) {  // returns the key tuple built from the generated key expressions
5 |         return std::make_tuple({{ keygets|join(',') }});
6 |     }
7 | 
8 | };


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/iterators/multikey_groupby_source.cpp:
--------------------------------------------------------------------------------
 1 | class {{class_symbol}} : public AggregateSource<{{produce_type}}, {{keytype}}, {{state_type}}, {{input_type}}> {
 2 |     using AggregateSource<{{produce_type}}, {{keytype}}, {{state_type}}, {{input_type}}>::AggregateSource;
 3 |     // the using-declaration above inherits the base-class constructors
 4 |     private:
 5 |         typedef AggregateSource<{{produce_type}}, {{keytype}}, {{state_type}}, {{input_type}}>::map_output_t map_output_t;
 6 |         // map_output_t is accessed below through .first and .second (presumably the group key and its aggregate state)
 7 |     protected:
 8 |         void mktuple({{produce_type}}& {{produce_tuple_name}}, map_output_t& {{mapping_var_name}}) {
 9 |             {{produce_type}} {{produce_tuple_name}}_tmp(std::tuple_cat({{mapping_var_name}}.first, {{mapping_var_name}}.second.to_tuple()));  // concatenate .first with .second.to_tuple()
10 |             {{produce_tuple_name}} = {{produce_tuple_name}}_tmp;
11 |         }
12 | };


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/iterators/partition_groupby/multikey_groupby_sink.cpp:
--------------------------------------------------------------------------------
1 | class {{class_symbol}} : public AggregatePartitionSink<{{consume_type}}, {{keytype}}, {{state_type}}> {
2 | using AggregatePartitionSink<{{consume_type}}, {{keytype}}, {{state_type}}>::AggregatePartitionSink;  // inherit the base-class constructors
3 | protected:
4 |     {{keytype}} mktuple({{consume_type}}& {{consume_tuple_name}}) {  // returns the key tuple built from the generated key expressions
5 |         return std::make_tuple({{ keygets|join(',') }});
6 |     }
7 | 
8 | };


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/iterators/select.cpp:
--------------------------------------------------------------------------------
1 | class {{class_symbol}} : public Select<{{consume_type}}, {{produce_type}}> {
2 |     using Select<{{consume_type}}, {{produce_type}}>::Select;  // inherit the base-class constructors; arguments must name the same specialization as the base specifier
3 |     protected:
4 |         bool predicate({{produce_type}}& {{consume_tuple_name}}) {  // evaluates the generated selection expression on the tuple
5 |             return {{expression}};
6 |         }
7 | };


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/iterators/sink_declaration.cpp:
--------------------------------------------------------------------------------
1 | Operator<int> * {{symbol}};


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/iterators/withkey_init.cpp:
--------------------------------------------------------------------------------
1 | auto {{hashname}} = DHT_symmetric_generic<{{keytype}},{{valtype}},{{update_val_type}},hash_tuple::hash<{{keytype}}>>::create_DHT_symmetric(&{{update_func}}, &{{init_func}});
2 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/partition_groupby/nkey_update.cpp:
--------------------------------------------------------------------------------
1 | {{comment}}
2 | {{hashname}}->template update_partition<{{input_type}}, &{{update_func}},&{{init_func}}>(std::make_tuple({{ keygets|join(',') }}), {{update_val}});
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/shuffle.cpp:
--------------------------------------------------------------------------------
1 | {{comment}}
2 | auto target = hash_tuple::hash<{{keytype}}>()({{keyval}}) % Grappa::cores();
3 | // DEV NOTE: if something inside this call is not captured in the lambda,
4 | // (probably a data structure) then we need to change its declaration to a global one.
5 | // The alternative is just to capture [=] but this will mask unneeded communication.
6 | Grappa::delegate::call<async, &{{pipeline_sync}}>(target, [{{keyname}}] {
7 |     {{inner_code}}
8 | });


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/shufflehashjoin/delete.cpp:
--------------------------------------------------------------------------------
1 | freeJoinReducers({{hashname}}, {{hashname}}_num_reducers);
2 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/shufflehashjoin/hash_init.cpp:
--------------------------------------------------------------------------------
1 | auto {{hashname}}_num_reducers = cores();
2 | auto {{hashname}} = allocateJoinReducers<int64_t,{{left_type}},{{right_type}},{{out_tuple_type}}>({{hashname}}_num_reducers);
3 | auto {{hashname}}_ctx = HashJoinContext<int64_t,{{left_type}},{{right_type}},{{out_tuple_type}}>({{hashname}}, {{hashname}}_num_reducers);
4 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/shufflehashjoin/materialize.cpp:
--------------------------------------------------------------------------------
1 | {{hashname}}_ctx.emitIntermediate{{side}}<&{{global_syncname}}>({{keyval}}, {{keyname}});
2 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/shufflehashjoin/reduce.cpp:
--------------------------------------------------------------------------------
1 | %(hashname)s_ctx.reduceExecute();
2 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/shufflehashjoin/result_scan.cpp:
--------------------------------------------------------------------------------
1 | MapReduce::forall_symmetric<&{{pipeline_sync}}>({{hashname}}, &JoinReducer<int64_t,{{left_type}},{{right_type}},{{out_tuple_type}}>::resultAccessor, [=]({{out_tuple_type}}& {{out_tuple_name}}) {
2 | {{inner_code_compiled}}
3 | });
4 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/spawn.cpp:
--------------------------------------------------------------------------------
1 | auto {{name}} = Pipeline({{ident}}, { {{dependence_captures}} }, [=] {
2 | {{inner_code}}
3 | });
4 | {{name}}.run();
5 | 
6 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/string_index_lookup.cpp:
--------------------------------------------------------------------------------
1 | auto l_{{sid}} = string_index.string_lookup({{st}});
2 | on_all_cores([=] { {{sid}} = l_{{sid}}; });
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/symmetric_array_file_scan.cpp:
--------------------------------------------------------------------------------
 1 | if (FLAGS_bin) {
 2 | BinaryRelationFileReader<{{result_type}},
 3 |                            aligned_vector<{{result_type}}>,
 4 |                            SymmetricArrayRepresentation<{{result_type}}>> reader;
 5 |                            // just always broadcast the name to all cores
 6 |                            // although for some queries it is unnecessary
 7 |                            auto l_{{resultsym}} = reader.read( FLAGS_input_file_{{name}} + ".bin" );
 8 |                            on_all_cores([=] {
 9 |                                 {{resultsym}} = l_{{resultsym}};
10 |                            });
11 | 
12 |                            } else {
13 | 
14 |                            CHECK(false) << "only --bin=true supported for symmetric array repr";
15 | 
16 |                            }
17 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/symmetric_array_memory_scan.cpp:
--------------------------------------------------------------------------------
1 | forall<&{{global_syncname}}>( {{readfrom}}, [=]({{tuple_type}}& {{tuple_name}}) {
2 | {{inner_code}}
3 | });
4 | 
5 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/symmetric_array_relation_declaration.cpp:
--------------------------------------------------------------------------------
1 | Relation<aligned_vector<{{tuple_type}}>> {{resultsym}};
2 | 
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/symmetric_array_relation_materialize.cpp:
--------------------------------------------------------------------------------
1 | {{sym}}.data->vector.push_back({{input_tuple_name}});
2 | 
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/symmetric_array_temprelation_declaration.cpp:
--------------------------------------------------------------------------------
1 | TemporaryTable<{{tuple_type}}> {{resultsym}};
2 | 
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/symmetric_array_temprelation_init.cpp:
--------------------------------------------------------------------------------
1 | {{sym}}.init();
2 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/symmetric_array_temprelation_materialize.cpp:
--------------------------------------------------------------------------------
1 | {{sym}}.append({{input_tuple_name}});
2 | 
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/symmetric_array_temprelation_materializer_done.cpp:
--------------------------------------------------------------------------------
1 | {{sym}}.release_producer();
2 | 
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/symmetric_array_temprelation_recycle.cpp:
--------------------------------------------------------------------------------
1 | // recycle result vector
2 | {{sym}}.register_producers({{num_producers}});
3 | 
4 | 
5 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/symmetrichashjoin/hash_declaration.cpp:
--------------------------------------------------------------------------------
1 | typedef DoubleDHT<{{keytype}}, {{left_in_tuple_type}}, {{right_in_tuple_type}}, hash_tuple::hash<{{keytype}}>> DHT_{{left_in_tuple_type}}_{{right_in_tuple_type}}_{{hashname}};
2 | DHT_{{left_in_tuple_type}}_{{right_in_tuple_type}}_{{hashname}} {{hashname}};
3 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/symmetrichashjoin/hash_init.cpp:
--------------------------------------------------------------------------------
1 | {{hashname}}.init_global_DHT( &{{hashname}}, cores()*16*1024 );
2 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/symmetrichashjoin/hash_insert_lookup.cpp:
--------------------------------------------------------------------------------
1 | {{hashname}}.insert_lookup_iter_{{side}}<&{{global_syncname}}>({{keyval}}, {{keyname}}, [=]({{other_tuple_type}} {{valname}}) {
2 |   join_coarse_result_count++;
3 |   {{out_tuple_type}} {{out_tuple_name}} = {{append_func_name}}({{left_name}}, {{right_name}});
4 |   {{inner_plan_compiled}}
5 | });
6 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/sync_declaration.cpp:
--------------------------------------------------------------------------------
1 | GlobalCompletionEvent {{global_syncname}}(true);
2 | 


--------------------------------------------------------------------------------
/raco/backends/radish/grappa_templates/wait_statement.cpp:
--------------------------------------------------------------------------------
1 | {{name}}.wait();
2 | 


--------------------------------------------------------------------------------
/raco/backends/sparql/__init__.py:
--------------------------------------------------------------------------------
1 | # everything in sparql.py made public
2 | from sparql import *
3 | 


--------------------------------------------------------------------------------
/raco/backends/sql/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uwescience/raco/1f2bedbef71bacf715340289f4973d85a3c1dc97/raco/backends/sql/__init__.py


--------------------------------------------------------------------------------
/raco/backends/sql/test_case.py:
--------------------------------------------------------------------------------
 1 | from collections import Counter
 2 | import sqlalchemy
 3 | import unittest
 4 | 
 5 | import raco.algebra as algebra
 6 | from raco.compile import optimize_by_rules
 7 | from raco.backends.logical import OptLogicalAlgebra
 8 | import raco.myrial.interpreter as interpreter
 9 | import raco.myrial.parser as parser
10 | import raco.scheme as scheme
11 | from .catalog import SQLCatalog
12 | import raco.types as types
13 | 
14 | 
class SQLTestCase(unittest.TestCase):
    """A base for testing the compilation of RACO programs to SQL queries.

    Every test starts from a fresh in-memory SQLite catalog that holds the
    employee table, plus a MyriaL parser and a statement processor bound to
    that catalog.
    """

    emp_table = [
        # id dept_id name salary mgr_id
        (0, 1, "Hank Levy", 1000000, -1),
        (1, 2, "Bill Howe", 25000, 0),
        (2, 1, "Dan Halperin", 90000, 0),
        (3, 1, "Andrew Whitaker", 5000, 0),
        (4, 2, "Shumo Chu", 5000, 0),
        (5, 1, "Victor Almeida", 25000, 0),
        (6, 3, "Dan Suciu", 90000, 0),
        (7, 1, "Magdalena Balazinska", 25000, 0)]

    emp_schema = scheme.Scheme([("id", types.INT_TYPE),
                                ("dept_id", types.INT_TYPE),
                                ("name", types.STRING_TYPE),
                                ("salary", types.LONG_TYPE),
                                ("mgr_id", types.INT_TYPE)])

    emp_key = "public:adhoc:employee"

    def setUp(self):
        """Create the catalog, load the employee table, set up MyriaL."""
        # SQLAlchemy; echo=True makes the engine log the SQL it issues
        self.db = SQLCatalog(sqlalchemy.
                             create_engine('sqlite:///:memory:', echo=True))
        self.db.add_table(self.emp_key, self.emp_schema)
        self.db.add_tuples(self.emp_key, self.emp_schema, self.emp_table)
        # MyriaL
        self.parser = parser.Parser()
        self.processor = interpreter.StatementProcessor(self.db)

    def query_to_phys_plan(self, query, **kwargs):
        """Return the rule-optimized plan for a MyriaL query.

        kwargs are forwarded to the processor's get_logical_plan.  When the
        optimized plan is rooted at a Store or StoreTemp, the input of that
        operator is returned instead.
        """
        statements = self.parser.parse(query)
        self.processor.evaluate(statements)
        p = self.processor.get_logical_plan(**kwargs)
        p = optimize_by_rules(p, OptLogicalAlgebra.opt_rules())
        if isinstance(p, (algebra.Store, algebra.StoreTemp)):
            p = p.input
        return p

    def execute(self, query, expected, **kwargs):
        """Evaluate `query` on the catalog and compare with `expected`.

        The result is wrapped in a Counter before the comparison, so
        `expected` should be a Counter of tuples.
        """
        p = self.query_to_phys_plan(query, **kwargs)
        ans = self.db.evaluate(p)
        # assertEqual: assertEquals is a deprecated alias
        self.assertEqual(expected, Counter(ans))
60 | 


--------------------------------------------------------------------------------
/raco/catalog_tests/default_cardinality_relation.py:
--------------------------------------------------------------------------------
1 | {'A': [('a', 'DOUBLE_TYPE'), ('b', 'STRING_TYPE')],
2 |  'B': [('x', 'DOUBLE_TYPE'), ('y', 'STRING_TYPE'), ('z', 'LONG_TYPE')]
3 |  }
4 | 


--------------------------------------------------------------------------------
/raco/catalog_tests/set_cardinality_relation.py:
--------------------------------------------------------------------------------
1 | {'A': [('b', 'STRING_TYPE')],
2 |  'B': [('x', 'DOUBLE_TYPE'), ('y', 'STRING_TYPE'), ('z', 'LONG_TYPE')],
3 |  'C': ([('a', 'DOUBLE_TYPE'), ('b', 'STRING_TYPE'), ('c', 'LONG_TYPE')], 12)
4 |  }
5 | 


--------------------------------------------------------------------------------
/raco/clangtestdb.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from raco import relation_key
 3 | from raco.catalog import Catalog
 4 | from raco.algebra import DEFAULT_CARDINALITY
 5 | import csv
 6 | 
 7 | 
 8 | class ClangTestDatabase(Catalog):
 9 |     """
10 |     Interface for table metadata and ingest
11 |     for raco.cpp query processor
12 |     """
13 | 
14 |     def __init__(self):
15 |         # Map from relation keys to tuples of (Bag, scheme.Scheme)
16 |         self.tables = {}
17 | 
18 |     def get_num_servers(self):
19 |         return 1
20 | 
21 |     def num_tuples(self, rel_key):
22 |         return DEFAULT_CARDINALITY
23 | 
24 |     def ingest(self, rel_key, contents, scheme):
25 |         '''Directly load raw data into the database'''
26 |         if isinstance(rel_key, basestring):
27 |             rel_key = relation_key.RelationKey.from_string(rel_key)
28 |         assert isinstance(rel_key, relation_key.RelationKey)
29 | 
30 |         with open(rel_key.relation, 'w') as writetable:
31 |             writer = csv.writer(writetable, delimiter=' ')
32 |             for tup in contents:
33 |                 writer.writerow(tup)
34 | 
35 |         self.tables[rel_key] = scheme
36 | 
37 |     def get_scheme(self, rel_key):
38 |         if isinstance(rel_key, basestring):
39 |             rel_key = relation_key.RelationKey.from_string(rel_key)
40 | 
41 |         assert isinstance(rel_key, relation_key.RelationKey)
42 | 
43 |         scheme = self.tables[rel_key]
44 |         return scheme
45 | 


--------------------------------------------------------------------------------
/raco/clib/algorithms.h:
--------------------------------------------------------------------------------
1 | 
2 | // function ptr syntax??
3 | bool TwoPassSelect(condition(Tuple *), const Relation *input, Relation *output);
4 | 
5 | bool HashJoin(const Attribute &leftattr, const Attribute &rightattr, const Relation *left, const Relation *right, Relation *output);
6 | 
7 | bool Scan(string &name, const Catalog *catalog, Relation *output);
8 | 


--------------------------------------------------------------------------------
/raco/clib/boolean.cc:
--------------------------------------------------------------------------------
 1 | #include "boolean.h"
 2 | 
 3 | using namespace std;
 4 | 
// Writes this expression to `os`.
// NOTE(review): unfinished stub -- the `os <<` statement has no operand, so
// this does not compile as written, and boolean.h declares no class named
// BinaryExpression.  Confirm the intended class and output format.
void BinaryExpression::PrintTo(ostream &os, int indent) {
  os <<
};
 8 | 
// AND, OR: a boolean expression built from two boolean sub-expressions.
// NOTE(review): boolean.h already defines a class with this name and declares
// BooleanExpression as a class template; this looks like an earlier draft --
// confirm which version is intended.
class BinaryBooleanExpression : public BooleanExpression {
  public:
    // Declared only; no definition appears in this file.
    BinaryBooleanExpression(BooleanExpression &left, BooleanExpression &right);
};
14 | 
// attribute reference, literal
// Empty common base class; Literal and Attribute below derive from it.
class Value {};
17 | 
// =, !=, <, >, <=, >=
// Comparison of two Values.  The operands are held by const reference, not
// copied, so both must outlive the operator object.
class BinaryBooleanOperator : public BooleanExpression {
  public:
    BinaryBooleanOperator(const Value &left, const Value &right) : left(left), right(right) {};
  protected:
    const Value &left;   // left-hand operand (not owned)
    const Value &right;  // right-hand operand (not owned)
};
26 | 
// A literal operand of type T; the value is stored by copy.
template<typename T>
class Literal : public Value {
  public:
    Literal(T val) : value(val) {};
  protected:
    T value;
};
34 | 
35 | class Attribute : public Value {
36 |   public:
37 |     Attribute(string val) : value(val) {};
38 |   protected:
39 |     string &value;
40 | };
41 | 
// Equality comparison; forwards both operands to BinaryBooleanOperator.
class EQ : public BinaryBooleanOperator {
  public:
    EQ(const Value &left, const Value &right) : BinaryBooleanOperator(left, right) {};
};
46 | 
47 | 


--------------------------------------------------------------------------------
/raco/clib/boolean.h:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | using namespace std;
 4 | 
 5 | template<class T>
 6 | class BinaryOperator {
 7 |   public:
 8 |     BinaryOperator(const T &left, const T& right) : left(left), right(right) {};
 9 |     virtual void PrintTo(ostream &os, int indent); 
10 |   protected:
11 |     const T &left;
12 |     const T &right;
13 | };
14 | 
15 | // all boolean expressions happen to be binary operators currently
16 | template<class T>
17 | class BooleanExpression : public BinaryOperator<T> {
18 |     BooleanExpression(const T &left, const T& right) : BinaryOperator<T>(left, right) {};
19 | };
20 | 
// AND, OR: a boolean expression whose operands are boolean expressions.
// NOTE(review): BooleanExpression is declared above as a class template, but
// it is used here without template arguments (both as the template argument
// and as the parameter type).  This looks ill-formed -- confirm the intended
// operand type.
class BinaryBooleanExpression : public BooleanExpression<BooleanExpression> {
  public:
    BinaryBooleanExpression(BooleanExpression &left, BooleanExpression &right) : BooleanExpression<BooleanExpression>(left, right) {};
};
26 | 
// attribute reference or literal
// Empty common base class; Literal and Attribute below derive from it, and
// Comparator takes its operands as Value references.
class Value {};
29 | 
// =, !=, <, >, <=, >=
// A boolean expression comparing two Values; both operands are forwarded to
// BooleanExpression<Value>.
class Comparator : public BooleanExpression<Value> {
  public:
    Comparator(const Value &left, const Value &right) : BooleanExpression<Value>(left, right) {};
};
35 | 
// A literal operand of type T; the value is stored by copy.
template<typename T>
class Literal : public Value {
  public:
    Literal(T val) : value(val) {};
  protected:
    T value;
};
43 | 
44 | class Attribute : public Value {
45 |   public:
46 |     Attribute(string val) : value(val) {};
47 |   protected:
48 |     string &value;
49 | };
50 | 
51 | class EQ : public BinaryBooleanOperator {
52 |   public: 
53 |     EQ(const Value &left, const Value &right) : BinaryBooleanOperator(left, right) {};
54 | };
55 | 
56 | 


--------------------------------------------------------------------------------
/raco/clib/testboolean.cc:
--------------------------------------------------------------------------------
 1 | #include "boolean.h"
 2 | #include <iostream>
 3 | 
 4 | using namespace std;
 5 | 
 6 | int main(int argc, char **argv) {
 7 | 
 8 |   BooleanExpression cond = EQ(Attribute(string("X")), Literal<int>(1));
 9 |   
10 |   cond.PrintTo(cond);
11 |   //cout << cond << endl;
12 | }
13 | 
14 | 


--------------------------------------------------------------------------------
/raco/clib/testboolean.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uwescience/raco/1f2bedbef71bacf715340289f4973d85a3c1dc97/raco/clib/testboolean.o


--------------------------------------------------------------------------------
/raco/cpp_datalog_utils.py:
--------------------------------------------------------------------------------
 1 | from raco import RACompiler
 2 | from raco.compile import compile
 3 | 
 4 | from raco.backends.radish import GrappaSymmetricHashJoin
 5 | from raco.backends.radish import GrappaShuffleHashJoin
 6 | 
 7 | import raco.viz as viz
 8 | 
 9 | import logging
10 | LOG = logging.getLogger(__name__)
11 | 
12 | 
def comment(s):
    """Return str(s) wrapped in a C-style block comment, one part per line."""
    return "".join(["/*\n", str(s), "\n*/\n"])
15 | 
16 | 
def hack_plan(alg, plan):
    """Override the join implementation used by `alg`.

    `plan` selects the join type: "sym" installs GrappaSymmetricHashJoin and
    "shuf" installs GrappaShuffleHashJoin.  Any other value, including None,
    leaves `alg` untouched.
    """
    if plan == "sym":
        alg.set_join_type(GrappaSymmetricHashJoin)
    elif plan == "shuf":
        alg.set_join_type(GrappaShuffleHashJoin)
24 | 
25 | 
def emitCode(query, name, algType, plan=None, emit_print=None, dir='.'):
    """Compile a Datalog query and write the generated code to disk.

    The logical and physical plans are logged, printed, and written as
    `<name>.logical.dot` and `<name>.physical.dot` relative to the current
    working directory.  The generated code, preceded by a comment holding
    the query text, is written to `<dir>/<name>.cpp`.

    `algType` is called with `emit_print` when that is not None, otherwise
    with no arguments; `plan` is passed to hack_plan to override the join
    type.

    Returns the path of the generated code file.
    """
    if emit_print is not None:
        alg = algType(emit_print)
    else:
        alg = algType()

    hack_plan(alg, plan)

    LOG.info("compiling %s: %s", name, query)

    # Create a compiler object
    dlog = RACompiler()

    # parse the query
    dlog.fromDatalog(query)
    LOG.info("logical: %s", dlog.logicalplan)

    # Single-argument call form: prints the same text under Python 2 and is
    # also valid Python 3 syntax.
    print(dlog.logicalplan)
    logical_dot = viz.operator_to_dot(dlog.logicalplan)
    with open("%s.logical.dot" % (name), 'w') as dwf:
        dwf.write(logical_dot)

    dlog.optimize(target=alg)

    LOG.info("physical: %s", dlog.physicalplan)

    print(dlog.physicalplan)
    physical_dot = viz.operator_to_dot(dlog.physicalplan)
    with open("%s.physical.dot" % (name), 'w') as dwf:
        dwf.write(physical_dot)

    # generate code in the target language
    code = ""
    code += comment("Query " + query)
    code += compile(dlog.physicalplan)

    fname = '{dir}/{name}.cpp'.format(dir=dir, name=name)
    with open(fname, 'w') as f:
        f.write(code)

    # returns name of code file
    return fname
69 | 


--------------------------------------------------------------------------------
/raco/datalog/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uwescience/raco/1f2bedbef71bacf715340289f4973d85a3c1dc97/raco/datalog/__init__.py


--------------------------------------------------------------------------------
/raco/datalog/datalog_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import json
 3 | 
 4 | import raco.fakedb
 5 | from raco import RACompiler
 6 | from raco.backends.myria import (compile_to_json,
 7 |                                  MyriaLeftDeepTreeAlgebra,
 8 |                                  MyriaHyperCubeAlgebra)
 9 | from raco.catalog import FakeCatalog
10 | 
11 | 
class DatalogTestCase(unittest.TestCase):
    """Base class for tests that run Datalog queries on a fake database."""

    def setUp(self):
        self.db = raco.fakedb.FakeDatabase()

    def execute_query(self, query, test_logical=False, skip_json=False,
                      output="OUTPUT", algebra=MyriaLeftDeepTreeAlgebra):
        """Run a test query against the fake database

        With test_logical the logical plan is evaluated directly.  Otherwise
        the plan is optimized for `algebra` and, unless skip_json is set,
        also compiled to JSON to check that this succeeds.  Returns the
        table named `output`.
        """

        dlog = RACompiler()
        dlog.fromDatalog(query)

        assert algebra in [MyriaLeftDeepTreeAlgebra,
                           MyriaHyperCubeAlgebra]

        if test_logical:
            plan = dlog.logicalplan
        else:
            if algebra == MyriaLeftDeepTreeAlgebra:
                dlog.optimize(MyriaLeftDeepTreeAlgebra())
            else:
                dlog.optimize(MyriaHyperCubeAlgebra(FakeCatalog(64)))
            plan = dlog.physicalplan

            if not skip_json:
                # test whether we can generate json without errors
                json_string = json.dumps(compile_to_json(
                    query, dlog.logicalplan, dlog.physicalplan, "datalog"))
                assert json_string

        self.db.evaluate(plan)
        return self.db.get_table(output)

    def check_result(self, query, expected, test_logical=False,
                     skip_json=False, output="OUTPUT",
                     algebra=MyriaLeftDeepTreeAlgebra):
        """Execute a test query with an expected output"""
        actual = self.execute_query(query, test_logical=test_logical,
                                    skip_json=skip_json, output=output,
                                    algebra=algebra)
        # assertEqual: assertEquals is a deprecated alias
        self.assertEqual(actual, expected)
53 | 


--------------------------------------------------------------------------------
/raco/datastructure/UnionFind.py:
--------------------------------------------------------------------------------
 1 | """UnionFind.py
 2 | 
 3 | Union-find data structure. Based on Josiah Carlson's code,
 4 | http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/215912
 5 | with significant additional changes by D. Eppstein.
 6 | """
 7 | 
 8 | 
 9 | class UnionFind:
10 | 
11 |     """Union-find data structure.
12 | 
13 |     Each unionFind instance X maintains a family of disjoint sets of
14 |     hashable objects, supporting the following two methods:
15 | 
16 |     - X[item] returns a name for the set containing the given item.
17 |       Each set is named by an arbitrarily-chosen one of its members; as
18 |       long as the set remains unchanged it will keep the same name. If
19 |       the item is not yet part of a set in X, a new singleton set is
20 |       created for it.
21 | 
22 |     - X.union(item1, item2, ...) merges the sets containing each item
23 |       into a single larger set.  If any item is not yet part of a set
24 |       in X, it is added to X as one of the members of the merged set.
25 |     """
26 | 
27 |     def __init__(self):
28 |         """Create a new empty union-find structure."""
29 |         self.weights = {}
30 |         self.parents = {}
31 | 
32 |     def __getitem__(self, object):
33 |         if object not in self.parents:
34 |             raise KeyError(object)
35 | 
36 |         # find path of objects leading to the root
37 |         path = [object]
38 |         root = self.parents[object]
39 |         while root != path[-1]:
40 |             path.append(root)
41 |             root = self.parents[root]
42 | 
43 |         # compress the path and return
44 |         for ancestor in path:
45 |             self.parents[ancestor] = root
46 |         return root
47 | 
48 |     def get_or_insert(self, object):
49 |         """Find and return the name of the set containing the object."""
50 | 
51 |         # check for previously unknown object
52 |         if object not in self.parents:
53 |             self.parents[object] = object
54 |             self.weights[object] = 1
55 |             return object
56 | 
57 |         return self[object]
58 | 
59 |     def __iter__(self):
60 |         """Iterate through all items ever found or unioned by this structure.
61 |         """
62 |         return iter(self.parents)
63 | 
64 |     def union(self, *objects):
65 |         """Find the sets containing the objects and merge them all."""
66 |         roots = [self.get_or_insert(x) for x in objects]
67 |         heaviest = max([(self.weights[r], r) for r in roots])[1]
68 |         for r in roots:
69 |             if r != heaviest:
70 |                 self.weights[heaviest] += self.weights[r]
71 |                 self.parents[r] = heaviest
72 | 


--------------------------------------------------------------------------------
/raco/datastructure/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uwescience/raco/1f2bedbef71bacf715340289f4973d85a3c1dc97/raco/datastructure/__init__.py


--------------------------------------------------------------------------------
/raco/datastructure/test_union_find.py:
--------------------------------------------------------------------------------
 1 | from UnionFind import UnionFind
 2 | import copy
 3 | import unittest
 4 | 
 5 | 
 6 | class TestUnionFind(unittest.TestCase):
 7 | 
 8 |     uf = UnionFind()
 9 |     uf.get_or_insert(1)
10 |     uf.get_or_insert(10)
11 |     uf.get_or_insert(2)
12 |     uf.get_or_insert(5)
13 | 
14 |     def test_insert_or_get(self):
15 |         uf = copy.deepcopy(self.uf)
16 |         self.assertIn(1, uf)
17 |         self.assertIn(10, uf)
18 |         self.assertIn(2, uf)
19 |         self.assertIn(5, uf)
20 |         self.assertEqual(uf.get_or_insert(1), 1)
21 |         self.assertEqual(uf.get_or_insert(5), 5)
22 |         self.assertEqual(uf.get_or_insert(10), 10)
23 |         self.assertEqual(uf.get_or_insert(2), 2)
24 |         self.assertEqual(uf.get_or_insert(30), 30)
25 |         self.assertEqual(uf.get_or_insert(52), 52)
26 | 
27 |     def test_get(self):
28 |         uf = self.uf
29 |         self.assertEqual(uf[1], 1)
30 |         self.assertEqual(uf[10], 10)
31 | 
32 |     def test_get_error(self):
33 |         with self.assertRaises(Exception):
34 |             _ = self.uf[52]
35 |         with self.assertRaises(Exception):
36 |             _ = self.uf[30]
37 | 
38 |     def test_union(self):
39 |         uf = copy.deepcopy(self.uf)
40 |         uf.union(1, 10)
41 |         self.assertEqual(uf.get_or_insert(1), uf.get_or_insert(10))
42 |         uf.union(2, 5)
43 |         self.assertEqual(uf.get_or_insert(2), uf.get_or_insert(5))
44 |         uf.union(2, 1)
45 |         self.assertEqual(uf.get_or_insert(10), uf.get_or_insert(5))
46 | 


--------------------------------------------------------------------------------
/raco/expression/__init__.py:
--------------------------------------------------------------------------------
1 | from .expression import *
2 | from .aggregate import *
3 | from .boolean import *
4 | from .function import *
5 | from .util import *
6 | from .statevar import *
7 | 


--------------------------------------------------------------------------------
/raco/expression/statevar.py:
--------------------------------------------------------------------------------
1 | import collections
2 | 
# A state variable, as used by StatefulApply and UDAs.  Fields, in order:
# name, init_expr, update_expr.
StateVar = collections.namedtuple('StateVar', 'name init_expr update_expr')
6 | 


--------------------------------------------------------------------------------
/raco/expression/udf.py:
--------------------------------------------------------------------------------
1 | import collections
2 | 
# A user-defined function.  Fields, in order: args, sexpr.
Function = collections.namedtuple('Function', 'args sexpr')

# A user-defined stateful apply or UDA.  Fields, in order: args, statemods,
# sexpr.
StatefulFunc = collections.namedtuple('StatefulFunc', 'args statemods sexpr')
9 | 


--------------------------------------------------------------------------------
/raco/fake_data.py:
--------------------------------------------------------------------------------
 1 | import collections
 2 | import raco.scheme as scheme
 3 | import raco.types as types
 4 | 
 5 | """This class contains fake data used by several unit tests."""
 6 | 
 7 | 
class FakeData(object):
    """Fake relations shared by several unit tests.

    Each relation is given as a Counter of tuples (`*_table`), a Scheme
    (`*_schema`) and a relation-key string (`*_key`).
    """

    emp_table = collections.Counter([
        # id dept_id name salary
        (1, 2, "Bill Howe", 25000),
        (2, 1, "Dan Halperin", 90000),
        (3, 1, "Andrew Whitaker", 5000),
        (4, 2, "Shumo Chu", 5000),
        (5, 1, "Victor Almeida", 25000),
        (6, 3, "Dan Suciu", 90000),
        (7, 1, "Magdalena Balazinska", 25000)])

    emp_schema = scheme.Scheme([("id", types.INT_TYPE),
                                ("dept_id", types.INT_TYPE),
                                ("name", types.STRING_TYPE),
                                ("salary", types.LONG_TYPE)])

    emp_key = "public:adhoc:employee"

    dept_table = collections.Counter([
        # id name manager
        (1, "accounting", 5),
        (2, "human resources", 2),
        (3, "engineering", 2),
        (4, "sales", 7)])

    dept_schema = scheme.Scheme([("id", types.LONG_TYPE),
                                 ("name", types.STRING_TYPE),
                                 ("manager", types.LONG_TYPE)])

    dept_key = "public:adhoc:department"

    numbers_table = collections.Counter([
        # id val
        (1, 3),
        (2, 5),
        (3, -2),
        (16, -4.3)])

    numbers_schema = scheme.Scheme([("id", types.LONG_TYPE),
                                    ("val", types.DOUBLE_TYPE)])

    numbers_key = "public:adhoc:numbers"

    # NOTE(review): the meaning of each position in this tuple is not
    # visible here (presumably a function catalog entry) -- confirm against
    # the code that consumes it.
    test_function = ("test", "function_text", 1,
                     "id (INT_TYPE), dept_id (INT_TYPE)",
                     "INT_TYPE", "test_body")
52 | 


--------------------------------------------------------------------------------
/raco/from_repr.py:
--------------------------------------------------------------------------------
 1 | # import all the expressions and algebras
 2 | 
 3 | from raco.backends.myria import *
 4 | from raco.backends.cpp import *
 5 | from raco.backends.cpp.cppcommon import *
 6 | from raco.backends.radish import *
 7 | from raco.algebra import *
 8 | from raco.scheme import *
 9 | from raco.expression.expression import *
10 | from raco.expression.aggregate import *
11 | from raco.types import *
12 | from raco.relation_key import *
13 | from raco.expression.boolean import *
14 | 
15 | 
import logging
logging.basicConfig()
_LOG = logging.getLogger(__name__)


def plan_from_repr(repr_string):
    """Rebuild a plan object by evaluating its repr string.

    SECURITY: the string is passed to eval() and can therefore run arbitrary
    code.  Only call this with trusted input; a warning is logged on every
    call as a reminder.
    """
    _LOG.warning(
        "Relying on eval! This module should only be used in trusted "
        "development situations\n")
    return eval(repr_string)
26 | 


--------------------------------------------------------------------------------
/raco/myrial/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uwescience/raco/1f2bedbef71bacf715340289f4973d85a3c1dc97/raco/myrial/__init__.py


--------------------------------------------------------------------------------
/raco/myrial/cli_test.py:
--------------------------------------------------------------------------------
 1 | """Basic test of the command-line interface to Myrial."""
 2 | 
 3 | import subprocess
 4 | import unittest
 5 | 
 6 | 
 7 | class CliTest(unittest.TestCase):
 8 | 
 9 |     def test_cli(self):
10 |         out = subprocess.check_output(['python', 'scripts/myrial',
11 |                                        'examples/reachable.myl'])
12 |         self.assertIn('DO', out)
13 |         self.assertIn('WHILE', out)
14 | 
15 |     def test_cli_standalone_execute(self):
16 |         out = subprocess.check_output(['python', 'scripts/myrial', '-f',
17 |                                        'examples/standalone.myl'])
18 |         self.assertIn('Dan Suciu,engineering', out)
19 | 
20 |     def test_cli_standalone_json(self):
21 |         out = subprocess.check_output(['python', 'scripts/myrial', '-j',
22 |                                        'examples/cast.myl'])
23 |         self.assertIn('rawQuery', out)
24 | 
25 |     def test_cli_standalone_logical(self):
26 |         out = subprocess.check_output(['python', 'scripts/myrial', '-l',
27 |                                        'examples/standalone.myl'])
28 |         self.assertIn("CrossProduct[FileScan", out)
29 | 
30 |     def test_cli_standalone_repr(self):
31 |         out = subprocess.check_output(['python', 'scripts/myrial', '-r',
32 |                                        'examples/standalone.myl'])
33 |         self.assertIn("FileScan('./examples/dept.csv'", out)
34 | 
35 |     def test_cli_reserved_column_name(self):
36 |         proc = subprocess.Popen(
37 |             ['python', 'scripts/myrial', 'examples/bad_column_name.myl'],
38 |             stdout=subprocess.PIPE)
39 |         out = proc.communicate()[0]
40 |         self.assertIn('The token "SafeDiv" on line 2 is reserved', out)
41 | 


--------------------------------------------------------------------------------
/raco/myrial/empty_aggregate_tests.py:
--------------------------------------------------------------------------------
 1 | """Test of aggregations over empty relations.
 2 | 
 3 | Aggregation queries without grouping should return sensible default values:
 4 | COUNT(empty) == 0
 5 | SUM(empty) == 0
 6 | """
 7 | 
 8 | import collections
 9 | 
10 | import raco.myrial.myrial_test as myrial_test
11 | 
12 | 
class EmptyAggregateTests(myrial_test.MyrialTestCase):
    """Ungrouped aggregates over an empty relation yield a single 0."""

    # Query template; {agg} is replaced by the aggregate function name.
    _QUERY = """
        W = EMPTY(v:int);
        X = [FROM W EMIT {agg}(v)];
        STORE(X, OUTPUT);
        """

    def _check_zero(self, agg):
        """Check that `agg` over the empty relation returns the row (0,)."""
        expected = collections.Counter([(0,)])
        self.check_result(self._QUERY.format(agg=agg), expected)

    def test_count(self):
        self._check_zero("COUNT")

    def test_sum(self):
        self._check_zero("SUM")
35 | 


--------------------------------------------------------------------------------
/raco/myrial/filescan_tests.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import collections
 3 | 
 4 | import raco.algebra
 5 | import raco.scheme as scheme
 6 | import raco.myrial.myrial_test as myrial_test
 7 | from raco import types
 8 | 
 9 | 
class FileScanTest(myrial_test.MyrialTestCase):
    """Load a CSV file with non-default parsing options and store it."""

    # Loads examples/load_options.csv with custom delimiter, quote and
    # escape characters, skipping the first two lines.
    LOAD_QUERY = """
x = load("examples/load_options.csv",
csv(
    schema(column0:int, column1:string, column2:string, column3:float),
    delimiter="|", quote="~", escape="%", skip=2));
store(x, OUTPUT);
        """

    def test_filescan(self):
        rows = [
            (1, "foo", "abc|def", 1.0),
            (2, "bar", "ghi|jkl", 2.0),
        ]
        self.check_result(self.LOAD_QUERY, collections.Counter(rows))
25 | 


--------------------------------------------------------------------------------
/raco/myrial/keywords.py:
--------------------------------------------------------------------------------
 1 | """Emit all Myrial/SQL keywords as lowercase strings."""
 2 | 
 3 | from raco.myrial.scanner import (builtins, keywords,
 4 |                                  types, comprehension_keywords,
 5 |                                  word_operators)
 6 | from raco.expression.expressions_library import EXPRESSIONS
 7 | 
 8 | 
 9 | def get_keywords():
10 |     """Return a list of Myrial/SQL keywords.
11 | 
12 |     This includes reserved lex tokens and system-defined functions.
13 |     """
14 |     return {
15 |         'builtins': sorted(
16 |             EXPRESSIONS.keys() + [kw.lower() for kw in builtins]),
17 |         'keywords': sorted(kw.lower() for kw in keywords),
18 |         'types': sorted(kw.lower() for kw in types),
19 |         'comprehension_keywords': sorted(
20 |             kw.lower() for kw in comprehension_keywords),
21 |         'word_operators': sorted(kw.lower() for kw in word_operators),
22 |     }
23 | 


--------------------------------------------------------------------------------
/raco/myrial/kmeans_test.py:
--------------------------------------------------------------------------------
 1 | """Unit test of kmeans.
 2 | 
 3 | TODO: implement a clustering algorithm that is less sensitive to the
 4 | initial cluster selection.  We can't verify the output because this algorithm
 5 | chooses initial clusters in a non-robust way.
 6 | """
 7 | 
 8 | import collections
 9 | 
10 | import raco.scheme as scheme
11 | import raco.myrial.myrial_test as myrial_test
12 | from raco import types
13 | 
14 | 
class KmeansTest(myrial_test.MyrialTestCase):
    """Run examples/kmeans.myl on nine 2-D points; the output is unchecked."""

    # (id, x, y)
    points = [
        (1, 1.0, 1.0), (2, 0.99, 0.99), (3, 1.01, 1.01),
        (4, 10.0, 10.0), (5, 10.99, 10.99), (6, 10.01, 10.01),
        (7, 100.0, 100.0), (8, 100.99, 100.99), (9, 100.01, 100.01),
    ]
    points_table = collections.Counter(points)

    points_schema = scheme.Scheme([
        ('id', types.LONG_TYPE),
        ('x', types.DOUBLE_TYPE),
        ('y', types.DOUBLE_TYPE),
    ])
    points_key = "public:adhoc:points"

    def setUp(self):
        super(KmeansTest, self).setUp()

        fixture = KmeansTest
        self.db.ingest(
            fixture.points_key, fixture.points_table, fixture.points_schema)

    def test_kmeans(self):
        # Only checks that the program runs; JSON generation is skipped.
        with open('examples/kmeans.myl') as source:
            program = source.read()
        self.execute_query(program, skip_json=True)
37 | 


--------------------------------------------------------------------------------
/raco/myrial/pagerank_test.py:
--------------------------------------------------------------------------------
 1 | """PageRank unit test.
 2 | 
 3 | Example data taken from:
 4 | http://select.cs.cmu.edu/code/graphlab/doxygen/html/pagerank_example.html
 5 | """
 6 | 
 7 | import collections
 8 | 
 9 | import raco.scheme as scheme
10 | import raco.myrial.myrial_test as myrial_test
11 | from raco import types
12 | 
13 | 
class PageRankTest(myrial_test.MyrialTestCase):
    """Run the PageRank example programs on a five-vertex graph."""

    # (src, dst)
    edge_table = collections.Counter([
        (0, 3),
        (1, 0), (1, 2),
        (2, 0), (2, 1), (2, 3),
        (3, 0), (3, 1), (3, 2), (3, 4),
        (4, 0), (4, 1), (4, 2), (4, 3), (4, 4),
    ])

    edge_schema = scheme.Scheme([("src", types.LONG_TYPE),
                                 ("dst", types.LONG_TYPE)])
    edge_key = "public:adhoc:edges"

    vertex_table = collections.Counter((x,) for x in range(5))
    vertex_key = "public:adhoc:vertices"
    vertex_schema = scheme.Scheme([("id", types.LONG_TYPE)])

    def setUp(self):
        super(PageRankTest, self).setUp()

        fixture = PageRankTest
        self.db.ingest(
            fixture.edge_key, fixture.edge_table, fixture.edge_schema)
        self.db.ingest(
            fixture.vertex_key, fixture.vertex_table, fixture.vertex_schema)

    def __do_test(self, phile):
        """Run the program stored in file `phile` and check every rank."""
        with open(phile) as fh:
            query = fh.read()

        # The result rows are pairs; turn them into a key -> value lookup.
        ranks = dict(self.execute_query(query).elements())

        expected = [
            (0, 0.23576110832410296),
            (1, 0.16544845649781043),
            (2, 0.18370688939571236),
            (3, 0.3016893082129546),
            (4, 0.11339423756941983),
        ]
        for vertex, rank in expected:
            self.assertAlmostEqual(ranks[vertex], rank)

    def test_pagerank(self):
        self.__do_test('examples/pagerank.myl')

    def verify_undefined(self, var):
        """Assert that looking up temp table `var` raises KeyError."""
        with self.assertRaises(KeyError):
            self.db.get_temp_table(var)

    def test_pagerank_deadcode(self):
        """Test of page rank with numerous dead code statements."""
        self.__do_test('examples/pagerank_dead.myl')

        # Verify that D0, D1 tables are compiled out
        for dead in ("D0", "D1"):
            self.verify_undefined(dead)
79 | 


--------------------------------------------------------------------------------
/raco/myrial/reachable_tests.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import collections
 3 | 
 4 | import raco.algebra
 5 | import raco.scheme as scheme
 6 | import raco.myrial.myrial_test as myrial_test
 7 | from raco import types
 8 | 
 9 | 
class ReachableTest(myrial_test.MyrialTestCase):
    """Tests built on the reachability example and its edge relation."""

    edge_table = collections.Counter([
        (1, 2), (2, 3), (3, 4), (4, 3), (3, 5),
        (4, 13), (5, 4), (1, 9), (7, 1), (6, 1),
        (10, 11), (11, 12), (12, 10), (13, 4), (10, 1),
    ])

    edge_schema = scheme.Scheme([("src", types.LONG_TYPE),
                                 ("dst", types.LONG_TYPE)])
    edge_key = "public:adhoc:edges"

    def setUp(self):
        """Build the base fixture and ingest the edge relation."""
        super(ReachableTest, self).setUp()

        fixture = ReachableTest
        self.db.ingest(fixture.edge_key, fixture.edge_table,
                       fixture.edge_schema)

    def test_reachable(self):
        """The reachable example must yield exactly the expected vertices."""
        with open('examples/reachable.myl') as source:
            program = source.read()

        vertices = (1, 2, 3, 4, 5, 9, 13)
        expected = collections.Counter((v,) for v in vertices)

        self.check_result(program, expected, skip_json=True)

    def test_multi_condition_join(self):
        """A join with several conditions matches a brute-force self-join."""
        query = """
        Edge = SCAN(public:adhoc:edges);
        Symmetric = [FROM Edge AS E1, Edge AS E2
                     WHERE E1.src==E2.dst
                       AND E2.src==E1.dst
                       AND E1.src < E1.dst
                     EMIT E1.src AS src, E1.dst AS dst];
        STORE(Symmetric, OUTPUT);
        """
        edges = ReachableTest.edge_table
        # Same predicate as the query, evaluated over every pair of edges.
        expected = collections.Counter(
            (src, dst)
            for (src, dst) in edges
            for (back_src, back_dst) in edges
            if src == back_dst and dst == back_src and src < dst)
        self.check_result(query, expected)

    def test_cross_plus_selection_becomes_join(self):
        """Test that the optimizer compiles away cross-products."""
        with open('examples/reachable.myl') as source:
            program = source.read()

        def plan_contains_cross(plan):
            def visit(op):
                # Only cross products whose left input is not a singleton
                # relation are reported.
                if not isinstance(op, raco.algebra.CrossProduct):
                    return
                if isinstance(op.left, raco.algebra.SingletonRelation):
                    return
                yield True

            return any(plan.postorder(visit))

        self.processor.evaluate(self.parser.parse(program))

        logical = self.processor.get_logical_plan()
        self.assertTrue(plan_contains_cross(logical))

        physical = self.processor.get_physical_plan()
        self.assertFalse(plan_contains_cross(physical))
93 | 


--------------------------------------------------------------------------------
/raco/myrial/sample_test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: UTF-8 -*-
 2 | import raco.myrial.myrial_test as myrial_test
 3 | from raco.fake_data import FakeData
 4 | 
 5 | 
 6 | class TestSamplingOperations(myrial_test.MyrialTestCase, FakeData):
 7 |     def setUp(self):
 8 |         super(TestSamplingOperations, self).setUp()
 9 | 
10 |         self.db.ingest(TestSamplingOperations.emp_key,
11 |                        TestSamplingOperations.emp_table,
12 |                        TestSamplingOperations.emp_schema)
13 | 
14 |     def run_samplescan(self, sample_size, sample_type, is_pct=False):
15 |         pct = '%' if is_pct else ''
16 |         query = """
17 |         emp = SAMPLESCAN({rel_key}, {size}{pct}, {type});
18 |         STORE(emp, OUTPUT);
19 |         """.format(rel_key=self.emp_key, size=sample_size, pct=pct,
20 |                    type=sample_type)
21 | 
22 |         res = self.execute_query(query)
23 |         if is_pct:
24 |             expected_len = int(round(len(res) * (sample_size / 100.0)))
25 |         else:
26 |             expected_len = sample_size
27 |         self.assertEquals(len(res), expected_len)
28 | 
29 |     def test_samplescan__wr_zero(self):
30 |         self.run_samplescan(0, 'WR')
31 | 
32 |     def test_samplescan__wr_zero_pct(self):
33 |         self.run_samplescan(0, 'WR', True)
34 | 
35 |     def test_samplescan__wor_zero(self):
36 |         self.run_samplescan(0, 'WoR')
37 | 
38 |     def test_samplescan__wor_zero_pct(self):
39 |         self.run_samplescan(0, 'WoR', True)
40 | 
41 |     def test_samplescan__wr_one(self):
42 |         self.run_samplescan(1, 'WR')
43 | 
44 |     def test_samplescan__wor_one(self):
45 |         self.run_samplescan(1, 'WoR')
46 | 
47 |     def test_samplescan__wr_all(self):
48 |         self.run_samplescan(len(self.emp_table), 'WR')
49 | 
50 |     def test_samplescan__wr_100_pct(self):
51 |         self.run_samplescan(100, 'WR', True)
52 | 
53 |     def test_samplescan__wor_all(self):
54 |         self.run_samplescan(len(self.emp_table), 'WoR')
55 | 
56 |     def test_samplescan__wor_100_pct(self):
57 |         self.run_samplescan(100, 'WoR', True)
58 | 


--------------------------------------------------------------------------------
/raco/myrial/sigma_clipping_test.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import collections
 3 | 
 4 | import raco.scheme as scheme
 5 | import raco.myrial.myrial_test as myrial_test
 6 | from raco import types
 7 | 
 8 | 
 9 | class SigmaClippingTest(myrial_test.MyrialTestCase):
10 |     points = [25.0, 27.2, 23.4, 25.1, 26.3, 24.9, 23.5, 22.7, 108.2,
11 |               26.2, 25.3, 24.7, 25.01, 26.1, 22.8, 2.2, 24.8, 25.05, 25.15]
12 |     points_tuples = [(i, x) for i, x in enumerate(points)]
13 |     points_table = collections.Counter(points_tuples)
14 | 
15 |     points_schema = scheme.Scheme([('id', types.LONG_TYPE), ('v', types.DOUBLE_TYPE)])  # noqa
16 |     points_key = "public:adhoc:sc_points"
17 | 
18 |     def setUp(self):
19 |         super(SigmaClippingTest, self).setUp()
20 | 
21 |         self.db.ingest(SigmaClippingTest.points_key,
22 |                        SigmaClippingTest.points_table,
23 |                        SigmaClippingTest.points_schema)
24 | 
25 |         # TODO: Better support for empty relations in the language
26 |         self.db.ingest("empty", collections.Counter(),
27 |                        SigmaClippingTest.points_schema)
28 | 
29 |     def run_it(self, query):
30 |         points = [(i, x) for i, x in self.points_tuples if x < 28 and x > 22]
31 |         expected = collections.Counter(points)
32 |         self.check_result(query, expected, output='sc_points_clipped')
33 | 
34 |     def test_v0(self):
35 |         with open('examples/sigma-clipping-v0.myl') as fh:
36 |             query = fh.read()
37 |         self.run_it(query)
38 | 
39 |     def test_v2(self):
40 |         with open('examples/sigma-clipping.myl') as fh:
41 |             query = fh.read()
42 |         self.run_it(query)
43 | 


--------------------------------------------------------------------------------
/raco/python/__init__.py:
--------------------------------------------------------------------------------
1 | from convert import convert
2 | 


--------------------------------------------------------------------------------
/raco/python/convert.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | """ Functions to convert Python functions to RACO expressions """
 3 | 
 4 | import ast
 5 | 
 6 | from raco.python.exceptions import PythonSyntaxException, \
 7 |     PythonConvertException
 8 | from raco.python.util import visitor
 9 | from raco.python.util.decompile import get_source
10 | 
11 | 
def convert(source_or_ast_or_callable, schema, udfs=None):
    """
    Translate Python code into the equivalent RACO expression

    A source string is parsed into an AST and a callable is first turned back
    into source text; both are then converted recursively.
    :param source_or_ast_or_callable: Source string, callable, or AST node
    :param schema: List of schema for the input parameter(s)
    :param udfs: List of (name, arity) pairs of UDFs
    :return: RACO expression for the input, or None if the visitor result
             is falsy
    :raise PythonSyntaxException: if a source string does not parse
    :raise PythonConvertException: if the argument is of no supported kind
    """
    target = source_or_ast_or_callable

    if isinstance(target, basestring):
        try:
            tree = ast.parse(target)
            return convert(tree, schema, udfs)
        except SyntaxError as error:
            raise PythonSyntaxException(error.msg, error.lineno, error.offset)

    if callable(target):
        return convert(get_source(target), schema, udfs)

    if not isinstance(target, ast.AST):
        raise PythonConvertException(
            'Argument was not a source string, callable, or AST node')

    expression_visitor = visitor.ExpressionVisitor(schema or [], udfs or [])
    return expression_visitor.visit(target) or None
33 | 


--------------------------------------------------------------------------------
/raco/python/exceptions.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | """ Exceptions that occur during Python->RACO conversion """
 3 | 
 4 | 
 5 | class PythonConvertException(Exception):
 6 |     """ Base class for conversion exceptions """
 7 |     pass
 8 | 
 9 | 
10 | class PythonTokenException(PythonConvertException):
11 |     """ Base exception class for errors associated with a specific token """
12 |     def __init__(self, token, line, column):
13 |         self.token = token
14 |         self.line = line
15 |         self.column = column
16 | 
17 | 
18 | class PythonUnrecognizedTokenException(PythonTokenException):
19 |     """ Error occurring when an unrecognized token is encountered """
20 |     def __str__(self):
21 |         return 'Conversion error at token %s on line %d:%d' % \
22 |                (self.token, self.line, self.column)
23 | 
24 | 
25 | class PythonOutOfRangeException(PythonUnrecognizedTokenException):
26 |     """ Error occurring when a slice is out of range"""
27 |     def __str__(self):
28 |         return 'Slice out of range error near token %s on line %d:%d' % \
29 |                (self.token, self.line, self.column)
30 | 
31 | 
32 | class PythonSyntaxException(PythonConvertException):
33 |     """ Error occurring when a Python source string contains a syntax error """
34 |     def __init__(self, message, line, column):
35 |         self.token = message
36 |         self.line = line
37 |         self.column = column
38 | 
39 |     def __str__(self):
40 |         return 'Syntax error: %s (%d%s)' % \
41 |                (self.token, self.line,
42 |                 ':' + str(self.column) if self.column else '')
43 | 
44 | 
45 | class PythonUnsupportedOperationException(PythonSyntaxException):
46 |     """ Error occurring when an unsupported operation is detected """
47 |     def __str__(self):
48 |         return 'Unsupported operation: %s (%d%s)' % \
49 |                (self.token, self.line,
50 |                 ':' + str(self.column) if self.column else '')
51 | 
52 | 
53 | class PythonArgumentException(PythonSyntaxException):
54 |     """ Error occurring when a problem with an argument is detected """
55 |     def __str__(self):
56 |         return '%s (%d%s)' % \
57 |                (self.token, self.line,
58 |                 ':' + str(self.column) if self.column else '')
59 | 


--------------------------------------------------------------------------------
/raco/python/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uwescience/raco/1f2bedbef71bacf715340289f4973d85a3c1dc97/raco/python/tests/__init__.py


--------------------------------------------------------------------------------
/raco/python/tests/convert_tests.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | """ Tests for the 'convert' entry-point function """
 4 | import ast
 5 | import unittest
 6 | 
 7 | from raco.expression import NumericLiteral
 8 | from raco.python import convert
 9 | 
10 | 
class TestConvert(unittest.TestCase):
    """Each accepted input kind must convert to NumericLiteral(0).

    The schema argument is None in every test.
    """

    def test_string(self):
        """A source string is converted."""
        f = "lambda: 0"
        e = convert(f, None)
        self.assertEqual(e, NumericLiteral(0))

    def test_ast(self):
        """An already parsed AST is converted."""
        t = ast.parse("lambda: 0")
        e = convert(t, None)
        self.assertEqual(e, NumericLiteral(0))

    def test_lambda(self):
        """An anonymous function object is converted."""
        f = lambda: 0
        e = convert(f, None)
        self.assertEqual(e, NumericLiteral(0))

    def test_function(self):
        """A locally defined named function is converted."""
        def f():
            return 0
        e = convert(f, None)
        self.assertEqual(e, NumericLiteral(0))
32 | 


--------------------------------------------------------------------------------
/raco/python/tests/decompile_lambda_test.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | """ Tests for decompiling lambdas """
 4 | 
 5 | import unittest
 6 | 
 7 | from raco.python.exceptions import PythonConvertException
 8 | from raco.python.util import decompile
 9 | 
10 | 
class TestDecompileLambdas(unittest.TestCase):
    """Round-trip checks for decompile.get_source on anonymous functions.

    Each test evaluates the recovered source and compares the result of
    calling it with the result of the original function. The layout of the
    statements below is part of what is being tested; do not reformat them.
    """

    def test_simple(self):
        """Function passed inline, conventional spacing."""
        s = decompile.get_source(lambda: 0)
        self.assertEquals(eval(s)(), 0)

    def test_no_space(self):
        """No whitespace on either side of the colon."""
        s = decompile.get_source(lambda:0)
        self.assertEquals(eval(s)(), 0)

    def test_space_after_colon(self):
        """Whitespace between the keyword and the colon, none after it."""
        s = decompile.get_source(lambda :0)
        self.assertEquals(eval(s)(), 0)

    def test_variable(self):
        """Function bound to a local name before decompiling."""
        f = lambda: 0
        s = decompile.get_source(f)
        self.assertEquals(eval(s)(), f())

    def test_newline(self):
        """Assignment continued onto the next line before the keyword."""
        f = \
            lambda: 0
        s = decompile.get_source(f)
        self.assertEquals(eval(s)(), f())

    def test_newline2(self):
        """Body continued onto the next line after the colon."""
        f = lambda: \
            0
        s = decompile.get_source(f)
        self.assertEquals(eval(s)(), f())

    def test_tuple(self):
        """Parenthesised function stored as the first item of a tuple."""
        t = (lambda: 0), 5
        s = decompile.get_source(t[0])
        self.assertEquals(eval(s)(), t[0]())

    def test_tuple2(self):
        """Function whose body is a tuple expression."""
        f = lambda :  (0, 5)
        s = decompile.get_source(f)
        self.assertEquals(eval(s)(), f())

    def test_multiple_lambdas(self):
        """Three functions defined on one line, each decompiled in turn."""
        t = lambda: 1, lambda: 2, lambda: 3
        for f in t:
            s = decompile.get_source(f)
            self.assertEquals(eval(s)(), f())

    def test_embedded_lambda_token(self):
        """Body is a string literal that itself contains the keyword."""
        f = lambda: "lambda: 0"
        s = decompile.get_source(f)
        self.assertEquals(eval(s)(), f())

    def test_parameters(self):
        """Single positional parameter."""
        f = lambda x: x
        s = decompile.get_source(f)
        self.assertEquals(eval(s)(5), f(5))

    def test_multiple_parameters(self):
        """Two positional parameters."""
        f = lambda x, y: x + y
        s = decompile.get_source(f)
        self.assertEquals(eval(s)(5, 6), f(5, 6))

    def test_args_kwargs(self):
        """Variadic positional and keyword parameters."""
        f = lambda *args, **kwargs: args[0] + kwargs['foo']
        s = decompile.get_source(f)
        self.assertEqual(eval(s)(5, foo=6), f(5, foo=6))

    def test_unpacking(self):
        """ Unpacking is not currently supported """
        f = lambda (x, y): x + y
        self.assertRaises(PythonConvertException,
                          lambda: decompile.get_source(f))
82 | 


--------------------------------------------------------------------------------
/raco/python/tests/projection_test.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | """ Tests for projection expressions """
 3 | 
 4 | from collections import Counter
 5 | from raco.python import convert
 6 | from raco.algebra import Apply
 7 | from python_test import PythonTestCase
 8 | 
 9 | 
class TestProjection(PythonTestCase):
    """Convert Python projections and run them over the employee scan."""

    def _execute_projection(self, query, expected):
        """Convert `query`, apply it as column 'out', and check the result."""
        projection = convert(query, [self.schema])
        self.assertIsNotNone(projection)

        apply_op = Apply([('out', projection)], self.scan)
        return self.check_result(self.get_query(apply_op), expected)

    def test_name(self):
        """Project a column by attribute name."""
        query = """lambda t: t.name"""
        names = Counter((row[2],) for row in self.emp_table)
        self._execute_projection(query, names)

    def test_expression(self):
        """Project an arithmetic expression over a column."""
        query = """lambda t: t.id + 1"""
        incremented = Counter((row[0] + 1,) for row in self.emp_table)
        self._execute_projection(query, incremented)

    def test_index(self):
        """Project a column by position."""
        query = """lambda t: t[2]"""
        third_column = Counter((row[2],) for row in self.emp_table)
        self._execute_projection(query, third_column)
31 | 


--------------------------------------------------------------------------------
/raco/python/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uwescience/raco/1f2bedbef71bacf715340289f4973d85a3c1dc97/raco/python/util/__init__.py


--------------------------------------------------------------------------------
/raco/relation_key.py:
--------------------------------------------------------------------------------
 1 | """Representation of a Myria relation key.
 2 | 
 3 | Myria relations are identified by a tuple of user, program, relation_name."""
 4 | 
 5 | 
 6 | class RelationKey(object):
 7 |     def __init__(self, *args):
 8 |         if len(args) == 1:
 9 |             self.user = "public"
10 |             self.program = "adhoc"
11 |             self.relation = args[0]
12 |         else:
13 |             self.user, self.program, self.relation = args
14 |         assert self.user and isinstance(self.user, basestring)
15 |         assert self.program and isinstance(self.program, basestring)
16 |         assert self.relation and isinstance(self.relation, basestring)
17 | 
18 |     def __repr__(self):
19 |         return 'RelationKey(%r,%r,%r)' % (self.user, self.program,
20 |                                           self.relation)
21 | 
22 |     def __str__(self):
23 |         return '%s:%s:%s' % (self.user, self.program, self.relation)
24 | 
25 |     def __eq__(self, other):
26 |         return self.__dict__ == other.__dict__
27 | 
28 |     def __hash__(self):
29 |         return hash(str(self))
30 | 
31 |     @classmethod
32 |     def from_string(cls, s):
33 |         """Create a RelationKey from a colon-delimited string."""
34 |         toks = s.split(':')
35 |         assert len(toks) <= 3
36 | 
37 |         return cls(*toks)
38 | 


--------------------------------------------------------------------------------
/raco/replace_with_repr.py:
--------------------------------------------------------------------------------
 1 | # These imports are required here -- for eval inside replace_with_repr
 2 | from raco.expression import *
 3 | from raco.algebra import *
 4 | from raco.relation_key import *
 5 | from raco.scheme import *
 6 | from raco.backends.myria import *
 7 | from raco.backends.cpp import *
 8 | from raco.backends.radish import *
 9 | from raco.backends.sparql import *
10 | 
11 | # NOTES: relying on import * for eval is error prone due
12 | #        to namespace collisions
13 | # NOTES: what to do if a operator has two constructors?
14 | 
15 | 
def replace_with_repr(plan):
    """Rebuild `plan` by evaluating its own repr.

    Returns the object produced by eval(repr(plan)). The names appearing in
    the repr are resolved through the wildcard imports at the top of this
    module. On TypeError, AttributeError or SyntaxError the offending repr
    and the plan are printed and the exception is re-raised.
    """
    r = repr(plan)
    try:
        # NOTE(review): eval executes whatever text repr(plan) produces;
        # presumably plans only come from trusted code -- confirm.
        return eval(r)
    except (TypeError, AttributeError, SyntaxError):
        print 'Error with repr {r} of plan {p}'.format(r=r, p=plan)
        raise
23 | 


--------------------------------------------------------------------------------
/raco/representation.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | class RepresentationProperties(object):
 4 | 
 5 |     def __init__(
 6 |             self,
 7 |             hash_partitioned=tuple(),
 8 |             sorted=None,
 9 |             grouped=None,
10 |             broadcasted=False):
11 |         """
12 |         @param hash_partitioned: None or set of AttributeRefs in hash key
13 |         @param sorted: None or list of (AttributeRefs, ASC/DESC) in sort order
14 |         @param grouped: None or list of AttributeRefs to group by
15 | 
16 |         None means that no knowledge about the interesting property is
17 |         known
18 |         """
19 | 
20 |         # TODO: make it a set of sets, representing a conjunction of hashes
21 |         # TODO: for example, after a HashJoin($1=$4) we know h($1) && h($4)
22 |         # TODO:     which is not equivalent to h($1, $4). Currently can only
23 |         # TODO:     represent conjunctions of size 1
24 |         self.hash_partitioned = hash_partitioned
25 |         self.broadcasted = broadcasted
26 | 
27 |         assert not (len(self.hash_partitioned) > 0 and self.broadcasted), \
28 |             "inconsistent state: cannot be partitioned and broadcasted"
29 | 
30 |         if sorted is not None or grouped is not None:
31 |             raise NotImplementedError("sorted and grouped not yet supported")
32 | 
33 |     def __str__(self):
34 |         return "{clazz}(hash: {hash_attrs}, broadcasted: {b})".format(
35 |             clazz=self.__class__.__name__,
36 |             hash_attrs=self.hash_partitioned,
37 |             b=self.broadcasted)
38 | 
39 |     def __repr__(self):
40 |         return "{clazz}({hp!r}, {sort!r}, {grp!r}, {br!r})".format(
41 |             clazz=self.__class__.__name__,
42 |             hp=self.hash_partitioned,
43 |             sort=None,
44 |             grp=None,
45 |             br=self.broadcasted
46 |         )
47 | 
48 |     def __eq__(self, other):
49 |         """Override the default Equals behavior"""
50 |         if isinstance(other, self.__class__):
51 |             return self.__dict__ == other.__dict__
52 |         return NotImplemented
53 | 
54 |     def __ne__(self, other):
55 |         """Define a non-equality test"""
56 |         if isinstance(other, self.__class__):
57 |             return not self.__eq__(other)
58 |         return NotImplemented
59 | 
60 |     def __hash__(self):
61 |         """Override the default hash behavior
62 |         (that returns the id of the object)"""
63 |         return hash(tuple(sorted(self.__dict__.items())))
64 | 


--------------------------------------------------------------------------------
/raco/sparql_tests.py:
--------------------------------------------------------------------------------
 1 | from raco.backends.sparql import SPARQLAlgebra
 2 | from raco.platform_tests import MyriaLPlatformTestHarness
 3 | import raco.compile
 4 | 
 5 | 
 6 | class SPARQLTests(object):
 7 |     # TODO: refactor MyrialPlatformTests to share code
 8 |     def check_sub_tables(self, query, name, **kwargs):
 9 |         self.check(query % self.tables, name, **kwargs)
10 | 
11 |     def test_scan(self):
12 |         self.check_sub_tables("""
13 |         T1 = SCAN(%(T1)s);
14 |         STORE(T1, OUTPUT);
15 |         """, "scan")
16 | 
17 |     def test_select(self):
18 |         self.check_sub_tables("""
19 |         T1 = SCAN(%(T1)s);
20 |         x = [FROM T1 WHERE a>5 EMIT a];
21 |         STORE(x, OUTPUT);
22 |         """, "select")
23 | 
24 |     def test_join(self):
25 |         self.check_sub_tables("""
26 |         T3 = SCAN(%(T3)s);
27 |         R3 = SCAN(%(R3)s);
28 |         out = JOIN(T3, b, R3, b);
29 |         out2 = [FROM out WHERE $3 = $5 EMIT $0, $3];
30 |         STORE(out2, OUTPUT);
31 |         """, "join")
32 | 
33 | 
class SPARQLMyriaLTests(MyriaLPlatformTestHarness, SPARQLTests):
    """Compile the shared queries to SPARQL through the MyriaL harness."""

    def check(self, query, name):
        """Compile `query` for the SPARQL algebra and sanity-check the text.

        `name` is part of the shared check signature and is not used here.
        """
        physical_plan = self.get_physical_plan(
            query, target_alg=SPARQLAlgebra())
        sparql = raco.compile.compile(physical_plan)

        # TODO pretty lenient tests: is it a non empty string?
        assert isinstance(sparql, str)
        assert sparql != ''
44 | 


--------------------------------------------------------------------------------
/raco/test_style.py:
--------------------------------------------------------------------------------
 1 | from nose.plugins.skip import SkipTest
 2 | import subprocess
 3 | import unittest
 4 | 
 5 | 
 6 | def check_output_and_print_stderr(args):
 7 |     """Run the specified command. If it does not exit cleanly, print the stderr
 8 |     of the command to stdout. Note that stderr prints are displayed as tests
 9 |     run, whereas stdout prints show up next to the failed test. We want the
10 |     latter."""
11 |     try:
12 |         subprocess.check_output(args, stderr=subprocess.STDOUT)
13 |     except subprocess.CalledProcessError as e:
14 |         print e.output
15 |         raise
16 | 
17 | 
18 | class StyleTest(unittest.TestCase):
19 |     """run flake8 with the right arguments and ensure all files pass"""
20 | 
21 |     def test_style(self):
22 |         "run flake8 with the right arguments and ensure all files pass"
23 |         check_output_and_print_stderr([
24 |             'flake8',
25 |             '--ignore=F',
26 |             '--exclude=parsetab.py,' +
27 |             'decompile_lambda_test.py,' +
28 |             'decompile_function_test.py',
29 |             'raco'])
30 | 
31 |     def test_pylint(self):
32 |         "run pylint -E to catch obvious errors"
33 |         # TODO fix this.
34 |         raise SkipTest()
35 |         check_output_and_print_stderr(['pylint', '-E', 'raco'])
36 | 


--------------------------------------------------------------------------------
/raco/test_utility.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from raco.utility import real_str
 4 | from collections import OrderedDict
 5 | 
 6 | 
 7 | class TestUtility(unittest.TestCase):
 8 |     def test_real_str_int(self):
 9 |         self.assertEqual(real_str(1), str(1))
10 |         self.assertEqual(real_str(1, skip_out=True), str(1))
11 | 
12 |     def test_real_str_string(self):
13 |         self.assertEqual(real_str("abc"), str("abc"))
14 |         self.assertEqual(real_str("abc", skip_out=True), str("abc"))
15 | 
16 |     def test_real_str_list(self):
17 |         self.assertEqual(real_str([1, 2]), "[1,2]")
18 |         self.assertEqual(real_str([1, 2], skip_out=True), "1,2")
19 | 
20 |     def test_real_str_dict(self):
21 |         d = OrderedDict([(1, 2), (3, 4)])
22 |         self.assertEqual(real_str(d), "{1:2,3:4}")
23 |         self.assertEqual(real_str(d, skip_out=True), "1:2,3:4")
24 | 
25 |     def test_real_str_set(self):
26 |         # Python has no built-in ordered set, so we do not know the item order
27 |         self.assertIn(real_str({1, 2}), ["{1,2}", "{2,1}"])
28 |         self.assertIn(real_str({1, 2}, skip_out=True), ["1,2", "2,1"])
29 | 
30 |     def test_real_str_nested_collections(self):
31 |         self.assertEqual(real_str([[1, 2], {3: 4}, []]),
32 |                          "[[1,2],{3:4},[]]")
33 |         self.assertEqual(real_str([[1, 2], {3: 4}, []], skip_out=True),
34 |                          "[1,2],{3:4},[]")
35 | 


--------------------------------------------------------------------------------
/raco/types.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """Names of primitive types understood by raco.
 3 | 
 4 | Note that raco internally supports a limited set of types.  Different backends
 5 | can support a richer set of types, but these aren't understood by raco's
 6 | type system.  For example, raco doesn't distinguish between int and long,
 7 | so unsafe casts are not detected inside raco.
 8 | """
 9 | 
# Internal and external types; these are understood by raco's type system
LONG_TYPE = "LONG_TYPE"
BOOLEAN_TYPE = "BOOLEAN_TYPE"
DOUBLE_TYPE = "DOUBLE_TYPE"
STRING_TYPE = "STRING_TYPE"
DATETIME_TYPE = "DATETIME_TYPE"
BLOB_TYPE = "BLOB_TYPE"

INTERNAL_TYPES = {
    LONG_TYPE, BOOLEAN_TYPE, DOUBLE_TYPE, STRING_TYPE, DATETIME_TYPE,
    BLOB_TYPE,
}

# External only types; not understood by raco's type system
INT_TYPE = "INT_TYPE"
FLOAT_TYPE = "FLOAT_TYPE"

NUMERIC_TYPES = {LONG_TYPE, DOUBLE_TYPE}

# Every internal type maps to itself; external types map to the internal
# type that stands in for them.
TYPE_MAP = {k: k for k in INTERNAL_TYPES}
TYPE_MAP[INT_TYPE] = LONG_TYPE
TYPE_MAP[FLOAT_TYPE] = DOUBLE_TYPE
ALL_TYPES = list(TYPE_MAP)


# Map from python primitive types to internal types
python_type_map = {
    int: LONG_TYPE,
    bool: BOOLEAN_TYPE,
    float: DOUBLE_TYPE,
    str: STRING_TYPE,
}

reverse_python_type_map = {v: k for k, v in python_type_map.items()}

# Spellings accepted for booleans, compared case-insensitively.
_TRUE_STRINGS = frozenset(['true', 't', 'yes', '1'])
_FALSE_STRINGS = frozenset(['false', 'f', 'no', '0', ''])


def map_type(s):
    """Convert an arbitrary type to an internal type.

    Raises KeyError if `s` is not one of the names in TYPE_MAP.
    """
    return TYPE_MAP[s]


def _parse_boolean(s):
    """Return the boolean spelled by `s`; ValueError if unrecognized."""
    try:
        text = s.strip().lower()
    except AttributeError:
        # Not a string: keep the plain truthiness conversion.
        return bool(s)
    if text in _TRUE_STRINGS:
        return True
    if text in _FALSE_STRINGS:
        return False
    raise ValueError('invalid boolean literal: {s!r}'.format(s=s))


def parse_string(s, _type):
    """Convert from a string to an internal python representation.

    `_type` must be one of the internal types that has a python counterpart
    (LONG_TYPE, BOOLEAN_TYPE, DOUBLE_TYPE or STRING_TYPE). Malformed input
    raises ValueError.
    """
    assert _type in reverse_python_type_map
    python_type = reverse_python_type_map[_type]
    if python_type is bool:
        # bool("false") is True because any non-empty string is truthy, so
        # booleans need an explicit parser.
        return _parse_boolean(s)
    return python_type(s)
52 | 


--------------------------------------------------------------------------------
/raco/utility.py:
--------------------------------------------------------------------------------
 1 | import collections
 2 | 
 3 | 
 4 | def emit(*args):
 5 |     """Return blocks of code as a string."""
 6 |     return "\n".join([str(x) for x in args if len(str(x)) > 0])
 7 | 
 8 | 
 9 | def emitlist(argslist):
10 |     """Return blocks of code as a string."""
11 |     return "\n".join([str(x) for x in argslist if len(str(x)) > 0])
12 | 
13 | 
def real_str(obj, skip_out=False):
    """Convert the input object to a string, recursively stringifying elements
    inside of containers. If skip_out is True, the container bounds will not
    be displayed. E.g. real_str([1, 2]) == "[1,2]" but
    real_str([1, 2], skip_out=True) == "1,2".

    Sequences are rendered as [a,b], mappings as {k:v,...} and sets as
    {a,b}; skip_out only affects the outermost container. Strings and
    non-container objects are returned via str(). A container that is none
    of sequence, mapping or set raises NotImplementedError."""

    # The container ABCs live in collections.abc on Python 3 (the aliases on
    # the collections module itself were removed in 3.10) and directly on
    # collections on Python 2.
    try:
        from collections import abc as abcs
    except ImportError:
        abcs = collections
    # basestring only exists on Python 2; on Python 3 text and bytes are the
    # equivalent "string" types.
    try:
        string_types = basestring
    except NameError:
        string_types = (str, bytes)

    # Hack around strings being containers
    if isinstance(obj, string_types) or not isinstance(obj, abcs.Container):
        return str(obj)

    if isinstance(obj, abcs.Sequence):
        inner = ','.join(real_str(e) for e in obj)
        bounds = '[{inn}]'
    elif isinstance(obj, abcs.Mapping):
        inner = ','.join('{a}:{b}'.format(a=real_str(a), b=real_str(b))
                         for a, b in obj.items())
        bounds = '{{{inn}}}'
    elif isinstance(obj, abcs.Set):
        inner = ','.join(real_str(e) for e in obj)
        bounds = '{{{inn}}}'
    else:
        raise NotImplementedError(
            "real_str(obj) for type(obj)={t}".format(t=type(obj)))

    if skip_out:
        return inner
    return bounds.format(inn=inner)
45 | 
46 | 
class Printable(object):
    """Mixin whose string form is the name of the concrete class."""

    @classmethod
    def opname(cls):
        """Return the name of the class this is called on, as a str."""
        name = cls.__name__
        return str(name)

    def __str__(self):
        """Render the object as its opname()."""
        label = self.opname()
        return label
54 | 
55 | 
# Optional raco dependency: termcolor
# Without it, coloring will not happen
try:
    from termcolor import colored
except ImportError:
    def colored(s, color):
        """Fallback used when termcolor is missing: return s unchanged."""
        return s
64 | 


--------------------------------------------------------------------------------
/raco/viz.py:
--------------------------------------------------------------------------------
 1 | from raco import algebra
 2 | 
 3 | 
 4 | def graph_to_dot(graph, **kwargs):
 5 |     """Graph is expected to be a dict of the form { 'nodes' : list(), 'edges' :
 6 |     list() }. This function returns a string that will be input to dot."""
 7 | 
 8 |     title = kwargs.get('title', '')
 9 | 
10 |     # Template, including setup and formatting:
11 |     template = """digraph G {
12 |       ratio = "fill" ;
13 |       size = "4.0, 4.0" ;
14 |       page = "4, 4" ;
15 |       margin = 0 ;
16 |       mincross = 2.0 ;
17 |       rankdir = "BT" ;
18 |       nodesep = 0.25 ;
19 |       ranksep = 0.25 ;
20 |       node [fontname="Helvetica", fontsize=10,
21 |             shape=oval, style=filled, fillcolor=white ] ;
22 | 
23 |       // The nodes
24 |       %s
25 | 
26 |       // The edges
27 |       %s
28 | 
29 |       // The title
30 |       labelloc="t";
31 |       label="%s";
32 | }"""
33 | 
34 |     # Nodes:
35 |     nodes = ['"%s" [label="%s"] ;' % (id(n), n.shortStr().replace(r'"', r'\"'))
36 |              for n in graph['nodes']]
37 |     node_str = '\n      '.join(nodes)
38 | 
39 |     # Edges:
40 |     edges = ['"%s" -> "%s" ;' % (id(x), id(y)) for (x, y) in graph['edges']]
41 |     edge_str = '\n      '.join(edges)
42 | 
43 |     return template % (node_str, edge_str, title)
44 | 
45 | 
def operator_to_dot(operator, graph=None, **kwargs):
    """Return the dot input string for the graph collected from operator.

    operator is expected to be an object of class raco.algebra.Operator. Its
    collectGraph(graph) result is rendered by graph_to_dot, which also
    receives any extra keyword arguments."""
    collected = operator.collectGraph(graph)
    dot_text = graph_to_dot(collected, **kwargs)
    return dot_text
50 | 
51 | 
def get_dot(obj):
    """Return the dot input string for obj.

    obj may be a graph dict containing both a 'nodes' and an 'edges' key
    (rendered with graph_to_dot) or an algebra.Operator (rendered with
    operator_to_dot). Any other object raises NotImplementedError."""
    # The key tests must look inside obj itself; testing membership on the
    # builtin dict type raised TypeError for every dict argument.
    if isinstance(obj, dict) and 'nodes' in obj and 'edges' in obj:
        return graph_to_dot(obj)
    if isinstance(obj, algebra.Operator):
        return operator_to_dot(obj)
    raise NotImplementedError('Unable to get dot from object type %s' % type(obj))  # noqa
58 | 


--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
 1 | nose
 2 | coverage
 3 | cloud
 4 | 
 5 | # Flake8: hardcode version plus dependency versions
 6 | flake8 == 2.1.0
 7 | pep8 == 1.5.4
 8 | pyflakes == 0.8.1
 9 | mccabe == 0.2.1
10 | httmock >= 1.2.1
11 | # /Flake8
12 | 


--------------------------------------------------------------------------------
/scripts/see-rules:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import raco.rules
 4 | import raco.language.myrialang
 5 | 
 6 | import inspect
 7 | 
 8 | for r in raco.rules.Rule.__subclasses__():
 9 |     # skip one to ones
10 |     if r.__name__ != "OneToOne":
11 |         num_args = len(inspect.getargspec(r.__init__).args) - 1
12 |         try:
13 |             instance = r(*[None for i in range(num_args)])
14 |             print instance
15 |         except Exception as e:
16 |             print "Could not print rule named", r.__name__
17 |             # uncomment for debugging rule printing
18 |             # raise e
19 |     
20 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | ### Add find_packages function, see
 4 | # https://wiki.python.org/moin/Distutils/Cookbook/AutoPackageDiscovery
 5 | import os
 6 | 
 7 | def is_package(path):
 8 |     return (
 9 |         os.path.isdir(path) and
10 |         os.path.isfile(os.path.join(path, '__init__.py'))
11 |         )
12 | 
13 | def find_packages(path=".", base="", exclude=None):
14 |     """Find all packages in path"""
15 |     if not exclude:
16 |         exclude = []
17 |     packages = {}
18 |     for item in os.listdir(path):
19 |         dir = os.path.join(path, item)
20 |         if is_package(dir) and dir not in exclude:
21 |             if base:
22 |                 module_name = "{base}.{item}".format(base=base,item=item)
23 |             else:
24 |                 module_name = item
25 |             packages[module_name] = dir
26 |             packages.update(find_packages(dir, module_name))
27 |     return packages
28 | ###
29 | 
# Package metadata and install configuration for raco
setup(name='raco',
      version='1.3.7',
      description='Relational Algebra COmpiler',
      author='Bill Howe, Andrew Whitaker, Daniel Halperin',
      author_email='raco@cs.washington.edu',
      url='https://github.com/uwescience/raco',
      # find_packages (defined above) returns a {dotted name: directory} dict
      # NOTE(review): presumably only the dict's keys are consumed here -- confirm
      packages=find_packages(exclude=['clang']),
      # Template files to ship in addition to the python sources
      package_data={'': ['c_templates/*.template','grappa_templates/*.template']},
      # Runtime dependencies; networkx is the only one pinned to a version
      install_requires=['networkx==1.11', 'ply', 'pyparsing', 'SQLAlchemy', 'jinja2', 'requests', 'requests_toolbelt' ],
      scripts=['scripts/myrial']
      )
41 | 


--------------------------------------------------------------------------------