├── .coveragerc ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── c_test_environment ├── .gitignore ├── Makefile ├── c_index_strings.py ├── clang_datalog_tests.py ├── clang_myrial_tests.py ├── clang_processor.py ├── compare.py ├── convert2bin.h ├── convert2bin.py ├── counters_util.cc ├── counters_util.h ├── dates.cc ├── dates.h ├── dates_test.cc ├── generate_test_relations.py ├── grappa_detect_new_files.sh ├── grappa_ingest.py ├── grappalang_myrial_tests.py ├── grappalang_tests.py ├── hash.h ├── io_util.cc ├── io_util.h ├── old_query.cc ├── old_query2.cc ├── osutils.py ├── query.cc ├── radish_utils.cc ├── radish_utils.h ├── rdf-strings2ints.rb ├── repr_myrial_tests.py ├── run_query.py ├── strings.cc ├── strings.h ├── testqueries │ ├── aggregate_count.sql │ ├── aggregate_count_group_one.sql │ ├── aggregate_count_group_one_notgroup_filtered_one.sql │ ├── aggregate_count_group_one_notgroup_one.sql │ ├── aggregate_double.sql │ ├── aggregate_max.sql │ ├── aggregate_min.sql │ ├── aggregate_of_binop.sql │ ├── aggregate_of_binop_double.sql │ ├── aggregate_of_binop_no_key_unionall_double.sql │ ├── aggregate_string.sql │ ├── aggregate_sum.sql │ ├── apply.sql │ ├── apply_and_self_join.sql │ ├── argmax.myl │ ├── argmax_all_uda.sql │ ├── argmax_uda.sql │ ├── builtin_and_UDA.sql │ ├── common_index_allowed.sql │ ├── common_index_disallowed.sql │ ├── countstar_string.sql │ ├── directed_squares.sql │ ├── directed_triangles.sql │ ├── expr_singleton.sql │ ├── few_col_store.sql │ ├── groupby_string_key.sql │ ├── groupby_string_multi_key.sql │ ├── head_scalar_op.sql │ ├── idivide.sql │ ├── join.sql │ ├── join_of_aggregate_of_join.sql │ ├── join_of_two_unionalls.sql │ ├── join_string_key.sql │ ├── join_string_val.sql │ ├── join_swap_indexing.sql │ ├── join_then_aggregate.sql │ ├── join_two_types.sql │ ├── like_begin.sql │ ├── like_begin_end.sql │ ├── like_end.sql │ ├── like_middle.sql │ ├── matrix_mult.sql │ ├── more_col_store.sql │ ├── multi_builtin.sql │ ├── 
project_string.sql │ ├── q2.sql │ ├── scan.sql │ ├── select.sql │ ├── select_conjunction.sql │ ├── select_double.sql │ ├── select_string.sql │ ├── select_string_literal.sql │ ├── select_then_join.sql │ ├── self_join.sql │ ├── self_three_path.sql │ ├── singleton_constant.sql │ ├── store.sql │ ├── string_join.sql │ ├── swap.sql │ ├── test_join_of_two_aggregates.sql │ ├── three_path.sql │ ├── three_way_three_key_hash_join.sql │ ├── two_hop.sql │ ├── two_join_switch.sql │ ├── two_key_hash_join.sql │ ├── two_key_hash_join_swap.sql │ ├── two_path.sql │ ├── two_var_select.sql │ ├── unionall.sql │ ├── unionall_3.sql │ ├── unionall_apply_and_self_join.sql │ ├── unionall_of_join.sql │ ├── unionall_then_aggregate.sql │ ├── unionall_then_join.sql │ ├── while.sql │ ├── while_repeat_groupby.sql │ ├── while_repeat_join.sql │ ├── while_union_all.sql │ └── zero_store.sql ├── testquery.py ├── timing.h └── verifier.py ├── docs └── index.md ├── examples ├── .gitignore ├── Makefile ├── README.md ├── bad_column_name.myl ├── cast.myl ├── catalog.py ├── chained.myl ├── clog.py ├── clog.sh ├── connected_components.myl ├── crossmatch_2d.myl ├── deadcode.myl ├── deadcode2.myl ├── dept.csv ├── emp.csv ├── example_queries.py ├── grappa_test_query.py ├── grappalog.py ├── grappalog.sh ├── groupby1.myl ├── groupby2.myl ├── groupby3.myl ├── helloworld.py ├── iteration.myl ├── join.myl ├── join.sql ├── kmeans.myl ├── language_demo.myl ├── lineage.myl ├── load_options.csv ├── load_opts.myl ├── naivebayes │ ├── .gitignore │ ├── buckets.myl │ ├── cat_scheme │ ├── catalog.py │ ├── create_scheme.py │ ├── generate_parse.py │ ├── naivebayes_classify.myl │ ├── naivebayes_train.myl │ ├── nb_classify.myl │ ├── nb_train.myl │ ├── prepare_test.sh │ └── prepare_training.sh ├── noschema.myl ├── openmp_examples │ ├── A.h │ ├── DictOut.hpp │ ├── Makefile │ ├── igor_omp_tri.rb │ └── triangle_with_tuples.cpp ├── pagerank.myl ├── pagerank_dead.myl ├── pairwise_distances.myl ├── rdfsimple.myl ├── reachable.myl ├── 
samplescan.myl ├── seaflow.myl ├── seaflow2.myl ├── sigma-clipping-v0.myl ├── sigma-clipping.myl ├── sigma_clipping_points.txt ├── sp2bench.py ├── sp2bench │ ├── catalog.py │ ├── q1.dlg │ ├── q1.myl │ ├── q2.myl │ ├── q3.myl │ ├── q4.myl │ ├── sp2bench_rdf.py │ ├── sp2bench_rdf_brackets.py │ └── sp2bench_rdf_long.py ├── standalone.myl ├── tipsy.myl ├── uda.myl └── worker_id.myl ├── raco ├── __init__.py ├── algebra.py ├── backends │ ├── __init__.py │ ├── backend_common.py │ ├── cpp │ │ ├── __init__.py │ │ ├── c_templates │ │ │ ├── ascii_scan.cpp │ │ │ ├── base_query.cpp │ │ │ ├── clang_group_timing.cpp │ │ │ ├── clang_pipeline_timing.cpp │ │ │ ├── groupby │ │ │ │ ├── 0key_declaration.cpp │ │ │ │ ├── 0key_materialize.cpp │ │ │ │ ├── 0key_scan.cpp │ │ │ │ ├── 1key_declaration.cpp │ │ │ │ ├── 1key_materialize.cpp │ │ │ │ ├── 1key_scan.cpp │ │ │ │ ├── 2key_declaration.cpp │ │ │ │ ├── 2key_materialize.cpp │ │ │ │ └── 2key_scan.cpp │ │ │ ├── hashjoin │ │ │ │ ├── hash_declaration.cpp │ │ │ │ ├── insert_materialize.cpp │ │ │ │ └── lookup.cpp │ │ │ ├── materialized_tuple_ref_additional.cpp │ │ │ ├── memory_scan.cpp │ │ │ ├── relation_declaration.cpp │ │ │ └── string_index_lookup.cpp │ │ ├── cbase_templates │ │ │ ├── assignment.cpp │ │ │ ├── group_timing.cpp │ │ │ ├── materialized_tuple_create_one.cpp │ │ │ ├── materialized_tuple_create_two.cpp │ │ │ ├── materialized_tuple_ref.cpp │ │ │ ├── output_stream_close.cpp │ │ │ ├── output_stream_decl.cpp │ │ │ ├── output_stream_open.cpp │ │ │ ├── output_stream_write.cpp │ │ │ ├── pipeline_timing.cpp │ │ │ ├── select.cpp │ │ │ ├── tuple_declaration.cpp │ │ │ ├── tuple_type_convert.cpp │ │ │ └── write_count.cpp │ │ ├── cpp.py │ │ ├── cppcommon.py │ │ └── operator_at_a_time_c_templates │ │ │ ├── ascii_scan.template │ │ │ ├── base_query.template │ │ │ ├── binary_scan.template │ │ │ ├── emit_joined_tuple.template │ │ │ ├── filtering_nestedloop_hashjoin_chain.template │ │ │ ├── filtering_nestedloop_join.template │ │ │ ├── 
filtering_nestedloop_join_chain.template │ │ │ ├── filteringhashjoin.template │ │ │ ├── hashjoin.template │ │ │ ├── join_simple_hash_twopass.template │ │ │ ├── precount_select.template │ │ │ ├── scan.template │ │ │ └── select_simple_twopass.template │ ├── logical.py │ ├── myria │ │ ├── __init__.py │ │ ├── catalog.py │ │ ├── connection.py │ │ ├── errors.py │ │ ├── myria.py │ │ └── tests │ │ │ ├── test_error.py │ │ │ └── test_myria_execution.py │ ├── radish │ │ ├── README.md │ │ ├── __init__.py │ │ ├── grappa_templates │ │ │ ├── base_query.cpp │ │ │ ├── define_cl_arg.cpp │ │ │ ├── define_metric.cpp │ │ │ ├── file_scan.cpp │ │ │ ├── gce_app_metric.cpp │ │ │ ├── global_array_memory_scan.cpp │ │ │ ├── global_array_relation_declaration.cpp │ │ │ ├── graph_file_scan.cpp │ │ │ ├── grappa_group_timing.cpp │ │ │ ├── grappa_pipeline_timing.cpp │ │ │ ├── groupby │ │ │ │ ├── 0key_output.cpp │ │ │ │ ├── combine_definition.cpp │ │ │ │ ├── init_definition.cpp │ │ │ │ ├── multi_uda_0key_output.cpp │ │ │ │ ├── multi_uda_0key_update.cpp │ │ │ │ ├── multi_uda_scan.cpp │ │ │ │ ├── nkey_update.cpp │ │ │ │ ├── one_built_in_0key_output.cpp │ │ │ │ ├── one_built_in_0key_update.cpp │ │ │ │ ├── one_built_in_scan.cpp │ │ │ │ ├── scan.cpp │ │ │ │ ├── update_definition.cpp │ │ │ │ ├── withkey_decl.cpp │ │ │ │ ├── withkey_init.cpp │ │ │ │ └── withoutkey_init.cpp │ │ │ ├── hashjoin │ │ │ │ ├── hash_declaration.cpp │ │ │ │ ├── hash_init.cpp │ │ │ │ ├── insert_materialize.cpp │ │ │ │ └── lookup.cpp │ │ │ ├── input_relation_declarations.cpp │ │ │ ├── iterators │ │ │ │ ├── 0key_groupby_source.cpp │ │ │ │ ├── apply.cpp │ │ │ │ ├── broadcast_stream.cpp │ │ │ │ ├── hashjoin_sink.cpp │ │ │ │ ├── hashjoin_source.cpp │ │ │ │ ├── instantiate_operator.cpp │ │ │ │ ├── instantiate_sink.cpp │ │ │ │ ├── multikey_groupby_sink.cpp │ │ │ │ ├── multikey_groupby_source.cpp │ │ │ │ ├── partition_groupby │ │ │ │ │ └── multikey_groupby_sink.cpp │ │ │ │ ├── select.cpp │ │ │ │ ├── sink_declaration.cpp │ │ │ │ └── 
withkey_init.cpp │ │ │ ├── partition_groupby │ │ │ │ └── nkey_update.cpp │ │ │ ├── shuffle.cpp │ │ │ ├── shufflehashjoin │ │ │ │ ├── delete.cpp │ │ │ │ ├── hash_init.cpp │ │ │ │ ├── materialize.cpp │ │ │ │ ├── reduce.cpp │ │ │ │ └── result_scan.cpp │ │ │ ├── spawn.cpp │ │ │ ├── string_index_lookup.cpp │ │ │ ├── symmetric_array_file_scan.cpp │ │ │ ├── symmetric_array_memory_scan.cpp │ │ │ ├── symmetric_array_relation_declaration.cpp │ │ │ ├── symmetric_array_relation_materialize.cpp │ │ │ ├── symmetric_array_temprelation_declaration.cpp │ │ │ ├── symmetric_array_temprelation_init.cpp │ │ │ ├── symmetric_array_temprelation_materialize.cpp │ │ │ ├── symmetric_array_temprelation_materializer_done.cpp │ │ │ ├── symmetric_array_temprelation_recycle.cpp │ │ │ ├── symmetrichashjoin │ │ │ │ ├── hash_declaration.cpp │ │ │ │ ├── hash_init.cpp │ │ │ │ └── hash_insert_lookup.cpp │ │ │ ├── sync_declaration.cpp │ │ │ └── wait_statement.cpp │ │ └── radish.py │ ├── sparql │ │ ├── __init__.py │ │ └── sparql.py │ └── sql │ │ ├── __init__.py │ │ ├── catalog.py │ │ ├── test_case.py │ │ └── test_sql.py ├── catalog.py ├── catalog_tests │ ├── default_cardinality_relation.py │ ├── set_cardinality_relation.py │ └── test_catalog.py ├── clangtestdb.py ├── clib │ ├── algorithms.h │ ├── boolean.cc │ ├── boolean.h │ ├── testboolean.cc │ └── testboolean.o ├── compile.py ├── cpp_datalog_utils.py ├── datalog │ ├── __init__.py │ ├── datalog_test.py │ ├── grammar.py │ ├── model.py │ └── query_tests.py ├── datastructure │ ├── UnionFind.py │ ├── __init__.py │ └── test_union_find.py ├── dbconn.py ├── expression │ ├── __init__.py │ ├── aggregate.py │ ├── boolean.py │ ├── expression.py │ ├── expressions_library.py │ ├── function.py │ ├── statevar.py │ ├── udf.py │ ├── util.py │ └── visitor.py ├── fake_data.py ├── fakedb.py ├── from_repr.py ├── myrial │ ├── __init__.py │ ├── cfg.py │ ├── cfg_test.py │ ├── cli_test.py │ ├── emitarg.py │ ├── empty_aggregate_tests.py │ ├── exceptions.py │ ├── 
filescan_tests.py │ ├── groupby.py │ ├── interpreter.py │ ├── keywords.py │ ├── kmeans_test.py │ ├── multiway.py │ ├── myrial_test.py │ ├── optimizer_tests.py │ ├── pagerank_test.py │ ├── parser.py │ ├── query_tests.py │ ├── reachable_tests.py │ ├── sample_test.py │ ├── scanner.py │ ├── setop_tests.py │ ├── sigma_clipping_test.py │ └── type_tests.py ├── nary_join_rules_test.py ├── operator_test.py ├── pipelines.py ├── platform_tests.py ├── python │ ├── __init__.py │ ├── convert.py │ ├── exceptions.py │ ├── tests │ │ ├── __init__.py │ │ ├── boolean_test.py │ │ ├── convert_tests.py │ │ ├── decompile_function_test.py │ │ ├── decompile_lambda_test.py │ │ ├── functions_test.py │ │ ├── operators_test.py │ │ ├── projection_test.py │ │ ├── python_test.py │ │ ├── syntax_test.py │ │ └── udf_test.py │ └── util │ │ ├── __init__.py │ │ ├── decompile.py │ │ └── visitor.py ├── relation_key.py ├── replace_with_repr.py ├── representation.py ├── rules.py ├── scheme.py ├── sparql_tests.py ├── sqllite_test.py ├── test_style.py ├── test_utility.py ├── tests.py ├── types.py ├── utility.py └── viz.py ├── requirements-dev.txt ├── scripts ├── myrial ├── see-rules └── simple_raco_execution.py └── setup.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = raco -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | *~ 4 | output.json 5 | .DS_Store 6 | parser.out 7 | parsetab.py 8 | build 9 | raco.egg-info 10 | .coverage 11 | .noseids 12 | .idea/ 13 | *.dot 14 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | addons: 5 | apt: 6 | packages: 7 | - clang-3.6 8 | - lldb-3.6 9 | - libc++-dev 10 | - libc++abi-dev 11 | 
install: 12 | - pip install pip --upgrade 13 | - pip install setuptools --upgrade 14 | - pip install -r requirements-dev.txt 15 | - python setup.py install 16 | - pip install coveralls 17 | - export CXX=clang++ 18 | - export CXXFLAGS="-stdlib=libc++" 19 | - export LDFLAGS=-lc++abi 20 | script: nosetests --with-coverage --cover-package=raco --logging-level=WARN 21 | after_success: 22 | - coveralls 23 | sudo: false 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012-2016 University of Washington 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 7 | Neither the name of the University of Washington nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 8 | 9 | THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF WASHINGTON AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF WASHINGTON OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 10 | -------------------------------------------------------------------------------- /c_test_environment/.gitignore: -------------------------------------------------------------------------------- 1 | *.exe 2 | *.o 3 | *.a 4 | R1 5 | R2 6 | R3 7 | S1 8 | S2 9 | S3 10 | T1 11 | T2 12 | T3 13 | I1 14 | I2 15 | I3 16 | D1 17 | D2 18 | D3 19 | *.dot 20 | *.cpp 21 | sp2b.100t* 22 | sp2bench_1m* 23 | tmp/ 24 | *.out 25 | *.db 26 | Index 27 | Str 28 | *.ttl 29 | edges 30 | *.ps 31 | *.pdf 32 | log*.rb 33 | *.cpp.* 34 | test.txt 35 | *store 36 | importTestData.sql 37 | -------------------------------------------------------------------------------- /c_test_environment/Makefile: -------------------------------------------------------------------------------- 1 | # WARNING: name environment source files .h or .cc, but not .cpp, which is reserved (.gitignore laziness) 2 | 3 | CXX ?= g++ 4 | CXXFLAGS += -ggdb -std=c++11 -O3 5 | #-O3 #-m64 -Wno-deprecated -fPIC 6 | 7 | ifneq ($(shell uname), Darwin) 8 | LIBS = -lrt 9 | endif 10 | 11 | INCL = 12 | COMP = $(CXX) $(CXXFLAGS) -c $< $(INCL) 13 | LINK = $(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS) 14 | COMPLINK = $(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS) $@.cc 15 | 16 | 17 | # for 'triangle' 18 | CSWITCHES = -O -DLINUX -I/usr/X11R6/include -L/usr/X11R6/lib 19 | TRILIBDEFS = -DTRILIBRARY 20 | 21 | #all: 22 | 23 | %.convert: %.convert.cpp convert2bin.h 24 | $(CXX) $(CXXFLAGS) -o $@ $< 25 | 26 | %.exe: %.o io_util.o counters_util.o 
class WordIndexer:
    """Assign dense integer ids to words in first-seen order.

    Each previously unseen word is appended, one per line, to the index
    file named at construction time, so the word with id N is the N-th
    line (counting from 0) written to that file.
    """

    def __init__(self, indexf):
        # word -> id handed out when the word was first seen
        self.words = {}
        # next id to hand out; also the number of distinct words so far
        self.count = 0
        # stays open until close() is called
        self.indexfw = open(indexf, 'w')

    def add_word(self, w):
        """Return the id of `w`, registering it first if it is new."""
        if w not in self.words:
            # record the word in the index file before assigning its id
            self.indexfw.write(w+'\n')
            self.words[w] = self.count
            self.count += 1
        return self.words[w]

    def close(self):
        """Close the index file opened by the constructor."""
        self.indexfw.close()
class DatalogClangTest(unittest.TestCase, DatalogPlatformTest):
    """Datalog platform tests run against the C++ (clang) backend.

    All work happens inside the `c_test_environment` directory.
    """

    def check(self, query, name):
        """Emit C++ for `query` as `<name>.cpp` and pass it to checkquery."""
        with Chdir("c_test_environment"):
            generated = "%s.cpp" % name
            # remove generated code left behind by an earlier run
            if os.path.exists(generated):
                os.remove(generated)
            emitCode(query, name, clang.CCAlgebra)
            checkquery(name, ClangRunner())

    def check_file(self, query, name):
        """Like check(), but emit file output and pass it to checkstore."""
        with Chdir("c_test_environment"):
            generated = "%s.cpp" % name
            # remove generated code left behind by an earlier run
            if os.path.exists(generated):
                os.remove(generated)
            emitCode(query, name, clang.CCAlgebra,
                     emit_print=cppcommon.EMIT_FILE)
            checkstore(name, ClangRunner())

    def setUp(self):
        """Generate the default test relations when need_generate() says so."""
        with Chdir("c_test_environment"):
            if need_generate():
                generate_default()
class ClangProcessor:
    """Turn MyriaL query text into query plans and generated source code.

    Wraps a MyriaL parser and a statement processor bound to `catalog`.
    Keyword arguments given to the public methods are forwarded unchanged
    to the statement processor's plan getters.
    """

    def __init__(self, catalog):
        self.parser = parser.Parser()
        self.processor = interpreter.StatementProcessor(catalog)

    def get_plan(self, query, **kwargs):
        """Get the MyriaL query plan for a query.

        Returns the logical plan when `logical=True` is passed, otherwise
        the physical plan. All keyword arguments (including `logical`) are
        forwarded to the statement processor.
        """
        statements = self.parser.parse(query)
        self.processor.evaluate(statements)
        if kwargs.get('logical', False):
            return self.processor.get_logical_plan(**kwargs)
        return self.processor.get_physical_plan(**kwargs)

    def get_physical_plan(self, query, **kwargs):
        """Get the physical plan for a MyriaL query."""
        # force the physical branch of get_plan regardless of caller input
        kwargs['logical'] = False
        return self.get_plan(query, **kwargs)

    def get_source_code(self, query, **kwargs):
        """Return the generated source code for a MyriaL query."""
        # kwargs must be unpacked: get_physical_plan accepts only `query`
        # positionally, so passing the dict itself raises TypeError
        plan = self.get_physical_plan(query, **kwargs)

        # generate code in the target language
        return raco.compile.compile(plan)

    def write_source_code(self, query, basename, **kwargs):
        """Generate source code for `query` and write it to `<basename>.cpp`."""
        # same forwarding rule as in get_source_code
        code = self.get_source_code(query, **kwargs)
        with open(basename+'.cpp', 'w') as f:
            f.write(code)
36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /c_test_environment/convert2bin.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | void CHECK(bool cond, std::string s) { 10 | if (!cond) { 11 | std::cerr << "assertion failed: " << s << std::endl; 12 | exit(1); 13 | } 14 | } 15 | 16 | 17 | template< typename Tuple > 18 | void convert2bin_withTuple( std::string fn, char delim=' ', uint64_t burn=0, int add_id=0) { 19 | std::ifstream infile(fn, std::ifstream::in); 20 | CHECK( infile.is_open(), fn + " failed to open"); 21 | 22 | std::string outpath = fn+".bin"; 23 | std::ofstream outfile(outpath, std::ios_base::out | std::ios_base::binary ); 24 | CHECK( outfile.is_open(), outpath + " failed to open"); 25 | 26 | int64_t linenum = 0; 27 | while( infile.good() ) { 28 | std::string line; 29 | std::getline( infile, line ); 30 | if (line.length() == 0) break; // takes care of EOF 31 | 32 | std::istringstream iss(line); 33 | auto t = Tuple::fromIStream(iss, delim); 34 | 35 | // add a sequential id to the data 36 | if (add_id) { 37 | outfile.write((char*)&linenum, sizeof(int64_t)); 38 | } 39 | 40 | outfile.write((char*) &(t.f0), Tuple::fieldsSize()); 41 | linenum++; 42 | } 43 | infile.close(); 44 | outfile.close(); 45 | std::cout << "binary: " << outpath << std::endl; 46 | std::cout << "rows: " << linenum << std::endl; 47 | std::cout << "cols: " << Tuple::numFields() << std::endl; 48 | std::cout << "tuple size: " << Tuple::fieldsSize() << std::endl; 49 | if (add_id) { 50 | std::cout << " + 1 column for id" << std::endl; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /c_test_environment/convert2bin.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import argparse 3 | import sys 
def generate_tuple_class(rel_key, cat):
    """Write and build the C++ converter program for one relation.

    Looks up the scheme of `rel_key` in `cat`, renders the module-level
    `template` with the tuple definition produced by StagedTupleRef, writes
    the result to `<name>.convert.cpp` and runs `make <name>.convert`,
    where `<name>` is the third ':'-separated component of `rel_key`.

    Returns the name of the generated .cpp file.
    """
    scheme = cat.get_scheme(rel_key)
    tuple_ref = StagedTupleRef(None, scheme)
    tuple_definition = tuple_ref.generateDefinition()

    # keys look like "a:b:name"; only the last part names the output files
    base = rel_key.split(':')[2]
    cpp_name = "{0}.convert.cpp".format(base)

    with open(cpp_name, 'w') as cpp_file:
        cpp_file.write(template.format(definition=tuple_definition,
                                       typ=tuple_ref.getTupleTypename()))

    # build the converter binary through the Makefile's %.convert rule
    make_target = "{fn}.convert".format(fn=base)
    subprocess.check_output(["make", make_target])
    return cpp_name
If not specified then will convert whole catalog") 65 | p.add_argument("-c", dest="catpath", help="path of catalog file, see FromFileCatalog for format", required=True) 66 | 67 | args = p.parse_args(sys.argv[1:]) 68 | generate_tuple_class_from_file(args.name, args.catpath) 69 | 70 | -------------------------------------------------------------------------------- /c_test_environment/counters_util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // How to use the counters: 4 | // 1) malloc space for as many counters as you need 5 | // int numCounters = 7; 6 | // int *counters = mallocCounterMemory(numCounters); 7 | // 2) Call getCounters(), specifying the particular address for storing the 8 | // counter values. Increment currCounter so that you don't overwrite values. 9 | // int currCounter = 0; 10 | // getCounters(counters, currCounter); 11 | // currCounter = currCounter + 1; // 1 12 | // 3) Print the difference between the counters 13 | // printDiffCounters(counters, numCounters); 14 | // 4) Free the memory storing the counter values 15 | // free(counters); 16 | 17 | int * mallocCounterMemory(int numCounters); 18 | void freeCounterMemory(int *counters); 19 | void getCounters(int *counters, int currCounter); 20 | void printDiffCounters(int *counters, int sz); 21 | void printCounters(int *counters, int sz); 22 | 23 | -------------------------------------------------------------------------------- /c_test_environment/dates.cc: -------------------------------------------------------------------------------- 1 | #include "dates.h" 2 | #include 3 | 4 | namespace dates { 5 | const uint32_t date_format_len = 11; 6 | 7 | uint64_t year(std::string date) { 8 | return std::stoi(date.substr(0, 4)); 9 | } 10 | 11 | uint64_t month(std::string date) { 12 | return std::stoi(date.substr(5, 2)); 13 | } 14 | 15 | uint64_t day(std::string date) { 16 | return std::stoi(date.substr(8, 2)); 17 | } 18 | 19 | namespace impl { 20 | 
// date format is a string YYYY-MM-DD
//                         0123456789
//                        "1998-01-01"

namespace dates {

  // Numeric year / month / day parsed from a "YYYY-MM-DD" string.
  uint64_t year(std::string date);
  uint64_t month(std::string date);
  uint64_t day(std::string date);

  // for passing an array
  // TODO implicit conversion to avoid this code, https://github.com/uwescience/raco/issues/454
  // NOTE(review): the array is read through data() as a C string, so it is
  // presumably NUL-terminated -- confirm against callers.
  // Each overload must return the delegated result: flowing off the end of
  // a non-void function is undefined behaviour.
  template <std::size_t N>
  uint64_t year(std::array<char, N> date) {
    return year(std::string(date.data()));
  }
  template <std::size_t N>
  uint64_t month(std::array<char, N> date) {
    return month(std::string(date.data()));
  }
  template <std::size_t N>
  uint64_t day(std::array<char, N> date) {
    return day(std::string(date.data()));
  }

  // Return the date `days` days after `t` (negative values go backwards),
  // formatted as "YYYY-MM-DD".
  std::string add(std::string t, int64_t days);
}
class DatalogGrappaTest(unittest.TestCase, DatalogPlatformTest):
    """Datalog platform tests targeting the Grappa backend.

    Code generation is exercised, but every test then raises SkipTest.
    Statements after a `raise`/`return` below are unreachable and are kept
    as markers for the accompanying TODOs.
    """

    def check(self, query, name):
        """Emit Grappa code for `query` as `grappa_<name>`, then skip."""
        with Chdir("c_test_environment"):
            emitCode(query, 'grappa_%s' % name, GrappaAlgebra)
            # TODO actually be able to check the query
            raise SkipTest(query)
            checkquery(name, GrappalangRunner(binary_input=False))

    def check_file(self, query, name):
        """Not implemented: always skips."""
        # TODO implement this function
        raise SkipTest(query)

    def setUp(self):
        """Currently a no-op; the relation generation below is disabled."""
        # TODO instead of returning, we should do something with GRAPPA_HOME
        return

        with Chdir("c_test_environment"):
            grappa_home = os.environ.copy()['GRAPPA_HOME']
            targetpath = os.path.join(
                grappa_home, 'build/Make+Release/applications/join')
            if need_generate(targetpath):
                generate_default(targetpath)
relationInfo *relInfo); 63 | 64 | void printrelation(struct relationInfo *R); 65 | 66 | 67 | template 68 | std::vector tuplesFromAscii(const char *path) { 69 | std::string pathst(path); 70 | std::ifstream testfile(pathst, std::ifstream::in); 71 | 72 | std::vector tuples; 73 | 74 | std::string line; 75 | while (std::getline(testfile,line)) { 76 | std::istringstream ss(line); 77 | tuples.push_back(T::fromIStream(ss)); 78 | } 79 | 80 | // rely on RVO to avoid content copy 81 | return tuples; 82 | } 83 | 84 | void write_count(const char* path, uint64_t count); 85 | 86 | 87 | #define ZAPPA 88 | 89 | -------------------------------------------------------------------------------- /c_test_environment/osutils.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import os 3 | 4 | 5 | class Chdir: 6 | """ 7 | Safe cd that is really a pushd then popd on leaving the scope 8 | """ 9 | def __init__( self, newPath ): 10 | self._newPath = newPath 11 | 12 | def __enter__( self ): 13 | self._savedPath = os.getcwd() 14 | os.chdir(self._newPath) 15 | return self # so "with Chdir(...) as d" binds the manager instead of None 16 | 17 | def __exit__( self, x, y, z ): 18 | os.chdir( self._savedPath ) 19 | 20 | def mkdir_p(path): 21 | try: 22 | os.makedirs(path) 23 | except OSError as exc: 24 | if exc.errno == errno.EEXIST and os.path.isdir(path): 25 | pass 26 | else: raise 27 | -------------------------------------------------------------------------------- /c_test_environment/radish_utils.cc: -------------------------------------------------------------------------------- 1 | #include "radish_utils.h" 2 | 3 | uint64_t identity_hash( int64_t k ) { 4 | return k; 5 | } 6 | 7 | // Affine hash of k; evaluated in uint64_t so negative keys and large products wrap (defined behaviour) and the result is always below the modulus. 8 | uint64_t linear_hash( int64_t k) { 9 | return (73251599ULL * static_cast<uint64_t>(k) + 110802387ULL) % 98764321261ULL; 10 | } 11 | 12 | static pairhash ph; 13 | uint64_t pair_hash( std::pair k ) { 14 | return ph.operator()(k); 15 | } 16 | -------------------------------------------------------------------------------- /c_test_environment/repr_myrial_tests.py: 
-------------------------------------------------------------------------------- 1 | import unittest 2 | from generate_test_relations import generate_default 3 | from generate_test_relations import need_generate 4 | from raco.backends.myria import MyriaLeftDeepTreeAlgebra 5 | from raco.platform_tests import MyriaLPlatformTestHarness, MyriaLPlatformTests 6 | from raco.from_repr import plan_from_repr 7 | 8 | import sys 9 | sys.path.append('./examples') 10 | from osutils import Chdir 11 | 12 | 13 | class MyriaLReprTest(MyriaLPlatformTestHarness, MyriaLPlatformTests): 14 | def check(self, query, name, **kwargs): 15 | kwargs['target_alg'] = MyriaLeftDeepTreeAlgebra() 16 | plan = self.get_physical_plan(query, **kwargs) 17 | assert plan == plan_from_repr(repr(plan)) 18 | 19 | def setUp(self): 20 | super(MyriaLReprTest, self).setUp() 21 | with Chdir("c_test_environment") as d: 22 | if need_generate(): 23 | generate_default() 24 | 25 | 26 | if __name__ == '__main__': 27 | unittest.main() 28 | -------------------------------------------------------------------------------- /c_test_environment/run_query.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ Executes cpp runner and retrieves the results """ 4 | 5 | import argparse 6 | import os 7 | import sys 8 | import subprocess 9 | sys.path.append('./c_test_environment') 10 | from testquery import ClangRunner, GrappalangRunner 11 | import osutils 12 | 13 | 14 | def parse_options(args): 15 | parser = argparse.ArgumentParser() 16 | 17 | parser.add_argument('platform', metavar='P', type=str, 18 | help='Type of platform to use: cpp or grappa', choices=['grappa', 'cpp']) 19 | 20 | parser.add_argument('file', help='File containing platform source program') 21 | parser.add_argument('--query', help='File containing myrial query') 22 | parser.add_argument('--catalog', help='File containing catalog') 23 | 24 | ns = parser.parse_args(args) 25 | return ns 26 | 27 | 28 
| from raco.backends.cpp import CCAlgebra 29 | from raco.backends.radish import GrappaAlgebra 30 | from raco.catalog import FromFileCatalog 31 | from raco.backends.cpp.cppcommon import EMIT_FILE 32 | from clang_processor import ClangProcessor 33 | 34 | 35 | def main(args): 36 | opt = parse_options(args) 37 | osutils.mkdir_p("logs") 38 | abspath = os.path.abspath("logs") 39 | name = opt.file 40 | 41 | if opt.query: 42 | if opt.catalog is None: 43 | raise Exception("--query also requires a --catalog") 44 | 45 | with open(opt.query, 'r') as f: 46 | qt = f.read() 47 | 48 | target_alg = CCAlgebra(emit_print=EMIT_FILE) 49 | if opt.platform == 'grappa': 50 | target_alg = GrappaAlgebra(emit_print=EMIT_FILE) 51 | ClangProcessor(FromFileCatalog.load_from_file(opt.catalog))\ 52 | .write_source_code(qt, name, target_alg=target_alg) 53 | 54 | if opt.platform == 'grappa': 55 | runner = GrappalangRunner() 56 | runner.run(name, abspath) 57 | elif opt.platform == 'cpp': 58 | try: 59 | runner = ClangRunner() 60 | runner.run(name, abspath) 61 | except subprocess.CalledProcessError as e: 62 | print 'cpp runner for %s failed' % (name) 63 | print e.output 64 | raise 65 | 66 | 67 | if __name__ == "__main__": 68 | main(sys.argv[1:]) 69 | -------------------------------------------------------------------------------- /c_test_environment/strings.cc: -------------------------------------------------------------------------------- 1 | #include "strings.h" 2 | 3 | #include 4 | #include 5 | 6 | 7 | 8 | size_t StringIndex::size() const { 9 | return strings.size(); 10 | } 11 | 12 | StringIndex::StringIndex(const std::map& mapping) : strings(), indices() { 13 | // mapping stores the strings in sorted order 14 | // so this forms sorted vectors 15 | for (auto p : mapping) { 16 | strings.push_back(p.first); 17 | indices.push_back(p.second); 18 | } 19 | } 20 | 21 | // This integer represents strings not in the database 22 | const int64_t DB_NON_EXISTANT_STRING = -1; 23 | int64_t 
StringIndex::string_lookup(const std::string& s) const { 24 | // TODO: use trie structure instead of binary search 25 | 26 | auto ifound = QueryUtils::binary_search( this->strings.begin(), this->strings.end(), s); 27 | if (ifound == this->strings.end()) { 28 | return DB_NON_EXISTANT_STRING; 29 | } else { 30 | auto ind = (ifound - this->strings.begin()); 31 | return this->indices[ind]; 32 | } 33 | } 34 | 35 | StringIndex::StringIndex() : strings(), indices() {} 36 | 37 | StringIndex build_string_index(const std::string& indexfn) { 38 | std::map str2int; 39 | std::ifstream file( indexfn ); 40 | std::string line; 41 | int64_t ln = 0; 42 | while (getline( file, line )) { 43 | str2int[line] = ln++; 44 | } 45 | 46 | return StringIndex(str2int); 47 | } 48 | 49 | std::regex compile_like_pattern(const std::string& pattern) { 50 | // Translate a SQL LIKE pattern into a regex: '%' becomes ".*" and every 51 | // regex metacharacter is escaped so that it matches literally. 52 | // NOTE(review): the LIKE wildcard '_' is still passed through unchanged. 53 | static const std::string metachars("\\^$.|?*+()[]{}"); 54 | std::stringstream ss; 55 | for (auto c=pattern.begin(); c!=pattern.end(); ++c) { 56 | if (*c == '%') { 57 | ss << ".*"; 58 | } else { 59 | if (metachars.find(*c) != std::string::npos) { 60 | ss << '\\'; 61 | } 62 | ss << *c; 63 | } 64 | } 65 | return std::regex(ss.str()); 66 | } 67 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/aggregate_count.sql: -------------------------------------------------------------------------------- 1 | select COUNT(a) from R1; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/aggregate_count_group_one.sql: -------------------------------------------------------------------------------- 1 | select b, COUNT(a) from R2 group by b; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/aggregate_count_group_one_notgroup_filtered_one.sql: -------------------------------------------------------------------------------- 1 | select b, COUNT(a) from R3 where c < 5 group by b; 2 | -------------------------------------------------------------------------------- 
/c_test_environment/testqueries/aggregate_count_group_one_notgroup_one.sql: -------------------------------------------------------------------------------- 1 | select b, COUNT(a) from R3 group by b; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/aggregate_double.sql: -------------------------------------------------------------------------------- 1 | select a, SUM(b) from D2 group by a; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/aggregate_max.sql: -------------------------------------------------------------------------------- 1 | select MAX(a) from T2; -------------------------------------------------------------------------------- /c_test_environment/testqueries/aggregate_min.sql: -------------------------------------------------------------------------------- 1 | select MIN(a) from T2; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/aggregate_of_binop.sql: -------------------------------------------------------------------------------- 1 | select SUM(a+b) from R2; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/aggregate_of_binop_double.sql: -------------------------------------------------------------------------------- 1 | select a, MAX(b-c) from D3 group by a; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/aggregate_of_binop_no_key_unionall_double.sql: -------------------------------------------------------------------------------- 1 | select MAX(b-c) from D3 2 | UNION ALL 3 | select MIN(c-b) from D3; 4 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/aggregate_string.sql: 
-------------------------------------------------------------------------------- 1 | select a, COUNT(b) from C3 group by a; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/aggregate_sum.sql: -------------------------------------------------------------------------------- 1 | select SUM(a) from R1; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/apply.sql: -------------------------------------------------------------------------------- 1 | select b from (select a, b from T2); 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/apply_and_self_join.sql: -------------------------------------------------------------------------------- 1 | select X.a, Y.b from (select a as a, c as b from T3 where b < 4) X, 2 | (select a as a, c as b from T3 where b < 4) Y 3 | where X.b=Y.a; 4 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/argmax.myl: -------------------------------------------------------------------------------- 1 | -- arbitrarily favor new value in case of tie 2 | def pickval(value, arg, _value, _arg): 3 | case when value >= _value then arg 4 | else _arg end; 5 | 6 | uda ArgMax(outcome, lprob) { 7 | -- init 8 | [0 as _outcome, 0 as _lprob]; 9 | 10 | -- update 11 | [pickval(lprob, outcome, _lprob, _outcome), 12 | pickval(lprob, lprob, _lprob, _lprob)]; 13 | 14 | -- output 15 | [_lprob, _outcome]; 16 | }; 17 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/argmax_all_uda.sql: -------------------------------------------------------------------------------- 1 | select b, max(c) from I3; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/argmax_uda.sql: 
-------------------------------------------------------------------------------- 1 | -- for each a, compute max c and the corresponding b (argmax) 2 | select a, b, max(c) from I3 group by a; 3 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/builtin_and_UDA.sql: -------------------------------------------------------------------------------- 1 | select a, b, MAX(c), SUM(b) from I3 group by a; -- seems wrong -------------------------------------------------------------------------------- /c_test_environment/testqueries/common_index_allowed.sql: -------------------------------------------------------------------------------- 1 | select t.a, t.b, r1.b, r2.b from T2 t, R2 r1, R2 r2 2 | where t.a=r1.a 3 | and r1.a=r2.a; 4 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/common_index_disallowed.sql: -------------------------------------------------------------------------------- 1 | select t.a, t.b, r1.b, r2.a from T2 t, R2 r1, R2 r2 2 | where t.a=r1.a 3 | and r1.a=r2.b; 4 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/countstar_string.sql: -------------------------------------------------------------------------------- 1 | select COUNT(b) from C3; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/directed_squares.sql: -------------------------------------------------------------------------------- 1 | select r.a, s.a, t.a, z.a from R2 r, S2 s, T2 t, R3 z where r.b=s.a and s.b=t.a and t.b=z.a and z.b=r.a; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/directed_triangles.sql: -------------------------------------------------------------------------------- 1 | select r.a, s.a, t.a from R2 r, S2 s, T2 t where r.b=s.a and s.b=t.a and t.b=r.a; 2 
| -------------------------------------------------------------------------------- /c_test_environment/testqueries/expr_singleton.sql: -------------------------------------------------------------------------------- 1 | select (1 - 0.85)/1000; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/few_col_store.sql: -------------------------------------------------------------------------------- 1 | select a from R2 2 | where b=3; 3 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/groupby_string_key.sql: -------------------------------------------------------------------------------- 1 | select sum(C2.a), C2.b from C2 2 | group by C2.b; 3 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/groupby_string_multi_key.sql: -------------------------------------------------------------------------------- 1 | select sum(C3.a), C3.b, C3.c from C3 2 | group by C3.b, C3.c 3 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/head_scalar_op.sql: -------------------------------------------------------------------------------- 1 | select a+b from R2; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/idivide.sql: -------------------------------------------------------------------------------- 1 | select a/b from T2 where b!=0; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/join.sql: -------------------------------------------------------------------------------- 1 | select t3.a, r3.c from T3 t3, R3 r3 where t3.b=r3.b and r3.a=r3.c; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/join_of_aggregate_of_join.sql: 
-------------------------------------------------------------------------------- 1 | select a.rsum, s.b from S2 s, 2 | (select SUM(r.a) as rsum, t.b as tc from R2 r, T2 t 3 | where r.b = t.a group by t.b) a 4 | where 5 | a.tc = s.a; 6 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/join_of_two_unionalls.sql: -------------------------------------------------------------------------------- 1 | --unionall 2 | select A1.a from 3 | (select a, b from T2 4 | union all 5 | select a, b from R2) A1, 6 | (select a, b from T2 7 | union all 8 | select a, b from R2) A2 9 | where A1.a=A2.a; 10 | 11 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/join_string_key.sql: -------------------------------------------------------------------------------- 1 | select r1.a, r2.a from C3 r1, C3 r2 where r1.b=r2.c; 2 | 3 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/join_string_val.sql: -------------------------------------------------------------------------------- 1 | select C2.b, T2.b from C2, T2 where C2.a=T2.a; 2 | 3 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/join_swap_indexing.sql: -------------------------------------------------------------------------------- 1 | select t3.a, s3.b, r3.b from T3 t3, R3 r3, S3 s3 where s3.c=r3.a and r3.c=t3.c; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/join_then_aggregate.sql: -------------------------------------------------------------------------------- 1 | select SUM(R2.a), R2.b from R2, S2, T2 where R2.b=S2.a and S2.a=T2.a group by R2.b; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/join_two_types.sql: 
-------------------------------------------------------------------------------- 1 | select * from C3, R3 where C3.a = R3.c; -------------------------------------------------------------------------------- /c_test_environment/testqueries/like_begin.sql: -------------------------------------------------------------------------------- 1 | select * from C2 where b like "A%" or b like 'cof%'; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/like_begin_end.sql: -------------------------------------------------------------------------------- 1 | select * from C2 where b like "A%B" or b like "co%fe"; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/like_end.sql: -------------------------------------------------------------------------------- 1 | select * from C2 where b like "%A" or b like '%ee'; -------------------------------------------------------------------------------- /c_test_environment/testqueries/like_middle.sql: -------------------------------------------------------------------------------- 1 | select * from C2 where b like "%A%" or b like "%fe%"; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/matrix_mult.sql: -------------------------------------------------------------------------------- 1 | select t1.a as src, t2.b as dst, count(t1.a) from T2 t1, T2 t2 2 | where t1.b = t2.a 3 | group by t1.a, t2.b; 4 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/more_col_store.sql: -------------------------------------------------------------------------------- 1 | select r.a, s.a, t.a from R2 r, S2 s, T2 t 2 | where r.b = s.a 3 | and s.b = t.a 4 | and t.b = r.a; -------------------------------------------------------------------------------- /c_test_environment/testqueries/multi_builtin.sql: 
-------------------------------------------------------------------------------- 1 | select c, MAX(a), SUM(b) from I3 group by c; -------------------------------------------------------------------------------- /c_test_environment/testqueries/project_string.sql: -------------------------------------------------------------------------------- 1 | select b from C3; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/q2.sql: -------------------------------------------------------------------------------- 1 | select T1.a as inproc, T2.c as author, T3.c as booktitle, T4.c as title, T5.c as proc, T6.c as ee, T7.c as page, T8.c as url, T9.c as yr 2 | from R3 T1, 3 | R3 T2, 4 | R3 T3, 5 | R3 T4, 6 | R3 T5, 7 | R3 T6, 8 | R3 T7, 9 | R3 T8, 10 | R3 T9 11 | WHERE T1.a=T2.a 12 | and T2.a=T3.a 13 | and T3.a=T4.a 14 | and T4.a=T5.a 15 | and T5.a=T6.a 16 | and T6.a=T7.a 17 | and T7.a=T8.a 18 | and T8.a=T9.a 19 | and T1.b = 1 and T1.c > 5 20 | and T2.b = 1 21 | and T3.b = 1 22 | and T4.b = 1 23 | and T5.b = 1 24 | and T6.b = 1 25 | and T7.b = 1 26 | and T8.b = 1 27 | and T9.b = 1; 28 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/scan.sql: -------------------------------------------------------------------------------- 1 | select * from T1; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/select.sql: -------------------------------------------------------------------------------- 1 | select * from T1 where a > 5; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/select_conjunction.sql: -------------------------------------------------------------------------------- 1 | select * from T1 where a > 0 and a < 10; 2 | -------------------------------------------------------------------------------- 
/c_test_environment/testqueries/select_double.sql: -------------------------------------------------------------------------------- 1 | select a,c from D3 where b < 6.4; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/select_string.sql: -------------------------------------------------------------------------------- 1 | select b from C3 where b="coffee"; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/select_string_literal.sql: -------------------------------------------------------------------------------- 1 | select * from C3 where b = "coffee"; -------------------------------------------------------------------------------- /c_test_environment/testqueries/select_then_join.sql: -------------------------------------------------------------------------------- 1 | select t.a,t.b,t.c from T3 t, R2 r where t.a1 and S3.a>2 and T3.a>3; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/two_key_hash_join.sql: -------------------------------------------------------------------------------- 1 | select R3.c, T3.c from R3, T3 where R3.a=T3.a and R3.b=T3.b; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/two_key_hash_join_swap.sql: -------------------------------------------------------------------------------- 1 | select R3.c, T3.c from R3, T3 where R3.a=T3.b and R3.b=T3.a; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/two_path.sql: -------------------------------------------------------------------------------- 1 | select r.a, r.b, s.b from R2 r, S2 s where r.b=s.a; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/two_var_select.sql: 
-------------------------------------------------------------------------------- 1 | select a, b from T2 where a<9 and b<9; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/unionall.sql: -------------------------------------------------------------------------------- 1 | -- unionall 2 | select a from T1 3 | union all 4 | select a from R1; 5 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/unionall_3.sql: -------------------------------------------------------------------------------- 1 | select * from T1 2 | union all 3 | select * from R1 4 | union all 5 | select * from S1; 6 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/unionall_apply_and_self_join.sql: -------------------------------------------------------------------------------- 1 | --unionall 2 | select X.a, Y.a, Y.b from (select t.a as a, t.b as b from T2 t, R1 r where t.b < 4 and t.a=r.a 3 | union all 4 | select r.a as a, r.b as b from R2 r, T1 t where r.a=t.a) X, 5 | (select t.a as a, t.b as b from T2 t, R1 r where t.b < 4 and t.a=r.a 6 | union all 7 | select r.a as a, r.b as b from R2 r, T1 t where r.a=t.a) Y 8 | where X.b=Y.a; 9 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/unionall_of_join.sql: -------------------------------------------------------------------------------- 1 | --unionall 2 | select a, b from T2 3 | union all 4 | select r.a as a, t.b as b from R2 r, T2 t where r.b=t.a; 5 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/unionall_then_aggregate.sql: -------------------------------------------------------------------------------- 1 | select SUM(A2.a), A2.b from 2 | (select * from R2 3 | union all 4 | select * from S2) A2 5 | group by A2.b; 6 | 
-------------------------------------------------------------------------------- /c_test_environment/testqueries/unionall_then_join.sql: -------------------------------------------------------------------------------- 1 | --unionall 2 | select A.a from 3 | (select a, b from T2 4 | union all 5 | select a, b from R2) A, 6 | S1 s 7 | where s.a=A.a; 8 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/while.sql: -------------------------------------------------------------------------------- 1 | select 0; 2 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/while_repeat_groupby.sql: -------------------------------------------------------------------------------- 1 | select SUM(I.a) as a, I.c as b, SUM(I.b) as c from 2 | (select SUM(T3.a) as a, T3.c as b, SUM(T3.b) as c from T3 group by T3.c) as I 3 | group by I.c; 4 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/while_repeat_join.sql: -------------------------------------------------------------------------------- 1 | -- iteration 2 2 | select s1.b, s1.c, s1.a 3 | -- iteration 1 4 | from (select s1.b as a, s1.c as b, s1.a as c from T3 s1, T3 s2 5 | where s1.a=s2.b) as s1, 6 | (select s1.b as a, s1.c as b, s1.a as c from T3 s1, T3 s2 7 | where s1.a=s2.b) as s2 8 | where s1.a=s2.b; 9 | -------------------------------------------------------------------------------- /c_test_environment/testqueries/while_union_all.sql: -------------------------------------------------------------------------------- 1 | select 1234 2 | UNION ALL 3 | select 1234 4 | UNION ALL 5 | select 1234 6 | UNION ALL 7 | select 1234 8 | UNION ALL 9 | select 1234 10 | UNION ALL 11 | select 1234 12 | UNION ALL 13 | select 1234 14 | UNION ALL 15 | select 1234; 16 | -------------------------------------------------------------------------------- 
/c_test_environment/testqueries/zero_store.sql: -------------------------------------------------------------------------------- 1 | select r.a from R2 r 2 | where r.b = 11; 3 | -------------------------------------------------------------------------------- /c_test_environment/timing.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #if defined(__MTA__) 5 | #include 6 | #include 7 | #elif defined(__MACH__) 8 | #include 9 | #else 10 | #include 11 | #endif 12 | 13 | 14 | #define BILLION 1000000000 15 | 16 | 17 | /// "Universal" wallclock time (works at least for Mac, MTA, and most Linux) 18 | inline double walltime(void) { 19 | #if defined(__MTA__) 20 | return((double)mta_get_clock(0) / mta_clock_freq()); 21 | #elif defined(__MACH__) 22 | static mach_timebase_info_data_t info; 23 | mach_timebase_info(&info); 24 | uint64_t now = mach_absolute_time(); 25 | now *= info.numer; 26 | now /= info.denom; 27 | return 1.0e-9 * (double)now; 28 | #else 29 | struct timespec tp; 30 | #if defined(CLOCK_PROCESS_CPUTIME_ID) 31 | #define CLKID CLOCK_PROCESS_CPUTIME_ID 32 | #elif defined(CLOCK_REALTIME_ID) 33 | #define CLKID CLOCK_REALTIME_ID 34 | #endif 35 | clock_gettime(CLOCK_MONOTONIC, &tp); 36 | return (double)tp.tv_sec + (double)tp.tv_nsec / BILLION; 37 | #endif 38 | } 39 | -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | *.cpp 2 | *.cpp.orig 3 | *.dot 4 | build/ 5 | log*.rb 6 | *.logical.pdf 7 | *.physical*.pdf 8 | *.logical.ps 9 | *.physical.ps 10 | 11 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | 2 | %.png: %.dot 3 | dot -Tpng $< -o $@ 4 | 5 | %.ps: %.dot 6 | dot -Tps $< -o $@ 7 | 8 | %.pdf: %.ps 9 | ps2pdf $< 10 | pdfcrop $@ 11 | mv 
`basename $@ .pdf`-crop.pdf $@ 12 | -------------------------------------------------------------------------------- /examples/bad_column_name.myl: -------------------------------------------------------------------------------- 1 | T = empty(x:int); 2 | A = [from T emit SafeDiv(x, 3) AS SafeDiv]; 3 | store(A, OUTPUT); 4 | -------------------------------------------------------------------------------- /examples/cast.myl: -------------------------------------------------------------------------------- 1 | Emp = SCAN(public:adhoc:employee); 2 | Groups = [FROM Emp EMIT id + 3, string(salary)]; 3 | Store(Groups, OUTPUT); 4 | -------------------------------------------------------------------------------- /examples/catalog.py: -------------------------------------------------------------------------------- 1 | # Schemas corresponding to Myrial examples 2 | 3 | { 4 | 'public:adhoc:edges': [('src','LONG_TYPE'), ('dst', 'LONG_TYPE')], 5 | 'public:adhoc:vertices': [('id','LONG_TYPE')], 6 | 'public:adhoc:points': [('id','LONG_TYPE'), ('x','DOUBLE_TYPE'), ('y', 'DOUBLE_TYPE')], 7 | 'public:adhoc:sc_points': [('id', 'LONG_TYPE'), ('v', 'DOUBLE_TYPE')], 8 | 'public:adhoc:employee' : [('id', 'LONG_TYPE'), ('dept_id', 'LONG_TYPE'), ('name', 'STRING_TYPE'), 9 | ('salary','LONG_TYPE')], 10 | 'public:adhoc:departments' : [('id', 'LONG_TYPE'), ('name','STRING_TYPE')], 11 | 'armbrustlab:seaflow:all_data' : [('Cruise', 'LONG_TYPE'), 12 | ('Day', 'LONG_TYPE'), 13 | ('File_Id', 'LONG_TYPE'), 14 | ('chl_small', 'DOUBLE_TYPE'), 15 | ('pe', 'DOUBLE_TYPE')], 16 | 'public:adhoc:nodes_jstor' : [('paper_id', 'LONG_TYPE'), ('year','LONG_TYPE')], 17 | 'public:adhoc:links_jstor' : [('p1', 'LONG_TYPE'), ('p2','LONG_TYPE')], 18 | 'dhalperi:lineage:top_papers_jstor' : [('paper_id', 'LONG_TYPE')], 19 | 'public:adhoc:sp2bench' : [('subject', 'STRING_TYPE'), ('predicate','STRING_TYPE'), ('object','STRING_TYPE')], 20 | } 21 | 
-------------------------------------------------------------------------------- /examples/clog.py: -------------------------------------------------------------------------------- 1 | from raco.cpp_datalog_utils import emitCode 2 | from raco.backends.cpp import CCAlgebra 3 | import sys 4 | 5 | import logging 6 | logging.basicConfig(level=logging.DEBUG) 7 | LOG = logging.getLogger(__name__) 8 | 9 | if __name__ == "__main__": 10 | query = sys.argv[1] 11 | print query 12 | name = sys.argv[2] 13 | print name 14 | 15 | plan = "" 16 | if len(sys.argv) > 3: 17 | plan = sys.argv[3] 18 | 19 | lst = [] 20 | alg = CCAlgebra 21 | if plan: lst.append(plan) 22 | if name: lst.append(name) 23 | emitCode(query, "_".join(lst), alg, plan) 24 | 25 | -------------------------------------------------------------------------------- /examples/clog.sh: -------------------------------------------------------------------------------- 1 | query=$1 2 | name=$2 3 | 4 | cdir=`cd ..; pwd` 5 | cappsrcdir=$cdir/c_test_environment 6 | cbuilddir=$cdir/c_test_environment 7 | cappbuilddir=$gbuilddir/applications/join 8 | 9 | pushd $cbuilddir 10 | if [ ! -f R1 ]; then 11 | echo "GENERATING TEST DATA (first time)" 12 | python generate_test_relations.py 13 | fi 14 | popd 15 | 16 | 17 | echo "GENERATING QUERY CODE" 18 | PYTHONPATH=.. 
python clog.py "$query" $name 2> log.rb 19 | mv $name.cpp $cappsrcdir 20 | 21 | echo "COMPILING QUERY CODE" 22 | cd $cbuilddir; make $name.exe; echo "RUNNING QUERY CODE"; ./$name.exe 23 | 24 | -------------------------------------------------------------------------------- /examples/connected_components.myl: -------------------------------------------------------------------------------- 1 | E = scan(TwitterK); -- edges 2 | V = select distinct E.$0 from E; -- vertices 3 | CC = [from V emit V.$0 as node_id, V.$0 as component_id]; -- initial node IDs and component IDs 4 | do 5 | new_CC = [from E, CC where E.$0 = CC.$0 emit E.$1, CC.$1] + CC; -- join CC with the graph to propagate component IDs 6 | new_CC = [from new_CC emit new_CC.$0, MIN(new_CC.$1)]; -- for each vertex, only keep the minimum component ID 7 | delta = diff(CC, new_CC); 8 | CC = new_CC; 9 | while [from delta emit count(*) > 0]; -- while we have updates 10 | store(CC, CC); 11 | 12 | -------------------------------------------------------------------------------- /examples/deadcode.myl: -------------------------------------------------------------------------------- 1 | -- Begin dead code block 2 | X = [3.14159 AS y, 3 AS id, 4 AS x]; 3 | Y = SCAN(public:adhoc:points); 4 | Z = SCAN(public:adhoc:points); 5 | 6 | X = [FROM X,Y WHERE X.y == Y.y EMIT X.id, Y.x, X.y]; 7 | X = DISTINCT(X); 8 | X = UNIONALL(X, Y); 9 | -- End dead code block 10 | 11 | X = SCAN(public:adhoc:points); 12 | Q = UNIONALL(X, Z); 13 | STORE(Q, OUTPUT); 14 | -------------------------------------------------------------------------------- /examples/deadcode2.myl: -------------------------------------------------------------------------------- 1 | x = [0 as val, 1 as exp]; 2 | do 3 | x = [from x emit val+1 as val, 2*exp as exp]; 4 | while [from x emit val < 5]; 5 | -- with no store, this should be the empty program 6 | -------------------------------------------------------------------------------- /examples/dept.csv:
-------------------------------------------------------------------------------- 1 | 1, "accounting", 5 2 | 2, "human resources", 2 3 | 3, "engineering", 2 4 | 4, "sales", 7 -------------------------------------------------------------------------------- /examples/emp.csv: -------------------------------------------------------------------------------- 1 | 1, 2, "Bill Howe", 25000 2 | 2, 1, "Dan Halperin", 90000 3 | 3, 1, "Andrew Whitaker", 5000 4 | 4, 2, "Shumo Chu", 5000 5 | 5, 1, "Victor Almeida", 25000 6 | 6, 3, "Dan Suciu", 90000 7 | 7, 1, "Magdalena Balazinska", 25000 -------------------------------------------------------------------------------- /examples/grappa_test_query.py: -------------------------------------------------------------------------------- 1 | from raco import RACompiler 2 | 3 | import logging 4 | logging.basicConfig(level=logging.DEBUG) 5 | LOG = logging.getLogger(__name__) 6 | 7 | def comment(s): 8 | return "/*\n%s\n*/\n" % str(s) 9 | 10 | def testEmit(query, name): 11 | LOG.info("compiling %s: %s", name, query) 12 | 13 | # Create a compiler object 14 | dlog = RACompiler() 15 | 16 | # parse the query 17 | dlog.fromDatalog(query) 18 | #print dlog.parsed 19 | LOG.info("logical: %s",dlog.logicalplan) 20 | 21 | dlog.optimize(target=GrappaAlgebra) 22 | 23 | LOG.info("physical: %s",dlog.physicalplan[0][1]) 24 | 25 | # generate code in the target language 26 | code = "" 27 | code += comment("Query " + query) 28 | code += dlog.compile() 29 | 30 | with open(name+'.cpp', 'w') as f: 31 | f.write(code) 32 | 33 | 34 | queries = [ 35 | ("A(s1) :- T1(s1)", "scan"), 36 | ("A(s1) :- T1(s1), s1>10", "select"), 37 | ("A(s1) :- T1(s1), s1>0, s1<10", "select_conjunction"), 38 | ("A(s1,s2) :- T2(s1,s2), s1>10, s2>10", "two_var_select"), 39 | ("A(s1,o2) :- T3(s1,p1,o1), R3(o2,p1,o2)", "join"), 40 | ("A(a,b,c) :- R2(a,b), S2(b,c)", "two_path"), 41 | ("A(a,c) :- R2(a,b), S2(b,c)", "two_hop"), 42 | ("A(a,b,c) :- R2(a,b), S2(b,c), T2(c,d)", "three_path"), 43 | 
("A(a,b,c) :- R2(a,b), S2(b,c), T2(c,a)", "directed_triangles"), 44 | ("A(a,b,c,d) :- R2(a,b), S2(b,c), T2(c,d), Z2(d,a)", "directed_squares"), 45 | ("A(s1,s2,s3) :- T3(s1,s2,s3), R2(s3,s4), s1 3: 17 | plan = sys.argv[3] 18 | 19 | lst = [] 20 | alg = GrappaAlgebra 21 | prefix = "grappa" 22 | lst.append(prefix) 23 | if plan: lst.append(plan) 24 | if name: lst.append(name) 25 | emitCode(query, "_".join(lst), alg, plan) 26 | 27 | -------------------------------------------------------------------------------- /examples/grappalog.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | query=$1 3 | name=$2 4 | plan=$3 5 | 6 | 7 | cappbuilddir=`cd ../c_test_environment; pwd` 8 | gdir=$GRAPPA_HOME 9 | gappsrcdir=$gdir/applications/join 10 | gbuilddir=$gdir/build/Make+Release 11 | gappbuilddir=$gbuilddir/applications/join 12 | 13 | pushd $gappbuilddir 14 | if [ ! -f R1 ]; then 15 | echo "GENERATING TEST DATA (first time)" 16 | python $cappbuilddir/generate_test_relations.py 17 | fi 18 | popd 19 | 20 | echo "GENERATING QUERY CODE" 21 | PYTHONPATH=.. python grappalog.py "$query" $name $plan 2> log.rb 22 | # get file name assuming it is most recent cpp file 23 | fullname=`ls -lt *cpp | head -n1 |awk '{gsub(/ +/, " ");print}' | cut -d' ' -f9 | cut -d'.' -f1` 24 | cp $fullname.cpp $gappsrcdir 25 | 26 | echo "COMPILING QUERY CODE" 27 | #TODO: make this not so new target dependent. 
Easy way is have a set of default targets that can be recycled 28 | cd $gdir; ./configure --gen=Make --mode=Release --cc=/sampa/share/distcc/gcc-4.7.2/bin/gcc --third-party=/sampa/share/grappa-third-party 29 | cd $gbuilddir; bin/distcc_make -j 24; cd $gappbuilddir; ../../bin/distcc_make -j24 $fullname.exe; echo "RUNNING QUERY CODE"; ../../bin/grappa_srun --ppn=4 --nnode=4 -f -- $fullname.exe 30 | #cd $gappbuilddir; ../../bin/distcc_make $fullname.exe; echo "RUNNING QUERY CODE"; ../../bin/grappa_srun --ppn=4 --nnode=4 -f -- $fullname.exe 31 | 32 | -------------------------------------------------------------------------------- /examples/groupby1.myl: -------------------------------------------------------------------------------- 1 | 2 | Emp = SCAN(public:adhoc:employee); 3 | Groups = [FROM Emp EMIT COUNT(salary), Emp.id]; 4 | Store(Groups, OUTPUT, [$1]); 5 | -------------------------------------------------------------------------------- /examples/groupby2.myl: -------------------------------------------------------------------------------- 1 | 2 | Emp = SCAN(public:adhoc:employee); 3 | Groups = [FROM Emp EMIT COUNT(*)]; 4 | Store(Groups, OUTPUT); 5 | -------------------------------------------------------------------------------- /examples/groupby3.myl: -------------------------------------------------------------------------------- 1 | 2 | Emp = SCAN(public:adhoc:employee); 3 | Groups = [FROM Emp EMIT id, AVG(salary), id]; 4 | Store(Groups, OUTPUT); 5 | -------------------------------------------------------------------------------- /examples/helloworld.py: -------------------------------------------------------------------------------- 1 | from raco.compile import compile, optimize 2 | from raco.expression.boolean import EQ, AND, OR 3 | from raco.expression import NamedAttributeRef, StringLiteral, NumericLiteral 4 | import raco.scheme 5 | import raco.catalog 6 | 7 | # declare the schema for each relation 8 | sch = raco.scheme.Scheme([("subject", int), 
("predicate", int), ("object", int)]) 9 | 10 | # Create a relation object. We can add formats here as needed. 11 | trialdat = raco.catalog.ASCIIFile("trial.dat", sch) 12 | print sch 13 | 14 | # Now write the RA expression 15 | 16 | # Scan just takes a pointer to a relation object 17 | R = Scan(trialdat, sch) #TODO: is this supposed to pass sch? 18 | print R.scheme() 19 | 20 | 21 | # Select 22 | # EQ(x,y) means x=y, GT(x,y) means x>y, etc. 23 | sR = Select(EQ(NamedAttributeRef("predicate"), NumericLiteral(1133564893)), R) 24 | sS = Select(EQ(NamedAttributeRef("predicate"), NumericLiteral(77645021)), R) 25 | #sT = Select(EQ(NamedAttributeRef("predicate"), NumericLiteral(77645021)), R) 26 | sT = Select(EQ(NamedAttributeRef("object"), NumericLiteral(1018848684)), R) 27 | 28 | # Join([(w,x),(y,z)], R, S) means "JOIN R, S ON (R.w = S.x AND R.y = S.z)" 29 | sRsS = Join([("object","subject")], sR, sS) 30 | sRsSsT = Join([("object","subject")], sRsS, sT) 31 | 32 | # optimize applies a set of rules to translate a source 33 | # expression to a target expression 34 | result = optimize([("Ans", sT)], CCAlgebra) 35 | 36 | # compile generates the linear code from the expression tree 37 | print compile(result) 38 | -------------------------------------------------------------------------------- /examples/iteration.myl: -------------------------------------------------------------------------------- 1 | -- Invariant: val = 2^exp 2 | x = [1 as val, 0 as exp]; 3 | do 4 | x = [from x emit val*2 as val, exp+1 as exp]; 5 | while [from x emit exp < 5]; 6 | store(x, powersOfTwo); 7 | -------------------------------------------------------------------------------- /examples/join.myl: -------------------------------------------------------------------------------- 1 | out = [FROM SCAN(public:adhoc:departments) AS D, SCAN(public:adhoc:employee) AS E 2 | WHERE E.dept_id == D.id AND E.salary > 5000 3 | EMIT E.name AS emp_name, D.name AS dept_name]; 4 | STORE(out, OUTPUT); 5 | 
-------------------------------------------------------------------------------- /examples/join.sql: -------------------------------------------------------------------------------- 1 | emp = scan(public:adhoc:employee); 2 | dept = scan(public:adhoc:departments); 3 | out = select emp.name as emp_name, dept.name as dept_name 4 | from dept, emp 5 | where emp.dept_id == dept.id AND emp.salary > 5000; 6 | store(out, OUTPUT); 7 | -------------------------------------------------------------------------------- /examples/kmeans.myl: -------------------------------------------------------------------------------- 1 | 2 | DEF EuclideanDistance(x0, y0, x1, y1): 3 | sqrt(pow(x0 - x1, 2) + pow(y0 - y1, 2)); 4 | 5 | -- Load some points; assume each point has a unique ID 6 | Point = SCAN(public:adhoc:points); 7 | 8 | -- Create some initial cluster centers from the first K points 9 | -- TODO: We should choose these at random somehow... 10 | -- TODO: The cluster count should be expressible as a constant 11 | Centroid = [FROM LIMIT(Point, 3) AS K EMIT id AS cluster_id, x AS x,y AS y]; 12 | 13 | 14 | -- Assign each point to the first cluster 15 | FirstCluster = LIMIT(Centroid, 1); 16 | Kmeans = [FROM Point EMIT Point.id AS id, 17 | *FirstCluster.cluster_id AS cluster_id]; 18 | 19 | DO 20 | -- Calculate distance from each point to each centroid 21 | Distance = [FROM Point, Centroid 22 | EMIT Point.id AS id, 23 | Centroid.cluster_id AS cluster_id, 24 | EuclideanDistance(Point.x, Point.y, Centroid.x, Centroid.y) AS distance]; 25 | 26 | -- Choose closest cluster for each point 27 | Closest = [FROM Distance EMIT id, MIN(distance) AS distance]; 28 | NewKmeans = [FROM Closest, Distance 29 | WHERE Closest.id == Distance.id AND 30 | ABS(Closest.distance - Distance.distance) < .000001 31 | EMIT Closest.id AS id, MIN(Distance.cluster_id) AS cluster_id]; 32 | 33 | -- Compute delta from the previous iteration 34 | Delta = DIFF(NewKmeans, Kmeans); 35 | Continue = [FROM Delta EMIT COUNT(id) > 0]; 
36 | 37 | Kmeans = NewKmeans; 38 | 39 | -- Update centroids 40 | PointsInCentroid = [FROM Centroid, Kmeans, Point 41 | WHERE Centroid.cluster_id == Kmeans.cluster_id AND 42 | Point.id == Kmeans.id 43 | EMIT Centroid.cluster_id AS cluster_id, Point.x AS x, 44 | Point.y AS y]; 45 | 46 | Centroid = [FROM PointsInCentroid EMIT cluster_id, avg(x) AS x, avg(y) AS y]; 47 | 48 | WHILE Continue; 49 | 50 | STORE(Kmeans, OUTPUT); 51 | -------------------------------------------------------------------------------- /examples/language_demo.myl: -------------------------------------------------------------------------------- 1 | T1 = scan(TwitterK); 2 | T2 = [from T1 emit $0 == "foo bar" as x]; 3 | 4 | -- wrong: 5 | T2 = [from T1 emit $0 == 'foo bar' as x]; 6 | 7 | def triangleArea(a,b): (a*b)/2; 8 | R = [from Foo emit triangleArea(x,y) as area]; 9 | 10 | apply RunningMean(value) { 11 | [0 as c, 0 as s]; 12 | [c + 1 as c, s + value as s]; 13 | s / c; 14 | }; 15 | 16 | -- number of allowed standard deviations 17 | N = [2]; 18 | 19 | --this is a comment 20 | 21 | newBad = empty(id:int, v:float); 22 | 23 | bc = [from emp emit emp.*]; 24 | 25 | out = [from emp where $0 * 2 == $1 emit *]; 26 | out = [from emp where $0 // $1 <> $1 emit *]; 27 | 28 | -- Unicode math operators ≤, ≥, ≠ 29 | out = [from emp where $0 ≤ $1 and $0 ≠ $1 and $1 ≥ $0 emit *]; 30 | 31 | do 32 | mean = [from Good emit avg(v) as val]; 33 | -- foo bar 34 | NewBad = [from Good where abs(Good.v - *mean) > *N * *std emit *]; 35 | continue = diff(Good, NewBad); 36 | while continue; 37 | 38 | store(Good, OUTPUT); 39 | 40 | -- comment 41 | T3 = [from T1 emit sin(a)/4 + b as x]; 42 | store(T2, JustX); -------------------------------------------------------------------------------- /examples/load_options.csv: -------------------------------------------------------------------------------- 1 | this file uses the pipe character ("|") as field delimiter and the tilde ("~") as the quote character 2 | the percent character 
("%") is used to escape the field delimiter 3 | 1|foo|~abc|def~|1.0 4 | 2|bar|ghi%|jkl|2.0 5 | -------------------------------------------------------------------------------- /examples/load_opts.myl: -------------------------------------------------------------------------------- 1 | t = load("https://s3-us-west-2.amazonaws.com/myria/public-adhoc-TwitterK.csv", csv(schema(column0:int, column1:int), skip=1)); 2 | store(t, TwitterK2); 3 | -------------------------------------------------------------------------------- /examples/naivebayes/.gitignore: -------------------------------------------------------------------------------- 1 | msd_catalog*py 2 | -------------------------------------------------------------------------------- /examples/naivebayes/buckets.myl: -------------------------------------------------------------------------------- 1 | input = SCAN(testdata); 2 | 3 | discrete = select id, 4 | x0/10 as x0, 5 | x1/10 as x1, 6 | x2/10 as x2, 7 | x3/10 as x3, 8 | x4/10 as x4 9 | from input; 10 | 11 | store(discrete, OUTPUT); 12 | -------------------------------------------------------------------------------- /examples/naivebayes/cat_scheme: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from raco.catalog import FromFileCatalog 4 | import sys 5 | 6 | c1 = FromFileCatalog.load_from_file(sys.argv[1]) 7 | c2 = FromFileCatalog.load_from_file(sys.argv[2]) 8 | FromFileCatalog.print_cat(c1, c2) 9 | -------------------------------------------------------------------------------- /examples/naivebayes/catalog.py: -------------------------------------------------------------------------------- 1 | {'public:adhoc:conditionals': [('index', 'LONG_TYPE'), ('lp', 'DOUBLE_TYPE'), ('outcome', 'LONG_TYPE'), ('value', 'LONG_TYPE')], 2 | 'public:adhoc:testdata': [('id', 'LONG_TYPE'), ('x0', 'DOUBLE_TYPE'), 3 | ('x1', 'DOUBLE_TYPE'), 4 | ('x2', 'DOUBLE_TYPE'), 5 | ('x3', 'DOUBLE_TYPE'), 6 | ('x4', 
'DOUBLE_TYPE')], 7 | 'public:adhoc:trainingdata': [('id', 'LONG_TYPE'), ('x0', 'DOUBLE_TYPE'), 8 | ('x1', 'DOUBLE_TYPE'), 9 | ('x2', 'DOUBLE_TYPE'), 10 | ('x3', 'DOUBLE_TYPE'), 11 | ('x4', 'DOUBLE_TYPE'), 12 | ('y', 'LONG_TYPE')] 13 | } 14 | -------------------------------------------------------------------------------- /examples/naivebayes/create_scheme.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import argparse 4 | 5 | AVG_COLS = 12 6 | COV_COLS = 78 7 | 8 | if __name__ == "__main__": 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument("-a", dest='avg_cols', type=int, required=True, help='number of timbre average columns') 11 | parser.add_argument("-c", dest='cov_cols', type=int, required=True, help='number of timbre covariance columns') 12 | parser.add_argument("--no-id", dest='id', action='store_false', default=True, help='include an id [default=true]') 13 | parser.add_argument("--no-y", dest='y', action='store_false', default=True, help='include an id [default=true]') 14 | parser.add_argument("--input", dest='inputtype', help="test or train", required=True) 15 | 16 | opt = parser.parse_args(sys.argv[1:]) 17 | 18 | assert opt.avg_cols <= AVG_COLS 19 | assert opt.cov_cols <= COV_COLS 20 | 21 | sch = [] 22 | if opt.id: 23 | sch.append(('id', 'LONG_TYPE',)) 24 | 25 | if opt.y: 26 | sch.append(('y', 'LONG_TYPE',)) 27 | 28 | for i in range(opt.avg_cols): 29 | sch.append(('x{0}'.format(i), 'DOUBLE_TYPE',)) 30 | 31 | for i in range(opt.cov_cols): 32 | sch.append(('x{0}'.format(i+opt.avg_cols), 'DOUBLE_TYPE')) 33 | 34 | cat = {} 35 | 36 | if opt.inputtype == 'train': 37 | cat['public:adhoc:trainingdata'] = sch 38 | else: 39 | cat['public:adhoc:testdata'] = sch 40 | 41 | print cat 42 | -------------------------------------------------------------------------------- /examples/naivebayes/generate_parse.py: -------------------------------------------------------------------------------- 1 | import 
sys 2 | 3 | nfeat = int(sys.argv[1]) 4 | 5 | y = int(sys.argv[2]) 6 | 7 | if y==1: 8 | parse_template = "input_sp{i} = select INT(input.x{i}/{bwidth}) as value, {i} as index, y from input;" 9 | else: 10 | parse_template = "input_sp{i} = select id, INT(input.x{i}/{bwidth}) as value, {i} as index from input;" 11 | 12 | if y==1: 13 | print "input = SCAN(trainingdata);" 14 | else: 15 | print "input = SCAN(testdata);" 16 | 17 | bwidth = 10 18 | for i in range(nfeat): 19 | print parse_template.format(i=i, bwidth=bwidth) 20 | 21 | inputs = [] 22 | for i in range(nfeat): 23 | inputs.append("input_sp%d" % i) 24 | print "input_sp = UNIONALL(%s);" % ', '.join(inputs) 25 | -------------------------------------------------------------------------------- /examples/naivebayes/naivebayes_classify.myl: -------------------------------------------------------------------------------- 1 | -- arbitrarily favor new value in case of tie 2 | def pickval(value, arg, _value, _arg): 3 | case when value >= _value then arg 4 | else _arg end; 5 | 6 | uda ArgMax(outcome, lprob) { 7 | -- init 8 | [0 as _outcome, 0 as _lprob]; 9 | 10 | -- update 11 | [pickval(lprob, outcome, _lprob, _outcome), 12 | pickval(lprob, lprob, _lprob, _lprob)]; 13 | 14 | -- output 15 | [_lprob, _outcome]; 16 | }; 17 | 18 | 19 | CondP = SCAN(conditionals); 20 | 21 | -- calculate probability of outcomes 22 | Poe = select input_sp.id as inputId, 23 | sum(CondP.lp) as lprob, 24 | CondP.outcome as outcome 25 | from CondP, input_sp 26 | where 27 | CondP.index=input_sp.index 28 | and CondP.value=input_sp.value; 29 | -- double join! 
30 | --group by CondP.outcome, input_sp.id; 31 | 32 | -- select the max probability outcome 33 | classes = select inputId, ArgMax(outcome, lprob) from Poe; 34 | 35 | store(classes, classified); 36 | -------------------------------------------------------------------------------- /examples/naivebayes/naivebayes_train.myl: -------------------------------------------------------------------------------- 1 | freq_o = select y as outcome, count(y) as freq from input_sp; 2 | 3 | freq_e_o = select y as outcome, index, value, count(y) as freq from input_sp; 4 | 5 | condp = select freq_e_o.index, 6 | -LOG(float(freq_e_o.freq) / freq_o.freq) as lp, 7 | freq_e_o.outcome as outcome, 8 | freq_e_o.value as value 9 | from freq_o, freq_e_o 10 | where freq_e_o.outcome = freq_o.outcome; 11 | 12 | STORE(condp, conditionals); 13 | -------------------------------------------------------------------------------- /examples/naivebayes/nb_classify.myl: -------------------------------------------------------------------------------- 1 | -- arbitrarily favor new value in case of tie 2 | def pickval(value, arg, _value, _arg): 3 | case when value >= _value then arg 4 | else _arg end; 5 | 6 | uda ArgMax(outcome, lprob) { 7 | -- init 8 | [0 as _outcome, 0 as _lprob]; 9 | 10 | -- update 11 | [pickval(lprob, outcome, _lprob, _outcome), 12 | pickval(lprob, lprob, _lprob, _lprob)]; 13 | 14 | -- output 15 | [_lprob, _outcome]; 16 | }; 17 | 18 | 19 | input = SCAN(testdata); 20 | CondP = SCAN(conditionals); 21 | 22 | -- an alternation operation like an unpivot 23 | -- may be unnecessary if input already comes in a sparse format 24 | input_sp0 = select input.id as id, input.x0 as value, 0 as index from input; 25 | input_sp1 = select input.id as id, input.x1 as value, 1 as index from input; 26 | input_sp2 = select input.id as id, input.x2 as value, 2 as index from input; 27 | input_sp3 = select input.id as id, input.x3 as value, 3 as index from input; 28 | input_sp4 = select input.id as id, input.x4 as 
value, 4 as index from input; 29 | input_sp = UNIONALL(input_sp0, input_sp1, input_sp2, input_sp3, input_sp4); 30 | 31 | -- calculate probability of outcomes 32 | Poe = select input_sp.id as inputId, 33 | sum(CondP.lp) as lprob, 34 | CondP.outcome as outcome 35 | from CondP, input_sp 36 | where 37 | CondP.index=input_sp.index 38 | and CondP.value=input_sp.value; 39 | --group by CondP.outcome, input_sp.id; 40 | 41 | -- select the max probability outcome 42 | classes = select inputId, ArgMax(outcome, lprob) from Poe; 43 | 44 | store(classes, OUTPUT); 45 | -------------------------------------------------------------------------------- /examples/naivebayes/nb_train.myl: -------------------------------------------------------------------------------- 1 | input = SCAN(trainingdata); 2 | 3 | -- an alternation operation like an unpivot 4 | -- may be unnecessary if input already comes in a sparse format 5 | --input_sp0 = select input.id as id, input.x0 as value, 0 as index, y from input; 6 | --input_sp1 = select input.id as id, input.x1 as value, 1 as index, y from input; 7 | --input_sp2 = select input.id as id, input.x2 as value, 2 as index, y from input; 8 | --input_sp3 = select input.id as id, input.x3 as value, 3 as index, y from input; 9 | --input_sp4 = select input.id as id, input.x4 as value, 4 as index, y from input; 10 | input_sp0 = select input.x0 as value, 0 as index, y from input; 11 | input_sp1 = select input.x1 as value, 1 as index, y from input; 12 | input_sp2 = select input.x2 as value, 2 as index, y from input; 13 | input_sp3 = select input.x3 as value, 3 as index, y from input; 14 | input_sp4 = select input.x4 as value, 4 as index, y from input; 15 | input_sp01 = UNIONALL(input_sp0, input_sp1); 16 | input_sp02 = UNIONALL(input_sp01, input_sp2); 17 | input_sp03 = UNIONALL(input_sp02, input_sp3); 18 | input_sp = UNIONALL(input_sp03, input_sp4); 19 | 20 | freq_o = select y as outcome, count(y) as freq from input_sp; 21 | 22 | freq_e_o = select y as outcome, 
index, value, count(y) as freq from input_sp; 23 | 24 | condp = select freq_e_o.index, 25 | -LOG(float(freq_e_o.freq) / freq_o.freq) as lp, 26 | freq_e_o.outcome as outcome, 27 | freq_e_o.value as value 28 | from freq_o, freq_e_o 29 | where freq_e_o.outcome = freq_o.outcome; 30 | 31 | STORE(condp, OUTPUT); 32 | -------------------------------------------------------------------------------- /examples/naivebayes/prepare_test.sh: -------------------------------------------------------------------------------- 1 | set -o errexit 2 | 3 | # created by msd_train.myl: 4 | # conditionals (scheme) 5 | # conditionals.bin (data) 6 | 7 | add_id=1 8 | catalog=msd_catalog_test.py 9 | catalog_wid=msd_catalog_test_wid.py 10 | catalog_all=msd_catalog_test_all.py 11 | navg=4 12 | ncov=4 13 | rel=testdata 14 | input='test' 15 | dataset=/sampa/home/bdmyers/escience/datasets/millionsong/YearPredictionMSD_test.txt 16 | #dataset=/sampa/home/bdmyers/escience/datasets/millionsong/YearPredictionMSD_test_small.txt 17 | binfile=/sampa/home/bdmyers/escience/datasets/millionsong/YearPredictionMSD_test_8attr.txt 18 | #binfile=/sampa/home/bdmyers/escience/datasets/millionsong/YearPredictionMSD_test_small_6attr.txt 19 | queryin=naivebayes_classify.myl 20 | query=msd_classify.myl 21 | convert_home=../../c_test_environment 22 | 23 | # without id 24 | python create_scheme.py -a $navg -c $ncov --input=$input --no-id --no-y > $catalog 25 | pushd $convert_home 26 | python convert2bin.py -n $rel -c ../examples/naivebayes/$catalog 27 | ./$rel.convert $dataset 0 $add_id 28 | popd 29 | mv $dataset.bin $binfile.bin 30 | 31 | # add in id now that we added it 32 | python create_scheme.py -a $navg -c $ncov --input=$input --no-y > $catalog_wid 33 | ./cat_scheme $GRAPPA_HOME/build/Make+Release/applications/join/conditionals $catalog_wid > $catalog_all 34 | python generate_parse.py $(($navg + $ncov)) 0 > tmp.myl 35 | cat tmp.myl $queryin > $query 36 | ../../scripts/myrial --emit=console -c --catalog=$catalog_all 
$query 37 | 38 | 39 | codef=`basename $query .myl`.cpp 40 | exef=grappa_`basename $query .myl`.exe 41 | scp $codef pal:~/grappa-nb/applications/join/grappa_$codef 42 | cp $codef $GRAPPA_HOME/applications/join/grappa_$codef 43 | pushd $GRAPPA_HOME/build/Make+Release/applications/join 44 | make -j $exef 45 | popd 46 | 47 | echo "--input_file_conditionals=$GRAPPA_HOME/build/Make+Release/applications/join/conditionals --output_file=$GRAPPA_HOME/build/Make+Release/applications/join/classified --input_file_testdata=$binfile --relations=/" 48 | -------------------------------------------------------------------------------- /examples/naivebayes/prepare_training.sh: -------------------------------------------------------------------------------- 1 | set -o errexit 2 | 3 | add_id=1 4 | catalog=msd_catalog_train.py 5 | catalog_wid=msd_catalog_train_wid.py 6 | #NOTE: convert does not actually pick this order; it picks the first navg+ncov features 7 | navg=4 8 | ncov=4 9 | rel=trainingdata 10 | input='train' 11 | dataset=/sampa/home/bdmyers/escience/datasets/millionsong/YearPredictionMSD_train.txt 12 | #dataset=/sampa/home/bdmyers/escience/datasets/millionsong/YearPredictionMSD_train_small.txt 13 | binfile=/sampa/home/bdmyers/escience/datasets/millionsong/YearPredictionMSD_train_8attr.txt 14 | #binfile=/sampa/home/bdmyers/escience/datasets/millionsong/YearPredictionMSD_train_small_6attr.txt 15 | queryin=naivebayes_train.myl 16 | query=msd_train.myl 17 | convert_home=../../c_test_environment 18 | 19 | # without id 20 | python create_scheme.py -a $navg -c $ncov --input=$input --no-id > $catalog 21 | pushd $convert_home 22 | python convert2bin.py -n $rel -c ../examples/naivebayes/$catalog 23 | ./$rel.convert $dataset 0 $add_id 24 | popd 25 | mv $dataset.bin $binfile.bin 26 | 27 | # add in id now that we added it 28 | python create_scheme.py -a $navg -c $ncov --input=$input > $catalog_wid 29 | python generate_parse.py $(($navg + $ncov )) 1 > tmp.myl 30 | cat tmp.myl $queryin > 
$query 31 | ../../scripts/myrial -c --emit=file --catalog=$catalog_wid $query 32 | 33 | 34 | codef=`basename $query .myl`.cpp 35 | exef=grappa_`basename $query .myl`.exe 36 | cp $codef $GRAPPA_HOME/applications/join/grappa_$codef 37 | pushd $GRAPPA_HOME/build/Make+Release/applications/join 38 | make -j $exef 39 | popd 40 | 41 | echo "--input_file_trainingdata=$binfile --output_file=$GRAPPA_HOME/build/Make+Release/applications/join/conditionals --relations=/" 42 | -------------------------------------------------------------------------------- /examples/noschema.myl: -------------------------------------------------------------------------------- 1 | -- Scan of a table that has no schema in the catalog 2 | T1 = SCAN(foo:bar:baz); 3 | T2 = [FROM T1 EMIT x=$3]; 4 | T3 = [FROM T2 EMIT y=MIN(x)]; 5 | STORE (T3, bang:baz:bar); 6 | -------------------------------------------------------------------------------- /examples/openmp_examples/A.h: -------------------------------------------------------------------------------- 1 | #ifndef ____A__ 2 | #define ____A__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | //#include 10 | 11 | using namespace std; 12 | 13 | void query(const char* fname,int num_threads); 14 | 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /examples/openmp_examples/Makefile: -------------------------------------------------------------------------------- 1 | 2 | triangle_parallel: triangle_with_tuples.cpp 3 | gcc -O3 -lrt -fopenmp triangle_with_tuples.cpp -lstdc++ -o triangle_parallel 4 | 5 | clean: 6 | rm -f triangle_parallel 7 | -------------------------------------------------------------------------------- /examples/openmp_examples/igor_omp_tri.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'igor' 3 | 4 | $datasets="/sampa/home/bdmyers/graph_datasets" 5 | 6 | Igor do 7 | 8 | database 'join.omp.db', 
:triangles 9 | 10 | command 'srun -p grappa ./triangles_parallel %{fin} %{ppn}' 11 | 12 | sbatch_flags << "--time=60" 13 | 14 | params { 15 | nnode 1 16 | ppn 2 17 | fin "" 18 | tag 'none' 19 | } 20 | 21 | run { 22 | fin "#{$datasets}/berkstan/web-BerkStan.txt" 23 | } 24 | 25 | 26 | expect :triangles_runtime 27 | 28 | #$filtered = results{|t| t.select(:id, :nnode, :ppn, :tree, :run_at, :search_runtime) } 29 | 30 | interact # enter interactive mode 31 | end 32 | -------------------------------------------------------------------------------- /examples/pagerank.myl: -------------------------------------------------------------------------------- 1 | 2 | -- Simplified PageRank; assumes that all nodes have out degree > 0 3 | 4 | alpha = [.85]; 5 | epsilon = [.0001]; 6 | 7 | Edge = SCAN(public:adhoc:edges); 8 | Vertex = SCAN(public:adhoc:vertices); 9 | 10 | N = [FROM Vertex EMIT COUNT(id) AS val]; 11 | min_rank = [(1 - *alpha) / *N]; 12 | 13 | OutDegree = [FROM Edge EMIT Edge.src AS id, COUNT(Edge.dst) AS cnt]; 14 | PageRank = [FROM Vertex EMIT Vertex.id AS id, 1.0 / *N AS rank]; 15 | 16 | DO 17 | -- Calculate each node's outbound page rank contribution 18 | PrOut = [FROM PageRank, OutDegree WHERE PageRank.id == OutDegree.id 19 | EMIT PageRank.id AS id, PageRank.rank / OutDegree.cnt AS out_rank]; 20 | 21 | -- Compute the inbound summands for each node 22 | Summand = [FROM Vertex, Edge, PrOut 23 | WHERE Edge.dst == Vertex.id AND Edge.src == PrOut.id 24 | EMIT Vertex.id AS id, PrOut.out_rank AS summand]; 25 | 26 | -- Sum up the summands; adjust by alpha 27 | NewPageRank = [FROM Summand EMIT id AS id, 28 | *min_rank + *alpha * SUM(Summand.summand) AS rank]; 29 | Delta = [FROM NewPageRank, PageRank WHERE NewPageRank.id == PageRank.id 30 | EMIT ABS(NewPageRank.rank - PageRank.rank) AS val]; 31 | Continue = [FROM Delta EMIT MAX(Delta.val) > *epsilon]; 32 | PageRank = NewPageRank; 33 | WHILE Continue; 34 | 35 | STORE(PageRank, OUTPUT); 36 | 
-------------------------------------------------------------------------------- /examples/pagerank_dead.myl: -------------------------------------------------------------------------------- 1 | 2 | -- PageRank augmented with dead code; for use as an optimization test 3 | 4 | alpha = [.85]; 5 | epsilon = [.0001]; 6 | 7 | D0 = [3.14159 AS pi, 2.71828 AS e]; -- dead code 8 | 9 | Edge = SCAN(public:adhoc:edges); 10 | Vertex = SCAN(public:adhoc:vertices); 11 | 12 | N = [FROM Vertex EMIT COUNT(id) AS val]; 13 | min_rank = [(1 - *alpha) / *N]; 14 | 15 | OutDegree = [FROM Edge EMIT Edge.src AS id, COUNT(Edge.dst) AS cnt]; 16 | PageRank = [FROM Vertex EMIT Vertex.id AS id, 1.0 / *N AS rank]; 17 | 18 | DO 19 | D1 = [FROM Vertex EMIT COUNT(id) AS val]; -- dead code 20 | 21 | -- Calculate each node's outbound page rank contribution 22 | PrOut = [FROM PageRank, OutDegree WHERE PageRank.id == OutDegree.id 23 | EMIT PageRank.id AS id, PageRank.rank / OutDegree.cnt AS out_rank]; 24 | 25 | -- Compute the inbound summands for each node 26 | Summand = [FROM Vertex, Edge, PrOut 27 | WHERE Edge.dst == Vertex.id AND Edge.src == PrOut.id 28 | EMIT Vertex.id AS id, PrOut.out_rank AS summand]; 29 | 30 | -- Sum up the summands; adjust by alpha 31 | NewPageRank = [FROM Summand EMIT id AS id, 32 | *min_rank + *alpha * SUM(Summand.summand) AS rank]; 33 | Delta = [FROM NewPageRank, PageRank WHERE NewPageRank.id == PageRank.id 34 | EMIT ABS(NewPageRank.rank - PageRank.rank) AS val]; 35 | Continue = [FROM Delta EMIT MAX(Delta.val) > *epsilon]; 36 | Continue = [FROM Delta EMIT MAX(Delta.val) > *epsilon]; -- duplicate line 37 | PageRank = NewPageRank; 38 | WHILE Continue; 39 | 40 | STORE(PageRank, OUTPUT); 41 | -------------------------------------------------------------------------------- /examples/pairwise_distances.myl: -------------------------------------------------------------------------------- 1 | const partition: 0.5; 2 | const epsilon: 0.0000106; 3 | 4 | def mod(x, n): x - int(x/n)*n; 
5 | def cell(v): int((v - mod(v, partition)) * (1/partition)); 6 | def is_ghost(xoffset, yoffset, zoffset): 7 | case when xoffset = 0 and 8 | yoffset = 0 and 9 | zoffset = 0 then 0 else 1 end; 10 | def is_replicated(x, y, z, xoffset, yoffset, zoffset): 11 | is_ghost(xoffset, yoffset, zoffset) = 0 or 12 | cell(x + epsilon*xoffset) != cell(x) or 13 | cell(y + epsilon*yoffset) != cell(y) or 14 | cell(z + epsilon*zoffset) != cell(z); 15 | def distance(x1, x2, y1, y2, z1, z2): sqrt((x1-x2)*(x1-x2) + 16 | (y1-y2)*(y1-y2) + 17 | (z1-z2)*(z1-z2)); 18 | 19 | points = load("https://s3-us-west-2.amazonaws.com/uwdb/sampleData/sampleCrossmatch/points.txt", 20 | csv(schema(id:int, 21 | x:float, 22 | y:float, 23 | z:float), skip=0)); 24 | permutations = load("https://s3-us-west-2.amazonaws.com/myria/permutations", 25 | csv(schema(xoffset:int, 26 | yoffset:int, 27 | zoffset:int), skip=0)); 28 | 29 | -- Partition into a grid with edges of size partition 30 | -- Replicate any point that falls within epsilon of a partition boundary 31 | 32 | partitions = [from points, permutations 33 | where is_replicated(x, y, z, xoffset, yoffset, zoffset) 34 | emit id, x, y, z, 35 | cell(x) + xoffset as px, 36 | cell(y) + yoffset as py, 37 | cell(z) + zoffset as pz, 38 | is_ghost(xoffset, yoffset, zoffset) as ghost]; 39 | 40 | ------------------------------------------- 41 | 42 | -- Cross product on partition + ghost cells; no shuffle required 43 | local = [from partitions left, 44 | partitions right 45 | where left.px = right.px and 46 | left.py = right.py and 47 | left.pz = right.pz 48 | emit *]; 49 | 50 | -- Calculate distances within each local pair and filter outliers 51 | distances = [from local 52 | where id < id1 and 53 | ghost = 0 and 54 | distance(x, x1, y, y1, z, z1) <= epsilon 55 | emit id as id1, 56 | id1 as id2, 57 | distance(x, x1, y, y1, z, z1) as distance]; 58 | 59 | store(distances, distances); -------------------------------------------------------------------------------- 
/examples/rdfsimple.myl: -------------------------------------------------------------------------------- 1 | R1 = scan(public:adhoc:sp2bench); 2 | R2 = scan(public:adhoc:sp2bench); 3 | r = [FROM R1, R2 4 | WHERE R1.subject = "" 5 | AND R1.object = R2.subject 6 | EMIT R1.subject, R2.object, 4+1 7 | ]; 8 | store(r, predicates); 9 | -------------------------------------------------------------------------------- /examples/reachable.myl: -------------------------------------------------------------------------------- 1 | Edge = SCAN(public:adhoc:edges); 2 | Source = [1 AS addr]; 3 | Reachable = Source; 4 | Delta = Source; 5 | 6 | DO 7 | NewlyReachable = DISTINCT([FROM Delta, Edge 8 | WHERE Delta.addr == Edge.src 9 | EMIT Edge.dst AS addr]); 10 | Delta = DIFF(NewlyReachable, Reachable); 11 | Reachable = UNIONALL(Delta, Reachable); 12 | WHILE [FROM COUNTALL(Delta) AS size EMIT *size > 0]; 13 | 14 | STORE(Reachable, OUTPUT); 15 | -------------------------------------------------------------------------------- /examples/samplescan.myl: -------------------------------------------------------------------------------- 1 | -- Sample from relation with-replacement 2 | T1 = samplescan(public:adhoc:employee, 1, WR); 3 | T2 = samplescan(public:adhoc:employee, 1.5%, WR); 4 | 5 | -- Sample from relation without-replacement 6 | T3 = samplescan(public:adhoc:employee, 1, WoR); 7 | T4 = samplescan(public:adhoc:employee, .5%, WoR); 8 | 9 | -- Uses With-Replacement sampling if no sample type specified 10 | T5 = samplescan(public:adhoc:employee, 1); 11 | T6 = samplescan(public:adhoc:employee, 1%); 12 | 13 | T = unionall(T1, T2, T3, T4, T5, T6); 14 | Store(T, samplescanquery); 15 | -------------------------------------------------------------------------------- /examples/seaflow.myl: -------------------------------------------------------------------------------- 1 | DEF transform(x): pow(10, x/pow(2,16)*3.5); 2 | AllData = SCAN(armbrustlab:seaflow:all_data); 3 | AllDataLinear = SELECT 
Cruise, Day, File_Id 4 | , transform(fsc_small) as fsc_small 5 | -- fsc_perp is measured differently, defer for later 6 | , transform(chl_small) as chl_small 7 | , transform(pe) as pe 8 | FROM AllData; 9 | STORE(AllDataLinear, armbrustlab:seaflow:all_data_linear); -------------------------------------------------------------------------------- /examples/seaflow2.myl: -------------------------------------------------------------------------------- 1 | DEF transform(x): pow(10, x/pow(2,16)*3.5); 2 | 3 | AllData = SCAN(armbrustlab:seaflow:all_data); 4 | AllDataLinear = SELECT Cruise, Day, File_Id 5 | , pow(10, fsc_small/pow(2,16)*3.5) as fsc_small 6 | -- fsc_perp is measured differently, defer for later 7 | , pow(10, chl_small/pow(2,16)*3.5) as chl_small 8 | , pow(10, pe/pow(2,16)*3.5) as pe 9 | FROM AllData; 10 | STORE(AllDataLinear, armbrustlab:seaflow:all_data_linear); 11 | -------------------------------------------------------------------------------- /examples/sigma-clipping-v0.myl: -------------------------------------------------------------------------------- 1 | -- Simple and slow implementation of sigma clipping; this query is not 2 | -- incremental, so it re-scans points on every iteration. 
3 | 4 | Good = scan(sc_points); 5 | 6 | -- number of allowed standard deviations 7 | const N: 2; 8 | 9 | do 10 | stats = [from Good emit avg(v) AS mean, stdev(v) as std]; 11 | NewBad = [from Good, stats where abs(v - mean) > N * std emit Good.*]; 12 | Good = diff(Good, NewBad); 13 | continue = [from NewBad emit count(NewBad.v) > 0]; 14 | while continue; 15 | 16 | store(Good, sc_points_clipped); 17 | -------------------------------------------------------------------------------- /examples/sigma-clipping.myl: -------------------------------------------------------------------------------- 1 | Points = SCAN(public:adhoc:sc_points); 2 | 3 | aggs = [from Points emit sum(v) as _sum, sum(v*v) as sumsq, count(v) as cnt]; 4 | newBad = empty(id:int, v:float); 5 | 6 | bounds = [from Points emit min(v) as lower, max(v) as upper]; 7 | 8 | -- number of allowed standard deviations 9 | const Nstd: 2; 10 | 11 | do 12 | -- Incrementally update aggs and stats 13 | new_aggs = [from newBad emit sum(v) as _sum, sum(v*v) as sumsq, 14 | count(v) as cnt]; 15 | aggs = [from aggs, new_aggs 16 | emit aggs._sum - new_aggs._sum as _sum, 17 | aggs.sumsq - new_aggs.sumsq as sumsq, 18 | aggs.cnt - new_aggs.cnt as cnt]; 19 | 20 | stats = [from aggs 21 | emit _sum/cnt as mean, 22 | SQRT(1.0/(cnt*(cnt-1)) * (cnt * sumsq - _sum * _sum)) as std]; 23 | 24 | -- Compute the new bounds 25 | newBounds = [from stats emit mean - Nstd * std as lower, 26 | mean + Nstd * std as upper]; 27 | 28 | newBad = [from Points, bounds, newBounds 29 | where (newBounds.upper < v 30 | and v <= bounds.upper) 31 | or (newBounds.lower > v 32 | and v >= bounds.lower) 33 | emit Points.*]; 34 | 35 | bounds = newBounds; 36 | continue = [from newBad emit count(v) > 0]; 37 | while continue; 38 | 39 | output = [from Points, bounds 40 | where Points.v > bounds.lower 41 | and Points.v < bounds.upper 42 | emit Points.*]; 43 | store(output, sc_points_clipped); 44 | 
-------------------------------------------------------------------------------- /examples/sigma_clipping_points.txt: -------------------------------------------------------------------------------- 1 | 25.0 2 | 27.2 3 | 23.4 4 | 25.1 5 | 26.3 6 | 24.9 7 | 23.5 8 | 22.7 9 | 108.2 10 | 26.2 11 | 25.3 12 | 24.7 13 | 25.01 14 | 26.1 15 | 22.8 16 | 2.2 17 | 24.8 18 | 25.05 19 | 25.15 20 | -------------------------------------------------------------------------------- /examples/sp2bench.py: -------------------------------------------------------------------------------- 1 | import test_query 2 | import sys 3 | 4 | if __name__ == "__main__": 5 | queryfile = sys.argv[1] 6 | tr = 'sp2bench_1m' 7 | with open(queryfile, 'r') as f: 8 | query = f.read() % locals() 9 | 10 | fname = test_query.testEmit(query, queryfile, test_query.CCAlgebra) 11 | 12 | 13 | -------------------------------------------------------------------------------- /examples/sp2bench/catalog.py: -------------------------------------------------------------------------------- 1 | # Schemas corresponding to Myrial examples 2 | 3 | { 4 | 'public:adhoc:sp2bench' : [('subject', 'STRING_TYPE'), ('predicate','STRING_TYPE'), ('object','STRING_TYPE')], 5 | } 6 | -------------------------------------------------------------------------------- /examples/sp2bench/q1.dlg: -------------------------------------------------------------------------------- 1 | A(yr) :- %(tr)s(journal, 'rdf:type', 'bench:Journal'), 2 | %(tr)s(journal, 'dc:title', 'Journal 1 (1940)'), 3 | %(tr)s(journal, 'dcterms:issued', yr) 4 | -------------------------------------------------------------------------------- /examples/sp2bench/q1.myl: -------------------------------------------------------------------------------- 1 | Triples = scan(public:adhoc:sp2bench); 2 | 3 | Q1 = SELECT 4 | T3.object AS yr 5 | FROM 6 | Triples T1 7 | , Triples T2 8 | , Triples T3 9 | WHERE 10 | T1.subject=T3.subject 11 | AND T1.subject=T2.subject 12 | AND 
T1.predicate="" 13 | AND T2.predicate="" 14 | AND T3.predicate="" 15 | AND T1.object="" 16 | AND T2.object='"Journal 1 (1940)"^^xsd:string'; 17 | 18 | store(Q1, Q1); 19 | -------------------------------------------------------------------------------- /examples/sp2bench/q2.myl: -------------------------------------------------------------------------------- 1 | Triples = scan(public:adhoc:sp2bench); -- catalog schema: (subject, predicate, object) 2 | 3 | Q2 = SELECT 4 | T1.subject AS inproc, 5 | T2.object AS author, 6 | T3.object AS booktitle, 7 | T4.object AS title, 8 | T5.object AS proc, 9 | T6.object AS ee, 10 | T7.object AS page, 11 | T8.object AS URL, 12 | T9.object AS yr, 13 | AB.object AS abstract 14 | FROM 15 | Triples T1 16 | JOIN Triples T2 ON T1.subject=T2.subject 17 | JOIN Triples T3 ON T1.subject=T3.subject 18 | JOIN Triples T4 ON T1.subject=T4.subject 19 | JOIN Triples T5 ON T1.subject=T5.subject 20 | JOIN Triples T6 ON T1.subject=T6.subject 21 | JOIN Triples T7 ON T1.subject=T7.subject 22 | JOIN Triples T8 ON T1.subject=T8.subject 23 | JOIN Triples T9 ON T1.subject=T9.subject 24 | LEFT JOIN ( 25 | SELECT * 26 | FROM 27 | Triples T10 28 | WHERE 29 | T10.predicate='bench:abstract' 30 | ) AB ON T1.subject=AB.subject 31 | WHERE 32 | T1.predicate='rdf:type' 33 | AND T2.predicate='dc:creator' 34 | AND T3.predicate='bench:booktitle' 35 | AND T4.predicate='dc:title' 36 | AND T5.predicate='dcterms:partOf' 37 | AND T6.predicate='rdfs:seeAlso' 38 | AND T7.predicate='swrc:pages' 39 | AND T8.predicate='foaf:homepage' 40 | AND T9.predicate='dcterms:issued' 41 | AND T1.object='bench:Inproceedings' 42 | ORDER BY T9.object; 43 | store(Q2, Q2); 44 | -------------------------------------------------------------------------------- /examples/sp2bench/q3.myl: -------------------------------------------------------------------------------- 1 | Triples = scan(public:adhoc:sp2bench); 2 | 3 | Q3 = SELECT 4 | T1.subject AS article 5 | FROM 6 | Triples T1, 7 | Triples T2 8 | WHERE 9 | T1.subject=T2.subject 
10 | AND T1.predicate="" 11 | AND T2.predicate="" 12 | AND T2.object=""; 13 | 14 | store(Q3, Q3); 15 | -------------------------------------------------------------------------------- /examples/sp2bench/q4.myl: -------------------------------------------------------------------------------- 1 | Triples = scan(public:adhoc:sp2bench); 2 | 3 | Q4 = SELECT 4 | T1.subject AS article 5 | FROM 6 | Triples T1, 7 | Triples T2 8 | WHERE 9 | T1.subject=T2.subject 10 | AND T1.predicate="rdf:type" 11 | AND T2.predicate="swrc:month" 12 | AND T2.object="bench:Article"; 13 | 14 | store(Q4, Q4); 15 | -------------------------------------------------------------------------------- /examples/standalone.myl: -------------------------------------------------------------------------------- 1 | Emp = load("./examples/emp.csv", csv(schema(id:int, dept_id:int, name:string, salary:int))); 2 | Dept = load("./examples/dept.csv", csv(schema(id:int, name:string, manager:int))); 3 | 4 | out = [from Emp, Dept 5 | where Emp.dept_id == Dept.id AND Emp.salary > 5000 6 | emit Emp.name as emp_name, Dept.name as dept_name]; 7 | dump(out); 8 | -------------------------------------------------------------------------------- /examples/tipsy.myl: -------------------------------------------------------------------------------- 1 | t = load("https://s3-us-west-2.amazonaws.com/uwdb/sampleData/sampleTipsy/cosmo8.33PLK.256g3bwK1C52.000970", tipsy(group="amiga")); 2 | --t = load("baz", tipsy()); 3 | store(t, t); -------------------------------------------------------------------------------- /examples/uda.myl: -------------------------------------------------------------------------------- 1 | -- test with user-defined aggregate 2 | uda LogicalAvg(x) { 3 | [0 as _sum, 0 as _count]; 4 | [_sum + x, _count + 1]; 5 | _sum / _count; 6 | }; 7 | uda LocalAvg(x) { 8 | [0 as _sum, 0 as _count]; 9 | [_sum + x, _count + 1]; 10 | }; 11 | uda RemoteAvg(_local_sum, _local_count) { 12 | [0 as _sum, 0 as _count]; 13 | [_sum + 
_local_sum, _count + _local_count]; 14 | [_sum/_count]; 15 | }; 16 | uda* LogicalAvg {LocalAvg, RemoteAvg}; 17 | 18 | out = [FROM SCAN(public:adhoc:employee) AS X EMIT dept_id, 19 | LogicalAvg(salary) + LogicalAvg($0)]; 20 | STORE(out, OUTPUT); 21 | -------------------------------------------------------------------------------- /examples/worker_id.myl: -------------------------------------------------------------------------------- 1 | X = [FROM SCAN(public:adhoc:employee) AS X EMIT X.id, WORKER_ID()]; 2 | STORE(X, OUTPUT); 3 | -------------------------------------------------------------------------------- /raco/__init__.py: -------------------------------------------------------------------------------- 1 | from raco.datalog.grammar import parse 2 | from raco.compile import optimize 3 | 4 | import logging 5 | LOG = logging.getLogger(__name__) 6 | 7 | 8 | class RACompiler(object): 9 | 10 | """Thin wrapper interface for lower level functions parse, optimize, 11 | compile""" 12 | 13 | def fromDatalog(self, program): 14 | """Parse datalog and convert to RA""" 15 | self.physicalplan = None 16 | self.source = program 17 | self.parsed = parse(program) 18 | LOG.debug("parser output: %s", self.parsed) 19 | self.logicalplan = self.parsed.toRA() 20 | 21 | def optimize(self, target, **kwargs): 22 | """Convert logical plan to physical plan""" 23 | self.physicalplan = optimize(self.logicalplan, target, **kwargs) 24 | -------------------------------------------------------------------------------- /raco/backends/__init__.py: -------------------------------------------------------------------------------- 1 | # everything in backend_common made public 2 | from backend_common import * 3 | -------------------------------------------------------------------------------- /raco/backends/cpp/__init__.py: -------------------------------------------------------------------------------- 1 | # everything in cpp.py made public 2 | from cpp import * 3 | 
-------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/ascii_scan.cpp: -------------------------------------------------------------------------------- 1 | auto {{resultsym}} = tuplesFromAscii<{{result_type}}>("{{name}}"); 2 | 3 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/base_query.cpp: -------------------------------------------------------------------------------- 1 | // Precount_select: Use buckets to track the number of matches 2 | // Use buckets to copy into the result array 3 | #include 4 | #include // for exit() 5 | #include // for open() 6 | #include // for close() 7 | #include // for fstat() 8 | #include // for isdigit() 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #ifdef __MTA__ 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | 23 | typedef int int64; 24 | typedef unsigned uint64; 25 | #else 26 | #include 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | typedef int64_t int64; 33 | typedef uint64_t uint64; 34 | 35 | #include 36 | #include 37 | #include 38 | #endif 39 | 40 | #include "io_util.h" 41 | #include "hash.h" 42 | #include "radish_utils.h" 43 | #include "strings.h" 44 | #include "timing.h" 45 | 46 | // ------------------------------------------------------------------ 47 | 48 | 49 | {{declarations}} 50 | 51 | StringIndex string_index; 52 | void init( ) { 53 | } 54 | 55 | 56 | void query(struct relationInfo *resultInfo) 57 | { 58 | printf("\nstarting Query stdout\n");fflush(stdout); 59 | 60 | double start = timer(); 61 | 62 | uint64 resultcount = 0; 63 | struct relationInfo {{resultsym}}_val; 64 | struct relationInfo *{{resultsym}} = &{{resultsym}}_val; 65 | 66 | 67 | // ----------------------------------------------------------- 68 | // Fill in query here 69 | // ----------------------------------------------------------- 70 | {{initialized}} 
71 | 72 | 73 | {{queryexec}} 74 | 75 | {{cleanups}} 76 | 77 | // return final result 78 | resultInfo->tuples = {{resultsym}}->tuples; 79 | resultInfo->fields = {{resultsym}}->fields; 80 | resultInfo->relation = {{resultsym}}->relation; 81 | 82 | } 83 | 84 | 85 | 86 | int main(int argc, char **argv) { 87 | 88 | struct relationInfo resultInfo; 89 | 90 | init(); 91 | 92 | printf("post-init stdout\n");fflush(stdout); 93 | 94 | // Execute the query 95 | query(&resultInfo); 96 | 97 | printf("post-query stdout\n");fflush(stdout); 98 | 99 | #ifdef ZAPPA 100 | // printrelation(&resultInfo); 101 | #endif 102 | // free(resultInfo.relation); 103 | 104 | printf("exiting stdout\n");fflush(stdout); 105 | 106 | } 107 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/clang_group_timing.cpp: -------------------------------------------------------------------------------- 1 | {% extends "group_timing.cpp" %} 2 | {% block printcode %} 3 | std::cout << "pipeline group {{ident}}: " 4 | << runtime_{{ident}} 5 | << " s" << std::endl; 6 | {% endblock %} 7 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/clang_pipeline_timing.cpp: -------------------------------------------------------------------------------- 1 | {% extends "pipeline_timing.cpp" %} 2 | 3 | {% block printstart %} 4 | std::cout {{ super() }} << std::endl; 5 | {% endblock %} 6 | 7 | {% block printruntime %} 8 | std::cout {{ super() }} << std::endl; 9 | {% endblock %} 10 | 11 | {% block printend %} 12 | std::cout {{ super() }} << std::endl; 13 | {% endblock %} 14 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/groupby/0key_declaration.cpp: -------------------------------------------------------------------------------- 1 | {{valtype}} {{hashname}} = {{initial_value}}; 2 | 
-------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/groupby/0key_materialize.cpp: -------------------------------------------------------------------------------- 1 | {{op}}_insert({{hashname}}, {{val}}); 2 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/groupby/0key_scan.cpp: -------------------------------------------------------------------------------- 1 | { 2 | {{output_tuple_type}} {{output_tuple_name}}({{hashname}}); 3 | {{inner_code}} 4 | } 5 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/groupby/1key_declaration.cpp: -------------------------------------------------------------------------------- 1 | std::unordered_map<{{keytype}},{{valtype}}> {{hashname}}; 2 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/groupby/1key_materialize.cpp: -------------------------------------------------------------------------------- 1 | {{op}}_insert({{hashname}}, {{key1val}}, {{val}}); 2 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/groupby/1key_scan.cpp: -------------------------------------------------------------------------------- 1 | for (auto it={{hashname}}.begin(); it!={{hashname}}.end(); it++) { 2 | {{output_tuple_type}} {{output_tuple_name}}(it->first, it->second); 3 | {{inner_code}} 4 | } 5 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/groupby/2key_declaration.cpp: -------------------------------------------------------------------------------- 1 | std::unordered_map, {{valtype}}, pairhash> {{hashname}}; 2 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/groupby/2key_materialize.cpp: 
-------------------------------------------------------------------------------- 1 | {{op}}_insert({{hashname}}, {{key1val}}, {{key2val}}, {{val}}); 2 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/groupby/2key_scan.cpp: -------------------------------------------------------------------------------- 1 | for (auto it={{hashname}}.begin(); it!={{hashname}}.end(); it++) { 2 | {{output_tuple_type}} {{output_tuple_name}}(it->first.first, it->first.second, it->second); 3 | {{inner_code}} 4 | } 5 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/hashjoin/hash_declaration.cpp: -------------------------------------------------------------------------------- 1 | std::unordered_map<{{keytype}}, std::vector<{{in_tuple_type}}> > {{hashname}}; 2 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/hashjoin/insert_materialize.cpp: -------------------------------------------------------------------------------- 1 | insert({{hashname}}, {{keyval}}, {{in_tuple_name}}); 2 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/hashjoin/lookup.cpp: -------------------------------------------------------------------------------- 1 | for (auto {{right_tuple_name}} : lookup({{hashname}}, {{keyval}})) { 2 | auto {{out_tuple_name}} = {{append_func_name}}({{keyname}}, {{right_tuple_name}}); 3 | {{inner_plan_compiled}} 4 | } 5 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/materialized_tuple_ref_additional.cpp: -------------------------------------------------------------------------------- 1 | public: 2 | static {{tupletypename}} fromRelationInfo(relationInfo * rel, int row) { 3 | // DOESN'T WORK WITH SCHEMAS WITH STRINGS 4 | {{tupletypename}} _t; 5 | {% for ft in fieldtypes %} 
6 | _t.f{{loop.index-1}} = *({{ft}}*)(&(rel->relation[row*rel->fields+{{loop.index-1}}])); 7 | {% endfor %} 8 | return _t; 9 | } 10 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/memory_scan.cpp: -------------------------------------------------------------------------------- 1 | for (auto {{tuple_name}} : {{inputsym}}) { 2 | {{inner_plan_compiled}} 3 | } // end scan over {{inputsym}} 4 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/relation_declaration.cpp: -------------------------------------------------------------------------------- 1 | std::vector<{{tuple_type}}> {{resultsym}}; 2 | -------------------------------------------------------------------------------- /raco/backends/cpp/c_templates/string_index_lookup.cpp: -------------------------------------------------------------------------------- 1 | auto {{name}} = string_index.string_lookup({{st}}); 2 | -------------------------------------------------------------------------------- /raco/backends/cpp/cbase_templates/assignment.cpp: -------------------------------------------------------------------------------- 1 | {{dst_set_func}} = {{src_expr_compiled}}; 2 | 3 | -------------------------------------------------------------------------------- /raco/backends/cpp/cbase_templates/group_timing.cpp: -------------------------------------------------------------------------------- 1 | {% block precode %}{% endblock %} 2 | auto start_{{ident}} = walltime(); 3 | {{inner_code}} 4 | auto end_{{ident}} = walltime(); 5 | {% block postcode %}{% endblock %} 6 | auto runtime_{{ident}} = end_{{ident}} - start_{{ident}}; 7 | {% block printcode %}{% endblock %} 8 | -------------------------------------------------------------------------------- /raco/backends/cpp/cbase_templates/materialized_tuple_create_one.cpp: -------------------------------------------------------------------------------- 
1 | static {{result_type}} {{convert_func_name}}(const {{type1}}& t1) { 2 | {{result_type}} t; 3 | {% for i in range(type1numfields) %} 4 | t.f{{i}} = t1.f{{i}}; 5 | {% endfor %} 6 | 7 | return t; 8 | } 9 | -------------------------------------------------------------------------------- /raco/backends/cpp/cbase_templates/materialized_tuple_create_two.cpp: -------------------------------------------------------------------------------- 1 | static {{result_type}} {{append_func_name}}(const {{type1}}& t1, const {{type2}}& t2) { 2 | {{result_type}} t; 3 | {% for i in range(type1numfields) %} 4 | t.f{{i}} = t1.f{{i}}; 5 | {% endfor %} 6 | 7 | {% for i in range(type2numfields) %} 8 | t.f{{i+type1numfields}} = t2.f{{i}}; 9 | {% endfor %} 10 | 11 | return t; 12 | } 13 | -------------------------------------------------------------------------------- /raco/backends/cpp/cbase_templates/output_stream_close.cpp: -------------------------------------------------------------------------------- 1 | {{output_stream_symbol}}.close(); 2 | 3 | -------------------------------------------------------------------------------- /raco/backends/cpp/cbase_templates/output_stream_decl.cpp: -------------------------------------------------------------------------------- 1 | std::ofstream {{output_stream_symbol}}; 2 | 3 | -------------------------------------------------------------------------------- /raco/backends/cpp/cbase_templates/output_stream_open.cpp: -------------------------------------------------------------------------------- 1 | {{output_stream_symbol}}.open("{{filename}}"); 2 | 3 | -------------------------------------------------------------------------------- /raco/backends/cpp/cbase_templates/output_stream_write.cpp: -------------------------------------------------------------------------------- 1 | {{output_stream_symbol}} << "{{stringval}}" << std::endl; 2 | 3 | -------------------------------------------------------------------------------- 
/raco/backends/cpp/cbase_templates/pipeline_timing.cpp: -------------------------------------------------------------------------------- 1 | auto start_{{ident}} = walltime(); 2 | {% block printstart %} << "timestamp {{ident}} start " << std::setprecision(15) << start_{{ident}}{% endblock %} 3 | 4 | {{inner_code}} 5 | auto end_{{ident}} = walltime(); 6 | auto runtime_{{ident}} = end_{{ident}} - start_{{ident}}; 7 | {% block printruntime %} << "pipeline {{ident}}: " << runtime_{{ident}} << " s"{% endblock %} 8 | 9 | {% block printend %} << "timestamp {{ident}} end " << std::setprecision(15) << end_{{ident}}{% endblock %} 10 | 11 | -------------------------------------------------------------------------------- /raco/backends/cpp/cbase_templates/select.cpp: -------------------------------------------------------------------------------- 1 | if ({{conditioncode}}) { 2 | {{inner_code_compiled}} 3 | } 4 | 5 | -------------------------------------------------------------------------------- /raco/backends/cpp/cbase_templates/tuple_declaration.cpp: -------------------------------------------------------------------------------- 1 | {{dst_type_name}} {{dst_name}}; 2 | -------------------------------------------------------------------------------- /raco/backends/cpp/cbase_templates/tuple_type_convert.cpp: -------------------------------------------------------------------------------- 1 | {{result_type}} {{result_name}} = {{convert_func_name}}({{input_tuple_name}}); 2 | -------------------------------------------------------------------------------- /raco/backends/cpp/cbase_templates/write_count.cpp: -------------------------------------------------------------------------------- 1 | write_count("{{filename}}", {{count_symbol}}); 2 | 3 | -------------------------------------------------------------------------------- /raco/backends/cpp/operator_at_a_time_c_templates/ascii_scan.template: -------------------------------------------------------------------------------- 1 | /* 
2 | ===================================== 3 | Scan(%(name)s) 4 | ===================================== 5 | */ 6 | 7 | printf("%(resultsym)s = Scan(%(name)s)\n"); 8 | 9 | struct relationInfo %(resultsym)s_val; 10 | 11 | #ifdef __MTA__ 12 | //binary_inhale("%(name)s", &%(resultsym)s_val); 13 | inhale("%(name)s", &%(resultsym)s_val); 14 | #else 15 | inhale("%(name)s", &%(resultsym)s_val); 16 | #endif // __MTA__ 17 | 18 | struct relationInfo *%(resultsym)s = &%(resultsym)s_val; 19 | -------------------------------------------------------------------------------- /raco/backends/cpp/operator_at_a_time_c_templates/binary_scan.template: -------------------------------------------------------------------------------- 1 | /* 2 | ===================================== 3 | Scan(%(name)s) 4 | ===================================== 5 | */ 6 | 7 | printf("%(resultsym)s = Scan(%(name)s)\n"); 8 | 9 | struct relationInfo %(resultsym)s_val; 10 | 11 | #ifdef __MTA__ 12 | binary_inhale("%(name)s", &%(resultsym)s_val); 13 | //inhale("%(name)s", &%(resultsym)s_val); 14 | #else 15 | inhale("%(name)s", &%(resultsym)s_val); 16 | #endif // __MTA__ 17 | 18 | struct relationInfo *%(resultsym)s = &%(resultsym)s_val; -------------------------------------------------------------------------------- /raco/backends/cpp/operator_at_a_time_c_templates/emit_joined_tuple.template: -------------------------------------------------------------------------------- 1 | 2 | printf("joined tuple: %d, %d, %d, %d\n", join1_leftrow, join1_rightrow, join2_leftrow, join2_rightrow); 3 | resultcount++; 4 | -------------------------------------------------------------------------------- /raco/backends/cpp/operator_at_a_time_c_templates/filtering_nestedloop_hashjoin_chain.template: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- 
/raco/backends/cpp/operator_at_a_time_c_templates/filtering_nestedloop_join.template: -------------------------------------------------------------------------------- 1 | 2 | 3 | // Join %(depth)s 4 | for (uint64 join%(depth)s_rightrow = 0; join%(depth)s_rightrow < join%(depth)s_right->tuples; join%(depth)s_rightrow++) { 5 | if (%(right_condition)s) { // filter on join%(depth)s.right 6 | uint64 join%(depth)s_leftrow = %(left_row_variable)s; // left row index consumed by check_condition below 7 | if (check_condition(join%(depth)s_left 8 | , join%(depth)s_right 9 | , join%(depth)s_leftrow 10 | , join%(depth)s_rightrow 11 | , join%(depth)s_leftattribute 12 | , join%(depth)s_rightattribute)) { 13 | 14 | 15 | %(inner_plan_compiled)s 16 | 17 | 18 | } // Join %(depth)s condition 19 | } // filter on join%(depth)s.right 20 | } // loop over join%(depth)s.right 21 | 22 | 23 | -------------------------------------------------------------------------------- /raco/backends/cpp/operator_at_a_time_c_templates/filtering_nestedloop_join_chain.template: -------------------------------------------------------------------------------- 1 | { // Begin Filtering_NestedLoop_Join_Chain 2 | 3 | 4 | 5 | printf("V2 = Join(%(inner_plan)s,V1) \n"); 6 | // Assume left-deep plan 7 | 8 | // leaves of the tree 9 | %(relation_decls)s 10 | 11 | // Join 1 12 | %(join_decls)s 13 | 14 | double start = timer(); 15 | 16 | getCounters(counters, currCounter); 17 | currCounter = currCounter + 1; // 1 18 | 19 | #pragma mta trace "running join" 20 | // Left Root 21 | for (uint64 join%(depth)s_leftrow = 0; join%(depth)s_leftrow < join%(depth)s_left->tuples; join%(depth)s_leftrow++) { 22 | if (%(left_root_condition)s) { // filter on join%(depth)s.left 23 | // Join %(depth)s 24 | for (uint64 join%(depth)s_rightrow = 0; join%(depth)s_rightrow < join%(depth)s_right->tuples; join%(depth)s_rightrow++) { 25 | if (%(right_condition)s) { // filter on join%(depth)s.right 26 | if (check_condition(join%(depth)s_left, join%(depth)s_right 27 | , join%(depth)s_leftrow, join%(depth)s_rightrow, 
join%(depth)s_leftattribute, join%(depth)s_rightattribute)) { 28 | %(inner_plan_compiled)s 29 | } // Join 1 condition 30 | } // filter on join1.right 31 | } // loop over join1.right 32 | } // filter on join1.left 33 | } // loop over join1.left 34 | 35 | } // End Filtering_NestedLoop_Join_Chain 36 | -------------------------------------------------------------------------------- /raco/backends/cpp/operator_at_a_time_c_templates/scan.template: -------------------------------------------------------------------------------- 1 | /* 2 | ===================================== 3 | Scan(%(name)s) 4 | ===================================== 5 | */ 6 | 7 | printf("%(resultsym)s = Scan(%(name)s)\n"); 8 | 9 | struct relationInfo %(resultsym)s_val; 10 | 11 | #ifdef __MTA__ 12 | //binary_inhale("%(name)s", &%(resultsym)s_val); 13 | inhale("%(name)s", &%(resultsym)s_val); 14 | #else 15 | inhale("%(name)s", &%(resultsym)s_val); 16 | #endif // __MTA__ 17 | 18 | struct relationInfo *%(resultsym)s = &%(resultsym)s_val; 19 | -------------------------------------------------------------------------------- /raco/backends/cpp/operator_at_a_time_c_templates/select_simple_twopass.template: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================ 3 | ============ TwoPass Select ================ 4 | ============================================ 5 | */ 6 | struct relationInfo %(resultsym)s_val; 7 | struct relationInfo *%(resultsym)s = &%(resultsym)s_val; 8 | 9 | { 10 | printf("\n%(resultsym)s = TwoPassSelect(%(condition)s, %(inputsym)s)\n"); 11 | 12 | uint64 *relation = %(inputsym)s->relation; 13 | uint64 tuples = %(inputsym)s->tuples; 14 | uint64 fields = %(inputsym)s->fields; 15 | 16 | uint64 matches = 0; 17 | 18 | int numCounters = 4; 19 | int currCounter = 0; 20 | int *counters = mallocCounterMemory(numCounters); 21 | 22 | double start = timer(); 23 | 24 | getCounters(counters, currCounter); 25 | currCounter = 
currCounter + 1; // 1 26 | 27 | // Count the number of matching entries 28 | #pragma mta trace "1st pass to compute size for TwoPassSelect(%(condition)s, %(inputsym)s))" 29 | for (uint64 i = 0; i < tuples*fields; i += fields) { 30 | if (%(condition)s) { 31 | matches++; 32 | } 33 | } 34 | printf("\tfinished first pass\n"); 35 | 36 | getCounters(counters, currCounter); 37 | currCounter = currCounter + 1; // 2 38 | 39 | uint64 *%(resultsym)s_result; 40 | // allocate space for the result 41 | %(resultsym)s_result = (uint64 *) malloc(matches*fields*sizeof(uint64)); 42 | 43 | // check success; a zero-byte request may legally return NULL, so only treat NULL as failure when rows matched 44 | if (matches && !%(resultsym)s_result) { 45 | #pragma mta trace "Memory Allocation FAILURE in TwoPassSelect(%(condition)s, %(inputsym)s))" 46 | printf("Memory Allocation FAILURE in TwoPassSelect(%(condition)s, %(inputsym)s)\n"); 47 | exit(5); 48 | } 49 | 50 | uint64 current_result = 0; 51 | #pragma mta trace "begin 2nd pass" 52 | #pragma mta assert nodep 53 | for (uint64 i = 0; i < tuples*fields; i+=fields) { 54 | if (%(condition)s) { 55 | for( uint64 j = 0; j < fields; j = j + 1 ) { 56 | %(resultsym)s_result[current_result + j] = relation[i + j]; 57 | } 58 | current_result+=fields; 59 | } 60 | } 61 | 62 | getCounters(counters, currCounter); 63 | currCounter = currCounter + 1; // 3 64 | 65 | double finish = timer(); 66 | printf("\t%%f seconds\n", finish - start); 67 | printf("\t%%lu tuples in result\n", matches); 68 | if (matches) { 69 | %(resultsym)s->tuples = matches; 70 | %(resultsym)s->fields = fields; 71 | %(resultsym)s->relation = %(resultsym)s_result; 72 | } 73 | else { 74 | %(resultsym)s->tuples = 0; 75 | %(resultsym)s->fields = fields; 76 | %(resultsym)s->relation = NULL; 77 | } 78 | 79 | printDiffCounters(counters, numCounters); 80 | freeCounterMemory(counters); 81 | 82 | /* 83 | ============ End TwoPass Select ================ 84 | */ 85 | } 86 | -------------------------------------------------------------------------------- /raco/backends/logical.py: 
-------------------------------------------------------------------------------- 1 | import raco.rules as rules 2 | from raco.backends import Algebra 3 | 4 | 5 | class OptLogicalAlgebra(Algebra): 6 | 7 | @staticmethod 8 | def opt_rules(**kwargs): 9 | return [rules.RemoveTrivialSequences(), 10 | rules.SimpleGroupBy(), 11 | rules.SplitSelects(), 12 | rules.PushSelects(), 13 | rules.MergeSelects(), 14 | rules.ProjectToDistinctColumnSelect(), 15 | rules.JoinToProjectingJoin(), 16 | rules.PushApply(), 17 | rules.RemoveUnusedColumns(), 18 | rules.PushApply(), 19 | rules.RemoveUnusedColumns(), 20 | rules.PushApply(), 21 | rules.DeDupBroadcastInputs()] 22 | -------------------------------------------------------------------------------- /raco/backends/myria/__init__.py: -------------------------------------------------------------------------------- 1 | # everything in myria made public 2 | from myria import * 3 | -------------------------------------------------------------------------------- /raco/backends/myria/errors.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | 4 | class MyriaError(Exception): 5 | def __init__(self, err=None): 6 | if isinstance(err, requests.Response): 7 | msg = 'Error {} ({})'.format(err.status_code, err.reason) 8 | if err.text: 9 | msg = '{}: {}'.format(msg, err.text) 10 | Exception.__init__(self, msg) 11 | else: 12 | Exception.__init__(self, err) 13 | -------------------------------------------------------------------------------- /raco/backends/myria/tests/test_error.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from raco.backends.myria.errors import MyriaError 3 | 4 | 5 | class TestError(unittest.TestCase): 6 | def test_error(self): 7 | with self.assertRaises(MyriaError): 8 | raise MyriaError 9 | 10 | if __name__ == '__main__': 11 | unittest.main() 12 | 
-------------------------------------------------------------------------------- /raco/backends/radish/README.md: -------------------------------------------------------------------------------- 1 | # Radish 2 | 3 | Backend compiling Myria Algebra to C++ for execution on grappa. 4 | 5 | * Depends on ../clang templates and routines 6 | -------------------------------------------------------------------------------- /raco/backends/radish/__init__.py: -------------------------------------------------------------------------------- 1 | # everything in radish made public 2 | from radish import * 3 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/base_query.cpp: -------------------------------------------------------------------------------- 1 | // grappa 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace Grappa; 8 | 9 | // stl 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | // query library 16 | #include "relation_io.hpp" 17 | #include "MatchesDHT.hpp" 18 | #include "DoubleDHT.hpp" 19 | #include "MapReduce.hpp" 20 | //#include "HashJoin.hpp" 21 | #include "DHT_symmetric.hpp" 22 | #include "Aggregates.hpp" 23 | #include "Iterators.hpp" 24 | #include "radish_utils.h" 25 | #include "stats.h" 26 | #include "strings.h" 27 | #include "dates.h" 28 | #include "relation.hpp" 29 | #include "pipeline.hpp" 30 | #include "TemporaryTable.hpp" 31 | #include "dowhile.hpp" 32 | //FIXME: prefer to include this only for Iterator codes 33 | #include "Operators.hpp" 34 | 35 | DEFINE_uint64( nt, 30, "hack: number of tuples"); 36 | DEFINE_bool( jsonsplits, false, "interpret input file F as F/part-*," 37 | "and containing json records"); 38 | 39 | template 40 | struct counter { 41 | T count; 42 | static GlobalAddress> create(T init) { 43 | auto res = symmetric_global_alloc>(); 44 | on_all_cores([res, init] { 45 | res->count = init; 46 | }); 47 | return res; 48 | } 49 | } 
GRAPPA_BLOCK_ALIGNED; 50 | 51 | template 52 | T get_count(GlobalAddress> p) { 53 | return p->count; 54 | } 55 | 56 | {{declarations}} 57 | 58 | StringIndex string_index; 59 | void init( ) { 60 | } 61 | 62 | void query() { 63 | double start, end; 64 | double saved_scan_runtime = 0, saved_init_runtime = 0; 65 | start = walltime(); 66 | 67 | {{initialized}} 68 | 69 | end = walltime(); 70 | init_runtime += (end-start); 71 | saved_init_runtime += (end-start); 72 | 73 | {{queryexec}} 74 | 75 | // since reset the stats after scan, need to set these again 76 | scan_runtime = saved_scan_runtime; 77 | init_runtime = saved_init_runtime; 78 | } 79 | 80 | 81 | int main(int argc, char** argv) { 82 | init(&argc, &argv); 83 | 84 | run([] { 85 | 86 | init(); 87 | double start = Grappa::walltime(); 88 | query(); 89 | double end = Grappa::walltime(); 90 | query_runtime = end - start; 91 | on_all_cores([] { emit_count = result.size(); }); 92 | Metrics::merge_and_print(); 93 | }); 94 | 95 | finalize(); 96 | return 0; 97 | } 98 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/define_cl_arg.cpp: -------------------------------------------------------------------------------- 1 | DEFINE_{{type}}({{name}}, {{default_value}}, "{{description}}"); 2 | 3 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/define_metric.cpp: -------------------------------------------------------------------------------- 1 | GRAPPA_DEFINE_METRIC({% block type %}{% endblock %}, {% block name %}{% endblock %}, {% block init %}{% endblock %}); -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/file_scan.cpp: -------------------------------------------------------------------------------- 1 | { 2 | if (FLAGS_bin) { 3 | BinaryRelationFileReader<{{result_type}}> reader; 4 | {{resultsym}} = reader.read( 
FLAGS_input_file_{{name}} + ".bin" ); 5 | } else if (FLAGS_jsonsplits) { 6 | SplitsRelationFileReader, {{result_type}}> reader; 7 | {{resultsym}} = reader.read( FLAGS_input_file_{{name}} ); 8 | } else { 9 | {{resultsym}}.data = readTuples<{{result_type}}>( FLAGS_input_file_{{name}}, FLAGS_nt); 10 | {{resultsym}}.numtuples = FLAGS_nt; 11 | auto l_{{resultsym}} = {{resultsym}}; 12 | on_all_cores([=]{ {{resultsym}} = l_{{resultsym}}; }); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/gce_app_metric.cpp: -------------------------------------------------------------------------------- 1 | {% extends "define_metric.cpp" %} 2 | 3 | {% block type %}CallbackMetric{% endblock %} 4 | 5 | {% block name %}app_{{pipeline_id}}_gce_incomplete{% endblock %} 6 | 7 | {% block init %}[] { 8 | return {{global_syncname}}.incomplete(); 9 | }{% endblock %} 10 | 11 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/global_array_memory_scan.cpp: -------------------------------------------------------------------------------- 1 | forall<&{{global_syncname}}>( {{inputsym}}.data, {{inputsym}}.numtuples, [=](int64_t i, {{tuple_type}}& {{tuple_name}}) { 2 | {{inner_code}} 3 | }); 4 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/global_array_relation_declaration.cpp: -------------------------------------------------------------------------------- 1 | Relation<{{tuple_type}}> {{resultsym}}; 2 | 3 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/graph_file_scan.cpp: -------------------------------------------------------------------------------- 1 | { 2 | tuple_graph tg; 3 | tg = readTuples( "{{name}}" ); 4 | 5 | FullEmpty>> f1; 6 | privateTask( [&f1,tg] { 7 | f1.writeXF( Graph::create(tg, 
/*directed=*/true) ); 8 | }); 9 | auto l_{{resultsym}}_index = f1.readFE(); 10 | 11 | on_all_cores([=] { 12 | {{resultsym}}_index = l_{{resultsym}}_index; 13 | }); 14 | } 15 | 16 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/grappa_group_timing.cpp: -------------------------------------------------------------------------------- 1 | {% extends "group_timing.cpp" %} 2 | {% block printcode %} 3 | {{timer_metric}} += runtime_{{ident}}; 4 | VLOG(1) << "pipeline group {{ident}}: " << runtime_{{ident}} << " s"; 5 | {% endblock %} 6 | 7 | {% block precode %}Grappa::Metrics::reset(); 8 | {{tracing_on}}{% endblock %} 9 | 10 | {% block postcode %}{{tracing_off}}{% endblock %} 11 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/grappa_pipeline_timing.cpp: -------------------------------------------------------------------------------- 1 | {% extends "pipeline_timing.cpp" %} 2 | 3 | {% block printstart %}VLOG(1) {{ super() }};{% endblock %} 4 | 5 | {% block printruntime %}VLOG(1) {{ super() }};{% endblock %} 6 | 7 | {% block printend %}VLOG(1) {{ super() }};{% endblock %} -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/groupby/0key_output.cpp: -------------------------------------------------------------------------------- 1 | {{comment}} 2 | auto {{output_tuple_name}}_tmp = reduce<{% block templateargs %}{% endblock %}>({{hashname}}); 3 | 4 | {% block output %}{% endblock %} 5 | 6 | {{inner_code}} 7 | 8 | // putting a wait here satisfies the invariant that inner code depends 9 | // on global synchronization by the pipeline source 10 | {{pipeline_sync}}.wait(); 11 | 12 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/groupby/combine_definition.cpp: 
-------------------------------------------------------------------------------- 1 | {{state_type}} {{name}}_combine(const {{state_type}}& state0, const {{state_type}}& state1) { 2 | {% for c in combine_updates %} 3 | {{ c }} 4 | {% endfor %} 5 | return {{state_type}}(std::make_tuple({{ combine_state_vars|join(',') }})); 6 | } 7 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/groupby/init_definition.cpp: -------------------------------------------------------------------------------- 1 | {{state_type}} {{name}}_init() { 2 | {% for u in init_updates %} 3 | {{u}} 4 | {% endfor %} 5 | 6 | return {{state_type}}( std::make_tuple({{ init_state_vars|join(',') }}) ); 7 | } -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/groupby/multi_uda_0key_output.cpp: -------------------------------------------------------------------------------- 1 | {% extends '0key_output.cpp' %} 2 | 3 | {% block templateargs %} 4 | {{state_type}}, &{{combine_func}} 5 | {% endblock %} 6 | 7 | {% block output %} 8 | {{output_tuple_type}} {{output_tuple_name}}; 9 | {{ assignmentcode }} 10 | {% endblock %} 11 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/groupby/multi_uda_0key_update.cpp: -------------------------------------------------------------------------------- 1 | auto {{hashname}}_local_ptr = {{hashname}}.localize(); 2 | *{{hashname}}_local_ptr = {{update_func}}(*{{hashname}}_local_ptr, {{update_val}}); 3 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/groupby/multi_uda_scan.cpp: -------------------------------------------------------------------------------- 1 | {% extends 'scan.cpp' %} 2 | 3 | {# depends on materialized_tuple_ref constructor of std::tuple #} 4 | {% block initializer %}std::tuple_cat({{ 
super() }}, {{mapping_var_name}}.second.to_tuple()){% endblock %} 5 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/groupby/nkey_update.cpp: -------------------------------------------------------------------------------- 1 | {{comment}} 2 | {{hashname}}->update<&{{pipeline_sync}}, {{input_type}}, &{{update_func}},&{{init_func}}>(std::make_tuple({{ keygets|join(',') }}), {{update_val}}); 3 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/groupby/one_built_in_0key_output.cpp: -------------------------------------------------------------------------------- 1 | {% extends '0key_output.cpp' %} 2 | 3 | {% block templateargs %}{{state_type}}, counter<{{state_type}}>, &{{combine_func}}, &get_count<{{state_type}}>{% endblock %} 4 | 5 | {% block output %} 6 | {{output_tuple_type}} {{output_tuple_name}}; 7 | {{output_tuple_set_func}} = {{output_tuple_name}}_tmp; 8 | {% endblock %} 9 | 10 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/groupby/one_built_in_0key_update.cpp: -------------------------------------------------------------------------------- 1 | {{hashname}}->count = {{update_func}}({{hashname}}->count, {{update_val}}); 2 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/groupby/one_built_in_scan.cpp: -------------------------------------------------------------------------------- 1 | {% extends 'scan.cpp' %} 2 | 3 | {# depends on materialized_tuple_ref constructor of std::tuple #} 4 | {% block initializer %}std::tuple_cat({{ super() }}, std::make_tuple({{mapping_var_name}}.second)){% endblock %} -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/groupby/scan.cpp: 
-------------------------------------------------------------------------------- 1 | {{comment}} 2 | {{hashname}}->forall_entries<&{{pipeline_sync}}>([=](std::pair&{{mapping_var_name}}) { 3 | {{output_tuple_type}} {{output_tuple_name}}({% block initializer %}{{mapping_var_name}}.first{% endblock %}); 4 | {{inner_code}} 5 | }); 6 | 7 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/groupby/update_definition.cpp: -------------------------------------------------------------------------------- 1 | {{state_type}} {{name}}_update(const {{state_type}}& state, const {{input_type}}& {{input_tuple_name}}) { 2 | {% for u in update_updates %} 3 | {{ u }} 4 | {% endfor %} 5 | return {{state_type}}(std::make_tuple({{ update_state_vars|join(',') }})); 6 | } -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/groupby/withkey_decl.cpp: -------------------------------------------------------------------------------- 1 | decltype(DHT_symmetric<{{keytype}},{{valtype}},hash_tuple::hash<{{keytype}}>>::create_DHT_symmetric( )) {{hashname}}; 2 | 3 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/groupby/withkey_init.cpp: -------------------------------------------------------------------------------- 1 | auto l_{{hashname}} = DHT_symmetric<{{keytype}},{{valtype}},hash_tuple::hash<{{keytype}}>>::create_DHT_symmetric( ); 2 | on_all_cores([=] { 3 | {{hashname}} = l_{{hashname}}; 4 | }); 5 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/groupby/withoutkey_init.cpp: -------------------------------------------------------------------------------- 1 | auto {{hashname}} = {{initializer}}; 2 | on_all_cores([=] { 3 | *({{hashname}}.localize()) = {{func_name}}_init(); 4 | }); 5 | 
-------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/hashjoin/hash_declaration.cpp: -------------------------------------------------------------------------------- 1 | typedef MatchesDHT<{{keytype}}, {{in_tuple_type}}, hash_tuple::hash<{{keytype}}>> DHT_{{in_tuple_type}}_{{hashname}}; 2 | DHT_{{in_tuple_type}}_{{hashname}} {{hashname}}; 3 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/hashjoin/hash_init.cpp: -------------------------------------------------------------------------------- 1 | {{hashname}}.init_global_DHT( &{{hashname}}, cores()*16*1024 ); 2 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/hashjoin/insert_materialize.cpp: -------------------------------------------------------------------------------- 1 | {{comment}} 2 | {{hashname}}.insert_async<&{{pipeline_sync}}>({{keyval}}, {{keyname}}); 3 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/hashjoin/lookup.cpp: -------------------------------------------------------------------------------- 1 | {{comment}} 2 | {{hashname}}.lookup_iter<&{{pipeline_sync}}>( {{keyval}}, [=]({{right_tuple_type}}& {{right_tuple_name}}) { 3 | join_coarse_result_count++; 4 | {{out_tuple_type}} {{out_tuple_name}} = {{append_func_name}}({{keyname}}, {{right_tuple_name}}); 5 | {{inner_plan_compiled}} 6 | }); 7 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/input_relation_declarations.cpp: -------------------------------------------------------------------------------- 1 | DEFINE_string(input_file_{{name}}, "{{name}}", "Input file"); 2 | std::vector schema_{{resultsym}} = { {% for c in colnames %}"{{c}}",{% endfor %} }; 3 | 
-------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/iterators/0key_groupby_source.cpp: -------------------------------------------------------------------------------- 1 | class {{class_symbol}} : public ZeroKeyAggregateSource<{{produce_type}}, {{state_type}}, &{{combine_func}}> { 2 | using ZeroKeyAggregateSource<{{produce_type}}, {{state_type}}, {{combine_func}}>::ZeroKeyAggregateSource; 3 | protected: 4 | void mktuple({{produce_type}}& {{produce_tuple_name}}, {{state_type}}& {{state_tuple_name}}) { 5 | {{assignment_code}} 6 | } 7 | 8 | }; -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/iterators/apply.cpp: -------------------------------------------------------------------------------- 1 | class {{class_symbol}} : public Apply<{{consume_type}}, {{produce_type}}> { 2 | using Apply<{{consume_type}}, {{produce_type}}>::Apply; 3 | protected: 4 | void apply({{produce_type}}& {{produce_tuple_name}}, {{consume_type}}& {{consume_tuple_name}}) { 5 | {{statements}} 6 | } 7 | }; -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/iterators/broadcast_stream.cpp: -------------------------------------------------------------------------------- 1 | class {{class_symbol}} : public BroadcastTupleStream<{{left_type}}, {{right_type}}, {{output_type}}> { 2 | using BroadcastTupleStream<{{left_type}}, {{right_type}}, {{output_type}}>::BroadcastTupleStream; 3 | protected: 4 | void mktuple({{output_type}}& {{output_name}}, {{left_type}}& l, {{right_type}}& r) { 5 | {{output_name}} = {{append_func_name}}(l, r); 6 | } 7 | }; -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/iterators/hashjoin_sink.cpp: -------------------------------------------------------------------------------- 1 | class {{class_symbol}} : 
public HashJoinSink{{side}}<{{keytype}}, 2 | {{left_tuple_type}}, 3 | {{right_tuple_type}}, 4 | hash_tuple::hash<{{keytype}}>, &{{pipeline_sync}}> { 5 | using HashJoinSink{{side}}<{{keytype}}, 6 | {{left_tuple_type}}, 7 | {{right_tuple_type}}, 8 | hash_tuple::hash<{{keytype}}>, &{{pipeline_sync}}>::HashJoinSink{{side}}; 9 | protected: 10 | {{keytype}} mktuple({% if side == 'Right' %} 11 | {{right_tuple_type}} 12 | {% else %} 13 | {{left_tuple_type}} 14 | {% endif %} 15 | &{{input_tuple_name}}) { 16 | 17 | return {{keyval}}; 18 | } 19 | }; 20 | 21 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/iterators/hashjoin_source.cpp: -------------------------------------------------------------------------------- 1 | class {{class_symbol}} : public HashJoinSource<{{keytype}}, 2 | {{left_tuple_type}}, 3 | {{right_tuple_type}}, 4 | hash_tuple::hash<{{keytype}}>, 5 | {{out_tuple_type}}> { 6 | 7 | using HashJoinSource<{{keytype}}, 8 | {{left_tuple_type}}, 9 | {{right_tuple_type}}, 10 | hash_tuple::hash<{{keytype}}>, {{out_tuple_type}}>::HashJoinSource; 11 | 12 | protected: 13 | {{out_tuple_type}} mktuple({{left_tuple_type}}& {{left_name}}, {{right_tuple_type}}& {{right_name}}) { 14 | return {{append_func_name}}({{left_name}}, {{right_name}}); 15 | } 16 | }; -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/iterators/instantiate_operator.cpp: -------------------------------------------------------------------------------- 1 | Operator<{{produce_type}}> * {{symbol}} = new {{call_constructor}}; 2 | 3 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/iterators/instantiate_sink.cpp: -------------------------------------------------------------------------------- 1 | {{symbol}} = new {{call_constructor}}; 2 | 
-------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/iterators/multikey_groupby_sink.cpp: -------------------------------------------------------------------------------- 1 | class {{class_symbol}} : public AggregateSink<{{consume_type}}, {{keytype}}, {{state_type}}, &{{pipeline_sync}}> { 2 | using AggregateSink<{{consume_type}}, {{keytype}}, {{state_type}}, &{{pipeline_sync}}>::AggregateSink; 3 | protected: 4 | {{keytype}} mktuple({{consume_type}}& {{consume_tuple_name}}) { 5 | return std::make_tuple({{ keygets|join(',') }}); 6 | } 7 | 8 | }; -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/iterators/multikey_groupby_source.cpp: -------------------------------------------------------------------------------- 1 | class {{class_symbol}} : public AggregateSource<{{produce_type}}, {{keytype}}, {{state_type}}, {{input_type}}> { 2 | using AggregateSource<{{produce_type}}, {{keytype}}, {{state_type}}, {{input_type}}>::AggregateSource; 3 | 4 | private: 5 | typedef AggregateSource<{{produce_type}}, {{keytype}}, {{state_type}}, {{input_type}}>::map_output_t map_output_t; 6 | 7 | protected: 8 | void mktuple({{produce_type}}& {{produce_tuple_name}}, map_output_t& {{mapping_var_name}}) { 9 | {{produce_type}} {{produce_tuple_name}}_tmp(std::tuple_cat({{mapping_var_name}}.first, {{mapping_var_name}}.second.to_tuple())); 10 | {{produce_tuple_name}} = {{produce_tuple_name}}_tmp; 11 | } 12 | }; -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/iterators/partition_groupby/multikey_groupby_sink.cpp: -------------------------------------------------------------------------------- 1 | class {{class_symbol}} : public AggregatePartitionSink<{{consume_type}}, {{keytype}}, {{state_type}}> { 2 | using AggregatePartitionSink<{{consume_type}}, {{keytype}}, 
{{state_type}}>::AggregatePartitionSink; 3 | protected: 4 | {{keytype}} mktuple({{consume_type}}& {{consume_tuple_name}}) { 5 | return std::make_tuple({{ keygets|join(',') }}); 6 | } 7 | 8 | }; -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/iterators/select.cpp: -------------------------------------------------------------------------------- 1 | class {{class_symbol}} : public Select<{{consume_type}}, {{produce_type}}> { 2 | using Select<{{consume_type}}, {{produce_type}}>::Select; // inherit constructors from the base class declared above 3 | protected: 4 | bool predicate({{produce_type}}& {{consume_tuple_name}}) { 5 | return {{expression}}; 6 | } 7 | }; -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/iterators/sink_declaration.cpp: -------------------------------------------------------------------------------- 1 | Operator * {{symbol}}; -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/iterators/withkey_init.cpp: -------------------------------------------------------------------------------- 1 | auto {{hashname}} = DHT_symmetric_generic<{{keytype}},{{valtype}},{{update_val_type}},hash_tuple::hash<{{keytype}}>>::create_DHT_symmetric(&{{update_func}}, &{{init_func}}); 2 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/partition_groupby/nkey_update.cpp: -------------------------------------------------------------------------------- 1 | {{comment}} 2 | {{hashname}}->template update_partition<{{input_type}}, &{{update_func}},&{{init_func}}>(std::make_tuple({{ keygets|join(',') }}), {{update_val}}); 3 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/shuffle.cpp: -------------------------------------------------------------------------------- 1 | {{comment}} 2 | auto 
target = hash_tuple::hash<{{keytype}}>()({{keyval}}) % Grappa::cores(); 3 | // DEV NOTE: if something inside this call is not captured in the lambda, 4 | // (probably a data structure) then we need to change its declaration to a global one. 5 | // The alternative is just to capture [=] but this will mask unneeded communication. 6 | Grappa::delegate::call(target, [{{keyname}}] { 7 | {{inner_code}} 8 | }); -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/shufflehashjoin/delete.cpp: -------------------------------------------------------------------------------- 1 | freeJoinReducers({{hashname}}, {{hashname}}_num_reducers); 2 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/shufflehashjoin/hash_init.cpp: -------------------------------------------------------------------------------- 1 | auto {{hashname}}_num_reducers = cores(); 2 | auto {{hashname}} = allocateJoinReducers({{hashname}}_num_reducers); 3 | auto {{hashname}}_ctx = HashJoinContext({{hashname}}, {{hashname}}_num_reducers); 4 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/shufflehashjoin/materialize.cpp: -------------------------------------------------------------------------------- 1 | {{hashname}}_ctx.emitIntermediate{{side}}<&{{global_syncname}}>({{keyval}}, {{keyname}}); 2 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/shufflehashjoin/reduce.cpp: -------------------------------------------------------------------------------- 1 | {{hashname}}_ctx.reduceExecute(); 2 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/shufflehashjoin/result_scan.cpp: -------------------------------------------------------------------------------- 1 | 
MapReduce::forall_symmetric<&{{pipeline_sync}}>({{hashname}}, &JoinReducer::resultAccessor, [=]({{out_tuple_type}}& {{out_tuple_name}}) { 2 | {{inner_code_compiled}} 3 | }); 4 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/spawn.cpp: -------------------------------------------------------------------------------- 1 | auto {{name}} = Pipeline({{ident}}, { {{dependence_captures}} }, [=] { 2 | {{inner_code}} 3 | }); 4 | {{name}}.run(); 5 | 6 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/string_index_lookup.cpp: -------------------------------------------------------------------------------- 1 | auto l_{{sid}} = string_index.string_lookup({{st}}); 2 | on_all_cores([=] { {{sid}} = l_{{sid}}; }); 3 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/symmetric_array_file_scan.cpp: -------------------------------------------------------------------------------- 1 | if (FLAGS_bin) { 2 | BinaryRelationFileReader<{{result_type}}, 3 | aligned_vector<{{result_type}}>, 4 | SymmetricArrayRepresentation<{{result_type}}>> reader; 5 | // just always broadcast the name to all cores 6 | // although for some queries it is unnecessary 7 | auto l_{{resultsym}} = reader.read( FLAGS_input_file_{{name}} + ".bin" ); 8 | on_all_cores([=] { 9 | {{resultsym}} = l_{{resultsym}}; 10 | }); 11 | 12 | } else { 13 | 14 | CHECK(false) << "only --bin=true supported for symmetric array repr"; 15 | 16 | } 17 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/symmetric_array_memory_scan.cpp: -------------------------------------------------------------------------------- 1 | forall<&{{global_syncname}}>( {{readfrom}}, [=]({{tuple_type}}& {{tuple_name}}) { 2 | {{inner_code}} 3 | }); 4 | 5 | 
-------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/symmetric_array_relation_declaration.cpp: -------------------------------------------------------------------------------- 1 | Relation> {{resultsym}}; 2 | 3 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/symmetric_array_relation_materialize.cpp: -------------------------------------------------------------------------------- 1 | {{sym}}.data->vector.push_back({{input_tuple_name}}); 2 | 3 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/symmetric_array_temprelation_declaration.cpp: -------------------------------------------------------------------------------- 1 | TemporaryTable<{{tuple_type}}> {{resultsym}}; 2 | 3 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/symmetric_array_temprelation_init.cpp: -------------------------------------------------------------------------------- 1 | {{sym}}.init(); 2 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/symmetric_array_temprelation_materialize.cpp: -------------------------------------------------------------------------------- 1 | {{sym}}.append({{input_tuple_name}}); 2 | 3 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/symmetric_array_temprelation_materializer_done.cpp: -------------------------------------------------------------------------------- 1 | {{sym}}.release_producer(); 2 | 3 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/symmetric_array_temprelation_recycle.cpp: -------------------------------------------------------------------------------- 1 | // 
recycle result vector 2 | {{sym}}.register_producers({{num_producers}}); 3 | 4 | 5 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/symmetrichashjoin/hash_declaration.cpp: -------------------------------------------------------------------------------- 1 | typedef DoubleDHT<{{keytype}}, {{left_in_tuple_type}}, {{right_in_tuple_type}}, hash_tuple::hash<{{keytype}}>> DHT_{{left_in_tuple_type}}_{{right_in_tuple_type}}_{{hashname}}; 2 | DHT_{{left_in_tuple_type}}_{{right_in_tuple_type}}_{{hashname}} {{hashname}}; 3 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/symmetrichashjoin/hash_init.cpp: -------------------------------------------------------------------------------- 1 | {{hashname}}.init_global_DHT( &{{hashname}}, cores()*16*1024 ); 2 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/symmetrichashjoin/hash_insert_lookup.cpp: -------------------------------------------------------------------------------- 1 | {{hashname}}.insert_lookup_iter_{{side}}<&{{global_syncname}}>({{keyval}}, {{keyname}}, [=]({{other_tuple_type}} {{valname}}) { 2 | join_coarse_result_count++; 3 | {{out_tuple_type}} {{out_tuple_name}} = {{append_func_name}}({{left_name}}, {{right_name}}); 4 | {{inner_plan_compiled}} 5 | }); 6 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/sync_declaration.cpp: -------------------------------------------------------------------------------- 1 | GlobalCompletionEvent {{global_syncname}}(true); 2 | -------------------------------------------------------------------------------- /raco/backends/radish/grappa_templates/wait_statement.cpp: -------------------------------------------------------------------------------- 1 | {{name}}.wait(); 2 | 
-------------------------------------------------------------------------------- /raco/backends/sparql/__init__.py: -------------------------------------------------------------------------------- 1 | # everything in sparql.py made public 2 | from sparql import * 3 | -------------------------------------------------------------------------------- /raco/backends/sql/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwescience/raco/1f2bedbef71bacf715340289f4973d85a3c1dc97/raco/backends/sql/__init__.py -------------------------------------------------------------------------------- /raco/backends/sql/test_case.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | import sqlalchemy 3 | import unittest 4 | 5 | import raco.algebra as algebra 6 | from raco.compile import optimize_by_rules 7 | from raco.backends.logical import OptLogicalAlgebra 8 | import raco.myrial.interpreter as interpreter 9 | import raco.myrial.parser as parser 10 | import raco.scheme as scheme 11 | from .catalog import SQLCatalog 12 | import raco.types as types 13 | 14 | 15 | class SQLTestCase(unittest.TestCase): 16 | """A base for testing the compilation of RACO programs to SQL queries""" 17 | 18 | emp_table = [ 19 | # id dept_id name salary 20 | (0, 1, "Hank Levy", 1000000, -1), 21 | (1, 2, "Bill Howe", 25000, 0), 22 | (2, 1, "Dan Halperin", 90000, 0), 23 | (3, 1, "Andrew Whitaker", 5000, 0), 24 | (4, 2, "Shumo Chu", 5000, 0), 25 | (5, 1, "Victor Almeida", 25000, 0), 26 | (6, 3, "Dan Suciu", 90000, 0), 27 | (7, 1, "Magdalena Balazinska", 25000, 0)] 28 | 29 | emp_schema = scheme.Scheme([("id", types.INT_TYPE), 30 | ("dept_id", types.INT_TYPE), 31 | ("name", types.STRING_TYPE), 32 | ("salary", types.LONG_TYPE), 33 | ("mgr_id", types.INT_TYPE)]) 34 | 35 | emp_key = "public:adhoc:employee" 36 | 37 | def setUp(self): 38 | # SQLAlchemy 39 | self.db = 
class ClangTestDatabase(Catalog):
    """
    Interface for table metadata and ingest
    for raco.cpp query processor
    """

    def __init__(self):
        # Map from RelationKey to the scheme registered by ingest().
        # (Only the scheme is kept; tuple contents are written to disk.)
        self.tables = {}

    @staticmethod
    def _to_relation_key(rel_key):
        """Return rel_key as a RelationKey, parsing it first if it is a string.

        Fails an assertion if the result is not a RelationKey.
        """
        if isinstance(rel_key, basestring):
            rel_key = relation_key.RelationKey.from_string(rel_key)
        assert isinstance(rel_key, relation_key.RelationKey)
        return rel_key

    def get_num_servers(self):
        """Return the number of servers; always 1 for this catalog."""
        return 1

    def num_tuples(self, rel_key):
        """Return DEFAULT_CARDINALITY for every relation."""
        return DEFAULT_CARDINALITY

    def ingest(self, rel_key, contents, scheme):
        '''Directly load raw data into the database

        Writes each tuple of `contents` as one space-delimited row to a
        file named after rel_key.relation, then records `scheme` for the
        relation.
        '''
        rel_key = self._to_relation_key(rel_key)

        with open(rel_key.relation, 'w') as writetable:
            writer = csv.writer(writetable, delimiter=' ')
            for tup in contents:
                writer.writerow(tup)

        self.tables[rel_key] = scheme

    def get_scheme(self, rel_key):
        """Return the scheme recorded for rel_key by ingest().

        Raises KeyError if the relation was never ingested.
        """
        rel_key = self._to_relation_key(rel_key)
        return self.tables[rel_key]
3 | bool TwoPassSelect(condition(Tuple *), const Relation *input, Relation *output); 4 | 5 | bool HashJoin(const Attribute &leftattr, const Attribute &rightattr, const Relation *left, const Relation *right, Relation *output); 6 | 7 | bool Scan(string &name, const Catalog *catalog, Relation *output); 8 | -------------------------------------------------------------------------------- /raco/clib/boolean.cc: -------------------------------------------------------------------------------- 1 | #include "boolean.h" 2 | 3 | using namespace std; 4 | 5 | void BinaryExpression::PrintTo(ostream &os, int indent) { 6 | os << 7 | }; 8 | 9 | // AND, OR 10 | class BinaryBooleanExpression : public BooleanExpression { 11 | public: 12 | BinaryBooleanExpression(BooleanExpression &left, BooleanExpression &right); 13 | }; 14 | 15 | // attribute reference, literal 16 | class Value {}; 17 | 18 | // =, !=, <, >, <=, >= 19 | class BinaryBooleanOperator : public BooleanExpression { 20 | public: 21 | BinaryBooleanOperator(const Value &left, const Value &right) : left(left), right(right) {}; 22 | protected: 23 | const Value &left; 24 | const Value &right; 25 | }; 26 | 27 | template 28 | class Literal : public Value { 29 | public: 30 | Literal(T val) : value(val) {}; 31 | protected: 32 | T value; 33 | }; 34 | 35 | class Attribute : public Value { 36 | public: 37 | Attribute(string val) : value(val) {}; 38 | protected: 39 | string &value; 40 | }; 41 | 42 | class EQ : public BinaryBooleanOperator { 43 | public: 44 | EQ(const Value &left, const Value &right) : BinaryBooleanOperator(left, right) {}; 45 | }; 46 | 47 | -------------------------------------------------------------------------------- /raco/clib/boolean.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | using namespace std; 4 | 5 | template 6 | class BinaryOperator { 7 | public: 8 | BinaryOperator(const T &left, const T& right) : left(left), right(right) {}; 9 | virtual void 
PrintTo(ostream &os, int indent); 10 | protected: 11 | const T &left; 12 | const T &right; 13 | }; 14 | 15 | // all boolean expressions happen to be binary operators currently 16 | template 17 | class BooleanExpression : public BinaryOperator { 18 | BooleanExpression(const T &left, const T& right) : BinaryOperator(left, right) {}; 19 | }; 20 | 21 | // AND, OR 22 | class BinaryBooleanExpression : public BooleanExpression { 23 | public: 24 | BinaryBooleanExpression(BooleanExpression &left, BooleanExpression &right) : BooleanExpression(left, right) {}; 25 | }; 26 | 27 | // attribute reference or literal 28 | class Value {}; 29 | 30 | // =, !=, <, >, <=, >= 31 | class Comparator : public BooleanExpression { 32 | public: 33 | Comparator(const Value &left, const Value &right) : BooleanExpression(left, right) {}; 34 | }; 35 | 36 | template 37 | class Literal : public Value { 38 | public: 39 | Literal(T val) : value(val) {}; 40 | protected: 41 | T value; 42 | }; 43 | 44 | class Attribute : public Value { 45 | public: 46 | Attribute(string val) : value(val) {}; 47 | protected: 48 | string &value; 49 | }; 50 | 51 | class EQ : public BinaryBooleanOperator { 52 | public: 53 | EQ(const Value &left, const Value &right) : BinaryBooleanOperator(left, right) {}; 54 | }; 55 | 56 | -------------------------------------------------------------------------------- /raco/clib/testboolean.cc: -------------------------------------------------------------------------------- 1 | #include "boolean.h" 2 | #include 3 | 4 | using namespace std; 5 | 6 | int main(int argc, char **argv) { 7 | 8 | BooleanExpression cond = EQ(Attribute(string("X")), Literal(1)); 9 | 10 | cond.PrintTo(cond); 11 | //cout << cond << endl; 12 | } 13 | 14 | -------------------------------------------------------------------------------- /raco/clib/testboolean.o: -------------------------------------------------------------------------------- 
def hack_plan(alg, plan):
    """Override the join implementation used by `alg` according to `plan`.

    "sym" selects GrappaSymmetricHashJoin and "shuf" selects
    GrappaShuffleHashJoin, both through alg.set_join_type(). Any other
    value, including None, leaves `alg` untouched.
    """
    # plan hacking
    if plan == "sym":
        alg.set_join_type(GrappaSymmetricHashJoin)
    elif plan == "shuf":
        alg.set_join_type(GrappaShuffleHashJoin)
open(fname, 'w') as f: 65 | f.write(code) 66 | 67 | # returns name of code file 68 | return fname 69 | -------------------------------------------------------------------------------- /raco/datalog/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwescience/raco/1f2bedbef71bacf715340289f4973d85a3c1dc97/raco/datalog/__init__.py -------------------------------------------------------------------------------- /raco/datalog/datalog_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import json 3 | 4 | import raco.fakedb 5 | from raco import RACompiler 6 | from raco.backends.myria import (compile_to_json, 7 | MyriaLeftDeepTreeAlgebra, 8 | MyriaHyperCubeAlgebra) 9 | from raco.catalog import FakeCatalog 10 | 11 | 12 | class DatalogTestCase(unittest.TestCase): 13 | 14 | def setUp(self): 15 | self.db = raco.fakedb.FakeDatabase() 16 | 17 | def execute_query(self, query, test_logical=False, skip_json=False, 18 | output="OUTPUT", algebra=MyriaLeftDeepTreeAlgebra): 19 | """Run a test query against the fake database""" 20 | 21 | dlog = RACompiler() 22 | dlog.fromDatalog(query) 23 | 24 | assert algebra in [MyriaLeftDeepTreeAlgebra, 25 | MyriaHyperCubeAlgebra] 26 | 27 | if test_logical: 28 | plan = dlog.logicalplan 29 | else: 30 | if algebra == MyriaLeftDeepTreeAlgebra: 31 | dlog.optimize(MyriaLeftDeepTreeAlgebra()) 32 | else: 33 | dlog.optimize(MyriaHyperCubeAlgebra(FakeCatalog(64))) 34 | plan = dlog.physicalplan 35 | 36 | if not skip_json: 37 | # test whether we can generate json without errors 38 | json_string = json.dumps(compile_to_json( 39 | query, dlog.logicalplan, dlog.physicalplan, "datalog")) 40 | assert json_string 41 | 42 | self.db.evaluate(plan) 43 | return self.db.get_table(output) 44 | 45 | def check_result(self, query, expected, test_logical=False, 46 | skip_json=False, output="OUTPUT", 47 | algebra=MyriaLeftDeepTreeAlgebra): 
class UnionFind:

    """Union-find data structure.

    Each UnionFind instance X maintains a family of disjoint sets of
    hashable objects, supporting the following methods:

    - X[item] returns a name for the set containing the given item.
      Each set is named by an arbitrarily-chosen one of its members; as
      long as the set remains unchanged it will keep the same name.
      Raises KeyError if the item is not yet part of a set in X.

    - X.get_or_insert(item) behaves like X[item], except that if the
      item is not yet part of a set in X, a new singleton set is
      created for it.

    - X.union(item1, item2, ...) merges the sets containing each item
      into a single larger set.  If any item is not yet part of a set
      in X, it is added to X as one of the members of the merged set.
    """

    def __init__(self):
        """Create a new empty union-find structure."""
        # weights: item -> size of the set it names (kept up to date for
        #          roots only)
        # parents: item -> parent item; a root is its own parent
        self.weights = {}
        self.parents = {}

    def __getitem__(self, object):
        """Return the name of the set containing the object.

        Raises KeyError if the object has never been inserted.
        """
        if object not in self.parents:
            raise KeyError(object)

        # find path of objects leading to the root
        path = [object]
        root = self.parents[object]
        while root != path[-1]:
            path.append(root)
            root = self.parents[root]

        # compress the path and return
        for ancestor in path:
            self.parents[ancestor] = root
        return root

    def get_or_insert(self, object):
        """Find and return the name of the set containing the object.

        A previously unknown object becomes a new singleton set named
        after itself.
        """

        # check for previously unknown object
        if object not in self.parents:
            self.parents[object] = object
            self.weights[object] = 1
            return object

        return self[object]

    def __iter__(self):
        """Iterate through all items ever found or unioned by this structure.
        """
        return iter(self.parents)

    def union(self, *objects):
        """Find the sets containing the objects and merge them all.

        Objects not seen before are inserted first.  Calling union() with
        no arguments is a no-op.
        """
        # Collect each distinct root once: several objects may already
        # share a root, and merging the same root twice would add its
        # weight to the winner twice.
        roots = []
        seen = set()
        for x in objects:
            r = self.get_or_insert(x)
            if r not in seen:
                seen.add(r)
                roots.append(r)
        if not roots:
            return

        try:
            # Ties on weight are broken by comparing the items themselves,
            # which keeps the set names this method has always produced.
            heaviest = max([(self.weights[r], r) for r in roots])[1]
        except TypeError:
            # Items only have to be hashable, not orderable.
            heaviest = max(roots, key=self.weights.__getitem__)

        for r in roots:
            if r != heaviest:
                self.weights[heaviest] += self.weights[r]
                self.parents[r] = heaviest
7 | 8 | uf = UnionFind() 9 | uf.get_or_insert(1) 10 | uf.get_or_insert(10) 11 | uf.get_or_insert(2) 12 | uf.get_or_insert(5) 13 | 14 | def test_insert_or_get(self): 15 | uf = copy.deepcopy(self.uf) 16 | self.assertIn(1, uf) 17 | self.assertIn(10, uf) 18 | self.assertIn(2, uf) 19 | self.assertIn(5, uf) 20 | self.assertEqual(uf.get_or_insert(1), 1) 21 | self.assertEqual(uf.get_or_insert(5), 5) 22 | self.assertEqual(uf.get_or_insert(10), 10) 23 | self.assertEqual(uf.get_or_insert(2), 2) 24 | self.assertEqual(uf.get_or_insert(30), 30) 25 | self.assertEqual(uf.get_or_insert(52), 52) 26 | 27 | def test_get(self): 28 | uf = self.uf 29 | self.assertEqual(uf[1], 1) 30 | self.assertEqual(uf[10], 10) 31 | 32 | def test_get_error(self): 33 | with self.assertRaises(Exception): 34 | _ = self.uf[52] 35 | with self.assertRaises(Exception): 36 | _ = self.uf[30] 37 | 38 | def test_union(self): 39 | uf = copy.deepcopy(self.uf) 40 | uf.union(1, 10) 41 | self.assertEqual(uf.get_or_insert(1), uf.get_or_insert(10)) 42 | uf.union(2, 5) 43 | self.assertEqual(uf.get_or_insert(2), uf.get_or_insert(5)) 44 | uf.union(2, 1) 45 | self.assertEqual(uf.get_or_insert(10), uf.get_or_insert(5)) 46 | -------------------------------------------------------------------------------- /raco/expression/__init__.py: -------------------------------------------------------------------------------- 1 | from .expression import * 2 | from .aggregate import * 3 | from .boolean import * 4 | from .function import * 5 | from .util import * 6 | from .statevar import * 7 | -------------------------------------------------------------------------------- /raco/expression/statevar.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | # This type represents a state variable, as used by StatefulApply and UDAs 4 | StateVar = collections.namedtuple( 5 | 'StateVar', ['name', 'init_expr', 'update_expr']) 6 | 
-------------------------------------------------------------------------------- /raco/expression/udf.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | # A user-defined function 4 | Function = collections.namedtuple('Function', ['args', 'sexpr']) 5 | 6 | # A user-defined stateful apply or UDA 7 | StatefulFunc = collections.namedtuple( 8 | 'StatefulFunc', ['args', 'statemods', "sexpr"]) 9 | -------------------------------------------------------------------------------- /raco/fake_data.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import raco.scheme as scheme 3 | import raco.types as types 4 | 5 | """This class contains fake data used by several unit tests.""" 6 | 7 | 8 | class FakeData(object): 9 | emp_table = collections.Counter([ 10 | # id dept_id name salary 11 | (1, 2, "Bill Howe", 25000), 12 | (2, 1, "Dan Halperin", 90000), 13 | (3, 1, "Andrew Whitaker", 5000), 14 | (4, 2, "Shumo Chu", 5000), 15 | (5, 1, "Victor Almeida", 25000), 16 | (6, 3, "Dan Suciu", 90000), 17 | (7, 1, "Magdalena Balazinska", 25000)]) 18 | 19 | emp_schema = scheme.Scheme([("id", types.INT_TYPE), 20 | ("dept_id", types.INT_TYPE), 21 | ("name", types.STRING_TYPE), 22 | ("salary", types.LONG_TYPE)]) 23 | 24 | emp_key = "public:adhoc:employee" 25 | 26 | dept_table = collections.Counter([ 27 | (1, "accounting", 5), 28 | (2, "human resources", 2), 29 | (3, "engineering", 2), 30 | (4, "sales", 7)]) 31 | 32 | dept_schema = scheme.Scheme([("id", types.LONG_TYPE), 33 | ("name", types.STRING_TYPE), 34 | ("manager", types.LONG_TYPE)]) 35 | 36 | dept_key = "public:adhoc:department" 37 | 38 | numbers_table = collections.Counter([ 39 | (1, 3), 40 | (2, 5), 41 | (3, -2), 42 | (16, -4.3)]) 43 | 44 | numbers_schema = scheme.Scheme([("id", types.LONG_TYPE), 45 | ("val", types.DOUBLE_TYPE)]) 46 | 47 | numbers_key = "public:adhoc:numbers" 48 | 49 | test_function = ("test", 
def plan_from_repr(repr_string):
    """Evaluate repr_string with eval() and return the resulting object.

    Names in the string are resolved against this module's namespace,
    which is why the module star-imports the expression and algebra
    packages.

    SECURITY: the string is executed as arbitrary Python. Only call this
    with trusted input; a warning is logged on every call.
    """
    _LOG.warning("Relying on eval! "
                 "This module should only be used in "
                 "trusted development situations\n")
    # eval() also sees this function's locals, so no extra local names are
    # introduced here.
    return eval(repr_string)
'scripts/myrial', '-f', 17 | 'examples/standalone.myl']) 18 | self.assertIn('Dan Suciu,engineering', out) 19 | 20 | def test_cli_standalone_json(self): 21 | out = subprocess.check_output(['python', 'scripts/myrial', '-j', 22 | 'examples/cast.myl']) 23 | self.assertIn('rawQuery', out) 24 | 25 | def test_cli_standalone_logical(self): 26 | out = subprocess.check_output(['python', 'scripts/myrial', '-l', 27 | 'examples/standalone.myl']) 28 | self.assertIn("CrossProduct[FileScan", out) 29 | 30 | def test_cli_standalone_repr(self): 31 | out = subprocess.check_output(['python', 'scripts/myrial', '-r', 32 | 'examples/standalone.myl']) 33 | self.assertIn("FileScan('./examples/dept.csv'", out) 34 | 35 | def test_cli_reserved_column_name(self): 36 | proc = subprocess.Popen( 37 | ['python', 'scripts/myrial', 'examples/bad_column_name.myl'], 38 | stdout=subprocess.PIPE) 39 | out = proc.communicate()[0] 40 | self.assertIn('The token "SafeDiv" on line 2 is reserved', out) 41 | -------------------------------------------------------------------------------- /raco/myrial/empty_aggregate_tests.py: -------------------------------------------------------------------------------- 1 | """Test of aggregations over empty relations. 
2 | 3 | Aggregation queries without grouping should return sensible default values: 4 | COUNT(empty) == 0 5 | SUM(empty) == 0 6 | """ 7 | 8 | import collections 9 | 10 | import raco.myrial.myrial_test as myrial_test 11 | 12 | 13 | class EmptyAggregateTests(myrial_test.MyrialTestCase): 14 | 15 | def setUp(self): 16 | super(EmptyAggregateTests, self).setUp() 17 | 18 | def test_count(self): 19 | query = """ 20 | W = EMPTY(v:int); 21 | X = [FROM W EMIT COUNT(v)]; 22 | STORE(X, OUTPUT); 23 | """ 24 | 25 | self.check_result(query, collections.Counter([(0,)])) 26 | 27 | def test_sum(self): 28 | query = """ 29 | W = EMPTY(v:int); 30 | X = [FROM W EMIT SUM(v)]; 31 | STORE(X, OUTPUT); 32 | """ 33 | 34 | self.check_result(query, collections.Counter([(0,)])) 35 | -------------------------------------------------------------------------------- /raco/myrial/filescan_tests.py: -------------------------------------------------------------------------------- 1 | 2 | import collections 3 | 4 | import raco.algebra 5 | import raco.scheme as scheme 6 | import raco.myrial.myrial_test as myrial_test 7 | from raco import types 8 | 9 | 10 | class FileScanTest(myrial_test.MyrialTestCase): 11 | 12 | def test_filescan(self): 13 | query = """ 14 | x = load("examples/load_options.csv", 15 | csv( 16 | schema(column0:int, column1:string, column2:string, column3:float), 17 | delimiter="|", quote="~", escape="%", skip=2)); 18 | store(x, OUTPUT); 19 | """ 20 | expected = collections.Counter([ 21 | (1, "foo", "abc|def", 1.0), 22 | (2, "bar", "ghi|jkl", 2.0), 23 | ]) 24 | self.check_result(query, expected) 25 | -------------------------------------------------------------------------------- /raco/myrial/keywords.py: -------------------------------------------------------------------------------- 1 | """Emit all Myrial/SQL keywords as lowercase strings.""" 2 | 3 | from raco.myrial.scanner import (builtins, keywords, 4 | types, comprehension_keywords, 5 | word_operators) 6 | from 
def get_keywords():
    """Return a dict of Myrial/SQL keywords, grouped by category.

    This includes reserved lex tokens and system-defined functions.
    Every value is a sorted list of strings. Scanner tokens are
    lowercased; the names taken from EXPRESSIONS are used as they are.
    """
    return {
        # list(...) is needed because a Python 3 dict view cannot be
        # concatenated with a list using `+`.
        'builtins': sorted(
            list(EXPRESSIONS.keys()) + [kw.lower() for kw in builtins]),
        'keywords': sorted(kw.lower() for kw in keywords),
        'types': sorted(kw.lower() for kw in types),
        'comprehension_keywords': sorted(
            kw.lower() for kw in comprehension_keywords),
        'word_operators': sorted(kw.lower() for kw in word_operators),
    }
class KmeansTest(myrial_test.MyrialTestCase):
    """Smoke test that runs the k-means example program.

    The output is not checked; the test only verifies that the program
    executes against the ingested points.
    """

    # (id, x, y) rows: three points near 1, three near 10, three near 100.
    points = [
        (1, 1.0, 1.0), (2, .99, .99), (3, 1.01, 1.01),
        (4, 10.0, 10.0), (5, 10.99, 10.99), (6, 10.01, 10.01),
        (7, 100.0, 100.0), (8, 100.99, 100.99), (9, 100.01, 100.01),
    ]
    points_table = collections.Counter(points)

    points_schema = scheme.Scheme([
        ('id', types.LONG_TYPE),
        ('x', types.DOUBLE_TYPE),
        ('y', types.DOUBLE_TYPE),
    ])
    points_key = "public:adhoc:points"

    def setUp(self):
        """Ingest the points relation into the test database."""
        super(KmeansTest, self).setUp()

        fixture = KmeansTest
        self.db.ingest(fixture.points_key,
                       fixture.points_table,
                       fixture.points_schema)

    def test_kmeans(self):
        """Execute examples/kmeans.myl without validating its output."""
        with open('examples/kmeans.myl') as program:
            source = program.read()
        self.execute_query(source, skip_json=True)
class PageRankTest(myrial_test.MyrialTestCase):
    """Run the PageRank example programs over a fixed five-vertex graph."""

    # Directed (src, dst) edges, grouped by source vertex.
    edge_table = collections.Counter([
        (0, 3),
        (1, 0), (1, 2),
        (2, 0), (2, 1), (2, 3),
        (3, 0), (3, 1), (3, 2), (3, 4),
        (4, 0), (4, 1), (4, 2), (4, 3), (4, 4)])

    edge_schema = scheme.Scheme([("src", types.LONG_TYPE),
                                 ("dst", types.LONG_TYPE)])
    edge_key = "public:adhoc:edges"

    vertex_table = collections.Counter([(x,) for x in range(5)])
    vertex_key = "public:adhoc:vertices"
    vertex_schema = scheme.Scheme([("id", types.LONG_TYPE)])

    def setUp(self):
        """Ingest the edge relation, then the vertex relation."""
        super(PageRankTest, self).setUp()

        fixture = PageRankTest
        self.db.ingest(fixture.edge_key,
                       fixture.edge_table,
                       fixture.edge_schema)

        self.db.ingest(fixture.vertex_key,
                       fixture.vertex_table,
                       fixture.vertex_schema)

    def __do_test(self, phile):
        """Execute the program stored in file `phile` and check every rank."""
        with open(phile) as source:
            program = source.read()

        ranks = dict(self.execute_query(program).elements())

        # (vertex id, expected rank), asserted in this order.
        expected_ranks = (
            (0, 0.23576110832410296),
            (1, 0.16544845649781043),
            (2, 0.18370688939571236),
            (3, 0.3016893082129546),
            (4, 0.11339423756941983),
        )
        for vertex, rank in expected_ranks:
            self.assertAlmostEqual(ranks[vertex], rank)

    def test_pagerank(self):
        """Check the ranks produced by the plain PageRank program."""
        self.__do_test('examples/pagerank.myl')

    def verify_undefined(self, var):
        """Assert that looking up temp table `var` raises KeyError."""
        with self.assertRaises(KeyError):
            self.db.get_temp_table(var)

    def test_pagerank_deadcode(self):
        """Test of page rank with numerous dead code statements."""
        self.__do_test('examples/pagerank_dead.myl')

        # Verify that D0, D1 tables are compiled out
        for dead_table in ("D0", "D1"):
            self.verify_undefined(dead_table)
class TestSamplingOperations(myrial_test.MyrialTestCase, FakeData):
    """Check that SAMPLESCAN returns the requested number of tuples."""

    def setUp(self):
        """Ingest the employee relation that every test samples from."""
        super(TestSamplingOperations, self).setUp()

        self.db.ingest(TestSamplingOperations.emp_key,
                       TestSamplingOperations.emp_table,
                       TestSamplingOperations.emp_schema)

    def run_samplescan(self, sample_size, sample_type, is_pct=False):
        """Run one SAMPLESCAN query and assert on the result size.

        :param sample_size: Number of tuples, or a percentage if is_pct
        :param sample_type: Sampling mode keyword placed in the query
            ('WR' or 'WoR' in the tests below)
        :param is_pct: When True, sample_size is a percentage of the input
        """
        pct = '%' if is_pct else ''
        query = """
        emp = SAMPLESCAN({rel_key}, {size}{pct}, {type});
        STORE(emp, OUTPUT);
        """.format(rel_key=self.emp_key, size=sample_size, pct=pct,
                   type=sample_type)

        res = self.execute_query(query)
        if is_pct:
            # The expectation must come from the input relation. Deriving
            # it from the result makes the 100% case compare len(res) with
            # itself, so it can never fail.
            population = len(self.emp_table)
            expected_len = int(round(population * (sample_size / 100.0)))
        else:
            expected_len = sample_size
        # NOTE(review): if execute_query returns a Counter, len() counts
        # distinct rows only -- confirm this is intended for 'WR' samples.
        self.assertEqual(len(res), expected_len)

    def test_samplescan__wr_zero(self):
        self.run_samplescan(0, 'WR')

    def test_samplescan__wr_zero_pct(self):
        self.run_samplescan(0, 'WR', True)

    def test_samplescan__wor_zero(self):
        self.run_samplescan(0, 'WoR')

    def test_samplescan__wor_zero_pct(self):
        self.run_samplescan(0, 'WoR', True)

    def test_samplescan__wr_one(self):
        self.run_samplescan(1, 'WR')

    def test_samplescan__wor_one(self):
        self.run_samplescan(1, 'WoR')

    def test_samplescan__wr_all(self):
        self.run_samplescan(len(self.emp_table), 'WR')

    def test_samplescan__wr_100_pct(self):
        self.run_samplescan(100, 'WR', True)

    def test_samplescan__wor_all(self):
        self.run_samplescan(len(self.emp_table), 'WoR')

    def test_samplescan__wor_100_pct(self):
        self.run_samplescan(100, 'WoR', True)
output='sc_points_clipped') 33 | 34 | def test_v0(self): 35 | with open('examples/sigma-clipping-v0.myl') as fh: 36 | query = fh.read() 37 | self.run_it(query) 38 | 39 | def test_v2(self): 40 | with open('examples/sigma-clipping.myl') as fh: 41 | query = fh.read() 42 | self.run_it(query) 43 | -------------------------------------------------------------------------------- /raco/python/__init__.py: -------------------------------------------------------------------------------- 1 | from convert import convert 2 | -------------------------------------------------------------------------------- /raco/python/convert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ Functions to convert Python functions to RACO expressions """ 3 | 4 | import ast 5 | 6 | from raco.python.exceptions import PythonSyntaxException, \ 7 | PythonConvertException 8 | from raco.python.util import visitor 9 | from raco.python.util.decompile import get_source 10 | 11 | 12 | def convert(source_or_ast_or_callable, schema, udfs=None): 13 | """ 14 | Convert a Python function into its RACO equivalent 15 | :param source_or_ast_or_callable: Source string, callable, or AST node 16 | :param schema: List of schema for the input parameter(s) 17 | :param udfs: List of (name, arity) pairs of UDFs 18 | :return: RACO expression representing the source, callable, or AST node 19 | """ 20 | if isinstance(source_or_ast_or_callable, basestring): 21 | try: 22 | return convert(ast.parse(source_or_ast_or_callable), schema, udfs) 23 | except SyntaxError as e: 24 | raise PythonSyntaxException(e.msg, e.lineno, e.offset) 25 | elif callable(source_or_ast_or_callable): 26 | return convert(get_source(source_or_ast_or_callable), schema, udfs) 27 | elif isinstance(source_or_ast_or_callable, ast.AST): 28 | return visitor.ExpressionVisitor(schema or [], udfs or []).visit( 29 | source_or_ast_or_callable) or None 30 | else: 31 | raise PythonConvertException( 32 | 
class PythonConvertException(Exception):
    """ Base class for conversion exceptions """
    pass


class PythonTokenException(PythonConvertException):
    """ Base exception class for errors associated with a specific token

    :param token: The offending token
    :param line: Line on which the token occurs
    :param column: Column at which the token occurs
    """
    def __init__(self, token, line, column):
        # Forward the values to Exception so that ``args`` is populated.
        # Without this, str() of this base class is empty and copy/pickle
        # cannot rebuild the instance.
        super(PythonTokenException, self).__init__(token, line, column)
        self.token = token
        self.line = line
        self.column = column


class PythonUnrecognizedTokenException(PythonTokenException):
    """ Error occurring when an unrecognized token is encountered """
    def __str__(self):
        return 'Conversion error at token %s on line %d:%d' % \
               (self.token, self.line, self.column)


class PythonOutOfRangeException(PythonUnrecognizedTokenException):
    """ Error occurring when a slice is out of range"""
    def __str__(self):
        return 'Slice out of range error near token %s on line %d:%d' % \
               (self.token, self.line, self.column)


class PythonSyntaxException(PythonConvertException):
    """ Error occurring when a Python source string contains a syntax error

    :param message: Description of the problem (stored as ``token``)
    :param line: Line on which the problem occurs
    :param column: Column of the problem; omitted from the text when falsy
    """
    def __init__(self, message, line, column):
        # See PythonTokenException: keep ``args`` in sync with the fields.
        super(PythonSyntaxException, self).__init__(message, line, column)
        self.token = message
        self.line = line
        self.column = column

    def _location(self):
        """ Return the position formatted as 'line' or 'line:column' """
        # %s instead of %d so a missing (None) line still formats instead
        # of raising TypeError while the error is being reported.
        suffix = ':' + str(self.column) if self.column else ''
        return '%s%s' % (self.line, suffix)

    def __str__(self):
        return 'Syntax error: %s (%s)' % (self.token, self._location())


class PythonUnsupportedOperationException(PythonSyntaxException):
    """ Error occurring when an unsupported operation is detected """
    def __str__(self):
        return 'Unsupported operation: %s (%s)' % \
               (self.token, self._location())


class PythonArgumentException(PythonSyntaxException):
    """ Error occurring when a problem with an argument is detected """
    def __str__(self):
        return '%s (%s)' % (self.token, self._location())
TestDecompileLambdas(unittest.TestCase): 12 | def test_simple(self): 13 | s = decompile.get_source(lambda: 0) 14 | self.assertEquals(eval(s)(), 0) 15 | 16 | def test_no_space(self): 17 | s = decompile.get_source(lambda:0) 18 | self.assertEquals(eval(s)(), 0) 19 | 20 | def test_space_after_colon(self): 21 | s = decompile.get_source(lambda :0) 22 | self.assertEquals(eval(s)(), 0) 23 | 24 | def test_variable(self): 25 | f = lambda: 0 26 | s = decompile.get_source(f) 27 | self.assertEquals(eval(s)(), f()) 28 | 29 | def test_newline(self): 30 | f = \ 31 | lambda: 0 32 | s = decompile.get_source(f) 33 | self.assertEquals(eval(s)(), f()) 34 | 35 | def test_newline2(self): 36 | f = lambda: \ 37 | 0 38 | s = decompile.get_source(f) 39 | self.assertEquals(eval(s)(), f()) 40 | 41 | def test_tuple(self): 42 | t = (lambda: 0), 5 43 | s = decompile.get_source(t[0]) 44 | self.assertEquals(eval(s)(), t[0]()) 45 | 46 | def test_tuple2(self): 47 | f = lambda : (0, 5) 48 | s = decompile.get_source(f) 49 | self.assertEquals(eval(s)(), f()) 50 | 51 | def test_multiple_lambdas(self): 52 | t = lambda: 1, lambda: 2, lambda: 3 53 | for f in t: 54 | s = decompile.get_source(f) 55 | self.assertEquals(eval(s)(), f()) 56 | 57 | def test_embedded_lambda_token(self): 58 | f = lambda: "lambda: 0" 59 | s = decompile.get_source(f) 60 | self.assertEquals(eval(s)(), f()) 61 | 62 | def test_parameters(self): 63 | f = lambda x: x 64 | s = decompile.get_source(f) 65 | self.assertEquals(eval(s)(5), f(5)) 66 | 67 | def test_multiple_parameters(self): 68 | f = lambda x, y: x + y 69 | s = decompile.get_source(f) 70 | self.assertEquals(eval(s)(5, 6), f(5, 6)) 71 | 72 | def test_args_kwargs(self): 73 | f = lambda *args, **kwargs: args[0] + kwargs['foo'] 74 | s = decompile.get_source(f) 75 | self.assertEqual(eval(s)(5, foo=6), f(5, foo=6)) 76 | 77 | def test_unpacking(self): 78 | """ Unpacking is not currently supported """ 79 | f = lambda (x, y): x + y 80 | self.assertRaises(PythonConvertException, 81 | 
class TestProjection(PythonTestCase):
    """Convert Python lambdas to projections and run them over emp_table."""

    def _execute_projection(self, query, expected):
        """Convert `query`, apply it as column 'out', and check the output.

        :param query: Python source of a one-argument lambda
        :param expected: Counter of the rows the plan should produce
        """
        converted = convert(query, [self.schema])
        self.assertIsNotNone(converted)

        apply_op = Apply([('out', converted)], self.scan)
        return self.check_result(self.get_query(apply_op), expected)

    def test_name(self):
        """Project a column by attribute name."""
        names = Counter([(row[2],) for row in self.emp_table])
        self._execute_projection("""lambda t: t.name""", names)

    def test_expression(self):
        """Project an arithmetic expression over a column."""
        incremented = Counter([(row[0] + 1,) for row in self.emp_table])
        self._execute_projection("""lambda t: t.id + 1""", incremented)

    def test_index(self):
        """Project a column by positional index."""
        third_column = Counter([(row[2],) for row in self.emp_table])
        self._execute_projection("""lambda t: t[2]""", third_column)
class RelationKey(object):
    """Identify a Myria relation by (user, program, relation).

    Construct with either a single relation name, in which case user and
    program default to "public" and "adhoc", or with all three components.
    Every component must be a non-empty string.
    """

    # basestring only exists on Python 2; fall back to str elsewhere.
    try:
        _string_types = basestring
    except NameError:
        _string_types = str

    def __init__(self, *args):
        if len(args) == 1:
            self.user = "public"
            self.program = "adhoc"
            self.relation = args[0]
        else:
            # Any argument count other than 1 or 3 fails here with the
            # ValueError raised by tuple unpacking.
            self.user, self.program, self.relation = args
        for component in (self.user, self.program, self.relation):
            assert component and isinstance(component, self._string_types)

    def __repr__(self):
        return 'RelationKey(%r,%r,%r)' % (self.user, self.program,
                                          self.relation)

    def __str__(self):
        return '%s:%s:%s' % (self.user, self.program, self.relation)

    def __eq__(self, other):
        # Comparing with a foreign object (e.g. a plain string) used to
        # raise AttributeError because it has no __dict__.
        if not isinstance(other, RelationKey):
            return NotImplemented
        return self.__dict__ == other.__dict__

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__, so define it explicitly;
        # otherwise two equal keys would still compare as "not equal".
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(str(self))

    @classmethod
    def from_string(cls, s):
        """Create a RelationKey from a colon-delimited string.

        Accepts "relation" or "user:program:relation".  A two-component
        string passes the length assertion but is rejected by the
        constructor.
        """
        toks = s.split(':')
        assert len(toks) <= 3

        return cls(*toks)
def replace_with_repr(plan):
    """Rebuild `plan` by evaluating its own repr().

    The repr is evaluated in this module's namespace, which is why the
    module star-imports every class a repr may mention.

    :param plan: Object whose repr() is expected to be a valid constructor
        expression
    :return: The object produced by evaluating repr(plan)
    :raises: Whatever the evaluation raises; for the error types listed
        below a diagnostic line is printed first and the error re-raised
    """
    r = repr(plan)
    try:
        # NOTE: eval is deliberate here; the input is the repr of an
        # in-process object.  Do not call this with untrusted objects.
        return eval(r)
    except (TypeError, AttributeError, SyntaxError, NameError):
        # NameError is what eval raises when the repr names a class that
        # is not imported above, so it needs the same diagnostic.
        # The parenthesised single-argument form prints identically on
        # Python 2 and also parses on Python 3.
        print('Error with repr {r} of plan {p}'.format(r=r, p=plan))
        raise
class SPARQLTests(object):
    """Mixin of SPARQL test queries.

    The class it is mixed into supplies `self.tables` (a mapping used to
    fill the %(name)s placeholders) and `self.check(query, name, **kwargs)`.
    """
    # TODO: refactor MyrialPlatformTests to share code

    def check_sub_tables(self, query, name, **kwargs):
        """Substitute table names into `query`, then hand it to check()."""
        substituted = query % self.tables
        self.check(substituted, name, **kwargs)

    def test_scan(self):
        """Scan one table and store it."""
        query = """
        T1 = SCAN(%(T1)s);
        STORE(T1, OUTPUT);
        """
        self.check_sub_tables(query, "scan")

    def test_select(self):
        """Filter a scanned table on a > 5."""
        query = """
        T1 = SCAN(%(T1)s);
        x = [FROM T1 WHERE a>5 EMIT a];
        STORE(x, OUTPUT);
        """
        self.check_sub_tables(query, "select")

    def test_join(self):
        """Join two tables on b, then filter and project the result."""
        query = """
        T3 = SCAN(%(T3)s);
        R3 = SCAN(%(R3)s);
        out = JOIN(T3, b, R3, b);
        out2 = [FROM out WHERE $3 = $5 EMIT $0, $3];
        STORE(out2, OUTPUT);
        """
        self.check_sub_tables(query, "join")
class StyleTest(unittest.TestCase):
    """run flake8 with the right arguments and ensure all files pass"""

    def test_style(self):
        "run flake8 with the right arguments and ensure all files pass"
        # Files that flake8 must not check.
        excluded = (
            'parsetab.py',
            'decompile_lambda_test.py',
            'decompile_function_test.py',
        )
        command = [
            'flake8',
            '--ignore=F',
            '--exclude=' + ','.join(excluded),
            'raco',
        ]
        check_output_and_print_stderr(command)

    def test_pylint(self):
        "run pylint -E to catch obvious errors"
        # TODO fix this.
        raise SkipTest()
        # Unreachable until the skip above is removed; kept as the intended
        # invocation.
        check_output_and_print_stderr(['pylint', '-E', 'raco'])
"""Names of primitive types understood by raco.

Note that raco internally supports a limited set of types. Different backends
can support a richer set of types, but these aren't understood by raco's
type system. For example, raco doesn't distinguish between int and long,
so unsafe casts are not detected inside raco.
"""

# Internal and external types; these are understood by raco's type system
LONG_TYPE = "LONG_TYPE"
BOOLEAN_TYPE = "BOOLEAN_TYPE"
DOUBLE_TYPE = "DOUBLE_TYPE"
STRING_TYPE = "STRING_TYPE"
DATETIME_TYPE = "DATETIME_TYPE"
BLOB_TYPE = "BLOB_TYPE"

INTERNAL_TYPES = {
    LONG_TYPE,
    BOOLEAN_TYPE,
    DOUBLE_TYPE,
    STRING_TYPE,
    DATETIME_TYPE,
    BLOB_TYPE,
}

# External only types; not understood by raco's type system
INT_TYPE = "INT_TYPE"
FLOAT_TYPE = "FLOAT_TYPE"

NUMERIC_TYPES = {LONG_TYPE, DOUBLE_TYPE}

# Every internal type maps to itself; external types map to the internal
# type that represents them.
TYPE_MAP = {k: k for k in INTERNAL_TYPES}
TYPE_MAP[INT_TYPE] = LONG_TYPE
TYPE_MAP[FLOAT_TYPE] = DOUBLE_TYPE
# list(...) keeps this a real list on every Python version.
ALL_TYPES = list(TYPE_MAP)


# Map from python primitive types to internal types
python_type_map = {
    int: LONG_TYPE,
    bool: BOOLEAN_TYPE,
    float: DOUBLE_TYPE,
    str: STRING_TYPE,
}

# items() instead of iteritems(): same mapping, and not Python-2-only.
reverse_python_type_map = {v: k for k, v in python_type_map.items()}

# Spellings that parse_string reads as boolean False (case-insensitive).
_FALSE_LITERALS = frozenset(['false', 'f', '0', 'no'])


def map_type(s):
    """Convert an arbitrary type to an internal type.

    :raises KeyError: if `s` is not a known type name
    """
    return TYPE_MAP[s]


def parse_string(s, _type):
    """Convert from a string to an internal python representation.

    :param s: Text to convert
    :param _type: Internal type name; must be one that has a python
        equivalent in python_type_map
    :return: `s` converted with the matching python type
    """
    assert _type in reverse_python_type_map
    # bool() of any non-empty string is True, so "False" or "0" would parse
    # as True. Recognise the false spellings first; everything else keeps
    # the plain bool() behavior.
    if _type == BOOLEAN_TYPE and hasattr(s, 'lower'):
        if s.strip().lower() in _FALSE_LITERALS:
            return False
    return reverse_python_type_map[_type](s)
def graph_to_dot(graph, **kwargs):
    """Render a graph as input text for dot.

    :param graph: dict of the form {'nodes': list(), 'edges': list()};
        every node must provide shortStr() and every edge is a pair (x, y)
        of nodes
    :param kwargs: 'title' (optional) is used as the graph label
    :return: string that will be input to dot
    """

    title = kwargs.get('title', '')

    # Template, including setup and formatting:
    template = """digraph G {
  ratio = "fill" ;
  size = "4.0, 4.0" ;
  page = "4, 4" ;
  margin = 0 ;
  mincross = 2.0 ;
  rankdir = "BT" ;
  nodesep = 0.25 ;
  ranksep = 0.25 ;
  node [fontname="Helvetica", fontsize=10,
        shape=oval, style=filled, fillcolor=white ] ;

  // The nodes
  %s

  // The edges
  %s

  // The title
  labelloc="t";
  label="%s";
}"""

    # Nodes: keyed by id() so equal-looking operators stay distinct; double
    # quotes in the label are escaped for the quoted dot string.
    nodes = ['"%s" [label="%s"] ;' % (id(n), n.shortStr().replace(r'"', r'\"'))
             for n in graph['nodes']]
    node_str = '\n '.join(nodes)

    # Edges:
    edges = ['"%s" -> "%s" ;' % (id(x), id(y)) for (x, y) in graph['edges']]
    edge_str = '\n '.join(edges)

    return template % (node_str, edge_str, title)


def operator_to_dot(operator, graph=None, **kwargs):
    """Operator is expected to be an object of class raco.algebra.Operator"""
    graph = operator.collectGraph(graph)
    return graph_to_dot(graph, **kwargs)


def get_dot(obj):
    """Return dot input for a graph dict or an algebra operator.

    :param obj: dict containing 'nodes' and 'edges', or an
        algebra.Operator instance
    :raises NotImplementedError: for any other kind of object
    """
    # The membership tests must look at obj; testing the builtin `dict`
    # type raises TypeError because a type is not iterable.
    if isinstance(obj, dict) and 'nodes' in obj and 'edges' in obj:
        return graph_to_dot(obj)
    elif isinstance(obj, algebra.Operator):
        return operator_to_dot(obj)
    raise NotImplementedError('Unable to get dot from object type %s' % type(obj))  # noqa
import os


def is_package(path):
    """Return True if path is a directory that contains an __init__.py file."""
    return (
        os.path.isdir(path) and
        os.path.isfile(os.path.join(path, '__init__.py'))
    )


def find_packages(path=".", base="", exclude=None):
    """Find all packages in path.

    Walks path recursively and returns a dict that maps the dotted module
    name of every package found (prefixed with base, when given) to the
    directory of that package.

    exclude is an optional collection of packages to leave out. An entry
    matches a package when it equals the package's dotted module name or its
    directory, either as joined onto path or normalized with
    os.path.normpath. The sub-packages of an excluded package are left out
    as well.
    """
    if not exclude:
        exclude = []
    packages = {}
    for item in os.listdir(path):
        pkg_dir = os.path.join(path, item)
        if not is_package(pkg_dir):
            continue
        if base:
            module_name = "{base}.{item}".format(base=base, item=item)
        else:
            module_name = item
        # Comparing only the joined path (e.g. "./clang") never matched a
        # plain name such as "clang", so the module name and the normalized
        # path are checked too.
        candidates = (module_name, pkg_dir, os.path.normpath(pkg_dir))
        if any(candidate in exclude for candidate in candidates):
            continue
        packages[module_name] = pkg_dir
        # Hand the exclusions down so that they apply at every depth.
        packages.update(find_packages(pkg_dir, module_name, exclude))
    return packages