├── setm ├── test ├── project │ └── assembly.sbt ├── build.sbt ├── src │ └── main │ │ └── scala │ │ └── dbis │ │ └── setm │ │ ├── Main.scala │ │ └── SETM.scala └── .gitignore ├── ceplib ├── .gitignore ├── build.sbt └── src │ ├── main │ └── scala │ │ └── dbis │ │ └── piglet │ │ └── cep │ │ ├── ops │ │ ├── Outputter.scala │ │ ├── MatchCollector.scala │ │ ├── EngineConf.scala │ │ └── Strategies.scala │ │ ├── nfa │ │ └── RelatedValue.scala │ │ ├── spark │ │ ├── CustomRDDMatcher.scala │ │ ├── CustomDStreamMatcher.scala │ │ └── RDDMatcher.scala │ │ ├── flink │ │ ├── CustomDataSetMatcher.scala │ │ ├── CustomDataStreamMatcher.scala │ │ ├── DataSetMatcher.scala │ │ └── DataStreamMatcher.scala │ │ └── engines │ │ ├── NextMatch.scala │ │ ├── FirstMatch.scala │ │ ├── ContiguityMatch.scala │ │ ├── AnyMatch.scala │ │ └── CEPEngine.scala │ └── test │ └── resources │ └── logback-test.xml ├── common ├── .gitignore ├── build.sbt └── src │ └── main │ └── scala │ └── dbis │ └── piglet │ ├── tools │ ├── HdfsCommand.scala │ └── logging │ │ └── PigletLogging.scala │ └── backends │ ├── BackendConf.scala │ └── CppConfig.scala ├── flinklib ├── .gitignore ├── src │ ├── main │ │ ├── resources │ │ │ ├── application.conf │ │ │ └── log4j.properties │ │ └── scala │ │ │ └── dbis │ │ │ └── piglet │ │ │ └── backends │ │ │ └── flink │ │ │ ├── streaming │ │ │ ├── FlinkExtensions.scala │ │ │ ├── FlinksConf.scala │ │ │ └── UTF8StringSchema.scala │ │ │ ├── FlinkConf.scala │ │ │ └── PigFuncs.scala │ └── test │ │ └── resources │ │ └── logback-test.xml └── build.sbt ├── mapreducelib ├── .gitignore ├── build.sbt └── src │ └── main │ └── scala │ └── dbis │ └── piglet │ └── backends │ └── mapreduce │ └── PigRun.scala ├── sparklib ├── .gitignore ├── src │ ├── test │ │ ├── resources │ │ │ ├── person.csv │ │ │ ├── values.csv │ │ │ └── logback-test.xml │ │ └── scala │ │ │ └── dbis │ │ │ └── piglet │ │ │ └── backends │ │ │ └── spark │ │ │ └── Person.scala │ └── main │ │ ├── resources │ │ └── application.conf │ │ └── scala │ │ └── dbis │ │ └── piglet │ │ └── backends │ │ └── spark │ │ ├── SparkStream.scala │ │ ├── PigFuncs.scala │ │ ├── SparkSRun.scala │ │ └── FileStreamReader.scala └── build.sbt ├── zeppelin ├── .gitignore └── build.sbt ├── src ├── it │ └── resources │ │ ├── truth │ │ ├── result3.data │ │ ├── splitY.data │ │ ├── accumulate.data │ │ ├── filtered.data │ │ ├── aggrwogrouping.data │ │ ├── nested.data │ │ ├── splitX.data │ │ ├── splitZ.data │ │ ├── embedded.data │ │ ├── unique.data │ │ ├── sampling.data │ │ ├── top.data │ │ ├── distances.data │ │ ├── grouping2.data │ │ ├── macro1.data │ │ ├── result2.data │ │ ├── aggregate.data │ │ ├── spatialjoin.data │ │ ├── sorted.data │ │ ├── spatialfilter.data │ │ ├── twojoins.data │ │ ├── result1.data │ │ ├── jdbc-data.data │ │ ├── sorted_multiple_directions.data │ │ ├── joined_filtered.data │ │ ├── aggregate2.data │ │ ├── groupall.data │ │ ├── construct.data │ │ ├── grouping.data │ │ ├── united.data │ │ ├── bag.data │ │ ├── joined.data │ │ ├── simple-matrix-res.data │ │ ├── joined_ambiguous_fieldnames.data │ │ ├── marycount.data │ │ ├── rdf_starjoin_plain.data │ │ ├── rdf_pathjoin_plain.data │ │ ├── bgpfilter.data │ │ ├── crossed.data │ │ └── cross2.csv │ │ ├── json.data │ │ ├── import2.pig │ │ ├── skyline.data │ │ ├── input │ │ ├── split.csv │ │ ├── events.csv │ │ ├── file.csv │ │ ├── aggregate.csv │ │ ├── construct.csv │ │ ├── file.txt │ │ ├── unsorted.csv │ │ ├── unsorted_top.csv │ │ ├── file.json │ │ ├── joinInput.csv │ │ ├── nested.csv │ │ ├── grouping.txt │ │ ├── test.mv.db │ │ ├── 
duplicates.csv │ │ ├── mary.txt │ │ └── matrix_data.csv │ │ ├── load.pig │ │ ├── import1.pig │ │ ├── stream_load.pig │ │ ├── load2.pig │ │ ├── groupall.pig │ │ ├── bag.pig │ │ ├── load3.pig │ │ ├── stream_load2.pig │ │ ├── grouping.pig │ │ ├── socket_write.pig │ │ ├── sampling.pig │ │ ├── filter.pig │ │ ├── top.pig │ │ ├── socket_read.pig │ │ ├── stream_filter.pig │ │ ├── sort.pig │ │ ├── grouping2.pig │ │ ├── aggrwogrouping.pig │ │ ├── jdbc.pig │ │ ├── sort_multiple_directions.pig │ │ ├── accumulate.pig │ │ ├── aggregate.pig │ │ ├── top_schema.pig │ │ ├── construct.pig │ │ ├── windowDistinct.pig │ │ ├── foreach1.pig │ │ ├── windowFilter.pig │ │ ├── json.pig │ │ ├── streaming │ │ ├── aggregate.pig │ │ ├── construct.pig │ │ ├── accumulate.pig │ │ └── union.pig │ │ ├── windowGrouping.pig │ │ ├── embedded.pig │ │ ├── stream_foreach1.pig │ │ ├── windowSort.pig │ │ ├── cross.pig │ │ ├── selfjoin.pig │ │ ├── bgpfilter.pig │ │ ├── simple_matrix.pig │ │ ├── splitInto.pig │ │ ├── selfjoin_filtered.pig │ │ ├── selfjoin_ambiguous_fieldnames.pig │ │ ├── union.pig │ │ ├── groupforeach.pig │ │ ├── rdf_pathjoin_plain.pig │ │ ├── spatialfilter.pig │ │ ├── nforeach.pig │ │ ├── rdf_starjoin_plain.pig │ │ ├── windowCount.pig │ │ ├── spatialfilterwithindex.pig │ │ ├── windowCross.pig │ │ ├── windowJoin.pig │ │ ├── rscript.pig │ │ ├── nforeach2.pig │ │ ├── spatialpartitioning.pig │ │ ├── windowNforeach.pig │ │ ├── crossmany.pig │ │ ├── macro1.pig │ │ ├── skyline.pig │ │ ├── two_joins.pig │ │ ├── wordcount.pig │ │ ├── spatialjoin.pig │ │ └── spatialjoinwithindex.pig ├── test │ ├── scala │ │ └── dbis │ │ │ └── piglet │ │ │ ├── tools │ │ │ ├── TestTools.scala │ │ │ ├── CodeMatcherSpec.scala │ │ │ └── RingBufferSpec.scala │ │ │ └── CompilerSpec.scala │ └── resources │ │ └── logback-test.xml └── main │ ├── scala │ └── dbis │ │ └── piglet │ │ ├── plan │ │ ├── rewriting │ │ │ ├── internals │ │ │ │ ├── package.scala │ │ │ │ ├── MutingSupport.scala │ │ │ │ └── EmbedSupport.scala │ │ │ ├── rulesets │ │ │ │ └── Ruleset.scala │ │ │ └── dsl │ │ │ │ ├── words │ │ │ │ ├── CheckWord.scala │ │ │ │ ├── ImmediateEndWord.scala │ │ │ │ ├── ReplaceWord.scala │ │ │ │ └── MergeWord.scala │ │ │ │ ├── traits │ │ │ │ └── EndWordT.scala │ │ │ │ └── builders │ │ │ │ └── ReplacementBuilder.scala │ │ └── PrettyPrinter.scala │ │ ├── tools │ │ ├── ProductTools.scala │ │ ├── UpdateMap.scala │ │ └── RingBuffer.scala │ │ ├── codegen │ │ ├── flink │ │ │ └── emitter │ │ │ │ ├── DumpEmitter.scala │ │ │ │ ├── StreamDumpEmitter.scala │ │ │ │ ├── StreamSampleEmitter.scala │ │ │ │ ├── LoadEmitter.scala │ │ │ │ ├── StreamStoreEmitter.scala │ │ │ │ ├── StreamLoadEmitter.scala │ │ │ │ ├── StoreEmitter.scala │ │ │ │ ├── StreamOpEmitter.scala │ │ │ │ ├── StreamDistinctEmitter.scala │ │ │ │ ├── LimitEmitter.scala │ │ │ │ ├── OrderByEmitter.scala │ │ │ │ ├── SocketWriteEmitter.scala │ │ │ │ ├── StreamFilterEmitter.scala │ │ │ │ └── SocketReadEmitter.scala │ │ ├── spark │ │ │ ├── StreamDumpEmitter.scala │ │ │ ├── StreamStoreEmitter.scala │ │ │ ├── StreamLoadEmitter.scala │ │ │ ├── CacheEmitter.scala │ │ │ ├── VisualizeEmitter.scala │ │ │ ├── StreamDistinctEmitter.scala │ │ │ ├── SpatialIndexEmitter.scala │ │ │ ├── StreamOrderByEmitter.scala │ │ │ ├── SpatialEmitterHelper.scala │ │ │ ├── StreamGroupingEmitter.scala │ │ │ ├── PartitionerEmitter.scala │ │ │ └── DelayEmitter.scala │ │ └── scala_lang │ │ │ ├── EmptyEmitter.scala │ │ │ ├── HdfsCmdEmitter.scala │ │ │ ├── IntersectionEmitter.scala │ │ │ ├── UnionEmitter.scala │ │ │ ├── SampleEmitter.scala │ │ │ ├── 
TimingEmitter.scala │ │ │ ├── DistinctEmitter.scala │ │ │ ├── FilterEmitter.scala │ │ │ ├── DumpEmitter.scala │ │ │ ├── StoreEmitter.scala │ │ │ ├── DifferenceEmitter.scala │ │ │ ├── LimitEmitter.scala │ │ │ ├── LoadEmitter.scala │ │ │ └── StreamOpEmitter.scala │ │ ├── op │ │ ├── Intersection.scala │ │ ├── Materialize.scala │ │ ├── TimingOp.scala │ │ ├── Visualize.scala │ │ ├── Partition.scala │ │ ├── Difference.scala │ │ ├── Empty.scala │ │ ├── Cache.scala │ │ ├── cmd │ │ │ ├── RegisterCmd.scala │ │ │ ├── SetCmd.scala │ │ │ ├── DefineCmd.scala │ │ │ └── HdfsCmd.scala │ │ ├── Describe.scala │ │ ├── Limit.scala │ │ ├── Display.scala │ │ ├── RScript.scala │ │ ├── Dump.scala │ │ ├── Top.scala │ │ ├── Distinct.scala │ │ ├── Tuplify.scala │ │ ├── WindowApply.scala │ │ ├── IndexOp.scala │ │ ├── Delay.scala │ │ └── SplitInto.scala │ │ ├── expr │ │ └── Traverser.scala │ │ ├── mm │ │ ├── CacheEntry.scala │ │ └── MaterializationPoint.scala │ │ └── api │ │ └── PigletInterpreterAPI.scala │ └── resources │ └── logback.xml ├── project ├── build.properties ├── assembly.sbt └── plugins.sbt ├── lib_unmanaged ├── stark.jar └── jvmr_2.11-2.11.2.1.jar ├── script └── simplestatserver.sh ├── .dockerignore ├── Dockerfile ├── materialization_scripts ├── gdelt_gold_tone_roi.pig ├── taxi_tip_avg.pig ├── gdelt_url_eventcode.pig └── taxi_high_tip_block.pig ├── .gitignore ├── make-distribution.sh └── Zeppelin.md /setm/test: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ceplib/.gitignore: -------------------------------------------------------------------------------- 1 | /bin/ 2 | -------------------------------------------------------------------------------- /common/.gitignore: -------------------------------------------------------------------------------- 1 | /bin/ 2 | -------------------------------------------------------------------------------- /flinklib/.gitignore: -------------------------------------------------------------------------------- 1 | /bin/ 2 | -------------------------------------------------------------------------------- /mapreducelib/.gitignore: -------------------------------------------------------------------------------- 1 | /bin/ 2 | -------------------------------------------------------------------------------- /sparklib/.gitignore: -------------------------------------------------------------------------------- 1 | /bin/ 2 | -------------------------------------------------------------------------------- /zeppelin/.gitignore: -------------------------------------------------------------------------------- 1 | /bin/ 2 | -------------------------------------------------------------------------------- /src/it/resources/truth/result3.data: -------------------------------------------------------------------------------- 1 | small -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.1.4 2 | -------------------------------------------------------------------------------- /src/it/resources/json.data: -------------------------------------------------------------------------------- 1 | Hage,(Ilmenau,98693) -------------------------------------------------------------------------------- /src/it/resources/truth/splitY.data: -------------------------------------------------------------------------------- 1 | 4,5,6 2 | 
-------------------------------------------------------------------------------- /src/it/resources/import2.pig: -------------------------------------------------------------------------------- 1 | A = LOAD 'input'; 2 | -------------------------------------------------------------------------------- /src/it/resources/truth/accumulate.data: -------------------------------------------------------------------------------- 1 | 1,3,9,5,1.8 -------------------------------------------------------------------------------- /src/it/resources/truth/filtered.data: -------------------------------------------------------------------------------- 1 | 2,2 2 | 3,3 3 | -------------------------------------------------------------------------------- /src/it/resources/skyline.data: -------------------------------------------------------------------------------- 1 | event#1,1.0 2 | event#2,1.0 -------------------------------------------------------------------------------- /src/it/resources/truth/aggrwogrouping.data: -------------------------------------------------------------------------------- 1 | 5,21,4.2 2 | -------------------------------------------------------------------------------- /src/it/resources/truth/nested.data: -------------------------------------------------------------------------------- 1 | BB,2 2 | CC,2 3 | AA,3 -------------------------------------------------------------------------------- /src/it/resources/truth/splitX.data: -------------------------------------------------------------------------------- 1 | 1,2,3 2 | 4,5,6 3 | -------------------------------------------------------------------------------- /src/it/resources/truth/splitZ.data: -------------------------------------------------------------------------------- 1 | 1,2,3 2 | 7,8,9 3 | -------------------------------------------------------------------------------- /src/it/resources/input/split.csv: -------------------------------------------------------------------------------- 1 | 1,2,3 2 | 4,5,6 3 | 7,8,9 4 | -------------------------------------------------------------------------------- /src/it/resources/truth/embedded.data: -------------------------------------------------------------------------------- 1 | 2 2 | 4 3 | 3 4 | 6 5 | 4 6 | -------------------------------------------------------------------------------- /src/it/resources/truth/unique.data: -------------------------------------------------------------------------------- 1 | 1,1 2 | 2,2 3 | 3,3 4 | 4,4 5 | -------------------------------------------------------------------------------- /sparklib/src/test/resources/person.csv: -------------------------------------------------------------------------------- 1 | Anna,21 2 | John,53 3 | Mike,32 -------------------------------------------------------------------------------- /src/it/resources/input/events.csv: -------------------------------------------------------------------------------- 1 | event#1,50.0,10.1 2 | event#2,50.1,10.2 -------------------------------------------------------------------------------- /src/it/resources/input/file.csv: -------------------------------------------------------------------------------- 1 | 1,1 2 | 2,2 3 | 1,2 4 | 3,3 5 | 3,1 6 | -------------------------------------------------------------------------------- /src/it/resources/truth/sampling.data: -------------------------------------------------------------------------------- 1 | 1,1 2 | 2,2 3 | 1,2 4 | 3,3 5 | 3,1 -------------------------------------------------------------------------------- 
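How these integration-test resources evidently fit together: each *.pig script under src/it/resources reads from input/, the test harness apparently fills in the $inbase and $outfile placeholders at run time, and the produced output is compared against the matching file under truth/. A minimal sketch of the pattern, assuming that pairing (the script mirrors filter.pig, which appears in full later in this dump):

A = LOAD '$inbase/input/file.csv' USING PigStorage(',') AS (f1:int, f2:int);  -- input/file.csv above
B = FILTER A BY f1>1 AND f2>1;
STORE B INTO '$outfile';  -- presumably compared against truth/filtered.data

Run against input/file.csv, only the tuples (2,2) and (3,3) satisfy f1>1 AND f2>1, which is exactly the content of truth/filtered.data above.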
/src/it/resources/truth/top.data: -------------------------------------------------------------------------------- 1 | b,A,1 2 | c,B,9 3 | b,D,7 4 | a,D,3 5 | -------------------------------------------------------------------------------- /src/it/resources/input/aggregate.csv: -------------------------------------------------------------------------------- 1 | 1,3 2 | 1,3 3 | 2,3 4 | 4,5 5 | 2,7 6 | -------------------------------------------------------------------------------- /src/it/resources/truth/distances.data: -------------------------------------------------------------------------------- 1 | event#1,50.01,13.6 2 | event#2,50.11,13.7 -------------------------------------------------------------------------------- /src/it/resources/truth/grouping2.data: -------------------------------------------------------------------------------- 1 | 1,2.0 2 | 4,2.5 3 | 7,2.0 4 | 8,3.5 5 | -------------------------------------------------------------------------------- /src/it/resources/truth/macro1.data: -------------------------------------------------------------------------------- 1 | 43,0 2 | 44,1 3 | 43,1 4 | 45,2 5 | 45,0 -------------------------------------------------------------------------------- /src/it/resources/truth/result2.data: -------------------------------------------------------------------------------- 1 | 1,1 2 | 2,2 3 | 1,2 4 | 3,3 5 | 3,1 6 | -------------------------------------------------------------------------------- /project/assembly.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.5") 2 | -------------------------------------------------------------------------------- /sparklib/src/test/resources/values.csv: -------------------------------------------------------------------------------- 1 | 0.001,21.5 2 | 0.00004,53.9 3 | 0.023456,32.3 -------------------------------------------------------------------------------- /src/it/resources/input/construct.csv: -------------------------------------------------------------------------------- 1 | 100,101,aaaa 2 | 200,202,bbbb 3 | 300,302,cccc -------------------------------------------------------------------------------- /src/it/resources/truth/aggregate.data: -------------------------------------------------------------------------------- 1 | 1,2,6,3.0 2 | 2,2,10,5.0 3 | 4,1,5,5.0 4 | -------------------------------------------------------------------------------- /src/it/resources/truth/spatialjoin.data: -------------------------------------------------------------------------------- 1 | event#1,event#1 2 | event#2,event#2 3 | -------------------------------------------------------------------------------- /setm/project/assembly.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.0") 2 | -------------------------------------------------------------------------------- /src/it/resources/input/file.txt: -------------------------------------------------------------------------------- 1 | Hallo 2 | this 3 | is 4 | a 5 | small 6 | text 7 | file. 
-------------------------------------------------------------------------------- /src/it/resources/input/unsorted.csv: -------------------------------------------------------------------------------- 1 | c,B,9 2 | b,D,7 3 | a,C,3 4 | d,F,4 5 | b,A,1 6 | -------------------------------------------------------------------------------- /src/it/resources/truth/sorted.data: -------------------------------------------------------------------------------- 1 | a,C,3 2 | b,A,1 3 | b,D,7 4 | c,B,9 5 | d,F,4 6 | -------------------------------------------------------------------------------- /src/it/resources/truth/spatialfilter.data: -------------------------------------------------------------------------------- 1 | event#2,STObject(POINT (50.1 10.2),None) 2 | -------------------------------------------------------------------------------- /src/it/resources/truth/twojoins.data: -------------------------------------------------------------------------------- 1 | 4,4 2 | 4,4 3 | 4,4 4 | 4,4 5 | 4,4 6 | 4,4 7 | -------------------------------------------------------------------------------- /src/it/resources/input/unsorted_top.csv: -------------------------------------------------------------------------------- 1 | c,B,9 2 | b,D,7 3 | a,D,3 4 | d,F,4 5 | b,A,1 6 | -------------------------------------------------------------------------------- /src/it/resources/load.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/file.txt'; 2 | STORE A INTO '$outfile'; 3 | -------------------------------------------------------------------------------- /src/it/resources/truth/result1.data: -------------------------------------------------------------------------------- 1 | Hallo 2 | this 3 | is 4 | a 5 | small 6 | text 7 | file. 
-------------------------------------------------------------------------------- /lib_unmanaged/stark.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbis-ilm/piglet/HEAD/lib_unmanaged/stark.jar -------------------------------------------------------------------------------- /src/it/resources/input/file.json: -------------------------------------------------------------------------------- 1 | {"name":"Hage","address":{"city":"Ilmenau","zipcode":"98693"}} -------------------------------------------------------------------------------- /src/it/resources/input/joinInput.csv: -------------------------------------------------------------------------------- 1 | 1,2,3 2 | 4,2,1 3 | 8,3,4 4 | 4,3,3 5 | 7,2,5 6 | 8,4,3 7 | -------------------------------------------------------------------------------- /src/it/resources/input/nested.csv: -------------------------------------------------------------------------------- 1 | AA,bb 2 | AA,cc 3 | AA,dd 4 | BB,cc 5 | BB,dd 6 | CC,aa 7 | CC,dd -------------------------------------------------------------------------------- /src/it/resources/import1.pig: -------------------------------------------------------------------------------- 1 | IMPORT 'src/it/resources/import2.pig'; 2 | B = FILTER A BY $0 > 10; 3 | -------------------------------------------------------------------------------- /src/it/resources/stream_load.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/file.txt'; 2 | STORE A INTO '$outfile'; 3 | -------------------------------------------------------------------------------- /src/it/resources/truth/jdbc-data.data: -------------------------------------------------------------------------------- 1 | 1,One 2 | 2,Two 3 | 3,Three 4 | 4,Four 5 | 5,Five 6 | 6,Six 7 | -------------------------------------------------------------------------------- /src/it/resources/truth/sorted_multiple_directions.data: -------------------------------------------------------------------------------- 1 | a,C,3 2 | b,D,7 3 | b,A,1 4 | c,B,9 5 | d,F,4 -------------------------------------------------------------------------------- /src/it/resources/truth/joined_filtered.data: -------------------------------------------------------------------------------- 1 | 4,2,1,4,2,1 2 | 4,2,1,4,3,3 3 | 4,3,3,4,2,1 4 | 4,3,3,4,3,3 5 | -------------------------------------------------------------------------------- /src/it/resources/truth/aggregate2.data: -------------------------------------------------------------------------------- 1 | 1,1,3,3.0 2 | 1,2,6,3.0 3 | 2,1,3,3.0 4 | 2,2,10,5.0 5 | 4,1,5,5.0 6 | -------------------------------------------------------------------------------- /src/it/resources/truth/groupall.data: -------------------------------------------------------------------------------- 1 | all,{(1,2,3),(4,2,1),(8,3,4),(4,3,3),(7,2,5),(8,4,3),(1,2,5),(7,2,8)} 2 | -------------------------------------------------------------------------------- /src/it/resources/input/grouping.txt: -------------------------------------------------------------------------------- 1 | 1 2 3 2 | 4 2 1 3 | 8 3 4 4 | 4 3 3 5 | 7 2 5 6 | 8 4 3 7 | 1 2 5 8 | 7 2 8 9 | -------------------------------------------------------------------------------- /src/it/resources/input/test.mv.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbis-ilm/piglet/HEAD/src/it/resources/input/test.mv.db 
-------------------------------------------------------------------------------- /lib_unmanaged/jvmr_2.11-2.11.2.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbis-ilm/piglet/HEAD/lib_unmanaged/jvmr_2.11-2.11.2.1.jar -------------------------------------------------------------------------------- /src/it/resources/input/duplicates.csv: -------------------------------------------------------------------------------- 1 | 1,1 2 | 2,2 3 | 2,2 4 | 3,3 5 | 3,3 6 | 3,3 7 | 4,4 8 | 4,4 9 | 4,4 10 | 4,4 11 | -------------------------------------------------------------------------------- /src/it/resources/load2.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/file.csv' USING PigStorage(',') AS (f1:chararray, f2: int); 2 | STORE A INTO '$outfile'; 3 | -------------------------------------------------------------------------------- /script/simplestatserver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | FILE=$1 4 | PORT=$2 5 | 6 | nc -p $PORT -l -o $FILE --append-output --recv-only --keep-open 7 | -------------------------------------------------------------------------------- /src/it/resources/groupall.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/grouping.txt' AS (f1:int, f2:int, f3:int); 2 | B = GROUP A ALL; 3 | STORE B INTO '$outfile'; -------------------------------------------------------------------------------- /src/it/resources/bag.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/mary.txt' AS (f1:chararray); 2 | X = FOREACH A GENERATE TOKENIZE(f1); 3 | STORE X INTO '$outfile'; 4 | -------------------------------------------------------------------------------- /src/it/resources/input/mary.txt: -------------------------------------------------------------------------------- 1 | Mary had a little lamb 2 | its fleece was white as snow 3 | and everywhere that Mary went 4 | the lamb was sure to go. 
-------------------------------------------------------------------------------- /src/it/resources/load3.pig: -------------------------------------------------------------------------------- 1 | a = load '$inbase/input/file.txt' using PigStorage(':'); 2 | b = filter a by $0 == "small"; 3 | store b into '$outfile'; 4 | -------------------------------------------------------------------------------- /src/it/resources/stream_load2.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/file.csv' USING PigStream(',') AS (f1:chararray, f2: int); 2 | STORE A INTO '$outfile'; 3 | -------------------------------------------------------------------------------- /src/it/resources/truth/construct.data: -------------------------------------------------------------------------------- 1 | (100,101),{(100),(101)},[aaaa#100] 2 | (200,202),{(200),(202)},[bbbb#200] 3 | (300,302),{(300),(302)},[cccc#300] -------------------------------------------------------------------------------- /src/it/resources/grouping.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/grouping.txt' AS (f1:int, f2:int, f3:int); 2 | B = GROUP A BY (f1,f2); 3 | STORE B INTO '$outfile'; 4 | -------------------------------------------------------------------------------- /src/it/resources/socket_write.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/file.csv' USING PigStream(',') AS (f1:chararray,f2:int); 2 | SOCKET_WRITE A TO 'localhost:9999'; 3 | -------------------------------------------------------------------------------- /setm/build.sbt: -------------------------------------------------------------------------------- 1 | name := "setm" 2 | 3 | version := "0.1" 4 | 5 | scalaVersion := "2.11.8" 6 | 7 | libraryDependencies += "fm.void.jetm" % "jetm" % "1.2.3" 8 | -------------------------------------------------------------------------------- /src/it/resources/input/matrix_data.csv: -------------------------------------------------------------------------------- 1 | 1.0,2.0,3.0,1.5,2.5,3.5 2 | 10.0,20.0,30.0,10.5,20.5,30.5 3 | 11.0,12.0,13.0,11.5,12.5,13.5 4 | 21.0,22.0,23.0,21.5,22.5,23.5 -------------------------------------------------------------------------------- /src/it/resources/sampling.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/file.csv' USING PigStorage(',') AS (f1:int, f2: int); 2 | B = SAMPLE A 1.0; 3 | STORE B INTO '$outfile'; 4 | -------------------------------------------------------------------------------- /src/it/resources/filter.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/file.csv' USING PigStorage(',') AS (f1:int, f2: int); 2 | B = FILTER A BY f1>1 AND f2>1; 3 | STORE B INTO '$outfile'; 4 | -------------------------------------------------------------------------------- /src/it/resources/truth/grouping.data: -------------------------------------------------------------------------------- 1 | (1,2),{(1,2,3),(1,2,5)} 2 | (4,2),{(4,2,1)} 3 | (4,3),{(4,3,3)} 4 | (7,2),{(7,2,5),(7,2,8)} 5 | (8,3),{(8,3,4)} 6 | (8,4),{(8,4,3)} 7 | -------------------------------------------------------------------------------- /src/it/resources/truth/united.data: -------------------------------------------------------------------------------- 1 | 1,1 2 | 2,2 3 | 1,2 4 | 3,3 5 | 3,1 6 | 1,1 7 | 2,2 8 | 1,2 9 | 3,3 
10 | 3,1 11 | 1,1 12 | 2,2 13 | 1,2 14 | 3,3 15 | 3,1 16 | -------------------------------------------------------------------------------- /src/it/resources/top.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/unsorted_top.csv' USING PigStorage(','); 2 | B = order A by $1 asc, $2 desc; 3 | C = limit B 4; 4 | STORE C INTO '$outfile'; 5 | -------------------------------------------------------------------------------- /src/it/resources/socket_read.pig: -------------------------------------------------------------------------------- 1 | A = SOCKET_READ 'tcp://localhost:9999' MODE ZMQ USING PigStream(',') AS (f1:double, f2:double, f3:double); 2 | B = FILTER A BY f2>0; 3 | DUMP B; 4 | -------------------------------------------------------------------------------- /src/it/resources/stream_filter.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/file.csv' USING PigStream(',') AS (f1:int, f2: int); 2 | B = FILTER A BY f1>1 AND f2>1; 3 | STORE B INTO '$outfile'; 4 | -------------------------------------------------------------------------------- /src/it/resources/sort.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/unsorted.csv' USING PigStorage(',') AS (f1:chararray, f2: chararray, f3: int); 2 | B = ORDER A BY f1, f2, f3; 3 | STORE B INTO '$outfile'; 4 | -------------------------------------------------------------------------------- /src/it/resources/grouping2.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/grouping.txt' AS (f1:int, f2:int, f3:int); 2 | B = GROUP A BY f1; 3 | C = FOREACH B GENERATE A.f1, AVG(A.f2); 4 | STORE C INTO '$outfile'; 5 | -------------------------------------------------------------------------------- /src/it/resources/aggrwogrouping.pig: -------------------------------------------------------------------------------- 1 | a = load '$inbase/input/aggregate.csv' using PigStorage(',') as (x:int, y:int); 2 | b = foreach a generate COUNT(y), SUM(y), AVG(y); 3 | store b into '$outfile'; 4 | -------------------------------------------------------------------------------- /src/it/resources/jdbc.pig: -------------------------------------------------------------------------------- 1 | A = LOAD 'data' USING JdbcStorage('org.h2.Driver', 'jdbc:h2:file:$inbase/input/test?user=sa&ACCESS_MODE_DATA=r') AS (col1: int, col2:chararray); 2 | STORE A INTO '$outfile'; 3 | -------------------------------------------------------------------------------- /src/it/resources/truth/bag.data: -------------------------------------------------------------------------------- 1 | {(Mary),(had),(a),(little),(lamb)} 2 | {(its),(fleece),(was),(white),(as),(snow)} 3 | {(and),(everywhere),(that),(Mary),(went)} 4 | {(the),(lamb),(was),(sure),(to),(go.)} -------------------------------------------------------------------------------- /src/it/resources/truth/joined.data: -------------------------------------------------------------------------------- 1 | 4,2,1,4,2,1 2 | 4,2,1,4,3,3 3 | 4,3,3,4,2,1 4 | 4,3,3,4,3,3 5 | 8,3,4,8,3,4 6 | 8,3,4,8,4,3 7 | 8,4,3,8,3,4 8 | 8,4,3,8,4,3 9 | 1,2,3,1,2,3 10 | 7,2,5,7,2,5 11 | -------------------------------------------------------------------------------- /src/test/scala/dbis/piglet/tools/TestTools.scala: -------------------------------------------------------------------------------- 1 | 2 | 3 | package 
dbis.piglet.tools 4 | 5 | import java.net.URI 6 | 7 | object TestTools { 8 | implicit def strToUri(str: String): URI = new URI(str) 9 | } -------------------------------------------------------------------------------- /src/it/resources/sort_multiple_directions.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/unsorted.csv' USING PigStorage(',') AS (f1:chararray, f2: chararray, f3: int); 2 | B = ORDER A BY f1 asc, f2 desc; 3 | STORE B INTO '$outfile'; 4 | -------------------------------------------------------------------------------- /src/it/resources/accumulate.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/file.csv' USING PigStorage(',') AS (f1: int, f2: int); 2 | B = ACCUMULATE A GENERATE min(f1), max(f1), sum(f2), count(f2), avg(f2); 3 | STORE B INTO '$outfile'; 4 | -------------------------------------------------------------------------------- /src/it/resources/truth/simple-matrix-res.data: -------------------------------------------------------------------------------- 1 | 1.0 3.0 2.5 2 | 2.0 1.5 3.5 3 | 10.0 30.0 20.5 4 | 20.0 10.5 30.5 5 | 11.0 13.0 12.5 6 | 12.0 11.5 13.5 7 | 21.0 23.0 22.5 8 | 22.0 21.5 23.5 9 | -------------------------------------------------------------------------------- /src/it/resources/aggregate.pig: -------------------------------------------------------------------------------- 1 | a = load '$inbase/input/aggregate.csv' using PigStorage(',') as (x:int, y:int); 2 | b = group a by x ; 3 | c = foreach b generate group, COUNT(a.y), SUM(a.y), AVG(a.y); 4 | store c into '$outfile'; -------------------------------------------------------------------------------- /src/it/resources/top_schema.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/unsorted_top.csv' USING PigStorage(',') as (a: chararray, b: chararray, c: int); 2 | B = order A by $1 asc, $2 desc; 3 | C = limit B 4; 4 | STORE C INTO '$outfile'; 5 | -------------------------------------------------------------------------------- /src/it/resources/truth/joined_ambiguous_fieldnames.data: -------------------------------------------------------------------------------- 1 | 1,2,3,1,2,3 2 | 4,2,1,4,2,1 3 | 4,2,1,4,3,3 4 | 4,3,3,4,2,1 5 | 4,3,3,4,3,3 6 | 7,2,5,7,2,5 7 | 8,3,4,8,3,4 8 | 8,3,4,8,4,3 9 | 8,4,3,8,3,4 10 | 8,4,3,8,4,3 11 | -------------------------------------------------------------------------------- /src/it/resources/construct.pig: -------------------------------------------------------------------------------- 1 | data = load '$inbase/input/construct.csv' using PigStorage(',') as (f1: int, f2: int, name:chararray); 2 | out = foreach data generate (f1, f2), {f1, f2}, [name, f1]; 3 | STORE out INTO '$outfile'; 4 | -------------------------------------------------------------------------------- /src/it/resources/windowDistinct.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/duplicates.csv' USING PigStream(',') AS (f1:int, f2: int); 2 | B = WINDOW A RANGE 10 SECONDS SLIDE RANGE 10 SECONDS; 3 | C = DISTINCT B; 4 | STORE C INTO '$outfile'; 5 | -------------------------------------------------------------------------------- /src/it/resources/foreach1.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/events.csv' USING PigStorage(',') AS (id:chararray, longitude: double, latitude: 
double); 2 | B = FOREACH A GENERATE id, longitude + 0.01, latitude + 3.5; 3 | STORE B INTO '$outfile'; -------------------------------------------------------------------------------- /src/it/resources/windowFilter.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/file.csv' USING PigStream(',') AS (f1:int, f2: int); 2 | B = WINDOW A RANGE 5 SECONDS SLIDE RANGE 5 SECONDS; 3 | C = FILTER B BY f1>1 AND f2>1; 4 | STORE C INTO '$outfile'; 5 | -------------------------------------------------------------------------------- /src/it/resources/json.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/file.json' USING JsonStorage() AS (address:(city:chararray, zipcode:chararray),name:chararray); 2 | B = FOREACH A GENERATE address.city, address.zipcode, name; 3 | STORE B INTO '$outfile'; 4 | -------------------------------------------------------------------------------- /src/it/resources/streaming/aggregate.pig: -------------------------------------------------------------------------------- 1 | a = load '$inbase/input/aggregate.csv' using PigStream(',') as (x:int, y:int); 2 | b = group a by x ; 3 | c = foreach b generate group, COUNT(a.y), SUM(a.y), AVG(a.y); 4 | store c into '$outfile'; 5 | -------------------------------------------------------------------------------- /src/it/resources/streaming/construct.pig: -------------------------------------------------------------------------------- 1 | data = load '$inbase/input/construct.csv' using PigStream(',') as (f1: int, f2: int, name:chararray); 2 | out = foreach data generate (f1, f2), {f1, f2}, [name, f1]; 3 | STORE out INTO '$outfile'; 4 | -------------------------------------------------------------------------------- /src/it/resources/windowGrouping.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/grouping.txt' USING PigStream('\t') AS (f1:int, f2:int, f3:int); 2 | B = WINDOW A RANGE 5 SECONDS SLIDE RANGE 5 SECONDS; 3 | C = GROUP B BY (f1,f2); 4 | STORE C INTO '$outfile'; 5 | -------------------------------------------------------------------------------- /src/it/resources/embedded.pig: -------------------------------------------------------------------------------- 1 | <% 2 | def myFunc(i1: Int, i2: Int): Int = i1 + i2 3 | %> 4 | A = LOAD '$inbase/input/file.csv' USING PigStorage(',') AS (f1:int, f2: int); 5 | B = FOREACH A GENERATE myFunc(f1, f2); 6 | STORE B INTO '$outfile'; 7 | -------------------------------------------------------------------------------- /src/it/resources/stream_foreach1.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/events.csv' USING PigStream(',') AS (id:chararray, longitude: double, latitude: double); 2 | B = FOREACH A GENERATE id, longitude + 0.01, latitude + 3.5; 3 | STORE B INTO '$outfile'; 4 | -------------------------------------------------------------------------------- /src/it/resources/windowSort.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/unsorted.csv' USING PigStream(',') AS (f1:chararray, f2: chararray, f3: int); 2 | B = WINDOW A RANGE 5 SECONDS SLIDE RANGE 5 SECONDS; 3 | C = ORDER B BY f1, f2, f3; 4 | STORE C INTO '$outfile'; 5 | -------------------------------------------------------------------------------- /src/it/resources/cross.pig: 
-------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/file.txt' USING PigStorage(',') AS (f1:chararray); --, f2:int 2 | B = LOAD '$inbase/input/file.txt' USING PigStorage(',') AS (f1:chararray); 3 | D = CROSS A,B; 4 | STORE D INTO '$outfile'; 5 | -- DUMP D; 6 | -------------------------------------------------------------------------------- /src/it/resources/truth/marycount.data: -------------------------------------------------------------------------------- 1 | Mary,2 2 | had,1 3 | a,1 4 | little,1 5 | lamb,2 6 | its,1 7 | fleece,1 8 | was,2 9 | white,1 10 | as,1 11 | snow,1 12 | and,1 13 | everywhere,1 14 | that,1 15 | went,1 16 | the,1 17 | sure,1 18 | to,1 19 | go.,1 -------------------------------------------------------------------------------- /src/it/resources/streaming/accumulate.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/file.csv' USING PigStream(',') AS (f1: int, f2: int); 2 | B = GROUP A BY f1; 3 | C = ACCUMULATE B GENERATE min(A.f1), max(A.f1), sum(A.f2), count(A.f2), avg(A.f2); 4 | STORE C INTO '$outfile'; 5 | -------------------------------------------------------------------------------- /mapreducelib/build.sbt: -------------------------------------------------------------------------------- 1 | import Dependencies._ 2 | 3 | name := "mapreduce" 4 | 5 | libraryDependencies ++= Seq( 6 | scalaTest % "test" withSources(), 7 | pig % "provided", 8 | hadoop, 9 | typesafe 10 | ) 11 | 12 | test in assembly := {} 13 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Currently, it's easier to exclude all files and 2 | # to specify exceptions to these excludes. 
:) 3 | 4 | * 5 | 6 | !Dockerfile 7 | !script/piglet 8 | !sparklib/target/scala-2.11/sparklib_2.11-*.jar 9 | !target/scala-2.11/PigCompiler.jar 10 | -------------------------------------------------------------------------------- /common/build.sbt: -------------------------------------------------------------------------------- 1 | import Dependencies._ 2 | 3 | name := "common" 4 | 5 | libraryDependencies ++= Seq( 6 | "ch.qos.logback" % "logback-classic" % "1.2.3", 7 | "org.slf4j" % "slf4j-api" % "1.7.25" % "provided", 8 | hadoop % "provided", 9 | json4s 10 | ) 11 | -------------------------------------------------------------------------------- /src/it/resources/selfjoin.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/joinInput.csv' USING PigStorage(',') AS (a1:int,a2:int,a3:int); 2 | B = LOAD '$inbase/input/joinInput.csv' USING PigStorage(',') AS (b1:int,b2:int,b3:int); 3 | X = JOIN A BY a1, B BY b1; 4 | STORE X INTO '$outfile'; 5 | -------------------------------------------------------------------------------- /src/it/resources/bgpfilter.pig: -------------------------------------------------------------------------------- 1 | a = LOAD '$inbase/input/sibdataset.nt' using PigStorage(' ') as (subject: chararray, predicate: chararray, object:chararray); 2 | b = BGP_FILTER a BY { 3 | ?user "" ?person 4 | }; 5 | STORE b INTO '$outfile'; -------------------------------------------------------------------------------- /src/it/resources/simple_matrix.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/matrix_data.csv' USING PigStorage(',') AS (v11: double, v12: double, v21: double, v22: double, v31: double, v32: double); 2 | B = FOREACH A GENERATE ddmatrix(2, 3, {v11, v12, v21, v22, v31, v32}); 3 | STORE B INTO '$outfile'; 4 | -------------------------------------------------------------------------------- /src/it/resources/splitInto.pig: -------------------------------------------------------------------------------- 1 | -- Based on: http://pig.apache.org/docs/r0.14.0/basic.html 2 | A = LOAD '$inbase/input/split.csv' USING PigStream(',') AS (f1:int,f2:int,f3:int); 3 | SPLIT A INTO X IF f1<7, Y IF f2==5, Z IF (f3<6 OR f3>6); 4 | STORE X INTO '$outfile'; 5 | DUMP Y; 6 | DUMP Z; 7 | -------------------------------------------------------------------------------- /src/it/resources/truth/rdf_starjoin_plain.data: -------------------------------------------------------------------------------- 1 | "Ling","Chen", 2 | "Laurent","Ciss\u0329", 3 | "Jean-Pierre","Hnatow", 4 | "Julia","Hooda", 5 | -------------------------------------------------------------------------------- /src/it/resources/selfjoin_filtered.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/joinInput.csv' USING PigStorage(',') AS (a1:int,a2:int,a3:int); 2 | B = LOAD '$inbase/input/joinInput.csv' USING PigStorage(',') AS (b1:int,b2:int,b3:int); 3 | X = JOIN A BY a1, B BY b1; 4 | Y = FILTER X BY a1 == 4; 5 | STORE Y INTO '$outfile'; 6 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/plan/rewriting/internals/package.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.plan.rewriting 2 | 3 | /** This package is not meant for public use; it only provides functions for various parts of the 
[[dbis.piglet.plan.rewriting.Rewriter]] object. 5 | */ 6 | package object internals { 7 | } 8 | -------------------------------------------------------------------------------- /flinklib/src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | backends { 2 | flink { 3 | name = "flinklib" 4 | template = "flink-template.stg" 5 | connector = "PigStorage" 6 | } 7 | 8 | flinks { 9 | name = "flinks" 10 | template = "flinks-template.stg" 11 | connector = "PigStream" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /sparklib/src/test/scala/dbis/piglet/backends/spark/Person.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.backends.spark 2 | 3 | import dbis.piglet.backends.SchemaClass 4 | 5 | case class Person(name: String, age: Int) extends java.io.Serializable with SchemaClass { 6 | override def mkString(delim: String) = s"$name$delim$age" 7 | } 8 | -------------------------------------------------------------------------------- /src/it/resources/selfjoin_ambiguous_fieldnames.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/joinInput.csv' USING PigStorage(',') AS (a1:int,a2:int,a3:int); 2 | B = LOAD '$inbase/input/joinInput.csv' USING PigStorage(',') AS (a1:int,a2:int,a3:int); 3 | X = JOIN A BY a1, B BY a1; 4 | Y = ORDER X BY B::a1 ASC; 5 | STORE Y INTO '$outfile'; 6 | -------------------------------------------------------------------------------- /src/it/resources/union.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/file.csv' USING PigStorage(',') AS (f1:int, f2: int); 2 | B = LOAD '$inbase/input/file.csv' USING PigStorage(',') AS (f1:int, f2: int); 3 | C = LOAD '$inbase/input/file.csv' USING PigStorage(',') AS (f1:int, f2: int); 4 | D = UNION A, B, C; 5 | STORE D INTO '$outfile'; 6 | -------------------------------------------------------------------------------- /src/it/resources/groupforeach.pig: -------------------------------------------------------------------------------- 1 | -- triples = LOAD '$inbase/input/rdf-data.nt' USING RDFFileStorage AS (subject: chararray, predicate: chararray, object: chararray); 2 | triples = RDFLOAD('$inbase/input/rdf-data.nt'); 3 | stmts = GROUP triples BY subject; 4 | tmp = FOREACH stmts GENERATE *; 5 | STORE tmp INTO '$outfile'; 6 | -------------------------------------------------------------------------------- /src/it/resources/streaming/union.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/file.csv' USING PigStream(',') AS (f1:int, f2: int); 2 | B = LOAD '$inbase/input/file.csv' USING PigStream(',') AS (f1:int, f2: int); 3 | C = LOAD '$inbase/input/file.csv' USING PigStream(',') AS (f1:int, f2: int); 4 | D = UNION A, B, C; 5 | STORE D INTO '$outfile'; 6 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/tools/ProductTools.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.tools 2 | 3 | case class ProductTools(p: Product) { 4 | def mkString(sep: String = ",") = p.productIterator.mkString(sep) 5 | } 6 | 7 | object ProductTools { 8 | implicit def productMkString(p: Product): ProductTools = ProductTools(p) 9 | } 10 | 
-------------------------------------------------------------------------------- /sparklib/src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | backends { 2 | name = "sparklib" 3 | spark { 4 | # if in src/main/resources, file name is enough 5 | template = "spark-template.stg" 6 | connector = "PigStorage" 7 | } 8 | sparks { 9 | template = "sparks-template.stg" 10 | connector = "PigStream" 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /common/src/main/scala/dbis/piglet/tools/HdfsCommand.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.tools 2 | 3 | object HdfsCommand extends Enumeration { 4 | type HdfsCommand = Value 5 | val COPYTOLOCAL, 6 | COPYTOREMOTE, 7 | RM, 8 | RMDIR, 9 | MKDIR, 10 | LS, 11 | CAT, 12 | GETMERGE = Value 13 | 14 | } -------------------------------------------------------------------------------- /src/it/resources/rdf_pathjoin_plain.pig: -------------------------------------------------------------------------------- 1 | a = LOAD '$inbase/input/sibdataset.nt' using PigStorage(' ') as (subject: chararray, predicate: chararray, object:chararray); 2 | b = BGP_FILTER a BY { 3 | ?user "" ?person . 4 | ?person "" ?email 5 | }; 6 | STORE b INTO '$outfile'; -------------------------------------------------------------------------------- /src/it/resources/spatialfilter.pig: -------------------------------------------------------------------------------- 1 | a = load '$inbase/input/events.csv' using PigStorage(',') as (name: chararray, lat: double, lon: chararray); 2 | b = foreach a GENERATE name, geometry("POINT("+lat+" "+lon+")") as loc; 3 | c = SPATIAL_FILTER b BY containedby(loc, geometry("POINT(50.1 10.2)")); 4 | STORE c INTO '$outfile'; 5 | -- DUMP c; 6 | -------------------------------------------------------------------------------- /src/it/resources/nforeach.pig: -------------------------------------------------------------------------------- 1 | daily = load '$inbase/input/nested.csv' using PigStorage(',') as (exchange, symbol); 2 | grpd = group daily by exchange; 3 | uniqcnt = foreach grpd { 4 | sym = daily.symbol; 5 | uniq_sym = distinct sym; 6 | generate group, COUNT(uniq_sym); 7 | }; 8 | store uniqcnt into '$outfile'; -------------------------------------------------------------------------------- /src/it/resources/rdf_starjoin_plain.pig: -------------------------------------------------------------------------------- 1 | a = LOAD '$inbase/input/sibdataset.nt' using PigStorage(' ') as (subject: chararray, predicate: chararray, object:chararray); 2 | b = BGP_FILTER a BY { 3 | ?person "" ?f . 
4 | ?person "" ?l 5 | }; 6 | STORE b INTO '$outfile'; -------------------------------------------------------------------------------- /src/it/resources/truth/rdf_pathjoin_plain.data: -------------------------------------------------------------------------------- 1 | "Ling671@gmail.com",, 2 | "Jean-Pierre149@gmail.com",, 3 | "Julia228@yahoo.com",, -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/flink/emitter/DumpEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.flink.emitter 2 | 3 | class DumpEmitter extends dbis.piglet.codegen.scala_lang.DumpEmitter { 4 | override def template: String = """ .map(_.mkString()).print""".stripMargin 5 | } 6 | 7 | object DumpEmitter { 8 | lazy val instance = new DumpEmitter 9 | } -------------------------------------------------------------------------------- /src/it/resources/windowCount.pig: -------------------------------------------------------------------------------- 1 | input = load '$inbase/input/mary.txt' using TextLoader() as (line); 2 | words = foreach input generate flatten(TOKENIZE(line)) as word; 3 | win = window words range 10 seconds slide range 10 seconds; 4 | grpd = group win by word; 5 | cntd = foreach grpd generate group, COUNT(win); 6 | -- dump cntd; 7 | store cntd into '$outfile'; 8 | -------------------------------------------------------------------------------- /flinklib/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | hadoop.root.logger=WARN, console 2 | log4j.rootLogger =WARN, console 3 | log4j.appender.console=org.apache.log4j.ConsoleAppender 4 | log4j.appender.console.target=System.out 5 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 6 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 7 | -------------------------------------------------------------------------------- /src/it/resources/spatialfilterwithindex.pig: -------------------------------------------------------------------------------- 1 | a = load '$inbase/input/events.csv' using PigStorage(',') as (name: chararray, lat: double, lon: chararray); 2 | b = foreach a GENERATE name, geometry("POINT("+lat+" "+lon+")") as loc; 3 | c = SPATIAL_FILTER b BY containedby(loc, geometry("POINT(50.1 10.2)")) using index rtree(order=2); 4 | STORE c INTO '$outfile'; 5 | -- DUMP c; 6 | -------------------------------------------------------------------------------- /src/it/resources/windowCross.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/joinInput.csv' USING PigStream(',') AS (a1:int,a2:int,a3:int); 2 | B = LOAD '$inbase/input/joinInput.csv' USING PigStream(',') AS (b1:int,b2:int,b3:int); 3 | C = WINDOW A RANGE 10 seconds SLIDE RANGE 10 seconds; 4 | D = WINDOW B RANGE 10 seconds SLIDE RANGE 10 seconds; 5 | X = CROSS C, D; 6 | STORE X INTO '$outfile'; 7 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/flink/emitter/StreamDumpEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.flink.emitter 2 | 3 | class StreamDumpEmitter extends dbis.piglet.codegen.scala_lang.DumpEmitter { 4 | override def template: String = """ .map(_.mkString()).print""".stripMargin 5 | } 6 | 7 | object StreamDumpEmitter { 8 | lazy val 
instance = new StreamDumpEmitter 9 | } -------------------------------------------------------------------------------- /src/it/resources/windowJoin.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/joinInput.csv' USING PigStream(',') AS (a1:int,a2:int,a3:int); 2 | B = LOAD '$inbase/input/joinInput.csv' USING PigStream(',') AS (b1:int,b2:int,b3:int); 3 | C = WINDOW A RANGE 10 seconds SLIDE RANGE 10 seconds; 4 | D = WINDOW B RANGE 10 seconds SLIDE RANGE 10 seconds; 5 | X = JOIN C BY a1, D BY b1; 6 | STORE X INTO '$outfile'; 7 | -------------------------------------------------------------------------------- /ceplib/build.sbt: -------------------------------------------------------------------------------- 1 | import Dependencies._ 2 | 3 | name := "ceplib" 4 | 5 | libraryDependencies ++= Seq( 6 | scalaCompiler, 7 | scalaTest % "test" withSources(), 8 | sparkCore % "provided", 9 | sparkStreaming % "provided", 10 | flinkScala % "provided", 11 | flinkStreaming % "provided", 12 | typesafe, 13 | log4j 14 | ) 15 | 16 | test in assembly := {} 17 | -------------------------------------------------------------------------------- /src/it/resources/rscript.pig: -------------------------------------------------------------------------------- 1 | A = LOAD 'src/it/resources/input/cluster-data.csv' USING PigStorage(',') AS (x: double, y: double); 2 | B = RSCRIPT A USING 'library(fpc);db = dbscan($_, eps=.3, MinPts=5);cluster = cbind(inp, data.frame(db$cluster + 1L)); res = data.matrix(cluster)'; 3 | RES = FOREACH B GENERATE $0 AS x: double, $1 AS y: double, $2 AS cluster: int; 4 | STORE RES INTO 'cluster.out'; 5 | -------------------------------------------------------------------------------- /src/it/resources/nforeach2.pig: -------------------------------------------------------------------------------- 1 | triples = RDFLOAD('$inbase/input/rdf-data.nt'); 2 | stmts = GROUP triples BY subject; 3 | tmp = FOREACH stmts { 4 | r1 = FILTER triples BY (predicate == ""); 5 | r2 = FILTER triples BY (predicate == ""); 6 | GENERATE *, COUNT(r1) AS cnt1, COUNT(r2) AS cnt2; 7 | }; 8 | STORE tmp INTO '$outfile'; 9 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/flink/emitter/StreamSampleEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.flink.emitter 2 | 3 | class StreamSampleEmitter extends dbis.piglet.codegen.scala_lang.SampleEmitter { 4 | override def template: String = """ val = .filter(t => util.Random.nextDouble \<= )""".stripMargin 5 | } 6 | 7 | object StreamSampleEmitter { 8 | lazy val instance = new StreamSampleEmitter 9 | } -------------------------------------------------------------------------------- /src/it/resources/spatialpartitioning.pig: -------------------------------------------------------------------------------- 1 | a = load '$inbase/input/events.csv' using PigStorage(',') as (name: chararray, lat: double, lon: chararray); 2 | b = foreach a GENERATE name, geometry("POINT("+lat+" "+lon+")") as loc; 3 | c = partition b on loc using grid(partitionsPerDimension=4, withExtent=false); 4 | d = SPATIAL_FILTER c BY containedby(loc, geometry("POINT(50.1 10.2)")); 5 | STORE d INTO '$outfile'; 6 | -- DUMP c; 7 | -------------------------------------------------------------------------------- /src/it/resources/windowNforeach.pig: 
-------------------------------------------------------------------------------- 1 | daily = load '$inbase/input/nested.csv' using PigStream(',') as (exchange, symbol); 2 | win = window daily range 10 seconds slide range 10 seconds; 3 | grpd = group win by exchange; 4 | uniqcnt = foreach grpd { 5 | sym = win.symbol; 6 | uniq_sym = distinct sym; 7 | generate group, COUNT(uniq_sym); 8 | }; 9 | store uniqcnt into '$outfile'; 10 | -------------------------------------------------------------------------------- /src/it/resources/crossmany.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/file.csv' USING PigStorage(',') AS (f1:chararray, f2: int); 2 | B = LOAD '$inbase/input/file.csv' USING PigStorage(',') AS (f1:int, f2: int); 3 | C = LOAD '$inbase/input/file.csv' USING PigStorage(',') AS (f1:int, f2: chararray); 4 | D = LOAD '$inbase/input/file.txt' USING PigStorage(',') AS (f1: chararray); 5 | E = CROSS A, B, C, D; 6 | STORE E INTO '$outfile'; 7 | -- DUMP E; 8 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/flink/emitter/LoadEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.flink.emitter 2 | 3 | class LoadEmitter extends dbis.piglet.codegen.scala_lang.LoadEmitter { 4 | override def template: String = """val = []().load(env, "", , )""".stripMargin 5 | 6 | } 7 | 8 | object LoadEmitter { 9 | lazy val instance = new LoadEmitter 10 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/flink/emitter/StreamStoreEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.flink.emitter 2 | 3 | class StreamStoreEmitter extends dbis.piglet.codegen.scala_lang.StoreEmitter { 4 | override def template: String = """ []().writeStream("", , )""".stripMargin 5 | } 6 | 7 | object StreamStoreEmitter { 8 | lazy val instance = new StreamStoreEmitter 9 | } -------------------------------------------------------------------------------- /zeppelin/build.sbt: -------------------------------------------------------------------------------- 1 | import Dependencies._ 2 | 3 | name := "piglet-interpreter" 4 | 5 | 6 | libraryDependencies ++= Seq( 7 | sparkCore % "provided", 8 | sparkSql % "provided", 9 | "org.apache.spark" %% "spark-repl" % "1.5.0", 10 | "org.apache.zeppelin" % "zeppelin-interpreter" % "0.5.0-incubating" 11 | ) 12 | 13 | dependencyOverrides += "org.slf4j" % "slf4j-log4j12" % "1.7.5" 14 | 15 | test in assembly := {} 16 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/flink/emitter/StreamLoadEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.flink.emitter 2 | 3 | class StreamLoadEmitter extends dbis.piglet.codegen.scala_lang.LoadEmitter { 4 | override def template: String = """ val = []().loadStream(env, "", , )""".stripMargin 5 | } 6 | 7 | object StreamLoadEmitter { 8 | lazy val instance = new StreamLoadEmitter 9 | } -------------------------------------------------------------------------------- /src/it/resources/macro1.pig: -------------------------------------------------------------------------------- 1 | DEFINE my_macro(in_alias, p) RETURNS out_alias { 2 | $out_alias = FOREACH $in_alias GENERATE $0 + $p, $1; 3 | }; 4 
| 5 | DEFINE my_macro2(in_alias, p) RETURNS out_alias { 6 | $out_alias = FOREACH $in_alias GENERATE $0, $1 - $p; 7 | }; 8 | 9 | in = LOAD '$inbase/input/file.csv' USING PigStorage(',') AS (f1:int, f2: int); 10 | out = my_macro(in, 42); 11 | out2 = my_macro2(out, 1); 12 | 13 | STORE out2 INTO '$outfile'; 14 | -------------------------------------------------------------------------------- /src/it/resources/skyline.pig: -------------------------------------------------------------------------------- 1 | REGISTER 'eventlib/target/scala-2.11/eventlib_2.11-1.0.jar'; 2 | A = LOAD 'src/it/resources/events.csv' USING PigStorage(',') AS (id: chararray, longitude: double, latitude: double); 3 | B = FOREACH A GENERATE id, dbis.events.Distances.spatialDistance(longitude, latitude, 50.0, 10.0) AS dist: double; 4 | C = STREAM B THROUGH dbis.events.Skyline.process(2, 5, "eventDominates") AS (id: chararray, dist: double); 5 | STORE C INTO 'skyline.out'; 6 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/flink/emitter/StoreEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.flink.emitter 2 | 3 | class StoreEmitter extends dbis.piglet.codegen.scala_lang.StoreEmitter { 4 | override def template: String = """ []().write("", , ) 5 | | env.execute("Starting Query")""".stripMargin 6 | } 7 | 8 | object StoreEmitter { 9 | lazy val instance = new StoreEmitter 10 | } -------------------------------------------------------------------------------- /src/it/resources/two_joins.pig: -------------------------------------------------------------------------------- 1 | A = LOAD '$inbase/input/joinInput.csv' USING PigStorage(',') AS (a1:int,a2:int,a3:int); 2 | B = LOAD '$inbase/input/joinInput.csv' USING PigStorage(',') AS (b1:int,b2:int,b3:int); 3 | 4 | BF1 = FILTER B BY b1 == 4; 5 | BF2 = FILTER B BY b3 == 1; 6 | 7 | X = JOIN A BY a1, BF1 BY b1; 8 | X2 = JOIN A BY a1, BF2 BY b1; 9 | 10 | x0 = FOREACH X GENERATE a1, b1; 11 | x1 = FOREACH X2 GENERATE a1, b1; 12 | u = UNION x0, x1; 13 | 14 | STORE u INTO '$outfile'; 15 | -------------------------------------------------------------------------------- /ceplib/src/main/scala/dbis/piglet/cep/ops/Outputter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.cep.ops 2 | import scala.reflect.ClassTag 3 | import dbis.piglet.backends.{SchemaClass => Event} 4 | package object Outputter { 5 | def convertEventsToArray[T <: Event: ClassTag](collector: MatchCollector[T]): Any = { 6 | collector.convertEventsToArray() 7 | } 8 | def convertEventsToBoolean[T <: Event: ClassTag](collector: MatchCollector[T]): Any = { 9 | collector.convertEventsToBoolean() 10 | } 11 | } -------------------------------------------------------------------------------- /sparklib/src/main/scala/dbis/piglet/backends/spark/SparkStream.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.backends.spark 2 | 3 | import org.apache.spark._ 4 | import org.apache.spark.streaming._ 5 | 6 | object SparkStream { 7 | lazy val conf = new SparkConf() 8 | lazy val cx = new SparkContext(conf) 9 | lazy val ssc = new StreamingContext(cx, Seconds(1)) 10 | 11 | def setAppName(appName: String) = conf.setAppName(appName) 12 | def setMaster(master: String) = conf.setMaster(master) 13 | } 14 | -------------------------------------------------------------------------------- 
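A short usage sketch for the SparkStream helper above (illustrative only, not a file of this repository): a generated Spark Streaming driver would set the application name and master first and only then force the lazily built StreamingContext, because touching ssc also freezes conf and cx. The job name, host, and port below are hypothetical.

// Hypothetical driver; SparkStream is the sparklib object shown above.
SparkStream.setAppName("piglet-job")       // configure before ssc is forced
SparkStream.setMaster("local[2]")
val ssc = SparkStream.ssc                  // forces conf -> cx -> ssc (1-second batches)
ssc.socketTextStream("localhost", 9999).print()
ssc.start()
ssc.awaitTermination()
--------------------------------------------------------------------------------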
/src/it/resources/truth/bgpfilter.data: -------------------------------------------------------------------------------- 1 | ,, 2 | ,, 3 | ,, 4 | ,, -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/spark/StreamDumpEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.spark 2 | 3 | import dbis.piglet.codegen.scala_lang.DumpEmitter 4 | 5 | /** 6 | * Created by kai on 12.12.16. 7 | */ 8 | 9 | class StreamDumpEmitter extends DumpEmitter { 10 | override def template: String = """ .foreachRDD(rdd => rdd.foreach(elem => println(elem.mkString())))""".stripMargin 11 | } 12 | 13 | object StreamDumpEmitter { 14 | lazy val instance = new StreamDumpEmitter 15 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/spark/StreamStoreEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.spark 2 | 3 | import dbis.piglet.codegen.scala_lang.StoreEmitter 4 | 5 | /** 6 | * Created by kai on 12.12.16. 7 | */ 8 | 9 | class StreamStoreEmitter extends StoreEmitter { 10 | override def template: String = """ []().writeStream("", , )""".stripMargin 11 | } 12 | 13 | object StreamStoreEmitter { 14 | lazy val instance = new StreamStoreEmitter 15 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/spark/StreamLoadEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.spark 2 | 3 | import dbis.piglet.codegen.scala_lang.LoadEmitter 4 | 5 | /** 6 | * Created by kai on 12.12.16. 
7 | */ 8 | class StreamLoadEmitter extends LoadEmitter { 9 | override def template: String = """ val = []().loadStream(ssc, "", , )""".stripMargin 10 | } 11 | 12 | object StreamLoadEmitter { 13 | lazy val instance = new StreamLoadEmitter 14 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/flink/emitter/StreamOpEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.flink.emitter 2 | 3 | class StreamOpEmitter extends dbis.piglet.codegen.scala_lang.StreamOpEmitter { 4 | override def template: String = """ val _helper = .map(t => List()) 5 | | val = (env, _helper).map(t => ())""".stripMargin 6 | 7 | } 8 | 9 | object StreamOpEmitter { 10 | lazy val instance = new StreamOpEmitter 11 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/tools/UpdateMap.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.tools 2 | 3 | import scala.collection.mutable.{Map => MutableMap} 4 | 5 | class UpdateMap[K,V](m: MutableMap[K,V]) { 6 | 7 | def insertOrUpdate(k: K)( f: Option[V] => V): Unit = { 8 | 9 | if(m.contains(k)) { 10 | m(k) = f(Some(m(k))) 11 | } else { 12 | m(k) = f(None) 13 | } 14 | } 15 | } 16 | 17 | object UpdateMap { 18 | implicit def createUpdateMap[K,V](m: MutableMap[K,V]): UpdateMap[K,V] = new UpdateMap[K,V](m) 19 | } 20 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/scala_lang/EmptyEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.scala_lang 2 | 3 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext, CodeGenException} 4 | import dbis.piglet.op.Empty 5 | 6 | /** 7 | * Created by kai on 09.12.16. 
8 | */ 9 | class EmptyEmitter extends CodeEmitter[Empty] { 10 | override def template: String = "" 11 | 12 | override def code(ctx: CodeGenContext, node: Empty): String = template 13 | } 14 | 15 | object EmptyEmitter { 16 | lazy val instance = new EmptyEmitter 17 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/Intersection.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.op 2 | 3 | case class Intersection(out: Pipe, in1: Pipe, in2: Pipe) extends PigOperator(List(out), List(in1, in2)) { 4 | 5 | override def lineageString: String = { 6 | s"""INTERSECTION%""" + super.lineageString 7 | } 8 | 9 | override def toString = 10 | s"""INTERSECTION 11 | | out = $outPipeName 12 | | ins = ${inPipeNames.mkString(",")} 13 | | inSchema = $inputSchema 14 | | outSchema = $schema""".stripMargin 15 | 16 | } 17 | -------------------------------------------------------------------------------- /flinklib/build.sbt: -------------------------------------------------------------------------------- 1 | import Dependencies._ 2 | 3 | name := "flinklib" 4 | 5 | libraryDependencies ++= Seq( 6 | scalaCompiler, 7 | scalaTest % "test" withSources(), 8 | jeromq, 9 | flinkScala % "provided", 10 | flinkStreaming % "provided", 11 | typesafe, 12 | log4j, 13 | akkaLogging 14 | ) 15 | 16 | resolvers += "Sonatype (releases)" at "https://oss.sonatype.org/content/repositories/releases/" 17 | 18 | scalacOptions ++= Seq("-feature","-language:implicitConversions") 19 | 20 | test in assembly := {} 21 | logLevel in assembly := Level.Error 22 | // 23 | -------------------------------------------------------------------------------- /setm/src/main/scala/dbis/setm/Main.scala: -------------------------------------------------------------------------------- 1 | package dbis.setm 2 | 3 | import dbis.setm.SETM._ 4 | 5 | object Main { 6 | 7 | def myFunction(s: String) = timing("greeting func") { 8 | // complex operations, e.g. 
9 | (0 until 100).foreach(i => println(s"Hello $s")) 10 | } 11 | 12 | def main(args: Array[String]) { 13 | 14 | timing("program total") { 15 | 16 | val names = timing("create names") { Array("Tick","Trick","Track") } 17 | 18 | for(name <- names) 19 | myFunction(name) 20 | 21 | } 22 | 23 | collect() 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /sparklib/build.sbt: -------------------------------------------------------------------------------- 1 | import Dependencies._ 2 | 3 | name := "sparklib" 4 | 5 | libraryDependencies ++= Seq( 6 | scalaCompiler, 7 | scalaTest % "test" withSources(), 8 | sparkCore % "provided", 9 | //sparkREPL % "provided", // doesn't work yet due to some incompatibilities with jetty 10 | sparkSql % "provided", 11 | sparkStreaming % "provided", 12 | typesafe, 13 | //scalikejdbc, 14 | //scalikejdbc_config, 15 | jdbc, 16 | json4s 17 | ) 18 | 19 | test in assembly := {} 20 | 21 | scalacOptions ++= Seq("-feature","-language:implicitConversions") 22 | 23 | sourcesInBase := false 24 | -------------------------------------------------------------------------------- /src/it/resources/truth/crossed.data: -------------------------------------------------------------------------------- 1 | 1,2,3,1,2,3 2 | 1,2,3,4,2,1 3 | 1,2,3,8,3,4 4 | 1,2,3,4,3,3 5 | 1,2,3,7,2,5 6 | 1,2,3,8,4,3 7 | 4,2,1,1,2,3 8 | 4,2,1,4,2,1 9 | 4,2,1,8,3,4 10 | 4,2,1,4,3,3 11 | 4,2,1,7,2,5 12 | 4,2,1,8,4,3 13 | 8,3,4,1,2,3 14 | 8,3,4,4,2,1 15 | 8,3,4,8,3,4 16 | 8,3,4,4,3,3 17 | 8,3,4,7,2,5 18 | 8,3,4,8,4,3 19 | 4,3,3,1,2,3 20 | 4,3,3,4,2,1 21 | 4,3,3,8,3,4 22 | 4,3,3,4,3,3 23 | 4,3,3,7,2,5 24 | 4,3,3,8,4,3 25 | 7,2,5,1,2,3 26 | 7,2,5,4,2,1 27 | 7,2,5,8,3,4 28 | 7,2,5,4,3,3 29 | 7,2,5,7,2,5 30 | 7,2,5,8,4,3 31 | 8,4,3,1,2,3 32 | 8,4,3,4,2,1 33 | 8,4,3,8,3,4 34 | 8,4,3,4,3,3 35 | 8,4,3,7,2,5 36 | 8,4,3,8,4,3 37 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/Materialize.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.op 2 | 3 | /** 4 | * This represent a MATERIALIZE operator in Pig 5 | * 6 | */ 7 | case class Materialize(private val in: Pipe) extends PigOperator(List(), List(in)) { 8 | 9 | /** 10 | * Returns the lineage string describing the sub-plan producing the input for this operator. 11 | * 12 | * @return a string representation of the sub-plan. 
13 | */ 14 | override def lineageString: String = { 15 | s"""MATERIALIZE%""" + super.lineageString 16 | } 17 | 18 | override def toString = 19 | s"""MATERIALIZE 20 | | in = $inPipeName 21 | """.stripMargin 22 | } -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | logLevel := Level.Warn 2 | 3 | // resolvers += Resolver.url( 4 | // "bintray-sbt-plugin-releases", 5 | // url("http://dl.bintray.com/content/sbt/sbt-plugin-releases"))( 6 | // Resolver.ivyStylePatterns) 7 | // 8 | // addSbtPlugin("me.lessis" % "bintray-sbt" % "0.3.0") 9 | addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.7.0") 10 | 11 | addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.5.1") 12 | 13 | addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") 14 | 15 | addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "5.2.4") 16 | 17 | addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.9.0") 18 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/flink/emitter/StreamDistinctEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.flink.emitter 2 | 3 | import dbis.piglet.codegen.CodeGenContext 4 | import dbis.piglet.op.Distinct 5 | import dbis.piglet.codegen.CodeEmitter 6 | 7 | class StreamDistinctEmitter extends dbis.piglet.codegen.scala_lang.DistinctEmitter { 8 | override def template: String = """""".stripMargin 9 | def templateHelper: String = " .toList.distinct" 10 | 11 | def windowApply(ctx: CodeGenContext, op: Distinct): String = { 12 | CodeEmitter.render(templateHelper, Map()) 13 | } 14 | } 15 | 16 | object StreamDistinctEmitter { 17 | lazy val instance = new StreamDistinctEmitter 18 | } -------------------------------------------------------------------------------- /src/it/resources/wordcount.pig: -------------------------------------------------------------------------------- 1 | -- Load input from the file named mary.txt and call the single 2 | -- field in the record 'line'. 3 | input = load '$inbase/input/mary.txt' as (line: chararray); 4 | 5 | -- TOKENIZE splits the line into a field for each word. 6 | -- flatten will take the collection of records returned by 7 | -- TOKENIZE and produce a separate record for each one, calling the single 8 | -- field in the record word. 9 | words = foreach input generate flatten(TOKENIZE(line)) as word; 10 | 11 | -- Now group them together by each word. 12 | grpd = group words by word; 13 | 14 | -- Count them. 15 | cntd = foreach grpd generate group, COUNT(words); 16 | 17 | store cntd into '$outfile'; 18 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/scala_lang/HdfsCmdEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.scala_lang 2 | 3 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext, CodeGenException} 4 | import dbis.piglet.op.PigOperator 5 | import dbis.piglet.op.cmd.HdfsCmd 6 | 7 | /** 8 | * Created by kai on 12.12.16.
9 | */ 10 | class HdfsCmdEmitter extends CodeEmitter[HdfsCmd] { 11 | override def template: String = """HDFSService.process("", )""".stripMargin 12 | 13 | override def code(ctx: CodeGenContext, op: HdfsCmd): String = render(Map("cmd" -> op.cmd, "params" -> s"List(${op.paramString})")) 14 | } 15 | 16 | object HdfsCmdEmitter { 17 | lazy val instance = new HdfsCmdEmitter 18 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/scala_lang/IntersectionEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.scala_lang 2 | 3 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext} 4 | import dbis.piglet.op.Intersection 5 | 6 | class IntersectionEmitter extends CodeEmitter[Intersection] { 7 | override def template: String = """val = .intersection()""".stripMargin 8 | 9 | 10 | override def code(ctx: CodeGenContext, op: Intersection): String = render(Map("out" -> op.outPipeName, 11 | "in1" -> op.inPipeNames.head, 12 | "in2" -> op.inPipeNames.last 13 | )) 14 | 15 | } 16 | 17 | object IntersectionEmitter { 18 | lazy val instance = new IntersectionEmitter 19 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/TimingOp.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.op 2 | 3 | case class TimingOp ( 4 | private[op] val out: Pipe, 5 | private[op] val in: Pipe, 6 | operatorId: String) extends PigOperator(out, in, in.producer.schema ) { 7 | 8 | 9 | 10 | override def equals(other: Any) = other match { 11 | case o: TimingOp => operatorId == o.operatorId && outPipeName == o.outPipeName 12 | case _ => false 13 | } 14 | 15 | override def hashCode() = (operatorId+outPipeName).hashCode() 16 | 17 | override def toString = 18 | s"""TIMING 19 | | out = $outPipeName 20 | | in = $inPipeName 21 | | schema = $schema""".stripMargin 22 | 23 | 24 | } 25 | -------------------------------------------------------------------------------- /src/it/resources/truth/cross2.csv: -------------------------------------------------------------------------------- 1 | Hallo,Hallo 2 | Hallo,this 3 | Hallo,is 4 | Hallo,a 5 | Hallo,small 6 | Hallo,text 7 | Hallo,file. 8 | this,Hallo 9 | this,this 10 | this,is 11 | this,a 12 | this,small 13 | this,text 14 | this,file. 15 | is,Hallo 16 | is,this 17 | is,is 18 | is,a 19 | is,small 20 | is,text 21 | is,file. 22 | a,Hallo 23 | a,this 24 | a,is 25 | a,a 26 | a,small 27 | a,text 28 | a,file. 29 | small,Hallo 30 | small,this 31 | small,is 32 | small,a 33 | small,small 34 | small,text 35 | small,file. 36 | text,Hallo 37 | text,this 38 | text,is 39 | text,a 40 | text,small 41 | text,text 42 | text,file. 43 | file.,Hallo 44 | file.,this 45 | file.,is 46 | file.,a 47 | file.,small 48 | file.,text 49 | file.,file. 50 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/scala_lang/UnionEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.scala_lang 2 | 3 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext, CodeGenException} 4 | import dbis.piglet.op.{Union, PigOperator} 5 | 6 | /** 7 | * Created by kai on 03.12.16. 
8 | */ 9 | class UnionEmitter extends CodeEmitter[Union] { 10 | override def template: String = """val = )}>""".stripMargin 11 | 12 | 13 | override def code(ctx: CodeGenContext, op: Union): String = render(Map("out" -> op.outPipeName, 14 | "in" -> op.inPipeName, 15 | "others" -> op.inPipeNames.tail)) 16 | 17 | } 18 | 19 | object UnionEmitter { 20 | lazy val instance = new UnionEmitter 21 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/flink/emitter/LimitEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.flink.emitter 2 | 3 | import dbis.piglet.codegen.CodeGenContext 4 | import dbis.piglet.op.Limit 5 | 6 | class LimitEmitter extends dbis.piglet.codegen.scala_lang.LimitEmitter { 7 | override def template: String = """ val = .first()""".stripMargin 8 | 9 | override def code(ctx: CodeGenContext, op: Limit): String = { 10 | 11 | val params = Map( 12 | "out" -> op.outPipeName, 13 | "in" -> op.inPipeName, 14 | "num" -> op.num, 15 | "lineage" -> op.lineageSignature) 16 | 17 | render(params) 18 | 19 | } 20 | 21 | } 22 | 23 | object LimitEmitter { 24 | lazy val instance = new LimitEmitter 25 | } -------------------------------------------------------------------------------- /setm/.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | logs 3 | project/project 4 | project/target 5 | target 6 | tmp 7 | .history 8 | dist 9 | /.idea 10 | /*.iml 11 | /out 12 | .idea_modules 13 | .classpath 14 | .project 15 | /RUNNING_PID 16 | .settings 17 | .target 18 | /bin 19 | *.jpage 20 | lodhub_data 21 | .cache 22 | .worksheet/ 23 | *.sc 24 | .sbt_completion_cache 25 | .tags 26 | *.bak 27 | *.class 28 | *.log 29 | 30 | __my_script/ 31 | 32 | ##### 33 | # ignore database 34 | db 35 | 36 | # sbt specific 37 | .cache 38 | .cache-* 39 | .history 40 | .lib/ 41 | .scalastyle 42 | dist/* 43 | target/ 44 | lib_managed/ 45 | src_managed/ 46 | project/boot/ 47 | project/plugins/project/ 48 | buildinfo.properties 49 | 50 | # Scala-IDE specific 51 | .scala_dependencies 52 | .worksheet 53 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/scala_lang/SampleEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.scala_lang 2 | 3 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext, CodeGenException} 4 | import dbis.piglet.op.{Sample, PigOperator} 5 | 6 | /** 7 | * Created by kai on 03.12.16. 8 | */ 9 | class SampleEmitter extends CodeEmitter[Sample] { 10 | override def template: String = """ val = .sample(false, )""".stripMargin 11 | 12 | 13 | override def code(ctx: CodeGenContext, op: Sample): String = render(Map("out" -> op.outPipeName, 14 | "in" -> op.inPipeName, 15 | "expr" -> ScalaEmitter.emitExpr(ctx, op.expr))) 16 | 17 | } 18 | 19 | 20 | object SampleEmitter { 21 | lazy val instance = new SampleEmitter 22 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/spark/CacheEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.spark 2 | 3 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext} 4 | import dbis.piglet.op.Cache 5 | 6 | /** 7 | * Created by hage on 11.07.17. 
8 | */ 9 | class CacheEmitter extends CodeEmitter[Cache] { 10 | override def template: String = "val = .persist(org.apache.spark.storage.StorageLevel.)" 11 | 12 | override def code(ctx: CodeGenContext, node: Cache): String = { 13 | 14 | val mode = node.cacheMode.toString 15 | 16 | val map = Map("out" -> node.outPipeName, "in" -> node.inPipeName, "mode" -> mode) 17 | 18 | render(map) 19 | } 20 | } 21 | 22 | object CacheEmitter { 23 | lazy val instance = new CacheEmitter 24 | } 25 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/scala_lang/TimingEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.scala_lang 2 | 3 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext} 4 | import dbis.piglet.op.TimingOp 5 | 6 | class TimingEmitter extends CodeEmitter[TimingOp] { 7 | override def template = """val = .mapPartitionsWithIndex({case (idx,iter) => 8 | | PerfMonitor.notify(url, "", ,idx, System.currentTimeMillis) 9 | | iter 10 | \},true)""".stripMargin 11 | 12 | override def code(ctx: CodeGenContext, op: TimingOp): String = render(Map( 13 | "out"-> op.outPipeName, 14 | "in" -> op.inPipeName, 15 | "lineage" -> op.operatorId)) 16 | } 17 | 18 | object TimingEmitter { 19 | lazy val instance = new TimingEmitter 20 | } -------------------------------------------------------------------------------- /src/it/resources/spatialjoin.pig: -------------------------------------------------------------------------------- 1 | a1 = load '$inbase/input/events.csv' using PigStorage(',') as (name: chararray, lat: double, lon: chararray); 2 | b1 = foreach a1 GENERATE name, geometry("POINT("+lat+" "+lon+")") as loc; 3 | 4 | a2 = load '$inbase/input/events.csv' using PigStorage(',') as (name: chararray, lat: double, lon: chararray); 5 | b2 = foreach a2 GENERATE name, geometry("POINT("+lat+" "+lon+")") as loc; 6 | 7 | -- loc fields are automatically resolved as we expect the first one to be from the left input 8 | -- and the second one to be from the right input relation. 9 | -- Hence, no need for (b1::loc, b2::loc) disambiguation or renaming 10 | c = SPATIAL_JOIN b1, b2 ON containedby(loc, loc); 11 | 12 | d = foreach c GENERATE b1::name, b2::name; 13 | 14 | --DUMP d; 15 | STORE d INTO '$outfile'; 16 | -------------------------------------------------------------------------------- /src/it/resources/spatialjoinwithindex.pig: -------------------------------------------------------------------------------- 1 | a1 = load '$inbase/input/events.csv' using PigStorage(',') as (name: chararray, lat: double, lon: chararray); 2 | b1 = foreach a1 GENERATE name, geometry("POINT("+lat+" "+lon+")") as loc; 3 | 4 | a2 = load '$inbase/input/events.csv' using PigStorage(',') as (name: chararray, lat: double, lon: chararray); 5 | b2 = foreach a2 GENERATE name, geometry("POINT("+lat+" "+lon+")") as loc; 6 | 7 | -- loc fields are automatically resolved as we expect the first one to be from the left input 8 | -- and the second one to be from the right input relation. 
9 | -- Hence, no need for (b1::loc, b2::loc) disambiguation or renaming 10 | c = SPATIAL_JOIN b1, b2 ON containedby(loc, loc) using index rtree(order = 2); 11 | 12 | d = foreach c GENERATE b1::name, b2::name; 13 | 14 | --DUMP d; 15 | STORE d INTO '$outfile'; 16 | -------------------------------------------------------------------------------- /ceplib/src/main/scala/dbis/piglet/cep/nfa/RelatedValue.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.cep.nfa 2 | import scala.reflect.ClassTag 3 | import dbis.piglet.backends.{SchemaClass => Event} 4 | 5 | case class NotInitializedException(private val msg: String) extends Exception(msg) 6 | 7 | trait RelatedValue[T]{ 8 | def updateValue(event: T): Unit 9 | def initValue(event: T): Unit 10 | def getValue(): Double 11 | } 12 | 13 | abstract class PreviousRelatedValue[T <: Event: ClassTag] extends RelatedValue[T]{ 14 | var value: Option[Double] = None 15 | override def initValue(event: T): Unit = updateValue(event) 16 | override def updateValue(event: T): Unit 17 | override def getValue(): Double = { 18 | value match { 19 | case Some(v) => v 20 | case None => throw NotInitializedException("Related value is not initialized") 21 | } 22 | } 23 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/Visualize.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.op 2 | 3 | import dbis.piglet.expr.Ref 4 | 5 | case class Visualize(private val in: Pipe, field: Ref, path: String, width: Int, height: Int, pointSize: Option[Int] = None) extends PigOperator(List(), List(in)) { 6 | 7 | 8 | lazy val (pathNoExt,fileType) = { 9 | val i = path.lastIndexOf(".") 10 | if(i > 0) { 11 | val p = path.substring(0,i) 12 | val ext = path.substring(i+1) 13 | (p,ext) 14 | } 15 | else 16 | (path,"") 17 | } 18 | 19 | 20 | override def lineageString: String = { 21 | s"""VISUALIZE%""" + super.lineageString 22 | } 23 | 24 | override def toString = 25 | s"""VISUALIZE 26 | | in = $inPipeName 27 | | type = $path 28 | | size = $width x $height 29 | | pointSize = $pointSize""".stripMargin 30 | 31 | } 32 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM java:latest 2 | MAINTAINER stefan.hagedorn@tu-ilmenau.de 3 | 4 | COPY /script/piglet /piglet/ 5 | 6 | COPY target/scala-2.11/PigCompiler.jar /piglet/ 7 | ENV PIG_LIB /piglet/PigCompiler.jar 8 | 9 | COPY /sparklib/target/scala-2.11/sparklib_2.11-*.jar /sparklib/target/scala-2.11/ 10 | ENV BACKEND_DIR /sparklib/target/scala-2.11/* 11 | 12 | # enable these to support other backends. 
13 | #COPY /flinklib/target/scala-2.11/flinklib_2.11-*.jar /flinklib/target/scala-2.11/ 14 | #COPY /mapreduce/target/scala-2.11/mapreduce_2.11-*.jar /mapreduce/target/scala-2.11/ 15 | 16 | ENV SPARK_JAR /piglet/spark-assembly-1.5.1-hadoop2.4.0.jar 17 | 18 | 19 | RUN wget -q -P /piglet http://moria.prakinf.tu-ilmenau.de/spark-assembly-1.5.1-hadoop2.4.0.jar 20 | #RUN mv spark-assembly-1.5.1-hadoop2.4.0.jar /piglet/ 21 | 22 | 23 | ENTRYPOINT ["/piglet/piglet"] 24 | CMD ["--help"] 25 | -------------------------------------------------------------------------------- /ceplib/src/main/scala/dbis/piglet/cep/ops/MatchCollector.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.cep.ops 2 | import dbis.piglet.cep.nfa.NFAStructure 3 | import scala.reflect.ClassTag 4 | import scala.collection.mutable.ListBuffer 5 | import scala.collection.mutable.ArrayBuffer 6 | import dbis.piglet.backends.{SchemaClass => Event} 7 | 8 | class MatchCollector[T <: Event: ClassTag] extends Serializable { 9 | var matchSequences: ListBuffer[NFAStructure[T]] = new ListBuffer() 10 | def +(that: NFAStructure[T]): Unit = matchSequences += that 11 | def size: Int = matchSequences.size 12 | def convertEventsToArray(): ArrayBuffer[T] = { 13 | var events: ArrayBuffer[T] = new ArrayBuffer() 14 | matchSequences.foreach (seq => events ++= seq.events) 15 | events 16 | } 17 | def convertEventsToBoolean(): ArrayBuffer[Boolean] = { 18 | ArrayBuffer(matchSequences.size > 0) 19 | } 20 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/Partition.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.op 2 | 3 | object PartitionMethod extends Enumeration { 4 | type PartitionMethod = Value 5 | val GRID, BSP, Hash = Value 6 | } 7 | 8 | import PartitionMethod.PartitionMethod 9 | import dbis.piglet.expr.Ref 10 | 11 | case class Partition( 12 | private val out: Pipe, 13 | private val in: Pipe, 14 | field: Ref, 15 | method: PartitionMethod, 16 | params: Seq[String] 17 | ) extends PigOperator(out, in) { 18 | 19 | override def lineageString = 20 | s"""PARTITION%$method%$field%${params.mkString}"""+super.lineageString 21 | 22 | override def toString = 23 | s"""PARTITION 24 | | out = $outPipeName 25 | | in = $inPipeName 26 | | schema = $schema 27 | | field = $field 28 | | method = $method 29 | | params = ${params.mkString(",")}""".stripMargin 30 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/Difference.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.op 2 | 3 | import dbis.piglet.expr.Ref 4 | 5 | case class Difference(private val out: Pipe, private val in1: Pipe, private val in2: Pipe, 6 | refs1: Option[List[Ref]] = None, 7 | refs2: Option[List[Ref]] = None 8 | ) extends PigOperator(List(out), List(in1, in2)) { 9 | 10 | override def lineageString: String = { 11 | s"""DIFFERENCE%""" + super.lineageString 12 | } 13 | 14 | override def toString = 15 | s"""DIFFERENCE 16 | | out = $outPipeName 17 | | ins = ${inPipeNames.mkString(",")}, 18 | | refs1 = ${refs1.map(_.mkString(",")).getOrElse("--")}, 19 | | refs2 = ${refs2.map(_.mkString(",")).getOrElse("--")}, 20 | | inSchema = $inputSchema 21 | | outSchema = $schema""".stripMargin 22 | 23 | } 24 | --------------------------------------------------------------------------------
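A minimal usage sketch for the MatchCollector class above (illustrative only, not a file of this repository): a CEP engine appends each completed NFA run to the collector, and the Outputter helpers later drain it either as the flat sequence of matched events or as a single match/no-match flag. MyEvent and completedRun below are hypothetical stand-ins.

// Hypothetical code; MatchCollector and NFAStructure come from ceplib above.
val collector = new MatchCollector[MyEvent]()
collector + completedRun                          // record one finished NFAStructure[MyEvent]
val events = collector.convertEventsToArray()     // ArrayBuffer[MyEvent]: all matched events, concatenated
val matched = collector.convertEventsToBoolean()  // ArrayBuffer with a single flag: was anything matched?
--------------------------------------------------------------------------------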
/src/main/scala/dbis/piglet/codegen/scala_lang/DistinctEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.scala_lang 2 | 3 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext, CodeGenException} 4 | import dbis.piglet.op.{Distinct, PigOperator} 5 | 6 | /** 7 | * Created by kai on 03.12.16. 8 | */ 9 | class DistinctEmitter extends CodeEmitter[Distinct] { 10 | override def template: String = """val = .distinct.map{t => 11 | | PerfMonitor.sampleSize(t,"", accum, randFactor) 12 | | t 13 | |}""".stripMargin 14 | 15 | 16 | override def code(ctx: CodeGenContext, op: Distinct): String = 17 | render(Map("out" -> op.outPipeName, "in" -> op.inPipeName, "lineage" -> op.lineageSignature)) 18 | 19 | } 20 | 21 | object DistinctEmitter { 22 | lazy val instance = new DistinctEmitter 23 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/spark/VisualizeEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.spark 2 | 3 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext} 4 | import dbis.piglet.op.Visualize 5 | 6 | class VisualizeEmitter extends CodeEmitter[Visualize] { 7 | override def template: String = s""".visualize(,, "", "")""" 8 | 9 | override def code(ctx: CodeGenContext, op: Visualize): String = { 10 | val m = Map( 11 | "in" -> op.inPipeName, 12 | "width" -> op.width, 13 | "height" -> op.height, 14 | "path" -> op.pathNoExt, 15 | "ext" -> op.fileType, 16 | "keyby" -> SpatialEmitterHelper.keyByCode(op.schema,op.field, ctx), 17 | "pointsize" -> op.pointSize.map(p => s",pointSize = $p").getOrElse("") 18 | ) 19 | 20 | render(m) 21 | } 22 | } 23 | 24 | object VisualizeEmitter { 25 | lazy val instance = new VisualizeEmitter 26 | } 27 | -------------------------------------------------------------------------------- /ceplib/src/main/scala/dbis/piglet/cep/spark/CustomRDDMatcher.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.cep.spark 2 | 3 | import org.apache.spark.SparkContext 4 | import dbis.piglet.cep.ops.SelectionStrategy._ 5 | import dbis.piglet.cep.ops.OutputStrategy._ 6 | import org.apache.spark.rdd._ 7 | import scala.reflect.ClassTag 8 | import dbis.piglet.cep.nfa.NFAController 9 | import dbis.piglet.backends.{SchemaClass => Event} 10 | 11 | class CustomRDDMatcher[T <: Event: ClassTag](rdd: RDD[T]) { 12 | 13 | def matchNFA(nfa: NFAController[T], sstr: SelectionStrategy = FirstMatch, out: OutputStrategy = Combined) = { 14 | // println("create a new RDD matcher") 15 | val newRDD = rdd.coalesce(1, true) 16 | new RDDMatcher(newRDD, nfa, sstr, out) 17 | } 18 | 19 | } 20 | 21 | object CustomRDDMatcher { 22 | 23 | implicit def addRDDMatcher[T <: Event: ClassTag](rdd: RDD[T]) = { 24 | // println("add a custom RDD function") 25 | new CustomRDDMatcher(rdd) 26 | } 27 | } -------------------------------------------------------------------------------- /ceplib/src/main/scala/dbis/piglet/cep/spark/CustomDStreamMatcher.scala: -------------------------------------------------------------------------------- 1 | 2 | package dbis.piglet.cep.spark 3 | 4 | import org.apache.spark.SparkContext._ 5 | import org.apache.spark.streaming.dstream._ 6 | import scala.reflect.ClassTag 7 | import dbis.piglet.cep.ops.SelectionStrategy._ 8 | import dbis.piglet.cep.ops.OutputStrategy._ 9 | import dbis.piglet.cep.nfa.NFAController 10 | import 
dbis.piglet.backends.{SchemaClass => Event} 11 | 12 | class CustomDStreamMatcher[T <: Event: ClassTag](dstream: DStream[T]) { 13 | 14 | def matchNFA(nfa: NFAController[T], sstr: SelectionStrategy = FirstMatch, out: OutputStrategy = Combined) = { 15 | // println("create a new DStream matcher") 16 | new DStreamMatcher(dstream, nfa, sstr, out) 17 | } 18 | 19 | } 20 | 21 | object CustomDStreamMatcher { 22 | 23 | implicit def addDStreamMatcher[T <: Event: ClassTag](dstream: DStream[T]) = { 24 | // println("add a custom DStream function") 25 | new CustomDStreamMatcher(dstream) 26 | } 27 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/plan/rewriting/rulesets/Ruleset.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.plan.rewriting.rulesets 18 | 19 | trait Ruleset { 20 | def registerRules(): Unit 21 | } 22 | -------------------------------------------------------------------------------- /flinklib/src/main/scala/dbis/piglet/backends/flink/streaming/FlinkExtensions.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.backends.flink.streaming 2 | 3 | /* 4 | import org.apache.flink.streaming.api.scala.WindowedDataStream 5 | import org.apache.flink.streaming.api.scala.createTypeInformation 6 | import org.apache.flink.util.Collector 7 | 8 | 9 | object FlinkExtensions { 10 | implicit class EnhancedWindowedDataStream(val w: WindowedDataStream[List[Any]]) { 11 | 12 | def distinct = w.mapWindow(distinctMapFunction _) 13 | 14 | private def distinctMapFunction(ts: Iterable[List[Any]], out: Collector[List[Any]]) ={ 15 | ts.toList.distinct.foreach{ x => out.collect(x) } 16 | } 17 | } 18 | 19 | implicit class EnhancedWindowedDataStreamString(val w: WindowedDataStream[List[String]]) { 20 | 21 | def distinct = w.mapWindow(distinctMapFunction _) 22 | 23 | private def distinctMapFunction(ts: Iterable[List[String]], out: Collector[List[String]]) ={ 24 | ts.toList.distinct.foreach{ x => out.collect(x) } 25 | } 26 | } 27 | }*/ 28 | -------------------------------------------------------------------------------- /materialization_scripts/gdelt_gold_tone_roi.pig: -------------------------------------------------------------------------------- 1 | gdelt = LOAD '$gdelt' using PigStorage(); 2 | fields = FOREACH gdelt GENERATE $0 as eventid, $1 as day, $4 as a1code, $5 as a1countrycode, $8 as a1ethniccode, $14 as a2code, $15 as a2countrycode, $18 as a2ethniccode, $29 as goldstein, $33 as avgtone, $39 as a1lat, $40 as a1lon, $47 as a2lat, $48 as a2lon; 3 | withLoc = FILTER fields BY NONEMPTY(a1lat) and
NONEMPTY(a1lon) and NONEMPTY(goldstein) and nonempty(avgtone); 4 | gdeltGeo = FOREACH withLoc GENERATE geometry("POINT("+a1lat+" "+a1lon+")"), (double)goldstein as gold, (double)avgtone as tone; 5 | roi = LOAD '$rgdelt' USING PigStorage(';') as (id: int, wkt: chararray); 6 | roiGeo = FOREACH roi GENERATE geometry(wkt) as geo, id; 7 | toneregion = SPATIAL_JOIN gdeltGeo, roiGeo ON CONTAINEDBY using index rtree(order=5); 8 | toneregionid = FOREACH toneregion GENERATE id, gold, tone; 9 | toneByRegion = GROUP toneregionid BY id; 10 | tonePerRegion = FOREACH toneByRegion GENERATE group as regionId, avg(toneregionid.gold), avg(toneregionid.tone); 11 | dump tonePerRegion mute; -------------------------------------------------------------------------------- /sparklib/src/main/scala/dbis/piglet/backends/spark/PigFuncs.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package dbis.piglet.backends.spark 19 | 20 | import dbis.piglet.CommonPigFuncs 21 | 22 | object PigFuncs extends CommonPigFuncs { 23 | 24 | } 25 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/scala_lang/FilterEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.scala_lang 2 | 3 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext} 4 | import dbis.piglet.op.Filter 5 | 6 | /** 7 | * Created by kai on 01.12.16.
8 | */ 9 | class FilterEmitter extends CodeEmitter[Filter] { 10 | override def template: String = 11 | """val = .filter{t => 12 | | val res = 13 | | 14 | | if(res) { 15 | | PerfMonitor.sampleSize(t, "", accum, randFactor) 16 | | } 17 | | 18 | | res 19 | |\}""".stripMargin 20 | 21 | 22 | override def code(ctx: CodeGenContext, op: Filter): String = { 23 | val m = Map("out" -> op.outPipeName, 24 | "in" -> op.inPipeName, 25 | "lineage" -> op.lineageSignature, 26 | "pred" -> ScalaEmitter.emitPredicate(CodeGenContext(ctx, Map[String,Any]("schema" -> op.schema)), op.pred)) 27 | 28 | render(m) 29 | } 30 | } 31 | 32 | object FilterEmitter { 33 | lazy val instance = new FilterEmitter 34 | } -------------------------------------------------------------------------------- /materialization_scripts/taxi_tip_avg.pig: -------------------------------------------------------------------------------- 1 | <% 2 | def dateToMonth(date: String): Int = { 3 | val formatter = java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss") 4 | java.time.LocalDate.parse(date,formatter).getMonthValue() 5 | } 6 | %> 7 | 8 | raw = load '$taxi' using PigStorage(',',skipEmpty=true) as 9 | (vendor_id:chararray,pickup_datetime:chararray,dropoff_datetime:chararray,passenger_count:chararray, 10 | trip_distance:chararray, pickup_longitude:chararray,pickup_latitude:chararray,rate_code:chararray, 11 | store_and_fwd_flag:chararray,dropoff_longitude:chararray,dropoff_latitude:chararray,payment_type:chararray, 12 | fare_amount:chararray,surcharge:chararray,mta_tax:chararray,tip_amount:chararray,tolls_amount:chararray,total_amount:chararray); 13 | 14 | noHeader = filter raw by not STARTSWITH(lower(vendor_id),"vendor"); 15 | month_tip = FOREACH noHeader GENERATE dateToMonth(pickup_datetime) as month:int, (double)tip_amount as tip; 16 | 17 | grp = GROUP month_tip by month; 18 | avg = FOREACH grp GENERATE group, AVG(month_tip.tip); 19 | dump avg mute; 20 | -------------------------------------------------------------------------------- /ceplib/src/main/scala/dbis/piglet/cep/ops/EngineConf.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.cep.ops 2 | 3 | import dbis.piglet.cep.engines._ 4 | import scala.reflect.ClassTag 5 | import dbis.piglet.cep.ops.SelectionStrategy._ 6 | import dbis.piglet.cep.nfa.NFAController 7 | import dbis.piglet.backends.{SchemaClass => Event} 8 | 9 | abstract class EngineConf[T <: Event: ClassTag](nfa: NFAController[T], sstr: SelectionStrategy) { 10 | val collector: MatchCollector[T] = new MatchCollector() 11 | var engine: CEPEngine[T] = sstr match { 12 | case SelectionStrategy.FirstMatch => new FirstMatch(nfa, collector) 13 | case SelectionStrategy.AllMatches => new AnyMatch(nfa, collector) 14 | case SelectionStrategy.NextMatches => new NextMatch(nfa, collector) 15 | case SelectionStrategy.ContiguityMatches => new ContiguityMatch(nfa, collector) 16 | case _ => throw new Exception("The Strategy is not supported") 17 | 18 | } 19 | } 20 | /* 21 | trait EngineConfig [T] extends EngineConf[T] { 22 | implicit def event: Event 23 | }*/ -------------------------------------------------------------------------------- /src/test/scala/dbis/piglet/CompilerSpec.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet 2 | 3 | import dbis.piglet.codegen.PigletCompiler 4 | import org.scalatest.{Matchers, FlatSpec} 5 | 6 | /** 7 | * Created by kai on 13.07.15.
8 | */ 9 | class CompilerSpec extends FlatSpec with Matchers { 10 | "The compiler" should "substitute parameters in a source line" in { 11 | val source = """a = FOREACH b GENERATE $0 AS $P1, myFunc($1) AS $PARAM2;""" 12 | val substitutedLine = PigletCompiler.replaceParameters(source, Map("P1" -> "column", "PARAM2" -> "funcResult")) 13 | substitutedLine should be ("""a = FOREACH b GENERATE $0 AS column, myFunc($1) AS funcResult;""") 14 | } 15 | 16 | it should "resolve IMPORT statements recursively" in { 17 | val source = List("IMPORT 'src/it/resources/import1.pig';", "C = FOREACH B GENERATE $0;") 18 | val (output, p) = PigletCompiler.resolveImports(source.toIterator) 19 | output.mkString("\n") should be ( 20 | """A = LOAD 'input'; 21 | |B = FILTER A BY $0 > 10; 22 | |C = FOREACH B GENERATE $0;""".stripMargin) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /flinklib/src/main/scala/dbis/piglet/backends/flink/FlinkConf.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.backends.flink 2 | 3 | import dbis.piglet.backends.BackendConf 4 | import com.typesafe.config.ConfigFactory 5 | import dbis.piglet.backends.PigletBackend 6 | 7 | /** 8 | * @author hage 9 | */ 10 | class FlinkConf extends BackendConf { 11 | 12 | // loads the default configuration file in resources/application.conf 13 | private val appconf = ConfigFactory.load() 14 | 15 | /** 16 | * Get the name of this backend 17 | * 18 | * @return Returns the name of this backend 19 | */ 20 | override def name: String = appconf.getString("backends.flink.name") 21 | 22 | /** 23 | * Get the path to the runner class that implements the PigletBackend interface 24 | */ 25 | override def runnerClass: PigletBackend = { 26 | new FlinkRun 27 | } 28 | 29 | override def templateFile: String = appconf.getString("backends.flink.template") 30 | 31 | override def defaultConnector: String = appconf.getString("backends.flink.connector") 32 | 33 | override def raw = false 34 | } 35 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/expr/Traverser.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.expr 2 | 3 | //import dbis.piglet.op.{Func, RefExpr, Expr, NamedField} 4 | import dbis.piglet.schema.Schema 5 | 6 | import scala.collection.mutable.ListBuffer 7 | 8 | 9 | class NamedFieldExtractor { 10 | val fields = ListBuffer[NamedField]() 11 | 12 | def collectNamedFields(schema: Schema, ex: Expr): Boolean = ex match { 13 | case RefExpr(r) => r match { 14 | case NamedField(n, _) => fields += r.asInstanceOf[NamedField]; true 15 | case _ => true 16 | } 17 | case _ => true 18 | } 19 | } 20 | 21 | class RefExprExtractor { 22 | val exprs = ListBuffer[RefExpr]() 23 | 24 | def collectRefExprs(schema: Schema, ex: Expr): Boolean = ex match { 25 | case RefExpr(r) => exprs += ex.asInstanceOf[RefExpr]; true 26 | case _ => true 27 | } 28 | } 29 | 30 | class FuncExtractor { 31 | val funcs = ListBuffer[Func]() 32 | 33 | def collectFuncExprs(schema: Schema, ex: Expr): Boolean = ex match { 34 | case Func(f, params) => funcs += ex.asInstanceOf[Func]; true 35 | case _ => true 36 | } 37 | } 38 | 39 | -------------------------------------------------------------------------------- /flinklib/src/main/scala/dbis/piglet/backends/flink/streaming/FlinksConf.scala: -------------------------------------------------------------------------------- 1 | package 
dbis.piglet.backends.flink.streaming 2 | 3 | import dbis.piglet.backends.BackendConf 4 | import dbis.piglet.backends.flink.FlinkRun 5 | import dbis.piglet.backends.PigletBackend 6 | import com.typesafe.config.ConfigFactory 7 | 8 | /** 9 | * @author hage 10 | */ 11 | class FlinksConf extends BackendConf { 12 | // loads the default configuration file in resources/application.conf 13 | private val appconf = ConfigFactory.load() 14 | 15 | /** 16 | * Get the name of this backend 17 | * 18 | * @return Returns the name of this backend 19 | */ 20 | override def name: String = appconf.getString("backends.flinks.name") 21 | 22 | /** 23 | * Get the path to the runner class that implements the PigletBackend interface 24 | */ 25 | override def runnerClass: PigletBackend = { 26 | new FlinkRun 27 | } 28 | 29 | override def templateFile: String = appconf.getString("backends.flinks.template") 30 | 31 | override def defaultConnector: String = appconf.getString("backends.flinks.connector") 32 | 33 | override def raw = false 34 | } 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ######################################### 2 | # File with contents to be ignored by git 3 | ######################################### 4 | logs 5 | project/project 6 | project/target 7 | target 8 | tmp 9 | .history 10 | dist 11 | /.idea 12 | /*.iml 13 | /out 14 | .idea_modules 15 | .classpath 16 | .project 17 | /RUNNING_PID 18 | .settings 19 | .target 20 | /bin 21 | *.jpage 22 | lodhub_data 23 | .cache 24 | .worksheet/ 25 | *.sc 26 | .sbt_completion_cache 27 | .tags 28 | *.bak 29 | *.class 30 | *.log 31 | 32 | __my_script*/ 33 | 34 | ##### 35 | 36 | piglet-dist* 37 | 38 | 39 | ################# 40 | # ignore database 41 | ################# 42 | db 43 | 44 | ############## 45 | # sbt specific 46 | ############## 47 | .cache 48 | .cache-* 49 | .history 50 | .lib/ 51 | .scalastyle 52 | dist/* 53 | target/ 54 | lib_managed/ 55 | src_managed/ 56 | project/boot/ 57 | project/plugins/project/ 58 | buildinfo.properties 59 | 60 | # Scala-IDE specific 61 | .scala_dependencies 62 | .worksheet 63 | 64 | .LICENSE-COPY.crc 65 | 66 | ################################ 67 | # Operating Systems 68 | ################################ 69 | *~ 70 | *.swp 71 | .DS_Store 72 | -------------------------------------------------------------------------------- /common/src/main/scala/dbis/piglet/backends/BackendConf.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.backends 2 | 3 | /** 4 | * @author hage 5 | */ 6 | trait BackendConf { 7 | /** 8 | * Get the name of this backend 9 | * 10 | * @return Returns the name of this backend 11 | */ 12 | def name: String 13 | 14 | /** 15 | * Get an instance of runner that will be used to run the jobs 16 | * 17 | * @return Returns the full qualified name of the runner class 18 | */ 19 | def runnerClass: PigletBackend 20 | 21 | /** 22 | * Get the full path to the template file to use for the backend 23 | * 24 | * @return the name of the template file 25 | */ 26 | def templateFile: String 27 | 28 | /** 29 | * Get the default connection function used for source and sink nodes 30 | * 31 | * @return the name of the function 32 | */ 33 | def defaultConnector: String 34 | 35 | /** 36 | * Defines that a backends needs the raw Pig script 37 | * rather than the generated code 38 | * 39 | * @return True if the backends wants the original script, 
otherwise false 40 | */ 41 | def raw: Boolean 42 | } 43 | -------------------------------------------------------------------------------- /sparklib/src/main/scala/dbis/piglet/backends/spark/SparkSRun.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package dbis.piglet.backends.spark 19 | 20 | class SparkSRun extends SparkRun { 21 | override def templateFile = appconf.getString("backends.sparks.template") 22 | override def defaultConnector = appconf.getString("backends.sparks.connector") 23 | } 24 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/Empty.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package dbis.piglet.op 18 | 19 | /** Empty represents PigOperators or an entire structure of them that has been removed 20 | * 21 | * @param in 22 | */ 23 | //noinspection ScalaDocMissingParameterDescription 24 | case class Empty(private val in: Pipe) extends PigOperator(List(), List(in)) -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/mm/CacheEntry.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.mm 2 | 3 | import dbis.piglet.Piglet.Lineage 4 | 5 | import scala.concurrent.duration._ 6 | 7 | case class CacheEntry(lineage: Lineage, uri: String, _benefit: Long, bytes: Long, var lastLoaded: Option[Long] = None, var written: Option[Long] = None, 8 | var useCount: Int = 0, var fixed: Boolean = false) { 9 | 10 | 11 | def benefit: Duration = _benefit.milliseconds 12 | 13 | def markWritten() = written = Some(System.currentTimeMillis()) 14 | 15 | def markLoaded() = { 16 | lastLoaded = Some(System.currentTimeMillis()) 17 | useCount += 1 18 | } 19 | 20 | override def toString = 21 | s"""CacheEntry 22 | | lineage: $lineage file: $uri benefit: ${benefit.toSeconds} (${_benefit} ms) bytes: $bytes lastLoaded: ${lastLoaded.getOrElse("-")} written: ${written.getOrElse("-")} 23 | | use count: $useCount fixed: $fixed""".stripMargin 24 | 25 | override def equals(obj: scala.Any): Boolean = obj match { 26 | case o: CacheEntry => 27 | o.lineage equals lineage 28 | case _ => false 29 | } 30 | 31 | override def hashCode(): Int = lineage.hashCode 32 | } 33 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/plan/rewriting/dsl/words/CheckWord.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
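A short usage sketch for the CacheEntry class above (illustrative, not part of the sources; it assumes Piglet.Lineage is a String-like identifier and uses made-up values):

val entry = CacheEntry(lineage = "a1b2c3", uri = "hdfs:///piglet/cache/a1b2c3",
                       _benefit = 4200L, bytes = 1024L * 1024L)

entry.markWritten()   // records the write timestamp
entry.markLoaded()    // records the load timestamp and bumps useCount to 1

// _benefit is stored in milliseconds and exposed as a Duration
assert(entry.benefit.toSeconds == 4)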
16 | */ 17 | package dbis.piglet.plan.rewriting.dsl.words 18 | 19 | import dbis.piglet.op.PigOperator 20 | import dbis.piglet.plan.rewriting.dsl.traits.{BuilderT, CheckWordT} 21 | 22 | class CheckWord[FROM <: PigOperator, TO](override val b: BuilderT[FROM, TO]) extends CheckWordT[FROM, TO] { 23 | 24 | } -------------------------------------------------------------------------------- /materialization_scripts/gdelt_url_eventcode.pig: -------------------------------------------------------------------------------- 1 | <% 2 | def extractDomain(url: String): String = { 3 | if(!url.startsWith("http")) 4 | url 5 | else { 6 | val startPos = url.indexOf("//")+2 7 | val endPos = if(url.indexOf("/",startPos) < 0) { url.size } else { url.indexOf("/",startPos) } 8 | url.substring(startPos, endPos) 9 | } 10 | } 11 | def diff(d1: Double, d2: Double): Double = { 12 | math.abs(d1 - d2) 13 | } 14 | def isnum(s: String): Boolean = { 15 | scala.util.Try { 16 | s.toDouble 17 | }.map(_ => true).getOrElse(false) 18 | } 19 | %> 20 | gdelt = LOAD '$gdelt' using PigStorage(); 21 | fields = FOREACH gdelt GENERATE $26 as eventcode, (double)$34 as avgtone, $57 as url; 22 | withURL = FILTER fields BY nonempty(eventcode) and isnum(eventcode) and nonempty(url); 23 | domain = FOREACH withURL GENERATE extractDomain(url) as site, (int)eventcode as ecode, avgtone; 24 | grp = GROUP domain BY (site, ecode); 25 | avgtones1 = FOREACH grp GENERATE group as siteecode, avg(domain.avgtone) as avgtone; 26 | avgtones = FILTER avgtones1 BY avgtone != 0; 27 | f = FOREACH avgtones GENERATE siteecode.site as site, siteecode.ecode as code, avgtone; 28 | ordered = ORDER f BY site, code; 29 | dump ordered mute; -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/flink/emitter/OrderByEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.flink.emitter 2 | 3 | import dbis.piglet.codegen.{ CodeEmitter, CodeGenContext, CodeGenException } 4 | import dbis.piglet.op.{ OrderBy, OrderByDirection, OrderBySpec, PigOperator } 5 | import dbis.piglet.schema.Types 6 | import dbis.piglet.expr.NamedField 7 | import dbis.piglet.expr.PositionalField 8 | import dbis.piglet.schema.Schema 9 | import dbis.piglet.expr.Ref 10 | import dbis.piglet.codegen.flink.FlinkHelper 11 | 12 | class OrderByEmitter extends dbis.piglet.codegen.scala_lang.OrderByEmitter { 13 | override def template: String = """ val <out> = <in>.setParallelism(1)<key, asc:{k, a|.sortPartition(<k>, Order.<a>)}>""".stripMargin 14 | 15 | override def code(ctx: CodeGenContext, op: OrderBy): String = { 16 | val key = op.orderSpec.map(spec => FlinkHelper.getOrderIndex(op.schema, spec.field)) 17 | val orders = op.orderSpec.map(spec => if (spec.dir == OrderByDirection.AscendingOrder) "ASCENDING" else "DESCENDING") 18 | render(Map("out" -> op.outPipeName, "in" -> op.inPipeName, "key" -> key, "asc" -> orders)) 19 | } 20 | } 21 | 22 | object OrderByEmitter { 23 | lazy val instance = new OrderByEmitter 24 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/plan/rewriting/dsl/words/ImmediateEndWord.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership.
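The helper functions embedded in the <% ... %> block of the Pig script above are plain Scala and can be sanity-checked outside of Piglet; a small illustrative check:

def extractDomain(url: String): String =
  if (!url.startsWith("http")) url
  else {
    val startPos = url.indexOf("//") + 2
    val endPos = if (url.indexOf("/", startPos) < 0) url.length else url.indexOf("/", startPos)
    url.substring(startPos, endPos)
  }

assert(extractDomain("http://example.org/news/article.html") == "example.org")
assert(extractDomain("example.org/news") == "example.org/news") // non-http values pass through unchanged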
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.plan.rewriting.dsl.words 18 | 19 | import dbis.piglet.op.PigOperator 20 | import dbis.piglet.plan.rewriting.dsl.traits.{BuilderT, EndWordT} 21 | 22 | class ImmediateEndWord[FROM <: PigOperator, TO](override val b: BuilderT[FROM, TO]) extends EndWordT[FROM, TO]{ 23 | 24 | } 25 | -------------------------------------------------------------------------------- /common/src/main/scala/dbis/piglet/tools/logging/PigletLogging.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.tools.logging 2 | 3 | import java.io.PrintStream 4 | 5 | import org.slf4j.LoggerFactory 6 | 7 | trait PigletLogging { 8 | 9 | /* 10 | * This ugly hack is used to suppress the annoying warning of multiple bindings in slf4j. 11 | */ 12 | val filteredErr = new PrintStream(System.err) { 13 | override def println(l: String) = if (!l.startsWith("SLF4J") && !l.startsWith("[INFO ] [EtmMonitor]")) super.println(l) 14 | } 15 | System.setErr(filteredErr) 16 | 17 | val filteredOut = new PrintStream(System.out) { 18 | override def println(l: String) = if (!l.startsWith("SLF4J") && !l.startsWith("[INFO ] [EtmMonitor]")) super.println(l) 19 | } 20 | System.setOut(filteredOut) 21 | 22 | 23 | 24 | 25 | protected val logger: PigletLogger = { 26 | val baseLogger = LoggerFactory.getLogger(getClass.getName) 27 | 28 | if(baseLogger.isInstanceOf[ch.qos.logback.classic.Logger]) 29 | PigletLogger(baseLogger.asInstanceOf[ch.qos.logback.classic.Logger]) 30 | else { 31 | Console.err.println(s"Could not bind logger: $baseLogger") 32 | new PigletLogger(None) 33 | } 34 | } 35 | 36 | 37 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/scala_lang/DumpEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.scala_lang 2 | 3 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext} 4 | import dbis.piglet.op.Dump 5 | 6 | /** 7 | * Created by kai on 05.12.16. 
8 | */ 9 | class DumpEmitter extends CodeEmitter[Dump] { 10 | override def template: String = """<if (mute)><in>.foreach{t=> 11 | PerfMonitor.sampleSize(t,"<lineage>", accum, randFactor) 12 |}<else> 13 |<in>.map{t => 14 | PerfMonitor.sampleSize(t,"<lineage>", accum, randFactor) 15 | t 16 |}.collect.foreach(t => println(t.toString()))<endif>""".stripMargin 17 | 18 | 19 | override def code(ctx: CodeGenContext, op: Dump): String = { 20 | val map = collection.mutable.Map("in" -> op.inPipeName, "lineage" -> op.lineageSignature) 21 | if(op.mute) 22 | map += ("mute" -> op.mute.toString) 23 | render(map.toMap) 24 | } 25 | 26 | } 27 | 28 | object DumpEmitter { 29 | lazy val instance = new DumpEmitter 30 | } 31 | -------------------------------------------------------------------------------- /ceplib/src/main/scala/dbis/piglet/cep/flink/CustomDataSetMatcher.scala: -------------------------------------------------------------------------------- 1 | 2 | package dbis.piglet.cep.flink 3 | 4 | import scala.reflect.ClassTag 5 | import dbis.piglet.cep.ops.SelectionStrategy._ 6 | import dbis.piglet.cep.ops.OutputStrategy._ 7 | import dbis.piglet.cep.nfa.NFAController 8 | import dbis.piglet.backends.{SchemaClass => Event} 9 | import org.apache.flink.api.common.typeinfo.TypeInformation 10 | //import org.apache.flink.api.java.ExecutionEnvironment 11 | //import org.apache.flink.api.java.DataSet 12 | import scala.collection.JavaConversions._ 13 | import org.apache.flink.api.scala._ 14 | 15 | class CustomDataSetMatcher[T <: Event: ClassTag: TypeInformation](dataSet: DataSet[T]) { 16 | 17 | def matchNFA(nfa: NFAController[T], sstr: SelectionStrategy = FirstMatch, out: OutputStrategy = Combined) = { 18 | // println("create a new DataSet matcher") 19 | val flinkEnv = dataSet.getExecutionEnvironment 20 | new DataSetMatcher(dataSet, nfa, flinkEnv, sstr, out).compute() 21 | } 22 | 23 | } 24 | 25 | object CustomDataSetMatcher { 26 | 27 | implicit def addDataSetMatcher[T <: Event: ClassTag: TypeInformation](dataSet: DataSet[T]) = { 28 | // println("add a custom DataSet function") 29 | new CustomDataSetMatcher(dataSet) 30 | } 31 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/Cache.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.op 2 | 3 | object CacheMode extends Enumeration { 4 | type CacheMode = Value 5 | val NONE, 6 | MEMORY_ONLY, 7 | MEMORY_AND_DISK, 8 | MEMORY_ONLY_SER, 9 | MEMORY_AND_DISK_SER, 10 | DISK_ONLY, 11 | MEMORY_ONLY_2, 12 | MEMORY_AND_DISK_2 = Value 13 | } 14 | 15 | import CacheMode.CacheMode 16 | 17 | case class Cache (private[op] val out: Pipe, 18 | private[op] val in: Pipe, 19 | operatorId: String, 20 | cacheMode: CacheMode) extends PigOperator(out, in) { 21 | 22 | if(in.producer != null) { 23 | schema = in.producer.schema 24 | } 25 | 26 | 27 | override def equals(other: Any) = other match { 28 | case o: Cache => operatorId == o.operatorId && outPipeName == o.outPipeName 29 | case _ => false 30 | } 31 | 32 | override def hashCode() = (operatorId+outPipeName).hashCode() 33 | 34 | override def toString = 35 | s"""CACHE 36 | | out = $outPipeName 37 | | in = $inPipeName 38 | | operatorId = $operatorId 39 | | mode = $cacheMode 40 | """.stripMargin 41 | 42 | override def lineageString = s"CACHE%$operatorId%$cacheMode%${super.lineageString}" 43 | } 44 | -------------------------------------------------------------------------------- /ceplib/src/main/scala/dbis/piglet/cep/engines/NextMatch.scala:
-------------------------------------------------------------------------------- 1 | package dbis.piglet.cep.engines 2 | 3 | import dbis.piglet.backends.{SchemaClass => Event} 4 | import scala.reflect.ClassTag 5 | import dbis.piglet.cep.nfa.NFAStructure 6 | import dbis.piglet.cep.nfa.NFAController 7 | import dbis.piglet.cep.nfa.NormalState 8 | import dbis.piglet.cep.ops.MatchCollector 9 | class NextMatch[T <: Event: ClassTag](nfaController: NFAController[T], collector: MatchCollector[T]) extends CEPEngine(nfaController, collector) with Serializable { 10 | var statics: Long = 0 11 | override def runEngine(event: T): Unit = { 12 | runningStructursPool.foreach ( str => engineProcess(event, str)) 13 | createNewStructue(event) 14 | runGCStructures() 15 | } 16 | private[NextMatch] def engineProcess(event: T, strInfo: (Long, NFAStructure[T])) { 17 | val currenStr = strInfo._2 18 | val result: Int = checkPredicate(event, currenStr) 19 | if (result != -1) { // the predicate is ok. 20 | currenStr.addEvent(event, currenStr.getCurrentState.asInstanceOf[NormalState[T]].getEdgeByIndex(result)) 21 | if (currenStr.complete) { //final state 22 | statics += 1 23 | collector + currenStr 24 | wantToDeletedStructurs += strInfo._1 25 | } 26 | } 27 | } 28 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/spark/StreamDistinctEmitter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
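All selection-strategy engines (NextMatch above, and FirstMatch, ContiguityMatch, AnyMatch below) share the same event-at-a-time contract inherited from CEPEngine; a minimal driver sketch, assuming CEPEngine[T] exposes runEngine as shown in its subclasses:

import dbis.piglet.backends.{SchemaClass => Event}
import dbis.piglet.cep.engines.CEPEngine

def feed[T <: Event](events: Iterable[T], engine: CEPEngine[T]): Unit =
  events.foreach(engine.runEngine)   // complete matches accumulate in the engine's MatchCollector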
16 | */ 17 | package dbis.piglet.codegen.spark 18 | 19 | import dbis.piglet.codegen.scala_lang.DistinctEmitter 20 | 21 | class StreamDistinctEmitter extends DistinctEmitter { 22 | override def template: String = """ val <out> = <in>.transform(rdd => rdd.distinct)""".stripMargin 23 | } 24 | 25 | object StreamDistinctEmitter { 26 | lazy val instance = new StreamDistinctEmitter 27 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/spark/SpatialIndexEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.spark 2 | 3 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext} 4 | import dbis.piglet.op.{IndexMethod, IndexOp} 5 | 6 | class SpatialIndexEmitter extends CodeEmitter[IndexOp] { 7 | // new dbis.stark.spatial.partitioner.SpatialGridPartitioner(<out>KeyBy, partitionsPerDimension=20, pointsOnly=false) 8 | override def template = 9 | """val <out>KeyBy = <in><keyby> 10 | |val <out>idxParti = new dbis.stark.spatial.partitioner.BSPartitioner(<out>KeyBy, 1, 1000, false) 11 | |val <out> = <out>KeyBy.index(Some(<out>idxParti), <method>(<params>)).map{ idx => 12 | | PerfMonitor.sampleSize(idx, "<lineage>", accum, randFactor) 13 | | idx 14 | |}""".stripMargin 15 | 16 | override def code(ctx: CodeGenContext, op: IndexOp): String = render(Map( 17 | "out" -> op.outPipeName, 18 | "in" -> op.inPipeName, 19 | "method" -> IndexMethod.methodName(op.method), 20 | "params" -> op.params.mkString(","), 21 | "keyby" -> SpatialEmitterHelper.keyByCode(op.inputSchema, op.field, ctx), 22 | "lineage" -> op.lineageSignature 23 | ) ) 24 | } 25 | 26 | object SpatialIndexEmitter { 27 | lazy val instance = new SpatialIndexEmitter 28 | } -------------------------------------------------------------------------------- /ceplib/src/main/scala/dbis/piglet/cep/engines/FirstMatch.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.cep.engines 2 | 3 | import dbis.piglet.backends.{SchemaClass => Event} 4 | import scala.reflect.ClassTag 5 | import dbis.piglet.cep.nfa.NFAStructure 6 | import dbis.piglet.cep.nfa.NFAController 7 | import dbis.piglet.cep.nfa.NormalState 8 | import dbis.piglet.cep.ops.MatchCollector 9 | class FirstMatch[T <: Event: ClassTag](nfaController: NFAController[T], collector: MatchCollector[T]) extends CEPEngine(nfaController, collector) with Serializable { 10 | var statics: Long = 0 11 | 12 | override def runEngine(event: T): Unit = { 13 | if (runningStructursPool.size == 0) 14 | createNewStructue(event); 15 | else { 16 | engineProcess(event, runningStructursPool.head._2); 17 | } 18 | } 19 | private[FirstMatch] def engineProcess(event: T, currenStr: NFAStructure[T]) { 20 | val result: Int = checkPredicate(event, currenStr) 21 | if (result != -1) { // the predicate is ok.
22 | currenStr.addEvent(event, currenStr.getCurrentState.asInstanceOf[NormalState[T]].getEdgeByIndex(result)) 23 | if (currenStr.complete) { //final state 24 | statics += 1 25 | //println("complete") 26 | collector + currenStr 27 | runningStructursPool.clear() 28 | } 29 | } 30 | } 31 | } -------------------------------------------------------------------------------- /ceplib/src/main/scala/dbis/piglet/cep/flink/CustomDataStreamMatcher.scala: -------------------------------------------------------------------------------- 1 | 2 | package dbis.piglet.cep.flink 3 | 4 | import scala.reflect.ClassTag 5 | import dbis.piglet.cep.ops.SelectionStrategy._ 6 | import dbis.piglet.cep.ops.OutputStrategy._ 7 | import dbis.piglet.cep.nfa.NFAController 8 | import dbis.piglet.backends.{SchemaClass => Event} 9 | import org.apache.flink.api.common.typeinfo.TypeInformation 10 | //import org.apache.flink.api.java.ExecutionEnvironment 11 | //import org.apache.flink.api.java.DataSet 12 | import scala.collection.JavaConversions._ 13 | import org.apache.flink.streaming.api.scala._ 14 | 15 | class CustomDataStreamMatcher[T <: Event: ClassTag: TypeInformation](@transient val dataStream: DataStream[T]) { 16 | 17 | def matchNFA(nfa: NFAController[T], flinkEnv: StreamExecutionEnvironment, sstr: SelectionStrategy = FirstMatch, out: OutputStrategy = Combined) = { 18 | // println("create a new DataStream matcher") 19 | new DataStreamMatcher(dataStream, nfa, flinkEnv, sstr, out).compute() 20 | } 21 | 22 | } 23 | 24 | object CustomDataStreamMatcher { 25 | 26 | implicit def addDataStreamMatcher[T <: Event: ClassTag: TypeInformation](@transient dataStream: DataStream[T]) = { 27 | // println("add a custom DataStream function") 28 | new CustomDataStreamMatcher(dataStream) 29 | } 30 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/spark/StreamOrderByEmitter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership.
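With the implicit conversion above in scope, matchNFA becomes available directly on a Flink DataStream; a usage sketch (the NFA construction is elided because NFAController's builder API is not part of this excerpt, and MyEvent is a hypothetical SchemaClass):

import org.apache.flink.streaming.api.scala._
import dbis.piglet.cep.flink.CustomDataStreamMatcher._
import dbis.piglet.cep.ops.SelectionStrategy.FirstMatch
import dbis.piglet.cep.ops.OutputStrategy.Combined

// val env = StreamExecutionEnvironment.getExecutionEnvironment
// val events: DataStream[MyEvent] = ...
// val matches = events.matchNFA(myNfa, env, FirstMatch, Combined)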
16 | */ 17 | package dbis.piglet.codegen.spark 18 | 19 | import dbis.piglet.codegen.scala_lang.OrderByEmitter 20 | 21 | class StreamOrderByEmitter extends OrderByEmitter { 22 | override def template: String = """ val <out> = <in>.transform(rdd => rdd.repartition(1).sortBy(t => <key>, <asc>))""".stripMargin 23 | } 24 | 25 | object StreamOrderByEmitter { 26 | lazy val instance = new StreamOrderByEmitter 27 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/cmd/RegisterCmd.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package dbis.piglet.op.cmd 19 | 20 | import dbis.piglet.op.PigOperator 21 | 22 | 23 | /** 24 | * RegisterCmd represents a pseudo operator for the REGISTER statement. This "operator" will 25 | * be eliminated during building the dataflow plan. 26 | * 27 | * @param jarFile the URI of the Jar file to be registered 28 | */ 29 | case class RegisterCmd(jarFile: String) extends PigOperator(List(), List()) 30 | 31 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/cmd/SetCmd.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package dbis.piglet.op.cmd 19 | 20 | import dbis.piglet.expr.Value 21 | import dbis.piglet.op.PigOperator 22 | 23 | 24 | /** 25 | * SetCmd represents a pseudo operator for the SET statement.
26 | * 27 | * @param param the parameter name 28 | * @param value the value of the parameter set by this statement 29 | */ 30 | case class SetCmd(param: String, value: Value) extends PigOperator(List(), List()) 31 | 32 | 33 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/spark/SpatialEmitterHelper.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.spark 2 | 3 | import dbis.piglet.codegen.scala_lang.ScalaEmitter 4 | import dbis.piglet.op.PigOperator 5 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext} 6 | import dbis.piglet.expr.NamedField 7 | import dbis.piglet.expr.PositionalField 8 | import dbis.piglet.expr.Ref 9 | import dbis.piglet.schema.Schema 10 | 11 | object SpatialEmitterHelper { 12 | 13 | 14 | def geomIsFirstPos[T <: PigOperator](ref: Ref, op: T): Boolean = { 15 | 16 | val pos = ref match { 17 | case nf : NamedField => 18 | op.inputSchema.get.indexOfField(nf) 19 | case pf : PositionalField => 20 | pf.pos 21 | case _ => throw new IllegalArgumentException(s"expected field reference, got: $ref") 22 | } 23 | 24 | pos == 0 25 | } 26 | 27 | 28 | def keyByCode(schema: Option[Schema], ref: Ref, ctx: CodeGenContext): String = 29 | s".keyBy(${ctx.asString("tuplePrefix")} => ${ScalaEmitter.emitRef(CodeGenContext(ctx,Map("schema"->schema)), ref)})" 30 | 31 | 32 | def keyByCode(schema: Option[Schema], refs: Iterable[Ref], ctx: CodeGenContext): String = 33 | s".keyBy(${ctx.asString("tuplePrefix")} => (${refs.map(ref => ScalaEmitter.emitRef(CodeGenContext(ctx,Map("schema"->schema)), ref)).mkString(",")}))" 34 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/plan/rewriting/dsl/words/ReplaceWord.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package dbis.piglet.plan.rewriting.dsl.words 18 | 19 | import dbis.piglet.op.PigOperator 20 | import dbis.piglet.plan.rewriting.dsl.builders.PigOperatorBuilder 21 | import dbis.piglet.plan.rewriting.dsl.traits.{CheckWordT, BuilderT, EndWordT} 22 | 23 | class ReplaceWord[FROM <: PigOperator](override val b: BuilderT[FROM, PigOperator]) 24 | extends EndWordT[FROM, PigOperator] with CheckWordT[FROM, PigOperator] { 25 | } 26 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/tools/RingBuffer.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.tools 2 | 3 | import scala.reflect.ClassTag 4 | 5 | trait RingLike[T] extends Seq[T] { 6 | def put(o: T) 7 | } 8 | 9 | class RingBuffer[T: ClassTag](capacity: Int) extends RingLike[T] { 10 | 11 | require(capacity > 0, s"capacity must be > 0 , but is $capacity") 12 | 13 | private val ring = Array.fill(capacity){Option.empty[T]} 14 | 15 | private var curr = 0 16 | 17 | override def put(o: T): Unit = { 18 | require(o != null) 19 | 20 | ring(curr) = Some(o) 21 | 22 | curr = (curr + 1) % capacity 23 | } 24 | 25 | override def length: Int = ring.count(_.isDefined) 26 | 27 | override def foreach[U](f: (T) => U): Unit = iterator.foreach(f) 28 | 29 | override def apply(idx: Int): T = { 30 | require(idx >= 0, s"idx must be >= 0, but is $idx") 31 | val a = ring.apply(idx % capacity) 32 | 33 | if(a.isDefined) 34 | a.get 35 | else 36 | throw new ArrayIndexOutOfBoundsException(s"no such index $idx") 37 | } 38 | 39 | override def iterator: Iterator[T] = ring.iterator.filter(_.isDefined).map(_.get) 40 | } 41 | 42 | object RingBuffer { 43 | def apply[T:ClassTag](elements: T*): RingBuffer[T] = { 44 | val b = new RingBuffer[T](elements.length) 45 | for(e <- elements) 46 | b.put(e) 47 | 48 | b 49 | } 50 | } -------------------------------------------------------------------------------- /ceplib/src/main/scala/dbis/piglet/cep/engines/ContiguityMatch.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.cep.engines 2 | 3 | import dbis.piglet.backends.{SchemaClass => Event} 4 | import scala.reflect.ClassTag 5 | import dbis.piglet.cep.nfa.NFAStructure 6 | import dbis.piglet.cep.nfa.NFAController 7 | import dbis.piglet.cep.nfa.NormalState 8 | import dbis.piglet.cep.ops.MatchCollector 9 | class ContiguityMatch[T <: Event: ClassTag](nfaController: NFAController[T], collector: MatchCollector[T]) extends CEPEngine(nfaController, collector) with Serializable { 10 | var statics: Long = 0 11 | override def runEngine(event: T): Unit = { 12 | runningStructursPool.foreach ( str => engineProcess(event, str)) 13 | createNewStructue(event) 14 | runGCStructures() 15 | } 16 | private[ContiguityMatch] def engineProcess(event: T, strInfo: (Long, NFAStructure[T])) { 17 | val currenStr = strInfo._2 18 | val result: Int = checkPredicate(event, currenStr) 19 | if (result != -1) { // the predicate is ok.
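A usage sketch for the RingBuffer above (illustrative only): once the capacity is exceeded, put() overwrites the oldest slot, and iteration follows slot order rather than insertion order.

val buf = new RingBuffer[Int](3)
Seq(1, 2, 3, 4).foreach(buf.put)    // 4 wraps around and overwrites 1

assert(buf.length == 3)
assert(buf.toList == List(4, 2, 3)) // slot order: slot 0 now holds the newest element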
20 | currenStr.addEvent(event, currenStr.getCurrentState.asInstanceOf[NormalState[T]].getEdgeByIndex(result)) 21 | if (currenStr.complete) { //final state 22 | statics += 1 23 | collector + currenStr 24 | wantToDeletedStructurs += strInfo._1 25 | } 26 | } 27 | else 28 | wantToDeletedStructurs += strInfo._1 29 | } 30 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/scala_lang/StoreEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.scala_lang 2 | 3 | import dbis.piglet.backends.BackendManager 4 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext, CodeGenException} 5 | import dbis.piglet.op.{PigOperator, Store} 6 | 7 | /** 8 | * Created by kai on 05.12.16. 9 | */ 10 | class StoreEmitter extends CodeEmitter[Store] { 11 | override def template: String = 12 | // """<in>.write("<file>", <func><if (params)>, <params><endif>)""".stripMargin 13 | """ <func>[<class>]().write("<file>", <in><if (params)>, <params><endif>)""".stripMargin 14 | 15 | 16 | override def code(ctx: CodeGenContext, op: Store): String = { 17 | var paramMap = Map("in" -> op.inPipeName, 18 | "file" -> op.file.toString, 19 | "func" -> op.func.getOrElse(BackendManager.backend.defaultConnector)) 20 | op.schema match { 21 | case Some(s) => 22 | val cName = ScalaEmitter.schemaClassName(s) 23 | 24 | paramMap += ("class" -> cName) 25 | case None => paramMap += ("class" -> "Record") 26 | } 27 | 28 | if (op.params != null && op.params.nonEmpty) 29 | paramMap += ("params" -> op.params.mkString(",")) 30 | render(paramMap) 31 | } 32 | 33 | } 34 | 35 | object StoreEmitter { 36 | lazy val instance = new StoreEmitter 37 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/scala_lang/DifferenceEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.scala_lang 2 | 3 | import dbis.piglet.codegen.spark.SpatialEmitterHelper 4 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext} 5 | import dbis.piglet.op.Difference 6 | 7 | class DifferenceEmitter extends CodeEmitter[Difference] { 8 | override def template: String = """val <out> = <in1>.subtract(<in2>)""".stripMargin 9 | 10 | def templateKeyed = """val <out> = <in1><keyby1>.subtractByKey(<in2><keyby2>).map(_._2)""" 11 | 12 | override def code(ctx: CodeGenContext, op: Difference): String = { 13 | 14 | val (templ,params) = if(op.refs1.isDefined) { 15 | val m = Map("out" -> op.outPipeName, 16 | "in1" -> op.inPipeNames.head, 17 | "in2" -> op.inPipeNames.last, 18 | "keyby1" -> SpatialEmitterHelper.keyByCode(op.inputs.head.producer.schema, op.refs1.get,ctx), 19 | "keyby2" -> SpatialEmitterHelper.keyByCode(op.inputs.last.producer.schema, op.refs2.get,ctx) 20 | ) 21 | (templateKeyed, m) 22 | } else { 23 | val m = Map("out" -> op.outPipeName, 24 | "in1" -> op.inPipeNames.head, 25 | "in2" -> op.inPipeNames.last 26 | ) 27 | (template, m) 28 | } 29 | 30 | CodeEmitter.render(templ, params) 31 | } 32 | 33 | } 34 | 35 | object DifferenceEmitter { 36 | lazy val instance = new DifferenceEmitter 37 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/plan/rewriting/dsl/words/MergeWord.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership.
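The two code shapes DifferenceEmitter chooses between can be mimicked on plain collections to see the semantics (illustrative; the real templates target Spark RDDs via subtract and subtractByKey):

val a = Seq(("k1", 1), ("k2", 2), ("k3", 3))
val b = Seq(("k1", 9))

// template: whole-record difference, analogous to rdd1.subtract(rdd2)
val byRecord = a.diff(b)            // keeps ("k1",1) too, since it differs from ("k1",9)

// templateKeyed: difference on an extracted key, analogous to
// rdd1.keyBy(...).subtractByKey(rdd2.keyBy(...)).map(_._2)
val byKey = a.filterNot { case (k, _) => b.exists(_._1 == k) }
assert(byKey == Seq(("k2", 2), ("k3", 3)))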
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.plan.rewriting.dsl.words 18 | 19 | import dbis.piglet.op.PigOperator 20 | import dbis.piglet.plan.rewriting.dsl.traits.{CheckWordT, EndWordT, BuilderT} 21 | 22 | import scala.reflect.ClassTag 23 | 24 | class MergeWord[FROM1 <: PigOperator : ClassTag, FROM2 <: PigOperator : ClassTag] 25 | (override val b: BuilderT[(FROM1, FROM2), PigOperator]) 26 | extends EndWordT[(FROM1, FROM2), PigOperator] with CheckWordT[(FROM1, FROM2), PigOperator] { 27 | } 28 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/mm/MaterializationPoint.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.mm 2 | 3 | import dbis.piglet.Piglet.Lineage 4 | 5 | import scala.concurrent.duration.Duration 6 | 7 | /** 8 | * A MaterializationPoint object represents information about a possible materialization of the result 9 | * of a dataflow operator. It is identified by a hash of the lineage string of the operator and collects 10 | * profile information. 11 | * 12 | * @param lineage the MD5 hash of the lineage string of the operator 13 | * @param benefit the cumulative benefit of this materialization point compared to the root operator 14 | * @param prob The probability for re-using this operator 15 | * @param cost The duration that this operator takes 16 | */ 17 | case class MaterializationPoint(lineage: Lineage, prob: Double, cost: Long, bytes: Long, benefit: Duration = Duration.Undefined) { 18 | override def hashCode(): Int = lineage.hashCode 19 | 20 | override def equals(obj: scala.Any): Boolean = obj match { 21 | case m:MaterializationPoint => m.lineage equals lineage 22 | case _ => false 23 | } 24 | 25 | override def toString = s"MaterializationPoint($lineage, prob=$prob, cost=$cost ms, benefit=${benefit.toMillis} ms)" 26 | } 27 | 28 | 29 | object MaterializationPoint { 30 | def dummy(lineage: Lineage): MaterializationPoint = MaterializationPoint(lineage, -1,-1, -1, Duration.Undefined) 31 | } -------------------------------------------------------------------------------- /ceplib/src/main/scala/dbis/piglet/cep/flink/DataSetMatcher.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.cep.flink 2 | 3 | import scala.reflect.ClassTag 4 | import dbis.piglet.cep.nfa.NFAController 5 | import dbis.piglet.cep.engines._ 6 | import dbis.piglet.cep.ops.SelectionStrategy._ 7 | import dbis.piglet.cep.ops.OutputStrategy._ 8 | import dbis.piglet.backends.{SchemaClass => Event} 9 | import dbis.piglet.cep.ops.MatchCollector 10 | import org.apache.flink.api.common.typeinfo.TypeInformation 11 | import dbis.piglet.cep.ops.SelectionStrategy 12 | //import org.apache.flink.api.java.operators.CustomUnaryOperation 13 | //import scala.collection.mutable.ArrayBuffer 14 | import scala.collection.mutable.ListBuffer 15 | //import 
org.apache.flink.api.java.DataSet 16 | //import org.apache.flink.api.java.ExecutionEnvironment 17 | import scala.collection.JavaConversions._ 18 | import org.apache.flink.api.scala._ 19 | import dbis.piglet.cep.ops.EngineConf 20 | 21 | class DataSetMatcher[T <: Event: ClassTag: TypeInformation](input: DataSet[T], nfa: NFAController[T], flinkEnv: ExecutionEnvironment, sstr: SelectionStrategy = SelectionStrategy.FirstMatch, out: OutputStrategy = Combined) extends EngineConf[T](nfa, sstr) with java.io.Serializable { 22 | def compute(): DataSet[T] = { 23 | input.collect().foreach ( event => engine.runEngine(event) ) // materializes the DataSet on the driver and runs the NFA locally 24 | flinkEnv.fromCollection(collector.convertEventsToArray().toSeq) 25 | } 26 | 27 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/plan/PrettyPrinter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.plan 18 | 19 | import dbis.piglet.op.PigOperator 20 | 21 | object PrettyPrinter extends org.kiama.output.PrettyPrinter{ 22 | def pretty(op: PigOperator): String = { 23 | super.pretty(show(op)) 24 | } 25 | 26 | def show(op: PigOperator): Doc = { 27 | val prettyInputs = op.inputs.map(p => show(p.producer)) 28 | parens ( 29 | value(op) 30 | <> nest( 31 | line 32 | <> ssep(prettyInputs, line))) 33 | } 34 | 35 | def show(p: List[PigOperator]): Doc = any(p) 36 | } 37 | -------------------------------------------------------------------------------- /ceplib/src/main/scala/dbis/piglet/cep/engines/AnyMatch.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.cep.engines 2 | 3 | import dbis.piglet.backends.{SchemaClass => Event} 4 | import scala.reflect.ClassTag 5 | import dbis.piglet.cep.nfa.NFAStructure 6 | import dbis.piglet.cep.nfa.NFAController 7 | import dbis.piglet.cep.nfa.NormalState 8 | import dbis.piglet.cep.ops.MatchCollector 9 | 10 | class AnyMatch[T <: Event: ClassTag](nfaController: NFAController[T], collector: MatchCollector[T]) extends CEPEngine(nfaController, collector) with Serializable { 11 | var statics: Long = 0 12 | override def runEngine(event: T): Unit = { 13 | runningStructursPool.foreach ( str => engineProcess(event, str)) 14 | createNewStructue(event) 15 | runGCStructures() 16 | } 17 | private[AnyMatch] def engineProcess(event: T, strInfo: (Long, NFAStructure[T])) { 18 | val currenStr = strInfo._2 19 | val result: Int = checkPredicate(event, currenStr) 20 | if (result != -1) { // the predicate is ok.
21 | val cloneStr = currenStr.clone 22 | runningStructursPool+= (structureID() -> cloneStr) 23 | currenStr.addEvent(event, currenStr.getCurrentState.asInstanceOf[NormalState[T]].getEdgeByIndex(result)) 24 | if (currenStr.complete) { //final state 25 | statics += 1 26 | collector + currenStr 27 | wantToDeletedStructurs += strInfo._1 28 | } 29 | } 30 | else 31 | wantToDeletedStructurs += strInfo._1 32 | } 33 | } -------------------------------------------------------------------------------- /src/test/scala/dbis/piglet/tools/CodeMatcherSpec.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.tools 2 | 3 | import org.scalatest.{Matchers, FlatSpec} 4 | 5 | /** 6 | * Created by kai on 18.11.15. 7 | */ 8 | class CodeMatcherSpec extends FlatSpec with Matchers { 9 | "The SnippetMatcher" should "match two equal strings" in { 10 | SnippetMatcher.matches("abc 12 def", "abc 12 def") should be (true) 11 | } 12 | 13 | it should "not match two different strings" in { 14 | SnippetMatcher.matches("abc 12 def", "abc 12") should be (false) 15 | } 16 | 17 | it should "match a string to a corresponding template" in { 18 | SnippetMatcher.matches("abc x_8_ def x_8_", "abc x_$1_ def x_$1_") should be (true) 19 | } 20 | 21 | it should "match a string to another corresponding template" in { 22 | SnippetMatcher.matches("abc x_6_ def x_7_", "abc x_$2_ def x_$1_") should be (true) 23 | } 24 | 25 | it should "match a string with longer ids to a corresponding template" in { 26 | SnippetMatcher.matches("abc x_82_ def x_82_", "abc x_$1_ def x_$1_") should be (true) 27 | } 28 | 29 | it should "match a string with different longer ids to a corresponding template" in { 30 | SnippetMatcher.matches("abc x_82_ def x_83_", "abc x_$1_ def x_$2_") should be (true) 31 | } 32 | 33 | it should "not match a string to a wrong template" in { 34 | SnippetMatcher.matches("abc x_8_ def x_9_", "abc x_$1_ def x_$1 _") should be (false) 35 | } 36 | } 37 | 38 | -------------------------------------------------------------------------------- /setm/src/main/scala/dbis/setm/SETM.scala: -------------------------------------------------------------------------------- 1 | package dbis.setm 2 | 3 | import etm.core.configuration.BasicEtmConfigurator 4 | import etm.core.configuration.EtmManager 5 | import etm.core.renderer.{MeasurementRenderer, SimpleTextRenderer} 6 | 7 | /** 8 | * SETM is a simple wrapper for JETM to provide a 9 | * more Scala-like usage. 
10 | 11 | * See http://jetm.void.fm/ 12 | */ 13 | object SETM { 14 | BasicEtmConfigurator.configure(true) // nested 15 | private val monitor = EtmManager.getEtmMonitor() 16 | 17 | // Start monitoring 18 | monitor.start() 19 | 20 | var quiet: Boolean = false 21 | def enable = monitor.enableCollection() 22 | def disable = monitor.disableCollection() 23 | /** 24 | * Stop monitoring, collect results and render them 25 | * 26 | * Results are rendered with a SimpleTextRenderer. 27 | */ 28 | def collect() = { 29 | monitor.render(new SimpleTextRenderer()) 30 | monitor.stop() 31 | } 32 | 33 | /** 34 | * Measure execution time of the given function 35 | * 36 | * @param name A human readable name to identify this timing measurement 37 | * @param f The function to measure execution time of 38 | */ 39 | def timing[T](name: String)(f: => T) = { 40 | val p = monitor.createPoint(name) 41 | if(!quiet) 42 | print(s"==> $name \r") 43 | 44 | try { 45 | f 46 | } finally { 47 | p.collect 48 | } 49 | } 50 | } 51 | 52 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/Describe.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.op 18 | 19 | /** 20 | * Describe represents the DESCRIBE operator of Pig. 21 | * 22 | * @param in the input pipe 23 | */ 24 | case class Describe(private val in: Pipe) extends PigOperator(List(), List(in)) { 25 | 26 | /** 27 | * Returns the lineage string describing the sub-plan producing the input for this operator. 28 | * 29 | * @return a string representation of the sub-plan. 30 | */ 31 | override def lineageString: String = { 32 | s"""DESCRIBE%""" + super.lineageString 33 | } 34 | 35 | override def toString = 36 | s"""DESCRIBE 37 | | in = $inPipeName""".stripMargin 38 | } 39 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/scala_lang/LimitEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.scala_lang 2 | 3 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext} 4 | import dbis.piglet.op.Limit 5 | 6 | /** 7 | * Created by kai on 03.12.16.
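A usage sketch for SETM (illustrative): wrap arbitrary expressions in timing(...) and render the aggregated measurements once at the end; this assumes JETM is on the classpath, as in this project.

import dbis.setm.SETM

val sum = SETM.timing("expensive step") {
  (1 to 1000000).sum   // the measured expression's value is returned unchanged
}
SETM.collect()          // prints all measurement points and stops the monitor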
8 | */ 9 | class LimitEmitter extends CodeEmitter[Limit] { 10 | // val <out> = sc.parallelize(<in>.take(<num>)) 11 | override def template: String = smallLimitTemplate 12 | 13 | lazy val smallLimitTemplate = s"""val <out> = sc.parallelize(<in>.take(<num>)).map{e => 14 | | PerfMonitor.sampleSize(e,"<lineage>", accum, randFactor) 15 | | e 16 | |}""".stripMargin 17 | 18 | 19 | lazy val largeLimitTemplate = """val <out> = <in>.zipWithIndex.filter{case (_,idx) => idx \< <num>}.map{t => 20 | | val res = t._1 21 | | 22 | | PerfMonitor.sampleSize(res,"<lineage>", accum, randFactor) 23 | | 24 | | res 25 | |}""".stripMargin 26 | 27 | override def code(ctx: CodeGenContext, op: Limit): String = { 28 | 29 | val params = Map( 30 | "out" -> op.outPipeName, 31 | "in" -> op.inPipeName, 32 | "num" -> op.num, 33 | "lineage" -> op.lineageSignature) 34 | 35 | if(op.num > 1000) 36 | CodeEmitter.render(largeLimitTemplate,params) 37 | else 38 | render(params) 39 | 40 | } 41 | 42 | } 43 | 44 | object LimitEmitter { 45 | lazy val instance = new LimitEmitter 46 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/scala_lang/LoadEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.scala_lang 2 | 3 | import dbis.piglet.backends.BackendManager 4 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext} 5 | import dbis.piglet.op.Load 6 | 7 | /** 8 | * Created by kai on 03.12.16. 9 | */ 10 | class LoadEmitter extends CodeEmitter[Load] { 11 | override def template: String = 12 | // """val <out> = <func>.load[<class>](sc,"<file>", <extractor>, <params>)""" 13 | """ val <out> = <func>[<class>](<if (profiling)>randFactor<endif>).load(sc, "<file>", <extractor><if (params)>, <params><endif><if (profiling)>, lineageAndAccum = Some(("<lineage>",accum))<else>, lineageAndAccum = None<endif>)""".stripMargin 14 | 15 | 16 | 17 | override def code(ctx: CodeGenContext, op: Load): String = { 18 | var paramMap = ScalaEmitter.emitExtractorFunc(op, op.loaderFunc) 19 | paramMap += ("out" -> op.outPipeName) 20 | paramMap += ("file" -> op.file.toString) 21 | paramMap += ("lineage" -> op.lineageSignature) 22 | if (op.loaderFunc.isEmpty) 23 | paramMap += ("func" -> BackendManager.backend.defaultConnector) 24 | else { 25 | paramMap += ("func" -> op.loaderFunc.get) 26 | if (op.loaderParams != null && op.loaderParams.nonEmpty) 27 | paramMap += ("params" -> op.loaderParams.mkString(",")) 28 | } 29 | render(paramMap) 30 | } 31 | } 32 | 33 | object LoadEmitter { 34 | lazy val instance = new LoadEmitter 35 | } 36 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/Limit.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
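The two limit strategies the emitter switches between at num = 1000 can be mimicked on plain collections (illustrative; the generated code targets RDDs, where take() materializes on the driver while zipWithIndex stays distributed):

val data = (1 to 10).toVector

// small limits: materialize via take() and re-parallelize
val small = data.take(3)

// large limits: stay distributed and filter on a zipWithIndex index
val large = data.zipWithIndex.collect { case (v, idx) if idx < 3 => v }

assert(small == large)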
16 | */ 17 | package dbis.piglet.op 18 | 19 | /** 20 | * Limit represents the LIMIT operator of Pig. 21 | * 22 | * @param out the output pipe (relation). 23 | * @param in the input pipe. 24 | * @param num the maximum number of tuples produced by this operator 25 | */ 26 | case class Limit(private val out: Pipe, private val in: Pipe, num: Int) extends PigOperator(out, in) { 27 | 28 | override def lineageString: String = { 29 | s"""LIMIT%$num%""" + super.lineageString 30 | } 31 | 32 | override def toString = 33 | s"""LIMIT 34 | | out = $outPipeName 35 | | in = $inPipeName 36 | | num = $num""".stripMargin 37 | 38 | } 39 | -------------------------------------------------------------------------------- /src/main/resources/logback.xml: -------------------------------------------------------------------------------- (the XML markup of this 46-line file was lost in the export; only the console encoder pattern survived: %d{HH:mm:ss.SSS} %-5level %logger{5}: %msg%n) -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/cmd/DefineCmd.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package dbis.piglet.op.cmd 19 | 20 | import dbis.piglet.op.PigOperator 21 | import dbis.piglet.expr.Value 22 | 23 | 24 | /** 25 | * DefineCmd represents a pseudo operator for the DEFINE statement. This "operator" will 26 | * be eliminated during building the dataflow plan. 27 | * 28 | * @param alias the alias name of the UDF 29 | * @param scalaName the fully qualified Scala name of the function 30 | * @param paramList a list of values used as the first standard parameters in the function call 31 | */ 32 | case class DefineCmd(alias: String, scalaName: String, paramList: List[Value]) extends PigOperator(List(), List()) 33 | 34 | 35 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/Display.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License.
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.op 18 | 19 | /** 20 | * Display represents the DISPLAY operator used to produce data for Zeppelin. 21 | * 22 | * @param in the input pipe 23 | */ 24 | case class Display(private val in: Pipe) extends PigOperator(List(), List(in)) { 25 | 26 | /** 27 | * Returns the lineage string describing the sub-plan producing the input for this operator. 28 | * 29 | * @return a string representation of the sub-plan. 30 | */ 31 | override def lineageString: String = { 32 | s"""DISPLAY%""" + super.lineageString 33 | } 34 | 35 | override def toString = 36 | s"""DISPLAY 37 | | in = $inPipeName""".stripMargin 38 | } 39 | -------------------------------------------------------------------------------- /make-distribution.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TRUE=0 4 | FALSE=1 5 | 6 | PIGLET_HOME=. 7 | 8 | FILES=(target/scala-2.11/piglet.jar 9 | sparklib/target/scala-2.11/sparklib_2.11-*.jar 10 | flinklib/target/scala-2.11/flinklib_2.11-*.jar 11 | common/target/scala-2.11/common_2.11-*.jar 12 | ceplib/target/scala-2.11/ceplib_2.11-*.jar 13 | mapreducelib/target/scala-2.11/mapreduce_2.11-*.jar 14 | script/piglet) 15 | 16 | TARGET_DIR=$PIGLET_HOME/piglet-dist 17 | 18 | 19 | function checkfile { 20 | # echo "checking file $1" 21 | if [ -r $1 ]; then 22 | FEXISTS=$TRUE 23 | else 24 | FEXISTS=$FALSE 25 | fi 26 | 27 | } 28 | 29 | if [ -z "$PIGLET_HOME" ]; then 30 | echo "Please set PIGLET_HOME" 31 | exit 1 32 | fi 33 | 34 | rm -rf $TARGET_DIR 35 | mkdir $TARGET_DIR 36 | 37 | for f in ${FILES[@]} 38 | do 39 | echo -ne "\r copying $f " 40 | sourcefile=$PIGLET_HOME/$f 41 | checkfile $sourcefile 42 | if [ $FEXISTS -eq $TRUE ]; then 43 | # targetfile=$TARGET_DIR/$f 44 | cp --parents $sourcefile $TARGET_DIR 45 | else 46 | echo "File $f does not exist - aborting" 47 | rm -rf $TARGET_DIR 48 | exit 1 49 | fi 50 | done 51 | echo -e "\rcopied files " 52 | 53 | echo -n "creating archive..."
54 | tar jcf ${TARGET_DIR}.tar.bz2 ${TARGET_DIR} 55 | echo -e "\rcreated archive at ${TARGET_DIR}.tar.bz2" 56 | 57 | echo "cleanup" 58 | rm -rf $TARGET_DIR 59 | -------------------------------------------------------------------------------- /src/test/resources/logback-test.xml: -------------------------------------------------------------------------------- (XML markup lost in the export; only the encoder pattern survived: %d{HH:mm:ss.SSS} [%thread] %-5level %logger{5} - %msg%n) -------------------------------------------------------------------------------- /ceplib/src/test/resources/logback-test.xml: -------------------------------------------------------------------------------- (XML markup lost in the export; only the encoder pattern survived: %d{HH:mm:ss.SSS} [%thread] %-5level %logger{5} - %msg%n) -------------------------------------------------------------------------------- /flinklib/src/test/resources/logback-test.xml: -------------------------------------------------------------------------------- (XML markup lost in the export; only the encoder pattern survived: %d{HH:mm:ss.SSS} [%thread] %-5level %logger{5} - %msg%n) -------------------------------------------------------------------------------- /sparklib/src/test/resources/logback-test.xml: -------------------------------------------------------------------------------- (XML markup lost in the export; only the encoder pattern survived: %d{HH:mm:ss.SSS} [%thread] %-5level %logger{5} - %msg%n) -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/RScript.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.op 18 | 19 | import dbis.piglet.schema.Schema 20 | 21 | /** 22 | * Created by kai on 13.07.15.
23 | */ 24 | case class RScript( 25 | private val out: Pipe, 26 | private val in: Pipe, 27 | script: String, 28 | loadSchema: Option[Schema] = None 29 | ) extends PigOperator(List(out), List(in), loadSchema) { 30 | 31 | override def lineageString: String = s"""STREAM%""" + super.lineageString 32 | 33 | override def toString = 34 | s"""RScript 35 | | out = $outPipeName 36 | | in = $inPipeName 37 | | script = $script 38 | | load schema = $loadSchema 39 | """.stripMargin 40 | 41 | } 42 | 43 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/plan/rewriting/internals/MutingSupport.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.plan.rewriting.internals 2 | 3 | import dbis.piglet.tools.logging.PigletLogging 4 | import dbis.piglet.plan.DataflowPlan 5 | import dbis.piglet.op.Store 6 | import dbis.piglet.op.Dump 7 | import dbis.piglet.plan.rewriting.Rewriter 8 | import org.kiama.rewriting.Rewriter._ 9 | import org.kiama.rewriting.Strategy 10 | 11 | trait MutingSupport extends PigletLogging { 12 | 13 | def mute(plan: DataflowPlan): DataflowPlan = { 14 | 15 | val strategy = (op: Any) => op match { 16 | case s: Store => 17 | val dump = Dump(s.inputs.head, mute = true) 18 | Rewriter.replace(plan, s, dump) 19 | logger.debug(s"replaced $s with $dump") 20 | Some(dump) 21 | case d: Dump if !d.mute => 22 | logger.debug(s"muting $d") 23 | d.mute = true 24 | Some(d) 25 | 26 | case _ => None 27 | } 28 | 29 | 30 | Rewriter.rewritePlan(plan, manybu(strategyf(t => strategy(t)))) 31 | // var newPlan = plan 32 | // 33 | // val sinks = newPlan.sinkNodes 34 | // 35 | // sinks.foreach { sink => sink match { 36 | // case s: Store => 37 | // val dump = Dump(s.inputs.head, quietMode = true) 38 | // newPlan = newPlan.replace(s, dump) 39 | // case d: Dump if !d.quietMode => 40 | // d.quietMode = true 41 | // case _ => // ignore other consumers (such as display, empty) 42 | // 43 | // } 44 | // } 45 | // 46 | // newPlan 47 | // 48 | } 49 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/flink/emitter/SocketWriteEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.flink.emitter 2 | 3 | import dbis.piglet.op.SocketWrite 4 | import dbis.piglet.codegen.CodeGenContext 5 | import dbis.piglet.backends.BackendManager 6 | import dbis.piglet.codegen.CodeEmitter 7 | import dbis.piglet.codegen.scala_lang.ScalaEmitter 8 | 9 | 10 | 11 | class SocketWriteEmitter extends CodeEmitter[SocketWrite] { 12 | override def template: String = """ 13 | | []().zmqPublish(":", , ) 14 | | 15 | | []().bind("", , , ) 16 | |""".stripMargin 17 | 18 | override def code(ctx: CodeGenContext, op: SocketWrite): String = { 19 | var paramMap = Map("in" -> op.inPipeName, "addr" -> op.addr, 20 | "func" -> op.func.getOrElse(BackendManager.backend.defaultConnector)) 21 | op.schema match { 22 | case Some(s) => paramMap += ("class" -> ScalaEmitter.schemaClassName(s.className)) 23 | case None => paramMap += ("class" -> "Record") 24 | } 25 | if (op.mode != "") paramMap += ("mode" -> op.mode) 26 | if (op.params != null && op.params.nonEmpty) paramMap += ("params" -> op.params.mkString(",")) 27 | render(paramMap) 28 | } 29 | } 30 | 31 | object SocketWriteEmitter { 32 | lazy val instance = new SocketWriteEmitter 33 | } -------------------------------------------------------------------------------- 
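The same pattern recurs in every emitter in this code base: a `template` string with named slots and a `code()` method that fills the slots from the operator and calls `render()`. A minimal sketch with a hypothetical pass-through operator (EchoOp and EchoEmitter are illustrations, not part of Piglet; the `<out>`/`<in>` placeholder delimiters are an assumption, since the dumped templates do not preserve the original delimiter characters):

import dbis.piglet.codegen.{CodeEmitter, CodeGenContext}
import dbis.piglet.op.{Pipe, PigOperator}

// Hypothetical one-input/one-output operator, mirroring the operators in this dump.
case class EchoOp(private val out: Pipe, private val in: Pipe)
    extends PigOperator(out, in) {
  override def lineageString: String = s"""ECHO%""" + super.lineageString
}

// The emitter contract: a template with named slots, filled in code().
class EchoEmitter extends CodeEmitter[EchoOp] {
  override def template: String = """val <out> = <in>.map(identity)"""

  override def code(ctx: CodeGenContext, op: EchoOp): String =
    render(Map("out" -> op.outPipeName, "in" -> op.inPipeName))
}

object EchoEmitter {
  lazy val instance = new EchoEmitter
}

--------------------------------------------------------------------------------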
/src/main/scala/dbis/piglet/op/Dump.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.op 18 | 19 | /** 20 | * Dump represents the DUMP operator of Pig. 21 | * 22 | * @param in the input pipe 23 | */ 24 | case class Dump(private val in: Pipe, var mute: Boolean = false) extends PigOperator(List(), List(in)) { 25 | 26 | /** 27 | * Returns the lineage string describing the sub-plan producing the input for this operator. 28 | * 29 | * @return a string representation of the sub-plan. 30 | */ 31 | override def lineageString: String = { 32 | s"""DUMP%""" + super.lineageString 33 | } 34 | 35 | override def toString = 36 | s"""DUMP 37 | | in = $inPipeName 38 | | ${if(mute) "muted" else ""}""".stripMargin 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/spark/StreamGroupingEmitter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.codegen.spark 18 | 19 | import dbis.piglet.codegen.scala_lang.GroupingEmitter 20 | 21 | class StreamGroupingEmitter extends GroupingEmitter { 22 | override def template: String = """ 23 | | val = .transform(rdd => rdd.groupBy(t => {}).map{case (k,v) => (,v)}) 24 | | 25 | | val = .transform(rdd => rdd.coalesce(1).glom.map(t => ("all", t))) 26 | |""".stripMargin 27 | } 28 | 29 | object StreamGroupingEmitter { 30 | lazy val instance = new StreamGroupingEmitter 31 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/Top.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. 
See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.op 18 | 19 | /** An operator for top-k queries. 20 | * 21 | * It can also be used for bottom-k by changing the orderSpec 22 | * 23 | * @param out 24 | * @param in 25 | * @param orderSpec 26 | * @param num 27 | */ 28 | case class Top( 29 | private val out: Pipe, 30 | private val in: Pipe, 31 | orderSpec: List[OrderBySpec], 32 | num: Int 33 | ) extends PigOperator(out, in) { 34 | 35 | override def lineageString: String = s"""TOP$num""" + super.lineageString 36 | 37 | override def toString = 38 | s"""TOP 39 | | out = $outPipeName 40 | | in = $inPipeName 41 | | order = ${orderSpec.mkString(",")} 42 | | num = $num""".stripMargin 43 | } 44 | -------------------------------------------------------------------------------- /ceplib/src/main/scala/dbis/piglet/cep/spark/RDDMatcher.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.cep.spark 2 | 3 | import org.apache.spark.rdd.RDD 4 | import org.apache.spark.{Partition, TaskContext} 5 | import scala.reflect.ClassTag 6 | import dbis.piglet.cep.nfa.NFAController 7 | import dbis.piglet.cep.engines._ 8 | import dbis.piglet.cep.ops.SelectionStrategy._ 9 | import dbis.piglet.cep.ops.OutputStrategy._ 10 | import dbis.piglet.backends.{SchemaClass => Event} 11 | import dbis.piglet.cep.ops.MatchCollector 12 | import dbis.piglet.cep.ops.SelectionStrategy 13 | 14 | class RDDMatcher[T <: Event: ClassTag](parent: RDD[T], nfa: NFAController[T], sstr: SelectionStrategy = SelectionStrategy.FirstMatch, out: OutputStrategy = Combined) extends RDD[T](parent){ 15 | val collector: MatchCollector[T] = new MatchCollector() 16 | val engine: CEPEngine[T] = sstr match { 17 | case SelectionStrategy.FirstMatch => new FirstMatch(nfa, collector) 18 | case SelectionStrategy.AllMatches => new AnyMatch(nfa, collector) 19 | case SelectionStrategy.NextMatches => new NextMatch(nfa, collector) 20 | case SelectionStrategy.ContiguityMatches => new ContiguityMatch(nfa, collector) 21 | case _ => throw new Exception("The Strategy is not supported") 22 | 23 | } 24 | override def compute(split: Partition, context: TaskContext): Iterator[T] = { 25 | firstParent[T].iterator(split, context).foreach (event => engine.runEngine(event)) 26 | collector.convertEventsToArray().iterator 27 | } 28 | 29 | 30 | override protected def getPartitions: Array[Partition] = firstParent[Event].partitions 31 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/scala_lang/StreamOpEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.scala_lang 2 | 3 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext, CodeGenException} 4 | import dbis.piglet.expr.Ref 5 | import 
dbis.piglet.op.StreamOp 6 | 7 | /** 8 | * Created by kai on 01.12.16. 9 | */ 10 | class StreamOpEmitter extends CodeEmitter[StreamOp] { 11 | override def template: String = """ val _helper = .map(t => List()) 12 | | val = (sc, _helper).map(t => ()) 13 | |""".stripMargin 14 | 15 | override def code(ctx: CodeGenContext, op: StreamOp): String = { 16 | if(op.schema.isEmpty) { 17 | throw CodeGenException("Schema must be set for STREAM THROUGH operator") 18 | } 19 | 20 | val className = ScalaEmitter.schemaClassName(op.schema.get.className) 21 | 22 | val inFields = op.inputSchema.get.fields.zipWithIndex.map{ case (f, i) => s"t._$i"}.mkString(", ") 23 | val outFields = op.schema.get.fields.zipWithIndex.map{ case (f, i) => s"t($i).asInstanceOf[${ScalaEmitter.scalaTypeMappingTable(f.fType)}]"}.mkString(", ") 24 | 25 | render(Map("out" -> op.outPipeName, 26 | "op" -> op.opName, 27 | "in" -> op.inPipeName, 28 | "class" -> className, 29 | "in_fields" -> inFields, 30 | "out_fields" -> outFields, 31 | "params" -> ScalaEmitter.emitParamList(CodeGenContext(ctx, Map("schema" -> op.schema)), op.params))) 32 | } 33 | } 34 | 35 | object StreamOpEmitter { 36 | lazy val instance = new StreamOpEmitter 37 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/plan/rewriting/dsl/traits/EndWordT.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.plan.rewriting.dsl.traits 18 | 19 | /** A trait supplying methods to set the function in a [[dbis.piglet.plan.rewriting.dsl.traits.BuilderT]] and call its 20 | * apply method. 21 | */ 22 | trait EndWordT[FROM, TO] { 23 | val b: BuilderT[FROM, TO] 24 | 25 | /** Apply ``f`` (a total function) when rewriting. 26 | * 27 | * @param f 28 | */ 29 | def applyRule(f: (FROM => Option[TO])): Unit = { 30 | b.func = f 31 | b.build() 32 | } 33 | 34 | /** Apply ``f`` (a partial function) when rewriting. 35 | * 36 | */ 37 | def applyPattern(f: scala.PartialFunction[FROM, TO]): Unit = { 38 | val lifted = f.lift 39 | 40 | b.func = lifted 41 | b.build() 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/Distinct.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.op 18 | 19 | /** 20 | * Distinct represents the DISTINCT operator of Pig. 21 | * 22 | * @param out the output pipe (relation). 23 | * @param in the input pipe. 24 | * @param windowMode true if processed on a window on a data stream 25 | */ 26 | case class Distinct( 27 | private val out: Pipe, 28 | private val in: Pipe, 29 | var windowMode: Boolean = false 30 | ) extends PigOperator(out, in) { 31 | 32 | override def lineageString: String = { 33 | s"""DISTINCT%""" + super.lineageString 34 | } 35 | 36 | override def toString = 37 | s"""DISTINCT 38 | | out = $outPipeName 39 | | in = $inPipeName 40 | | inSchema = $inputSchema 41 | | outSchema = $schema""".stripMargin 42 | 43 | } 44 | -------------------------------------------------------------------------------- /Zeppelin.md: -------------------------------------------------------------------------------- 1 | ## Zeppelin integration 2 | 3 | We provide an integration with Apache Zeppelin - a web-based notebook 4 | for data analytics. It allows you to write and execute Piglet scripts in 5 | notebooks and to visualize the results directly. For installation you 6 | need 7 | * the Zeppelin project from [here](https://zeppelin.incubator.apache.org/) 8 | * Spark 1.5 built with Scala 2.11 9 | * the zeppelin branch of Piglet 10 | 11 | Setting up the Piglet interpreter requires the following steps: 12 | 1. Build the zeppelin interpreter with sbt: 13 | ``` 14 | sbt> package 15 | sbt> assembly 16 | sbt> project zeppelin 17 | sbt> package 18 | ``` 19 | 20 | 1. Copy the following Jar files to ZEPPELIN_HOME/interpreter/piglet 21 | * PIGLET_HOME/common/target/scala-2.11/common_2.11-0.3.jar 22 | * PIGLET_HOME/sparklib/target/scala-2.11/sparklib_2.11-0.3.jar 23 | * PIGLET_HOME/target/scala-2.11/PigCompiler.jar 24 | * PIGLET_HOME/zeppelin/target/scala-2.11/piglet-interpreter_2.11-0.3.jar 25 | * spark-assembly-1.5.2-hadoop2.6.0.jar 26 | 27 | 1. Register the Piglet interpreter in ZEPPELIN_HOME/conf/zeppelin-site.xml 28 | by adding `dbis.piglet.PigletInterpreter` to the property value 29 | `zeppelin.interpreters`. 30 | 31 | 1. In Zeppelin, go to `INTERPRETER` and click `+Create`. Enter a name (piglet) 32 | and select the Piglet interpreter from the drop-down menu. Currently, no 33 | additional properties have to be set. 34 | 35 | 1. Create a new notebook, go to "Interpreter binding", and activate 36 | `piglet %piglet`. 37 | 38 | 1. Enter your script and mark it as Piglet using `%piglet`. 39 | 40 | 1. Note that you have to use `DISPLAY relation` instead of `DUMP` to visualize the result.
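The interpreter delegates to `PigletInterpreterAPI.createCodeFromInput`, which is also included later in this dump and can be called directly, e.g. to inspect the Scala code generated for a script. A minimal sketch (the backend name `"spark"` and the script itself are made-up examples):

```scala
import dbis.piglet.api.PigletInterpreterAPI

object GenerateCodeDemo {
  def main(args: Array[String]): Unit = {
    // a tiny Piglet script; DISPLAY (instead of DUMP) produces Zeppelin output
    val script =
      """a = LOAD 'file.csv' USING PigStorage(',') AS (x: int, y: chararray);
        |b = FILTER a BY x > 0;
        |DISPLAY b;
        |""".stripMargin
    // returns the generated Scala program, or "" if the plan is not connected
    val code = PigletInterpreterAPI.createCodeFromInput(script, "spark")
    println(code)
  }
}
```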
-------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/flink/emitter/StreamFilterEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.flink.emitter 2 | 3 | import dbis.piglet.codegen.{ CodeEmitter, CodeGenContext, CodeGenException } 4 | import dbis.piglet.op.Filter 5 | import dbis.piglet.codegen.scala_lang.ScalaEmitter 6 | 7 | class StreamFilterEmitter extends CodeEmitter[Filter] { 8 | override def template: String = """ 9 | | val = .mapWindow(customFilter _) 10 | | 11 | | val = .filter(t => {}) 12 | | 13 | |""".stripMargin 14 | def templateHelper: String = """ .filter(t => {})""".stripMargin 15 | 16 | def windowApply(ctx: CodeGenContext, op: Filter): String = { 17 | CodeEmitter.render(templateHelper, Map("pred" -> ScalaEmitter.emitPredicate(CodeGenContext(ctx, Map("schema" -> op.schema)), op.pred))) 18 | } 19 | 20 | override def code(ctx: CodeGenContext, op: Filter): String = { 21 | if (op.windowMode) return "" 22 | if (!op.schema.isDefined) 23 | throw CodeGenException("FILTER requires a schema definition") 24 | 25 | val className = ScalaEmitter.schemaClassName(op.schema.get.className) 26 | render(Map("out" -> op.outPipeName, 27 | "in" -> op.inPipeName, 28 | "class" -> className, 29 | "pred" -> ScalaEmitter.emitPredicate(CodeGenContext(ctx, Map[String, Any]("schema" -> op.schema)), op.pred))) 30 | } 31 | } 32 | 33 | object StreamFilterEmitter { 34 | lazy val instance = new StreamFilterEmitter 35 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/Tuplify.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.op 18 | 19 | import dbis.piglet.expr.Ref 20 | 21 | /** 22 | * 23 | * @param initialOutPipeName the name of the initial output pipe (relation) which is needed to construct the plan, but 24 | * can be changed later. 
25 | * @param initialInPipeName 26 | * @param ref a reference des 27 | */ 28 | case class Tuplify(private val out: Pipe, private val in: Pipe, ref: Ref) extends PigOperator(out, in) { 29 | 30 | override def lineageString: String = s"""TUPLIFY%""" + super.lineageString 31 | 32 | // TODO 33 | override def checkSchemaConformance: Boolean = true 34 | 35 | override def toString = 36 | s"""TUPLIFY 37 | | out = $outPipeName 38 | | in = $inPipeName 39 | | ref = $ref""".stripMargin 40 | } 41 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/plan/rewriting/dsl/builders/ReplacementBuilder.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.plan.rewriting.dsl.builders 18 | 19 | import dbis.piglet.op.PigOperator 20 | import dbis.piglet.plan.rewriting.Rewriter 21 | import dbis.piglet.plan.rewriting.dsl.traits.BuilderT 22 | 23 | import scala.reflect.ClassTag 24 | 25 | /** A builder for applying a rewriting method that rewrites a single [[dbis.piglet.op.PigOperator]] to another one. 26 | * 27 | * @tparam FROM 28 | * @tparam TO 29 | */ 30 | class ReplacementBuilder[FROM <: PigOperator : ClassTag, TO <: PigOperator : ClassTag] extends 31 | PigOperatorBuilder[FROM, TO] { 32 | override def wrapInFixer(func: (FROM => Option[TO])): (FROM => Option[TO]) = func 33 | 34 | override def addAsStrategy(func: (FROM => Option[TO])) = { 35 | Rewriter.addTypedStrategy(func) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /mapreducelib/src/main/scala/dbis/piglet/backends/mapreduce/PigRun.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.backends.mapreduce 2 | 3 | import dbis.piglet.backends.BackendConf 4 | import dbis.piglet.backends.PigletBackend 5 | import java.nio.file.Path 6 | import org.apache.pig.PigServer 7 | import org.apache.pig.ExecType 8 | import org.apache.pig.tools.pigstats.PigProgressNotificationListener 9 | import org.apache.pig.PigRunner 10 | 11 | /** 12 | * @author hage 13 | */ 14 | class PigRun extends PigletBackend with BackendConf { 15 | 16 | override def execute(master: String, className: String, jarFile: Path, backendArgs: Map[String,String], profiling: Boolean) = ??? 
17 | 18 | override def executeRaw(program: Path, master: String, backendArgs: Map[String,String]) { 19 | 20 | val ba = backendArgs.flatMap{ case (k,v) => Array(k,v)} 21 | 22 | val args = Array("-x", execType(master), program.toAbsolutePath().toString() ) ++ ba 23 | 24 | val stats = PigRunner.run(args, null) 25 | 26 | } 27 | 28 | /** 29 | * Get the name of this backend 30 | * 31 | * @return Returns the name of this backend 32 | */ 33 | override def name: String = "MapReduce - Pig" 34 | 35 | /** 36 | * Get the path to the runner class that implements the PigletBackend interface 37 | */ 38 | override def runnerClass: PigletBackend = this 39 | 40 | override def templateFile = null 41 | 42 | override def defaultConnector = "PigStorage" 43 | 44 | override def raw = true 45 | 46 | private def execType(master: String) = if(master.startsWith("local")) "local" else "mapreduce" 47 | // implicit private def execType(master: String) = if(master.toLowerCase().startsWith("local")) ExecType.LOCAL else ExecType.MAPREDUCE 48 | } 49 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/flink/emitter/SocketReadEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.flink.emitter 2 | 3 | import dbis.piglet.codegen.CodeEmitter 4 | import dbis.piglet.op.SocketRead 5 | import dbis.piglet.codegen.CodeGenContext 6 | import dbis.piglet.backends.BackendManager 7 | import dbis.piglet.codegen.scala_lang.ScalaEmitter 8 | 9 | class SocketReadEmitter extends CodeEmitter[SocketRead] { 10 | override def template: String = """ 11 | | val = []().zmqSubscribe(env, ":", ) 12 | | 13 | | val = []().connect(env, "", , ) 14 | |""".stripMargin 15 | 16 | override def code(ctx: CodeGenContext, op: SocketRead): String = { 17 | var paramMap = ScalaEmitter.emitExtractorFunc(op, op.streamFunc) 18 | op.schema match { 19 | case Some(s) => paramMap += ("class" -> ScalaEmitter.schemaClassName(s.className)) 20 | case None => paramMap += ("class" -> "Record") 21 | } 22 | val params = if (op.streamParams != null && op.streamParams.nonEmpty) ", " + op.streamParams.mkString(",") else "" 23 | val func = op.streamFunc.getOrElse(BackendManager.backend.defaultConnector) 24 | paramMap ++= Map( 25 | "out" -> op.outPipeName, 26 | "addr" -> op.addr, 27 | "func" -> func, 28 | "params" -> params) 29 | if (op.mode != "") paramMap += ("mode" -> op.mode) 30 | render(paramMap) 31 | } 32 | } 33 | 34 | object SocketReadEmitter { 35 | lazy val instance = new SocketReadEmitter 36 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/plan/rewriting/internals/EmbedSupport.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.plan.rewriting.internals 18 | 19 | import com.twitter.util.Eval 20 | import org.kiama.rewriting.Strategy 21 | 22 | /** Provides methods for evaluating embedded code. 23 | * 24 | */ 25 | trait EmbedSupport { 26 | /** The imports that are automatically added to eval'd code 27 | * 28 | */ 29 | private val imports = """ 30 | |import dbis.piglet.op._ 31 | |import dbis.piglet.plan.rewriting.Extractors._ 32 | |import dbis.piglet.plan.rewriting.Rewriter._ 33 | """.stripMargin 34 | 35 | /** Evals each String in ``ruleCode`` 36 | */ 37 | protected def evalExtraRuleCode(ruleCode: Seq[String]): Unit = 38 | ruleCode map { imports ++ _ } map { c => (new Eval).apply[scala.runtime.BoxedUnit](c) } 39 | } 40 | -------------------------------------------------------------------------------- /ceplib/src/main/scala/dbis/piglet/cep/ops/Strategies.scala: -------------------------------------------------------------------------------- 1 | 2 | package dbis.piglet.cep.ops 3 | import dbis.piglet.backends.{SchemaClass => Event} 4 | 5 | package object OutputTypes { 6 | type PossibleTypes = Event with Boolean 7 | } 8 | /** 9 | * @brief This enumeration represents the available selection strategies 10 | * according to which the matching is done. 11 | */ 12 | object SelectionStrategy extends Enumeration { 13 | type SelectionStrategy = Value 14 | val NextMatches, AllMatches, ContiguityMatches, FirstMatch, RecentMatch = Value 15 | } 16 | 17 | /** 18 | * @brief This enumeration represents the available output strategies 19 | * according to which the result output is generated. 20 | * The output of this operator is a complex event, i.e. a combination of tuples: 21 | * OneByOne generates the tuples one after another, so the resulting tuples 22 | * have a fixed schema, whereas Combined merges all tuples of a complex event 23 | * into one big tuple with a variable schema. 24 | * 25 | */ 26 | object OutputStrategy extends Enumeration { 27 | type OutputStrategy = Value 28 | val OneByOne, Combined, TrueValues = Value 29 | } 30 | 31 | /** 32 | * @brief This enumeration represents the available evaluation models 33 | * for processing or detecting complex events. 34 | * This engine uses a non-deterministic finite automaton (NFA) based approach; 35 | * in addition, a tree-based evaluation model for pattern queries is used. 36 | * Each approach has its advantages and drawbacks in terms of performance, 37 | * optimization and expressiveness. 38 | * 39 | */ 40 | object MatchingStrategy extends Enumeration { 41 | type MatchingStrategy = Value 42 | val TreeBased, NFABased = Value 43 | } 44 | 45 | -------------------------------------------------------------------------------- /flinklib/src/main/scala/dbis/piglet/backends/flink/PigFuncs.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License.
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package dbis.piglet.backends.flink 19 | 20 | import java.util.Random 21 | 22 | import dbis.piglet.CommonPigFuncs 23 | import dbis.piglet.backends._ 24 | import org.apache.flink.api.common.typeinfo.TypeInformation 25 | import org.apache.flink.api.java.functions._ 26 | import org.apache.flink.api.scala._ 27 | 28 | import scala.reflect.ClassTag 29 | 30 | class CustomSampler[T <: SchemaClass: ClassTag: TypeInformation](dataSet: DataSet[T]) { 31 | def sample(withReplacement: Boolean, fraction: Double, seed: Long = new Random().nextLong()) = { 32 | dataSet.mapPartition(new SampleWithFraction[T](withReplacement, fraction, seed)) 33 | } 34 | 35 | } 36 | 37 | object Sampler { 38 | implicit def addSampler[T <: SchemaClass: ClassTag: TypeInformation](dataSet: DataSet[T]) = { 39 | new CustomSampler(dataSet) 40 | } 41 | } 42 | 43 | object PigFuncs extends CommonPigFuncs { 44 | } 45 | -------------------------------------------------------------------------------- /flinklib/src/main/scala/dbis/piglet/backends/flink/streaming/UTF8StringSchema.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package dbis.piglet.backends.flink.streaming 19 | 20 | import org.apache.commons.lang3.SerializationUtils 21 | import org.apache.flink.streaming.util.serialization._ 22 | import org.apache.flink.api.common.typeinfo.TypeInformation 23 | import org.apache.flink.api.java.typeutils.TypeExtractor 24 | 25 | class UTF8StringSchema extends DeserializationSchema[String] with SerializationSchema[String] { 26 | 27 | override def deserialize(message: Array[Byte]): String = { 28 | new String(message, "UTF-8") 29 | } 30 | 31 | override def isEndOfStream(nextElement: String): Boolean = { 32 | false 33 | } 34 | 35 | override def serialize(element: String): Array[Byte] = { 36 | element.getBytes("UTF-8") 37 | } 38 | 39 | override def getProducedType(): TypeInformation[String] = { 40 | TypeExtractor.getForClass(classOf[String]) 41 | } 42 | } 43 | 44 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/WindowApply.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.op 18 | 19 | import dbis.piglet.schema.Schema 20 | 21 | /** 22 | * WindowApply is used to transform Windows back to a single continuous Stream. 23 | * 24 | * @param out the name of the output pipe. 25 | * @param in the name of the input pipe. 26 | * @param fname the name of the function which will be applied to the input window operator. 27 | */ 28 | case class WindowApply( 29 | private val out: Pipe, 30 | private val in: Pipe, 31 | fname: String 32 | ) extends PigOperator(out, in) { 33 | 34 | override def constructSchema: Option[Schema] = { 35 | schema 36 | } 37 | override def lineageString: String = { 38 | s"""WINDOWAPPLY%$fname%""" + super.lineageString 39 | } 40 | 41 | override def toString = 42 | s"""WINDOWAPPLY 43 | | out = $outPipeName 44 | | in = $inPipeName 45 | | fname = $fname 46 | """.stripMargin 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/cmd/HdfsCmd.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package dbis.piglet.op.cmd 19 | 20 | import dbis.piglet.op.PigOperator 21 | import dbis.piglet.tools.HDFSService 22 | import dbis.piglet.tools.HdfsCommand 23 | 24 | 25 | /** 26 | * HdfsCmd represents a pseudo operator for HDFS commands. 27 | */ 28 | case class HdfsCmd(cmd: HdfsCommand.HdfsCommand, params: List[String]) extends PigOperator(List(), List()) 29 | { 30 | 31 | // if (!isValid) 32 | // throw new java.lang.IllegalArgumentException("unknown fs command '" + cmd + "'") 33 | 34 | 35 | override def outPipeNames: List[String] = List() 36 | 37 | // def isValid = HdfsCommand.values.map{v => v.toString().toLowerCase()}.exists { s => s.equalsIgnoreCase(cmd) } 38 | 39 | def paramString(): String = params.map(p => s""""$p"""").mkString(",") 40 | 41 | override def toString = 42 | s"""HDFS COMMAND 43 | | cmd = $cmd 44 | | params = ${params.mkString(",")}""".stripMargin 45 | 46 | } 47 | 48 | -------------------------------------------------------------------------------- /ceplib/src/main/scala/dbis/piglet/cep/flink/DataStreamMatcher.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.cep.flink 2 | 3 | import scala.reflect.ClassTag 4 | import dbis.piglet.cep.nfa.NFAController 5 | import dbis.piglet.cep.engines._ 6 | import dbis.piglet.cep.ops.SelectionStrategy._ 7 | import dbis.piglet.cep.ops.OutputStrategy._ 8 | import dbis.piglet.backends.{SchemaClass => Event} 9 | import org.apache.flink.api.common.typeinfo.TypeInformation 10 | import org.apache.flink.streaming.api.windowing.windows.GlobalWindow 11 | import org.apache.flink.streaming.api.windowing.assigners.GlobalWindows 12 | import dbis.piglet.cep.ops.MatchCollector 13 | import dbis.piglet.cep.ops.SelectionStrategy 14 | //import org.apache.flink.api.java.operators.CustomUnaryOperation 15 | import scala.collection.mutable.ListBuffer 16 | //import org.apache.flink.api.java.DataSet 17 | //import org.apache.flink.api.java.ExecutionEnvironment 18 | import scala.collection.JavaConversions._ 19 | import org.apache.flink.streaming.api.scala._ 20 | import dbis.piglet.cep.ops.EngineConf 21 | import org.apache.flink.util.Collector 22 | 23 | 24 | class DataStreamMatcher[T <: Event: ClassTag: TypeInformation](@transient val input: DataStream[T], nfa: NFAController[T], flinkEnv: StreamExecutionEnvironment, sstr: SelectionStrategy = SelectionStrategy.FirstMatch, out: OutputStrategy = Combined) extends EngineConf[T](nfa, sstr) with java.io.Serializable { 25 | object DataStreamProcess { 26 | def customRun(gw: GlobalWindow, ts: Iterable[T], out: Collector[T]) = { 27 | ts.foreach { event => engine.runEngine(event)} 28 | val result = collector.convertEventsToArray() 29 | result.foreach { res => out.collect(res) } 30 | } 31 | } 32 | def compute(): DataStream[T] = { 33 | input.windowAll(GlobalWindows.create()).apply(DataStreamProcess.customRun _) 34 | } 35 | 36 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/spark/PartitionerEmitter.scala: 
-------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.spark 2 | 3 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext} 4 | import dbis.piglet.op.{Partition, PartitionMethod} 5 | 6 | class PartitionerEmitter extends CodeEmitter[Partition] { 7 | 8 | private val partitionerTemplate = "new ()" 9 | private val spatialPartitionerTemplate = "new (_helper,)" 10 | 11 | override def template = """val = { 12 | | val _helper = 13 | | _helper.partitionBy().map{case (_,v)=> 14 | | 15 | | PerfMonitor.sampleSize(v,"", accum, randFactor) 16 | | 17 | | 18 | | v 19 | | \} 20 | |\}""".stripMargin 21 | 22 | override def code(ctx: CodeGenContext, op: Partition): String = { 23 | 24 | 25 | val partitioner = { 26 | 27 | val (template, methodClass) = op.method match { 28 | case PartitionMethod.GRID => (spatialPartitionerTemplate, "SpatialGridPartitioner") 29 | case PartitionMethod.BSP => (spatialPartitionerTemplate, "BSPartitioner") 30 | case PartitionMethod.Hash => (partitionerTemplate, "org.apache.spark.HashPartitioner") 31 | } 32 | 33 | CodeEmitter.render(template, Map( 34 | "method" -> methodClass, 35 | "params" -> op.params.mkString(","))) 36 | 37 | } 38 | 39 | render(Map( 40 | "out" -> op.outPipeName, 41 | "in" -> op.inPipeName, 42 | "partitioner" -> partitioner, 43 | "keyby" -> SpatialEmitterHelper.keyByCode(op.inputSchema, op.field, ctx), 44 | "lineage" -> op.lineageSignature 45 | )) 46 | 47 | } 48 | } 49 | 50 | object PartitionerEmitter { 51 | lazy val instance = new PartitionerEmitter 52 | } -------------------------------------------------------------------------------- /src/test/scala/dbis/piglet/tools/RingBufferSpec.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.tools 2 | 3 | import org.scalatest.{FlatSpec, Matchers} 4 | 5 | /** 6 | * Created by hage on 31.05.17. 
7 | */ 8 | class RingBufferSpec extends FlatSpec with Matchers { 9 | 10 | "A RingBuffer" should "return the correct length for empty buffer" in { 11 | val b = new RingBuffer[Int](3) 12 | 13 | b.length shouldBe 0 14 | } 15 | 16 | it should "return the correct length for single element in buffer" in { 17 | val b = RingBuffer(5) 18 | 19 | b.length shouldBe 1 20 | } 21 | 22 | it should "return the correct length for more elements than capacity" in { 23 | val b = new RingBuffer[Int](3) 24 | 25 | Array(1,2,3,4,5,6,7,8).foreach(b.put) 26 | 27 | b.length shouldBe 3 28 | } 29 | 30 | it should "accept fewer elements than capacity" in { 31 | val b = new RingBuffer[Int](3) 32 | 33 | b.put(2) 34 | b.put(1) 35 | 36 | b should contain theSameElementsAs List(1, 2) 37 | } 38 | 39 | it should "accept the same number of elements as capacity" in { 40 | val b = new RingBuffer[Int](3) 41 | 42 | b.put(2) 43 | b.put(1) 44 | b.put(3) 45 | 46 | b should contain theSameElementsAs List(3, 1, 2) 47 | } 48 | 49 | it should "remove the oldest entry when inserting the (cap + 1)st element" in { 50 | val b = RingBuffer(1,2,3) 51 | 52 | b.put(4) 53 | 54 | b should contain theSameElementsAs List(4,2,3) 55 | } 56 | 57 | it should "accept more elements than capacity" in { 58 | 59 | val b = new RingBuffer[Int](3) 60 | 61 | Array(1,2,3,4,5,6,7,8).foreach(b.put) 62 | 63 | b should contain theSameElementsAs List(6,7,8) 64 | } 65 | 66 | it should "create a buffer with apply method" in { 67 | val b = RingBuffer(1,2,3,4,5,6,7,8) 68 | b should contain theSameElementsAs List(1,2,3,4,5,6,7,8) 69 | } 70 | } 71 | --------------------------------------------------------------------------------
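The RingBuffer class under test is not part of this dump; a minimal sketch that would satisfy the behaviour exercised by the spec above (a reconstruction for illustration, not the project's actual implementation) could look like this:

import scala.reflect.ClassTag

// Fixed-capacity buffer that overwrites its oldest slot once full.
class RingBuffer[T: ClassTag](capacity: Int) extends Iterable[T] {
  private val buf = new Array[T](capacity)
  private var pos = 0     // next slot to (over)write
  private var filled = 0  // number of valid entries, at most `capacity`

  def put(elem: T): Unit = {
    buf(pos) = elem                 // overwrites the oldest entry when full
    pos = (pos + 1) % capacity
    if (filled < capacity) filled += 1
  }

  def length: Int = filled

  // iterate over the valid slots; the spec only compares element sets,
  // so slot order (rather than insertion order) is sufficient here
  override def iterator: Iterator[T] = buf.iterator.take(filled)
}

object RingBuffer {
  // RingBuffer(1,2,3) creates a buffer whose capacity is the argument count
  def apply[T: ClassTag](elems: T*): RingBuffer[T] = {
    val b = new RingBuffer[T](elems.size)
    elems.foreach(b.put)
    b
  }
}

--------------------------------------------------------------------------------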
/common/src/main/scala/dbis/piglet/backends/CppConfig.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package dbis.piglet.backends 19 | import scala.collection.immutable.List 20 | 21 | 22 | /** 23 | * Defines the interface to the C++ compiler 24 | */ 25 | trait CppConfig { 26 | 27 | /** 28 | * Get the C++ compiler to use, e.g. g++, clang++, .... 29 | */ 30 | def getCompiler: String 31 | 32 | /** 33 | * Get the libraries which are used during compiling. The compiler has to link against these 34 | * libraries; otherwise, linking errors will occur. 35 | */ 36 | def getLibraries: List[String] 37 | 38 | /** 39 | * Get options for compiling the code, such as the optimization level, enabling some 40 | * macros, etc. 41 | */ 42 | def getOptions: List[String] 43 | /** 44 | * Get directories for libraries which are essential during linking 45 | */ 46 | def getLibDirs: List[String] 47 | /** 48 | * Get include directories for finding the header files. 49 | */ 50 | def getIncludeDirs: List[String] 51 | } 52 | -------------------------------------------------------------------------------- /materialization_scripts/taxi_high_tip_block.pig: -------------------------------------------------------------------------------- 1 | <% 2 | 3 | def dateToMonth(date: String): Int = { 4 | val formatter = java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss") 5 | java.time.LocalDate.parse(date,formatter).getMonthValue() 6 | } 7 | %> 8 | 9 | raw = load '$taxi' using PigStorage(',',skipEmpty=true) as 10 | (vendor_id:chararray,pickup_datetime:chararray,dropoff_datetime:chararray,passenger_count:chararray, 11 | trip_distance:chararray, pickup_longitude:chararray,pickup_latitude:chararray,rate_code:chararray, 12 | store_and_fwd_flag:chararray,dropoff_longitude:chararray,dropoff_latitude:chararray,payment_type:chararray, 13 | fare_amount:chararray,surcharge:chararray,mta_tax:chararray,tip_amount:chararray,tolls_amount:chararray,total_amount:chararray); 14 | 15 | noHeader = filter raw by not STARTSWITH(LOWER(vendor_id),"vendor") and dropoff_longitude != "" and dropoff_latitude != "" 16 | and total_amount != "" and tip_amount != ""; 17 | 18 | month_total = FOREACH noHeader GENERATE geometry("POINT("+ dropoff_latitude +" "+ dropoff_longitude +")") as dropoffloc, 19 | (double)total_amount as total, (double)tip_amount as tip; 20 | 21 | allBlocks = load '$blocks' using PigStorage(';') as (blockid: int, wkt: chararray); 22 | blocks = FOREACH allBlocks GENERATE geometry(wkt) as blockbounds, blockid; 23 | 24 | dropoff = SPATIAL_JOIN month_total, blocks ON CONTAINEDBY using index RTree(order=10); 25 | dropoff_block = FOREACH dropoff GENERATE blockid, total, tip; 26 | 27 | 28 | grp = GROUP dropoff_block by blockid; 29 | avgs = FOREACH grp GENERATE group as blockid, AVG(dropoff_block.tip) * 100 / AVG(dropoff_block.total) as p:double ; 30 | 31 | hightip = FILTER avgs BY p >= 20; 32 | 33 | sorted = ORDER hightip BY p DESC; 34 | 35 | DUMP sorted; 36 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/IndexOp.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.op 2 | 3 | import dbis.piglet.expr.{NamedField, Ref, RefExpr} 4 | import dbis.piglet.schema._ 5 | 6 | object IndexMethod extends Enumeration { 7 | type IndexMethod = Value 8 | val RTREE = Value 9 | 10 | def methodName(method: IndexMethod.IndexMethod): String = method match { 11 | case RTREE => "RTree" 12 | case _ => throw new IllegalArgumentException(s"unknown index method: $method") 13 | } 14 | } 15 | 16 | import dbis.piglet.op.IndexMethod.IndexMethod 17 | 18 | case class IndexOp( 19 | private val out: Pipe, 20 | private val in: Pipe, 21 | field: Ref, 22 | method: IndexMethod, 23 | params: Seq[String] 24 | ) extends PigOperator(out, in) { 25 | 26 | 27 | override def constructSchema = { 28 | val inSchema = inputs.head.producer.schema 29 | 30 | val inputType = inSchema match { 31 | case Some(s) => s.element.valueType 32 | case None => TupleType(Array(Field("", Types.ByteArrayType))) 33 | } 34 | 35 | val keyField = field match { 36 | case nf:NamedField => 37 | Field(nf.name, RefExpr(field).resultType(inSchema), nf.lineage) 38 | case _ => 39 | Field("", RefExpr(field).resultType(inSchema)) 40 | } 41 | 42 | val nested = Field(in.name, inputType) 43 | val fields = Array(keyField, nested) 44 | 45 | val iBag = BagType(IndexType(TupleType(fields), IndexMethod.methodName(method))) 46 | 47 |
schema = Some(Schema(iBag)) 48 | schema 49 | } 50 | 51 | 52 | override def lineageString = 53 | s"""INDEX%$method%$field%${params.mkString}"""+super.lineageString 54 | 55 | override def toString = 56 | s"""INDEX 57 | | out = $outPipeName 58 | | in = $inPipeName 59 | | field = $field 60 | | index method = $method 61 | | params = ${params.mkString(",")} 62 | """.stripMargin 63 | 64 | } -------------------------------------------------------------------------------- /sparklib/src/main/scala/dbis/piglet/backends/spark/FileStreamReader.scala: -------------------------------------------------------------------------------- 1 | 2 | package dbis.piglet.backends.spark 3 | import org.apache.spark.storage.StorageLevel 4 | import org.apache.spark.streaming.receiver.Receiver 5 | import scala.io.Source 6 | import java.io.{ FileNotFoundException, IOException } 7 | import org.apache.spark.streaming.scheduler._ 8 | import org.apache.spark.streaming.StreamingContext 9 | 10 | class FileStreamReader(file: String, @transient val ssc: StreamingContext) extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) { 11 | 12 | def onStart() { 13 | // Start the thread that reads data from a file 14 | new Thread("FileStreamReader") { 15 | override def run() { receive() } 16 | }.start() 17 | } 18 | 19 | def onStop() { 20 | // There is nothing to do here 21 | } 22 | 23 | /** Create a reader to read data from the file till EOF */ 24 | private def receive() { 25 | try { 26 | for (line <- Source.fromFile(file).getLines()) { 27 | store(line) 28 | //Thread sleep 1000 // for testing 29 | } 30 | //stop("stopped ...") // stop receiver 31 | //ssc.stop() 32 | //SparkStream.ssc.stop(true, true) // stop streaming context gracefully 33 | } catch { 34 | case ex: FileNotFoundException => println(s"Could not find $file file.") 35 | case ex: IOException => println(s"Had an IOException during reading $file file") 36 | } finally { 37 | stop("Stopped Receiver") 38 | ssc.stop(true, true) 39 | SparkStream.ssc.stop(true, true) 40 | //sys.exit() 41 | 42 | 43 | } 44 | } 45 | } 46 | class FileReader(ssc: StreamingContext) { 47 | def readFile(file: String) = ssc.receiverStream(new FileStreamReader(file, ssc)) 48 | } 49 | object FileStreamReader { 50 | implicit def customFileStreamReader(ssc: StreamingContext) = 51 | new FileReader(ssc) 52 | } 53 | 54 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/codegen/spark/DelayEmitter.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.codegen.spark 2 | 3 | import dbis.piglet.codegen.{CodeEmitter, CodeGenContext} 4 | import dbis.piglet.op.Delay 5 | 6 | class DelayEmitter extends CodeEmitter[Delay] { 7 | override def template: String = 8 | """val = .mapPartitions({ iter => 9 | | Thread.sleep() 10 | | 11 | |},true)""".stripMargin 12 | 13 | 14 | lazy val processorFilterTemplate = s"""iter.filter{ t => 15 | | 16 | | val decision = scala.util.Random.nextInt() == 0 17 | | 18 | | if(decision) 19 | | PerfMonitor.sampleSize(t,"", accum, randFactor) 20 | | 21 | | decision 22 | |}""".stripMargin 23 | 24 | lazy val processorDuplTemplate = s"""iter.flatMap{ t => 25 | | (0 until ).iterator.map{_ => 26 | | 27 | | PerfMonitor.sampleSize(t,"", accum, randFactor) 28 | | 29 | | t 30 | | } 31 | |}""".stripMargin 32 | 33 | 34 | override def code(ctx: CodeGenContext, op: Delay): String = { 35 | 36 | val processorParams = Map( 37 | "sampleFactor" -> math.abs(op.sampleFactor), // always use positive value 38 | 
"lineage" -> op.lineageSignature 39 | ) 40 | 41 | // if sampleFactor is negative, use a filter to reduce tuples, otherwise duplicate them 42 | val processorCode = if(op.sampleFactor < 0 ) 43 | CodeEmitter.render(processorFilterTemplate, processorParams) 44 | else CodeEmitter.render(processorDuplTemplate, processorParams) 45 | 46 | val m = Map("out" -> op.outPipeName, 47 | "in" -> op.inPipeName, 48 | "wtime" -> op.wtime.toMillis, 49 | "processor" -> processorCode 50 | ) 51 | 52 | render(m) 53 | } 54 | } 55 | 56 | object DelayEmitter { 57 | lazy val instance = new DelayEmitter 58 | } -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/api/PigletInterpreterAPI.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.api 2 | 3 | import dbis.piglet.plan.rewriting.Rewriter._ 4 | import dbis.piglet.plan.DataflowPlan 5 | import dbis.piglet.parser.PigParser 6 | import dbis.piglet.backends.BackendManager 7 | import dbis.piglet.tools.Conf 8 | import dbis.piglet.codegen.CodeGenerator 9 | import dbis.piglet.tools.logging.PigletLogging 10 | import dbis.piglet.schema.Schema 11 | 12 | object PigletInterpreterAPI extends PigletLogging { 13 | 14 | /** 15 | * Create Scala code for the given backend from the source string. 16 | * This method is provided mainly for Zeppelin. 17 | * 18 | * @param source the Piglet script 19 | * @param backend the backend used to compile and execute 20 | * @return the generated Scala code 21 | */ 22 | def createCodeFromInput(source: String, backend: String): String = { 23 | import scala.collection.JavaConverters._ 24 | 25 | Schema.init() 26 | var plan = new DataflowPlan(PigParser.parseScript(source)) 27 | 28 | if (!plan.checkConnectivity) { 29 | logger.error(s"dataflow plan not connected") 30 | return "" 31 | } 32 | 33 | logger.debug(s"successfully created dataflow plan") 34 | plan = rewritePlan(plan) 35 | 36 | // compile it into Scala code for Spark 37 | val generatorClass = Conf.backendGenerator(backend) 38 | val extension = Conf.backendExtension(backend) 39 | val backendConf = BackendManager.init(backend) 40 | // BackendManager.backend = backendConf 41 | val templateFile = backendConf.templateFile 42 | val args = Array(templateFile).asInstanceOf[Array[AnyRef]] 43 | val compiler = Class.forName(generatorClass).getConstructors()(0).newInstance(args: _*).asInstanceOf[CodeGenerator] 44 | 45 | // 5. generate the Scala code 46 | val code = compiler.generate("blubs", plan, profiling = None, forREPL = true) 47 | logger.debug("successfully generated scala program") 48 | code 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/Delay.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.op 18 | 19 | import scala.concurrent.duration.FiniteDuration 20 | 21 | /** 22 | * Delay represents the DELAY operator of Pig. 23 | * 24 | * @param out the output pipe (relation). 25 | * @param in the input pipe. 26 | * @param sampleFactor the percentage of input tuples that is passed to the output pipe 27 | * @param wtime the time for delaying the processing 28 | * 29 | */ 30 | case class Delay( 31 | private val out: Pipe, 32 | private val in: Pipe, 33 | sampleFactor: Int, 34 | wtime: FiniteDuration 35 | ) extends PigOperator(out, in) { 36 | 37 | private val r = 0 //System.currentTimeMillis() 38 | 39 | override def lineageString: String = { 40 | s"""DELAY%$sampleFactor%$wtime%$r%""" + super.lineageString 41 | } 42 | 43 | override def toString = 44 | s"""DELAY 45 | | out = $outPipeName 46 | | in = $inPipeName 47 | | sample factor = $sampleFactor 48 | | waiting time = ${wtime._1} - ${wtime._2}""".stripMargin 49 | 50 | 51 | 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/dbis/piglet/op/SplitInto.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package dbis.piglet.op 18 | 19 | import dbis.piglet.expr.Predicate 20 | 21 | case class SplitBranch(output: Pipe, expr: Predicate) { 22 | protected[op] def lineageSignature = s"""SPLITBRANCH($expr)""" 23 | 24 | override def toString = s"${output.name} $expr" 25 | } 26 | 27 | /** 28 | * SplitInto represents the SPLIT INTO operator of Pig. 
29 | * 30 | * @param splits a list of split branches (output pipe + condition) 31 | */ 32 | case class SplitInto(private val in: Pipe, splits: List[SplitBranch]) extends PigOperator(splits.map(s => s.output), List(in)) { 33 | 34 | // override def initialOutPipeNames: List[String] = splits.map{ branch => branch.output.name } 35 | 36 | override def lineageString: String = { 37 | s"""SPLIT%${splits.map(_.lineageSignature).mkString("%")}%""" + super.lineageString 38 | } 39 | 40 | override def toString = 41 | s"""SPLITINTO 42 | | out = ${outPipeNames.mkString(",")} 43 | | in = $inPipeName 44 | | inSchema = $inputSchema 45 | | splits = ${splits.mkString(",")} 46 | """.stripMargin 47 | } 48 | -------------------------------------------------------------------------------- /ceplib/src/main/scala/dbis/piglet/cep/engines/CEPEngine.scala: -------------------------------------------------------------------------------- 1 | package dbis.piglet.cep.engines 2 | 3 | import dbis.piglet.backends.{SchemaClass => Event} 4 | import scala.reflect.ClassTag 5 | import scala.collection.mutable.ListBuffer 6 | import dbis.piglet.cep.nfa.NFAStructure 7 | import dbis.piglet.cep.nfa.NFAController 8 | import dbis.piglet.cep.nfa.NormalState 9 | import scala.collection.mutable.Map 10 | import dbis.piglet.cep.ops.MatchCollector 11 | 12 | abstract class CEPEngine[T <: Event: ClassTag](nfaController: NFAController[T], collector: MatchCollector[T]) extends Serializable { 13 | val structureID = { var sid: Long = 0; () => { sid += 1; sid } } 14 | var runningStructursPool: Map[Long, NFAStructure[T]] = Map() 15 | var wantToDeletedStructurs: ListBuffer[Long] = new ListBuffer() 16 | def createNewStructue(event: T): Unit = { 17 | val start = nfaController.getStartState 18 | start.edges.foreach { e => 19 | if (e.evaluate(event)) { 20 | val newStr = new NFAStructure[T](nfaController) 21 | newStr.addEvent(event, e) 22 | runningStructursPool += (structureID() -> newStr) 23 | } 24 | } 25 | } 26 | def runGCStructures(): Unit = { 27 | if(runningStructursPool.size > 0) { 28 | runningStructursPool --= wantToDeletedStructurs 29 | //wantToDeletedStructurs.foreach { x => runningStructursPool -= x } 30 | wantToDeletedStructurs.clear() 31 | } 32 | } 33 | 34 | def checkPredicate(event: T, currenStr: NFAStructure[T]): Int = { 35 | var result: Int = -1 36 | if (currenStr.getCurrentState.isInstanceOf[NormalState[T]]) { 37 | val currentState = currenStr.getCurrentState.asInstanceOf[NormalState[T]] 38 | currentState.edges.zipWithIndex.foreach { 39 | case (e, i) => 40 | if (e.evaluate(event, currenStr)) { 41 | result = i 42 | } 43 | } 44 | } 45 | result 46 | } 47 | def runEngine(event: T): Unit 48 | //def printNumMatches(): Unit 49 | } --------------------------------------------------------------------------------
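Taken together, the concrete engines above are selected through the SelectionStrategy dispatch shown in RDDMatcher and DataStreamMatcher earlier in this dump. A minimal sketch of wiring an event RDD through the Spark matcher (building the NFAController, i.e. the actual pattern, is application-specific and left abstract here):

import scala.reflect.ClassTag
import org.apache.spark.rdd.RDD
import dbis.piglet.backends.{SchemaClass => Event}
import dbis.piglet.cep.nfa.NFAController
import dbis.piglet.cep.ops.{OutputStrategy, SelectionStrategy}
import dbis.piglet.cep.spark.RDDMatcher

object CepSketch {
  // Wrap an event RDD in the matcher: the result is again an RDD whose
  // partitions are computed by feeding every event into the chosen engine
  // (here FirstMatch) and collecting the matched sequences.
  def matchFirst[T <: Event: ClassTag](events: RDD[T], nfa: NFAController[T]): RDD[T] =
    new RDDMatcher[T](events, nfa, SelectionStrategy.FirstMatch, OutputStrategy.Combined)
}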