├── .gitignore
├── README
├── pom.xml
└── src
    ├── main
    │   └── java
    │       ├── com
    │       │   └── tdunning
    │       │       └── plume
    │       │           ├── CombinerFn.java
    │       │           ├── DoFn.java
    │       │           ├── EmitFn.java
    │       │           ├── Ordering.java
    │       │           ├── PCollection.java
    │       │           ├── PTable.java
    │       │           ├── Pair.java
    │       │           ├── Plume.java
    │       │           ├── Tuple2.java
    │       │           ├── Tuple3.java
    │       │           ├── avro
    │       │           │   ├── AvroFile.java
    │       │           │   └── AvroTypes.java
    │       │           ├── local
    │       │           │   ├── eager
    │       │           │   │   ├── LocalCollection.java
    │       │           │   │   ├── LocalPlume.java
    │       │           │   │   └── LocalTable.java
    │       │           │   ├── lazy
    │       │           │   │   ├── ExecutionStep.java
    │       │           │   │   ├── LazyCollection.java
    │       │           │   │   ├── LazyPlume.java
    │       │           │   │   ├── LazyTable.java
    │       │           │   │   ├── LocalExecutor.java
    │       │           │   │   ├── MSCR.java
    │       │           │   │   ├── MSCRCombiner.java
    │       │           │   │   ├── MSCRMapper.java
    │       │           │   │   ├── MSCRReducer.java
    │       │           │   │   ├── MapRedExecutor.java
    │       │           │   │   ├── Optimizer.java
    │       │           │   │   ├── OptimizerTools.java
    │       │           │   │   ├── PlumeWorkflow.java
    │       │           │   │   └── op
    │       │           │   │       ├── CombineValues.java
    │       │           │   │       ├── DeferredOp.java
    │       │           │   │       ├── Flatten.java
    │       │           │   │       ├── GroupByKey.java
    │       │           │   │       ├── MultipleParallelDo.java
    │       │           │   │       ├── OneToOneOp.java
    │       │           │   │       └── ParallelDo.java
    │       │           │   └── mapReduce
    │       │           │       ├── Collector.java
    │       │           │       ├── MapReduce.java
    │       │           │       ├── MapReduceBuilder.java
    │       │           │       ├── Mapper.java
    │       │           │       ├── Reducer.java
    │       │           │       └── package.html
    │       │           ├── package.html
    │       │           └── types
    │       │               ├── BooleanType.java
    │       │               ├── BytesType.java
    │       │               ├── DoubleType.java
    │       │               ├── FloatType.java
    │       │               ├── IntegerType.java
    │       │               ├── LongType.java
    │       │               ├── PCollectionType.java
    │       │               ├── PTableType.java
    │       │               ├── PType.java
    │       │               ├── PairType.java
    │       │               ├── RecordType.java
    │       │               └── StringType.java
    │       └── org
    │           └── apache
    │               └── hadoop
    │                   └── mapreduce
    │                       └── lib
    │                           └── input
    │                               └── FileInputSplitWrapper.java
    └── test
        ├── java
        │   └── com
        │       └── tdunning
        │           └── plume
        │               ├── FlattenTest.java
        │               ├── WordCountTest.java
        │               ├── avro
        │               │   └── WriteAvroFile.java
        │               └── local
        │                   ├── LogParseTest.java
        │                   ├── lazy
        │                   │   ├── BaseTestClass.java
        │                   │   ├── BasicOptimizerTest.java
        │                   │   ├── LocalExecutorTest.java
        │                   │   ├── MapRedBypassTest.java
        │                   │   ├── MapRedFlattenTest.java
        │                   │   ├── MapRedMultipleGroupsTest.java
        │                   │   ├── MapRedOnlyFlattensTest.java
        │                   │   ├── MapRedSequenceFileTest.java
        │                   │   ├── MapRedSingleFlattenChannelTest.java
        │                   │   ├── MapRedTwoSequentialGBKTest.java
        │                   │   ├── MapRedWordCountTest.java
        │                   │   ├── TestOptimizer.java
        │                   │   └── TestOptimizerTools.java
        │                   └── mapReduce
        │                       └── MapReduceTest.java
        └── resources
            ├── event2users.txt
            ├── eventslog.txt
            ├── log.txt
            ├── simple-text.avro
            └── simple-text.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | *.iml
3 | target/
4 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | Plume is a (so far) serial, eager approximate clone of FlumeJava. The intent is to experiment with the design of the API both to understand the design decisions the Google team made and to see if there are good alternatives.
2 |
3 | The ultimate goal is to provide something comparable to FlumeJava on top of Hadoop, but with a much more flexible execution model so that it is easy and efficient to code small problems using Plume as well as large ones. My theory is that small problems often grow into large ones and it is really nice to not have to re-implement everything as scaling happens.
4 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | com.tdunning
8 | plume
9 | 1.0
10 |
11 |
12 |
16 |
17 | Apache Snapshots
18 | http://repository.apache.org/snapshots/
19 |
20 | true
21 |
22 |
23 |
24 | JBoss
25 | http://repository.jboss.org/nexus/content/groups/public/
26 |
27 | true
28 |
29 |
30 |
31 | Apache Snapshots-group
32 | https://repository.apache.org/content/repositories/snapshots-group/
33 |
34 | true
35 |
36 |
37 | true
38 |
39 |
40 |
41 |
42 |
43 |
44 | junit
45 | junit
46 | 4.13.1
47 |
48 |
49 | com.google.guava
50 | guava
51 | r03
52 |
53 |
54 | org.apache.avro
55 | avro
56 | 1.4.0-SNAPSHOT
57 |
58 |
59 | org.apache.hadoop
60 | hadoop-common
61 | 0.21.0-SNAPSHOT
62 |
63 |
64 | org.apache.hadoop
65 | hadoop-mapred
66 | 0.21.0-SNAPSHOT
67 |
68 |
69 | org.apache.hadoop
70 | hadoop-hdfs
71 | 0.21.0-SNAPSHOT
72 |
73 |
74 | org.slf4j
75 | slf4j-log4j12
76 | 1.6.1
77 |
78 |
79 |
80 |
81 |
82 |
83 | org.apache.maven.plugins
84 | maven-compiler-plugin
85 |
86 | 1.6
87 | 1.6
88 |
89 |
90 |
91 |
92 |
93 |
94 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/CombinerFn.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume;
19 |
20 | /**
21 | * Describes the interface for an associative aggregation function that can be applied one or more
22 | * times. Since this function is associative, it can be used as a combiner in addition to being
23 | * part of a reducer.
24 | */
25 | public abstract class CombinerFn {
26 | public abstract T combine(Iterable stuff); // reduces the supplied values to a single combined value
27 | }
28 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/DoFn.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume;
19 |
20 | /**
21 | * Describes the key functional object that processes an input record and
22 | * outputs zero or more objects by means of an emitter object.
23 | */
24 | public abstract class DoFn {
25 | public abstract void process(In v, EmitFn emitter); // handle one input v; outputs are handed to emitter
26 |
27 | @Override
28 | public String toString() {
29 | String f = super.toString(); // Object.toString() form, i.e. className@hexHash
30 | return "DoFn " + f.substring(f.indexOf("@") + 1, f.length()); // keep only the text after the first '@'
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/EmitFn.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume;
19 |
20 | /**
21 | * Describes the interface for an object used to emit results from a DoFn.
22 | */
23 | public abstract class EmitFn {
24 | public abstract void emit(Out v); // receives one output value v produced by a DoFn
25 | }
26 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/Ordering.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume;
19 |
20 | /**
21 | * Placeholder type for ordering collection results; it declares no members yet. We should investigate what
22 | * guava provides with respect to ordering predicates before deciding whether to just
23 | * use java Comparators.
24 | */
25 | public abstract class Ordering {
26 | }
27 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/PCollection.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume;
19 |
20 | import com.tdunning.plume.types.PCollectionType;
21 | import com.tdunning.plume.types.PTableType;
22 |
23 | /**
24 | * Parallel collection: an Iterable of elements that can be transformed by applying a DoFn.
25 | */
26 | public interface PCollection extends Iterable {
27 | public PCollection map(DoFn fn, PCollectionType t); // apply fn to the elements, producing a collection
28 | public PTable map(DoFn> fn, PTableType t); // apply fn to the elements, producing a table
29 |
30 | // derived operations
31 |
32 | public PTable count(); // NOTE(review): presumably one entry per distinct element with its occurrence count - confirm against implementations
33 | }
34 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/PTable.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume;
19 |
20 | import com.tdunning.plume.types.PCollectionType;
21 | import com.tdunning.plume.types.PTableType;
22 |
23 | /**
24 | * Parallel table that supports map and reduce operations.
25 | */
26 | public interface PTable extends PCollection>, Iterable> {
27 |
28 | /**
29 | * Performs a function on each element of a parallel table returning a collection of values.
30 | *
31 | * @param fn The function to perform.
32 | * @return A parallel collection whose content is the result of applying fn to each element of
33 | * this.
34 | */
35 | public PCollection map(DoFn, R> fn, PCollectionType type);
36 |
37 | /**
38 | * Performs an operation on each element of a table returning a transformed table.
39 | *
40 | * @param fn The function to perform on key/value pairs.
41 | * @return A parallel table containing the transformed data.
42 | */
43 | public PTable map(DoFn, Pair> fn, PTableType type);
44 |
45 | /**
46 | * Groups the elements of a table by key returning a new table with the same keys, but all values
47 | * for the same key grouped together.
48 | *
49 | * @return The grouped table.
50 | */
51 | public PTable> groupByKey();
52 |
53 | /**
54 | * Groups the elements of a table by key returning a new table with the same keys, but all values
55 | * for the same key grouped together and in the order specified by the ordering.
56 | *
57 | * @param order Determines the ordering of the values for each key
58 | * @return The grouped table, with the values for each key arranged according to order.
59 | */
60 | public PTable> groupByKey(Ordering order);
61 |
62 | // TODO how can we state that V is Iterable for this one method?
63 |
64 | /**
65 | * Applies (possibly recursively) an associative function to elements of lists contained in a
66 | * table.
67 | *
68 | * @param fn The combination function transformation.
69 | * @return A table containing the combined values.
70 | */
71 | public PTable combine(CombinerFn fn);
72 |
73 | // derived operations
74 |
75 | public PTable, Iterable>> join(PTable other); // NOTE(review): presumably matches entries of this table and other by key - confirm
76 | }
77 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/Pair.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume;
19 |
20 | /**
21 | * A pair of values, generally used as a key and value as the input or output of
22 | * a DoFn in the case where the input or output respectively are a PTable.
23 | *
24 | * If cons'ing lots of Pairs becomes a problem, we may need to make this class
25 | * final to allow better compiler optimizations.
26 | */
27 | public class Pair { // equals/hashCode are not overridden here, so pairs compare by identity
28 | private K key;
29 | private V value;
30 |
31 | public Pair(K key, V value) {
32 | this.key = key;
33 | this.value = value;
34 | }
35 |
36 | public static Pair create(V1 x1, V2 x2) { // factory equivalent to the constructor: x1 is the key, x2 the value
37 | return new Pair(x1, x2);
38 | }
39 |
40 | public K getKey() {
41 | return key;
42 | }
43 |
44 | public V getValue() {
45 | return value;
46 | }
47 |
48 | @Override
49 | public String toString() { // renders as Pair{key=..., value=...}
50 | return "Pair{" +
51 | "key=" + key +
52 | ", value=" + value +
53 | '}';
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/Plume.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume;
19 |
20 | import java.io.IOException;
21 |
22 | import com.tdunning.plume.types.*;
23 |
24 | /**
25 | * A plume provides the runtime support for writing data-parallel programs. Each Plume implementation
26 | * defines a mode of execution. For instance, the local.eager.LocalPlume provides instant execution
27 | * without execution planning or any parallel implementation.
28 | */
29 | public abstract class Plume {
30 | // general collection operations (implemented by each concrete Plume)
31 | public abstract PCollection readTextFile(String name) throws IOException;
32 | public abstract PCollection readResourceFile(String name) throws IOException;
33 | public abstract PCollection readAvroFile(String name, PType type);
34 | public abstract PCollection fromJava(Iterable source);
35 | public abstract PCollection flatten(PCollection... args);
36 |
37 | public abstract void writeAvroFile(String name, PCollection data, PType type) throws IOException;
38 |
39 | public static StringType strings() { return new StringType(); } // type-descriptor factories: each call returns a new instance
40 | public static IntegerType integers() { return new IntegerType(); }
41 | public static LongType longs() { return new LongType(); }
42 | public static FloatType floats() { return new FloatType(); }
43 | public static DoubleType doubles() { return new DoubleType(); }
44 | public static BytesType bytes() { return new BytesType(); }
45 | public static BooleanType booleans() { return new BooleanType(); }
46 |
47 | public static PTableType tableOf(PType keyType, PType valueType) { // table type built from a key type and a value type
48 | return new PTableType(keyType, valueType);
49 | }
50 |
51 | public static PCollectionType collectionOf(PType elementType) { // collection type built from its element type
52 | return new PCollectionType(elementType);
53 | }
54 |
55 | public static RecordType recordsOf(Class recordClass) { // record type built from the given record class
56 | return new RecordType(recordClass);
57 | }
58 |
59 | }
60 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/Tuple2.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume;
19 |
20 | /**
21 | * Container for two objects used during joins. It is an open question whether a Pair should just
22 | * be a Tuple2 (or vice versa).
23 | */
24 | public class Tuple2 {
25 | private V0 v0;
26 | private V1 v1;
27 |
28 | private Tuple2(V0 v0, V1 v1) { // private: instances are obtained through create()
29 | this.v0 = v0;
30 | this.v1 = v1;
31 | }
32 |
33 | public static Tuple2 create(V0 v0, V1 v1) { // the only way to build a Tuple2 from outside this class
34 | return new Tuple2(v0, v1);
35 | }
36 |
37 | public V0 get0() { // first component
38 | return v0;
39 | }
40 |
41 | public V1 get1() { // second component
42 | return v1;
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/Tuple3.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume;
19 |
20 | /**
21 |  * An immutable triple of three objects.
22 |  *
23 |  * The accessors are named after the constructor arguments: get1() returns v1, get2()
24 |  * returns v2 and get3() returns v3. Null components are allowed.
25 |  */
26 | public class Tuple3<V1, V2, V3> {
27 |   private final V1 v1;
28 |   private final V2 v2;
29 |   private final V3 v3;
30 |
31 |   /**
32 |    * Creates a triple holding the three given values.
33 |    *
34 |    * @param v1 the first component
35 |    * @param v2 the second component
36 |    * @param v3 the third component
37 |    */
38 |   public Tuple3(V1 v1, V2 v2, V3 v3) {
39 |     // Previously the arguments were dropped, so a triple carried no data at all.
40 |     this.v1 = v1;
41 |     this.v2 = v2;
42 |     this.v3 = v3;
43 |   }
44 |
45 |   /**
46 |    * Static factory equivalent to the constructor; lets callers omit the type arguments.
47 |    *
48 |    * @param v1 the first component
49 |    * @param v2 the second component
50 |    * @param v3 the third component
51 |    * @return a new triple holding v1, v2 and v3
52 |    */
53 |   public static <V1, V2, V3> Tuple3<V1, V2, V3> create(V1 v1, V2 v2, V3 v3) {
54 |     return new Tuple3<V1, V2, V3>(v1, v2, v3);
55 |   }
56 |
57 |   /** @return the first component */
58 |   public V1 get1() {
59 |     return v1;
60 |   }
61 |
62 |   /** @return the second component */
63 |   public V2 get2() {
64 |     return v2;
65 |   }
66 |
67 |   /** @return the third component */
68 |   public V3 get3() {
69 |     return v3;
70 |   }
71 | }
72 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/avro/AvroFile.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume.avro;
19 |
20 | import java.io.*;
21 | import java.util.Iterator;
22 | import java.util.Map;
23 |
24 | import com.tdunning.plume.types.PCollectionType;
25 | import com.tdunning.plume.types.PTableType;
26 | import org.apache.avro.Schema;
27 | import org.apache.avro.specific.SpecificDatumReader;
28 | import org.apache.avro.file.DataFileStream;
29 |
30 | import com.google.common.collect.Maps;
31 |
32 | import com.tdunning.plume.*;
33 | import com.tdunning.plume.types.PType;
34 | import com.tdunning.plume.local.eager.*;
35 |
36 | /** A PCollection for an Avro file. The file is re-opened and read on every iteration; results of map/count are held in local (in-memory) collections. */
37 | public class AvroFile implements PCollection {
38 | private File file;
39 | private Schema schema;
40 |
41 | public AvroFile(String name, PType type) { // name is the file path; type is converted to the Avro schema used for reading
42 | this.file = new File(name);
43 | this.schema = AvroTypes.getSchema(type);
44 | }
45 |
46 | @Override
47 | public PCollection map(DoFn fn, PCollectionType type) { // type is not used by this implementation
48 | final LocalCollection r = new LocalCollection();
49 |
50 | for (T t : this) { // iterating this reads the file via iterator()
51 | fn.process(t, new EmitFn() {
52 | @Override
53 | public void emit(R y) {
54 | r.getData().add(y); // every emitted value is appended to the result collection
55 | }
56 | });
57 | }
58 | return r;
59 | }
60 |
61 | @Override
62 | public PTable map(DoFn> fn, PTableType type) { // type is not used by this implementation
63 | final LocalTable r = new LocalTable();
64 | for (final T t : this) {
65 | fn.process(t, new EmitFn>() {
66 | @Override
67 | public void emit(Pair value) {
68 | r.getData().add(value); // every emitted pair is appended to the result table
69 | }
70 | });
71 | }
72 | return r;
73 | }
74 |
75 | @Override
76 | public PTable count() { // tallies occurrences of each distinct element, then emits one Pair(element, count) per key
77 | Map x = Maps.newHashMap();
78 | for (T t : this) {
79 | Integer v = x.get(t);
80 | if (v == null) {
81 | x.put(t, 1); // first time this element is seen
82 | } else {
83 | x.put(t, v + 1);
84 | }
85 | }
86 | LocalTable r = new LocalTable();
87 | for (T t : x.keySet()) {
88 | r.getData().add(new Pair(t, x.get(t)));
89 | }
90 | return r;
91 | }
92 |
93 | @Override
94 | public Iterator iterator() { // opens a fresh stream over the file on each call; IOException is rethrown as RuntimeException
95 | try {
96 | final DataFileStream data =
97 | new DataFileStream
98 | (new BufferedInputStream(new FileInputStream(file)),
99 | new SpecificDatumReader(schema));
100 | // wrapper that closes the file when iteration is complete
101 | return new Iterator() {
102 | public boolean hasNext() {
103 | boolean value = data.hasNext();
104 | if (!value) {
105 | close(); // NOTE(review): a later hasNext() call would query the already-closed stream - confirm that is safe
106 | }
107 | return value;
108 | }
109 | public T next() { return data.next(); }
110 | public void remove() { throw new UnsupportedOperationException(); }
111 | protected void finalize() { close(); } // fallback close for iterators abandoned before the end; relies on finalization running
112 | private void close() {
113 | try {
114 | data.close();
115 | } catch (IOException e){
116 | throw new RuntimeException(e);
117 | }
118 | }
119 | };
120 | } catch (IOException e) {
121 | throw new RuntimeException(e); // NOTE(review): if DataFileStream construction fails, the FileInputStream opened above is not closed here
122 | }
123 | }
124 |
125 | }
126 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/avro/AvroTypes.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume.avro;
19 |
20 | import com.tdunning.plume.types.*;
21 | import com.tdunning.plume.types.PType.Kind;
22 |
23 | import org.apache.avro.Schema;
24 | import org.apache.avro.Schema.Type;
25 | import org.apache.avro.mapred.Pair;
26 |
27 | /**
28 |  * Translates Plume types to Avro schemas. The reverse direction is not implemented yet.
29 |  */
30 | public class AvroTypes {
31 |   // no public ctor
32 |   private AvroTypes() {
33 |   }
34 |
35 |   /**
36 |    * Converts a Plume type to the corresponding Avro schema.
37 |    *
38 |    * Primitive kinds map to the Avro primitive of the same name (INTEGER becomes Avro INT).
39 |    * A PAIR becomes an Avro pair schema, a COLLECTION an array of its element schema, and a
40 |    * RECORD supplies its own schema. A TABLE with string keys becomes an Avro map, because
41 |    * Avro map keys are always strings; every other TABLE becomes an array of key/value pairs.
42 |    *
43 |    * @param type the Plume type to convert
44 |    * @return the Avro schema for type
45 |    * @throws RuntimeException if the kind of type is not handled here
46 |    */
47 |   public static Schema getSchema(PType type) {
48 |     switch (type.kind()) {
49 |       case BOOLEAN:
50 |         return Schema.create(Type.BOOLEAN);
51 |       case BYTES:
52 |         return Schema.create(Type.BYTES);
53 |       case DOUBLE:
54 |         return Schema.create(Type.DOUBLE);
55 |       case FLOAT:
56 |         return Schema.create(Type.FLOAT);
57 |       case INTEGER:
58 |         return Schema.create(Type.INT);
59 |       case LONG:
60 |         return Schema.create(Type.LONG);
61 |       case PAIR:
62 |         PairType pairType = (PairType) type;
63 |         return Pair.getPairSchema(
64 |             getSchema(pairType.keyType()),
65 |             getSchema(pairType.valueType()));
66 |       case COLLECTION:
67 |         PType elementType = ((PCollectionType) type).elementType();
68 |         return Schema.createArray(getSchema(elementType));
69 |       case TABLE:
70 |         PType keyType = ((PTableType) type).keyType();
71 |         PType valueType = ((PTableType) type).valueType();
72 |         if (keyType.kind() == Kind.STRING) {
73 |           // Schema.createMap takes the schema of the map's values; the keys are implicitly strings.
74 |           return Schema.createMap(getSchema(valueType));
75 |         }
76 |         // Non-string keys cannot be Avro map keys, so keep each entry as a key/value pair.
77 |         return Schema.createArray(getSchema(new PairType(keyType, valueType)));
78 |       case RECORD:
79 |         return ((RecordType) type).schema();
80 |       case STRING:
81 |         return Schema.create(Type.STRING);
82 |       default:
83 |         throw new RuntimeException("Unknown type: " + type);
84 |     }
85 |   }
86 |
87 |   /**
88 |    * Convert Avro types to Plume types. Not implemented: every call throws.
89 |    *
90 |    * @param schema the Avro schema to convert (currently ignored)
91 |    * @return never returns normally
92 |    * @throws RuntimeException always
93 |    */
94 |   public static PType getPType(Schema schema) {
95 |     // TODO FIXME
96 |     throw new RuntimeException("Not yet implemented.");
97 |   }
98 |
99 | }
100 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/local/eager/LocalCollection.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume.local.eager;
19 |
20 | import com.google.common.collect.Lists;
21 | import com.google.common.collect.Maps;
22 | import com.tdunning.plume.*;
23 | import com.tdunning.plume.types.PCollectionType;
24 | import com.tdunning.plume.types.PTableType;
25 |
26 | import java.util.Iterator;
27 | import java.util.List;
28 | import java.util.Map;
29 |
30 | /**
31 | * Completely local version of a PCollection: the elements live in an in-memory list and every operation runs immediately.
32 | */
33 | public class LocalCollection implements PCollection {
34 | private List data = Lists.newArrayList();
35 |
36 | @Override
37 | public PCollection map(DoFn fn, PCollectionType type) { // type is not used by this implementation
38 | final LocalCollection r = new LocalCollection();
39 | for (T t : data) {
40 | fn.process(t, new EmitFn() {
41 | @Override
42 | public void emit(R y) {
43 | r.data.add(y); // every emitted value is appended to the result collection
44 | }
45 | });
46 | }
47 | return r;
48 | }
49 |
50 | @Override
51 | public PTable map(DoFn> fn, PTableType type) { // type is not used by this implementation
52 | final LocalTable r = new LocalTable();
53 | for (final T t : data) {
54 | fn.process(t, new EmitFn>() {
55 | @Override
56 | public void emit(Pair value) {
57 | r.getData().add(value); // every emitted pair is appended to the result table
58 | }
59 | });
60 | }
61 | return r;
62 | }
63 |
64 | @Override
65 | public PTable count() { // tallies occurrences of each distinct element, then emits one Pair(element, count) per key
66 | Map x = Maps.newHashMap();
67 | for (T t : data) {
68 | Integer v = x.get(t);
69 | if (v == null) {
70 | x.put(t, 1); // first time this element is seen
71 | } else {
72 | x.put(t, v + 1);
73 | }
74 | }
75 | LocalTable r = new LocalTable();
76 | for (T t : x.keySet()) {
77 | r.getData().add(new Pair(t, x.get(t)));
78 | }
79 | return r;
80 | }
81 |
82 | public static LocalCollection wrap(Iterable data) { // builds a new collection containing the elements of data
83 | return new LocalCollection().addAll(data);
84 | }
85 |
86 | public LocalCollection addAll(Iterable data) { // appends every element of data and returns this for chaining
87 | for (T t : data) {
88 | this.data.add(t);
89 | }
90 | return this;
91 | }
92 |
93 | public List getData() { // exposes the backing list itself, not a copy; callers in this package add to it directly
94 | return data;
95 | }
96 |
97 | /**
98 | * Returns an iterator over the elements of this collection, in the order of the backing list.
99 | *
100 | * @return an Iterator.
101 | */
102 | public Iterator iterator() {
103 | return data.iterator();
104 | }
105 | }
106 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/local/eager/LocalPlume.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume.local.eager;
19 |
20 | import com.google.common.base.Charsets;
21 | import com.google.common.io.Files;
22 | import com.google.common.io.Resources;
23 | import com.tdunning.plume.*;
24 | import com.tdunning.plume.types.*;
25 | import com.tdunning.plume.avro.*;
26 |
27 | import org.apache.avro.Schema;
28 | import org.apache.avro.file.DataFileReader;
29 | import org.apache.avro.file.DataFileWriter;
30 | import org.apache.avro.specific.SpecificDatumReader;
31 | import org.apache.avro.specific.SpecificDatumWriter;
32 |
33 | import java.io.File;
34 | import java.io.IOException;
35 |
36 | /**
37 |  * Local plume runtime. All files are local, all tasks are run in threads. Currently that means
38 |  * a single thread: every operation in this class executes directly in the caller.
39 |  */
40 | public class LocalPlume extends Plume {
41 |   /**
42 |    * Reads a local text file as UTF-8, producing one element per line.
43 |    *
44 |    * @param name Path of the file to read.
45 |    * @return A local collection holding the lines of the file.
46 |    * @throws IOException If reading the file fails.
47 |    */
48 |   @Override
49 |   public LocalCollection<String> readTextFile(String name) throws IOException {
50 |     return LocalCollection.wrap(Files.readLines(new File(name), Charsets.UTF_8));
51 |   }
52 |
53 |   /**
54 |    * Reads a resource as UTF-8 text, producing one element per line.
55 |    *
56 |    * @param name Name of the resource, as understood by Resources.getResource.
57 |    * @return A local collection holding the lines of the resource.
58 |    * @throws IOException If reading the resource fails.
59 |    */
60 |   @Override
61 |   public PCollection<String> readResourceFile(String name) throws IOException {
62 |     return LocalCollection.wrap(Resources.readLines(Resources.getResource(name), Charsets.UTF_8));
63 |   }
64 |
65 |   /**
66 |    * Opens an Avro file as a collection.
67 |    *
68 |    * @param name Path of the Avro file.
69 |    * @param type Type descriptor handed to the AvroFile.
70 |    * @return An AvroFile created from name and type.
71 |    */
72 |   @Override
73 |   public <T> PCollection<T> readAvroFile(String name, PType type) {
74 |     return new AvroFile<T>(name, type);
75 |   }
76 |
77 |   /**
78 |    * Copies in-memory data into a new local collection.
79 |    *
80 |    * @param data The elements to copy.
81 |    * @return A local collection holding the elements of data.
82 |    */
83 |   @Override
84 |   public <T> LocalCollection<T> fromJava(Iterable<T> data) {
85 |     return new LocalCollection<T>().addAll(data);
86 |   }
87 |
88 |   /**
89 |    * Concatenates local collections, in argument order, into one new local collection.
90 |    *
91 |    * @param args The collections to concatenate; each must be a LocalCollection.
92 |    * @return A new local collection with the elements of every argument.
93 |    * @throws UnsupportedOperationException If an argument is not a LocalCollection.
94 |    */
95 |   @Override
96 |   public <T> PCollection<T> flatten(PCollection<T>... args) {
97 |     LocalCollection<T> r = new LocalCollection<T>();
98 |     for (PCollection<T> arg : args) {
99 |       if (arg instanceof LocalCollection) {
100 |         r.addAll(((LocalCollection<T>) arg).getData());
101 |       } else {
102 |         throw new UnsupportedOperationException("Can't flatten a " + arg.getClass() + " onto local collections");
103 |       }
104 |     }
105 |     return r;
106 |   }
107 |
108 |   /**
109 |    * Writes a collection to a local Avro file.
110 |    *
111 |    * @param name Path of the file to create.
112 |    * @param data The elements to write.
113 |    * @param type Type descriptor from which the Avro schema is derived.
114 |    * @throws IOException If creating, writing or closing the file fails.
115 |    */
116 |   @Override
117 |   public <T> void writeAvroFile(String name, PCollection<T> data, PType type) throws IOException {
118 |     Schema schema = AvroTypes.getSchema(type);
119 |     DataFileWriter<T> factory = new DataFileWriter<T>(new SpecificDatumWriter<T>(schema));
120 |     DataFileWriter<T> out = factory.create(schema, new File(name));
121 |     try {
122 |       for (T t : data) {
123 |         out.append(t);
124 |       }
125 |     } finally {
126 |       // close even when an append fails so the file handle is not leaked
127 |       out.close();
128 |     }
129 |   }
130 | }
131 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/local/eager/LocalTable.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume.local.eager;
19 |
20 | import com.google.common.collect.Lists;
21 | import com.google.common.collect.Maps;
22 |
23 | import com.tdunning.plume.types.PCollectionType;
24 | import com.tdunning.plume.types.PTableType;
25 | import com.tdunning.plume.CombinerFn;
26 | import com.tdunning.plume.DoFn;
27 | import com.tdunning.plume.EmitFn;
28 | import com.tdunning.plume.Ordering;
29 | import com.tdunning.plume.PCollection;
30 | import com.tdunning.plume.PTable;
31 | import com.tdunning.plume.Pair;
32 | import com.tdunning.plume.Tuple2;
33 |
34 | import java.util.Iterator;
35 | import java.util.List;
36 | import java.util.Map;
37 |
38 | /**
39 |  * Completely local eager version of a PTable.
40 |  *
41 |  * The pairs are kept in the list inherited from LocalCollection and are always reached through
42 |  * getData(), so the operations inherited from LocalCollection and the table operations defined
43 |  * here all work on the same elements.
44 |  */
45 | public class LocalTable<K, V> extends LocalCollection<Pair<K, V>> implements PTable<K, V> {
46 |
47 |   /**
48 |    * Performs a function on each element of a parallel table returning a collection of values.
49 |    *
50 |    * @param fn   The function to perform.
51 |    * @param type Descriptor of the result type; not consulted by this implementation.
52 |    * @return A parallel collection whose content is the result of applying fn to each element of
53 |    *         this.
54 |    */
55 |   @Override
56 |   public <R> PCollection<R> map(DoFn<Pair<K, V>, R> fn, PCollectionType type) {
57 |     final LocalCollection<R> r = new LocalCollection<R>();
58 |     for (Pair<K, V> v : getData()) {
59 |       fn.process(v, new EmitFn<R>() {
60 |         @Override
61 |         public void emit(R y) {
62 |           r.getData().add(y);
63 |         }
64 |       });
65 |     }
66 |     return r;
67 |   }
68 |
69 |   /**
70 |    * Performs an operation on each element of a collection returning a transformed table.
71 |    *
72 |    * @param fn   The function to perform on key/value pairs.
73 |    * @param type Descriptor of the result type; not consulted by this implementation.
74 |    * @return A parallel table containing the transformed data.
75 |    */
76 |   @Override
77 |   public <K1, V1> PTable<K1, V1> map(DoFn<Pair<K, V>, Pair<K1, V1>> fn, PTableType type) {
78 |     final LocalTable<K1, V1> r = new LocalTable<K1, V1>();
79 |     for (Pair<K, V> v : getData()) {
80 |       fn.process(v, new EmitFn<Pair<K1, V1>>() {
81 |         @Override
82 |         public void emit(Pair<K1, V1> value) {
83 |           r.getData().add(value);
84 |         }
85 |       });
86 |     }
87 |     return r;
88 |   }
89 |
90 |   /**
91 |    * Groups the elements of a table by key returning a new table with the same keys, but all values
92 |    * for the same key grouped together.
93 |    *
94 |    * @return The grouped table.
95 |    */
96 |   @Override
97 |   public PTable<K, Iterable<V>> groupByKey() {
98 |     // can't use a guava multimap here because identical key,value pairs would be suppressed.
99 |     return LocalTable.wrap(valuesByKey(getData()));
100 |   }
101 |
102 |   /**
103 |    * Collects the values of pairs into one list per key, keeping the order in which the values
104 |    * were encountered.
105 |    */
106 |   private static <A, B> Map<A, List<B>> valuesByKey(Iterable<Pair<A, B>> pairs) {
107 |     Map<A, List<B>> r = Maps.newHashMap();
108 |     for (Pair<A, B> p : pairs) {
109 |       List<B> values = r.get(p.getKey());
110 |       if (values == null) {
111 |         values = Lists.newArrayList();
112 |         r.put(p.getKey(), values);
113 |       }
114 |       values.add(p.getValue());
115 |     }
116 |     return r;
117 |   }
118 |
119 |   /**
120 |    * Turns a map from keys to value lists into a table with one (key, values) pair per entry.
121 |    */
122 |   private static <K, V> PTable<K, Iterable<V>> wrap(Map<K, List<V>> data) {
123 |     LocalTable<K, Iterable<V>> r = new LocalTable<K, Iterable<V>>();
124 |     List<Pair<K, Iterable<V>>> list = r.getData();
125 |     for (Map.Entry<K, List<V>> e : data.entrySet()) {
126 |       list.add(Pair.<K, Iterable<V>>create(e.getKey(), e.getValue()));
127 |     }
128 |     return r;
129 |   }
130 |
131 |   /**
132 |    * Groups the elements of a table by key returning a new table with the same keys, but all values
133 |    * for the same key grouped together and in the order specified by the ordering.
134 |    *
135 |    * @return A table of keys and groups.
136 |    * @throws UnsupportedOperationException Always; this variant is not implemented.
137 |    */
138 |   @Override
139 |   public PTable<K, Iterable<V>> groupByKey(Ordering order) {
140 |     throw new UnsupportedOperationException("Not implemented yet ... help by making this work");
141 |   }
142 |
143 |   /**
144 |    * Applies (possibly recursively) an associative function to elements of lists contained in a
145 |    * table.
146 |    *
147 |    * @param combiner The function that reduces the values of one key to a single value.
148 |    * @return A table containing the combined values.
149 |    */
150 |   @Override
151 |   public <X> PTable<K, X> combine(CombinerFn<X> combiner) {
152 |     final LocalTable<K, X> r = new LocalTable<K, X>();
153 |     for (final Pair<K, V> x : getData()) {
154 |       // each value of this table is expected to be an Iterable of the items to combine
155 |       @SuppressWarnings({"unchecked"}) Iterable<X> v = (Iterable<X>) x.getValue();
156 |       r.getData().add(Pair.create(x.getKey(), combiner.combine(v)));
157 |     }
158 |     return r;
159 |   }
160 |
161 |   /**
162 |    * Joins this table with another on equal keys.
163 |    *
164 |    * @param other The table to join with; it must be a LocalTable.
165 |    * @return A table with one entry per key found in either table, pairing the values this table
166 |    *         holds for the key with the values other holds for it. A side without values for the
167 |    *         key contributes an empty list.
168 |    */
169 |   @Override
170 |   public <V1> PTable<K, Tuple2<Iterable<V>, Iterable<V1>>> join(PTable<K, V1> other) {
171 |     Map<K, List<V>> m0 = valuesByKey(getData());
172 |     Map<K, List<V1>> m1 = valuesByKey(((LocalTable<K, V1>) other).getData());
173 |
174 |     LocalTable<K, Tuple2<Iterable<V>, Iterable<V1>>> z = new LocalTable<K, Tuple2<Iterable<V>, Iterable<V1>>>();
175 |     for (K k : m0.keySet()) {
176 |       Iterable<V> v0 = m0.get(k);
177 |       // taking the entry out of m1 leaves only the keys that are missing from this table
178 |       Iterable<V1> v1 = m1.remove(k);
179 |       if (v1 == null) {
180 |         v1 = Lists.newArrayList();
181 |       }
182 |       z.getData().add(Pair.create(k, Tuple2.create(v0, v1)));
183 |     }
184 |
185 |     // whatever is left in m1 has no counterpart in this table
186 |     for (Map.Entry<K, List<V1>> e : m1.entrySet()) {
187 |       List<V> v0 = Lists.newArrayList();
188 |       z.getData().add(Pair.create(e.getKey(), Tuple2.<Iterable<V>, Iterable<V1>>create(v0, e.getValue())));
189 |     }
190 |     return z;
191 |   }
192 | }
193 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/local/lazy/ExecutionStep.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume.local.lazy;
19 |
20 | import java.util.HashSet;
21 | import java.util.Set;
22 |
23 | /**
24 |  * Defines an execution plan recursively from a first ExecutionStep.
25 |  * All the MSCRs in an ExecutionStep can be executed concurrently.
26 |  */
27 | public class ExecutionStep {
28 |
29 |   /** MSCRs that belong to this step. */
30 |   Set<MSCR> mscrSteps = new HashSet<MSCR>();
31 |
32 |   /** The step after this one, or null while none has been assigned. */
33 |   ExecutionStep nextStep;
34 |
35 |   /**
36 |    * @return the set of MSCRs held by this step (the live set, not a copy)
37 |    */
38 |   public Set<MSCR> getMscrSteps() {
39 |     return mscrSteps;
40 |   }
41 |
42 |   /**
43 |    * @return the step after this one, or null if there is none
44 |    */
45 |   public ExecutionStep getNextStep() {
46 |     return nextStep;
47 |   }
48 |
49 |   /**
50 |    * Renders the MSCRs of this step, followed on a new line by the remaining steps, if any.
51 |    */
52 |   @Override
53 |   public String toString() {
54 |     return mscrSteps + (nextStep != null ? " \n" + nextStep : "");
55 |   }
56 | }
57 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/local/lazy/LazyCollection.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume.local.lazy;
19 |
20 | import java.util.ArrayList;
21 | import java.util.Iterator;
22 | import java.util.List;
23 |
24 | import com.google.common.collect.Lists;
25 | import com.tdunning.plume.DoFn;
26 | import com.tdunning.plume.PCollection;
27 | import com.tdunning.plume.PTable;
28 | import com.tdunning.plume.Pair;
29 | import com.tdunning.plume.local.lazy.op.DeferredOp;
30 | import com.tdunning.plume.local.lazy.op.ParallelDo;
31 | import com.tdunning.plume.types.PCollectionType;
32 | import com.tdunning.plume.types.PTableType;
33 |
34 | /**
35 |  * A LazyCollection that can be either materialized or unmaterialized.
36 |  * Unmaterialized collections have a reference to the {@link DeferredOp} that creates them.
37 |  */
38 | public class LazyCollection<T> implements PCollection<T> {
39 |
40 |   PCollectionType type;
41 |
42 |   boolean materialized = false;
43 |   private List<T> data;
44 |   private String file; // points to a file in local filesystem, if collection is materialized like that TODO to be better defined
45 |
46 |   DeferredOp deferredOp;
47 |
48 |   List<DeferredOp> downOps;
49 |
50 |   String plumeId = "";
51 |
52 |   public String getPlumeId() {
53 |     return plumeId;
54 |   }
55 |
56 |   public void setPlumeId(String plumeId) {
57 |     this.plumeId = plumeId;
58 |   }
59 |
60 |   /**
61 |    * Build a PCollection with materialized state
62 |    *
63 |    * @param data Concrete data from which to build the PCollection.
64 |    * @param type Type descriptor kept with the collection.
65 |    */
66 |   public LazyCollection(Iterable<T> data, PCollectionType type) {
67 |     this.data = Lists.newArrayList(data);
68 |     this.type = type;
69 |     materialized = true;
70 |   }
71 |
72 |   /**
73 |    * @return the in-memory elements, or null when none were supplied at construction
74 |    */
75 |   public List<T> getData() {
76 |     return data;
77 |   }
78 |
79 |   /**
80 |    * Unmaterialized PCollection constructor
81 |    */
82 |   LazyCollection() {
83 |   }
84 |
85 |   /** Records op as an operation that consumes this collection, creating the list on first use. */
86 |   protected void addDownOp(DeferredOp op) {
87 |     if (downOps == null) {
88 |       downOps = new ArrayList<DeferredOp>();
89 |     }
90 |     downOps.add(op);
91 |   }
92 |
93 |   /**
94 |    * Iterates over the in-memory elements of this collection.
95 |    *
96 |    * @throws UnsupportedOperationException if the collection is not materialized, or if it is
97 |    *         flagged as materialized but holds no in-memory data
98 |    */
99 |   @Override
100 |   public Iterator<T> iterator() {
101 |     if (!materialized) {
102 |       throw new UnsupportedOperationException("Can't iterate over unmaterialized PCollection");
103 |     }
104 |     if (data == null) {
105 |       // materialized without in-memory elements, e.g. only a file name was set
106 |       throw new UnsupportedOperationException("Can't iterate over a PCollection without in-memory data");
107 |     }
108 |     return data.iterator();
109 |   }
110 |
111 |   /**
112 |    * Creates a new LazyCollection from a deferred operation
113 |    * which maps a PCollection to another PCollection
114 |    */
115 |   @Override
116 |   public <R> PCollection<R> map(DoFn<T, R> fn, PCollectionType type) {
117 |     LazyCollection<R> dest = new LazyCollection<R>();
118 |     ParallelDo<T, R> op = new ParallelDo<T, R>(fn, this, dest);
119 |     dest.deferredOp = op;
120 |     dest.type = type;
121 |     addDownOp(op);
122 |     return dest;
123 |   }
124 |
125 |   /**
126 |    * Creates a new LazyTable from a deferred operation
127 |    * which maps a PCollection to a PTable
128 |    */
129 |   public <K, V> PTable<K, V> map(DoFn<T, Pair<K, V>> fn, PTableType type) {
130 |     LazyTable<K, V> dest = new LazyTable<K, V>();
131 |     ParallelDo<T, Pair<K, V>> op = new ParallelDo<T, Pair<K, V>>(fn, this, dest);
132 |     dest.deferredOp = op;
133 |     dest.type = type;
134 |     addDownOp(op);
135 |     return dest;
136 |   }
137 |
138 |   /** @return the type descriptor kept with this collection */
139 |   public PCollectionType getType() {
140 |     return type;
141 |   }
142 |
143 |   /** @return the deferred operation assigned to this collection, or null if none was set */
144 |   public DeferredOp getDeferredOp() {
145 |     return deferredOp;
146 |   }
147 |
148 |   public void setDeferredOp(DeferredOp deferredOp) {
149 |     this.deferredOp = deferredOp;
150 |   }
151 |
152 |   public boolean isMaterialized() {
153 |     return materialized;
154 |   }
155 |
156 |   /** @return the operations recorded through addDownOp; null until the first one is recorded */
157 |   public List<DeferredOp> getDownOps() {
158 |     return downOps;
159 |   }
160 |
161 |   /**
162 |    * TODO: counting is not implemented for lazy collections yet.
163 |    *
164 |    * @throws UnsupportedOperationException always
165 |    */
166 |   @Override
167 |   public PTable<T, Integer> count() {
168 |     throw new UnsupportedOperationException("Not yet implemented");
169 |   }
170 |
171 |   public String getFile() {
172 |     return file;
173 |   }
174 |
175 |   public void setFile(String file) {
176 |     this.file = file;
177 |   }
178 |
179 |   /**
180 |    * Returns the plume id when a non-empty one has been set, otherwise the part of the default
181 |    * Object.toString() representation that follows the "@".
182 |    */
183 |   @Override
184 |   public String toString() {
185 |     String id = getPlumeId();
186 |     // compare by content: a reference comparison against "" misses equal-but-distinct strings
187 |     if (id != null && !id.isEmpty()) {
188 |       return id;
189 |     }
190 |     String n = super.toString();
191 |     return n.substring(n.indexOf("@") + 1);
192 |   }
193 | }
194 |
--------------------------------------------------------------------------------
/src/main/java/com/tdunning/plume/local/lazy/LazyPlume.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.tdunning.plume.local.lazy;
19 |
20 | import java.io.IOException;
21 |
22 | import com.google.common.base.Charsets;
23 | import com.google.common.collect.Lists;
24 | import com.google.common.io.Resources;
25 | import com.tdunning.plume.PCollection;
26 | import com.tdunning.plume.PTable;
27 | import com.tdunning.plume.Pair;
28 | import com.tdunning.plume.Plume;
29 | import com.tdunning.plume.avro.AvroFile;
30 | import com.tdunning.plume.local.lazy.op.Flatten;
31 | import com.tdunning.plume.types.PCollectionType;
32 | import com.tdunning.plume.types.PTableType;
33 | import com.tdunning.plume.types.PType;
34 | import com.tdunning.plume.types.StringType;
35 |
36 | /**
37 | * Runtime for Plume implementing deferred execution and optimization.
38 | */
39 | public class LazyPlume extends Plume {
40 |
41 |   /**
42 |    * Just points to a file, doesn't read it
43 |    * @param <T> element type of the returned collection
44 |    * @param name name of the file the returned collection points to
45 |    * @param type type descriptor stored on the returned collection
46 |    * @return a collection flagged as materialized whose file is set to name and which holds no in-memory data
47 |    * @throws IOException declared in the signature; nothing in this method body raises it
48 |    */
49 |   public <T> PCollection<T> readFile(String name, PCollectionType type) throws IOException {
50 |     LazyCollection<T> coll = new LazyCollection<T>();
51 |     coll.materialized = true;
52 |     coll.type = type;
53 |     coll.setFile(name);
54 |     return coll;
55 |   }
56 |   /** Builds a materialized lazy collection from in-memory data, keeping type as its type descriptor. */
57 |   public <T> PCollection<T> fromJava(Iterable<T> source, PCollectionType type) {
58 |     return new LazyCollection<T>(source, type);
59 |   }
60 |
61 | public