├── .gitignore ├── server └── src │ └── main │ ├── resources │ └── assets │ │ ├── exhibit.css │ │ ├── profile-attrs.html │ │ ├── profile-metrics.html │ │ ├── profile-network.html │ │ ├── profile.html │ │ ├── profile-tables.html │ │ ├── editor.html │ │ ├── navbar.html │ │ └── index.html │ └── java │ └── com │ └── cloudera │ └── exhibit │ └── server │ ├── resources │ ├── CalculationResponse.java │ ├── SaveRequest.java │ ├── ComputeRequest.java │ ├── FetchResponse.java │ ├── CalculationResource.java │ ├── ComputeResource.java │ └── FetchResource.java │ ├── json │ ├── ExhibitIdSerializer.java │ ├── ExhibitIdDeserializer.java │ └── FrameSerializer.java │ ├── checks │ └── ExhibitStoresCheck.java │ ├── calcs │ ├── Calculation.java │ └── CalculationStore.java │ └── main │ ├── ExhibitConfiguration.java │ └── ExhibitStoreConfig.java ├── octave ├── src │ └── main │ │ └── java │ │ └── com │ │ └── cloudera │ │ └── exhibit │ │ └── octave │ │ ├── OctaveUnsupportedTypeException.java │ │ ├── OctaveScriptFormatException.java │ │ └── OctaveCalculator.java ├── README.md └── pom.xml ├── hive └── src │ └── main │ ├── resources │ └── udfs.hql │ └── java │ └── com │ └── cloudera │ └── exhibit │ └── hive │ ├── HiveObsDescriptor.java │ ├── JavascriptUDF.java │ ├── HiveObs.java │ ├── CollectDistinctUDAF.java │ ├── HiveAttributes.java │ ├── HivePrimitiveObsDescriptor.java │ ├── CollectAllUDAF.java │ ├── HiveVector.java │ ├── HiveFrame.java │ ├── HiveStructObsDescriptor.java │ ├── HiveEnumerator.java │ ├── WithinUDTF.java │ ├── WithinUDF.java │ ├── CodeUDF.java │ ├── ArrayUnionUDF.java │ └── WithinArrayUDF.java ├── README.md ├── core ├── src │ ├── main │ │ └── java │ │ │ └── com │ │ │ └── cloudera │ │ │ └── exhibit │ │ │ └── core │ │ │ ├── vector │ │ │ ├── Vector.java │ │ │ ├── GenericVector.java │ │ │ ├── LongVector.java │ │ │ ├── FloatVector.java │ │ │ ├── ShortVector.java │ │ │ ├── IntVector.java │ │ │ ├── BooleanVector.java │ │ │ ├── DoubleVector.java │ │ │ ├── VectorBuilder.java │ │ │ └── 
VectorUtils.java │ │ │ ├── Functor.java │ │ │ ├── Calculator.java │ │ │ ├── ExhibitStore.java │ │ │ ├── Exhibit.java │ │ │ ├── Vec.java │ │ │ ├── Frame.java │ │ │ ├── composite │ │ │ ├── UpdatableExhibitDescriptor.java │ │ │ ├── CompositeObs.java │ │ │ ├── NeighborLookup.java │ │ │ ├── UpdatableExhibit.java │ │ │ └── CompositeObsDescriptor.java │ │ │ ├── calculators │ │ │ └── LookupCalculator.java │ │ │ ├── ExhibitId.java │ │ │ ├── Column.java │ │ │ ├── simple │ │ │ ├── SimpleExhibitStore.java │ │ │ ├── SimpleFrame.java │ │ │ └── SimpleObs.java │ │ │ ├── multi │ │ │ └── MultiExhibitStore.java │ │ │ ├── Obs.java │ │ │ ├── ObsDescriptor.java │ │ │ └── ExhibitDescriptor.java │ └── test │ │ └── java │ │ └── com │ │ └── cloudera │ │ └── exhibit │ │ └── core │ │ ├── SimpleTest.java │ │ ├── CompositeTest.java │ │ └── vector │ │ └── VectorTest.java └── pom.xml ├── etl └── src │ ├── main │ └── java │ │ └── com │ │ └── cloudera │ │ └── exhibit │ │ └── etl │ │ ├── config │ │ ├── ReadableConfig.java │ │ ├── SourceConfig.java │ │ ├── OutputConfig.java │ │ ├── PivotConfig.java │ │ ├── AggConfig.java │ │ └── BuildConfig.java │ │ ├── fn │ │ ├── FilterOutFn.java │ │ ├── SchemaMapFn.java │ │ └── CollectFn.java │ │ ├── tbl │ │ ├── TblType.java │ │ ├── Tbl.java │ │ └── TblCache.java │ │ ├── SchemaProvider.java │ │ └── SchemaUtil.java │ └── test │ └── java │ └── com │ └── cloudera │ └── exhibit │ └── etl │ ├── ConfigHelperTest.java │ ├── JSQLTest.java │ └── tbl │ ├── RatioTblTest.java │ └── SumTopTblTest.java ├── javascript ├── src │ └── main │ │ └── java │ │ └── com │ │ └── cloudera │ │ └── exhibit │ │ └── javascript │ │ ├── SandboxWrapFactory.java │ │ ├── SandboxNativeJavaObject.java │ │ ├── ExhibitContextFactory.java │ │ ├── ScriptableObs.java │ │ ├── ScriptableVec.java │ │ └── ScriptableFrame.java └── pom.xml ├── mongodb ├── src │ └── main │ │ └── java │ │ └── com │ │ └── cloudera │ │ └── exhibit │ │ └── mongodb │ │ ├── BSONObs.java │ │ ├── BSONFrame.java │ │ └── 
BSONObsDescriptor.java └── pom.xml ├── thrift ├── src │ └── main │ │ └── java │ │ └── com │ │ └── cloudera │ │ └── exhibit │ │ └── thrift │ │ ├── ThriftObs.java │ │ └── ThriftFrame.java └── pom.xml ├── sql ├── src │ └── main │ │ └── java │ │ └── com │ │ └── cloudera │ │ └── exhibit │ │ └── sql │ │ ├── VectorEnumerator.java │ │ ├── ModifiableSchema.java │ │ └── FrameEnumerator.java └── pom.xml ├── avro ├── src │ └── main │ │ └── java │ │ └── com │ │ └── cloudera │ │ └── exhibit │ │ └── avro │ │ ├── AvroObs.java │ │ └── AvroObsDescriptor.java └── pom.xml └── spark └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | .project 3 | .settings 4 | .cache 5 | target 6 | *.iml 7 | .idea 8 | -------------------------------------------------------------------------------- /server/src/main/resources/assets/exhibit.css: -------------------------------------------------------------------------------- 1 | body { padding-top: 70px; } 2 | 3 | #profile-tables { 4 | border: 20px solid transparent; 5 | } 6 | -------------------------------------------------------------------------------- /octave/src/main/java/com/cloudera/exhibit/octave/OctaveUnsupportedTypeException.java: -------------------------------------------------------------------------------- 1 | package com.cloudera.exhibit.octave; 2 | 3 | public class OctaveUnsupportedTypeException extends Exception { 4 | public OctaveUnsupportedTypeException(String message) { 5 | super(message); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /server/src/main/resources/assets/profile-attrs.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |

{{profileTitle}}

4 | 7 |
8 |
9 | -------------------------------------------------------------------------------- /server/src/main/resources/assets/profile-metrics.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |

Metrics

4 |
5 |
6 | 9 |
10 |
11 | -------------------------------------------------------------------------------- /hive/src/main/resources/udfs.hql: -------------------------------------------------------------------------------- 1 | 2 | CREATE TEMPORARY FUNCTION within AS 'com.cloudera.exhibit.hive.WithinUDF'; 3 | CREATE TEMPORARY FUNCTION within_table AS 'com.cloudera.exhibit.hive.WithinUDTF'; 4 | CREATE TEMPORARY FUNCTION collect_all AS 'com.cloudera.exhibit.hive.CollectAllUDAF'; 5 | CREATE TEMPORARY FUNCTION collect_distinct AS 'com.cloudera.exhibit.hive.CollectDistinctUDAF'; 6 | CREATE TEMPORARY FUNCTION array_union AS 'com.cloudera.exhibit.hive.ArrayUnionUDF'; 7 | -------------------------------------------------------------------------------- /server/src/main/resources/assets/profile-network.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
{{entity}}
4 | 9 |
10 |
11 | -------------------------------------------------------------------------------- /server/src/main/resources/assets/profile.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 | 5 |
6 |
7 | 8 |
9 |
10 | 11 |
12 |
13 |
14 |
15 | 16 |
17 |
18 |
19 | -------------------------------------------------------------------------------- /octave/README.md: -------------------------------------------------------------------------------- 1 | exhibit-octave 2 | ============== 3 | 4 | Module for Octave bindings within Exhibit. 5 | 6 | This module requires [JavaOctave](https://kenai.com/projects/javaoctave/pages/Home) to build, and the `octave` executable to be present in `$PATH` to execute. 7 | 8 | Minimal setup instructions for OSX are provided below: 9 | 10 | ```sh 11 | # Install JavaOctave 12 | $ hg clone https://hg.kenai.com/hg/javaoctave~source-code-repository javaoctave-src 13 | $ cd javaoctave-src/javaoctave 14 | $ mvn clean install 15 | 16 | # Install Octave 17 | # brew install octave 18 | ``` 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Exhibit: It's SQL All the Way Down 2 | 3 | Exhibit is an evolving collection of various projects for executing SQL against 4 | things that look like tiny database tables, including: 5 | 6 | 1. Hive arrays of structs 7 | 2. Collections of Avro and Thrift records 8 | 3. Arrays of BSON objects from MongoDB 9 | 10 | To get started, you'll want to run: 11 | 12 | mvn clean package 13 | 14 | To build all of the jars, including the `exhibit-*-jar-with-dependencies.jar` 15 | JAR that is useful for playing with Exhibit and Hive. There is more documentation 16 | in the README file for the hive module. 17 | -------------------------------------------------------------------------------- /server/src/main/resources/assets/profile-tables.html: -------------------------------------------------------------------------------- 1 |
2 | 7 |
8 |
10 | 11 |
12 |
13 |
14 |
15 | -------------------------------------------------------------------------------- /server/src/main/resources/assets/editor.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 | 5 |
6 | 7 | 8 | 9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/vector/Vector.java: -------------------------------------------------------------------------------- 1 | package com.cloudera.exhibit.core.vector; 2 | 3 | import com.cloudera.exhibit.core.FieldType; 4 | import com.cloudera.exhibit.core.Frame; 5 | import com.cloudera.exhibit.core.Obs; 6 | import com.cloudera.exhibit.core.Vec; 7 | import com.cloudera.exhibit.core.simple.SimpleFrame; 8 | import com.cloudera.exhibit.core.simple.SimpleObs; 9 | import com.cloudera.exhibit.core.simple.SimpleObsDescriptor; 10 | import com.google.common.base.Function; 11 | import com.google.common.collect.ImmutableList; 12 | import com.google.common.collect.Lists; 13 | 14 | public abstract class Vector implements Vec { 15 | private final FieldType type; 16 | 17 | protected Vector(FieldType type){ 18 | this.type = type; 19 | } 20 | 21 | @Override 22 | public FieldType getType(){ 23 | return this.type; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /octave/src/main/java/com/cloudera/exhibit/octave/OctaveScriptFormatException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.octave; 16 | 17 | public class OctaveScriptFormatException extends Exception { 18 | OctaveScriptFormatException(String ex){ 19 | super(ex); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /hive/src/main/java/com/cloudera/exhibit/hive/HiveObsDescriptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.hive; 16 | 17 | import com.cloudera.exhibit.core.ObsDescriptor; 18 | 19 | abstract class HiveObsDescriptor extends ObsDescriptor { 20 | abstract Object[] convert(Object rawObs); 21 | } 22 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/Functor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. 
See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.core; 16 | 17 | import com.google.common.base.Function; 18 | 19 | public interface Functor extends Function { 20 | public ExhibitDescriptor initialize(ExhibitDescriptor descriptor); 21 | public void cleanup(); 22 | } 23 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/Calculator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.core; 16 | 17 | import com.google.common.base.Function; 18 | 19 | public interface Calculator extends Function> { 20 | public ObsDescriptor initialize(ExhibitDescriptor descriptor); 21 | public void cleanup(); 22 | } 23 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/ExhibitStore.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. 
You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.core; 16 | 17 | import com.google.common.base.Optional; 18 | 19 | import java.util.Set; 20 | 21 | public interface ExhibitStore { 22 | boolean isConnected(); 23 | Set entities(); 24 | Optional find(ExhibitId id); 25 | } 26 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/Exhibit.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.core; 16 | 17 | import java.io.Serializable; 18 | import java.util.Map; 19 | 20 | public interface Exhibit extends Serializable { 21 | ExhibitDescriptor descriptor(); 22 | Obs attributes(); 23 | Map frames(); 24 | Map vectors(); 25 | } 26 | -------------------------------------------------------------------------------- /server/src/main/java/com/cloudera/exhibit/server/resources/CalculationResponse.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.server.resources; 16 | 17 | import com.fasterxml.jackson.annotation.JsonProperty; 18 | 19 | public class CalculationResponse { 20 | @JsonProperty 21 | public String code; 22 | 23 | public CalculationResponse(String code) { 24 | this.code = code; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /server/src/main/resources/assets/navbar.html: -------------------------------------------------------------------------------- 1 | 22 | -------------------------------------------------------------------------------- /etl/src/main/java/com/cloudera/exhibit/etl/config/ReadableConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. 
licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.etl.config; 16 | 17 | /** 18 | * Config info for in-memory frames that will be available for every exhibit during processing. 19 | */ 20 | public class ReadableConfig { 21 | // The name of this in-memory frame. 22 | public String name; 23 | 24 | // The Kite URI to read the frame from. 25 | public String uri; 26 | } 27 | -------------------------------------------------------------------------------- /server/src/main/java/com/cloudera/exhibit/server/resources/SaveRequest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.server.resources; 16 | 17 | import com.fasterxml.jackson.annotation.JsonProperty; 18 | 19 | public class SaveRequest { 20 | @JsonProperty 21 | String code; 22 | 23 | public SaveRequest() { 24 | this.code = null; 25 | } 26 | 27 | public SaveRequest(String code) { 28 | this.code = code; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/vector/GenericVector.java: -------------------------------------------------------------------------------- 1 | package com.cloudera.exhibit.core.vector; 2 | 3 | import com.cloudera.exhibit.core.FieldType; 4 | import com.cloudera.exhibit.core.Frame; 5 | import com.cloudera.exhibit.core.ObsDescriptor; 6 | import com.cloudera.exhibit.core.simple.SimpleObsDescriptor; 7 | 8 | import java.util.Collections; 9 | import java.util.Iterator; 10 | import java.util.List; 11 | 12 | public class GenericVector extends Vector { 13 | private List values; 14 | 15 | public GenericVector(FieldType fieldType){ 16 | this(fieldType, Collections.emptyList()); 17 | } 18 | 19 | public GenericVector(FieldType fieldType, List values){ 20 | super(fieldType); 21 | this.values = values; 22 | } 23 | 24 | @Override 25 | public Object get(int index) { 26 | return values.get(index); 27 | } 28 | 29 | @Override 30 | public int size() { 31 | return values.size(); 32 | } 33 | 34 | @Override 35 | public Iterator iterator() { 36 | return values.iterator(); 37 | } 38 | } 39 | 40 | -------------------------------------------------------------------------------- /hive/src/main/java/com/cloudera/exhibit/hive/JavascriptUDF.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). 
You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.hive; 16 | 17 | import com.google.common.base.Joiner; 18 | 19 | public class JavascriptUDF extends CodeUDF { 20 | protected JavascriptUDF() { 21 | super("javascript"); 22 | } 23 | 24 | @Override 25 | public String getDisplayString(String[] args) { 26 | assert (args.length > 1); 27 | return "js(" + Joiner.on(',').join(args) + ")"; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /core/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | com.cloudera.exhibit 7 | exhibit-parent 8 | 0.8.0 9 | 10 | 11 | exhibit-core 12 | Exhibit Core 13 | 14 | 15 | 16 | com.google.guava 17 | guava 18 | 19 | 20 | 21 | junit 22 | junit 23 | test 24 | 25 | 26 | 27 | 28 | 29 | 30 | org.apache.maven.plugins 31 | maven-compiler-plugin 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /javascript/src/main/java/com/cloudera/exhibit/javascript/SandboxWrapFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. 
You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.javascript; 16 | 17 | import org.mozilla.javascript.Context; 18 | import org.mozilla.javascript.Scriptable; 19 | import org.mozilla.javascript.WrapFactory; 20 | 21 | class SandboxWrapFactory extends WrapFactory { 22 | @Override 23 | public Scriptable wrapAsJavaObject(Context cx, Scriptable scope, Object javaObject, Class staticType) { 24 | return new SandboxNativeJavaObject(scope, javaObject, staticType); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/Vec.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | package com.cloudera.exhibit.core; 19 | 20 | import java.io.Serializable; 21 | 22 | public interface Vec extends Iterable, Serializable { 23 | 24 | FieldType getType(); 25 | 26 | Object get(int index); 27 | 28 | int size(); 29 | } 30 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/Frame.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.core; 16 | 17 | import java.io.Serializable; 18 | 19 | public abstract class Frame implements Iterable, Serializable { 20 | public abstract ObsDescriptor descriptor(); 21 | public abstract int size(); 22 | public abstract Obs get(int rowIndex); 23 | 24 | public Column $(int columnIndex) { 25 | return Column.create(this, columnIndex); 26 | } 27 | 28 | public Column $(String columnName) { 29 | return Column.create(this, columnName); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /etl/src/test/java/com/cloudera/exhibit/etl/ConfigHelperTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). 
You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.etl; 16 | 17 | import com.cloudera.exhibit.etl.config.ComputeConfig; 18 | import com.cloudera.exhibit.etl.config.ConfigHelper; 19 | import org.junit.Test; 20 | 21 | public class ConfigHelperTest { 22 | 23 | private String testCfg = null; 24 | 25 | @Test 26 | public void testHelper() throws Exception { 27 | //TODO: replace w/resource file 28 | if (testCfg != null) { 29 | ComputeConfig cc = ConfigHelper.parseComputeConfig(testCfg); 30 | System.out.println("Success"); 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /server/src/main/java/com/cloudera/exhibit/server/resources/ComputeRequest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.server.resources; 16 | 17 | import com.cloudera.exhibit.core.ExhibitId; 18 | import com.fasterxml.jackson.annotation.JsonProperty; 19 | 20 | public class ComputeRequest { 21 | 22 | @JsonProperty 23 | public ExhibitId id; 24 | 25 | @JsonProperty 26 | public String code; 27 | 28 | public ComputeRequest() { 29 | this.id = null; 30 | this.code = null; 31 | } 32 | 33 | public ComputeRequest(ExhibitId id, String code) { 34 | this.id = id; 35 | this.code = code; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /javascript/src/main/java/com/cloudera/exhibit/javascript/SandboxNativeJavaObject.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.javascript; 16 | 17 | import org.mozilla.javascript.NativeJavaObject; 18 | import org.mozilla.javascript.Scriptable; 19 | 20 | class SandboxNativeJavaObject extends NativeJavaObject { 21 | public SandboxNativeJavaObject(Scriptable scope, Object javaObject, Class staticType) { 22 | super(scope, javaObject, staticType); 23 | } 24 | 25 | @Override 26 | public Object get(String name, Scriptable start) { 27 | if (name.equals("getClass")) { 28 | return NOT_FOUND; 29 | } 30 | return super.get(name, start); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /hive/src/main/java/com/cloudera/exhibit/hive/HiveObs.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.hive; 16 | 17 | import com.cloudera.exhibit.core.ObsDescriptor; 18 | import com.cloudera.exhibit.core.Obs; 19 | 20 | class HiveObs extends Obs { 21 | 22 | private final ObsDescriptor descriptor; 23 | private final Object[] values; 24 | 25 | public HiveObs(HiveObsDescriptor descriptor, Object listElement) { 26 | this.descriptor = descriptor; 27 | this.values = descriptor.convert(listElement); 28 | } 29 | 30 | @Override 31 | public ObsDescriptor descriptor() { 32 | return descriptor; 33 | } 34 | 35 | @Override 36 | public Object get(int index) { 37 | return values[index]; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /javascript/src/main/java/com/cloudera/exhibit/javascript/ExhibitContextFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.javascript; 16 | 17 | import org.mozilla.javascript.Context; 18 | import org.mozilla.javascript.ContextFactory; 19 | 20 | class ExhibitContextFactory extends ContextFactory { 21 | @Override 22 | protected boolean hasFeature(Context cx, int featureIndex) { 23 | if (featureIndex == Context.FEATURE_DYNAMIC_SCOPE) { 24 | return true; 25 | } 26 | return super.hasFeature(cx, featureIndex); 27 | } 28 | 29 | @Override 30 | protected Context makeContext() { 31 | Context cx = super.makeContext(); 32 | cx.setWrapFactory(new SandboxWrapFactory()); 33 | return cx; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /mongodb/src/main/java/com/cloudera/exhibit/mongodb/BSONObs.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.mongodb; 16 | 17 | import com.cloudera.exhibit.core.ObsDescriptor; 18 | import com.cloudera.exhibit.core.Obs; 19 | import org.bson.BSONObject; 20 | 21 | public class BSONObs extends Obs { 22 | 23 | private BSONObsDescriptor descriptor; 24 | private BSONObject bson; 25 | 26 | public BSONObs(BSONObsDescriptor descriptor, BSONObject bson) { 27 | this.descriptor = descriptor; 28 | this.bson = bson; 29 | } 30 | 31 | @Override 32 | public ObsDescriptor descriptor() { 33 | return descriptor; 34 | } 35 | 36 | @Override 37 | public Object get(int index) { 38 | return bson.get(descriptor.getBSONColumn(index)); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /server/src/main/java/com/cloudera/exhibit/server/resources/FetchResponse.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.server.resources; 16 | 17 | import com.cloudera.exhibit.core.Exhibit; 18 | import com.cloudera.exhibit.core.ExhibitId; 19 | import com.fasterxml.jackson.annotation.JsonProperty; 20 | 21 | import java.util.Map; 22 | 23 | public class FetchResponse { 24 | 25 | @JsonProperty 26 | public ExhibitId id; 27 | 28 | @JsonProperty 29 | public Exhibit exhibit; 30 | 31 | @JsonProperty 32 | public Map> metrics; 33 | 34 | public FetchResponse(ExhibitId id, Exhibit exhibit, Map> metrics) { 35 | this.id = id; 36 | this.exhibit = exhibit; 37 | this.metrics = metrics; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /thrift/src/main/java/com/cloudera/exhibit/thrift/ThriftObs.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.thrift; 16 | 17 | import com.cloudera.exhibit.core.ObsDescriptor; 18 | import com.cloudera.exhibit.core.Obs; 19 | import org.apache.thrift.TBase; 20 | 21 | public class ThriftObs extends Obs { 22 | 23 | private final ThriftObsDescriptor descriptor; 24 | private final TBase tBase; 25 | 26 | public ThriftObs(ThriftObsDescriptor descriptor, TBase tBase) { 27 | this.descriptor = descriptor; 28 | this.tBase = tBase; 29 | } 30 | 31 | @Override 32 | public ObsDescriptor descriptor() { 33 | return descriptor; 34 | } 35 | 36 | @Override 37 | public Object get(int index) { 38 | return descriptor.getFieldValue(index, tBase); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /server/src/main/java/com/cloudera/exhibit/server/json/ExhibitIdSerializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.server.json; 16 | 17 | import com.cloudera.exhibit.core.ExhibitId; 18 | import com.fasterxml.jackson.core.JsonGenerator; 19 | import com.fasterxml.jackson.databind.JsonSerializer; 20 | import com.fasterxml.jackson.databind.SerializerProvider; 21 | 22 | import java.io.IOException; 23 | 24 | public class ExhibitIdSerializer extends JsonSerializer { 25 | @Override 26 | public void serialize(ExhibitId exhibitId, JsonGenerator gen, SerializerProvider provider) throws IOException { 27 | gen.writeStartObject(); 28 | gen.writeStringField("entity", exhibitId.getEntity()); 29 | gen.writeStringField("id", exhibitId.getId()); 30 | gen.writeEndObject(); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /core/src/test/java/com/cloudera/exhibit/core/SimpleTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.core; 16 | 17 | import com.cloudera.exhibit.core.simple.SimpleObsDescriptor; 18 | import org.junit.Test; 19 | 20 | import static org.junit.Assert.assertEquals; 21 | 22 | public class SimpleTest { 23 | 24 | public static final ObsDescriptor ATTR_DESC = SimpleObsDescriptor.builder() 25 | .booleanField("a") 26 | .stringField("b") 27 | .intField("c") 28 | .build(); 29 | 30 | @Test 31 | public void testSimpleObsDescriptor() throws Exception{ 32 | assertEquals(3, ATTR_DESC.size()); 33 | assertEquals(1, ATTR_DESC.indexOf("b")); 34 | assertEquals(-1, ATTR_DESC.indexOf("q")); 35 | assertEquals(new ObsDescriptor.Field("c", FieldType.INTEGER), ATTR_DESC.get(2)); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /server/src/main/java/com/cloudera/exhibit/server/checks/ExhibitStoresCheck.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.server.checks; 16 | 17 | import com.cloudera.exhibit.core.ExhibitStore; 18 | import com.codahale.metrics.health.HealthCheck; 19 | 20 | public class ExhibitStoresCheck extends HealthCheck { 21 | 22 | private final ExhibitStore stores; 23 | 24 | public ExhibitStoresCheck(ExhibitStore stores) { 25 | this.stores = stores; 26 | } 27 | 28 | @Override 29 | protected Result check() throws Exception { 30 | try { 31 | if (stores.isConnected()) { 32 | return Result.healthy(); 33 | } else { 34 | return Result.unhealthy("Disconnected exhibit stores"); 35 | } 36 | } catch (Throwable t) { 37 | return Result.unhealthy(t); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/vector/LongVector.java: -------------------------------------------------------------------------------- 1 | package com.cloudera.exhibit.core.vector; 2 | 3 | import com.cloudera.exhibit.core.FieldType; 4 | import com.google.common.primitives.Longs; 5 | 6 | import java.util.Collections; 7 | import java.util.Iterator; 8 | import java.util.List; 9 | 10 | public class LongVector extends Vector { 11 | 12 | private long [] values; 13 | private int size; 14 | 15 | protected LongVector(){ 16 | this(Collections.emptyList()); 17 | } 18 | 19 | // Construct which avoids the copy of data 20 | public LongVector(final long[] arr) { 21 | super(FieldType.LONG); 22 | size = arr.length; 23 | values = arr; 24 | } 25 | 26 | protected LongVector(List values) { 27 | super(FieldType.LONG); 28 | this.size = values.size(); 29 | this.values = new long[this.size]; 30 | int idx = 0; 31 | for(Object o: values) { 32 | if(!(o instanceof Long)){ 33 | throw new IllegalArgumentException("Received non-long value" + o.toString() ); 34 | } 35 | this.values[idx] = (Long)o; 36 | idx++; 37 | } 38 | } 39 | 40 | @Override 41 | public Long get(int index) { 42 | return values[index]; 43 | } 44 | 45 | @Override 46 | 
public int size() { 47 | return size; 48 | } 49 | 50 | @Override 51 | public Iterator iterator() { 52 | return ((List)Longs.asList(values)).iterator(); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/vector/FloatVector.java: -------------------------------------------------------------------------------- 1 | package com.cloudera.exhibit.core.vector; 2 | 3 | import com.cloudera.exhibit.core.FieldType; 4 | import com.google.common.primitives.Floats; 5 | 6 | import java.util.Collections; 7 | import java.util.Iterator; 8 | import java.util.List; 9 | 10 | public class FloatVector extends Vector { 11 | 12 | private float [] values; 13 | private int size; 14 | 15 | protected FloatVector(){ 16 | this(Collections.emptyList()); 17 | } 18 | 19 | // Construct which avoids the copy of data 20 | public FloatVector(final float[] arr) { 21 | super(FieldType.FLOAT); 22 | size = arr.length; 23 | values = arr; 24 | } 25 | 26 | protected FloatVector(List values) { 27 | super(FieldType.FLOAT); 28 | this.size = values.size(); 29 | this.values = new float[this.size]; 30 | int idx = 0; 31 | for(Object o: values) { 32 | if(!(o instanceof Float)){ 33 | throw new IllegalArgumentException("Received non-float value" + o.toString() ); 34 | } 35 | this.values[idx] = (Float)o; 36 | idx++; 37 | } 38 | } 39 | 40 | @Override 41 | public Float get(int index) { 42 | return values[index]; 43 | } 44 | 45 | @Override 46 | public int size() { 47 | return size; 48 | } 49 | 50 | @Override 51 | public Iterator iterator() { 52 | return ((List)Floats.asList(values)).iterator(); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /etl/src/main/java/com/cloudera/exhibit/etl/fn/FilterOutFn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. 
licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.etl.fn; 16 | 17 | import org.apache.avro.generic.GenericData; 18 | import org.apache.avro.generic.GenericRecord; 19 | import org.apache.crunch.DoFn; 20 | import org.apache.crunch.Emitter; 21 | import org.apache.crunch.Pair; 22 | 23 | public class FilterOutFn extends DoFn, GenericData.Record> { 24 | 25 | private final int outputIndex; 26 | 27 | public FilterOutFn(int outputIndex) { 28 | this.outputIndex = outputIndex; 29 | } 30 | 31 | @Override 32 | public void process(Pair input, Emitter emitter) { 33 | if (outputIndex == input.first()) { 34 | emitter.emit((GenericData.Record) input.second().get("value")); 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/vector/ShortVector.java: -------------------------------------------------------------------------------- 1 | package com.cloudera.exhibit.core.vector; 2 | 3 | import com.cloudera.exhibit.core.FieldType; 4 | import com.google.common.primitives.Shorts; 5 | 6 | import java.util.Collections; 7 | import java.util.Iterator; 8 | import java.util.List; 9 | 10 | public class ShortVector extends Vector { 11 | 12 | private short [] values; 13 | private int size; 14 | 15 | protected ShortVector(){ 16 | this(Collections.emptyList()); 17 | } 18 | 19 | // Construct which avoids the copy of data 20 | public ShortVector(final short[] arr) { 21 | super(FieldType.SHORT); 22 | size = 
arr.length; 23 | values = arr; 24 | } 25 | 26 | protected ShortVector(List values) { 27 | super(FieldType.SHORT); 28 | this.size = values.size(); 29 | this.values = new short[this.size]; 30 | int idx = 0; 31 | for(Object o: values) { 32 | if(!(o instanceof Short)){ 33 | throw new IllegalArgumentException("Received non-short value" + o.toString() + " class: " + o.getClass().getCanonicalName()); 34 | } 35 | this.values[idx] = (Short)o; 36 | idx++; 37 | } 38 | } 39 | 40 | @Override 41 | public Short get(int index) { 42 | return values[index]; 43 | } 44 | 45 | @Override 46 | public int size() { 47 | return size; 48 | } 49 | 50 | @Override 51 | public Iterator iterator() { 52 | return ((List)Shorts.asList(values)).iterator(); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/composite/UpdatableExhibitDescriptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.core.composite; 16 | 17 | import com.cloudera.exhibit.core.ExhibitDescriptor; 18 | import com.cloudera.exhibit.core.FieldType; 19 | import com.cloudera.exhibit.core.ObsDescriptor; 20 | import com.google.common.collect.Maps; 21 | 22 | public class UpdatableExhibitDescriptor extends ExhibitDescriptor { 23 | public UpdatableExhibitDescriptor(ExhibitDescriptor base) { 24 | super(base.attributes(), Maps.newHashMap(base.frames()), Maps.newHashMap(base.vectors())); 25 | } 26 | 27 | public UpdatableExhibitDescriptor add(String name, ObsDescriptor od) { 28 | frames().put(name, od); 29 | return this; 30 | } 31 | 32 | public UpdatableExhibitDescriptor add(String name, FieldType vd) { 33 | vectors().put(name, vd); 34 | return this; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /server/src/main/java/com/cloudera/exhibit/server/json/ExhibitIdDeserializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.server.json; 16 | 17 | import com.cloudera.exhibit.core.ExhibitId; 18 | import com.fasterxml.jackson.core.JsonParser; 19 | import com.fasterxml.jackson.core.ObjectCodec; 20 | import com.fasterxml.jackson.databind.DeserializationContext; 21 | import com.fasterxml.jackson.databind.JsonDeserializer; 22 | import com.fasterxml.jackson.databind.JsonNode; 23 | 24 | import java.io.IOException; 25 | 26 | public class ExhibitIdDeserializer extends JsonDeserializer { 27 | @Override 28 | public ExhibitId deserialize(JsonParser parser, DeserializationContext ctxt) throws IOException { 29 | ObjectCodec oc = parser.getCodec(); 30 | JsonNode node = oc.readTree(parser); 31 | return ExhibitId.create(node.get("entity").asText(), node.get("id").asText()); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/vector/IntVector.java: -------------------------------------------------------------------------------- 1 | package com.cloudera.exhibit.core.vector; 2 | 3 | import com.cloudera.exhibit.core.FieldType; 4 | import com.google.common.primitives.Ints; 5 | 6 | import java.util.Collections; 7 | import java.util.Iterator; 8 | import java.util.List; 9 | 10 | public class IntVector extends Vector { 11 | 12 | private int [] values; 13 | private int size; 14 | 15 | protected IntVector(){ 16 | this(Collections.emptyList()); 17 | } 18 | 19 | // Construct which avoids the copy of data 20 | public IntVector(final int[] arr) { 21 | super(FieldType.INTEGER); 22 | size = arr.length; 23 | values = arr; 24 | } 25 | 26 | protected IntVector(List values) { 27 | super(FieldType.INTEGER); 28 | this.size = values.size(); 29 | this.values = new int[this.size]; 30 | int idx = 0; 31 | for(Object o: values) { 32 | if(!(o instanceof Integer)){ 33 | throw new IllegalArgumentException("Received non-int value" + o.toString() ); 34 | } 35 | this.values[idx] = (Integer)o; 36 | idx++; 
37 | } 38 | } 39 | 40 | public int[] getData() { 41 | return values; 42 | } 43 | 44 | @Override 45 | public Integer get(int index) { 46 | return values[index]; 47 | } 48 | 49 | @Override 50 | public int size() { 51 | return size; 52 | } 53 | 54 | @Override 55 | public Iterator iterator() { 56 | return ((List)Ints.asList(values)).iterator(); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/calculators/LookupCalculator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.core.calculators; 16 | 17 | import com.cloudera.exhibit.core.Calculator; 18 | import com.cloudera.exhibit.core.Exhibit; 19 | import com.cloudera.exhibit.core.ExhibitDescriptor; 20 | import com.cloudera.exhibit.core.Obs; 21 | import com.cloudera.exhibit.core.ObsDescriptor; 22 | 23 | public class LookupCalculator implements Calculator { 24 | 25 | private final String frame; 26 | 27 | public LookupCalculator(String frame) { 28 | this.frame = frame; 29 | } 30 | 31 | @Override 32 | public ObsDescriptor initialize(ExhibitDescriptor descriptor) { 33 | return descriptor.frames().get(frame); 34 | } 35 | 36 | @Override 37 | public void cleanup() { 38 | } 39 | 40 | @Override 41 | public Iterable apply(Exhibit exhibit) { 42 | return exhibit.frames().get(frame); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/vector/BooleanVector.java: -------------------------------------------------------------------------------- 1 | package com.cloudera.exhibit.core.vector; 2 | 3 | import com.cloudera.exhibit.core.FieldType; 4 | import com.google.common.primitives.Booleans; 5 | 6 | import java.util.Collections; 7 | import java.util.Iterator; 8 | import java.util.List; 9 | 10 | public class BooleanVector extends Vector { 11 | 12 | private boolean [] values; 13 | private int size; 14 | 15 | protected BooleanVector(){ 16 | this(Collections.emptyList()); 17 | } 18 | 19 | // Construct which avoids the copy of data 20 | public BooleanVector(final boolean[] arr) { 21 | super(FieldType.BOOLEAN); 22 | size = arr.length; 23 | values = arr; 24 | } 25 | 26 | protected BooleanVector(List values) { 27 | super(FieldType.BOOLEAN); 28 | this.size = values.size(); 29 | this.values = new boolean[this.size]; 30 | int idx = 0; 31 | for(Object o: values) { 32 | if(!(o instanceof Boolean)){ 33 | throw new IllegalArgumentException("Received non-boolean value" + o.toString() ); 
34 | } 35 | this.values[idx] = (Boolean)o; 36 | idx++; 37 | } 38 | } 39 | 40 | public boolean[] getData() { 41 | return values; 42 | } 43 | 44 | @Override 45 | public Boolean get(int index) { 46 | return values[index]; 47 | } 48 | 49 | @Override 50 | public int size() { 51 | return size; 52 | } 53 | 54 | @Override 55 | public Iterator iterator() { 56 | return ((List)Booleans.asList(values)).iterator(); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/vector/DoubleVector.java: -------------------------------------------------------------------------------- 1 | package com.cloudera.exhibit.core.vector; 2 | 3 | import com.cloudera.exhibit.core.FieldType; 4 | import com.google.common.primitives.Doubles; 5 | 6 | import java.util.Collections; 7 | import java.util.Iterator; 8 | import java.util.List; 9 | 10 | public class DoubleVector extends Vector { 11 | 12 | private double [] values; 13 | private int size; 14 | 15 | protected DoubleVector(){ 16 | this(Collections.emptyList()); 17 | } 18 | 19 | // Construct which avoids the copy of data 20 | public DoubleVector(final double[] arr) { 21 | super(FieldType.DOUBLE); 22 | size = arr.length; 23 | values = arr; 24 | } 25 | 26 | protected DoubleVector(List values) { 27 | super(FieldType.DOUBLE); 28 | this.size = values.size(); 29 | this.values = new double[this.size]; 30 | int idx = 0; 31 | for(Object o: values) { 32 | if(!(o instanceof Number)){ 33 | throw new IllegalArgumentException("Received non-double value" + o.toString() ); 34 | } 35 | this.values[idx] = ((Number)o).doubleValue(); 36 | idx++; 37 | } 38 | } 39 | 40 | public double[] getData() { 41 | return values; 42 | } 43 | 44 | @Override 45 | public Double get(int index) { 46 | return values[index]; 47 | } 48 | 49 | @Override 50 | public int size() { 51 | return size; 52 | } 53 | 54 | @Override 55 | public Iterator iterator() { 56 | return 
((List)Doubles.asList(values)).iterator(); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/composite/CompositeObs.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.core.composite; 16 | 17 | import com.cloudera.exhibit.core.Obs; 18 | import com.cloudera.exhibit.core.ObsDescriptor; 19 | 20 | import java.util.List; 21 | 22 | public class CompositeObs extends Obs { 23 | 24 | private CompositeObsDescriptor descriptor; 25 | private List components; 26 | 27 | public CompositeObs(List components) { 28 | this.components = components; 29 | } 30 | 31 | public CompositeObs(CompositeObsDescriptor descriptor, List components) { 32 | this.descriptor = descriptor; 33 | this.components = components; 34 | } 35 | 36 | @Override 37 | public ObsDescriptor descriptor() { 38 | return descriptor; 39 | } 40 | 41 | @Override 42 | public Object get(int index) { 43 | int offsetIndex = descriptor.getOffsetIndex(index); 44 | int cmpIdx = index - descriptor.getOffset(offsetIndex); 45 | return components.get(offsetIndex).get(cmpIdx); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/vector/VectorBuilder.java: 
-------------------------------------------------------------------------------- 1 | package com.cloudera.exhibit.core.vector; 2 | 3 | import com.cloudera.exhibit.core.FieldType; 4 | 5 | import java.util.List; 6 | 7 | public class VectorBuilder { 8 | 9 | public static Vector build(FieldType type, List values) { 10 | switch(type) { 11 | case LONG: 12 | case FLOAT: 13 | case DOUBLE: 14 | return new DoubleVector(values); 15 | case BOOLEAN: 16 | return new BooleanVector(values); 17 | case SHORT: 18 | case INTEGER: 19 | return new IntVector(values); 20 | case STRING: 21 | case DATE: 22 | case TIME: 23 | case TIMESTAMP: 24 | case DECIMAL: 25 | return new GenericVector(type, values); 26 | } 27 | throw new IllegalArgumentException("Unsupported FieldType: " + type); 28 | } 29 | 30 | public static Vector doubles(List values) { 31 | return new DoubleVector(values); 32 | } 33 | 34 | public static Vector bools(List values) { 35 | return new BooleanVector(values); 36 | } 37 | 38 | public static Vector shorts(List values) { 39 | return new IntVector(values); 40 | // return new ShortVector(values); 41 | } 42 | 43 | public static Vector ints(List values) { 44 | return new IntVector(values); 45 | } 46 | 47 | public static Vector longs(List values) { 48 | return new DoubleVector(values); 49 | // return new LongVector(values); 50 | } 51 | 52 | public static Vector floats(List values) { 53 | return new DoubleVector(values); 54 | // return new FloatVector(values); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /server/src/main/java/com/cloudera/exhibit/server/calcs/Calculation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. 
You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.server.calcs; 16 | 17 | import com.cloudera.exhibit.core.Calculator; 18 | import com.cloudera.exhibit.core.Exhibit; 19 | import com.cloudera.exhibit.core.Frame; 20 | import com.cloudera.exhibit.core.Obs; 21 | import com.cloudera.exhibit.sql.SQLCalculator; 22 | 23 | public class Calculation { 24 | 25 | private int id; 26 | private String code; 27 | private Calculator calculator; 28 | private boolean initialized; 29 | 30 | 31 | public Calculation(int id, String code) { 32 | this.id = id; 33 | this.code = code; 34 | this.calculator = SQLCalculator.create(null, code); //TODO 35 | } 36 | 37 | public int getId() { 38 | return id; 39 | } 40 | 41 | public String getCode() { 42 | return code; 43 | } 44 | 45 | public Iterable apply(Exhibit e) { 46 | if (!initialized) { 47 | calculator.initialize(e.descriptor()); 48 | initialized = true; 49 | } 50 | return calculator.apply(e); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /sql/src/main/java/com/cloudera/exhibit/sql/VectorEnumerator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. 
You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.sql; 16 | 17 | import com.cloudera.exhibit.core.Vec; 18 | import org.apache.calcite.linq4j.Enumerator; 19 | 20 | public class VectorEnumerator implements Enumerator { 21 | 22 | private final Vec vector; 23 | private Object current; 24 | private int currentIndex; 25 | 26 | public VectorEnumerator(Vec vector) { 27 | this.vector = vector; 28 | this.current = null; 29 | this.currentIndex = -1; 30 | } 31 | 32 | @Override 33 | public Object current() { 34 | return current; 35 | } 36 | 37 | @Override 38 | public boolean moveNext() { 39 | currentIndex++; 40 | boolean hasNext = currentIndex < vector.size(); 41 | if (hasNext) { 42 | this.current = vector.get(currentIndex); 43 | } 44 | return hasNext; 45 | } 46 | 47 | @Override 48 | public void reset() { 49 | currentIndex = -1; 50 | current = null; 51 | } 52 | 53 | @Override 54 | public void close() { 55 | // No-op 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /hive/src/main/java/com/cloudera/exhibit/hive/CollectDistinctUDAF.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. 
You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.hive; 16 | 17 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 18 | import org.apache.hadoop.hive.ql.parse.SemanticException; 19 | import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver; 20 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; 21 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 22 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; 23 | 24 | public class CollectDistinctUDAF extends AbstractGenericUDAFResolver { 25 | @Override 26 | public GenericUDAFEvaluator getEvaluator(TypeInfo[] typeInfo) throws SemanticException { 27 | if (typeInfo.length != 1) { 28 | throw new UDFArgumentException("Only one argument expected to collect_distinct method"); 29 | } 30 | if (typeInfo[0].getCategory() == ObjectInspector.Category.LIST) { 31 | return new AbstractCollectArrayEvaluator.Sets(); 32 | } 33 | return new AbstractCollectEvaluator.Sets(); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /sql/src/main/java/com/cloudera/exhibit/sql/ModifiableSchema.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. 
You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.sql; 16 | 17 | import com.google.common.collect.Maps; 18 | import org.apache.calcite.schema.Table; 19 | import org.apache.calcite.schema.impl.AbstractSchema; 20 | 21 | import java.util.Map; 22 | 23 | public class ModifiableSchema extends AbstractSchema { 24 | 25 | private final Map tableMap; 26 | 27 | public ModifiableSchema() { 28 | this.tableMap = Maps.newHashMap(); 29 | } 30 | 31 | @Override 32 | public Map getTableMap() { 33 | return tableMap; 34 | } 35 | 36 | public FrameTable getFrame(String name) { 37 | return (FrameTable) tableMap.get(name); 38 | } 39 | 40 | public VectorTable getVector(String name) { 41 | return (VectorTable) tableMap.get(name); 42 | } 43 | 44 | @Override 45 | public boolean isMutable() { 46 | return true; 47 | } 48 | 49 | @Override 50 | public boolean contentsHaveChangedSince(long lastCheck, long now) { 51 | return true; // disable caching for now 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /hive/src/main/java/com/cloudera/exhibit/hive/HiveAttributes.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. 
You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.hive; 16 | 17 | import com.cloudera.exhibit.core.Obs; 18 | import com.cloudera.exhibit.core.ObsDescriptor; 19 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; 20 | 21 | import java.util.Arrays; 22 | import java.util.List; 23 | 24 | class HiveAttributes extends Obs { 25 | 26 | private final ObsDescriptor desc; 27 | private final List oi; 28 | private final List values; 29 | 30 | public HiveAttributes(ObsDescriptor desc, List pois) { 31 | super(); 32 | this.desc = desc; 33 | this.oi = pois; 34 | this.values = Arrays.asList(new Object[pois.size()]); 35 | } 36 | 37 | void update(int index, Object value) { 38 | this.values.set(index, value); 39 | } 40 | 41 | @Override 42 | public ObsDescriptor descriptor() { 43 | return desc; 44 | } 45 | 46 | @Override 47 | public Object get(int index) { 48 | return oi.get(index).getPrimitiveJavaObject(values.get(index)); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /etl/src/main/java/com/cloudera/exhibit/etl/config/SourceConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. 
You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.etl.config; 16 | 17 | import com.google.common.collect.Sets; 18 | import org.apache.avro.Schema; 19 | 20 | import java.io.Serializable; 21 | import java.util.Set; 22 | 23 | public class SourceConfig implements Serializable { 24 | public String name; 25 | 26 | String schemaJson; 27 | 28 | transient Schema schema; 29 | 30 | public String uri; 31 | 32 | public String path; 33 | 34 | public boolean embedded = false; 35 | 36 | public boolean repeated = true; 37 | 38 | public boolean nullable = true; 39 | 40 | public Set nested = Sets.newHashSet(); 41 | 42 | public Set keyFields; 43 | 44 | public Set invalidKeys = Sets.newHashSet(); 45 | 46 | public Set drop = Sets.newHashSet(); 47 | 48 | public void setSchema(Schema schema) { 49 | this.schemaJson = schema.toString(); 50 | } 51 | 52 | public Schema getSchema() { 53 | if (schema == null) { 54 | schema = (new Schema.Parser()).parse(schemaJson); 55 | } 56 | return schema; 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /javascript/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | com.cloudera.exhibit 7 | exhibit-parent 8 | 0.8.0 9 | 10 | 11 | exhibit-javascript 12 | Exhibit Javascript 13 | 14 | 15 | 16 | com.google.guava 17 | guava 18 | 19 | 20 | 21 | com.cloudera.exhibit 22 | exhibit-core 23 | 24 | 25 | 26 | org.mozilla 27 | rhino 28 | 1.7R4 29 | 30 | 31 | 32 | com.cloudera.exhibit 33 | exhibit-avro 34 | test 35 | 36 | 37 | 38 | com.cloudera.exhibit 39 | exhibit-mongodb 40 | test 41 | 42 | 43 | 44 | junit 45 | junit 46 | 
test 47 | 48 | 49 | 50 | 51 | 52 | 53 | org.apache.maven.plugins 54 | maven-compiler-plugin 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/vector/VectorUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.core.vector; 16 | 17 | import com.cloudera.exhibit.core.FieldType; 18 | import com.cloudera.exhibit.core.Frame; 19 | import com.cloudera.exhibit.core.Obs; 20 | import com.cloudera.exhibit.core.ObsDescriptor; 21 | import com.cloudera.exhibit.core.Vec; 22 | import com.cloudera.exhibit.core.simple.SimpleFrame; 23 | import com.cloudera.exhibit.core.simple.SimpleObs; 24 | import com.cloudera.exhibit.core.simple.SimpleObsDescriptor; 25 | import com.google.common.base.Function; 26 | import com.google.common.collect.Lists; 27 | 28 | public class VectorUtils { 29 | 30 | public static ObsDescriptor asObsDescriptor(String name, FieldType vectorType) { 31 | return SimpleObsDescriptor.of(name, vectorType); 32 | } 33 | 34 | public static Frame asFrame(String name, Vec vec) { 35 | final ObsDescriptor od = asObsDescriptor(name, vec.getType()); 36 | return new SimpleFrame(od, Lists.transform(Lists.newArrayList(vec), new Function() { 37 | @Override 38 | public Obs apply(Object o) { 39 | return 
SimpleObs.of(od, o); 40 | } 41 | })); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /hive/src/main/java/com/cloudera/exhibit/hive/HivePrimitiveObsDescriptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.hive; 16 | 17 | import com.google.common.collect.ImmutableList; 18 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; 19 | 20 | import java.util.Iterator; 21 | 22 | class HivePrimitiveObsDescriptor extends HiveObsDescriptor { 23 | 24 | private PrimitiveObjectInspector poi; 25 | 26 | public HivePrimitiveObsDescriptor(PrimitiveObjectInspector poi) { 27 | this.poi = poi; 28 | } 29 | 30 | @Override 31 | public Field get(int i) { 32 | return new Field("c1", HiveUtils.getFieldType(poi)); 33 | } 34 | 35 | @Override 36 | public int indexOf(String name) { 37 | if ("c1".equals(name)) { 38 | return 0; 39 | } else { 40 | return -1; 41 | } 42 | } 43 | 44 | @Override 45 | public int size() { 46 | return 1; 47 | } 48 | 49 | @Override 50 | public Iterator iterator() { 51 | return ImmutableList.of(get(0)).iterator(); 52 | } 53 | 54 | @Override 55 | public Object[] convert(Object rawObs) { 56 | return new Object[] { HiveUtils.asJavaType(poi.getPrimitiveJavaObject(rawObs)) }; 57 | } 58 | } 59 | 
-------------------------------------------------------------------------------- /server/src/main/java/com/cloudera/exhibit/server/json/FrameSerializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.server.json; 16 | 17 | import com.cloudera.exhibit.core.Frame; 18 | import com.cloudera.exhibit.core.Obs; 19 | import com.fasterxml.jackson.core.JsonGenerator; 20 | import com.fasterxml.jackson.databind.JsonSerializer; 21 | import com.fasterxml.jackson.databind.SerializerProvider; 22 | 23 | import java.io.IOException; 24 | 25 | public class FrameSerializer extends JsonSerializer { 26 | @Override 27 | public void serialize(Frame res, JsonGenerator gen, SerializerProvider provider) throws IOException { 28 | gen.writeStartObject(); 29 | gen.writeArrayFieldStart("columns"); 30 | for (int i = 0; i < res.descriptor().size(); i++) { 31 | gen.writeString(res.descriptor().get(i).name); 32 | } 33 | gen.writeEndArray(); 34 | 35 | gen.writeArrayFieldStart("data"); 36 | for (Obs obs : res) { 37 | gen.writeStartArray(); 38 | for (int i = 0; i < res.descriptor().size(); i++) { 39 | gen.writeObject(obs.get(i)); 40 | } 41 | gen.writeEndArray(); 42 | } 43 | gen.writeEndArray(); 44 | gen.writeEndObject(); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- 
/etl/src/main/java/com/cloudera/exhibit/etl/tbl/TblType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.etl.tbl; 16 | 17 | import java.util.Map; 18 | 19 | public enum TblType { 20 | SUM { 21 | @Override 22 | public Tbl create(Map values, Map options) { 23 | return new SumTbl(values); 24 | } 25 | }, 26 | SUM_TOP { 27 | @Override 28 | public Tbl create(Map values, Map options) { 29 | return new SumTopTbl(values, options); 30 | } 31 | }, 32 | PERCENTILE { 33 | @Override 34 | public Tbl create(Map values, Map options) { 35 | return new PercentileTbl(values, options); 36 | } 37 | }, 38 | RATIO { 39 | @Override 40 | public Tbl create(Map values, Map options) { 41 | return new RatioTbl(values, options); 42 | } 43 | }, 44 | TOP_LIST { 45 | @Override 46 | public Tbl create(Map values, Map options) { 47 | return new TopListTbl(values, options); 48 | } 49 | } 50 | ; 51 | 52 | public abstract Tbl create(Map values, Map options); 53 | } 54 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/ExhibitId.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. 
licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.core; 16 | 17 | import com.google.common.base.Preconditions; 18 | 19 | public class ExhibitId { 20 | String entity; 21 | String id; 22 | 23 | public static ExhibitId create(String entity, Object id) { 24 | return new ExhibitId(entity, id.toString()); 25 | } 26 | 27 | public ExhibitId(String entity, String id) { 28 | this.entity = Preconditions.checkNotNull(entity); 29 | this.id = Preconditions.checkNotNull(id); 30 | } 31 | 32 | public String getEntity() { 33 | return entity; 34 | } 35 | 36 | public String getId() { 37 | return id; 38 | } 39 | 40 | @Override 41 | public int hashCode() { 42 | return entity.hashCode() + 17 * id.hashCode(); 43 | } 44 | 45 | @Override 46 | public boolean equals(Object other) { 47 | if (other == null || !(other instanceof ExhibitId)) { 48 | return false; 49 | } 50 | ExhibitId eid = (ExhibitId) other; 51 | return entity.equals(eid.entity) && id.equals(eid.id); 52 | } 53 | 54 | @Override 55 | public String toString() { 56 | return new StringBuilder().append(entity).append(": ").append(id).toString(); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /javascript/src/main/java/com/cloudera/exhibit/javascript/ScriptableObs.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. 
licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.javascript; 16 | 17 | import com.cloudera.exhibit.core.Obs; 18 | import org.mozilla.javascript.Scriptable; 19 | import org.mozilla.javascript.ScriptableObject; 20 | 21 | public class ScriptableObs extends ScriptableObject { 22 | 23 | private final Obs obs; 24 | 25 | public ScriptableObs(Obs obs) { 26 | this.obs = obs; 27 | } 28 | 29 | public Obs obs() { return obs; } 30 | 31 | @Override 32 | public String getClassName() { 33 | return "Obs"; 34 | } 35 | 36 | @Override 37 | public Object[] getIds() { 38 | Object[] ids = new Object[obs.descriptor().size()]; 39 | for (int i = 0; i < ids.length; i++) { 40 | ids[i] = obs.descriptor().get(i).name; 41 | } 42 | return ids; 43 | } 44 | 45 | @Override 46 | public Object get(String name, Scriptable scriptable) { 47 | return obs.get(name); 48 | } 49 | 50 | @Override 51 | public boolean has(String name, Scriptable scriptable) { 52 | return obs.descriptor().indexOf(name) > -1; 53 | } 54 | 55 | @Override 56 | public Object getDefaultValue(Class typeHint) { 57 | return obs.toString(); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /server/src/main/java/com/cloudera/exhibit/server/resources/CalculationResource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. 
licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.server.resources; 16 | 17 | import com.cloudera.exhibit.server.calcs.CalculationStore; 18 | import com.google.common.base.Preconditions; 19 | import io.dropwizard.jersey.params.IntParam; 20 | 21 | import javax.ws.rs.Consumes; 22 | import javax.ws.rs.GET; 23 | import javax.ws.rs.POST; 24 | import javax.ws.rs.Path; 25 | import javax.ws.rs.Produces; 26 | import javax.ws.rs.QueryParam; 27 | import javax.ws.rs.core.MediaType; 28 | import javax.ws.rs.core.Response; 29 | 30 | @Path("/calculation") 31 | @Consumes(MediaType.APPLICATION_JSON) 32 | @Produces(MediaType.APPLICATION_JSON) 33 | public class CalculationResource { 34 | 35 | private CalculationStore calcs; 36 | 37 | public CalculationResource(CalculationStore calcs) { 38 | this.calcs = Preconditions.checkNotNull(calcs); 39 | } 40 | 41 | @GET 42 | public CalculationResponse getCode(@QueryParam("id") IntParam id) { 43 | return new CalculationResponse(calcs.getCode(id.get())); 44 | } 45 | 46 | @POST 47 | public Response save(SaveRequest request) { 48 | calcs.addCalculation(request.code); 49 | return Response.ok().build(); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /server/src/main/java/com/cloudera/exhibit/server/resources/ComputeResource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. 
licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.server.resources; 16 | 17 | import com.cloudera.exhibit.core.Exhibit; 18 | import com.cloudera.exhibit.core.Frame; 19 | import com.cloudera.exhibit.core.ExhibitStore; 20 | import com.cloudera.exhibit.sql.SQLCalculator; 21 | import com.google.common.base.Preconditions; 22 | 23 | import javax.validation.Valid; 24 | import javax.ws.rs.Consumes; 25 | import javax.ws.rs.POST; 26 | import javax.ws.rs.Path; 27 | import javax.ws.rs.Produces; 28 | import javax.ws.rs.core.MediaType; 29 | import java.sql.SQLException; 30 | 31 | @Path("/compute") 32 | @Consumes(MediaType.APPLICATION_JSON) 33 | @Produces(MediaType.APPLICATION_JSON) 34 | public class ComputeResource { 35 | 36 | private final ExhibitStore store; 37 | 38 | public ComputeResource(ExhibitStore store) { 39 | this.store = Preconditions.checkNotNull(store); 40 | } 41 | 42 | @POST 43 | public Frame compute(@Valid ComputeRequest req) throws SQLException { 44 | Exhibit exhibit = store.find(req.id).orNull(); 45 | SQLCalculator calc = SQLCalculator.create(null, req.code); //TODO 46 | calc.initialize(exhibit.descriptor()); 47 | return calc.apply(exhibit); 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /avro/src/main/java/com/cloudera/exhibit/avro/AvroObs.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 
3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.avro; 16 | 17 | import com.cloudera.exhibit.core.FieldType; 18 | import com.cloudera.exhibit.core.Obs; 19 | import com.cloudera.exhibit.core.ObsDescriptor; 20 | import org.apache.avro.generic.GenericRecord; 21 | 22 | public class AvroObs extends Obs { 23 | 24 | private GenericRecord record; 25 | private ObsDescriptor descriptor; 26 | 27 | public AvroObs(GenericRecord record) { 28 | this(new AvroObsDescriptor(record.getSchema()), record); 29 | } 30 | 31 | public AvroObs(ObsDescriptor descriptor, GenericRecord record) { 32 | this.descriptor = descriptor; 33 | this.record = record; 34 | } 35 | 36 | GenericRecord record() { 37 | return record; 38 | } 39 | 40 | @Override 41 | public String toString() { 42 | return record.toString(); 43 | } 44 | 45 | @Override 46 | public ObsDescriptor descriptor() { 47 | return descriptor; 48 | } 49 | 50 | @Override 51 | public Object get(int index) { 52 | ObsDescriptor.Field f = descriptor.get(index); 53 | Object r = record.get(f.name); 54 | if (f.type == FieldType.STRING) { 55 | return r == null ? null : r.toString(); 56 | } else { 57 | return r; 58 | } 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/Column.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 
3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.core; 16 | 17 | import com.cloudera.exhibit.core.simple.SimpleFrame; 18 | import com.cloudera.exhibit.core.simple.SimpleObs; 19 | import com.cloudera.exhibit.core.simple.SimpleObsDescriptor; 20 | import com.google.common.base.Function; 21 | import com.google.common.collect.ImmutableList; 22 | import com.google.common.collect.Lists; 23 | 24 | import java.util.AbstractList; 25 | 26 | public class Column extends AbstractList implements Vec { 27 | 28 | private final Frame frame; 29 | private final int index; 30 | 31 | public static Column create(Frame frame, String name) { 32 | return create(frame, frame.descriptor().indexOf(name)); 33 | } 34 | 35 | public static Column create(Frame frame, int index) { 36 | return new Column(frame, index); 37 | } 38 | 39 | public Column(Frame frame, int index) { 40 | this.frame = frame; 41 | this.index = index; 42 | } 43 | 44 | @Override 45 | public FieldType getType() { 46 | return frame.descriptor().get(index).type; 47 | } 48 | 49 | @Override 50 | public Object get(int i) { 51 | return frame.get(i).get(index); 52 | } 53 | 54 | @Override 55 | public int size() { 56 | return frame.size(); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /hive/src/main/java/com/cloudera/exhibit/hive/CollectAllUDAF.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, 
Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.hive; 16 | 17 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 18 | import org.apache.hadoop.hive.ql.parse.SemanticException; 19 | import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver; 20 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; 21 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 22 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; 23 | import org.slf4j.Logger; 24 | import org.slf4j.LoggerFactory; 25 | 26 | public class CollectAllUDAF extends AbstractGenericUDAFResolver { 27 | 28 | private static final Logger LOG = LoggerFactory.getLogger(CollectAllUDAF.class); 29 | 30 | @Override 31 | public GenericUDAFEvaluator getEvaluator(TypeInfo[] typeInfo) throws SemanticException { 32 | if (typeInfo.length != 1) { 33 | throw new UDFArgumentException("Only one argument expected to collect_all method"); 34 | } 35 | if (typeInfo[0].getCategory() == ObjectInspector.Category.LIST) { 36 | LOG.info("Using CollectArrayEvaluator"); 37 | return new AbstractCollectArrayEvaluator.Lists(); 38 | } 39 | LOG.info("Using CollectEvaluator"); 40 | return new AbstractCollectEvaluator.Lists(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /sql/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 
5 | 6 | com.cloudera.exhibit 7 | exhibit-parent 8 | 0.8.0 9 | 10 | 11 | exhibit-sql 12 | Exhibit SQL 13 | 14 | 15 | 16 | 17 | com.google.guava 18 | guava 19 | provided 20 | 21 | 22 | 23 | com.cloudera.exhibit 24 | exhibit-core 25 | 26 | 27 | 28 | org.apache.calcite 29 | calcite-core 30 | 31 | 32 | 33 | org.apache.calcite 34 | calcite-avatica 35 | 36 | 37 | 38 | com.cloudera.exhibit 39 | exhibit-avro 40 | test 41 | 42 | 43 | 44 | com.cloudera.exhibit 45 | exhibit-mongodb 46 | test 47 | 48 | 49 | 50 | junit 51 | junit 52 | test 53 | 54 | 55 | 56 | 57 | 58 | 59 | org.apache.maven.plugins 60 | maven-compiler-plugin 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /javascript/src/main/java/com/cloudera/exhibit/javascript/ScriptableVec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.javascript; 16 | 17 | import com.cloudera.exhibit.core.Vec; 18 | import org.mozilla.javascript.Scriptable; 19 | import org.mozilla.javascript.ScriptableObject; 20 | 21 | public class ScriptableVec extends ScriptableObject { 22 | 23 | private final Vec vec; 24 | 25 | public ScriptableVec(Vec vec) { 26 | this.vec = vec; 27 | } 28 | 29 | public Vec vec() { return vec; } 30 | 31 | @Override 32 | public String getClassName() { 33 | return "Vec"; 34 | } 35 | 36 | @Override 37 | public Object get(int index, Scriptable scriptable) { 38 | return vec.get(index); 39 | } 40 | 41 | @Override 42 | public Object get(String property, Scriptable scriptable) { 43 | if ("length".equals(property)) { 44 | return vec.size(); 45 | } 46 | return super.get(property, scriptable); 47 | } 48 | 49 | @Override 50 | public boolean has(int index, Scriptable scriptable) { 51 | return 0 <= index && index < vec.size(); 52 | } 53 | 54 | @Override 55 | public boolean has(String name, Scriptable scriptable) { 56 | return "length".equals(name); 57 | } 58 | 59 | @Override 60 | public Object getDefaultValue(Class typeHint) { 61 | return vec.toString(); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /etl/src/main/java/com/cloudera/exhibit/etl/fn/SchemaMapFn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. 
See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.etl.fn; 16 | 17 | import com.cloudera.exhibit.etl.SchemaProvider; 18 | import org.apache.avro.generic.GenericData; 19 | import org.apache.crunch.MapFn; 20 | import org.apache.crunch.Pair; 21 | /** Crunch map function that re-wraps each (key, (aggIdx, value)) input into two Avro records: a key record built from provider schema 0 with fields "index" and "key", and a value record built from provider schema 1 with field "value". */ 22 | public class SchemaMapFn extends 23 | MapFn>, 24 | Pair, Pair>> { 25 | 26 | private final int index; 27 | private final SchemaProvider provider; 28 | 29 | public SchemaMapFn(int index, SchemaProvider provider) { 30 | this.index = index; 31 | this.provider = provider; 32 | } 33 | /** Returns ((keyRecord, aggIdx), (aggIdx, valueRecord)), where aggIdx is the first element of the input's second component. */ 34 | @Override 35 | public Pair, Pair> map( 36 | Pair> input) { 37 | int aggIdx = input.second().first(); 38 | GenericData.Record outKey = new GenericData.Record(provider.get(0)); 39 | outKey.put("index", index); 40 | outKey.put("key", input.first()); 41 | GenericData.Record outValue = new GenericData.Record(provider.get(1)); 42 | outValue.put("value", input.second().second()); 43 | return Pair.of(Pair.of(outKey, aggIdx), Pair.of(aggIdx, outValue)); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/composite/NeighborLookup.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.core.composite; 16 | 17 | import com.cloudera.exhibit.core.Column; 18 | import com.cloudera.exhibit.core.ExhibitId; 19 | import com.cloudera.exhibit.core.Frame; 20 | import com.google.common.collect.Maps; 21 | import com.google.common.collect.Sets; 22 | 23 | import java.util.Map; 24 | import java.util.Set; 25 | 26 | public class NeighborLookup { 27 | private final Map columnToEntity; 28 | 29 | public static NeighborLookup create(String column, String entity, String... args) { 30 | Map cte = Maps.newHashMap(); 31 | cte.put(column, entity); 32 | for (int i = 0; i < args.length; i += 2) { 33 | cte.put(args[i], args[i + 1]); 34 | } 35 | return new NeighborLookup(cte); 36 | } 37 | 38 | public NeighborLookup(Map columnToEntity) { 39 | this.columnToEntity = columnToEntity; 40 | } 41 | 42 | public Set lookupIds(Frame frame) { 43 | Set ids = Sets.newHashSet(); 44 | for (Map.Entry e : columnToEntity.entrySet()) { 45 | Column column = Column.create(frame, e.getKey()); 46 | for (Object value : column) { 47 | ids.add(ExhibitId.create(e.getValue(), value)); 48 | } 49 | } 50 | return ids; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /server/src/main/java/com/cloudera/exhibit/server/main/ExhibitConfiguration.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. 
See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.server.main; 16 | 17 | import com.cloudera.exhibit.core.ExhibitStore; 18 | import com.cloudera.exhibit.core.multi.MultiExhibitStore; 19 | import com.fasterxml.jackson.annotation.JsonProperty; 20 | import com.google.common.base.Function; 21 | import com.google.common.collect.Lists; 22 | import io.dropwizard.Configuration; 23 | import io.dropwizard.setup.Environment; 24 | import org.slf4j.Logger; 25 | import org.slf4j.LoggerFactory; 26 | 27 | import javax.validation.Valid; 28 | import java.util.List; 29 |
/**
 * Dropwizard configuration for the Exhibit server: a list of store configurations that is
 * turned into a single combined {@link ExhibitStore}.
 */
public class ExhibitConfiguration extends Configuration {

  private static final Logger LOG = LoggerFactory.getLogger(ExhibitConfiguration.class);

  @JsonProperty
  @Valid
  List<ExhibitStoreConfig> exhibits;

  /**
   * Creates one store per configured entry and combines them.
   *
   * @param env the Dropwizard environment handed to each store configuration
   * @param conf the Hadoop configuration handed to each store configuration
   * @return the combined store produced by {@link MultiExhibitStore}
   */
  public ExhibitStore getExhibitStores(final Environment env, final org.apache.hadoop.conf.Configuration conf) {
    // Lists.transform only returns a lazy view that re-applies the function on every access;
    // copying it here guarantees each store is created (and logged) exactly once.
    List<ExhibitStore> stores = Lists.newArrayList(
        Lists.transform(exhibits, new Function<ExhibitStoreConfig, ExhibitStore>() {
          @Override
          public ExhibitStore apply(ExhibitStoreConfig exhibitStoreConfig) {
            LOG.info("Creating exhibit store from config: " + exhibitStoreConfig);
            return exhibitStoreConfig.create(env, conf);
          }
        }));
    return MultiExhibitStore.create(stores);
  }
}
48 | -------------------------------------------------------------------------------- /etl/src/main/java/com/cloudera/exhibit/etl/tbl/Tbl.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. 
You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.etl.tbl; 16 | 17 | import com.cloudera.exhibit.core.Obs; 18 | import com.cloudera.exhibit.core.ObsDescriptor; 19 | import com.cloudera.exhibit.etl.SchemaProvider; 20 | import org.apache.avro.generic.GenericData; 21 | 22 | import java.util.List; 23 | /** Contract for an aggregation table: schemas are generated on the client, the table is initialized from them inside a map/reduce phase, observations are added, intermediate records are merged, and the result is finalized into output rows. */ 24 | public interface Tbl { 25 | // How many rows of output will the finalize method generate? 26 | int arity(); 27 | 28 | // Generate the schemas (intermediate and output) for a given set of inputs. 29 | // Do this first on the client-side 30 | SchemaProvider getSchemas(ObsDescriptor od, int outputId, int aggIdx); 31 | 32 | // Inside of a map/reduce phase: initialize a Tbl using the SchemaProvider 33 | // we generated on the client 34 | void initialize(SchemaProvider provider); 35 | 36 | // Add a single additional observation 37 | void add(Obs obs); 38 | 39 | // Get the current (intermediate) state of this table 40 | GenericData.Record getValue(); 41 | 42 | // Merge two intermediate states of this table type together 43 | GenericData.Record merge(GenericData.Record current, GenericData.Record next); 44 | 45 | // In the reduce phase, "finalize" the output-- transform it from its 46 | // intermediate form to its final output form. 47 | List finalize(GenericData.Record value); 48 | } 49 | -------------------------------------------------------------------------------- /mongodb/src/main/java/com/cloudera/exhibit/mongodb/BSONFrame.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. 
licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.mongodb; 16 | 17 | import com.cloudera.exhibit.core.ObsDescriptor; 18 | import com.cloudera.exhibit.core.Frame; 19 | import com.cloudera.exhibit.core.Obs; 20 | import com.google.common.base.Function; 21 | import com.google.common.collect.Iterators; 22 | import org.bson.BSONObject; 23 | 24 | import java.util.Iterator; 25 | import java.util.List; 26 | 27 | public class BSONFrame extends Frame { 28 | 29 | private BSONObsDescriptor descriptor; 30 | private List records; 31 | 32 | public BSONFrame(BSONObsDescriptor descriptor, List records) { 33 | this.descriptor = descriptor; 34 | this.records = records; 35 | } 36 | 37 | @Override 38 | public ObsDescriptor descriptor() { 39 | return descriptor; 40 | } 41 | 42 | @Override 43 | public int size() { 44 | return records.size(); 45 | } 46 | 47 | @Override 48 | public Obs get(int index) { 49 | return new BSONObs(descriptor, records.get(index)); 50 | } 51 | 52 | @Override 53 | public Iterator iterator() { 54 | return Iterators.transform(records.iterator(), new Function() { 55 | @Override 56 | public Obs apply(BSONObject bsonObject) { 57 | return new BSONObs(descriptor, bsonObject); 58 | } 59 | }); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /etl/src/main/java/com/cloudera/exhibit/etl/config/OutputConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, 
Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.etl.config; 16 | 17 | import com.google.common.collect.Lists; 18 | import org.apache.crunch.Target; 19 | 20 | import java.io.Serializable; 21 | import java.util.List; 22 | /** Serializable settings for a single ETL output: where and in which format it is written, and which attributes, keys and aggregations make it up. All fields are public and carry defaults. */ 23 | public class OutputConfig implements Serializable { 24 | // The Kite URI to write the output to (required) 25 | public String uri = ""; 26 | // The underlying path that the data should be written to (required) 27 | public String path = ""; 28 | 29 | // The output format to use (parquet or avro) 30 | public String format = "parquet"; 31 | /* The Crunch write mode used for the target; defaults to OVERWRITE. */ 32 | public Target.WriteMode writeMode = Target.WriteMode.OVERWRITE; 33 | 34 | // A single frame that can be used for non-aggregated output tables 35 | // that run as map-only jobs 36 | public FrameConfig collect = null; 37 | 38 | // Any attributes of the Exhibit that should be included in the output keys 39 | public List attrs = Lists.newArrayList(); 40 | 41 | // The names of the fields from the output frames that should be included in the output keys 42 | public List keys = Lists.newArrayList(); 43 | 44 | // The aggregations to perform by the attrs/keys specified above. 
45 | public List aggregates = Lists.newArrayList(); 46 | 47 | // Enable debug counters for this output and its aggregations 48 | public boolean debug = false; 49 | } 50 | -------------------------------------------------------------------------------- /hive/src/main/java/com/cloudera/exhibit/hive/HiveVector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.hive; 16 | 17 | import com.cloudera.exhibit.core.FieldType; 18 | import com.cloudera.exhibit.core.Vec; 19 | import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; 20 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; 21 | 22 | import java.util.AbstractList; 23 | /** A Vec view over a Hive list value: elements are read through the list inspector and converted with the element's primitive inspector. The viewed list is swapped in with updateValues, so one instance can be reused across rows. */ 24 | public class HiveVector extends AbstractList implements Vec { 25 | 26 | private final FieldType fieldType; 27 | private final ListObjectInspector listOI; 28 | private final PrimitiveObjectInspector pOI; /* The Hive list object currently viewed; stays null until updateValues(Object) is called. */ 29 | private Object values; 30 | 31 | public HiveVector(FieldType fieldType, ListObjectInspector listOI) { 32 | this.fieldType = fieldType; 33 | this.listOI = listOI; 34 | this.pOI = (PrimitiveObjectInspector) listOI.getListElementObjectInspector(); 35 | } 36 | 37 | @Override 38 | public FieldType getType() { 39 | return fieldType; 40 | } 41 | 42 | @Override 43 | public Object get(int index) { 44 | Object v = 
listOI.getListElement(values, index); 45 | return HiveUtils.asJavaType(pOI.getPrimitiveJavaObject(v)); 46 | } 47 | /* NOTE(review): if values is still null, the inspector's getListLength may report -1 rather than 0 - confirm against the Hive ListObjectInspector contract. */ 48 | @Override 49 | public int size() { 50 | return listOI.getListLength(values); 51 | } 52 | /** Points this vector at a new Hive list object and returns this instance for chaining. */ 53 | public HiveVector updateValues(Object values) { 54 | this.values = values; 55 | return this; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /sql/src/main/java/com/cloudera/exhibit/sql/FrameEnumerator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.sql; 16 | 17 | import com.cloudera.exhibit.core.Frame; 18 | import com.cloudera.exhibit.core.Obs; 19 | import org.apache.calcite.linq4j.Enumerator; 20 | 21 | public class FrameEnumerator implements Enumerator { 22 | 23 | private final Frame frame; 24 | private Object current; 25 | private int currentIndex = -1; 26 | 27 | public FrameEnumerator(Frame frame) { 28 | this.frame = frame; 29 | this.current = new Object[frame.descriptor().size()]; 30 | } 31 | 32 | @Override 33 | public Object current() { 34 | return current; 35 | } 36 | 37 | @Override 38 | public boolean moveNext() { 39 | currentIndex++; 40 | boolean hasNext = currentIndex < frame.size(); 41 | if (hasNext) { 42 | Obs obs = frame.get(currentIndex); 43 | if (frame.descriptor().size() > 1) { 44 | Object[] values = new Object[frame.descriptor().size()]; 45 | for (int i = 0; i < values.length; i++) { 46 | values[i] = obs.get(i); 47 | } 48 | this.current = values; 49 | } else { 50 | this.current = obs.get(0); 51 | } 52 | 53 | } 54 | return hasNext; 55 | 56 | } 57 | 58 | @Override 59 | public void reset() { 60 | currentIndex = -1; 61 | current = null; 62 | } 63 | 64 | @Override 65 | public void close() { 66 | // No-op 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/simple/SimpleExhibitStore.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. 
See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.core.simple; 16 | 17 | import com.cloudera.exhibit.core.Exhibit; 18 | import com.cloudera.exhibit.core.ExhibitId; 19 | import com.cloudera.exhibit.core.ExhibitStore; 20 | import com.google.common.base.Optional; 21 | import com.google.common.base.Preconditions; 22 | import com.google.common.collect.ImmutableSet; 23 | 24 | import java.util.Map; 25 | import java.util.Set; 26 |
/**
 * In-memory {@link ExhibitStore} for a single entity, answering lookups from a map
 * supplied at construction time.
 */
public class SimpleExhibitStore implements ExhibitStore {

  private final String entity;
  private final Map exhibits;

  /** Factory shorthand for the constructor. */
  public static SimpleExhibitStore of(String entity, Map exhibits) {
    return new SimpleExhibitStore(entity, exhibits);
  }

  /**
   * @param entity the only entity this store serves; must not be null
   * @param exhibits the exhibits keyed by id; must not be null
   */
  public SimpleExhibitStore(String entity, Map exhibits) {
    this.entity = Preconditions.checkNotNull(entity);
    this.exhibits = Preconditions.checkNotNull(exhibits);
  }

  /** Always connected: the data lives in memory. */
  @Override
  public boolean isConnected() {
    return true;
  }

  @Override
  public Set entities() {
    return ImmutableSet.of(entity);
  }

  /**
   * Returns the exhibit stored under the id, or absent when the id names a different
   * entity or no exhibit is stored for it.
   */
  @Override
  public Optional find(ExhibitId id) {
    if (entity.equals(id.getEntity())) {
      return Optional.fromNullable(exhibits.get(id.getId()));
    }
    return Optional.absent();
  }
}
59 | -------------------------------------------------------------------------------- /avro/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | com.cloudera.exhibit 7 | exhibit-parent 8 | 0.8.0 9 | 10 | 11 | exhibit-avro 12 | Exhibit for Apache Avro 13 | 14 | 15 | 16 | com.cloudera.exhibit 17 | exhibit-core 18 | 19 | 20 | 21 | org.apache.avro 22 | avro 23 | 24 | 25 | 26 | junit 27 | junit 28 | test 29 | 30 | 31 | 32 | 33 | 34 | 35 | org.apache.maven.plugins 36 | maven-compiler-plugin 37 | 38 | 39 | maven-assembly-plugin 40 | 41 | 42 | 43 | 
com.cloudera.exhibit.mongodb.ExampleClient 44 | 45 | 46 | 47 | jar-with-dependencies 48 | 49 | 50 | 51 | 52 | make-assembly 53 | package 54 | 55 | single 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /mongodb/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | com.cloudera.exhibit 7 | exhibit-parent 8 | 0.8.0 9 | 10 | 11 | exhibit-mongodb 12 | Exhibit for MongoDB 13 | 14 | 15 | 16 | com.cloudera.exhibit 17 | exhibit-core 18 | 19 | 20 | 21 | org.mongodb 22 | mongo-java-driver 23 | 24 | 25 | 26 | junit 27 | junit 28 | test 29 | 30 | 31 | 32 | 33 | 34 | 35 | org.apache.maven.plugins 36 | maven-compiler-plugin 37 | 38 | 39 | maven-assembly-plugin 40 | 41 | 42 | 43 | com.cloudera.exhibit.mongodb.ExampleClient 44 | 45 | 46 | 47 | jar-with-dependencies 48 | 49 | 50 | 51 | 52 | make-assembly 53 | package 54 | 55 | single 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /thrift/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | com.cloudera.exhibit 7 | exhibit-parent 8 | 0.8.0 9 | 10 | 11 | exhibit-thrift 12 | Exhibit for Apache Thrift 13 | 14 | 15 | 16 | com.cloudera.exhibit 17 | exhibit-core 18 | 19 | 20 | 21 | org.apache.thrift 22 | libthrift 23 | 24 | 25 | 26 | junit 27 | junit 28 | test 29 | 30 | 31 | 32 | 33 | 34 | 35 | org.apache.maven.plugins 36 | maven-compiler-plugin 37 | 38 | 39 | maven-assembly-plugin 40 | 41 | 42 | 43 | com.cloudera.exhibit.mongodb.ExampleClient 44 | 45 | 46 | 47 | jar-with-dependencies 48 | 49 | 50 | 51 | 52 | make-assembly 53 | package 54 | 55 | single 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /etl/src/main/java/com/cloudera/exhibit/etl/config/PivotConfig.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.etl.config; 16 | 17 | import com.google.common.collect.Lists; 18 | import com.google.common.collect.Maps; 19 | 20 | import java.io.Serializable; 21 | import java.util.List; 22 | import java.util.Map; 23 | 24 | /** 25 | *
<p>Configuration information used to pivot a given frame computation. It's best to explain 26 | * how this works with an example.</p>
27 | * 28 | *
<p>Let's say we have a frame computation against a table called "users" that looks like this: 29 | * 30 | *
<pre>SELECT age, sex, sum(income) inc, count(*) cnt FROM users GROUP BY age, sex;</pre> 31 | * 32 | *
For each age, we will get two rows corresponding to the sum of incomes and counts for 33 | * males and females. If we want our output frame to have a single row for each age that 34 | * contains four columns (inc_male, inc_female, cnt_male, cnt_female), then we can use a 35 | * pivot operation on the resulting table, like this sample YAML snippet: 36 | * 37 | *
<pre>pivot: {by: [age], variables: {sex: [male, female]}}</pre></p>
38 | * 39 | */ 40 | public class PivotConfig implements Serializable { /* The columns that identify one output row, e.g. [age] in the example above. */ 41 | public List by = Lists.newArrayList(); 42 | 43 | // A mapping of variable names to a list of valid values that they can take on 44 | // that will be used to generate the pivot columns. 45 | public Map> variables = Maps.newLinkedHashMap(); 46 | } 47 | -------------------------------------------------------------------------------- /javascript/src/main/java/com/cloudera/exhibit/javascript/ScriptableFrame.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.javascript; 16 | 17 | import com.cloudera.exhibit.core.Frame; 18 | import org.mozilla.javascript.Scriptable; 19 | import org.mozilla.javascript.ScriptableObject; 20 | 21 | public class ScriptableFrame extends ScriptableObject { 22 | 23 | private final Frame frame; 24 | 25 | public ScriptableFrame(Frame frame) { 26 | super(); 27 | this.frame = frame; 28 | } 29 | 30 | public Frame frame() { return frame; } 31 | 32 | @Override 33 | public String getClassName() { 34 | return "Frame"; 35 | } 36 | 37 | @Override 38 | public Object[] getIds() { 39 | Object[] ids = new Object[frame.descriptor().size()]; 40 | for (int i = 0; i < ids.length; i++) { 41 | ids[i] = frame.descriptor().get(i).name; 42 | } 43 | return ids; 44 | } 45 | 46 | @Override 47 | public Object get(int index, Scriptable scriptable) { 48 | return new ScriptableObs(frame.get(index)); 49 | } 50 | 51 | @Override 52 | public Object get(String name, Scriptable scriptable) { 53 | if ("length".equals(name)) { 54 | return frame.size(); 55 | } 56 | return new ScriptableVec(frame.$(name)); 57 | } 58 | 59 | @Override 60 | public boolean has(String name, Scriptable scriptable) { 61 | return frame.descriptor().indexOf(name) > -1; 62 | } 63 | 64 | @Override 65 | public Object getDefaultValue(Class typeHint) { 66 | return frame.toString(); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /etl/src/main/java/com/cloudera/exhibit/etl/SchemaProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. 
You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.etl; 16 | 17 | import com.google.common.base.Function; 18 | import com.google.common.collect.Lists; 19 | import com.google.common.collect.Maps; 20 | import org.apache.avro.Schema; 21 | 22 | import javax.annotation.Nullable; 23 | import java.io.Serializable; 24 | import java.util.List; 25 | import java.util.Map; 26 | 27 | public class SchemaProvider implements Serializable { 28 | private final List json; 29 | private transient List schemas; 30 | 31 | public SchemaProvider(List schemas) { 32 | this.schemas = schemas; 33 | this.json = Lists.newArrayList(Lists.transform(schemas, new Function() { 34 | @Nullable 35 | @Override 36 | public String apply(Schema schema) { 37 | return schema.toString(); 38 | } 39 | })); 40 | } 41 | 42 | public Schema get(int i) { 43 | return getSchemas().get(i); 44 | } 45 | 46 | private List getSchemas() { 47 | if (schemas == null) { 48 | Map defined = Maps.newHashMap(); 49 | this.schemas = Lists.newArrayList(); 50 | for (String s : json) { 51 | if (defined.containsKey(s)) { 52 | schemas.add(defined.get(s)); 53 | } else { 54 | Schema.Parser sp = new Schema.Parser(); 55 | Schema schema = sp.parse(s); 56 | defined.put(s, schema); 57 | schemas.add(schema); 58 | } 59 | } 60 | } 61 | return schemas; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/simple/SimpleFrame.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. 
licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.core.simple; 16 | 17 | import com.cloudera.exhibit.core.Frame; 18 | import com.cloudera.exhibit.core.Obs; 19 | import com.cloudera.exhibit.core.ObsDescriptor; 20 | import com.google.common.collect.ImmutableList; 21 | 22 | import java.util.Iterator; 23 | import java.util.List; 24 |
/**
 * A {@link Frame} held entirely in memory as a list of observations that share one
 * descriptor.
 */
public class SimpleFrame extends Frame {

  private final ObsDescriptor descriptor;
  private final List<Obs> observations;

  /**
   * Builds a frame from the given observations, taking the descriptor from the first one.
   *
   * @throws IllegalArgumentException if no observation is given
   */
  public static SimpleFrame of(Obs... obs) {
    return new SimpleFrame(ImmutableList.copyOf(obs));
  }

  /** Creates an empty frame with the given descriptor. */
  public SimpleFrame(ObsDescriptor descriptor) {
    this(descriptor, ImmutableList.<Obs>of());
  }

  /**
   * Creates a frame whose descriptor is taken from the first observation.
   *
   * @throws IllegalArgumentException if {@code observations} is empty, since no descriptor
   *     can be derived; use {@link #SimpleFrame(ObsDescriptor)} for an empty frame
   */
  public SimpleFrame(List<Obs> observations) {
    this(descriptorOf(observations), observations);
  }

  /** @param observations the rows of the frame; the list is used as given, not copied */
  public SimpleFrame(ObsDescriptor descriptor, List<Obs> observations) {
    this.descriptor = descriptor;
    this.observations = observations;
  }

  /** Returns the first observation's descriptor, failing with a clear message when there is none. */
  private static ObsDescriptor descriptorOf(List<Obs> observations) {
    if (observations.isEmpty()) {
      throw new IllegalArgumentException(
          "Cannot derive a descriptor from an empty list of observations");
    }
    return observations.get(0).descriptor();
  }

  @Override
  public ObsDescriptor descriptor() {
    return descriptor;
  }

  @Override
  public int size() {
    return observations.size();
  }

  @Override
  public Obs get(int index) {
    return observations.get(index);
  }

  @Override
  public Iterator<Obs> iterator() {
    return observations.iterator();
  }

  @Override
  public String toString() {
    return observations.toString();
  }
}
72 | -------------------------------------------------------------------------------- /server/src/main/resources/assets/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Exhibit Prototype 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 |
32 | 33 |
34 | 35 |
36 | 37 | 38 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/simple/SimpleObs.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.core.simple; 16 | 17 | import com.cloudera.exhibit.core.Obs; 18 | import com.cloudera.exhibit.core.ObsDescriptor; 19 | import com.google.common.base.Preconditions; 20 | import com.google.common.collect.Lists; 21 | 22 | import java.util.List; 23 |
/**
 * An {@link Obs} backed by a plain list of values, one per field of its descriptor.
 * Two instances are equal when both their descriptors and their value lists are equal.
 */
public class SimpleObs extends Obs {

  private final ObsDescriptor descriptor;
  private final List<Object> values;

  /** Builds an observation from the given values, copied into a new list. */
  public static SimpleObs of(ObsDescriptor desc, Object... args) {
    return new SimpleObs(desc, Lists.newArrayList(args));
  }

  /**
   * @param descriptor the field layout; must not be null
   * @param values one value per descriptor field; must not be null; used as given, not copied
   * @throws NullPointerException if either argument is null
   */
  public SimpleObs(ObsDescriptor descriptor, List<Object> values) {
    // Null checks run first so a missing argument is reported by Preconditions rather than
    // surfacing from inside the size assertion below.
    this.descriptor = Preconditions.checkNotNull(descriptor);
    this.values = Preconditions.checkNotNull(values);
    assert descriptor.size() == values.size();
  }

  @Override
  public ObsDescriptor descriptor() {
    return descriptor;
  }

  @Override
  public Object get(int index) {
    return values.get(index);
  }

  /** Returns the backing value list itself, not a copy. */
  public List<Object> getValues() {
    return values;
  }

  @Override
  public int hashCode() {
    return descriptor.hashCode() + 17 * values.hashCode();
  }

  @Override
  public boolean equals(Object other) {
    // instanceof is already false for null, so no separate null test is needed.
    if (!(other instanceof SimpleObs)) {
      return false;
    }
    SimpleObs obs = (SimpleObs) other;
    return descriptor.equals(obs.descriptor) && values.equals(obs.values);
  }
}
67 | -------------------------------------------------------------------------------- /server/src/main/java/com/cloudera/exhibit/server/resources/FetchResource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.server.resources; 16 | 17 | import com.cloudera.exhibit.core.Exhibit; 18 | import com.cloudera.exhibit.core.ExhibitId; 19 | import com.cloudera.exhibit.core.ExhibitStore; 20 | import com.cloudera.exhibit.server.calcs.CalculationStore; 21 | import com.google.common.base.Preconditions; 22 | import org.slf4j.Logger; 23 | import org.slf4j.LoggerFactory; 24 | 25 | import javax.ws.rs.Consumes; 26 | import javax.ws.rs.GET; 27 | import javax.ws.rs.Path; 28 | import javax.ws.rs.PathParam; 29 | import javax.ws.rs.Produces; 30 | import javax.ws.rs.core.MediaType; 31 | import java.util.Map; 32 | 33 | @Path("/exhibit/{entity}/{id}") 34 | @Produces(MediaType.APPLICATION_JSON) 35 | public class FetchResource { 36 | 37 | private static final Logger LOG = LoggerFactory.getLogger(FetchResource.class); 38 | 39 | private ExhibitStore exhibits; 40 | private CalculationStore calcs; 41 | 42 | public FetchResource(ExhibitStore exhibits, CalculationStore calcs) { 43 | this.exhibits = Preconditions.checkNotNull(exhibits); 44 | this.calcs = Preconditions.checkNotNull(calcs); 45 | } 46 | 47 | @GET 48 | public FetchResponse fetch(@PathParam("entity") String entity, @PathParam("id") String id) { 49 | ExhibitId eid = ExhibitId.create(entity, id); 50 | LOG.info("Looking up " + eid); 51 | Exhibit exhibit = exhibits.find(eid).orNull(); 52 | Map> metrics = calcs.computeKPIs(exhibit); 53 | return new FetchResponse(eid, exhibit, metrics); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/multi/MultiExhibitStore.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. 
You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.core.multi; 16 | 17 | import com.cloudera.exhibit.core.Exhibit; 18 | import com.cloudera.exhibit.core.ExhibitId; 19 | import com.cloudera.exhibit.core.ExhibitStore; 20 | import com.google.common.base.Optional; 21 | import com.google.common.base.Preconditions; 22 | import com.google.common.collect.Maps; 23 | 24 | import java.util.List; 25 | import java.util.Map; 26 | import java.util.Set; 27 | 28 | public class MultiExhibitStore implements ExhibitStore { 29 | 30 | private final Map stores; 31 | 32 | public static MultiExhibitStore create(List stores) { 33 | Map storeMap = Maps.newHashMap(); 34 | for (ExhibitStore store : stores) { 35 | for (String entity : store.entities()) { 36 | // TODO double check this 37 | storeMap.put(entity, store); 38 | } 39 | } 40 | return new MultiExhibitStore(storeMap); 41 | } 42 | 43 | public MultiExhibitStore(Map stores) { 44 | this.stores = Preconditions.checkNotNull(stores); 45 | } 46 | 47 | @Override 48 | public boolean isConnected() { 49 | for (ExhibitStore store : stores.values()) { 50 | if (!store.isConnected()) { 51 | return false; 52 | } 53 | } 54 | return true; 55 | } 56 | 57 | @Override 58 | public Set entities() { 59 | return stores.keySet(); 60 | } 61 | 62 | @Override 63 | public Optional find(ExhibitId id) { 64 | return stores.get(id.getEntity()).find(id); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /thrift/src/main/java/com/cloudera/exhibit/thrift/ThriftFrame.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, 
Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.thrift; 16 | 17 | import com.cloudera.exhibit.core.ObsDescriptor; 18 | import com.cloudera.exhibit.core.Frame; 19 | import com.cloudera.exhibit.core.Obs; 20 | import com.google.common.base.Function; 21 | import com.google.common.collect.ImmutableList; 22 | import com.google.common.collect.Iterators; 23 | import org.apache.thrift.TBase; 24 | 25 | import java.util.Iterator; 26 | import java.util.List; 27 | 28 | public class ThriftFrame extends Frame { 29 | 30 | private final ThriftObsDescriptor descriptor; 31 | private final List records; 32 | 33 | public ThriftFrame(Class clazz) { 34 | this.descriptor = new ThriftObsDescriptor(clazz); 35 | this.records = ImmutableList.of(); 36 | } 37 | 38 | public ThriftFrame(List records) { 39 | this.descriptor = new ThriftObsDescriptor(records.get(0).getClass()); 40 | this.records = records; 41 | } 42 | 43 | @Override 44 | public ObsDescriptor descriptor() { 45 | return descriptor; 46 | } 47 | 48 | @Override 49 | public int size() { 50 | return records.size(); 51 | } 52 | 53 | @Override 54 | public Obs get(int index) { 55 | return new ThriftObs(descriptor, records.get(index)); 56 | } 57 | 58 | @Override 59 | public Iterator iterator() { 60 | return Iterators.transform(records.iterator(), new Function() { 61 | @Override 62 | public Obs apply(TBase tBase) { 63 | return new ThriftObs(descriptor, tBase); 64 | } 65 | }); 66 | } 67 | 
} 68 | -------------------------------------------------------------------------------- /server/src/main/java/com/cloudera/exhibit/server/main/ExhibitStoreConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.server.main; 16 | 17 | import com.cloudera.exhibit.avro.AvroExhibit; 18 | import com.cloudera.exhibit.core.Exhibit; 19 | import com.cloudera.exhibit.core.ExhibitStore; 20 | import com.cloudera.exhibit.core.simple.SimpleExhibitStore; 21 | import com.fasterxml.jackson.annotation.JsonProperty; 22 | import com.google.common.collect.Maps; 23 | import io.dropwizard.setup.Environment; 24 | import org.apache.avro.generic.GenericRecord; 25 | import org.apache.hadoop.conf.Configuration; 26 | import org.kitesdk.data.Dataset; 27 | import org.kitesdk.data.DatasetReader; 28 | import org.kitesdk.data.Datasets; 29 | 30 | import javax.validation.Valid; 31 | import java.util.Map; 32 | 33 | public class ExhibitStoreConfig { 34 | @JsonProperty 35 | @Valid 36 | String name; 37 | 38 | @JsonProperty 39 | @Valid 40 | String uri; 41 | 42 | @JsonProperty 43 | @Valid 44 | String idColumn; 45 | 46 | public ExhibitStore create(Environment env, Configuration conf) { 47 | Dataset data = Datasets.load(uri); 48 | DatasetReader reader = data.newReader(); 49 | Map exhibits = Maps.newHashMap(); 50 | try { 51 | while 
(reader.hasNext()) { 52 | GenericRecord rec = reader.next(); 53 | Exhibit e = AvroExhibit.create(rec); 54 | exhibits.put(e.attributes().get(idColumn, String.class), e); 55 | } 56 | } finally { 57 | reader.close(); 58 | } 59 | return SimpleExhibitStore.of(name, exhibits); 60 | } 61 | 62 | @Override 63 | public String toString() { 64 | return name; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /core/src/test/java/com/cloudera/exhibit/core/CompositeTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.core; 16 | 17 | import com.cloudera.exhibit.core.composite.CompositeObsDescriptor; 18 | import com.cloudera.exhibit.core.simple.SimpleObsDescriptor; 19 | import com.google.common.collect.ImmutableList; 20 | import org.junit.Test; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | public class CompositeTest { 25 | 26 | public static final ObsDescriptor F1_DESC = SimpleObsDescriptor.builder() 27 | .doubleField("v0") 28 | .stringField("v1") 29 | .build(); 30 | 31 | public static final ObsDescriptor F2_DESC = SimpleObsDescriptor.builder() 32 | .intField("x") 33 | .build(); 34 | 35 | @Test 36 | public void testCompositeObsDescriptor() throws Exception { 37 | CompositeObsDescriptor cod = new CompositeObsDescriptor(ImmutableList.of(F1_DESC, F2_DESC)); 38 | assertEquals(3, cod.size()); 39 | assertEquals(1, cod.indexOf("v1")); 40 | assertEquals(2, cod.indexOf("x")); 41 | assertEquals(-1, cod.indexOf("v3")); 42 | assertEquals(new ObsDescriptor.Field("x", FieldType.INTEGER), cod.get(2)); 43 | } 44 | 45 | @Test 46 | public void testCompositeObsDescriptorWithEmpty() throws Exception { 47 | CompositeObsDescriptor cod = new CompositeObsDescriptor(ImmutableList.of(ObsDescriptor.EMPTY, F1_DESC)); 48 | assertEquals(2, cod.size()); 49 | assertEquals(0, cod.indexOf("v0")); 50 | assertEquals(1, cod.indexOf("v1")); 51 | assertEquals(-1, cod.indexOf("q")); 52 | assertEquals(new ObsDescriptor.Field("v1", FieldType.STRING), cod.get(1)); 53 | assertEquals(new ObsDescriptor.Field("v0", FieldType.DOUBLE), cod.get(0)); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /core/src/test/java/com/cloudera/exhibit/core/vector/VectorTest.java: -------------------------------------------------------------------------------- 1 | package com.cloudera.exhibit.core.vector; 2 | 3 | import com.cloudera.exhibit.core.FieldType; 4 | import com.google.common.collect.ImmutableList; 5 | import 
com.google.common.primitives.Doubles; 6 | import junit.framework.TestCase; 7 | 8 | import java.util.Iterator; 9 | import java.util.List; 10 | 11 | public class VectorTest extends TestCase { 12 | 13 | public void testGetType() throws Exception { 14 | double [] doubles = new double[]{1.0, 2.0, 3.0}; 15 | DoubleVector dv = new DoubleVector(doubles); 16 | assertEquals(FieldType.DOUBLE, dv.getType()); 17 | 18 | List list = ImmutableList.of("A", "B", "C"); 19 | Vector vector = VectorBuilder.build(FieldType.STRING, list); 20 | assertEquals(FieldType.STRING, vector.getType()); 21 | } 22 | 23 | public void testInvalidDoubles() throws Exception { 24 | List list = ImmutableList.of("A", "B", "C"); 25 | try { 26 | VectorBuilder.build(FieldType.DOUBLE, list); 27 | } catch ( IllegalArgumentException ex ){ 28 | // success, exception should be thrown 29 | return; 30 | } 31 | fail("Invalid Doubles were created into a vector"); 32 | } 33 | 34 | public void testGet() throws Exception { 35 | double [] doubles = new double[]{1.0, 2.0, 3.0}; 36 | List doubleList = Doubles.asList(doubles); 37 | Vector dv = VectorBuilder.build(FieldType.DOUBLE, doubleList); 38 | assertEquals(2.0, dv.get(1)); 39 | 40 | List list = ImmutableList.of("A", "B", "C"); 41 | Vector vector = VectorBuilder.build(FieldType.STRING, list); 42 | assertEquals("C", vector.get(2)); 43 | } 44 | 45 | public void testSize() throws Exception { 46 | double [] doubles = new double[]{1.0, 2.0, 3.0}; 47 | DoubleVector dv = new DoubleVector(doubles); 48 | assertEquals(3, dv.size()); 49 | } 50 | 51 | public void testIterator() throws Exception { 52 | double [] doubles = new double[]{1.0, 2.0, 3.0}; 53 | DoubleVector dv = new DoubleVector(doubles); 54 | Iterator it = dv.iterator(); 55 | assertTrue(it.hasNext()); 56 | assertEquals(1.0, it.next()); 57 | 58 | List list = ImmutableList.of("A", "B", "C"); 59 | Vector vector = VectorBuilder.build(FieldType.STRING, list); 60 | Iterator vIt = vector.iterator(); 61 | assertEquals("A", 
vIt.next()); 62 | } 63 | } -------------------------------------------------------------------------------- /etl/src/main/java/com/cloudera/exhibit/etl/fn/CollectFn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.etl.fn; 16 | 17 | import com.cloudera.exhibit.core.Calculator; 18 | import com.cloudera.exhibit.core.Exhibit; 19 | import com.cloudera.exhibit.core.Obs; 20 | import com.cloudera.exhibit.core.ObsDescriptor; 21 | import com.cloudera.exhibit.etl.config.FrameConfig; 22 | import org.apache.avro.Schema; 23 | import org.apache.avro.generic.GenericData; 24 | import org.apache.crunch.DoFn; 25 | import org.apache.crunch.Emitter; 26 | 27 | public class CollectFn extends DoFn { 28 | private final FrameConfig frame; 29 | private final String json; 30 | 31 | private transient Calculator calc; 32 | private transient Schema schema; 33 | private boolean initialized; 34 | 35 | public CollectFn(FrameConfig frame, Schema mapsideSchema) { 36 | this.frame = frame; 37 | this.json = mapsideSchema.toString(); 38 | } 39 | 40 | @Override 41 | public void initialize() { 42 | this.calc = frame.getCalculator(); 43 | this.schema = (new Schema.Parser()).parse(json); 44 | this.initialized = false; 45 | } 46 | 47 | @Override 48 | public void process(Exhibit exhibit, Emitter emitter) { 49 | if (!initialized) { 50 | 
calc.initialize(exhibit.descriptor()); 51 | initialized = true; 52 | } 53 | for (Obs obs : calc.apply(exhibit)) { 54 | GenericData.Record out = new GenericData.Record(schema); 55 | for (ObsDescriptor.Field f : obs.descriptor()) { 56 | out.put(f.name, obs.get(f.name)); 57 | } 58 | emitter.emit(out); 59 | } 60 | } 61 | 62 | @Override 63 | public void cleanup(Emitter emitter) { 64 | calc.cleanup(); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /etl/src/main/java/com/cloudera/exhibit/etl/SchemaUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.etl; 16 | 17 | import com.google.common.collect.Lists; 18 | import org.apache.avro.Schema; 19 | 20 | import java.util.List; 21 | 22 | public class SchemaUtil { 23 | 24 | public static Schema getOrParse(Schema s, String json) { 25 | if (s == null) { 26 | s = (new Schema.Parser()).parse(json); 27 | } 28 | return s; 29 | } 30 | 31 | public static Schema unwrapNull(Schema s) { 32 | if (s.getType() == Schema.Type.UNION) { 33 | List cmps = s.getTypes(); 34 | if (cmps.size() == 2) { 35 | if (cmps.get(0).getType() == Schema.Type.NULL) { 36 | return cmps.get(1); 37 | } else if (cmps.get(1).getType() == Schema.Type.NULL) { 38 | return cmps.get(0); 39 | } 40 | } 41 | } 42 | return s; 43 | } 44 | 45 | public static Schema unionKeySchema(String name, List schemas) { 46 | Schema wrapper = Schema.createRecord(name, "exhibit", "", false); 47 | Schema unionSchema = Schema.createUnion(schemas); 48 | Schema.Field idx = new Schema.Field("index", Schema.create(Schema.Type.INT), "", null); 49 | Schema.Field key = new Schema.Field("key", unionSchema, "", null); 50 | wrapper.setFields(Lists.newArrayList(idx, key)); 51 | return wrapper; 52 | } 53 | 54 | public static Schema unionValueSchema(String name, List schemas) { 55 | Schema wrapper = Schema.createRecord(name, "exhibit", "", false); 56 | Schema unionSchema = Schema.createUnion(schemas); 57 | Schema.Field sf = new Schema.Field("value", unionSchema, "", null); 58 | wrapper.setFields(Lists.newArrayList(sf)); 59 | return wrapper; 60 | } 61 | 62 | private SchemaUtil() {} 63 | } 64 | -------------------------------------------------------------------------------- /etl/src/main/java/com/cloudera/exhibit/etl/tbl/TblCache.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). 
You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.etl.tbl; 16 | 17 | import com.cloudera.exhibit.core.Obs; 18 | import com.cloudera.exhibit.etl.SchemaProvider; 19 | import com.cloudera.exhibit.etl.config.AggConfig; 20 | import com.google.common.collect.Maps; 21 | import org.apache.avro.generic.GenericData; 22 | import org.apache.crunch.Emitter; 23 | import org.apache.crunch.Pair; 24 | 25 | import java.util.Map; 26 | 27 | public class TblCache { 28 | 29 | private final Map cache; 30 | private final AggConfig config; 31 | private final int aggIdx; 32 | private final Emitter>> emitter; 33 | private final SchemaProvider provider; 34 | 35 | public TblCache(final AggConfig config, final int aggIdx, 36 | final Emitter>> emitter, 37 | final SchemaProvider provider) { 38 | this.cache = Maps.newHashMap(); 39 | this.config = config; 40 | this.aggIdx = aggIdx; 41 | this.emitter = emitter; 42 | this.provider = provider; 43 | } 44 | 45 | public void update(GenericData.Record key, Obs obs) { 46 | Tbl tbl = cache.get(key); 47 | if (tbl == null) { 48 | if (cache.size() > config.cacheSize) { 49 | flush(); 50 | } 51 | tbl = config.createTbl(); 52 | tbl.initialize(provider); 53 | cache.put(key, tbl); 54 | } 55 | tbl.add(obs); 56 | } 57 | 58 | public void flush() { 59 | for (Map.Entry e : cache.entrySet()) { 60 | Tbl tbl = e.getValue(); 61 | emitter.emit(Pair.of(e.getKey(), Pair.of(aggIdx, tbl.getValue()))); 62 | } 63 | cache.clear(); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- 
/etl/src/test/java/com/cloudera/exhibit/etl/JSQLTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.etl; 16 | 17 | import com.cloudera.exhibit.core.*; 18 | import com.cloudera.exhibit.core.composite.UpdatableExhibit; 19 | import com.cloudera.exhibit.core.simple.SimpleExhibit; 20 | import com.cloudera.exhibit.core.simple.SimpleFrame; 21 | import com.cloudera.exhibit.core.simple.SimpleObs; 22 | import com.cloudera.exhibit.core.simple.SimpleObsDescriptor; 23 | import com.cloudera.exhibit.javascript.JSCalculator; 24 | import com.cloudera.exhibit.sql.SQLCalculator; 25 | import com.google.common.collect.ImmutableList; 26 | import com.google.common.collect.ImmutableMap; 27 | import com.google.common.collect.Iterables; 28 | import org.junit.Test; 29 | 30 | import static org.junit.Assert.assertEquals; 31 | 32 | public class JSQLTest { 33 | 34 | @Test 35 | public void testJS2SQL() throws Exception { 36 | JSCalculator jsc = new JSCalculator("[{id: 123}]"); 37 | ObsDescriptor od = SimpleObsDescriptor.builder().doubleField("a").booleanField("b").build(); 38 | Obs obs = SimpleObs.of(od, 1729, true); 39 | Obs one = SimpleObs.of(od, 17, true); 40 | Obs two = SimpleObs.of(od, 12, false); 41 | Frame frame = SimpleFrame.of(one, two); 42 | Exhibit e = new SimpleExhibit(obs, ImmutableMap.of("df", frame)); 43 | 
jsc.initialize(e.descriptor()); 44 | UpdatableExhibit ue = new UpdatableExhibit(e); 45 | ue.add("jsres", (Frame) jsc.apply(e)); 46 | SQLCalculator sql = SQLCalculator.create(null, "SELECT count(*) suma FROM jsres where id > 0"); 47 | sql.initialize(ue.descriptor()); 48 | Iterable res = sql.apply(ue); 49 | assertEquals( 50 | new SimpleObs(SimpleObsDescriptor.of("suma", FieldType.LONG), ImmutableList.of(1L)), 51 | Iterables.getOnlyElement(res)); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /hive/src/main/java/com/cloudera/exhibit/hive/HiveFrame.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.hive; 16 | 17 | import com.cloudera.exhibit.core.ObsDescriptor; 18 | import com.cloudera.exhibit.core.Frame; 19 | import com.cloudera.exhibit.core.Obs; 20 | import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; 21 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 22 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; 23 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; 24 | 25 | import java.util.Iterator; 26 | 27 | public class HiveFrame extends Frame { 28 | 29 | private final HiveObsDescriptor descriptor; 30 | private final ListObjectInspector listOI; 31 | private Object values; 32 | 33 | public HiveFrame(ListObjectInspector listOI) { 34 | this.listOI = listOI; 35 | ObjectInspector elOI = listOI.getListElementObjectInspector(); 36 | if (elOI instanceof StructObjectInspector) { 37 | this.descriptor = new HiveStructObsDescriptor((StructObjectInspector) elOI); 38 | } else { 39 | this.descriptor = new HivePrimitiveObsDescriptor((PrimitiveObjectInspector) elOI); 40 | } 41 | } 42 | 43 | public HiveFrame updateValues(Object values) { 44 | this.values = values; 45 | return this; 46 | } 47 | 48 | @Override 49 | public ObsDescriptor descriptor() { 50 | return descriptor; 51 | } 52 | 53 | @Override 54 | public int size() { 55 | if (values == null) { 56 | return 0; 57 | } 58 | return listOI.getListLength(values); 59 | } 60 | 61 | @Override 62 | public Obs get(int index) { 63 | return new HiveObs(descriptor, listOI.getListElement(values, index)); 64 | } 65 | 66 | @Override 67 | public Iterator iterator() { 68 | return null; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /server/src/main/java/com/cloudera/exhibit/server/calcs/CalculationStore.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. 
All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.server.calcs; 16 | 17 | import com.cloudera.exhibit.core.Exhibit; 18 | import com.cloudera.exhibit.core.Obs; 19 | import com.google.common.collect.Lists; 20 | import com.google.common.collect.Maps; 21 | 22 | import java.util.List; 23 | import java.util.Map; 24 | 25 | public class CalculationStore { 26 | 27 | private List calculations; 28 | 29 | public CalculationStore() { 30 | // TODO: remove this 31 | this.calculations = Lists.newArrayList(); 32 | addCalculation("select (sum(yds)/count(distinct gid)) pass_ypg from passes"); 33 | addCalculation("select (sum(yds)/count(distinct gid)) rush_ypg from rushes"); 34 | } 35 | 36 | public synchronized Map> computeKPIs(Exhibit exhibit) { 37 | Map> ret = Maps.newHashMap(); 38 | if (exhibit == null) { 39 | return ret; 40 | } 41 | 42 | for (Calculation calc : calculations) { 43 | Iterable frame = calc.apply(exhibit); 44 | for (Obs obs : frame) { 45 | // TODO: multi row? Real objects, probably? 
46 | for (int i = 0; i < obs.descriptor().size(); i++) { 47 | Map base = Maps.newHashMap(); 48 | base.put("id", calc.getId()); 49 | base.put("value", obs.get(i)); 50 | ret.put(obs.descriptor().get(i).name, base); 51 | } 52 | } 53 | } 54 | return ret; 55 | } 56 | 57 | public synchronized void addCalculation(String code) { 58 | int id = calculations.size(); 59 | Calculation c = new Calculation(id, code); 60 | calculations.add(c); 61 | } 62 | 63 | public synchronized String getCode(int id) { 64 | return calculations.get(id).getCode(); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /octave/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | exhibit-parent 7 | com.cloudera.exhibit 8 | 0.8.0 9 | 10 | 4.0.0 11 | 12 | octave 13 | Exhibit Octave 14 | 15 | 16 | 17 | com.google.guava 18 | guava 19 | 20 | 21 | 22 | com.cloudera.exhibit 23 | exhibit-core 24 | 25 | 26 | 27 | dk.ange 28 | javaoctave 29 | 0.6.4-SNAPSHOT 30 | 31 | 32 | 33 | com.cloudera.exhibit 34 | exhibit-avro 35 | test 36 | 37 | 38 | 39 | junit 40 | junit 41 | test 42 | 43 | 44 | 45 | com.btmatthews.hamcrest 46 | hamcrest-matchers 47 | 1.0.0 48 | test 49 | 50 | 51 | 52 | org.hamcrest 53 | hamcrest-library 54 | 1.3 55 | test 56 | 57 | 58 | 59 | 60 | 61 | 62 | org.apache.maven.plugins 63 | maven-compiler-plugin 64 | 65 | 66 | 67 | 68 | 69 | 70 | maven-repository.javaoctave.kenai.com 71 | JavaOctave Maven Repository 72 | https://kenai.com/svn/javaoctave~maven-repository/maven2 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/Obs.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). 
You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.core; 16 | 17 | import com.google.common.collect.ImmutableList; 18 | 19 | import java.io.Serializable; 20 | import java.util.Iterator; 21 | 22 | public abstract class Obs implements Iterable, Serializable { 23 | public abstract ObsDescriptor descriptor(); 24 | 25 | public abstract Object get(int index); 26 | 27 | public Object get(String name) { 28 | return get(descriptor().indexOf(name)); 29 | } 30 | 31 | public T get(String name, Class clazz) { 32 | return clazz.cast(get(name)); 33 | } 34 | 35 | public Iterator iterator() { 36 | return new Iterator() { 37 | int offset = 0; 38 | @Override 39 | public boolean hasNext() { 40 | return offset < descriptor().size(); 41 | } 42 | 43 | @Override 44 | public Object next() { 45 | Object ret = get(offset); 46 | offset++; 47 | return ret; 48 | } 49 | 50 | @Override 51 | public void remove() { 52 | throw new UnsupportedOperationException(); 53 | } 54 | }; 55 | } 56 | public static final Obs EMPTY = new Obs() { 57 | @Override 58 | public Iterator iterator() { 59 | return ImmutableList.of().iterator(); 60 | } 61 | 62 | @Override 63 | public ObsDescriptor descriptor() { 64 | return ObsDescriptor.EMPTY; 65 | } 66 | 67 | @Override 68 | public Object get(int index) { 69 | throw new ArrayIndexOutOfBoundsException("Empty Obs"); 70 | } 71 | }; 72 | 73 | @Override 74 | public String toString() { 75 | StringBuilder sb = new StringBuilder("["); 76 | if (descriptor().size() > 0) { 77 | sb.append(get(0)); 78 | for (int i = 1; i < descriptor().size(); i++) { 79 | 
sb.append(',').append(get(i)); 80 | } 81 | } 82 | return sb.append(']').toString(); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/ObsDescriptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.core; 16 | 17 | import com.google.common.base.Preconditions; 18 | 19 | import java.io.Serializable; 20 | import java.util.AbstractList; 21 | import java.util.Collections; 22 | import java.util.Iterator; 23 | 24 | public abstract class ObsDescriptor extends AbstractList implements Serializable { 25 | 26 | public static class Field implements Serializable { 27 | public final String name; 28 | public final FieldType type; 29 | 30 | public Field(String name, FieldType type) { 31 | this.name = Preconditions.checkNotNull(name); 32 | this.type = Preconditions.checkNotNull(type); 33 | } 34 | 35 | @Override 36 | public int hashCode() { 37 | return name.hashCode() + 17 * type.hashCode(); 38 | } 39 | 40 | @Override 41 | public boolean equals(Object other) { 42 | if (other == null || !(other instanceof Field)) { 43 | return false; 44 | } 45 | Field field = (Field) other; 46 | return name.equals(field.name) && type.equals(field.type); 47 | } 48 | @Override 49 | public String toString() { 50 | return name + ": " + 
type.toString().toLowerCase(); 51 | } 52 | } 53 | 54 | public abstract int indexOf(String name); 55 | 56 | public static final ObsDescriptor EMPTY = new ObsDescriptor() { 57 | @Override 58 | public Field get(int i) { 59 | throw new ArrayIndexOutOfBoundsException("Empty ObsDescriptor"); 60 | } 61 | 62 | @Override 63 | public int indexOf(String name) { 64 | return -1; 65 | } 66 | 67 | @Override 68 | public int size() { 69 | return 0; 70 | } 71 | 72 | @Override 73 | public Iterator iterator() { 74 | return Collections.emptyIterator(); 75 | } 76 | 77 | @Override 78 | public String toString() { return ""; } 79 | }; 80 | } 81 | -------------------------------------------------------------------------------- /hive/src/main/java/com/cloudera/exhibit/hive/HiveStructObsDescriptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.hive; 16 | 17 | import com.google.common.base.Function; 18 | import com.google.common.collect.Iterators; 19 | import com.google.common.collect.Lists; 20 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; 21 | import org.apache.hadoop.hive.serde2.objectinspector.StructField; 22 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; 23 | 24 | import java.util.Iterator; 25 | import java.util.List; 26 | 27 | class HiveStructObsDescriptor extends HiveObsDescriptor { 28 | private final StructObjectInspector obji; 29 | 30 | public HiveStructObsDescriptor(StructObjectInspector obji) { 31 | this.obji = obji; 32 | } 33 | 34 | @Override 35 | public Field get(int i) { 36 | StructField sf = obji.getAllStructFieldRefs().get(i); 37 | return new Field(sf.getFieldName(), HiveUtils.getFieldType(sf.getFieldObjectInspector())); 38 | } 39 | 40 | @Override 41 | public int indexOf(String name) { 42 | return obji.getAllStructFieldRefs().indexOf(obji.getStructFieldRef(name)); 43 | } 44 | 45 | @Override 46 | public int size() { 47 | return obji.getAllStructFieldRefs().size(); 48 | } 49 | 50 | @Override 51 | public Iterator iterator() { 52 | return Iterators.transform(obji.getAllStructFieldRefs().iterator(), new Function() { 53 | @Override 54 | public Field apply(StructField structField) { 55 | return new Field(structField.getFieldName(), HiveUtils.getFieldType(structField.getFieldObjectInspector())); 56 | } 57 | }); 58 | } 59 | 60 | @Override 61 | public Object[] convert(Object rawObs) { 62 | List v = Lists.newArrayListWithExpectedSize(obji.getAllStructFieldRefs().size()); 63 | ObjectInspectorUtils.copyToStandardObject(v, rawObs, obji, ObjectInspectorUtils.ObjectInspectorCopyOption.JAVA); 64 | for (int i = 0; i < v.size(); i++) { 65 | v.set(i, HiveUtils.asJavaType(v.get(i))); 66 | } 67 | return v.toArray(); 68 | } 69 | } 70 | 
-------------------------------------------------------------------------------- /spark/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | com.cloudera.exhibit 7 | exhibit-parent 8 | 0.8.0 9 | 10 | 11 | exhibit-spark 12 | Exhibit for Spark Data Frames 13 | 14 | 15 | 16 | org.apache.spark 17 | spark-core_${scala.base.version} 18 | provided 19 | 20 | 21 | 22 | org.apache.spark 23 | spark-sql_${scala.base.version} 24 | provided 25 | 26 | 27 | 28 | org.apache.avro 29 | avro-mapred 30 | hadoop2 31 | 32 | 33 | 34 | com.cloudera.exhibit 35 | exhibit-core 36 | 37 | 38 | 39 | com.cloudera.exhibit 40 | exhibit-avro 41 | 42 | 43 | 44 | com.cloudera.exhibit 45 | exhibit-javascript 46 | 47 | 48 | 49 | com.cloudera.exhibit 50 | exhibit-sql 51 | 52 | 53 | 54 | junit 55 | junit 56 | test 57 | 58 | 59 | 60 | 61 | 62 | 63 | org.apache.maven.plugins 64 | maven-compiler-plugin 65 | 66 | 67 | net.alchim31.maven 68 | scala-maven-plugin 69 | 70 | 71 | 72 | compile 73 | testCompile 74 | 75 | 76 | 77 | -deprecation 78 | -dependencyfile 79 | ${project.build.directory}/.scala_dependencies 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /etl/src/main/java/com/cloudera/exhibit/etl/config/AggConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. 
See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.etl.config; 16 | 17 | import com.cloudera.exhibit.core.Calculator; 18 | import com.cloudera.exhibit.core.ExhibitDescriptor; 19 | import com.cloudera.exhibit.core.ObsDescriptor; 20 | import com.cloudera.exhibit.etl.tbl.SumTbl; 21 | import com.cloudera.exhibit.etl.tbl.Tbl; 22 | import com.cloudera.exhibit.etl.tbl.TblType; 23 | import com.google.common.collect.Lists; 24 | import com.google.common.collect.Maps; 25 | 26 | import java.io.Serializable; 27 | import java.util.List; 28 | import java.util.Map; 29 | 30 | public class AggConfig implements Serializable { 31 | 32 | // The type of aggregation that will be performed on the computed records (SUM, PERCENTILE, SUM_TOP, TOP) 33 | public TblType type = TblType.SUM; 34 | 35 | // Table-specific options (e.g., which fields to sort on for the TOP tbl) 36 | public Map options = Maps.newHashMap(); 37 | 38 | // The frame used to generate the output keys and values 39 | public FrameConfig frame = null; 40 | 41 | // The names of the grouping keys for the computed frame if they are different from the 42 | // names of the keys for the parent {@link OutputConfig}. 43 | public List keys = Lists.newArrayList(); 44 | 45 | // A mapping from the names of fields in the computed frame to their names in the output 46 | // aggregation (to allow for columns to be renamed) 47 | public Map values = Maps.newHashMap(); 48 | 49 | // The maximum number of keys whose aggregate values should be cached in memory. 
50 | public long cacheSize = 5000; 51 | 52 | public Calculator getCalculator() { 53 | if (frame == null) { 54 | throw new IllegalStateException("Invalid AggConfig: no frame specified"); 55 | } 56 | return frame.getCalculator(); 57 | } 58 | 59 | public ObsDescriptor getFrameDescriptor(ExhibitDescriptor ed) { 60 | if (frame != null) { 61 | Calculator c = frame.getCalculator(); 62 | return c.initialize(ed); 63 | } 64 | throw new IllegalStateException("Invalid AggConfig: no frame specified"); 65 | } 66 | 67 | public Tbl createTbl() { 68 | //TODO: force validate values before this point 69 | return type.create(values, options); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /mongodb/src/main/java/com/cloudera/exhibit/mongodb/BSONObsDescriptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.mongodb; 16 | 17 | import com.cloudera.exhibit.core.FieldType; 18 | import com.cloudera.exhibit.core.ObsDescriptor; 19 | import com.google.common.collect.ImmutableMap; 20 | import com.google.common.collect.Lists; 21 | import com.google.common.collect.UnmodifiableIterator; 22 | 23 | import java.util.Iterator; 24 | import java.util.List; 25 | import java.util.Map; 26 | 27 | public class BSONObsDescriptor extends ObsDescriptor { 28 | 29 | private final List names; 30 | private final List fieldTypes; 31 | private final List columns; 32 | 33 | public BSONObsDescriptor(List names, List fieldTypes) { 34 | this(names, fieldTypes, ImmutableMap.of()); 35 | } 36 | 37 | public BSONObsDescriptor(List names, List fieldTypes, Map mappings) { 38 | this.names = names; 39 | this.fieldTypes = fieldTypes; 40 | this.columns = getColumns(names, mappings); 41 | } 42 | 43 | private static List getColumns(List names, final Map mappings) { 44 | List ret = Lists.newArrayListWithExpectedSize(names.size()); 45 | for (int i = 0; i < names.size(); i++) { 46 | String key = names.get(i); 47 | String col = mappings.get(key); 48 | ret.add(col == null ? 
key : col); 49 | } 50 | return ret; 51 | } 52 | 53 | @Override 54 | public Field get(int i) { 55 | return new Field(names.get(i), fieldTypes.get(i)); 56 | } 57 | 58 | String getBSONColumn(int i) { 59 | return columns.get(i); 60 | } 61 | 62 | @Override 63 | public int indexOf(String name) { 64 | return names.indexOf(name); 65 | } 66 | 67 | @Override 68 | public int size() { 69 | return names.size(); 70 | } 71 | 72 | @Override 73 | public Iterator iterator() { 74 | return new UnmodifiableIterator() { 75 | private int index = 0; 76 | @Override 77 | public boolean hasNext() { 78 | return index < size(); 79 | } 80 | 81 | @Override 82 | public Field next() { 83 | Field f = get(index); 84 | index++; 85 | return f; 86 | } 87 | }; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /hive/src/main/java/com/cloudera/exhibit/hive/HiveEnumerator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.hive; 16 | 17 | import com.google.common.collect.Lists; 18 | import org.apache.calcite.linq4j.Enumerator; 19 | import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; 20 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 21 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; 22 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; 23 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; 24 | 25 | import java.util.List; 26 | 27 | class HiveEnumerator implements Enumerator { 28 | 29 | private final Object obj; 30 | private final ListObjectInspector listOI; 31 | private final ObjectInspector elOI; 32 | private final int size; 33 | private int currentIndex = -1; 34 | private Object currentValue; 35 | 36 | public HiveEnumerator(Object obj, ListObjectInspector listOI) { 37 | this.obj = obj; 38 | this.listOI = listOI; 39 | this.elOI = listOI.getListElementObjectInspector(); 40 | this.size = listOI.getListLength(obj); 41 | this.currentValue = null; 42 | } 43 | 44 | @Override 45 | public Object current() { 46 | return currentValue; 47 | } 48 | 49 | @Override 50 | public boolean moveNext() { 51 | currentIndex++; 52 | boolean hasNext = currentIndex < size; 53 | if (hasNext) { 54 | updateValues(); 55 | } 56 | return hasNext; 57 | } 58 | 59 | @Override 60 | public void reset() { 61 | currentIndex = -1; 62 | } 63 | 64 | @Override 65 | public void close() { 66 | } 67 | 68 | private void updateValues() { 69 | Object row = listOI.getListElement(obj, currentIndex); 70 | if (elOI.getCategory() == ObjectInspector.Category.PRIMITIVE) { 71 | currentValue = ((PrimitiveObjectInspector) elOI).getPrimitiveJavaObject(row); 72 | currentValue = HiveUtils.asJavaType(currentValue); 73 | } else { 74 | List v = Lists.newArrayList(); 75 | ObjectInspectorUtils.copyToStandardObject(v, row, (StructObjectInspector) elOI, 76 | 
ObjectInspectorUtils.ObjectInspectorCopyOption.JAVA); 77 | for (int i = 0; i < v.size(); i++) { 78 | v.set(i, HiveUtils.asJavaType(v.get(i))); 79 | } 80 | currentValue = v.toArray(); 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /hive/src/main/java/com/cloudera/exhibit/hive/WithinUDTF.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.hive; 16 | 17 | import com.cloudera.exhibit.core.Calculator; 18 | import com.cloudera.exhibit.core.Exhibit; 19 | import com.cloudera.exhibit.core.Obs; 20 | import com.cloudera.exhibit.core.ObsDescriptor; 21 | import org.apache.hadoop.hive.ql.exec.Description; 22 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 23 | import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; 24 | import org.apache.hadoop.hive.ql.metadata.HiveException; 25 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; 26 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 27 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; 28 | 29 | @Description(name = "within_table", 30 | value = "_FUNC_(query_str, ...) - Yo dawg, I heard you liked SQL. 
So we put SQL in your SQL, so you can " + 31 | "query while you query.") 32 | public class WithinUDTF extends GenericUDTF { 33 | 34 | private Calculator calculator; 35 | private ObjectInspector[] inspectors; 36 | 37 | private transient Exhibit exhibit; 38 | private transient Object[] results; 39 | 40 | public WithinUDTF() { 41 | } 42 | 43 | @Override 44 | public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException { 45 | if (args.length <= 1) { 46 | throw new UDFArgumentLengthException("The 'within' function takes at least two arguments"); 47 | } 48 | 49 | this.inspectors = args; 50 | this.calculator = HiveUtils.getCalculator(args[0]); 51 | this.exhibit = HiveUtils.getExhibit(args); 52 | ObsDescriptor od = calculator.initialize(exhibit.descriptor()); 53 | 54 | this.results = new Object[od.size()]; 55 | return (StructObjectInspector) HiveUtils.fromDescriptor(od, true); 56 | } 57 | 58 | @Override 59 | public void process(Object[] args) throws HiveException { 60 | HiveUtils.update(exhibit, inspectors, args); 61 | Iterable res = calculator.apply(exhibit); 62 | for (Obs obs : res) { 63 | for (int i = 0; i < results.length; i++) { 64 | results[i] = HiveUtils.asHiveType(obs.get(i)); 65 | } 66 | forward(results); 67 | } 68 | } 69 | 70 | @Override 71 | public void close() throws HiveException { 72 | calculator.cleanup(); 73 | calculator = null; 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /hive/src/main/java/com/cloudera/exhibit/hive/WithinUDF.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. 
You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.hive; 16 | 17 | import com.cloudera.exhibit.core.Calculator; 18 | import com.cloudera.exhibit.core.Exhibit; 19 | import com.cloudera.exhibit.core.Obs; 20 | import com.cloudera.exhibit.core.ObsDescriptor; 21 | import com.google.common.base.Joiner; 22 | import com.google.common.collect.Iterables; 23 | import com.google.common.collect.Lists; 24 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 25 | import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; 26 | import org.apache.hadoop.hive.ql.metadata.HiveException; 27 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 29 | 30 | import java.util.List; 31 | 32 | public class WithinUDF extends GenericUDF { 33 | 34 | private Calculator calculator; 35 | private ObjectInspector[] inspectors; 36 | 37 | private transient Exhibit exhibit; 38 | 39 | public WithinUDF() { 40 | } 41 | 42 | @Override 43 | public ObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException { 44 | if (args.length <= 1) { 45 | throw new UDFArgumentLengthException("The 'within' function takes at least two arguments"); 46 | } 47 | this.inspectors = args; 48 | this.calculator = HiveUtils.getCalculator(args[0]); 49 | this.exhibit = HiveUtils.getExhibit(args); 50 | ObsDescriptor od = calculator.initialize(exhibit.descriptor()); 51 | return HiveUtils.fromDescriptor(od, false); 52 | } 53 | 54 | @Override 55 | public Object evaluate(DeferredObject[] args) throws HiveException { 56 | HiveUtils.update(exhibit, inspectors, args); 57 | return 
getResult(Iterables.getOnlyElement(calculator.apply(exhibit))); 58 | } 59 | 60 | private Object getResult(Obs obs) { 61 | if (obs.descriptor().size() == 1) { 62 | return HiveUtils.asHiveType(obs.get(0)); 63 | } else { 64 | List values = Lists.newArrayListWithExpectedSize(obs.descriptor().size()); 65 | for (int i = 0; i < obs.descriptor().size(); i++) { 66 | values.add(HiveUtils.asHiveType(obs.get(i))); 67 | } 68 | return values; 69 | } 70 | } 71 | 72 | @Override 73 | public String getDisplayString(String[] args) { 74 | assert (args.length > 1); 75 | return "within(" + Joiner.on(',').join(args) + ")"; 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /octave/src/main/java/com/cloudera/exhibit/octave/OctaveCalculator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.octave; 16 | 17 | import java.io.Serializable; 18 | import java.util.Set; 19 | 20 | import com.cloudera.exhibit.core.Calculator; 21 | import com.cloudera.exhibit.core.Exhibit; 22 | import com.cloudera.exhibit.core.ExhibitDescriptor; 23 | import com.cloudera.exhibit.core.Obs; 24 | import com.cloudera.exhibit.core.ObsDescriptor; 25 | import com.cloudera.exhibit.core.vector.VectorUtils; 26 | import com.google.common.collect.ImmutableList; 27 | import com.google.common.collect.Iterables; 28 | import com.google.common.collect.Sets; 29 | 30 | public class OctaveCalculator implements Calculator, Serializable{ 31 | private OctaveFunctor functor; 32 | private String varName; 33 | 34 | OctaveCalculator(String script) throws OctaveScriptFormatException { 35 | this(null, script); 36 | } 37 | 38 | OctaveCalculator(ExhibitDescriptor descriptor, String script) throws OctaveScriptFormatException { 39 | this.functor = new OctaveFunctor(descriptor, script); 40 | } 41 | 42 | @Override 43 | public ObsDescriptor initialize(ExhibitDescriptor input) { 44 | ExhibitDescriptor res = functor.initialize(input); 45 | if (!res.attributes().isEmpty()) { 46 | return res.attributes(); 47 | } else if (!res.vectors().isEmpty()) { 48 | this.varName = Iterables.getOnlyElement(res.vectors().keySet()); 49 | return VectorUtils.asObsDescriptor(varName, res.vectors().get(varName)); 50 | } else if (!res.frames().isEmpty()) { 51 | this.varName = Iterables.getOnlyElement(res.frames().keySet()); 52 | return res.frames().get(varName); 53 | } 54 | throw new IllegalArgumentException("Could not determine return variable for octave calculation"); 55 | } 56 | 57 | @Override 58 | public void cleanup() { 59 | functor.cleanup(); 60 | } 61 | 62 | @Override 63 | public Iterable apply(Exhibit input) { 64 | return extract(functor.apply(input)); 65 | } 66 | 67 | 68 | private Iterable extract(Exhibit result) { 69 | if (result.frames().containsKey(varName)) { 70 | return 
result.frames().get(varName); 71 | } else if (result.vectors().containsKey(varName)) { 72 | return VectorUtils.asFrame(varName, result.vectors().get(varName)); 73 | } else { 74 | return ImmutableList.of(result.attributes()); 75 | } 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- /etl/src/main/java/com/cloudera/exhibit/etl/config/BuildConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.etl.config; 16 | 17 | import com.google.common.collect.Lists; 18 | import org.apache.avro.Schema; 19 | import org.apache.crunch.Target; 20 | import org.apache.crunch.types.PType; 21 | import org.apache.crunch.types.avro.Avros; 22 | 23 | import java.util.List; 24 | 25 | public class BuildConfig { 26 | public long sleepTimeMsec = 30000L; 27 | 28 | public static enum KeyType { 29 | STRING { 30 | @Override 31 | public PType getPType() { 32 | return Avros.strings(); 33 | } 34 | 35 | @Override 36 | public Schema getSchema() { 37 | return Schema.create(Schema.Type.STRING); 38 | } 39 | 40 | @Override 41 | public Object parse(String stringKey) { 42 | return stringKey; 43 | } 44 | }, 45 | 46 | INT { 47 | @Override 48 | public PType getPType() { 49 | return Avros.ints(); 50 | } 51 | 52 | @Override 53 | public Schema getSchema() { 54 | return Schema.create(Schema.Type.INT); 55 | } 56 | 57 | @Override 58 | public Object parse(String stringKey) { 59 | return Integer.valueOf(stringKey); 60 | } 61 | }, 62 | 63 | LONG { 64 | @Override 65 | public PType getPType() { 66 | return Avros.longs(); 67 | } 68 | 69 | @Override 70 | public Schema getSchema() { 71 | return Schema.create(Schema.Type.LONG); 72 | } 73 | 74 | @Override 75 | public Object parse(String stringKey) { 76 | return Long.valueOf(stringKey); 77 | } 78 | }; 79 | 80 | public abstract PType getPType(); 81 | 82 | public abstract Schema getSchema(); 83 | 84 | public abstract Object parse(String stringKey); 85 | } 86 | 87 | public String uri; 88 | 89 | public String format = "avro"; 90 | 91 | public String compress = "uncompressed"; 92 | 93 | public String name; 94 | 95 | public String keyField; 96 | 97 | public KeyType keyType; 98 | 99 | public Target.WriteMode writeMode = Target.WriteMode.OVERWRITE; 100 | 101 | public int parallelism = -1; 102 | 103 | public List sources = Lists.newArrayList(); 104 | 105 | public ComputeConfig compute = null; 106 | } 107 | 
-------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/ExhibitDescriptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.core; 16 | 17 | import com.google.common.collect.ImmutableMap; 18 | 19 | import java.io.Serializable; 20 | import java.util.Collections; 21 | import java.util.Map; 22 | 23 | public class ExhibitDescriptor implements Serializable { 24 | 25 | private final ObsDescriptor attributes; 26 | private final Map frames; 27 | private final Map vectors; 28 | 29 | public static ExhibitDescriptor of(String name, ObsDescriptor frame) { 30 | return new ExhibitDescriptor(ObsDescriptor.EMPTY 31 | , ImmutableMap.of(name, frame) 32 | , Collections.emptyMap()); 33 | } 34 | 35 | public ExhibitDescriptor(ObsDescriptor attributes, Map frames, Map vectors) { 36 | this.attributes = attributes; 37 | this.frames = frames; 38 | this.vectors = vectors; 39 | } 40 | 41 | public ObsDescriptor attributes() { 42 | return attributes; 43 | } 44 | 45 | public Map frames() { 46 | return frames; 47 | } 48 | 49 | public Map vectors() { 50 | return vectors; 51 | } 52 | 53 | public String toString() { 54 | StringBuilder sb = new StringBuilder(); 55 | sb.append("Attributes:\n"); 56 | toStringHelper(sb, 2, attributes); 57 | sb.append("Frames:\n"); 
58 | for (Map.Entry e : frames.entrySet()) { 59 | sb.append(" ").append(e.getKey()).append("\n"); 60 | toStringHelper(sb, 4, e.getValue()); 61 | } 62 | sb.append("Vectors:\n"); 63 | for (Map.Entry e : vectors.entrySet()) { 64 | sb.append(" ").append(e.getKey()).append("\n"); 65 | toStringHelper(sb, 4, e.getKey(), e.getValue()); 66 | } 67 | return sb.toString(); 68 | } 69 | 70 | private static void toStringHelper(StringBuilder sb, int indent, String name, FieldType type) { 71 | for (int j = 0; j < indent; j++) { 72 | sb.append(' '); 73 | } 74 | sb.append(name).append(": ").append(type).append("\n"); 75 | } 76 | 77 | private static void toStringHelper(StringBuilder sb, int indent, ObsDescriptor desc) { 78 | for (int i = 0; i < desc.size(); i++) { 79 | ObsDescriptor.Field f = desc.get(i); 80 | toStringHelper(sb, indent, f.name, f.type); 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/composite/UpdatableExhibit.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
/**
 * An {@link Exhibit} that layers additional frames and vectors on top of a base exhibit.
 *
 * <p>Additions are kept in this object's own maps and are also recorded in an
 * {@code UpdatableExhibitDescriptor} created from the base exhibit's descriptor. Attributes
 * always come from the base exhibit.
 */
public class UpdatableExhibit implements Exhibit {

  private final Exhibit base;
  // Frames added through this object, keyed by name.
  private final Map frames;
  // Vectors added through this object, keyed by name.
  private final Map vectors;
  private UpdatableExhibitDescriptor descriptor;

  /**
   * @param base the exhibit to extend
   */
  public UpdatableExhibit(Exhibit base) {
    this.base = base;
    this.frames = Maps.newHashMap();
    this.vectors = Maps.newHashMap();
    this.descriptor = new UpdatableExhibitDescriptor(base.descriptor());
  }

  /**
   * Adds a vector under the given name and records its type in the descriptor.
   *
   * @return this exhibit, to allow chaining
   */
  public UpdatableExhibit add(String name, Vector vector) {
    this.vectors.put(name, vector);
    this.descriptor.add(name, vector.getType());
    return this;
  }

  /**
   * Adds a frame under the given name and records its descriptor.
   *
   * @return this exhibit, to allow chaining
   */
  public UpdatableExhibit add(String name, Frame frame) {
    this.frames.put(name, frame);
    this.descriptor.add(name, frame.descriptor());
    return this;
  }

  /**
   * Adds every entry of the given map as a vector.
   *
   * @return this exhibit, to allow chaining
   */
  public UpdatableExhibit addAllVectors(Map vectors) {
    for (Map.Entry e : vectors.entrySet()) {
      add(e.getKey(), e.getValue());
    }
    return this;
  }

  /**
   * Adds every entry of the given map as a frame.
   *
   * @return this exhibit, to allow chaining
   */
  public UpdatableExhibit addAllFrames(Map frames) {
    for (Map.Entry e : frames.entrySet()) {
      add(e.getKey(), e.getValue());
    }
    return this;
  }

  @Override
  public ExhibitDescriptor descriptor() {
    return descriptor;
  }

  /** Returns the base exhibit's attributes. */
  @Override
  public Obs attributes() {
    return base.attributes();
  }

  /**
   * Returns a new map holding the base exhibit's frames overlaid with the frames added here.
   */
  @Override
  public Map frames() {
    Map union = Maps.newHashMap();
    union.putAll(base.frames());
    // Added second, so a frame added here replaces a base frame of the same name.
    union.putAll(frames);
    return union;
  }

  /**
   * Returns a new map holding the base exhibit's vectors overlaid with the vectors added here.
   */
  @Override
  public Map vectors() {
    Map union = Maps.newHashMap();
    union.putAll(base.vectors());
    // Added second, so a vector added here replaces a base vector of the same name.
    union.putAll(vectors);
    return union;
  }
}
this.engine = engine; 37 | } 38 | 39 | @Override 40 | public ObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException { 41 | if (args.length != 1) { 42 | throw new UDFArgumentLengthException("Code UDFs take exactly one argument"); 43 | } 44 | ObjectInspector codeOI = args[0]; 45 | if (!ObjectInspectorUtils.isConstantObjectInspector(codeOI)) { 46 | throw new UDFArgumentException("Code argument must be a constant value"); 47 | } 48 | String code; 49 | Object codeValue = ObjectInspectorUtils.getWritableConstantValue(codeOI); 50 | if (codeOI instanceof StringObjectInspector) { 51 | code = codeValue.toString(); 52 | } else { 53 | ListObjectInspector lcoi = (ListObjectInspector) codeOI; 54 | StringBuilder sb = new StringBuilder(); 55 | for (int i = 0; i < lcoi.getListLength(codeValue); i++) { 56 | sb.append(lcoi.getListElement(codeValue, i)).append('\n'); 57 | } 58 | code = sb.toString(); 59 | } 60 | List ret = Lists.newArrayList(engine, code); 61 | return ObjectInspectorFactory.getStandardConstantListObjectInspector( 62 | PrimitiveObjectInspectorFactory.javaStringObjectInspector, ret); 63 | } 64 | 65 | @Override 66 | public Object evaluate(DeferredObject[] args) throws HiveException { 67 | return Lists.newArrayList(engine, args[0].get()); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /etl/src/test/java/com/cloudera/exhibit/etl/tbl/RatioTblTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. 
You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 14 | */ 15 | package com.cloudera.exhibit.etl.tbl; 16 | 17 | import com.cloudera.exhibit.avro.AvroObs; 18 | import com.cloudera.exhibit.avro.AvroObsDescriptor; 19 | import com.cloudera.exhibit.core.ObsDescriptor; 20 | import com.cloudera.exhibit.etl.SchemaProvider; 21 | import com.google.common.collect.Maps; 22 | import org.apache.avro.Schema; 23 | import org.apache.avro.SchemaBuilder; 24 | import org.apache.avro.generic.GenericData; 25 | import org.junit.Before; 26 | import org.junit.Test; 27 | 28 | import java.util.Map; 29 | 30 | import static org.junit.Assert.assertEquals; 31 | 32 | public class RatioTblTest { 33 | private Schema schema = SchemaBuilder.record("test").fields() 34 | .optionalDouble("a") 35 | .optionalInt("b") 36 | .endRecord(); 37 | private Map values = Maps.newHashMap(); 38 | 39 | @Before 40 | public void setUp() throws Exception { 41 | 42 | values.put("out", "out"); 43 | } 44 | 45 | @Test 46 | public void testRatio() { 47 | GenericData.Record one = new GenericData.Record(schema); 48 | one.put("a", 10.0); 49 | one.put("b", 100); 50 | GenericData.Record two = new GenericData.Record(schema); 51 | two.put("a", 40.0); 52 | two.put("b", 100); 53 | 54 | Map opts = Maps.newHashMap(); 55 | opts.put("numerator", "a"); 56 | opts.put("denominator", "b"); 57 | RatioTbl rt = new RatioTbl(values, opts); 58 | ObsDescriptor od = new AvroObsDescriptor(schema); 59 | 60 | SchemaProvider sp = rt.getSchemas(od, 0, 0); 61 | rt.initialize(sp); 62 | GenericData.Record r0 = rt.getValue(); 63 | assertEquals(0.0, (Double) r0.get("ratio"), 1e-6); 64 | assertEquals(0.0, (Double) r0.get("denominator"), 1e-6); 65 | 66 | rt.add(new 
AvroObs(od, one)); 67 | GenericData.Record r1 = rt.getValue(); 68 | assertEquals(0.10, (Double) r1.get("ratio"), 1e-6); 69 | assertEquals(100.0, (Double) r1.get("denominator"), 1e-6); 70 | 71 | rt.add(new AvroObs(od, two)); 72 | GenericData.Record r2 = rt.getValue(); 73 | assertEquals(0.25, (Double) r2.get("ratio"), 1e-6); 74 | assertEquals(200.0, (Double) r2.get("denominator"), 1e-6); 75 | 76 | GenericData.Record m = rt.merge(r1, r2); 77 | assertEquals(0.2, (Double) m.get("ratio"), 1e-6); 78 | assertEquals(300.0, (Double) m.get("denominator"), 1e-6); 79 | 80 | GenericData.Record f = rt.finalize(m).get(0); 81 | assertEquals(0.2, (Double) f.get("out"), 1e-6); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /hive/src/main/java/com/cloudera/exhibit/hive/ArrayUnionUDF.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.hive; 16 | 17 | import com.google.common.base.Joiner; 18 | import com.google.common.collect.Lists; 19 | import com.google.common.collect.Sets; 20 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 21 | import org.apache.hadoop.hive.ql.metadata.HiveException; 22 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 23 | import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; 24 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 25 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; 26 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; 27 | 28 | import java.util.List; 29 | import java.util.Set; 30 | 31 | public class ArrayUnionUDF extends GenericUDF { 32 | 33 | private List argOIs; 34 | 35 | @Override 36 | public ObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException { 37 | if (args.length < 2) { 38 | throw new UDFArgumentException("Expecting at least two arguments to array_union"); 39 | } 40 | this.argOIs = Lists.newArrayListWithExpectedSize(args.length); 41 | ObjectInspector elemOI = null; 42 | for (ObjectInspector oi : args) { 43 | ListObjectInspector loi = (ListObjectInspector) oi; 44 | argOIs.add(loi); 45 | ObjectInspector eoi = ObjectInspectorUtils.getStandardObjectInspector(loi.getListElementObjectInspector()); 46 | if (elemOI == null) { 47 | elemOI = eoi; 48 | } else if (!elemOI.equals(eoi)) { 49 | throw new UDFArgumentException("Array elements must all be of the same type"); 50 | } 51 | } 52 | return ObjectInspectorFactory.getStandardListObjectInspector(elemOI); 53 | } 54 | 55 | @Override 56 | public Object evaluate(DeferredObject[] args) throws HiveException { 57 | Set distinct = Sets.newHashSet(); 58 | for (int i = 0; i < args.length; i++) { 59 | ListObjectInspector loi = argOIs.get(i); 60 | List list = loi.getList(args[i].get()); 61 | for (int j = 0; j < list.size(); j++) { 62 | 
distinct.add(ObjectInspectorUtils.copyToStandardObject(list.get(j), loi.getListElementObjectInspector())); 63 | } 64 | } 65 | return Lists.newArrayList(distinct); 66 | } 67 | 68 | @Override 69 | public String getDisplayString(String[] args) { 70 | assert (args.length > 1); 71 | return "array_union(" + Joiner.on(',').join(args) + ")"; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /hive/src/main/java/com/cloudera/exhibit/hive/WithinArrayUDF.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.hive; 16 | 17 | import com.cloudera.exhibit.core.Calculator; 18 | import com.cloudera.exhibit.core.Exhibit; 19 | import com.cloudera.exhibit.core.Obs; 20 | import com.cloudera.exhibit.core.ObsDescriptor; 21 | import com.google.common.base.Joiner; 22 | import com.google.common.collect.Lists; 23 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 24 | import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; 25 | import org.apache.hadoop.hive.ql.metadata.HiveException; 26 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; 29 | 30 | import java.util.List; 31 | 32 | public class WithinArrayUDF extends GenericUDF { 33 | private Calculator calculator; 34 | private ObjectInspector[] inspectors; 35 | private transient Exhibit exhibit; 36 | 37 | public WithinArrayUDF() { 38 | } 39 | 40 | @Override 41 | public ObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException { 42 | if (args.length <= 1) { 43 | throw new UDFArgumentLengthException("The 'within_array' function takes at least two arguments"); 44 | } 45 | 46 | this.inspectors = args; 47 | this.calculator = HiveUtils.getCalculator(args[0]); 48 | this.exhibit = HiveUtils.getExhibit(args); 49 | ObsDescriptor od = calculator.initialize(exhibit.descriptor()); 50 | return ObjectInspectorFactory.getStandardListObjectInspector(HiveUtils.fromDescriptor(od, false)); 51 | } 52 | 53 | @Override 54 | public Object evaluate(GenericUDF.DeferredObject[] args) throws HiveException { 55 | HiveUtils.update(exhibit, inspectors, args); 56 | List results = Lists.newArrayList(); 57 | for (Obs obs : calculator.apply(exhibit)) { 58 | results.add(getResult(obs)); 59 | } 60 | return results; 61 | } 62 | 63 | private Object getResult(Obs obs) { 64 | if (obs.descriptor().size() == 1) { 65 | return 
HiveUtils.asHiveType(obs.get(0)); 66 | } else { 67 | List values = Lists.newArrayListWithExpectedSize(obs.descriptor().size()); 68 | for (int i = 0; i < obs.descriptor().size(); i++) { 69 | values.add(HiveUtils.asHiveType(obs.get(i))); 70 | } 71 | return values; 72 | } 73 | } 74 | 75 | @Override 76 | public String getDisplayString(String[] args) { 77 | assert (args.length > 1); 78 | return "within_array(" + Joiner.on(',').join(args) + ")"; 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /avro/src/main/java/com/cloudera/exhibit/avro/AvroObsDescriptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.avro; 16 | 17 | import com.cloudera.exhibit.core.FieldType; 18 | import com.cloudera.exhibit.core.ObsDescriptor; 19 | import com.google.common.collect.Iterators; 20 | import org.apache.avro.Schema; 21 | 22 | import java.io.IOException; 23 | import java.util.List; 24 | 25 | public class AvroObsDescriptor extends ObsDescriptor { 26 | 27 | private static Schema NULL = Schema.create(Schema.Type.NULL); 28 | 29 | private Schema schema; 30 | 31 | public AvroObsDescriptor(Schema schema) { 32 | this.schema = unwrap(schema); 33 | } 34 | 35 | Schema schema() { 36 | return schema; 37 | } 38 | 39 | @Override 40 | public Field get(int index) { 41 | Schema.Field f = schema.getFields().get(index); 42 | return new Field(f.name(), getFieldType(f.schema())); 43 | } 44 | 45 | @Override 46 | public int indexOf(String name) { 47 | Schema.Field f = schema.getField(name); 48 | return f == null ? -1 : f.pos(); 49 | } 50 | 51 | static Schema unwrap(Schema s) { 52 | if (s.getType() == Schema.Type.UNION) { 53 | List ut = s.getTypes(); 54 | if (NULL.equals(ut.get(0))) { 55 | return unwrap(ut.get(1)); 56 | } else if (NULL.equals(ut.get(1))) { 57 | return unwrap(ut.get(0)); 58 | } 59 | } 60 | return s; 61 | } 62 | 63 | static FieldType getFieldType(Schema s) { 64 | s = unwrap(s); 65 | switch (s.getType()) { 66 | case BOOLEAN: 67 | return FieldType.BOOLEAN; 68 | case INT: 69 | return FieldType.INTEGER; 70 | case LONG: 71 | return FieldType.LONG; 72 | case FLOAT: 73 | return FieldType.FLOAT; 74 | case DOUBLE: 75 | return FieldType.DOUBLE; 76 | case STRING: 77 | return FieldType.STRING; 78 | default: 79 | System.err.println("Unknown schema type = " + s); // TODO: remove 80 | return null; 81 | } 82 | } 83 | 84 | @Override 85 | public String toString() { 86 | return Iterators.toString(iterator()); 87 | } 88 | 89 | @Override 90 | public int size() { 91 | return schema.getFields().size(); 92 | } 93 | 94 | private void writeObject(java.io.ObjectOutputStream out) 
throws IOException { 95 | out.writeUTF(schema.toString()); 96 | } 97 | 98 | private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException { 99 | schema = (new Schema.Parser()).parse(in.readUTF()); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /core/src/main/java/com/cloudera/exhibit/core/composite/CompositeObsDescriptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.core.composite; 16 | 17 | import com.cloudera.exhibit.core.ObsDescriptor; 18 | import com.google.common.base.Function; 19 | import com.google.common.collect.Iterators; 20 | import com.google.common.collect.Lists; 21 | import com.google.common.collect.Maps; 22 | 23 | import java.util.Arrays; 24 | import java.util.Iterator; 25 | import java.util.List; 26 | import java.util.Map; 27 | 28 | public class CompositeObsDescriptor extends ObsDescriptor { 29 | 30 | private List components; 31 | private Map fieldNames; 32 | private int[] offsets; 33 | 34 | public CompositeObsDescriptor(List components) { 35 | this.components = components; 36 | this.fieldNames = Maps.newHashMap(); 37 | this.offsets = new int[components.size() + 1]; 38 | int idx = 0; 39 | for (int i = 1; i < offsets.length; i++) { 40 | ObsDescriptor descriptor = components.get(i - 1); 41 | offsets[i] = offsets[i - 1] + descriptor.size(); 42 | for (Field f : descriptor) { 43 | if (fieldNames.containsKey(f.name)) { 44 | throw new IllegalStateException("Duplicate field name in composite obs: " + f.name); 45 | } 46 | fieldNames.put(f.name, idx); 47 | idx++; 48 | } 49 | } 50 | } 51 | 52 | public int getOffsetIndex(int index) { 53 | int offset = Arrays.binarySearch(offsets, index); 54 | if (offset < 0) { 55 | offset = -offset - 2; 56 | while (offset < offsets.length -1 && offsets[offset] == offsets[offset + 1]) { 57 | offset++; 58 | } 59 | } 60 | return offset; 61 | } 62 | 63 | public int getOffset(int offsetIndex) { 64 | return offsets[offsetIndex]; 65 | } 66 | 67 | @Override 68 | public Field get(int i) { 69 | int offsetIndex = getOffsetIndex(i); 70 | int compIdx = i - offsets[offsetIndex]; 71 | return components.get(offsetIndex).get(compIdx); 72 | } 73 | 74 | @Override 75 | public int indexOf(String name) { 76 | Integer ret = fieldNames.get(name); 77 | return ret == null ? 
-1 : ret; 78 | } 79 | 80 | @Override 81 | public int size() { 82 | return offsets[offsets.length - 1]; 83 | } 84 | 85 | @Override 86 | public Iterator iterator() { 87 | return Iterators.concat(Lists.transform(components, new Function>() { 88 | @Override 89 | public Iterator apply(ObsDescriptor descriptor) { 90 | return descriptor.iterator(); 91 | } 92 | }).iterator()); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /etl/src/test/java/com/cloudera/exhibit/etl/tbl/SumTopTblTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved. 3 | * 4 | * Cloudera, Inc. licenses this file to you under the Apache License, 5 | * Version 2.0 (the "License"). You may not use this file except in 6 | * compliance with the License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for 12 | * the specific language governing permissions and limitations under the 13 | * License. 
14 | */ 15 | package com.cloudera.exhibit.etl.tbl; 16 | 17 | import com.cloudera.exhibit.avro.AvroObsDescriptor; 18 | import com.cloudera.exhibit.core.ObsDescriptor; 19 | import com.cloudera.exhibit.etl.SchemaProvider; 20 | import com.google.common.collect.Maps; 21 | import org.apache.avro.Schema; 22 | import org.apache.avro.SchemaBuilder; 23 | import org.apache.avro.generic.GenericData; 24 | import org.junit.Before; 25 | import org.junit.Test; 26 | 27 | import java.util.List; 28 | import java.util.Map; 29 | 30 | import static org.junit.Assert.assertEquals; 31 | 32 | public class SumTopTblTest { 33 | 34 | private Schema schema = SchemaBuilder.record("test").fields() 35 | .optionalString("key") 36 | .optionalDouble("a") 37 | .optionalInt("b") 38 | .endRecord(); 39 | private Map cv = Maps.newHashMap(); 40 | private Map values = Maps.newHashMap(); 41 | 42 | @Before 43 | public void setUp() throws Exception { 44 | GenericData.Record one = new GenericData.Record(schema); 45 | one.put("key", "x"); 46 | one.put("a", 17.0); 47 | one.put("b", 12); 48 | GenericData.Record two = new GenericData.Record(schema); 49 | two.put("key", "y"); 50 | two.put("a", -1.2); 51 | two.put("b", 32); 52 | cv.put("one", one); 53 | cv.put("two", two); 54 | 55 | values.put("key", "key"); 56 | values.put("a", "a"); 57 | values.put("b", "b"); 58 | } 59 | 60 | @Test 61 | public void testOrder() { 62 | Map opts = Maps.newHashMap(); 63 | opts.put("by", "key"); 64 | opts.put("order", "a + b"); 65 | opts.put("limit", 1); 66 | SumTopTbl stt = new SumTopTbl(values, opts); 67 | ObsDescriptor od = new AvroObsDescriptor(schema); 68 | 69 | SchemaProvider sp = stt.getSchemas(od, 0, 0); 70 | stt.initialize(sp); 71 | List> elem = stt.sort(stt.filter(cv)); 72 | assertEquals(2, elem.size()); 73 | assertEquals("y", elem.get(0).getValue().get("key")); 74 | assertEquals("x", elem.get(1).getValue().get("key")); 75 | } 76 | 77 | @Test 78 | public void testFilter() { 79 | Map opts = Maps.newHashMap(); 80 | 
opts.put("by", "key"); 81 | opts.put("order", "a * b"); 82 | opts.put("limit", 1); 83 | opts.put("keep", "a > 0 && b > 0"); 84 | SumTopTbl stt = new SumTopTbl(values, opts); 85 | ObsDescriptor od = new AvroObsDescriptor(schema); 86 | 87 | SchemaProvider sp = stt.getSchemas(od, 0, 0); 88 | stt.initialize(sp); 89 | List> elem = stt.filter(cv); 90 | assertEquals(1, elem.size()); 91 | assertEquals("x", elem.get(0).getValue().get("key")); 92 | } 93 | } 94 | --------------------------------------------------------------------------------