find(ExhibitId id) {
53 | if (!entity.equals(id.getEntity())) {
54 | return Optional.absent();
55 | }
56 | return Optional.fromNullable(exhibits.get(id.getId()));
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/avro/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 |
6 | com.cloudera.exhibit
7 | exhibit-parent
8 | 0.8.0
9 |
10 |
11 | exhibit-avro
12 | Exhibit for Apache Avro
13 |
14 |
15 |
16 | com.cloudera.exhibit
17 | exhibit-core
18 |
19 |
20 |
21 | org.apache.avro
22 | avro
23 |
24 |
25 |
26 | junit
27 | junit
28 | test
29 |
30 |
31 |
32 |
33 |
34 |
35 | org.apache.maven.plugins
36 | maven-compiler-plugin
37 |
38 |
39 | maven-assembly-plugin
40 |
41 |
42 |
43 | com.cloudera.exhibit.mongodb.ExampleClient
44 |
45 |
46 |
47 | jar-with-dependencies
48 |
49 |
50 |
51 |
52 | make-assembly
53 | package
54 |
55 | single
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
/mongodb/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 |
6 | com.cloudera.exhibit
7 | exhibit-parent
8 | 0.8.0
9 |
10 |
11 | exhibit-mongodb
12 | Exhibit for MongoDB
13 |
14 |
15 |
16 | com.cloudera.exhibit
17 | exhibit-core
18 |
19 |
20 |
21 | org.mongodb
22 | mongo-java-driver
23 |
24 |
25 |
26 | junit
27 | junit
28 | test
29 |
30 |
31 |
32 |
33 |
34 |
35 | org.apache.maven.plugins
36 | maven-compiler-plugin
37 |
38 |
39 | maven-assembly-plugin
40 |
41 |
42 |
43 | com.cloudera.exhibit.mongodb.ExampleClient
44 |
45 |
46 |
47 | jar-with-dependencies
48 |
49 |
50 |
51 |
52 | make-assembly
53 | package
54 |
55 | single
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
/thrift/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 |
6 | com.cloudera.exhibit
7 | exhibit-parent
8 | 0.8.0
9 |
10 |
11 | exhibit-thrift
12 | Exhibit for Apache Thrift
13 |
14 |
15 |
16 | com.cloudera.exhibit
17 | exhibit-core
18 |
19 |
20 |
21 | org.apache.thrift
22 | libthrift
23 |
24 |
25 |
26 | junit
27 | junit
28 | test
29 |
30 |
31 |
32 |
33 |
34 |
35 | org.apache.maven.plugins
36 | maven-compiler-plugin
37 |
38 |
39 | maven-assembly-plugin
40 |
41 |
42 |
43 | com.cloudera.exhibit.mongodb.ExampleClient
44 |
45 |
46 |
47 | jar-with-dependencies
48 |
49 |
50 |
51 |
52 | make-assembly
53 | package
54 |
55 | single
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
/etl/src/main/java/com/cloudera/exhibit/etl/config/PivotConfig.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.etl.config;
16 |
17 | import com.google.common.collect.Lists;
18 | import com.google.common.collect.Maps;
19 |
20 | import java.io.Serializable;
21 | import java.util.List;
22 | import java.util.Map;
23 |
24 | /**
25 | * Configuration information used to pivot a given frame computation. It's best to explain
26 | * how this works with an example.
27 | *
28 | * Let's say we have a frame computation against a table called "users" that looks like this:
29 | *
30 | * SELECT age, sex, sum(income) inc, count(*) cnt FROM users GROUP BY age, sex;
31 | *
32 | * For each age, we will get two rows corresponding to the sum of incomes and counts for
33 | * males and females. If we want our output frame to have a single row for each age that
34 | * contains four columns (inc_male, inc_female, cnt_male, cnt_female), then we can use a
35 | * pivot operation on the resulting table, like this sample YAML snippet:
36 | *
37 | * pivot: {by: [age], variables: {sex: [male, female]}}
38 | *
39 | */
40 | public class PivotConfig implements Serializable {
41 | public List by = Lists.newArrayList();
42 |
43 | // A mapping of variable names to a list of valid values that they can take on
44 | // that will be used to generate the pivot columns.
45 | public Map> variables = Maps.newLinkedHashMap();
46 | }
47 |
--------------------------------------------------------------------------------
/javascript/src/main/java/com/cloudera/exhibit/javascript/ScriptableFrame.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.javascript;
16 |
17 | import com.cloudera.exhibit.core.Frame;
18 | import org.mozilla.javascript.Scriptable;
19 | import org.mozilla.javascript.ScriptableObject;
20 |
21 | public class ScriptableFrame extends ScriptableObject {
22 |
23 | private final Frame frame;
24 |
25 | public ScriptableFrame(Frame frame) {
26 | super();
27 | this.frame = frame;
28 | }
29 |
30 | public Frame frame() { return frame; }
31 |
32 | @Override
33 | public String getClassName() {
34 | return "Frame";
35 | }
36 |
37 | @Override
38 | public Object[] getIds() {
39 | Object[] ids = new Object[frame.descriptor().size()];
40 | for (int i = 0; i < ids.length; i++) {
41 | ids[i] = frame.descriptor().get(i).name;
42 | }
43 | return ids;
44 | }
45 |
46 | @Override
47 | public Object get(int index, Scriptable scriptable) {
48 | return new ScriptableObs(frame.get(index));
49 | }
50 |
51 | @Override
52 | public Object get(String name, Scriptable scriptable) {
53 | if ("length".equals(name)) {
54 | return frame.size();
55 | }
56 | return new ScriptableVec(frame.$(name));
57 | }
58 |
59 | @Override
60 | public boolean has(String name, Scriptable scriptable) {
61 | return frame.descriptor().indexOf(name) > -1;
62 | }
63 |
64 | @Override
65 | public Object getDefaultValue(Class> typeHint) {
66 | return frame.toString();
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/etl/src/main/java/com/cloudera/exhibit/etl/SchemaProvider.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.etl;
16 |
17 | import com.google.common.base.Function;
18 | import com.google.common.collect.Lists;
19 | import com.google.common.collect.Maps;
20 | import org.apache.avro.Schema;
21 |
22 | import javax.annotation.Nullable;
23 | import java.io.Serializable;
24 | import java.util.List;
25 | import java.util.Map;
26 |
27 | public class SchemaProvider implements Serializable {
28 | private final List json;
29 | private transient List schemas;
30 |
31 | public SchemaProvider(List schemas) {
32 | this.schemas = schemas;
33 | this.json = Lists.newArrayList(Lists.transform(schemas, new Function() {
34 | @Nullable
35 | @Override
36 | public String apply(Schema schema) {
37 | return schema.toString();
38 | }
39 | }));
40 | }
41 |
42 | public Schema get(int i) {
43 | return getSchemas().get(i);
44 | }
45 |
46 | private List getSchemas() {
47 | if (schemas == null) {
48 | Map defined = Maps.newHashMap();
49 | this.schemas = Lists.newArrayList();
50 | for (String s : json) {
51 | if (defined.containsKey(s)) {
52 | schemas.add(defined.get(s));
53 | } else {
54 | Schema.Parser sp = new Schema.Parser();
55 | Schema schema = sp.parse(s);
56 | defined.put(s, schema);
57 | schemas.add(schema);
58 | }
59 | }
60 | }
61 | return schemas;
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/core/src/main/java/com/cloudera/exhibit/core/simple/SimpleFrame.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.core.simple;
16 |
17 | import com.cloudera.exhibit.core.Frame;
18 | import com.cloudera.exhibit.core.Obs;
19 | import com.cloudera.exhibit.core.ObsDescriptor;
20 | import com.google.common.collect.ImmutableList;
21 |
22 | import java.util.Iterator;
23 | import java.util.List;
24 |
25 | public class SimpleFrame extends Frame {
26 |
27 | private final ObsDescriptor descriptor;
28 | private final List observations;
29 |
30 | public static SimpleFrame of(Obs... obs) {
31 | return new SimpleFrame(ImmutableList.copyOf(obs));
32 | }
33 |
34 | public SimpleFrame(ObsDescriptor descriptor) {
35 | this(descriptor, ImmutableList.of());
36 | }
37 |
38 | public SimpleFrame(List observations) {
39 | this(observations.get(0).descriptor(), observations);
40 | }
41 |
42 | public SimpleFrame(ObsDescriptor descriptor, List observations) {
43 | this.descriptor = descriptor;
44 | this.observations = observations;
45 | }
46 |
47 | @Override
48 | public ObsDescriptor descriptor() {
49 | return descriptor;
50 | }
51 |
52 | @Override
53 | public int size() {
54 | return observations.size();
55 | }
56 |
57 | @Override
58 | public Obs get(int index) {
59 | return observations.get(index);
60 | }
61 |
62 | @Override
63 | public Iterator iterator() {
64 | return observations.iterator();
65 | }
66 |
67 | @Override
68 | public String toString() {
69 | return observations.toString();
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/server/src/main/resources/assets/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Exhibit Prototype
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
/core/src/main/java/com/cloudera/exhibit/core/simple/SimpleObs.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.core.simple;
16 |
17 | import com.cloudera.exhibit.core.Obs;
18 | import com.cloudera.exhibit.core.ObsDescriptor;
19 | import com.google.common.base.Preconditions;
20 | import com.google.common.collect.Lists;
21 |
22 | import java.util.List;
23 |
24 | public class SimpleObs extends Obs {
25 |
26 | private final ObsDescriptor descriptor;
27 | private final List values;
28 |
29 | public static SimpleObs of(ObsDescriptor desc, Object... args) {
30 | return new SimpleObs(desc, Lists.newArrayList(args));
31 | }
32 |
33 | public SimpleObs(ObsDescriptor descriptor, List values) {
34 | assert(descriptor.size() == values.size());
35 | this.descriptor = Preconditions.checkNotNull(descriptor);
36 | this.values = Preconditions.checkNotNull(values);
37 | }
38 |
39 | @Override
40 | public ObsDescriptor descriptor() {
41 | return descriptor;
42 | }
43 |
44 | @Override
45 | public Object get(int index) {
46 | return values.get(index);
47 | }
48 |
49 | public List getValues() {
50 | return values;
51 | }
52 |
53 | @Override
54 | public int hashCode() {
55 | return descriptor.hashCode() + 17 * values.hashCode();
56 | }
57 |
58 | @Override
59 | public boolean equals(Object other) {
60 | if (other == null || !(other instanceof SimpleObs)) {
61 | return false;
62 | }
63 | SimpleObs obs = (SimpleObs) other;
64 | return descriptor.equals(obs.descriptor) && values.equals(obs.values);
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/server/src/main/java/com/cloudera/exhibit/server/resources/FetchResource.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.server.resources;
16 |
17 | import com.cloudera.exhibit.core.Exhibit;
18 | import com.cloudera.exhibit.core.ExhibitId;
19 | import com.cloudera.exhibit.core.ExhibitStore;
20 | import com.cloudera.exhibit.server.calcs.CalculationStore;
21 | import com.google.common.base.Preconditions;
22 | import org.slf4j.Logger;
23 | import org.slf4j.LoggerFactory;
24 |
25 | import javax.ws.rs.Consumes;
26 | import javax.ws.rs.GET;
27 | import javax.ws.rs.Path;
28 | import javax.ws.rs.PathParam;
29 | import javax.ws.rs.Produces;
30 | import javax.ws.rs.core.MediaType;
31 | import java.util.Map;
32 |
33 | @Path("/exhibit/{entity}/{id}")
34 | @Produces(MediaType.APPLICATION_JSON)
35 | public class FetchResource {
36 |
37 | private static final Logger LOG = LoggerFactory.getLogger(FetchResource.class);
38 |
39 | private ExhibitStore exhibits;
40 | private CalculationStore calcs;
41 |
42 | public FetchResource(ExhibitStore exhibits, CalculationStore calcs) {
43 | this.exhibits = Preconditions.checkNotNull(exhibits);
44 | this.calcs = Preconditions.checkNotNull(calcs);
45 | }
46 |
47 | @GET
48 | public FetchResponse fetch(@PathParam("entity") String entity, @PathParam("id") String id) {
49 | ExhibitId eid = ExhibitId.create(entity, id);
50 | LOG.info("Looking up " + eid);
51 | Exhibit exhibit = exhibits.find(eid).orNull();
52 | Map> metrics = calcs.computeKPIs(exhibit);
53 | return new FetchResponse(eid, exhibit, metrics);
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/core/src/main/java/com/cloudera/exhibit/core/multi/MultiExhibitStore.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.core.multi;
16 |
17 | import com.cloudera.exhibit.core.Exhibit;
18 | import com.cloudera.exhibit.core.ExhibitId;
19 | import com.cloudera.exhibit.core.ExhibitStore;
20 | import com.google.common.base.Optional;
21 | import com.google.common.base.Preconditions;
22 | import com.google.common.collect.Maps;
23 |
24 | import java.util.List;
25 | import java.util.Map;
26 | import java.util.Set;
27 |
28 | public class MultiExhibitStore implements ExhibitStore {
29 |
30 | private final Map stores;
31 |
32 | public static MultiExhibitStore create(List stores) {
33 | Map storeMap = Maps.newHashMap();
34 | for (ExhibitStore store : stores) {
35 | for (String entity : store.entities()) {
36 | // TODO double check this
37 | storeMap.put(entity, store);
38 | }
39 | }
40 | return new MultiExhibitStore(storeMap);
41 | }
42 |
43 | public MultiExhibitStore(Map stores) {
44 | this.stores = Preconditions.checkNotNull(stores);
45 | }
46 |
47 | @Override
48 | public boolean isConnected() {
49 | for (ExhibitStore store : stores.values()) {
50 | if (!store.isConnected()) {
51 | return false;
52 | }
53 | }
54 | return true;
55 | }
56 |
57 | @Override
58 | public Set entities() {
59 | return stores.keySet();
60 | }
61 |
62 | @Override
63 | public Optional find(ExhibitId id) {
64 | return stores.get(id.getEntity()).find(id);
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/thrift/src/main/java/com/cloudera/exhibit/thrift/ThriftFrame.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.thrift;
16 |
17 | import com.cloudera.exhibit.core.ObsDescriptor;
18 | import com.cloudera.exhibit.core.Frame;
19 | import com.cloudera.exhibit.core.Obs;
20 | import com.google.common.base.Function;
21 | import com.google.common.collect.ImmutableList;
22 | import com.google.common.collect.Iterators;
23 | import org.apache.thrift.TBase;
24 |
25 | import java.util.Iterator;
26 | import java.util.List;
27 |
28 | public class ThriftFrame extends Frame {
29 |
30 | private final ThriftObsDescriptor descriptor;
31 | private final List extends TBase> records;
32 |
33 | public ThriftFrame(Class extends TBase> clazz) {
34 | this.descriptor = new ThriftObsDescriptor(clazz);
35 | this.records = ImmutableList.of();
36 | }
37 |
38 | public ThriftFrame(List extends TBase> records) {
39 | this.descriptor = new ThriftObsDescriptor(records.get(0).getClass());
40 | this.records = records;
41 | }
42 |
43 | @Override
44 | public ObsDescriptor descriptor() {
45 | return descriptor;
46 | }
47 |
48 | @Override
49 | public int size() {
50 | return records.size();
51 | }
52 |
53 | @Override
54 | public Obs get(int index) {
55 | return new ThriftObs(descriptor, records.get(index));
56 | }
57 |
58 | @Override
59 | public Iterator iterator() {
60 | return Iterators.transform(records.iterator(), new Function() {
61 | @Override
62 | public Obs apply(TBase tBase) {
63 | return new ThriftObs(descriptor, tBase);
64 | }
65 | });
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/server/src/main/java/com/cloudera/exhibit/server/main/ExhibitStoreConfig.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.server.main;
16 |
17 | import com.cloudera.exhibit.avro.AvroExhibit;
18 | import com.cloudera.exhibit.core.Exhibit;
19 | import com.cloudera.exhibit.core.ExhibitStore;
20 | import com.cloudera.exhibit.core.simple.SimpleExhibitStore;
21 | import com.fasterxml.jackson.annotation.JsonProperty;
22 | import com.google.common.collect.Maps;
23 | import io.dropwizard.setup.Environment;
24 | import org.apache.avro.generic.GenericRecord;
25 | import org.apache.hadoop.conf.Configuration;
26 | import org.kitesdk.data.Dataset;
27 | import org.kitesdk.data.DatasetReader;
28 | import org.kitesdk.data.Datasets;
29 |
30 | import javax.validation.Valid;
31 | import java.util.Map;
32 |
33 | public class ExhibitStoreConfig {
34 | @JsonProperty
35 | @Valid
36 | String name;
37 |
38 | @JsonProperty
39 | @Valid
40 | String uri;
41 |
42 | @JsonProperty
43 | @Valid
44 | String idColumn;
45 |
46 | public ExhibitStore create(Environment env, Configuration conf) {
47 | Dataset data = Datasets.load(uri);
48 | DatasetReader reader = data.newReader();
49 | Map exhibits = Maps.newHashMap();
50 | try {
51 | while (reader.hasNext()) {
52 | GenericRecord rec = reader.next();
53 | Exhibit e = AvroExhibit.create(rec);
54 | exhibits.put(e.attributes().get(idColumn, String.class), e);
55 | }
56 | } finally {
57 | reader.close();
58 | }
59 | return SimpleExhibitStore.of(name, exhibits);
60 | }
61 |
62 | @Override
63 | public String toString() {
64 | return name;
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/core/src/test/java/com/cloudera/exhibit/core/CompositeTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.core;
16 |
17 | import com.cloudera.exhibit.core.composite.CompositeObsDescriptor;
18 | import com.cloudera.exhibit.core.simple.SimpleObsDescriptor;
19 | import com.google.common.collect.ImmutableList;
20 | import org.junit.Test;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | public class CompositeTest {
25 |
26 | public static final ObsDescriptor F1_DESC = SimpleObsDescriptor.builder()
27 | .doubleField("v0")
28 | .stringField("v1")
29 | .build();
30 |
31 | public static final ObsDescriptor F2_DESC = SimpleObsDescriptor.builder()
32 | .intField("x")
33 | .build();
34 |
35 | @Test
36 | public void testCompositeObsDescriptor() throws Exception {
37 | CompositeObsDescriptor cod = new CompositeObsDescriptor(ImmutableList.of(F1_DESC, F2_DESC));
38 | assertEquals(3, cod.size());
39 | assertEquals(1, cod.indexOf("v1"));
40 | assertEquals(2, cod.indexOf("x"));
41 | assertEquals(-1, cod.indexOf("v3"));
42 | assertEquals(new ObsDescriptor.Field("x", FieldType.INTEGER), cod.get(2));
43 | }
44 |
45 | @Test
46 | public void testCompositeObsDescriptorWithEmpty() throws Exception {
47 | CompositeObsDescriptor cod = new CompositeObsDescriptor(ImmutableList.of(ObsDescriptor.EMPTY, F1_DESC));
48 | assertEquals(2, cod.size());
49 | assertEquals(0, cod.indexOf("v0"));
50 | assertEquals(1, cod.indexOf("v1"));
51 | assertEquals(-1, cod.indexOf("q"));
52 | assertEquals(new ObsDescriptor.Field("v1", FieldType.STRING), cod.get(1));
53 | assertEquals(new ObsDescriptor.Field("v0", FieldType.DOUBLE), cod.get(0));
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/core/src/test/java/com/cloudera/exhibit/core/vector/VectorTest.java:
--------------------------------------------------------------------------------
1 | package com.cloudera.exhibit.core.vector;
2 |
3 | import com.cloudera.exhibit.core.FieldType;
4 | import com.google.common.collect.ImmutableList;
5 | import com.google.common.primitives.Doubles;
6 | import junit.framework.TestCase;
7 |
8 | import java.util.Iterator;
9 | import java.util.List;
10 |
11 | public class VectorTest extends TestCase {
12 |
13 | public void testGetType() throws Exception {
14 | double [] doubles = new double[]{1.0, 2.0, 3.0};
15 | DoubleVector dv = new DoubleVector(doubles);
16 | assertEquals(FieldType.DOUBLE, dv.getType());
17 |
18 | List list = ImmutableList.of("A", "B", "C");
19 | Vector vector = VectorBuilder.build(FieldType.STRING, list);
20 | assertEquals(FieldType.STRING, vector.getType());
21 | }
22 |
23 | public void testInvalidDoubles() throws Exception {
24 | List list = ImmutableList.of("A", "B", "C");
25 | try {
26 | VectorBuilder.build(FieldType.DOUBLE, list);
27 | } catch ( IllegalArgumentException ex ){
28 | // success, exception should be thrown
29 | return;
30 | }
31 | fail("Invalid Doubles were created into a vector");
32 | }
33 |
34 | public void testGet() throws Exception {
35 | double [] doubles = new double[]{1.0, 2.0, 3.0};
36 | List doubleList = Doubles.asList(doubles);
37 | Vector dv = VectorBuilder.build(FieldType.DOUBLE, doubleList);
38 | assertEquals(2.0, dv.get(1));
39 |
40 | List list = ImmutableList.of("A", "B", "C");
41 | Vector vector = VectorBuilder.build(FieldType.STRING, list);
42 | assertEquals("C", vector.get(2));
43 | }
44 |
45 | public void testSize() throws Exception {
46 | double [] doubles = new double[]{1.0, 2.0, 3.0};
47 | DoubleVector dv = new DoubleVector(doubles);
48 | assertEquals(3, dv.size());
49 | }
50 |
51 | public void testIterator() throws Exception {
52 | double [] doubles = new double[]{1.0, 2.0, 3.0};
53 | DoubleVector dv = new DoubleVector(doubles);
54 | Iterator it = dv.iterator();
55 | assertTrue(it.hasNext());
56 | assertEquals(1.0, it.next());
57 |
58 | List list = ImmutableList.of("A", "B", "C");
59 | Vector vector = VectorBuilder.build(FieldType.STRING, list);
60 | Iterator vIt = vector.iterator();
61 | assertEquals("A", vIt.next());
62 | }
63 | }
--------------------------------------------------------------------------------
/etl/src/main/java/com/cloudera/exhibit/etl/fn/CollectFn.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.etl.fn;
16 |
17 | import com.cloudera.exhibit.core.Calculator;
18 | import com.cloudera.exhibit.core.Exhibit;
19 | import com.cloudera.exhibit.core.Obs;
20 | import com.cloudera.exhibit.core.ObsDescriptor;
21 | import com.cloudera.exhibit.etl.config.FrameConfig;
22 | import org.apache.avro.Schema;
23 | import org.apache.avro.generic.GenericData;
24 | import org.apache.crunch.DoFn;
25 | import org.apache.crunch.Emitter;
26 |
27 | public class CollectFn extends DoFn {
28 | private final FrameConfig frame;
29 | private final String json;
30 |
31 | private transient Calculator calc;
32 | private transient Schema schema;
33 | private boolean initialized;
34 |
35 | public CollectFn(FrameConfig frame, Schema mapsideSchema) {
36 | this.frame = frame;
37 | this.json = mapsideSchema.toString();
38 | }
39 |
40 | @Override
41 | public void initialize() {
42 | this.calc = frame.getCalculator();
43 | this.schema = (new Schema.Parser()).parse(json);
44 | this.initialized = false;
45 | }
46 |
47 | @Override
48 | public void process(Exhibit exhibit, Emitter emitter) {
49 | if (!initialized) {
50 | calc.initialize(exhibit.descriptor());
51 | initialized = true;
52 | }
53 | for (Obs obs : calc.apply(exhibit)) {
54 | GenericData.Record out = new GenericData.Record(schema);
55 | for (ObsDescriptor.Field f : obs.descriptor()) {
56 | out.put(f.name, obs.get(f.name));
57 | }
58 | emitter.emit(out);
59 | }
60 | }
61 |
62 | @Override
63 | public void cleanup(Emitter emitter) {
64 | calc.cleanup();
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/etl/src/main/java/com/cloudera/exhibit/etl/SchemaUtil.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.etl;
16 |
17 | import com.google.common.collect.Lists;
18 | import org.apache.avro.Schema;
19 |
20 | import java.util.List;
21 |
22 | public class SchemaUtil {
23 |
24 | public static Schema getOrParse(Schema s, String json) {
25 | if (s == null) {
26 | s = (new Schema.Parser()).parse(json);
27 | }
28 | return s;
29 | }
30 |
31 | public static Schema unwrapNull(Schema s) {
32 | if (s.getType() == Schema.Type.UNION) {
33 | List cmps = s.getTypes();
34 | if (cmps.size() == 2) {
35 | if (cmps.get(0).getType() == Schema.Type.NULL) {
36 | return cmps.get(1);
37 | } else if (cmps.get(1).getType() == Schema.Type.NULL) {
38 | return cmps.get(0);
39 | }
40 | }
41 | }
42 | return s;
43 | }
44 |
45 | public static Schema unionKeySchema(String name, List schemas) {
46 | Schema wrapper = Schema.createRecord(name, "exhibit", "", false);
47 | Schema unionSchema = Schema.createUnion(schemas);
48 | Schema.Field idx = new Schema.Field("index", Schema.create(Schema.Type.INT), "", null);
49 | Schema.Field key = new Schema.Field("key", unionSchema, "", null);
50 | wrapper.setFields(Lists.newArrayList(idx, key));
51 | return wrapper;
52 | }
53 |
54 | public static Schema unionValueSchema(String name, List schemas) {
55 | Schema wrapper = Schema.createRecord(name, "exhibit", "", false);
56 | Schema unionSchema = Schema.createUnion(schemas);
57 | Schema.Field sf = new Schema.Field("value", unionSchema, "", null);
58 | wrapper.setFields(Lists.newArrayList(sf));
59 | return wrapper;
60 | }
61 |
62 | private SchemaUtil() {}
63 | }
64 |
--------------------------------------------------------------------------------
/etl/src/main/java/com/cloudera/exhibit/etl/tbl/TblCache.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.etl.tbl;
16 |
17 | import com.cloudera.exhibit.core.Obs;
18 | import com.cloudera.exhibit.etl.SchemaProvider;
19 | import com.cloudera.exhibit.etl.config.AggConfig;
20 | import com.google.common.collect.Maps;
21 | import org.apache.avro.generic.GenericData;
22 | import org.apache.crunch.Emitter;
23 | import org.apache.crunch.Pair;
24 |
25 | import java.util.Map;
26 |
27 | public class TblCache {
28 |
29 | private final Map cache;
30 | private final AggConfig config;
31 | private final int aggIdx;
32 | private final Emitter>> emitter;
33 | private final SchemaProvider provider;
34 |
35 | public TblCache(final AggConfig config, final int aggIdx,
36 | final Emitter>> emitter,
37 | final SchemaProvider provider) {
38 | this.cache = Maps.newHashMap();
39 | this.config = config;
40 | this.aggIdx = aggIdx;
41 | this.emitter = emitter;
42 | this.provider = provider;
43 | }
44 |
45 | public void update(GenericData.Record key, Obs obs) {
46 | Tbl tbl = cache.get(key);
47 | if (tbl == null) {
48 | if (cache.size() > config.cacheSize) {
49 | flush();
50 | }
51 | tbl = config.createTbl();
52 | tbl.initialize(provider);
53 | cache.put(key, tbl);
54 | }
55 | tbl.add(obs);
56 | }
57 |
58 | public void flush() {
59 | for (Map.Entry e : cache.entrySet()) {
60 | Tbl tbl = e.getValue();
61 | emitter.emit(Pair.of(e.getKey(), Pair.of(aggIdx, tbl.getValue())));
62 | }
63 | cache.clear();
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/etl/src/test/java/com/cloudera/exhibit/etl/JSQLTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.etl;
16 |
17 | import com.cloudera.exhibit.core.*;
18 | import com.cloudera.exhibit.core.composite.UpdatableExhibit;
19 | import com.cloudera.exhibit.core.simple.SimpleExhibit;
20 | import com.cloudera.exhibit.core.simple.SimpleFrame;
21 | import com.cloudera.exhibit.core.simple.SimpleObs;
22 | import com.cloudera.exhibit.core.simple.SimpleObsDescriptor;
23 | import com.cloudera.exhibit.javascript.JSCalculator;
24 | import com.cloudera.exhibit.sql.SQLCalculator;
25 | import com.google.common.collect.ImmutableList;
26 | import com.google.common.collect.ImmutableMap;
27 | import com.google.common.collect.Iterables;
28 | import org.junit.Test;
29 |
30 | import static org.junit.Assert.assertEquals;
31 |
32 | public class JSQLTest {
33 |
34 | @Test
35 | public void testJS2SQL() throws Exception {
36 | JSCalculator jsc = new JSCalculator("[{id: 123}]");
37 | ObsDescriptor od = SimpleObsDescriptor.builder().doubleField("a").booleanField("b").build();
38 | Obs obs = SimpleObs.of(od, 1729, true);
39 | Obs one = SimpleObs.of(od, 17, true);
40 | Obs two = SimpleObs.of(od, 12, false);
41 | Frame frame = SimpleFrame.of(one, two);
42 | Exhibit e = new SimpleExhibit(obs, ImmutableMap.of("df", frame));
43 | jsc.initialize(e.descriptor());
44 | UpdatableExhibit ue = new UpdatableExhibit(e);
45 | ue.add("jsres", (Frame) jsc.apply(e));
46 | SQLCalculator sql = SQLCalculator.create(null, "SELECT count(*) suma FROM jsres where id > 0");
47 | sql.initialize(ue.descriptor());
48 | Iterable res = sql.apply(ue);
49 | assertEquals(
50 | new SimpleObs(SimpleObsDescriptor.of("suma", FieldType.LONG), ImmutableList.of(1L)),
51 | Iterables.getOnlyElement(res));
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/hive/src/main/java/com/cloudera/exhibit/hive/HiveFrame.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.hive;
16 |
17 | import com.cloudera.exhibit.core.ObsDescriptor;
18 | import com.cloudera.exhibit.core.Frame;
19 | import com.cloudera.exhibit.core.Obs;
20 | import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
21 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
22 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
23 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
24 |
25 | import java.util.Iterator;
26 |
27 | public class HiveFrame extends Frame {
28 |
29 | private final HiveObsDescriptor descriptor;
30 | private final ListObjectInspector listOI;
31 | private Object values;
32 |
33 | public HiveFrame(ListObjectInspector listOI) {
34 | this.listOI = listOI;
35 | ObjectInspector elOI = listOI.getListElementObjectInspector();
36 | if (elOI instanceof StructObjectInspector) {
37 | this.descriptor = new HiveStructObsDescriptor((StructObjectInspector) elOI);
38 | } else {
39 | this.descriptor = new HivePrimitiveObsDescriptor((PrimitiveObjectInspector) elOI);
40 | }
41 | }
42 |
43 | public HiveFrame updateValues(Object values) {
44 | this.values = values;
45 | return this;
46 | }
47 |
48 | @Override
49 | public ObsDescriptor descriptor() {
50 | return descriptor;
51 | }
52 |
53 | @Override
54 | public int size() {
55 | if (values == null) {
56 | return 0;
57 | }
58 | return listOI.getListLength(values);
59 | }
60 |
61 | @Override
62 | public Obs get(int index) {
63 | return new HiveObs(descriptor, listOI.getListElement(values, index));
64 | }
65 |
66 | @Override
67 | public Iterator iterator() {
68 | return null;
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/server/src/main/java/com/cloudera/exhibit/server/calcs/CalculationStore.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.server.calcs;
16 |
import com.cloudera.exhibit.core.Exhibit;
import com.cloudera.exhibit.core.Obs;
import com.cloudera.exhibit.core.ObsDescriptor;
19 | import com.google.common.collect.Lists;
20 | import com.google.common.collect.Maps;
21 |
22 | import java.util.List;
23 | import java.util.Map;
24 |
25 | public class CalculationStore {
26 |
27 | private List calculations;
28 |
29 | public CalculationStore() {
30 | // TODO: remove this
31 | this.calculations = Lists.newArrayList();
32 | addCalculation("select (sum(yds)/count(distinct gid)) pass_ypg from passes");
33 | addCalculation("select (sum(yds)/count(distinct gid)) rush_ypg from rushes");
34 | }
35 |
36 | public synchronized Map> computeKPIs(Exhibit exhibit) {
37 | Map> ret = Maps.newHashMap();
38 | if (exhibit == null) {
39 | return ret;
40 | }
41 |
42 | for (Calculation calc : calculations) {
43 | Iterable frame = calc.apply(exhibit);
44 | for (Obs obs : frame) {
45 | // TODO: multi row? Real objects, probably?
46 | for (int i = 0; i < obs.descriptor().size(); i++) {
47 | Map base = Maps.newHashMap();
48 | base.put("id", calc.getId());
49 | base.put("value", obs.get(i));
50 | ret.put(obs.descriptor().get(i).name, base);
51 | }
52 | }
53 | }
54 | return ret;
55 | }
56 |
57 | public synchronized void addCalculation(String code) {
58 | int id = calculations.size();
59 | Calculation c = new Calculation(id, code);
60 | calculations.add(c);
61 | }
62 |
63 | public synchronized String getCode(int id) {
64 | return calculations.get(id).getCode();
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/octave/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | exhibit-parent
7 | com.cloudera.exhibit
8 | 0.8.0
9 |
10 | 4.0.0
11 |
12 | octave
13 | Exhibit Octave
14 |
15 |
16 |
17 | com.google.guava
18 | guava
19 |
20 |
21 |
22 | com.cloudera.exhibit
23 | exhibit-core
24 |
25 |
26 |
27 | dk.ange
28 | javaoctave
29 | 0.6.4-SNAPSHOT
30 |
31 |
32 |
33 | com.cloudera.exhibit
34 | exhibit-avro
35 | test
36 |
37 |
38 |
39 | junit
40 | junit
41 | test
42 |
43 |
44 |
45 | com.btmatthews.hamcrest
46 | hamcrest-matchers
47 | 1.0.0
48 | test
49 |
50 |
51 |
52 | org.hamcrest
53 | hamcrest-library
54 | 1.3
55 | test
56 |
57 |
58 |
59 |
60 |
61 |
62 | org.apache.maven.plugins
63 | maven-compiler-plugin
64 |
65 |
66 |
67 |
68 |
69 |
70 | maven-repository.javaoctave.kenai.com
71 | JavaOctave Maven Repository
72 | https://kenai.com/svn/javaoctave~maven-repository/maven2
73 |
74 |
75 |
76 |
--------------------------------------------------------------------------------
/core/src/main/java/com/cloudera/exhibit/core/Obs.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.core;
16 |
17 | import com.google.common.collect.ImmutableList;
18 |
19 | import java.io.Serializable;
20 | import java.util.Iterator;
21 |
22 | public abstract class Obs implements Iterable, Serializable {
23 | public abstract ObsDescriptor descriptor();
24 |
25 | public abstract Object get(int index);
26 |
27 | public Object get(String name) {
28 | return get(descriptor().indexOf(name));
29 | }
30 |
31 | public T get(String name, Class clazz) {
32 | return clazz.cast(get(name));
33 | }
34 |
35 | public Iterator iterator() {
36 | return new Iterator() {
37 | int offset = 0;
38 | @Override
39 | public boolean hasNext() {
40 | return offset < descriptor().size();
41 | }
42 |
43 | @Override
44 | public Object next() {
45 | Object ret = get(offset);
46 | offset++;
47 | return ret;
48 | }
49 |
50 | @Override
51 | public void remove() {
52 | throw new UnsupportedOperationException();
53 | }
54 | };
55 | }
56 | public static final Obs EMPTY = new Obs() {
57 | @Override
58 | public Iterator iterator() {
59 | return ImmutableList.of().iterator();
60 | }
61 |
62 | @Override
63 | public ObsDescriptor descriptor() {
64 | return ObsDescriptor.EMPTY;
65 | }
66 |
67 | @Override
68 | public Object get(int index) {
69 | throw new ArrayIndexOutOfBoundsException("Empty Obs");
70 | }
71 | };
72 |
73 | @Override
74 | public String toString() {
75 | StringBuilder sb = new StringBuilder("[");
76 | if (descriptor().size() > 0) {
77 | sb.append(get(0));
78 | for (int i = 1; i < descriptor().size(); i++) {
79 | sb.append(',').append(get(i));
80 | }
81 | }
82 | return sb.append(']').toString();
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
/core/src/main/java/com/cloudera/exhibit/core/ObsDescriptor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.core;
16 |
17 | import com.google.common.base.Preconditions;
18 |
19 | import java.io.Serializable;
20 | import java.util.AbstractList;
21 | import java.util.Collections;
22 | import java.util.Iterator;
23 |
24 | public abstract class ObsDescriptor extends AbstractList implements Serializable {
25 |
26 | public static class Field implements Serializable {
27 | public final String name;
28 | public final FieldType type;
29 |
30 | public Field(String name, FieldType type) {
31 | this.name = Preconditions.checkNotNull(name);
32 | this.type = Preconditions.checkNotNull(type);
33 | }
34 |
35 | @Override
36 | public int hashCode() {
37 | return name.hashCode() + 17 * type.hashCode();
38 | }
39 |
40 | @Override
41 | public boolean equals(Object other) {
42 | if (other == null || !(other instanceof Field)) {
43 | return false;
44 | }
45 | Field field = (Field) other;
46 | return name.equals(field.name) && type.equals(field.type);
47 | }
48 | @Override
49 | public String toString() {
50 | return name + ": " + type.toString().toLowerCase();
51 | }
52 | }
53 |
54 | public abstract int indexOf(String name);
55 |
56 | public static final ObsDescriptor EMPTY = new ObsDescriptor() {
57 | @Override
58 | public Field get(int i) {
59 | throw new ArrayIndexOutOfBoundsException("Empty ObsDescriptor");
60 | }
61 |
62 | @Override
63 | public int indexOf(String name) {
64 | return -1;
65 | }
66 |
67 | @Override
68 | public int size() {
69 | return 0;
70 | }
71 |
72 | @Override
73 | public Iterator iterator() {
74 | return Collections.emptyIterator();
75 | }
76 |
77 | @Override
78 | public String toString() { return ""; }
79 | };
80 | }
81 |
--------------------------------------------------------------------------------
/hive/src/main/java/com/cloudera/exhibit/hive/HiveStructObsDescriptor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.hive;
16 |
17 | import com.google.common.base.Function;
18 | import com.google.common.collect.Iterators;
19 | import com.google.common.collect.Lists;
20 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
21 | import org.apache.hadoop.hive.serde2.objectinspector.StructField;
22 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
23 |
24 | import java.util.Iterator;
25 | import java.util.List;
26 |
27 | class HiveStructObsDescriptor extends HiveObsDescriptor {
28 | private final StructObjectInspector obji;
29 |
30 | public HiveStructObsDescriptor(StructObjectInspector obji) {
31 | this.obji = obji;
32 | }
33 |
34 | @Override
35 | public Field get(int i) {
36 | StructField sf = obji.getAllStructFieldRefs().get(i);
37 | return new Field(sf.getFieldName(), HiveUtils.getFieldType(sf.getFieldObjectInspector()));
38 | }
39 |
40 | @Override
41 | public int indexOf(String name) {
42 | return obji.getAllStructFieldRefs().indexOf(obji.getStructFieldRef(name));
43 | }
44 |
45 | @Override
46 | public int size() {
47 | return obji.getAllStructFieldRefs().size();
48 | }
49 |
50 | @Override
51 | public Iterator iterator() {
52 | return Iterators.transform(obji.getAllStructFieldRefs().iterator(), new Function() {
53 | @Override
54 | public Field apply(StructField structField) {
55 | return new Field(structField.getFieldName(), HiveUtils.getFieldType(structField.getFieldObjectInspector()));
56 | }
57 | });
58 | }
59 |
60 | @Override
61 | public Object[] convert(Object rawObs) {
62 | List v = Lists.newArrayListWithExpectedSize(obji.getAllStructFieldRefs().size());
63 | ObjectInspectorUtils.copyToStandardObject(v, rawObs, obji, ObjectInspectorUtils.ObjectInspectorCopyOption.JAVA);
64 | for (int i = 0; i < v.size(); i++) {
65 | v.set(i, HiveUtils.asJavaType(v.get(i)));
66 | }
67 | return v.toArray();
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/spark/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 |
6 | com.cloudera.exhibit
7 | exhibit-parent
8 | 0.8.0
9 |
10 |
11 | exhibit-spark
12 | Exhibit for Spark Data Frames
13 |
14 |
15 |
16 | org.apache.spark
17 | spark-core_${scala.base.version}
18 | provided
19 |
20 |
21 |
22 | org.apache.spark
23 | spark-sql_${scala.base.version}
24 | provided
25 |
26 |
27 |
28 | org.apache.avro
29 | avro-mapred
30 | hadoop2
31 |
32 |
33 |
34 | com.cloudera.exhibit
35 | exhibit-core
36 |
37 |
38 |
39 | com.cloudera.exhibit
40 | exhibit-avro
41 |
42 |
43 |
44 | com.cloudera.exhibit
45 | exhibit-javascript
46 |
47 |
48 |
49 | com.cloudera.exhibit
50 | exhibit-sql
51 |
52 |
53 |
54 | junit
55 | junit
56 | test
57 |
58 |
59 |
60 |
61 |
62 |
63 | org.apache.maven.plugins
64 | maven-compiler-plugin
65 |
66 |
67 | net.alchim31.maven
68 | scala-maven-plugin
69 |
70 |
71 |
72 | compile
73 | testCompile
74 |
75 |
76 |
77 | -deprecation
78 | -dependencyfile
79 | ${project.build.directory}/.scala_dependencies
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
--------------------------------------------------------------------------------
/etl/src/main/java/com/cloudera/exhibit/etl/config/AggConfig.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.etl.config;
16 |
17 | import com.cloudera.exhibit.core.Calculator;
18 | import com.cloudera.exhibit.core.ExhibitDescriptor;
19 | import com.cloudera.exhibit.core.ObsDescriptor;
20 | import com.cloudera.exhibit.etl.tbl.SumTbl;
21 | import com.cloudera.exhibit.etl.tbl.Tbl;
22 | import com.cloudera.exhibit.etl.tbl.TblType;
23 | import com.google.common.collect.Lists;
24 | import com.google.common.collect.Maps;
25 |
26 | import java.io.Serializable;
27 | import java.util.List;
28 | import java.util.Map;
29 |
30 | public class AggConfig implements Serializable {
31 |
32 | // The type of aggregation that will be performed on the computed records (SUM, PERCENTILE, SUM_TOP, TOP)
33 | public TblType type = TblType.SUM;
34 |
35 | // Table-specific options (e.g., which fields to sort on for the TOP tbl)
36 | public Map options = Maps.newHashMap();
37 |
38 | // The frame used to generate the output keys and values
39 | public FrameConfig frame = null;
40 |
41 | // The names of the grouping keys for the computed frame if they are different from the
42 | // names of the keys for the parent {@link OutputConfig}.
43 | public List keys = Lists.newArrayList();
44 |
45 | // A mapping from the names of fields in the computed frame to their names in the output
46 | // aggregation (to allow for columns to be renamed)
47 | public Map values = Maps.newHashMap();
48 |
49 | // The maximum number of keys whose aggregate values should be cached in memory.
50 | public long cacheSize = 5000;
51 |
52 | public Calculator getCalculator() {
53 | if (frame == null) {
54 | throw new IllegalStateException("Invalid AggConfig: no frame specified");
55 | }
56 | return frame.getCalculator();
57 | }
58 |
59 | public ObsDescriptor getFrameDescriptor(ExhibitDescriptor ed) {
60 | if (frame != null) {
61 | Calculator c = frame.getCalculator();
62 | return c.initialize(ed);
63 | }
64 | throw new IllegalStateException("Invalid AggConfig: no frame specified");
65 | }
66 |
67 | public Tbl createTbl() {
68 | //TODO: force validate values before this point
69 | return type.create(values, options);
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/mongodb/src/main/java/com/cloudera/exhibit/mongodb/BSONObsDescriptor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.mongodb;
16 |
17 | import com.cloudera.exhibit.core.FieldType;
18 | import com.cloudera.exhibit.core.ObsDescriptor;
19 | import com.google.common.collect.ImmutableMap;
20 | import com.google.common.collect.Lists;
21 | import com.google.common.collect.UnmodifiableIterator;
22 |
23 | import java.util.Iterator;
24 | import java.util.List;
25 | import java.util.Map;
26 |
27 | public class BSONObsDescriptor extends ObsDescriptor {
28 |
29 | private final List names;
30 | private final List fieldTypes;
31 | private final List columns;
32 |
33 | public BSONObsDescriptor(List names, List fieldTypes) {
34 | this(names, fieldTypes, ImmutableMap.of());
35 | }
36 |
37 | public BSONObsDescriptor(List names, List fieldTypes, Map mappings) {
38 | this.names = names;
39 | this.fieldTypes = fieldTypes;
40 | this.columns = getColumns(names, mappings);
41 | }
42 |
43 | private static List getColumns(List names, final Map mappings) {
44 | List ret = Lists.newArrayListWithExpectedSize(names.size());
45 | for (int i = 0; i < names.size(); i++) {
46 | String key = names.get(i);
47 | String col = mappings.get(key);
48 | ret.add(col == null ? key : col);
49 | }
50 | return ret;
51 | }
52 |
53 | @Override
54 | public Field get(int i) {
55 | return new Field(names.get(i), fieldTypes.get(i));
56 | }
57 |
58 | String getBSONColumn(int i) {
59 | return columns.get(i);
60 | }
61 |
62 | @Override
63 | public int indexOf(String name) {
64 | return names.indexOf(name);
65 | }
66 |
67 | @Override
68 | public int size() {
69 | return names.size();
70 | }
71 |
72 | @Override
73 | public Iterator iterator() {
74 | return new UnmodifiableIterator() {
75 | private int index = 0;
76 | @Override
77 | public boolean hasNext() {
78 | return index < size();
79 | }
80 |
81 | @Override
82 | public Field next() {
83 | Field f = get(index);
84 | index++;
85 | return f;
86 | }
87 | };
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/hive/src/main/java/com/cloudera/exhibit/hive/HiveEnumerator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.hive;
16 |
17 | import com.google.common.collect.Lists;
18 | import org.apache.calcite.linq4j.Enumerator;
19 | import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
20 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
21 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
22 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
23 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
24 |
25 | import java.util.List;
26 |
27 | class HiveEnumerator implements Enumerator {
28 |
29 | private final Object obj;
30 | private final ListObjectInspector listOI;
31 | private final ObjectInspector elOI;
32 | private final int size;
33 | private int currentIndex = -1;
34 | private Object currentValue;
35 |
36 | public HiveEnumerator(Object obj, ListObjectInspector listOI) {
37 | this.obj = obj;
38 | this.listOI = listOI;
39 | this.elOI = listOI.getListElementObjectInspector();
40 | this.size = listOI.getListLength(obj);
41 | this.currentValue = null;
42 | }
43 |
44 | @Override
45 | public Object current() {
46 | return currentValue;
47 | }
48 |
49 | @Override
50 | public boolean moveNext() {
51 | currentIndex++;
52 | boolean hasNext = currentIndex < size;
53 | if (hasNext) {
54 | updateValues();
55 | }
56 | return hasNext;
57 | }
58 |
59 | @Override
60 | public void reset() {
61 | currentIndex = -1;
62 | }
63 |
64 | @Override
65 | public void close() {
66 | }
67 |
68 | private void updateValues() {
69 | Object row = listOI.getListElement(obj, currentIndex);
70 | if (elOI.getCategory() == ObjectInspector.Category.PRIMITIVE) {
71 | currentValue = ((PrimitiveObjectInspector) elOI).getPrimitiveJavaObject(row);
72 | currentValue = HiveUtils.asJavaType(currentValue);
73 | } else {
74 | List v = Lists.newArrayList();
75 | ObjectInspectorUtils.copyToStandardObject(v, row, (StructObjectInspector) elOI,
76 | ObjectInspectorUtils.ObjectInspectorCopyOption.JAVA);
77 | for (int i = 0; i < v.size(); i++) {
78 | v.set(i, HiveUtils.asJavaType(v.get(i)));
79 | }
80 | currentValue = v.toArray();
81 | }
82 | }
83 | }
84 |
--------------------------------------------------------------------------------
/hive/src/main/java/com/cloudera/exhibit/hive/WithinUDTF.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.hive;
16 |
17 | import com.cloudera.exhibit.core.Calculator;
18 | import com.cloudera.exhibit.core.Exhibit;
19 | import com.cloudera.exhibit.core.Obs;
20 | import com.cloudera.exhibit.core.ObsDescriptor;
21 | import org.apache.hadoop.hive.ql.exec.Description;
22 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
23 | import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
24 | import org.apache.hadoop.hive.ql.metadata.HiveException;
25 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
26 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
27 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
28 |
29 | /**
30 |  * Hive table-generating function registered as {@code within_table}.
31 |  *
32 |  * <p>{@code initialize} builds a {@link Calculator} from the first argument's inspector and an
33 |  * {@link Exhibit} from all argument inspectors; {@code process} refreshes that exhibit from each
34 |  * input row and forwards one output row per {@link Obs} the calculator produces.
35 |  */
36 | @Description(name = "within_table",
37 |     value = "_FUNC_(query_str, ...) - Yo dawg, I heard you liked SQL. So we put SQL in your SQL, so you can " +
38 |         "query while you query.")
39 | public class WithinUDTF extends GenericUDTF {
40 |
41 |   private Calculator calculator;
42 |   private ObjectInspector[] inspectors;
43 |
44 |   private transient Exhibit exhibit;
45 |   // Single buffer reused for every forwarded row; sized to the calculator's output descriptor.
46 |   private transient Object[] results;
47 |
48 |   public WithinUDTF() {
49 |   }
50 |
51 |   /**
52 |    * Validates the argument count and prepares the calculator, the exhibit and the row buffer.
53 |    *
54 |    * @param args inspectors for the call's arguments; at least two are required
55 |    * @return the struct inspector derived from the calculator's output descriptor
56 |    * @throws UDFArgumentException if fewer than two arguments are supplied
57 |    */
58 |   @Override
59 |   public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
60 |     if (args.length <= 1) {
61 |       // Use the name the function is registered under so the message matches what the user typed.
62 |       throw new UDFArgumentLengthException("The 'within_table' function takes at least two arguments");
63 |     }
64 |
65 |     this.inspectors = args;
66 |     this.calculator = HiveUtils.getCalculator(args[0]);
67 |     this.exhibit = HiveUtils.getExhibit(args);
68 |     ObsDescriptor od = calculator.initialize(exhibit.descriptor());
69 |
70 |     this.results = new Object[od.size()];
71 |     return (StructObjectInspector) HiveUtils.fromDescriptor(od, true);
72 |   }
73 |
74 |   /**
75 |    * Refreshes the exhibit from {@code args} and forwards one row per observation returned by
76 |    * the calculator.
77 |    *
78 |    * @param args the argument values of the current input row
79 |    */
80 |   @Override
81 |   public void process(Object[] args) throws HiveException {
82 |     HiveUtils.update(exhibit, inspectors, args);
83 |     for (Obs obs : calculator.apply(exhibit)) {
84 |       for (int i = 0; i < results.length; i++) {
85 |         results[i] = HiveUtils.asHiveType(obs.get(i));
86 |       }
87 |       forward(results);
88 |     }
89 |   }
90 |
91 |   /**
92 |    * Releases the calculator. Safe to call when {@code initialize} never ran or when
93 |    * {@code close} was already called, because the calculator reference is cleared afterwards.
94 |    */
95 |   @Override
96 |   public void close() throws HiveException {
97 |     if (calculator != null) {
98 |       calculator.cleanup();
99 |       calculator = null;
100 |     }
101 |   }
102 | }
103 |
--------------------------------------------------------------------------------
/hive/src/main/java/com/cloudera/exhibit/hive/WithinUDF.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.hive;
16 |
17 | import com.cloudera.exhibit.core.Calculator;
18 | import com.cloudera.exhibit.core.Exhibit;
19 | import com.cloudera.exhibit.core.Obs;
20 | import com.cloudera.exhibit.core.ObsDescriptor;
21 | import com.google.common.base.Joiner;
22 | import com.google.common.collect.Iterables;
23 | import com.google.common.collect.Lists;
24 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
25 | import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
26 | import org.apache.hadoop.hive.ql.metadata.HiveException;
27 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
29 |
30 | import java.util.List;
31 |
32 | public class WithinUDF extends GenericUDF {
33 |
34 | private Calculator calculator;
35 | private ObjectInspector[] inspectors;
36 |
37 | private transient Exhibit exhibit;
38 |
39 | public WithinUDF() {
40 | }
41 |
42 | @Override
43 | public ObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
44 | if (args.length <= 1) {
45 | throw new UDFArgumentLengthException("The 'within' function takes at least two arguments");
46 | }
47 | this.inspectors = args;
48 | this.calculator = HiveUtils.getCalculator(args[0]);
49 | this.exhibit = HiveUtils.getExhibit(args);
50 | ObsDescriptor od = calculator.initialize(exhibit.descriptor());
51 | return HiveUtils.fromDescriptor(od, false);
52 | }
53 |
54 | @Override
55 | public Object evaluate(DeferredObject[] args) throws HiveException {
56 | HiveUtils.update(exhibit, inspectors, args);
57 | return getResult(Iterables.getOnlyElement(calculator.apply(exhibit)));
58 | }
59 |
60 | private Object getResult(Obs obs) {
61 | if (obs.descriptor().size() == 1) {
62 | return HiveUtils.asHiveType(obs.get(0));
63 | } else {
64 | List values = Lists.newArrayListWithExpectedSize(obs.descriptor().size());
65 | for (int i = 0; i < obs.descriptor().size(); i++) {
66 | values.add(HiveUtils.asHiveType(obs.get(i)));
67 | }
68 | return values;
69 | }
70 | }
71 |
72 | @Override
73 | public String getDisplayString(String[] args) {
74 | assert (args.length > 1);
75 | return "within(" + Joiner.on(',').join(args) + ")";
76 | }
77 | }
78 |
--------------------------------------------------------------------------------
/octave/src/main/java/com/cloudera/exhibit/octave/OctaveCalculator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.octave;
16 |
17 | import java.io.Serializable;
18 | import java.util.Set;
19 |
20 | import com.cloudera.exhibit.core.Calculator;
21 | import com.cloudera.exhibit.core.Exhibit;
22 | import com.cloudera.exhibit.core.ExhibitDescriptor;
23 | import com.cloudera.exhibit.core.Obs;
24 | import com.cloudera.exhibit.core.ObsDescriptor;
25 | import com.cloudera.exhibit.core.vector.VectorUtils;
26 | import com.google.common.collect.ImmutableList;
27 | import com.google.common.collect.Iterables;
28 | import com.google.common.collect.Sets;
29 |
30 | /**
31 |  * {@link Calculator} that delegates to an {@link OctaveFunctor} and exposes one of the functor's
32 |  * outputs (its attributes, a single vector, or a single frame) as the calculation result.
33 |  */
34 | public class OctaveCalculator implements Calculator, Serializable{
35 |   private OctaveFunctor functor;
36 |   // Name of the vector or frame selected by initialize(); null when the attributes are returned.
37 |   private String varName;
38 |
39 |   OctaveCalculator(String script) throws OctaveScriptFormatException {
40 |     this(null, script);
41 |   }
42 |
43 |   OctaveCalculator(ExhibitDescriptor descriptor, String script) throws OctaveScriptFormatException {
44 |     this.functor = new OctaveFunctor(descriptor, script);
45 |   }
46 |
47 |   /**
48 |    * Initializes the functor and picks the output to return: the attributes when they are
49 |    * non-empty, otherwise the only vector, otherwise the only frame.
50 |    *
51 |    * @param input descriptor of the exhibit the calculation will run against
52 |    * @return the descriptor of the selected output
53 |    * @throws IllegalArgumentException if the functor declares no attributes, vectors or frames
54 |    */
55 |   @Override
56 |   public ObsDescriptor initialize(ExhibitDescriptor input) {
57 |     // Drop any name chosen by an earlier initialize() call so extract() cannot use a stale one.
58 |     this.varName = null;
59 |     ExhibitDescriptor res = functor.initialize(input);
60 |     if (!res.attributes().isEmpty()) {
61 |       return res.attributes();
62 |     } else if (!res.vectors().isEmpty()) {
63 |       this.varName = Iterables.getOnlyElement(res.vectors().keySet());
64 |       return VectorUtils.asObsDescriptor(varName, res.vectors().get(varName));
65 |     } else if (!res.frames().isEmpty()) {
66 |       this.varName = Iterables.getOnlyElement(res.frames().keySet());
67 |       return res.frames().get(varName);
68 |     }
69 |     throw new IllegalArgumentException("Could not determine return variable for octave calculation");
70 |   }
71 |
72 |   @Override
73 |   public void cleanup() {
74 |     functor.cleanup();
75 |   }
76 |
77 |   /** Runs the functor on {@code input} and returns the output selected by {@code initialize}. */
78 |   @Override
79 |   public Iterable<Obs> apply(Exhibit input) {
80 |     return extract(functor.apply(input));
81 |   }
82 |
83 |   /**
84 |    * Pulls the selected output out of {@code result}: the frame or vector named {@code varName}
85 |    * when one was selected and is present, otherwise the attributes as a one-element list.
86 |    */
87 |   private Iterable<Obs> extract(Exhibit result) {
88 |     // Only probe the maps when a name was selected; some map implementations reject null keys.
89 |     if (varName != null) {
90 |       if (result.frames().containsKey(varName)) {
91 |         return result.frames().get(varName);
92 |       } else if (result.vectors().containsKey(varName)) {
93 |         return VectorUtils.asFrame(varName, result.vectors().get(varName));
94 |       }
95 |     }
96 |     return ImmutableList.of(result.attributes());
97 |   }
98 |
99 | }
100 |
--------------------------------------------------------------------------------
/etl/src/main/java/com/cloudera/exhibit/etl/config/BuildConfig.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.etl.config;
16 |
17 | import com.google.common.collect.Lists;
18 | import org.apache.avro.Schema;
19 | import org.apache.crunch.Target;
20 | import org.apache.crunch.types.PType;
21 | import org.apache.crunch.types.avro.Avros;
22 |
23 | import java.util.List;
24 |
25 | public class BuildConfig { // Plain holder of build settings; every field is public and mutable.
26 | public long sleepTimeMsec = 30000L; // defaults to 30000 ms
27 | // Key types a build can use; each constant supplies a Crunch PType, an Avro Schema and a parser for string keys.
28 | public static enum KeyType {
29 | STRING {
30 | @Override
31 | public PType> getPType() { // NOTE(review): the generic type arguments look truncated here -- confirm against the original file
32 | return Avros.strings();
33 | }
34 |
35 | @Override
36 | public Schema getSchema() {
37 | return Schema.create(Schema.Type.STRING);
38 | }
39 |
40 | @Override
41 | public Object parse(String stringKey) {
42 | return stringKey; // string keys are used as-is
43 | }
44 | },
45 |
46 | INT {
47 | @Override
48 | public PType> getPType() { // NOTE(review): same truncated signature as STRING
49 | return Avros.ints();
50 | }
51 |
52 | @Override
53 | public Schema getSchema() {
54 | return Schema.create(Schema.Type.INT);
55 | }
56 |
57 | @Override
58 | public Object parse(String stringKey) {
59 | return Integer.valueOf(stringKey); // NumberFormatException if stringKey is not a valid int
60 | }
61 | },
62 |
63 | LONG {
64 | @Override
65 | public PType> getPType() { // NOTE(review): same truncated signature as STRING
66 | return Avros.longs();
67 | }
68 |
69 | @Override
70 | public Schema getSchema() {
71 | return Schema.create(Schema.Type.LONG);
72 | }
73 |
74 | @Override
75 | public Object parse(String stringKey) {
76 | return Long.valueOf(stringKey); // NumberFormatException if stringKey is not a valid long
77 | }
78 | };
79 | // Crunch type matching this key type.
80 | public abstract PType> getPType();
81 | // Avro schema matching this key type.
82 | public abstract Schema getSchema();
83 | // Converts the string form of a key into this key type's value.
84 | public abstract Object parse(String stringKey);
85 | }
86 |
87 | public String uri;
88 |
89 | public String format = "avro"; // defaults to "avro"
90 |
91 | public String compress = "uncompressed"; // defaults to "uncompressed"
92 |
93 | public String name;
94 |
95 | public String keyField;
96 |
97 | public KeyType keyType;
98 |
99 | public Target.WriteMode writeMode = Target.WriteMode.OVERWRITE; // defaults to OVERWRITE
100 |
101 | public int parallelism = -1; // presumably -1 means "not set" -- TODO confirm against the code that reads it
102 |
103 | public List sources = Lists.newArrayList(); // starts empty; NOTE(review): element type is not visible here
104 |
105 | public ComputeConfig compute = null; // no compute step unless one is assigned
106 | }
107 |
--------------------------------------------------------------------------------
/core/src/main/java/com/cloudera/exhibit/core/ExhibitDescriptor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.core;
16 |
17 | import com.google.common.collect.ImmutableMap;
18 |
19 | import java.io.Serializable;
20 | import java.util.Collections;
21 | import java.util.Map;
22 |
23 | public class ExhibitDescriptor implements Serializable {
24 |
25 | private final ObsDescriptor attributes;
26 | private final Map frames;
27 | private final Map vectors;
28 |
29 | public static ExhibitDescriptor of(String name, ObsDescriptor frame) {
30 | return new ExhibitDescriptor(ObsDescriptor.EMPTY
31 | , ImmutableMap.of(name, frame)
32 | , Collections.emptyMap());
33 | }
34 |
35 | public ExhibitDescriptor(ObsDescriptor attributes, Map frames, Map vectors) {
36 | this.attributes = attributes;
37 | this.frames = frames;
38 | this.vectors = vectors;
39 | }
40 |
41 | public ObsDescriptor attributes() {
42 | return attributes;
43 | }
44 |
45 | public Map frames() {
46 | return frames;
47 | }
48 |
49 | public Map vectors() {
50 | return vectors;
51 | }
52 |
53 | public String toString() {
54 | StringBuilder sb = new StringBuilder();
55 | sb.append("Attributes:\n");
56 | toStringHelper(sb, 2, attributes);
57 | sb.append("Frames:\n");
58 | for (Map.Entry e : frames.entrySet()) {
59 | sb.append(" ").append(e.getKey()).append("\n");
60 | toStringHelper(sb, 4, e.getValue());
61 | }
62 | sb.append("Vectors:\n");
63 | for (Map.Entry e : vectors.entrySet()) {
64 | sb.append(" ").append(e.getKey()).append("\n");
65 | toStringHelper(sb, 4, e.getKey(), e.getValue());
66 | }
67 | return sb.toString();
68 | }
69 |
70 | private static void toStringHelper(StringBuilder sb, int indent, String name, FieldType type) {
71 | for (int j = 0; j < indent; j++) {
72 | sb.append(' ');
73 | }
74 | sb.append(name).append(": ").append(type).append("\n");
75 | }
76 |
77 | private static void toStringHelper(StringBuilder sb, int indent, ObsDescriptor desc) {
78 | for (int i = 0; i < desc.size(); i++) {
79 | ObsDescriptor.Field f = desc.get(i);
80 | toStringHelper(sb, indent, f.name, f.type);
81 | }
82 | }
83 | }
84 |
--------------------------------------------------------------------------------
/core/src/main/java/com/cloudera/exhibit/core/composite/UpdatableExhibit.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.core.composite;
16 |
17 | import com.cloudera.exhibit.core.Exhibit;
18 | import com.cloudera.exhibit.core.ExhibitDescriptor;
19 | import com.cloudera.exhibit.core.Frame;
20 | import com.cloudera.exhibit.core.Obs;
21 | import com.cloudera.exhibit.core.Vec;
22 | import com.cloudera.exhibit.core.vector.Vector;
23 | import com.google.common.collect.Maps;
24 |
25 | import java.util.Map;
26 |
27 | public class UpdatableExhibit implements Exhibit { // Exhibit view over a base exhibit plus frames/vectors added after construction.
28 |
29 | private final Exhibit base;
30 | private final Map frames; // frames added through add()/addAllFrames(), keyed by name
31 | private final Map vectors; // vectors added through add()/addAllVectors(), keyed by name
32 | private UpdatableExhibitDescriptor descriptor; // starts from base.descriptor() and is extended on every add()
33 | // Wraps base; no frames or vectors are added yet.
34 | public UpdatableExhibit(Exhibit base) {
35 | this.base = base;
36 | this.frames = Maps.newHashMap();
37 | this.vectors = Maps.newHashMap();
38 | this.descriptor = new UpdatableExhibitDescriptor(base.descriptor());
39 | }
40 | // Registers a vector under name (replacing an earlier one with that name), records its type in the descriptor, and returns this for chaining.
41 | public UpdatableExhibit add(String name, Vector vector) {
42 | this.vectors.put(name, vector);
43 | this.descriptor.add(name, vector.getType());
44 | return this;
45 | }
46 | public UpdatableExhibit add(String name, Frame frame) { // frame counterpart of add(String, Vector)
47 | this.frames.put(name, frame);
48 | this.descriptor.add(name, frame.descriptor());
49 | return this;
50 | }
51 | // Adds every entry of the given map through add(). NOTE(review): the map's type arguments are not visible here.
52 | public UpdatableExhibit addAllVectors(Map vectors) {
53 | for (Map.Entry e : vectors.entrySet()) {
54 | add(e.getKey(), e.getValue());
55 | }
56 | return this;
57 | }
58 | // Adds every entry of the given map through add(). NOTE(review): the map's type arguments are not visible here.
59 | public UpdatableExhibit addAllFrames(Map frames) {
60 | for (Map.Entry e : frames.entrySet()) {
61 | add(e.getKey(), e.getValue());
62 | }
63 | return this;
64 | }
65 | // Returns the descriptor that tracks the base descriptor plus everything added so far.
66 | @Override
67 | public ExhibitDescriptor descriptor() {
68 | return descriptor;
69 | }
70 | // Attributes always come from the base exhibit.
71 | @Override
72 | public Obs attributes() {
73 | return base.attributes();
74 | }
75 | // Builds a fresh HashMap on each call: base frames first, then added ones, so an added frame replaces a base frame of the same name.
76 | @Override
77 | public Map frames() {
78 | Map union = Maps.newHashMap();
79 | union.putAll(base.frames());
80 | union.putAll(frames);
81 | return union;
82 | }
83 | // Same merge as frames(), applied to vectors.
84 | @Override
85 | public Map vectors() {
86 | Map union = Maps.newHashMap();
87 | union.putAll(base.vectors());
88 | union.putAll(vectors);
89 | return union;
90 | }
91 | }
92 |
--------------------------------------------------------------------------------
/hive/src/main/java/com/cloudera/exhibit/hive/CodeUDF.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.hive;
16 |
17 | import com.google.common.collect.Lists;
18 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
19 | import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
20 | import org.apache.hadoop.hive.ql.metadata.HiveException;
21 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
22 | import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
23 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
24 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
25 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
26 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
27 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
28 |
29 | import java.util.List;
30 |
31 | public abstract class CodeUDF extends GenericUDF {
32 |
33 | private String engine;
34 |
35 | protected CodeUDF(String engine) {
36 | this.engine = engine;
37 | }
38 |
39 | @Override
40 | public ObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
41 | if (args.length != 1) {
42 | throw new UDFArgumentLengthException("Code UDFs take exactly one argument");
43 | }
44 | ObjectInspector codeOI = args[0];
45 | if (!ObjectInspectorUtils.isConstantObjectInspector(codeOI)) {
46 | throw new UDFArgumentException("Code argument must be a constant value");
47 | }
48 | String code;
49 | Object codeValue = ObjectInspectorUtils.getWritableConstantValue(codeOI);
50 | if (codeOI instanceof StringObjectInspector) {
51 | code = codeValue.toString();
52 | } else {
53 | ListObjectInspector lcoi = (ListObjectInspector) codeOI;
54 | StringBuilder sb = new StringBuilder();
55 | for (int i = 0; i < lcoi.getListLength(codeValue); i++) {
56 | sb.append(lcoi.getListElement(codeValue, i)).append('\n');
57 | }
58 | code = sb.toString();
59 | }
60 | List ret = Lists.newArrayList(engine, code);
61 | return ObjectInspectorFactory.getStandardConstantListObjectInspector(
62 | PrimitiveObjectInspectorFactory.javaStringObjectInspector, ret);
63 | }
64 |
65 | @Override
66 | public Object evaluate(DeferredObject[] args) throws HiveException {
67 | return Lists.newArrayList(engine, args[0].get());
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/etl/src/test/java/com/cloudera/exhibit/etl/tbl/RatioTblTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.etl.tbl;
16 |
17 | import com.cloudera.exhibit.avro.AvroObs;
18 | import com.cloudera.exhibit.avro.AvroObsDescriptor;
19 | import com.cloudera.exhibit.core.ObsDescriptor;
20 | import com.cloudera.exhibit.etl.SchemaProvider;
21 | import com.google.common.collect.Maps;
22 | import org.apache.avro.Schema;
23 | import org.apache.avro.SchemaBuilder;
24 | import org.apache.avro.generic.GenericData;
25 | import org.junit.Before;
26 | import org.junit.Test;
27 |
28 | import java.util.Map;
29 |
30 | import static org.junit.Assert.assertEquals;
31 |
32 | /** Exercises RatioTbl through initialize, add, merge and finalize on two small Avro records. */
33 | public class RatioTblTest {
34 |   private Schema schema = SchemaBuilder.record("test").fields()
35 |       .optionalDouble("a")
36 |       .optionalInt("b")
37 |       .endRecord();
38 |   private Map values = Maps.newHashMap();
39 |
40 |   @Before
41 |   public void setUp() throws Exception {
42 |     values.put("out", "out");
43 |   }
44 |
45 |   @Test
46 |   public void testRatio() {
47 |     GenericData.Record first = newRecord(10.0, 100);
48 |     GenericData.Record second = newRecord(40.0, 100);
49 |
50 |     Map options = Maps.newHashMap();
51 |     options.put("numerator", "a");
52 |     options.put("denominator", "b");
53 |     RatioTbl tbl = new RatioTbl(values, options);
54 |     ObsDescriptor descriptor = new AvroObsDescriptor(schema);
55 |
56 |     SchemaProvider provider = tbl.getSchemas(descriptor, 0, 0);
57 |     tbl.initialize(provider);
58 |     // Nothing added yet: both fields start at zero.
59 |     GenericData.Record initial = tbl.getValue();
60 |     assertField(0.0, initial, "ratio");
61 |     assertField(0.0, initial, "denominator");
62 |
63 |     tbl.add(new AvroObs(descriptor, first));
64 |     GenericData.Record afterFirst = tbl.getValue();
65 |     assertField(0.10, afterFirst, "ratio");
66 |     assertField(100.0, afterFirst, "denominator");
67 |
68 |     tbl.add(new AvroObs(descriptor, second));
69 |     GenericData.Record afterSecond = tbl.getValue();
70 |     assertField(0.25, afterSecond, "ratio");
71 |     assertField(200.0, afterSecond, "denominator");
72 |
73 |     GenericData.Record merged = tbl.merge(afterFirst, afterSecond);
74 |     assertField(0.2, merged, "ratio");
75 |     assertField(300.0, merged, "denominator");
76 |
77 |     GenericData.Record finalized = tbl.finalize(merged).get(0);
78 |     assertField(0.2, finalized, "out");
79 |   }
80 |
81 |   /** Builds a record of the test schema with the given values for fields "a" and "b". */
82 |   private GenericData.Record newRecord(double a, int b) {
83 |     GenericData.Record record = new GenericData.Record(schema);
84 |     record.put("a", a);
85 |     record.put("b", b);
86 |     return record;
87 |   }
88 |
89 |   /** Asserts that the named Double field of {@code record} equals {@code expected} within 1e-6. */
90 |   private static void assertField(double expected, GenericData.Record record, String field) {
91 |     assertEquals(expected, (Double) record.get(field), 1e-6);
92 |   }
93 | }
94 |
--------------------------------------------------------------------------------
/hive/src/main/java/com/cloudera/exhibit/hive/ArrayUnionUDF.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.hive;
16 |
17 | import com.google.common.base.Joiner;
18 | import com.google.common.collect.Lists;
19 | import com.google.common.collect.Sets;
20 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
21 | import org.apache.hadoop.hive.ql.metadata.HiveException;
22 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
23 | import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
24 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
25 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
26 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
27 |
28 | import java.util.List;
29 | import java.util.Set;
30 |
31 | public class ArrayUnionUDF extends GenericUDF {
32 |
33 | private List argOIs;
34 |
35 | @Override
36 | public ObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
37 | if (args.length < 2) {
38 | throw new UDFArgumentException("Expecting at least two arguments to array_union");
39 | }
40 | this.argOIs = Lists.newArrayListWithExpectedSize(args.length);
41 | ObjectInspector elemOI = null;
42 | for (ObjectInspector oi : args) {
43 | ListObjectInspector loi = (ListObjectInspector) oi;
44 | argOIs.add(loi);
45 | ObjectInspector eoi = ObjectInspectorUtils.getStandardObjectInspector(loi.getListElementObjectInspector());
46 | if (elemOI == null) {
47 | elemOI = eoi;
48 | } else if (!elemOI.equals(eoi)) {
49 | throw new UDFArgumentException("Array elements must all be of the same type");
50 | }
51 | }
52 | return ObjectInspectorFactory.getStandardListObjectInspector(elemOI);
53 | }
54 |
55 | @Override
56 | public Object evaluate(DeferredObject[] args) throws HiveException {
57 | Set distinct = Sets.newHashSet();
58 | for (int i = 0; i < args.length; i++) {
59 | ListObjectInspector loi = argOIs.get(i);
60 | List list = loi.getList(args[i].get());
61 | for (int j = 0; j < list.size(); j++) {
62 | distinct.add(ObjectInspectorUtils.copyToStandardObject(list.get(j), loi.getListElementObjectInspector()));
63 | }
64 | }
65 | return Lists.newArrayList(distinct);
66 | }
67 |
68 | @Override
69 | public String getDisplayString(String[] args) {
70 | assert (args.length > 1);
71 | return "array_union(" + Joiner.on(',').join(args) + ")";
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
/hive/src/main/java/com/cloudera/exhibit/hive/WithinArrayUDF.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.hive;
16 |
17 | import com.cloudera.exhibit.core.Calculator;
18 | import com.cloudera.exhibit.core.Exhibit;
19 | import com.cloudera.exhibit.core.Obs;
20 | import com.cloudera.exhibit.core.ObsDescriptor;
21 | import com.google.common.base.Joiner;
22 | import com.google.common.collect.Lists;
23 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
24 | import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
25 | import org.apache.hadoop.hive.ql.metadata.HiveException;
26 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
29 |
30 | import java.util.List;
31 |
32 | public class WithinArrayUDF extends GenericUDF {
33 | private Calculator calculator;
34 | private ObjectInspector[] inspectors;
35 | private transient Exhibit exhibit;
36 |
37 | public WithinArrayUDF() {
38 | }
39 |
40 | @Override
41 | public ObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
42 | if (args.length <= 1) {
43 | throw new UDFArgumentLengthException("The 'within_array' function takes at least two arguments");
44 | }
45 |
46 | this.inspectors = args;
47 | this.calculator = HiveUtils.getCalculator(args[0]);
48 | this.exhibit = HiveUtils.getExhibit(args);
49 | ObsDescriptor od = calculator.initialize(exhibit.descriptor());
50 | return ObjectInspectorFactory.getStandardListObjectInspector(HiveUtils.fromDescriptor(od, false));
51 | }
52 |
53 | @Override
54 | public Object evaluate(GenericUDF.DeferredObject[] args) throws HiveException {
55 | HiveUtils.update(exhibit, inspectors, args);
56 | List results = Lists.newArrayList();
57 | for (Obs obs : calculator.apply(exhibit)) {
58 | results.add(getResult(obs));
59 | }
60 | return results;
61 | }
62 |
63 | private Object getResult(Obs obs) {
64 | if (obs.descriptor().size() == 1) {
65 | return HiveUtils.asHiveType(obs.get(0));
66 | } else {
67 | List values = Lists.newArrayListWithExpectedSize(obs.descriptor().size());
68 | for (int i = 0; i < obs.descriptor().size(); i++) {
69 | values.add(HiveUtils.asHiveType(obs.get(i)));
70 | }
71 | return values;
72 | }
73 | }
74 |
75 | @Override
76 | public String getDisplayString(String[] args) {
77 | assert (args.length > 1);
78 | return "within_array(" + Joiner.on(',').join(args) + ")";
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/avro/src/main/java/com/cloudera/exhibit/avro/AvroObsDescriptor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.avro;
16 |
17 | import com.cloudera.exhibit.core.FieldType;
18 | import com.cloudera.exhibit.core.ObsDescriptor;
19 | import com.google.common.collect.Iterators;
20 | import org.apache.avro.Schema;
21 |
22 | import java.io.IOException;
23 | import java.util.List;
24 |
25 | /**
26 |  * {@link ObsDescriptor} backed by an Avro {@link Schema}. Nullable unions are unwrapped to their
27 |  * non-null branch before field types are mapped. Serialization writes the schema as its JSON text.
28 |  */
29 | public class AvroObsDescriptor extends ObsDescriptor {
30 |
31 |   private static final Schema NULL = Schema.create(Schema.Type.NULL);
32 |
33 |   // Not final: readObject() rebuilds it from the serialized schema text.
34 |   private Schema schema;
35 |
36 |   public AvroObsDescriptor(Schema schema) {
37 |     this.schema = unwrap(schema);
38 |   }
39 |
40 |   Schema schema() {
41 |     return schema;
42 |   }
43 |
44 |   @Override
45 |   public Field get(int index) {
46 |     Schema.Field f = schema.getFields().get(index);
47 |     return new Field(f.name(), getFieldType(f.schema()));
48 |   }
49 |
50 |   /** Returns the position of the named field, or -1 when the schema has no such field. */
51 |   @Override
52 |   public int indexOf(String name) {
53 |     Schema.Field f = schema.getField(name);
54 |     return f == null ? -1 : f.pos();
55 |   }
56 |
57 |   /**
58 |    * Strips union wrappers from {@code s}. A one-branch union is replaced by its branch; a union
59 |    * whose first or second branch is null is replaced by the other of those two branches. Any
60 |    * other schema, including a union with no null among its first two branches, is returned as is.
61 |    */
62 |   static Schema unwrap(Schema s) {
63 |     if (s.getType() != Schema.Type.UNION) {
64 |       return s;
65 |     }
66 |     List<Schema> ut = s.getTypes();
67 |     if (ut.size() == 1) {
68 |       // Reading ut.get(1) on a one-branch union used to throw IndexOutOfBoundsException.
69 |       return unwrap(ut.get(0));
70 |     }
71 |     if (ut.size() >= 2) {
72 |       if (NULL.equals(ut.get(0))) {
73 |         return unwrap(ut.get(1));
74 |       } else if (NULL.equals(ut.get(1))) {
75 |         return unwrap(ut.get(0));
76 |       }
77 |     }
78 |     return s;
79 |   }
80 |
81 |   /**
82 |    * Maps an Avro schema to a {@link FieldType} after unwrapping it.
83 |    *
84 |    * @return the matching field type, or {@code null} (after printing to stderr) when the Avro
85 |    *     type is not one of boolean, int, long, float, double or string
86 |    */
87 |   static FieldType getFieldType(Schema s) {
88 |     s = unwrap(s);
89 |     switch (s.getType()) {
90 |       case BOOLEAN:
91 |         return FieldType.BOOLEAN;
92 |       case INT:
93 |         return FieldType.INTEGER;
94 |       case LONG:
95 |         return FieldType.LONG;
96 |       case FLOAT:
97 |         return FieldType.FLOAT;
98 |       case DOUBLE:
99 |         return FieldType.DOUBLE;
100 |       case STRING:
101 |         return FieldType.STRING;
102 |       default:
103 |         System.err.println("Unknown schema type = " + s); // TODO: remove
104 |         return null;
105 |     }
106 |   }
107 |
108 |   @Override
109 |   public String toString() {
110 |     return Iterators.toString(iterator());
111 |   }
112 |
113 |   @Override
114 |   public int size() {
115 |     return schema.getFields().size();
116 |   }
117 |
118 |   // NOTE: writeUTF caps the encoded string at 65535 bytes, so a very large schema fails here
119 |   // with UTFDataFormatException. The format is left unchanged to stay readable by readObject.
120 |   private void writeObject(java.io.ObjectOutputStream out) throws IOException {
121 |     out.writeUTF(schema.toString());
122 |   }
123 |
124 |   private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
125 |     schema = (new Schema.Parser()).parse(in.readUTF());
126 |   }
127 | }
128 |
--------------------------------------------------------------------------------
/core/src/main/java/com/cloudera/exhibit/core/composite/CompositeObsDescriptor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.core.composite;
16 |
17 | import com.cloudera.exhibit.core.ObsDescriptor;
18 | import com.google.common.base.Function;
19 | import com.google.common.collect.Iterators;
20 | import com.google.common.collect.Lists;
21 | import com.google.common.collect.Maps;
22 |
23 | import java.util.Arrays;
24 | import java.util.Iterator;
25 | import java.util.List;
26 | import java.util.Map;
27 |
28 | public class CompositeObsDescriptor extends ObsDescriptor {
29 |
30 | private List components;
31 | private Map fieldNames;
32 | private int[] offsets;
33 |
34 | public CompositeObsDescriptor(List components) {
35 | this.components = components;
36 | this.fieldNames = Maps.newHashMap();
37 | this.offsets = new int[components.size() + 1];
38 | int idx = 0;
39 | for (int i = 1; i < offsets.length; i++) {
40 | ObsDescriptor descriptor = components.get(i - 1);
41 | offsets[i] = offsets[i - 1] + descriptor.size();
42 | for (Field f : descriptor) {
43 | if (fieldNames.containsKey(f.name)) {
44 | throw new IllegalStateException("Duplicate field name in composite obs: " + f.name);
45 | }
46 | fieldNames.put(f.name, idx);
47 | idx++;
48 | }
49 | }
50 | }
51 |
52 | public int getOffsetIndex(int index) {
53 | int offset = Arrays.binarySearch(offsets, index);
54 | if (offset < 0) {
55 | offset = -offset - 2;
56 | while (offset < offsets.length -1 && offsets[offset] == offsets[offset + 1]) {
57 | offset++;
58 | }
59 | }
60 | return offset;
61 | }
62 |
63 | public int getOffset(int offsetIndex) {
64 | return offsets[offsetIndex];
65 | }
66 |
67 | @Override
68 | public Field get(int i) {
69 | int offsetIndex = getOffsetIndex(i);
70 | int compIdx = i - offsets[offsetIndex];
71 | return components.get(offsetIndex).get(compIdx);
72 | }
73 |
74 | @Override
75 | public int indexOf(String name) {
76 | Integer ret = fieldNames.get(name);
77 | return ret == null ? -1 : ret;
78 | }
79 |
80 | @Override
81 | public int size() {
82 | return offsets[offsets.length - 1];
83 | }
84 |
85 | @Override
86 | public Iterator iterator() {
87 | return Iterators.concat(Lists.transform(components, new Function>() {
88 | @Override
89 | public Iterator apply(ObsDescriptor descriptor) {
90 | return descriptor.iterator();
91 | }
92 | }).iterator());
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
/etl/src/test/java/com/cloudera/exhibit/etl/tbl/SumTopTblTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
3 | *
4 | * Cloudera, Inc. licenses this file to you under the Apache License,
5 | * Version 2.0 (the "License"). You may not use this file except in
6 | * compliance with the License. You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | * CONDITIONS OF ANY KIND, either express or implied. See the License for
12 | * the specific language governing permissions and limitations under the
13 | * License.
14 | */
15 | package com.cloudera.exhibit.etl.tbl;
16 |
17 | import com.cloudera.exhibit.avro.AvroObsDescriptor;
18 | import com.cloudera.exhibit.core.ObsDescriptor;
19 | import com.cloudera.exhibit.etl.SchemaProvider;
20 | import com.google.common.collect.Maps;
21 | import org.apache.avro.Schema;
22 | import org.apache.avro.SchemaBuilder;
23 | import org.apache.avro.generic.GenericData;
24 | import org.junit.Before;
25 | import org.junit.Test;
26 |
27 | import java.util.List;
28 | import java.util.Map;
29 |
30 | import static org.junit.Assert.assertEquals;
31 |
32 | public class SumTopTblTest { // Exercises SumTopTbl.sort and SumTopTbl.filter on two in-memory Avro records.
33 | // NOTE(review): generic type arguments look stripped from this listing (e.g. `List> elem`); restore them from the original file.
34 | private Schema schema = SchemaBuilder.record("test").fields() // Record "test" with three optional fields, declared below.
35 | .optionalString("key")
36 | .optionalDouble("a")
37 | .optionalInt("b")
38 | .endRecord();
39 | private Map cv = Maps.newHashMap(); // Fixture records, stored under "one" and "two" by setUp.
40 | private Map values = Maps.newHashMap(); // First SumTopTbl constructor argument; setUp maps each field name to itself.
41 | // Builds the two fixture records and the field-name mapping before each test.
42 | @Before
43 | public void setUp() throws Exception {
44 | GenericData.Record one = new GenericData.Record(schema);
45 | one.put("key", "x");
46 | one.put("a", 17.0);
47 | one.put("b", 12);
48 | GenericData.Record two = new GenericData.Record(schema);
49 | two.put("key", "y");
50 | two.put("a", -1.2);
51 | two.put("b", 32);
52 | cv.put("one", one);
53 | cv.put("two", two);
54 |
55 | values.put("key", "key");
56 | values.put("a", "a");
57 | values.put("b", "b");
58 | }
59 | // Orders by "a + b": x gives 17.0 + 12 = 29.0 and y gives -1.2 + 32 = 30.8; y is expected first.
60 | @Test
61 | public void testOrder() {
62 | Map opts = Maps.newHashMap();
63 | opts.put("by", "key");
64 | opts.put("order", "a + b");
65 | opts.put("limit", 1);
66 | SumTopTbl stt = new SumTopTbl(values, opts);
67 | ObsDescriptor od = new AvroObsDescriptor(schema);
68 |
69 | SchemaProvider sp = stt.getSchemas(od, 0, 0);
70 | stt.initialize(sp);
71 | List> elem = stt.sort(stt.filter(cv));
72 | assertEquals(2, elem.size()); // Both entries are expected even though "limit" is 1.
73 | assertEquals("y", elem.get(0).getValue().get("key"));
74 | assertEquals("x", elem.get(1).getValue().get("key"));
75 | }
76 | // Keeps entries matching "a > 0 && b > 0": record y has a = -1.2, so only x is expected.
77 | @Test
78 | public void testFilter() {
79 | Map opts = Maps.newHashMap();
80 | opts.put("by", "key");
81 | opts.put("order", "a * b");
82 | opts.put("limit", 1);
83 | opts.put("keep", "a > 0 && b > 0");
84 | SumTopTbl stt = new SumTopTbl(values, opts);
85 | ObsDescriptor od = new AvroObsDescriptor(schema);
86 |
87 | SchemaProvider sp = stt.getSchemas(od, 0, 0);
88 | stt.initialize(sp);
89 | List> elem = stt.filter(cv);
90 | assertEquals(1, elem.size());
91 | assertEquals("x", elem.get(0).getValue().get("key"));
92 | }
93 | }
94 |
--------------------------------------------------------------------------------