├── README.md
├── pom.xml
└── src
    ├── main
    │   └── java
    │       └── com
    │           └── google
    │               └── wave
    │                   └── prototype
    │                       └── dataflow
    │                           ├── coder
    │                           │   ├── AggregateDataCoder.java
    │                           │   └── SFCoder.java
    │                           ├── function
    │                           │   ├── AggregateDataEnricher.java
    │                           │   ├── CSVFormatter.java
    │                           │   └── TableRowFormatter.java
    │                           ├── model
    │                           │   ├── AggregatedData.java
    │                           │   ├── SFConfig.java
    │                           │   ├── SFReferenceData.java
    │                           │   └── SFWaveWriteResult.java
    │                           ├── pipeline
    │                           │   ├── AdDataJob.java
    │                           │   └── SFReferenceDataJob.java
    │                           ├── sf
    │                           │   ├── SFSOQLExecutor.java
    │                           │   └── SFWaveDatasetWriter.java
    │                           ├── transform
    │                           │   ├── AggregateEvents.java
    │                           │   ├── SFRead.java
    │                           │   └── SFWaveWrite.java
    │                           └── util
    │                               ├── CSVUtil.java
    │                               ├── FileUtil.java
    │                               ├── GCSFileUtil.java
    │                               ├── JobConstants.java
    │                               └── SFConstants.java
    └── test
        └── java
            └── com
                └── google
                    └── wave
                        └── prototype
                            └── dataflow
                                ├── BaseTest.java
                                ├── coder
                                │   ├── AggregateDataCoderTest.java
                                │   └── SFCoderTest.java
                                ├── function
                                │   ├── AggregateDataEnricherTest.java
                                │   ├── CSVFormatterTest.java
                                │   └── TableRowFormatterTest.java
                                ├── model
                                │   └── SFConfigTest.java
                                ├── pipeline
                                │   ├── AdDataJobTest.java
                                │   └── SFReferenceDataJobTest.java
                                ├── sf
                                │   ├── SFSOQLExecutorTest.java
                                │   └── SFWaveDatasetWriterTest.java
                                └── transform
                                    ├── AggregateEventsTest.java
                                    ├── SFReadTest.java
                                    └── SFWaveWriteTest.java
/README.md:
--------------------------------------------------------------------------------
1 | # README #
2 |
3 | ### springML Inc Repository ###
4 |
5 | Google Dataflow Jobs
6 | --------------------
7 |
8 |
9 | The following two classes define the Google Cloud Dataflow jobs:
10 |
11 | SFReferenceDataJob - Will fetch the reference data from Salesforce (Opportunity) and populate BigQuery
12 | AdDataJob - Will fetch the raw Ad data from GCS and the Salesforce reference data from BigQuery, enrich the data, and populate BigQuery with the enriched data
13 |
14 |
15 | SFReferenceDataJob
16 | ------------------
17 |
18 | This requires the following inputs
19 |
20 | 1. Google cloud project
21 | 2. Google cloud Staging location
22 | 3. BigQuery output table
23 | 4. SF UserId
24 | 5. SF Password
25 |
26 | On completion of the job, the BigQuery table SFDCReferenceData.SFRef will be populated with the Salesforce reference data.
27 |
28 |
29 | AdDataJob
30 | ---------
31 |
32 | This requires the following inputs
33 |
34 | 1. Google cloud project
35 | 2. Google cloud Staging location
36 | 3. Ad Raw data (CSV)
37 | 4. BigQuery Reference data table
38 | 5. BigQuery output table
39 |
40 | On completion of the job, the BigQuery table SFDCReferenceData.EnrichedSample will be populated with the enriched data.
41 |
42 |
43 |
44 |
45 |
46 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.google.wave</groupId>
  <artifactId>wave_connector_prototype</artifactId>
  <version>0.0.2-SNAPSHOT</version>
  <packaging>jar</packaging>

  <repositories>
    <repository>
      <id>in-project1</id>
      <name>In Project Repo</name>
      <url>file://${project.basedir}\lib</url>
    </repository>
  </repositories>

  <name>wave_connector_prototype</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
      <scope>test</scope>
    </dependency>

    <dependency>
      <groupId>com.google.cloud.dataflow</groupId>
      <artifactId>google-cloud-dataflow-java-sdk-all</artifactId>
      <version>LATEST</version>
    </dependency>

    <dependency>
      <groupId>com.google.apis</groupId>
      <artifactId>google-api-services-storage</artifactId>
      <version>v1-rev25-1.19.1</version>
      <exclusions>
        <exclusion>
          <groupId>com.google.guava</groupId>
          <artifactId>guava-jdk5</artifactId>
        </exclusion>
      </exclusions>
    </dependency>

    <dependency>
      <groupId>com.google.apis</groupId>
      <artifactId>google-api-services-bigquery</artifactId>
      <version>v2-rev187-1.19.1</version>
      <exclusions>
        <exclusion>
          <groupId>com.google.guava</groupId>
          <artifactId>guava-jdk5</artifactId>
        </exclusion>
      </exclusions>
    </dependency>

    <dependency>
      <groupId>com.google.http-client</groupId>
      <artifactId>google-http-client-jackson2</artifactId>
      <version>1.19.0</version>
      <exclusions>
        <exclusion>
          <groupId>com.google.guava</groupId>
          <artifactId>guava-jdk5</artifactId>
        </exclusion>
      </exclusions>
    </dependency>

    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-core</artifactId>
      <version>2.4.2</version>
    </dependency>

    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-annotations</artifactId>
      <version>2.4.2</version>
    </dependency>

    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>1.7.7</version>
    </dependency>

    <dependency>
      <groupId>org.hamcrest</groupId>
      <artifactId>hamcrest-all</artifactId>
      <version>1.3</version>
      <scope>test</scope>
    </dependency>

    <dependency>
      <groupId>com.google.appengine.tools</groupId>
      <artifactId>appengine-gcs-client</artifactId>
      <version>RELEASE</version>
    </dependency>

    <dependency>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-lang3</artifactId>
      <version>3.4</version>
    </dependency>

    <dependency>
      <groupId>commons-io</groupId>
      <artifactId>commons-io</artifactId>
      <version>2.4</version>
    </dependency>

    <dependency>
      <groupId>sf</groupId>
      <artifactId>enterprise</artifactId>
      <version>1</version>
    </dependency>

    <dependency>
      <groupId>sf</groupId>
      <artifactId>partner</artifactId>
      <version>1</version>
    </dependency>

    <dependency>
      <groupId>sf</groupId>
      <artifactId>wsc</artifactId>
      <version>1</version>
    </dependency>

    <dependency>
      <groupId>org.mockito</groupId>
      <artifactId>mockito-core</artifactId>
      <version>2.0.26-beta</version>
      <scope>test</scope>
    </dependency>

    <dependency>
      <groupId>com.github.jsqlparser</groupId>
      <artifactId>jsqlparser</artifactId>
      <version>0.9.3</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>1.7</source>
          <target>1.7</target>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
--------------------------------------------------------------------------------
/src/main/java/com/google/wave/prototype/dataflow/coder/AggregateDataCoder.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.coder;
2 |
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 | import java.io.OutputStream;
6 |
7 | import com.google.cloud.dataflow.sdk.coders.AtomicCoder;
8 | import com.google.cloud.dataflow.sdk.coders.CoderException;
9 | import com.google.wave.prototype.dataflow.model.AggregatedData;
10 |
11 | /**
12 | * Coder for {@link AggregatedData}
13 | * It just uses AggregatedData.toString() to encode
14 | * AggregatedData.toString() will produce CSV of {@link AggregatedData}
15 | * In decode,
16 | * CSV is separated into fields by String.split(',') and
17 | * {@link AggregatedData} is constructed using the fields
18 | */
19 | public class AggregateDataCoder extends AtomicCoder<AggregatedData> {
20 | private static final long serialVersionUID = 4037984240347308918L;
21 | private static final int COL_PROPOSAL_ID = 0;
22 | private static final int COL_OPPORTUNITY_ID = 1;
23 | private static final int COL_CLICK_COUNT = 2;
24 | private static final int COL_IMP_COUNT = 3;
25 |
26 | private static final AggregateDataCoder INSTANCE = new AggregateDataCoder();
27 | private AggregateDataCoder() { }
28 |
29 | public static AggregateDataCoder getInstance() {
30 | return INSTANCE;
31 | }
32 |
33 | @Override
34 | public void encode(AggregatedData value, OutputStream outStream,
35 | com.google.cloud.dataflow.sdk.coders.Coder.Context context)
36 | throws CoderException, IOException {
37 | // Returning bytes of CSV
38 | // AggregatedData.toString() will be a CSV
39 | outStream.write(value.toString().getBytes());
40 | }
41 |
42 | @Override
43 | public AggregatedData decode(InputStream inStream,
44 | com.google.cloud.dataflow.sdk.coders.Coder.Context context)
45 | throws CoderException, IOException {
46 | int csvRowSize = inStream.available();
47 | byte[] csvRow = new byte[csvRowSize];
48 | inStream.read(csvRow);
49 | // Stream is converted into String
50 | // String will be a CSV
51 | // CSV splitted using comma to get the fields
52 | // AggregatedData constructed using the fields
53 | String aggDataStr = new String(csvRow);
54 | String[] addDataFields = aggDataStr.split(",");
55 |
56 |
57 | return new AggregatedData(addDataFields[COL_PROPOSAL_ID],
58 | addDataFields[COL_OPPORTUNITY_ID],
59 | Integer.parseInt(addDataFields[COL_CLICK_COUNT]),
60 | Integer.parseInt(addDataFields[COL_IMP_COUNT]));
61 | }
62 |
63 | }
64 |
--------------------------------------------------------------------------------
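For reference, a minimal round-trip sketch of the coder above; the proposal and opportunity Ids are illustrative, and Coder.Context.OUTER is the standard Dataflow SDK context for standalone encoding:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import com.google.cloud.dataflow.sdk.coders.Coder.Context;
import com.google.wave.prototype.dataflow.coder.AggregateDataCoder;
import com.google.wave.prototype.dataflow.model.AggregatedData;

public class AggregateDataCoderRoundTrip {
    public static void main(String[] args) throws Exception {
        AggregateDataCoder coder = AggregateDataCoder.getInstance();
        // Illustrative values: proposalId, opportunityId, clickCount, impressionCount
        AggregatedData original = new AggregatedData("101", "OPP-1", 2, 5);

        // encode writes the CSV produced by AggregatedData.toString()
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        coder.encode(original, out, Context.OUTER);

        // decode splits that CSV back into the four fields
        AggregatedData decoded = coder.decode(
                new ByteArrayInputStream(out.toByteArray()), Context.OUTER);
        System.out.println(original.equals(decoded)); // prints true
    }
}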
/src/main/java/com/google/wave/prototype/dataflow/coder/SFCoder.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.coder;
2 |
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 | import java.io.OutputStream;
6 |
7 | import com.google.cloud.dataflow.sdk.coders.AtomicCoder;
8 | import com.google.cloud.dataflow.sdk.coders.CoderException;
9 | import com.google.wave.prototype.dataflow.model.SFReferenceData;
10 |
11 | /**
12 | * Coder for {@link SFReferenceData}
13 | * It just uses SFReferenceData.toString() to encode
14 | * SFReferenceData.toString() will produce CSV of {@link SFReferenceData}
15 | * In decode,
16 | * CSV is separated into fields by String.split(',') and
17 | * {@link SFReferenceData} is constructed using the fields
18 | */
19 | public class SFCoder extends AtomicCoder<SFReferenceData> {
20 | private static final long serialVersionUID = 4037984240347308918L;
21 | private static final int COL_ACCOUNT_ID = 0;
22 | private static final int COL_OPPORTUNITY_ID = 1;
23 | private static final int COL_PROPOSAL_ID = 2;
24 |
25 | private static final SFCoder INSTANCE = new SFCoder();
26 | private SFCoder() { }
27 |
28 | public static SFCoder getInstance() {
29 | return INSTANCE;
30 | }
31 |
32 | @Override
33 | public void encode(SFReferenceData value, OutputStream outStream,
34 | com.google.cloud.dataflow.sdk.coders.Coder.Context context)
35 | throws CoderException, IOException {
36 | // SFReferenceData.toString will provide a String as CSV
37 | outStream.write(value.toString().getBytes());
38 | }
39 |
40 | @Override
41 | public SFReferenceData decode(InputStream inStream,
42 | com.google.cloud.dataflow.sdk.coders.Coder.Context context)
43 | throws CoderException, IOException {
44 | int size = inStream.available();
45 | byte[] sfRefBytes = new byte[size];
46 | inStream.read(sfRefBytes);
47 | String refStr = new String(sfRefBytes);
48 | String[] sfRefDataFields = refStr.split(",");
49 |
50 | String proposalId = null;
51 | // Proposal may be null for some rows and hence adding only if it is present
52 | if (sfRefDataFields.length > 2) {
53 | proposalId = sfRefDataFields[COL_PROPOSAL_ID];
54 | }
55 | return new SFReferenceData(sfRefDataFields[COL_ACCOUNT_ID], sfRefDataFields[COL_OPPORTUNITY_ID], proposalId);
56 | }
57 |
58 | }
59 |
--------------------------------------------------------------------------------
/src/main/java/com/google/wave/prototype/dataflow/function/AggregateDataEnricher.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.function;
2 |
3 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_OPPORTUNITY_ID;
4 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_PROPOSAL_ID;
5 |
6 | import org.slf4j.Logger;
7 | import org.slf4j.LoggerFactory;
8 |
9 | import com.google.api.services.bigquery.model.TableRow;
10 | import com.google.cloud.dataflow.sdk.transforms.DoFn;
11 | import com.google.cloud.dataflow.sdk.values.PCollectionView;
12 | import com.google.wave.prototype.dataflow.model.AggregatedData;
13 | import com.google.wave.prototype.dataflow.pipeline.AdDataJob;
14 |
15 | /**
16 | * Enrich AggregatedData with OpportunityId
17 | * OpportunityId fetched from Google BigQuery for the corresponding ProposalId
18 | * Google BigQuery TableRow should be provided as sideInput
19 | */
20 | public class AggregateDataEnricher extends DoFn<AggregatedData, AggregatedData> {
21 | private static final long serialVersionUID = -369858616535388252L;
22 |
23 | private static final Logger LOG = LoggerFactory.getLogger(AdDataJob.class);
24 |
25 | private PCollectionView<Iterable<TableRow>> sfReferenceDataView;
26 |
27 | public AggregateDataEnricher(PCollectionView<Iterable<TableRow>> sfReferenceDataView) {
28 | this.sfReferenceDataView = sfReferenceDataView;
29 | }
30 |
31 | @Override
32 | public void processElement(
33 | DoFn<AggregatedData, AggregatedData>.ProcessContext c) throws Exception {
34 | AggregatedData aggregatedData = c.element();
35 | String proposalId = aggregatedData.getProposalId();
36 | // Since in this case BigQuery table considered to be small
37 | // table rows are passed as sideInput
38 | Iterable<TableRow> sfReferenceData = c.sideInput(sfReferenceDataView);
39 | for (TableRow sfReferenceRow : sfReferenceData) {
40 | String proposalIdFromBigQuery = (String) sfReferenceRow.get(COL_PROPOSAL_ID);
41 | String opportunityId = (String) sfReferenceRow.get(COL_OPPORTUNITY_ID);
42 | // Make sure to fetch the opportunityId for the corresponding proposalId
43 | if (proposalIdFromBigQuery.contains(proposalId)) {
44 | LOG.info("Adding OpportunityId into aggregatedData : " + opportunityId);
45 | aggregatedData.setOpportunityId((String) sfReferenceRow.get(COL_OPPORTUNITY_ID));
46 | }
47 | }
48 |
49 | c.output(aggregatedData);
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/src/main/java/com/google/wave/prototype/dataflow/function/CSVFormatter.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.function;
2 |
3 | import com.google.cloud.dataflow.sdk.transforms.DoFn;
4 | import com.google.wave.prototype.dataflow.model.AggregatedData;
5 |
6 | /**
7 | * A simple DoFn to convert {@link AggregatedData} into CSV Row
8 | */
9 | public class CSVFormatter extends DoFn<AggregatedData, String> {
10 | private static final long serialVersionUID = 398388311953363232L;
11 |
12 | @Override
13 | public void processElement(DoFn<AggregatedData, String>.ProcessContext c)
14 | throws Exception {
15 | StringBuffer sb = new StringBuffer(256);
16 | sb.append(c.element().toString()).append('\n');
17 | c.output(sb.toString());
18 | }
19 |
20 | }
21 |
--------------------------------------------------------------------------------
/src/main/java/com/google/wave/prototype/dataflow/function/TableRowFormatter.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.function;
2 |
3 | import java.util.List;
4 |
5 | import com.google.api.services.bigquery.model.TableRow;
6 | import com.google.cloud.dataflow.sdk.transforms.DoFn;
7 |
8 | /**
9 | * A Google Dataflow DoFn converts the given CSV row into Google BigQuery TableRow
10 | * Column Names has to be in the order in which the fields are present in CSV
11 | */
12 | public class TableRowFormatter extends DoFn<String, TableRow> {
13 | private static final long serialVersionUID = -5798809828662211092L;
14 |
15 | private List<String> columnNames;
16 |
17 | public TableRowFormatter(List<String> columnNames) {
18 | this.columnNames = columnNames;
19 | }
20 |
21 | @Override
22 | public void processElement(ProcessContext c) throws Exception {
23 | TableRow row = new TableRow();
24 | String sfReferenceData = c.element();
25 | // CSV will contain \n at end
26 | // \n should not be included in a column value
27 | sfReferenceData = removeNewlineChar(sfReferenceData);
28 |
29 | String[] individualFields = sfReferenceData.split(",");
30 | // Order is according to the query we provide
31 | // For SELECT AccountId, Id, ProposalID__c FROM Opportunity
32 | // AccountId will be at 0
33 | // OpportunityId will be at 1
34 | // ProposalId will be at 2
35 |
36 | if (columnNames.size() != individualFields.length) {
37 | throw new Exception("Number of columns does not match the number of fields present in the CSV row");
38 | }
39 |
40 | int col = 0;
41 | for (String columnName : columnNames) {
42 | row.set(columnName, individualFields[col++]);
43 | }
44 |
45 | c.output(row);
46 | }
47 |
48 | private String removeNewlineChar(String sfReferenceData) {
49 | int newlineCharIndex = sfReferenceData.lastIndexOf('\n');
50 | if (newlineCharIndex != -1) {
51 | sfReferenceData = sfReferenceData.substring(0, newlineCharIndex);
52 | }
53 |
54 | return sfReferenceData;
55 | }
56 | }
--------------------------------------------------------------------------------
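A quick way to exercise the DoFn above is the SDK's DoFnTester; a minimal sketch, assuming the column-name constants from JobConstants and an illustrative CSV row:

import static com.google.wave.prototype.dataflow.util.JobConstants.COL_ACCOUNT_ID;
import static com.google.wave.prototype.dataflow.util.JobConstants.COL_OPPORTUNITY_ID;
import static com.google.wave.prototype.dataflow.util.JobConstants.COL_PROPOSAL_ID;

import java.util.Arrays;
import java.util.List;

import com.google.api.services.bigquery.model.TableRow;
import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
import com.google.wave.prototype.dataflow.function.TableRowFormatter;

public class TableRowFormatterSketch {
    public static void main(String[] args) throws Exception {
        // Column order must match the field order of the CSV row
        List<String> columns = Arrays.asList(COL_ACCOUNT_ID, COL_OPPORTUNITY_ID, COL_PROPOSAL_ID);
        DoFnTester<String, TableRow> tester = DoFnTester.of(new TableRowFormatter(columns));

        // Illustrative row; the trailing \n is stripped by the DoFn before splitting
        List<TableRow> rows = tester.processBatch("ACC-1,OPP-1,101\n");
        System.out.println(rows.get(0).get(COL_OPPORTUNITY_ID)); // prints OPP-1
    }
}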
/src/main/java/com/google/wave/prototype/dataflow/model/AggregatedData.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.model;
2 |
3 | import com.google.cloud.dataflow.sdk.coders.DefaultCoder;
4 | import com.google.wave.prototype.dataflow.coder.AggregateDataCoder;
5 |
6 | /**
7 | * POJO holding enriched Salesforce wave data
8 | * ProposalId, OpportunityId, ClickCount and ImpressionCount
9 | */
10 | @DefaultCoder(AggregateDataCoder.class)
11 | public class AggregatedData {
12 | private String proposalId = "";
13 | private String opportunityId = "";
14 | private int clickCount = 0;
15 | private int impressionCount = 0;
16 |
17 | // Used before adding OpportunityId
18 | public AggregatedData(String proposalId, int clickCount,
19 | int impressionCount) {
20 | this.proposalId = proposalId;
21 | this.clickCount = clickCount;
22 | this.impressionCount = impressionCount;
23 | }
24 |
25 | public AggregatedData(String proposalId, String opportunityId, int clickCount,
26 | int impressionCount) {
27 | this.proposalId = proposalId;
28 | this.opportunityId = opportunityId;
29 | this.clickCount = clickCount;
30 | this.impressionCount = impressionCount;
31 | }
32 |
33 | public String getProposalId() {
34 | return proposalId;
35 | }
36 |
37 | public void setProposalId(String proposalId) {
38 | this.proposalId = proposalId;
39 | }
40 |
41 | public int getClickCount() {
42 | return clickCount;
43 | }
44 |
45 | public void setClickCount(int clicksCount) {
46 | this.clickCount = clicksCount;
47 | }
48 |
49 | public int getImpressionCount() {
50 | return impressionCount;
51 | }
52 |
53 | public void setImpressionCount(int impressionCount) {
54 | this.impressionCount = impressionCount;
55 | }
56 |
57 | public void incrementImpressionCount() {
58 | this.impressionCount++;
59 | }
60 |
61 | public void incrementClickCount() {
62 | this.clickCount++;
63 | }
64 |
65 | public void addImpressionCount(int impressionCount) {
66 | this.impressionCount += impressionCount;
67 | }
68 |
69 | public void addClickCount(int clickCount) {
70 | this.clickCount += clickCount;
71 | }
72 |
73 | public String getOpportunityId() {
74 | return opportunityId;
75 | }
76 |
77 | public void setOpportunityId(String opportunityId) {
78 | this.opportunityId = opportunityId;
79 | }
80 |
81 | @Override
82 | public String toString() {
83 | // Constructs CSV row using fields
84 | return proposalId + "," + opportunityId + "," + clickCount + "," + impressionCount;
85 | }
86 |
87 | @Override
88 | public int hashCode() {
89 | final int prime = 31;
90 | int result = 1;
91 | result = prime * result + clickCount;
92 | result = prime * result + impressionCount;
93 | result = prime * result
94 | + ((opportunityId == null) ? 0 : opportunityId.hashCode());
95 | result = prime * result
96 | + ((proposalId == null) ? 0 : proposalId.hashCode());
97 | return result;
98 | }
99 |
100 | @Override
101 | public boolean equals(Object obj) {
102 | if (this == obj)
103 | return true;
104 | if (obj == null)
105 | return false;
106 | if (getClass() != obj.getClass())
107 | return false;
108 | AggregatedData other = (AggregatedData) obj;
109 | if (clickCount != other.clickCount)
110 | return false;
111 | if (impressionCount != other.impressionCount)
112 | return false;
113 | if (opportunityId == null) {
114 | if (other.opportunityId != null)
115 | return false;
116 | } else if (!opportunityId.equals(other.opportunityId))
117 | return false;
118 | if (proposalId == null) {
119 | if (other.proposalId != null)
120 | return false;
121 | } else if (!proposalId.equals(other.proposalId))
122 | return false;
123 | return true;
124 | }
125 |
126 |
127 |
128 | }
129 |
--------------------------------------------------------------------------------
/src/main/java/com/google/wave/prototype/dataflow/model/SFConfig.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.model;
2 |
3 | import java.io.Serializable;
4 |
5 | import org.apache.commons.lang3.StringUtils;
6 | import org.slf4j.Logger;
7 | import org.slf4j.LoggerFactory;
8 |
9 | import com.google.appengine.repackaged.com.google.gson.Gson;
10 | import com.google.appengine.repackaged.com.google.gson.GsonBuilder;
11 | import com.google.cloud.dataflow.sdk.coders.DefaultCoder;
12 | import com.google.cloud.dataflow.sdk.coders.SerializableCoder;
13 | import com.google.cloud.dataflow.sdk.options.PipelineOptions;
14 | import com.google.wave.prototype.dataflow.util.FileUtil;
15 | import com.google.wave.prototype.dataflow.util.SFConstants;
16 | import com.sforce.soap.enterprise.EnterpriseConnection;
17 | import com.sforce.soap.partner.Connector;
18 | import com.sforce.soap.partner.PartnerConnection;
19 | import com.sforce.ws.ConnectionException;
20 | import com.sforce.ws.ConnectorConfig;
21 |
22 | /**
23 | * Holds the configuration which will be used by SFSource
24 | * Fetches Salesforce user credentials by reading the configuration file present in GS or local
25 | * A config file will have the below content
26 | * {
27 | * "userId": ,
28 | * "password":
29 | * }
30 | */
31 | @DefaultCoder(SerializableCoder.class)
32 | public class SFConfig implements Serializable {
33 | private static final long serialVersionUID = -5569745252294105529L;
34 |
35 | private static final Logger LOG = LoggerFactory.getLogger(SFConfig.class);
36 |
37 | private String userId;
38 | private String password;
39 |
40 | public static SFConfig getInstance(String configFileLocation, PipelineOptions options) throws Exception {
41 | validate(configFileLocation);
42 | // Content will be in JSON
43 | // So constructing SFConfig bean using GSON
44 | String json = FileUtil.getContent(configFileLocation, options);
45 | Gson gson = new GsonBuilder().create();
46 | // Unmarshalling file content into SFConfig
47 | return gson.fromJson(json, SFConfig.class);
48 | }
49 |
50 | public String getUserId() {
51 | return userId;
52 | }
53 |
54 | public String getPassword() {
55 | return password;
56 | }
57 |
58 | public PartnerConnection createPartnerConnection() throws Exception {
59 | ConnectorConfig config = new ConnectorConfig();
60 | LOG.debug("Connecting SF Partner Connection using " + getUserId());
61 | config.setUsername(getUserId());
62 | config.setPassword(getPassword());
63 |
64 | try {
65 | return Connector.newConnection(config);
66 | } catch (ConnectionException ce) {
67 | LOG.error("Exception while creating connection", ce);
68 | throw new Exception(ce);
69 | }
70 | }
71 |
72 | public EnterpriseConnection createEnterpriseConnection() throws Exception {
73 | ConnectorConfig config = new ConnectorConfig();
74 | LOG.debug("Connecting SF Partner Connection using " + getUserId());
75 | config.setUsername(getUserId());
76 | config.setPassword(getPassword());
77 |
78 | try {
79 | return com.sforce.soap.enterprise.Connector.newConnection(config);
80 | } catch (ConnectionException ce) {
81 | LOG.error("Exception while creating connection", ce);
82 | throw new Exception(ce);
83 | }
84 | }
85 |
86 | private static void validate(String configFileLocation) throws Exception {
87 | // Checking whether the file is provided in proper format
88 | // GS file should start with gs://
89 | // local file should start with file://
90 | if (!StringUtils.isEmpty(configFileLocation)) {
91 | if (configFileLocation.startsWith(SFConstants.GS_FILE_PREFIX) ||
92 | configFileLocation.startsWith(SFConstants.LOCAL_FILE_PREFIX)) {
93 | return;
94 | }
95 | }
96 |
97 | // Provided configFileLocation is not valid
98 | // Stopping the Job
99 | throw new Exception("Invalid Configuration file " + configFileLocation);
100 | }
101 |
102 | }
103 |
--------------------------------------------------------------------------------
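A minimal usage sketch for the class above; the file location is a placeholder, and the JSON keys mirror the Javadoc:

import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.wave.prototype.dataflow.model.SFConfig;
import com.sforce.soap.enterprise.EnterpriseConnection;

public class SFConfigSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder location; a gs:// path works as well
        // Expected file content: { "userId": "<salesforce_user_id>", "password": "<salesforce_password>" }
        PipelineOptions options = PipelineOptionsFactory.create();
        SFConfig sfConfig = SFConfig.getInstance("file:///tmp/sf_source_config.json", options);

        EnterpriseConnection connection = sfConfig.createEnterpriseConnection();
        try {
            System.out.println("Connected as " + sfConfig.getUserId());
        } finally {
            connection.logout();
        }
    }
}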
/src/main/java/com/google/wave/prototype/dataflow/model/SFReferenceData.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.model;
2 |
3 | import java.io.Serializable;
4 |
5 | /**
6 | * POJO containing Salesforce reference data
7 | */
8 | public class SFReferenceData implements Serializable {
9 | private static final long serialVersionUID = -7597520654419284165L;
10 |
11 | private String accountId;
12 | private String opportunityId;
13 | private String proposalId;
14 |
15 | public SFReferenceData(String accountId, String opportunityId,
16 | String proposalId) {
17 | super();
18 | this.accountId = accountId;
19 | this.opportunityId = opportunityId;
20 | this.proposalId = proposalId;
21 | }
22 |
23 | public String getAccountId() {
24 | return accountId;
25 | }
26 |
27 | public void setAccountId(String accountId) {
28 | this.accountId = accountId;
29 | }
30 |
31 | public String getOpportunityId() {
32 | return opportunityId;
33 | }
34 |
35 | public void setOpportunityId(String opportunityId) {
36 | this.opportunityId = opportunityId;
37 | }
38 |
39 | public String getProposalId() {
40 | return proposalId;
41 | }
42 |
43 | public void setProposalId(String proposalId) {
44 | this.proposalId = proposalId;
45 | }
46 |
47 | @Override
48 | public String toString() {
49 | return accountId + ","+ opportunityId + "," + proposalId;
50 | }
51 |
52 | @Override
53 | public int hashCode() {
54 | final int prime = 31;
55 | int result = 1;
56 | result = prime * result
57 | + ((accountId == null) ? 0 : accountId.hashCode());
58 | result = prime * result
59 | + ((opportunityId == null) ? 0 : opportunityId.hashCode());
60 | result = prime * result
61 | + ((proposalId == null) ? 0 : proposalId.hashCode());
62 | return result;
63 | }
64 |
65 | @Override
66 | public boolean equals(Object obj) {
67 | if (this == obj)
68 | return true;
69 | if (obj == null)
70 | return false;
71 | if (getClass() != obj.getClass())
72 | return false;
73 | SFReferenceData other = (SFReferenceData) obj;
74 | if (accountId == null) {
75 | if (other.accountId != null)
76 | return false;
77 | } else if (!accountId.equals(other.accountId))
78 | return false;
79 | if (opportunityId == null) {
80 | if (other.opportunityId != null)
81 | return false;
82 | } else if (!opportunityId.equals(other.opportunityId))
83 | return false;
84 | if (proposalId == null) {
85 | if (other.proposalId != null)
86 | return false;
87 | } else if (!proposalId.equals(other.proposalId))
88 | return false;
89 | return true;
90 | }
91 |
92 | }
93 |
--------------------------------------------------------------------------------
/src/main/java/com/google/wave/prototype/dataflow/model/SFWaveWriteResult.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.model;
2 |
3 | import java.io.Serializable;
4 |
5 | /**
6 | * WriteResult class
7 | * This just holds the Salesforce object Id of the persisted data
8 | */
9 | public class SFWaveWriteResult implements Serializable {
10 | private static final long serialVersionUID = -7451739773848100070L;
11 |
12 | private String sfObjId;
13 |
14 | public SFWaveWriteResult(String sfObjId) {
15 | this.sfObjId = sfObjId;
16 | }
17 |
18 | public String getSfObjId() {
19 | return sfObjId;
20 | }
21 |
22 | @Override
23 | public int hashCode() {
24 | final int prime = 31;
25 | int result = 1;
26 | result = prime * result + ((sfObjId == null) ? 0 : sfObjId.hashCode());
27 | return result;
28 | }
29 |
30 | @Override
31 | public boolean equals(Object obj) {
32 | if (this == obj)
33 | return true;
34 | if (obj == null)
35 | return false;
36 | if (getClass() != obj.getClass())
37 | return false;
38 | SFWaveWriteResult other = (SFWaveWriteResult) obj;
39 | if (sfObjId == null) {
40 | if (other.sfObjId != null)
41 | return false;
42 | } else if (!sfObjId.equals(other.sfObjId))
43 | return false;
44 | return true;
45 | }
46 |
47 |
48 | }
--------------------------------------------------------------------------------
/src/main/java/com/google/wave/prototype/dataflow/pipeline/AdDataJob.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.pipeline;
2 |
3 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_CLICKS;
4 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_IMPRESSIONS;
5 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_OPPORTUNITY_ID;
6 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_PROPOSAL_ID;
7 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_TYPE_INTEGER;
8 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_TYPE_STRING;
9 |
10 | import java.util.ArrayList;
11 | import java.util.List;
12 |
13 | import com.google.api.services.bigquery.model.TableFieldSchema;
14 | import com.google.api.services.bigquery.model.TableRow;
15 | import com.google.api.services.bigquery.model.TableSchema;
16 | import com.google.cloud.dataflow.sdk.Pipeline;
17 | import com.google.cloud.dataflow.sdk.io.BigQueryIO;
18 | import com.google.cloud.dataflow.sdk.io.TextIO;
19 | import com.google.cloud.dataflow.sdk.options.Default;
20 | import com.google.cloud.dataflow.sdk.options.PipelineOptions;
21 | import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
22 | import com.google.cloud.dataflow.sdk.options.Validation;
23 | import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner;
24 | import com.google.cloud.dataflow.sdk.transforms.ParDo;
25 | import com.google.cloud.dataflow.sdk.transforms.View;
26 | import com.google.cloud.dataflow.sdk.values.PCollection;
27 | import com.google.cloud.dataflow.sdk.values.PCollectionView;
28 | import com.google.wave.prototype.dataflow.coder.AggregateDataCoder;
29 | import com.google.wave.prototype.dataflow.function.AggregateDataEnricher;
30 | import com.google.wave.prototype.dataflow.function.CSVFormatter;
31 | import com.google.wave.prototype.dataflow.function.TableRowFormatter;
32 | import com.google.wave.prototype.dataflow.model.AggregatedData;
33 | import com.google.wave.prototype.dataflow.model.SFConfig;
34 | import com.google.wave.prototype.dataflow.sf.SFWaveDatasetWriter;
35 | import com.google.wave.prototype.dataflow.transform.AggregateEvents;
36 | import com.google.wave.prototype.dataflow.transform.SFWaveWrite;
37 |
38 | /**
39 | * Google Dataflow Job
40 | * 1. Reads the raw Ad Data from Google cloud storage
41 | * 2. Reads Salesforce Reference data from Google BigQuery
42 | * 3. Enrich Ad Data using Salesforce Reference data
43 | * 4. Publish the Enriched data into Salesforce Wave and Google BigQuery
44 | * To execute, provide the following configuration
45 | * --project=YOUR_PROJECT_ID
46 | * --stagingLocation=YOUR_STAGING_LOCATION
47 | * --inputCSV=GCS_LOCATION_OF_YOUR_RAW_AD_DATA
48 | * --inputTable=GOOGLE_BIGQUERY_TABLE_CONTAINING_SALESFORCE_REFERENCE_DATA
49 | * --output=GOOGLE_BIGQUERY_TABLE_TO_WHICH_ENRICHED_DATA_HAS_TO_BE_ADDED
50 | * --dataset=SALESFORCE WAVE DATASET
51 | * --sfMetadataFileLocation=GCS_LOCATION_OF_SALESFORCE_METADATA_FILE
52 | * --sfConfigFileLocation=GCS_LOCATION_OF_SALESFORCE_CONFIG_FILE
53 | */
54 | public class AdDataJob {
55 | public static interface Options extends PipelineOptions {
56 | @Default.String("gs://sam-bucket1/SampleAdData/ad-server-data1.csv")
57 | String getInputCSV();
58 | void setInputCSV(String value);
59 |
60 | @Default.String("ace-scarab-94723:SFDCReferenceData.SFRef")
61 | String getInputTable();
62 | void setInputTable(String value);
63 |
64 | @Validation.Required
65 | @Default.String("ace-scarab-94723:SFDCReferenceData.EnrichedSample")
66 | String getOutput();
67 | void setOutput(String value);
68 |
69 | @Default.String("SampleAdDataSet")
70 | String getDataset();
71 | void setDataset(String dataset);
72 |
73 | @Default.String("gs://sam-bucket1/SampleAdData/metadata.json")
74 | String getSfMetadataFileLocation();
75 | void setSfMetadataFileLocation(String sfMetadataFileLocation);
76 |
77 | @Default.String("gs://sam-bucket1/config/sf_source_config.json")
78 | String getSfConfigFileLocation();
79 | void setSfConfigFileLocation(String sfConfigFileLocation);
80 | }
81 |
82 | private static TableSchema getSchema() {
83 | List<TableFieldSchema> fields = new ArrayList<>();
84 | fields.add(constructTableFieldSchema(COL_PROPOSAL_ID, COL_TYPE_STRING));
85 | fields.add(constructTableFieldSchema(COL_OPPORTUNITY_ID, COL_TYPE_STRING));
86 | fields.add(constructTableFieldSchema(COL_CLICKS, COL_TYPE_INTEGER));
87 | fields.add(constructTableFieldSchema(COL_IMPRESSIONS, COL_TYPE_INTEGER));
88 |
89 | TableSchema tableSchema = new TableSchema();
90 | tableSchema.setFields(fields);
91 | return tableSchema;
92 | }
93 |
94 | private static TableFieldSchema constructTableFieldSchema(String name, String type) {
95 | TableFieldSchema tableFieldSchema = new TableFieldSchema();
96 | tableFieldSchema.setName(name);
97 | tableFieldSchema.setType(type);
98 |
99 | return tableFieldSchema;
100 | }
101 |
102 | private static List<String> getEnrichedTableColumns() {
103 | List<String> columns = new ArrayList<String>(4);
104 |
105 | columns.add(COL_PROPOSAL_ID);
106 | columns.add(COL_OPPORTUNITY_ID);
107 | columns.add(COL_CLICKS);
108 | columns.add(COL_IMPRESSIONS);
109 |
110 | return columns;
111 | }
112 |
113 | private static SFWaveDatasetWriter createSFWaveDatasetWriter(AdDataJob.Options options) throws Exception {
114 | SFConfig sfConfig = SFConfig.getInstance(options.getSfConfigFileLocation(), options);
115 | return new SFWaveDatasetWriter(sfConfig, options.getDataset());
116 | }
117 |
118 | public static void main(String[] args) throws Exception {
119 | // Helper if command line options are not provided
120 | if (args.length < 2) {
121 | args = new String[2];
122 | args[0] = "--project=ace-scarab-94723";
123 | args[1] = "--stagingLocation=gs://sam-bucket1/staging";
124 | }
125 |
126 | Options options = PipelineOptionsFactory.fromArgs(args)
127 | .withValidation().as(Options.class);
128 | // Always executing using BlockingDataflowPipelineRunner
129 | options.setRunner(BlockingDataflowPipelineRunner.class);
130 | Pipeline p = Pipeline.create(options);
131 |
132 | // Reading the CSV present in GCS
133 | PCollection<AggregatedData> aggregated = p.apply(TextIO.Read.from(options.getInputCSV()))
134 | .apply(new AggregateEvents())
135 | .setCoder(AggregateDataCoder.getInstance());
136 |
137 | // Reading Salesforce reference data from Google BigQuery
138 | PCollection<TableRow> tableColl = p.apply(BigQueryIO.Read.from(options.getInputTable()));
139 | final PCollectionView<Iterable<TableRow>> sideInput = tableColl.apply(View.<TableRow>asIterable());
140 | // Salesforce Reference data passed as sideInput
141 | PCollection<AggregatedData> enriched = aggregated
142 | .apply(ParDo.withSideInputs(sideInput)
143 | .of((new AggregateDataEnricher(sideInput))))
144 | .setCoder(AggregateDataCoder.getInstance());
145 |
146 | // Converting into CSV
147 | PCollection<String> enrichedCSV = enriched.apply(ParDo.of(new CSVFormatter()));
148 | // Writing the results into Salesforce Wave
149 | enrichedCSV
150 | .apply(new SFWaveWrite(createSFWaveDatasetWriter(options), options.getSfMetadataFileLocation()));
151 |
152 | // Populating BigQuery with enriched data
153 | enrichedCSV
154 | .apply(ParDo.of(new TableRowFormatter(getEnrichedTableColumns())))
155 | .apply(BigQueryIO.Write
156 | .to(options.getOutput())
157 | .withSchema(getSchema())
158 | .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
159 | .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND));
160 | p.run();
161 | }
162 |
163 | }
164 |
--------------------------------------------------------------------------------
/src/main/java/com/google/wave/prototype/dataflow/pipeline/SFReferenceDataJob.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.pipeline;
2 |
3 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_ACCOUNT_ID;
4 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_OPPORTUNITY_ID;
5 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_PROPOSAL_ID;
6 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_TYPE_STRING;
7 |
8 | import java.util.ArrayList;
9 | import java.util.List;
10 |
11 | import com.google.api.services.bigquery.model.TableFieldSchema;
12 | import com.google.api.services.bigquery.model.TableSchema;
13 | import com.google.cloud.dataflow.sdk.Pipeline;
14 | import com.google.cloud.dataflow.sdk.io.BigQueryIO;
15 | import com.google.cloud.dataflow.sdk.options.Default;
16 | import com.google.cloud.dataflow.sdk.options.Description;
17 | import com.google.cloud.dataflow.sdk.options.PipelineOptions;
18 | import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
19 | import com.google.cloud.dataflow.sdk.options.Validation;
20 | import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner;
21 | import com.google.cloud.dataflow.sdk.transforms.Create;
22 | import com.google.cloud.dataflow.sdk.transforms.ParDo;
23 | import com.google.wave.prototype.dataflow.function.TableRowFormatter;
24 | import com.google.wave.prototype.dataflow.model.SFConfig;
25 | import com.google.wave.prototype.dataflow.sf.SFSOQLExecutor;
26 | import com.google.wave.prototype.dataflow.transform.SFRead;
27 |
28 | /**
29 | * Google Dataflow Job
30 | * 1. Read Salesforce Reference Data using {@link SFRead}
31 | * 2. Populate Google BigQuery Table with Salesforce Reference Data
32 | * To execute, provide the following configuration
33 | * --project=YOUR_PROJECT_ID
34 | * --stagingLocation=YOUR_STAGING_LOCATION
35 | * --output=GOOGLE_BIGQUERY_TABLE_TO_WHICH_SALESFORCE_REFERENCE_DATA_WILL_BE_POPULATED
36 | * --sfConfigFileLocation=GCS_LOCATION_OF_SALESFORCE_CONFIG_FILE
37 | * --sfQuery=SALESFORCE_SOQL_TO_FETCH_SALESFORCE_REFERENCE_DATA
38 | */
39 | public class SFReferenceDataJob {
40 |
41 | private static interface Options extends PipelineOptions {
42 | @Description("BigQuery table to write to, specified as "
43 | + ":.. The dataset must already exist.")
44 | @Validation.Required
45 | String getOutput();
46 | void setOutput(String value);
47 |
48 | @Default.String("gs://sam-bucket1/config/sf_source_config.json")
49 | String getSfConfigFileLocation();
50 | void setSfConfigFileLocation(String sfConfigFileLocation);
51 |
52 | @Default.String("SELECT AccountId, Id, ProposalID__c FROM Opportunity where ProposalID__c != null")
53 | String getSfQuery();
54 | void setSfQuery(String sfQuery);
55 | }
56 |
57 | private static TableSchema getSchema() {
58 | List<TableFieldSchema> fields = new ArrayList<>();
59 |
60 | fields.add(constructTableFieldSchema(COL_ACCOUNT_ID, COL_TYPE_STRING));
61 | fields.add(constructTableFieldSchema(COL_OPPORTUNITY_ID, COL_TYPE_STRING));
62 | fields.add(constructTableFieldSchema(COL_PROPOSAL_ID, COL_TYPE_STRING));
63 |
64 | TableSchema schema = new TableSchema().setFields(fields);
65 | return schema;
66 | }
67 |
68 | private static TableFieldSchema constructTableFieldSchema(String name, String type) {
69 | TableFieldSchema tableFieldSchema = new TableFieldSchema();
70 |
71 | tableFieldSchema.setName(name);
72 | tableFieldSchema.setType(type);
73 |
74 | return tableFieldSchema;
75 | }
76 |
77 | private static List<String> getSFRefTableColumns() {
78 | List<String> columns = new ArrayList<String>(4);
79 |
80 | columns.add(COL_ACCOUNT_ID);
81 | columns.add(COL_OPPORTUNITY_ID);
82 | columns.add(COL_PROPOSAL_ID);
83 |
84 | return columns;
85 | }
86 |
87 | public static void main(String args[]) throws Exception {
88 | if (args.length < 3) {
89 | args = new String[3];
90 | args[0] = "--project=ace-scarab-94723";
91 | args[1] = "--stagingLocation=gs://sam-bucket1/staging";
92 | args[2] = "--output=ace-scarab-94723:SFDCReferenceData.SFRef";
93 | }
94 |
95 | Options options = PipelineOptionsFactory.fromArgs(args)
96 | .withValidation().as(Options.class);
97 | options.setRunner(BlockingDataflowPipelineRunner.class);
98 | Pipeline p = Pipeline.create(options);
99 |
100 | // SFSOQLExecutor which will be used to execute SOQL query
101 | // SFConfig which will be used to create Salesforce Connection
102 | SFSOQLExecutor soqlExecutor = new SFSOQLExecutor(SFConfig.getInstance(options.getSfConfigFileLocation(), options));
103 |
104 | // Executing pipeline
105 | p.apply(Create.of(options.getSfQuery()))
106 | // Reading from Salesforce
107 | .apply(new SFRead(soqlExecutor))
108 | // Convert to TableRow
109 | .apply(ParDo.of(new TableRowFormatter(getSFRefTableColumns())))
110 | // Wiring into BigQuery
111 | .apply(BigQueryIO.Write
112 | .to(options.getOutput())
113 | .withSchema(getSchema())
114 | .withCreateDisposition(
115 | BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
116 | .withWriteDisposition(
117 | // Since all data are fetched from Salesforce,
118 | // we need to overwrite the existing data
119 | BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
120 | p.run();
121 | }
122 |
123 | }
124 |
--------------------------------------------------------------------------------
/src/main/java/com/google/wave/prototype/dataflow/sf/SFSOQLExecutor.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.sf;
2 |
3 | import java.io.Serializable;
4 | import java.util.ArrayList;
5 | import java.util.Arrays;
6 | import java.util.List;
7 |
8 | import org.slf4j.Logger;
9 | import org.slf4j.LoggerFactory;
10 |
11 | import com.google.wave.prototype.dataflow.model.SFConfig;
12 | import com.google.wave.prototype.dataflow.transform.SFRead;
13 | import com.sforce.soap.enterprise.EnterpriseConnection;
14 | import com.sforce.soap.enterprise.QueryResult;
15 | import com.sforce.soap.enterprise.sobject.SObject;
16 |
17 | /**
18 | * Can be used to execute a SF SOQL Query
19 | * It will be executed using the credentials provided in {@link SFConfig}
20 | */
21 | public class SFSOQLExecutor implements Serializable {
22 | private static final long serialVersionUID = 296485933905679924L;
23 |
24 | private static final Logger LOG = LoggerFactory.getLogger(SFRead.class);
25 |
26 | private SFConfig sfConfig;
27 |
28 | public SFSOQLExecutor(SFConfig sfConfig) {
29 | this.sfConfig = sfConfig;
30 | }
31 |
32 | public List<SObject> executeQuery(String sfQuery) throws Exception {
33 | EnterpriseConnection connection = null;
34 | List<SObject> records = new ArrayList<SObject>();
35 |
36 | try {
37 | connection = sfConfig.createEnterpriseConnection();
38 |
39 | QueryResult result = connection.query(sfQuery);
40 | // First call results are added here
41 | records.addAll(Arrays.asList(result.getRecords()));
42 | String queryLocator = result.getQueryLocator();
43 | LOG.info("Total number of records to be read :" + result.getSize());
44 |
45 | // Salesforce will not return all the rows in a single shot if the result is huge
46 | // By default it will return 500 rows per call
47 | // To fetch further connection.queryMore is used
48 | // result.isDone() will tell you whether all the records have been read
49 | boolean done = result.isDone();
50 | while (!done) {
51 | result = connection.queryMore(queryLocator);
52 | records.addAll(Arrays.asList(result.getRecords()));
53 |
54 | done = result.isDone();
55 | }
56 | } finally {
57 | if (connection != null) {
58 | connection.logout();
59 | }
60 | }
61 |
62 | return records;
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
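A minimal usage sketch for the executor above; the config location is a placeholder and the query is the default used by SFReferenceDataJob:

import java.util.List;

import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.wave.prototype.dataflow.model.SFConfig;
import com.google.wave.prototype.dataflow.sf.SFSOQLExecutor;
import com.sforce.soap.enterprise.sobject.SObject;

public class SFSOQLExecutorSketch {
    public static void main(String[] args) throws Exception {
        PipelineOptions options = PipelineOptionsFactory.create();
        // Placeholder config location; see SFConfig for the expected JSON content
        SFConfig sfConfig = SFConfig.getInstance("file:///tmp/sf_source_config.json", options);
        SFSOQLExecutor executor = new SFSOQLExecutor(sfConfig);

        // queryMore paging is handled inside executeQuery
        List<SObject> records = executor.executeQuery(
                "SELECT AccountId, Id, ProposalID__c FROM Opportunity where ProposalID__c != null");
        System.out.println("Fetched " + records.size() + " Opportunity records");
    }
}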
/src/main/java/com/google/wave/prototype/dataflow/sf/SFWaveDatasetWriter.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.sf;
2 |
3 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_ACTION;
4 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_ACTION_NONE;
5 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_ACTION_PROCESS;
6 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_CSV_FORMAT;
7 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_DATAFILE;
8 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_EDGEMART_ALIAS;
9 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_FORMAT;
10 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_INSIGHTS_EXTERNAL_DATA;
11 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_INSIGHTS_EXTERNAL_DATA_ID;
12 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_INSIGHTS_EXTERNAL_DATA_PART;
13 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_METADATA_JSON;
14 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_OPERATION;
15 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_OVERWRITE_OPERATION;
16 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_PART_NUMBER;
17 |
18 | import java.io.Serializable;
19 |
20 | import org.slf4j.Logger;
21 | import org.slf4j.LoggerFactory;
22 |
23 | import com.google.wave.prototype.dataflow.model.SFConfig;
24 | import com.sforce.soap.partner.Error;
25 | import com.sforce.soap.partner.PartnerConnection;
26 | import com.sforce.soap.partner.SaveResult;
27 | import com.sforce.soap.partner.sobject.SObject;
28 |
29 | /**
30 | * This can be used to write metadata and datasetData into SF Wave
31 | * 1. It creates connection using {@link SFConfig}
32 | * 2. Writes specified Metadata
33 | * 3. Writes Dataset data
34 | * 4. Finalize the write
35 | * This uses Salesforce SOAP API (Partner WSDL)
36 | */
37 | public class SFWaveDatasetWriter implements Serializable {
38 | private static final long serialVersionUID = 5714980864384207026L;
39 |
40 | private static final Logger LOG = LoggerFactory.getLogger(SFWaveDatasetWriter.class);
41 |
42 | private SFConfig sfConfig;
43 | private String datasetName;
44 |
45 | public SFWaveDatasetWriter(SFConfig sfConfig, String datasetName) {
46 | this.sfConfig = sfConfig;
47 | this.datasetName = datasetName;
48 | }
49 |
50 | public String write(byte[] metadata, byte[] datasetData) throws Exception {
51 | PartnerConnection connection = null;
52 | try {
53 | connection = sfConfig.createPartnerConnection();
54 | String parentId = publishMetaData(metadata, connection);
55 | publish(datasetData, parentId, connection);
56 | finalizeWavePublish(parentId, connection);
57 |
58 | return parentId;
59 | } finally {
60 | if (connection != null) {
61 | connection.logout();
62 | }
63 | }
64 | }
65 |
66 | private void publish(byte[] content, String parentId, PartnerConnection connection) throws Exception {
67 | // Contents are being pushed here
68 | SObject dataSObject = new SObject();
69 | dataSObject.setType(STR_INSIGHTS_EXTERNAL_DATA_PART);
70 | dataSObject.setField(STR_DATAFILE, content);
71 | LOG.trace("Writing this data into WAVE : " + new String(content));
72 | dataSObject.setField(STR_INSIGHTS_EXTERNAL_DATA_ID, parentId);
73 | // Since each bundle is at most 10 MB, we will have only one part
74 | // Hence part number is always set to 1
75 | dataSObject.setField(STR_PART_NUMBER, 1);
76 |
77 | SaveResult[] dataPartPublishResults = connection.create(new SObject[] { dataSObject });
78 | checkResults(dataPartPublishResults);
79 | }
80 |
81 |
82 | private void finalizeWavePublish(String parentId, PartnerConnection connection) throws Exception {
83 | SObject metaDataSObject = new SObject();
84 | metaDataSObject.setType(STR_INSIGHTS_EXTERNAL_DATA);
85 | // Action is set to Process, which finalizes the data parts published so far
86 | metaDataSObject.setField(STR_ACTION, STR_ACTION_PROCESS);
87 | // Using the object Id returned during the metadata publish
88 | metaDataSObject.setId(parentId);
89 |
90 | SaveResult[] metadataPublishResults = connection.update(new SObject[] {metaDataSObject});
91 | checkResults(metadataPublishResults);
92 | }
93 |
94 | private String publishMetaData(byte[] metadata, PartnerConnection connection) throws Exception {
95 | // Metadata of a dataset is being published here
96 | SObject metadataSObject = new SObject();
97 | metadataSObject.setType(STR_INSIGHTS_EXTERNAL_DATA);
98 | metadataSObject.setField(STR_FORMAT, STR_CSV_FORMAT);
99 | metadataSObject.setField(STR_EDGEMART_ALIAS, datasetName);
100 | metadataSObject.setField(STR_METADATA_JSON, metadata);
101 | metadataSObject.setField(STR_OPERATION, STR_OVERWRITE_OPERATION);
102 | // Action is None here. It will be Process only after all data part has been created
103 | metadataSObject.setField(STR_ACTION, STR_ACTION_NONE);
104 |
105 | SaveResult[] metadataPublishResults = connection.create(new SObject[] { metadataSObject });
106 | return checkResults(metadataPublishResults);
107 | }
108 |
109 | private String checkResults(SaveResult[] publishResults) throws Exception {
110 | for (SaveResult publishResult : publishResults) {
111 | if (publishResult.isSuccess()) {
112 | LOG.debug("Flushed to wave : " + publishResult.getId());
113 | return publishResult.getId();
114 | } else {
115 | StringBuilder sfWaveErrMsg = new StringBuilder();
116 | sfWaveErrMsg.append("Error while flushing data to wave.\n");
117 | sfWaveErrMsg.append("Salesforce Job Id : " + publishResult.getId() + "\n");
118 | sfWaveErrMsg.append("Salesforce error message : ");
119 | // Errors are concatenated to get a meaningful message
120 | Error[] errors = publishResult.getErrors();
121 | for (int i = 0; i < errors.length; i++) {
122 | sfWaveErrMsg.append(errors[i].getMessage());
123 | }
124 |
125 | LOG.error(sfWaveErrMsg.toString());
126 |
127 | // Stopping Job if publish fails
128 | throw new Exception(sfWaveErrMsg.toString());
129 | }
130 | }
131 |
132 | return null;
133 | }
134 | }
135 |
--------------------------------------------------------------------------------
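A minimal usage sketch for the writer above; the file locations and CSV row are placeholders, the dataset name is the AdDataJob default, and the metadata JSON follows the Salesforce Wave external data format:

import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.wave.prototype.dataflow.model.SFConfig;
import com.google.wave.prototype.dataflow.sf.SFWaveDatasetWriter;
import com.google.wave.prototype.dataflow.util.FileUtil;

public class SFWaveDatasetWriterSketch {
    public static void main(String[] args) throws Exception {
        PipelineOptions options = PipelineOptionsFactory.create();
        // Placeholder locations; the metadata JSON describes the CSV being published
        SFConfig sfConfig = SFConfig.getInstance("file:///tmp/sf_source_config.json", options);
        byte[] metadata = FileUtil.getContent("file:///tmp/metadata.json", options).getBytes();
        byte[] datasetData = "101,OPP-1,2,5\n".getBytes();

        SFWaveDatasetWriter writer = new SFWaveDatasetWriter(sfConfig, "SampleAdDataSet");
        // Publishes the metadata, one data part and the Process action; returns the parent Id
        String parentId = writer.write(metadata, datasetData);
        System.out.println("InsightsExternalData Id: " + parentId);
    }
}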
/src/main/java/com/google/wave/prototype/dataflow/transform/AggregateEvents.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.transform;
2 |
3 | import com.google.cloud.dataflow.sdk.transforms.DoFn;
4 | import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
5 | import com.google.cloud.dataflow.sdk.transforms.PTransform;
6 | import com.google.cloud.dataflow.sdk.transforms.ParDo;
7 | import com.google.cloud.dataflow.sdk.values.KV;
8 | import com.google.cloud.dataflow.sdk.values.PCollection;
9 | import com.google.wave.prototype.dataflow.model.AggregatedData;
10 | import com.google.wave.prototype.dataflow.util.JobConstants;
11 |
12 | /**
13 | * Aggregate the AdData using the proposalId and event present in AdData CSV
14 | * AdData CSV data is with the below headers,
15 | * id,time,local_host,pixel_id,client_ip,request_url,cookie_id,event,version,success_code,proposal_id
16 | * In this event will be either click or Impression. There will be multiple rows with a single proposal_id
17 | * This PTransform will transform such rows into {@link AggregatedData}
18 | */
19 | public class AggregateEvents extends
20 | PTransform<PCollection<String>, PCollection<AggregatedData>> {
21 | private static final long serialVersionUID = 3238291110118750209L;
22 |
23 | @Override
24 | public PCollection<AggregatedData> apply(PCollection<String> rawdata) {
25 | // Just selecting ProposalId and events
26 | PCollection<KV<String, String>> filteredData = rawdata.apply(ParDo
27 | .of(new FilterRawData()));
28 | // Grouping all events for a proposalId
29 | PCollection<KV<String, Iterable<String>>> groupedData = filteredData
30 | .apply(GroupByKey.<String, String> create());
31 | // Counting the number of clicks and impressions for a proposalId
32 | return groupedData.apply(ParDo.of(new CountEvents()));
33 | }
34 |
35 | /**
36 | * Construct KV with proposalId as key and event as value for a given CSV Row (AdData)
37 | * CSV Row will be the input for this DoFn
38 | * Output will be a KV with proposal_id in the row as key and event in the row as value
39 | * For example, for the below input
40 | * 1,01-01-14 9:00,ip-10-150-38-122/10.150.38.122,0,70.209.198.223,http://sample.com,3232,Impression,3,1,101
41 | * output will be
42 | * KV.of(101, Impression)
43 | */
44 | protected static class FilterRawData extends DoFn<String, KV<String, String>> {
45 | private static final long serialVersionUID = 6002612407682561915L;
46 | private static int COL_PROPOSAL_ID = 10;
47 | private static int COL_EVENT = 7;
48 |
49 | @Override
50 | public void processElement(
51 | DoFn<String, KV<String, String>>.ProcessContext c)
52 | throws Exception {
53 | // CSVRow will be like
54 | // id,time,local_host,pixel_id,client_ip,request_url,cookie_id,event,version,success_code,proposal_id
55 | // Column 7 and 10. i.e. event and proposal_id
56 | String csvRow = c.element();
57 | String[] columns = csvRow.split(JobConstants.STR_COMMA);
58 | // Result will be KV with proposal_id as key and event as value
59 | c.output(KV.of(columns[COL_PROPOSAL_ID], columns[COL_EVENT]));
60 | }
61 |
62 | }
63 |
64 | /**
65 | * Count the number of clicks and number of Impressions for a specific ProposalId
66 | * Input for this DoFn will be KV with key as proposalId and value as events. Like,
67 | * KV(101, ("Impression", "Impression", "Click")
68 | * Output will be {@link AggregatedData} with the proposalId and number of clicks and Impressions
69 | */
70 | public static class CountEvents extends
71 | DoFn<KV<String, Iterable<String>>, AggregatedData> {
72 | private static final long serialVersionUID = 6002612407682561915L;
73 | private static final String STR_IMPRESSION = "impression";
74 | private static final String STR_CLICK = "click";
75 |
76 | @Override
77 | public void processElement(
78 | DoFn<KV<String, Iterable<String>>, AggregatedData>.ProcessContext c)
79 | throws Exception {
80 | // Element will be like,
81 | // KV(101, ("Impression", "Impression", "Click")
82 | KV<String, Iterable<String>> proposalIdEventsKV = c.element();
83 | // Getting the events alone
84 | // ("Impression", "Impression", "Click")
85 | Iterable<String> events = proposalIdEventsKV.getValue();
86 | int clicks = 0;
87 | int impressions = 0;
88 | // Iterating events and increasing the click and impression count
89 | for (String event : events) {
90 | if (event.equalsIgnoreCase(STR_IMPRESSION)) {
91 | impressions++;
92 | } else if (event.equalsIgnoreCase(STR_CLICK)) {
93 | clicks++;
94 | }
95 | }
96 |
97 | // Constructing new AggregatedData with proposalId, Click Count and Impression Count
98 | c.output(new AggregatedData(proposalIdEventsKV.getKey(), clicks, impressions));
99 | }
100 | }
101 | }
--------------------------------------------------------------------------------
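The CountEvents DoFn above can be exercised on its own with the SDK's DoFnTester; a minimal sketch using the same example as the Javadoc:

import java.util.Arrays;
import java.util.List;

import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.wave.prototype.dataflow.model.AggregatedData;
import com.google.wave.prototype.dataflow.transform.AggregateEvents;

public class CountEventsSketch {
    public static void main(String[] args) throws Exception {
        DoFnTester<KV<String, Iterable<String>>, AggregatedData> tester =
                DoFnTester.of(new AggregateEvents.CountEvents());

        // Two impressions and one click for proposal 101
        KV<String, Iterable<String>> input =
                KV.of("101", (Iterable<String>) Arrays.asList("Impression", "Impression", "Click"));
        List<AggregatedData> output = tester.processBatch(input);

        // AggregatedData.toString() prints proposalId,opportunityId,clickCount,impressionCount
        System.out.println(output.get(0)); // prints 101,,1,2
    }
}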
/src/main/java/com/google/wave/prototype/dataflow/transform/SFRead.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.transform;
2 |
3 | import java.util.List;
4 |
5 | import com.google.cloud.dataflow.sdk.transforms.DoFn;
6 | import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
7 | import com.google.cloud.dataflow.sdk.transforms.PTransform;
8 | import com.google.cloud.dataflow.sdk.transforms.ParDo;
9 | import com.google.cloud.dataflow.sdk.values.KV;
10 | import com.google.cloud.dataflow.sdk.values.PCollection;
11 | import com.google.wave.prototype.dataflow.sf.SFSOQLExecutor;
12 | import com.google.wave.prototype.dataflow.util.CSVUtil;
13 | import com.sforce.soap.enterprise.sobject.SObject;
14 |
15 | /**
16 | * PTransform to read the Salesforce object using SOQL
17 | * SOQL query present in pipeline will be executed and the result will be converted into CSV
18 | * This uses Salesforce SOAP API (Enterprise.wsdl) to execute SOQL
19 | * A Sample SOQL will look like,
20 | * SELECT AccountId, Id FROM Opportunity
21 | */
22 | public final class SFRead extends PTransform<PCollection<String>, PCollection<String>> {
23 | private static final long serialVersionUID = -7168554842895484301L;
24 |
25 | private final int noOfBundles;
26 | private final SFSOQLExecutor soqlExecutor;
27 |
28 | public SFRead(SFSOQLExecutor soqlExecutor) {
29 | // Default to 10
30 | this.noOfBundles = 10;
31 | this.soqlExecutor = soqlExecutor;
32 | }
33 |
34 | public SFRead(SFSOQLExecutor soqlExecutor, int noOfBundles) {
35 | this.noOfBundles = noOfBundles;
36 | this.soqlExecutor = soqlExecutor;
37 | }
38 |
39 | @Override
40 | public PCollection<String> apply(PCollection<String> input) {
41 | return input
42 | // Executing SOQL Query
43 | .apply(ParDo.of(new ExecuteSOQL(soqlExecutor, noOfBundles)))
44 | // Creating bundles based on the key
45 | // Key will be hash modulo
46 | .apply(GroupByKey.<Integer, String>create())
47 | .apply(ParDo.of(new RegroupRecords()));
48 | }
49 |
50 | /**
51 | * Splitting the grouped data as individual records
52 | */
53 | private class RegroupRecords extends DoFn<KV<Integer, Iterable<String>>, String> {
54 | private static final long serialVersionUID = -2126735721477220174L;
55 |
56 | @Override
57 | public void processElement(
58 | DoFn<KV<Integer, Iterable<String>>, String>.ProcessContext c)
59 | throws Exception {
60 | // Adding the result as individual Salesforce Data
61 | Iterable<String> sfRefData = c.element().getValue();
62 | for (String csvRow : sfRefData) {
63 | c.output(csvRow);
64 | }
65 | }
66 |
67 | }
68 |
69 | /**
70 | * Executes SOQL Query and provides the result as CSV in bundles
71 | * Result of the SOQL query will be converted into CSV
72 | * Bundles will be created according to the noOfBundles specified
73 | */
74 | public static class ExecuteSOQL extends DoFn<String, KV<Integer, String>> {
75 | private static final long serialVersionUID = 3227568229914179295L;
76 |
77 | private int noOfBundles;
78 | private SFSOQLExecutor soqlExecutor;
79 |
80 | public ExecuteSOQL(SFSOQLExecutor soqlExecutor, int noOfBundles) {
81 | this.soqlExecutor = soqlExecutor;
82 | this.noOfBundles = noOfBundles;
83 | }
84 |
85 | @Override
86 | public void processElement(
87 | DoFn<String, KV<Integer, String>>.ProcessContext c)
88 | throws Exception {
89 | String sfQuery = c.element();
90 | // Execute SOQL
91 | List<SObject> sfResults = soqlExecutor.executeQuery(sfQuery);
92 | // Convert to CSV
93 | CSVUtil csvUtil = new CSVUtil(sfQuery);
94 | for (int i = 0, size = sfResults.size(); i < size; i++) {
95 | String csvRow = csvUtil.getAsCSV(sfResults.get(i));
96 | // Getting hash Modulo
97 | int hashModulo = Math.abs(csvRow.hashCode() % noOfBundles);
98 | c.output(KV.of(hashModulo, csvRow));
99 | }
100 | }
101 | }
102 | }
103 |
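A minimal usage sketch (not part of the repository), assuming the pipeline options, the config file path and the SOQL query shown here are placeholders:

    PipelineOptions options = PipelineOptionsFactory.create();
    Pipeline p = Pipeline.create(options);
    // SFConfig carries the Salesforce credentials; the path below is an assumed example
    SFConfig sfConfig = SFConfig.getInstance("file:///path/to/sf_config.json", options);
    // SFRead executes the SOQL query and returns the result as a PCollection of CSV rows
    PCollection<String> sfRefCsvRows = p
        .apply(Create.of("SELECT AccountId, Id, ProposalID__c FROM Opportunity"))
        .apply(new SFRead(new SFSOQLExecutor(sfConfig)));

The two-argument constructor can be used instead to control the bundle count, e.g. new SFRead(new SFSOQLExecutor(sfConfig), 20).
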
--------------------------------------------------------------------------------
/src/main/java/com/google/wave/prototype/dataflow/transform/SFWaveWrite.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.transform;
2 |
3 | import org.slf4j.Logger;
4 | import org.slf4j.LoggerFactory;
5 |
6 | import com.google.cloud.dataflow.sdk.options.PipelineOptions;
7 | import com.google.cloud.dataflow.sdk.transforms.Combine;
8 | import com.google.cloud.dataflow.sdk.transforms.DoFn;
9 | import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
10 | import com.google.cloud.dataflow.sdk.transforms.PTransform;
11 | import com.google.cloud.dataflow.sdk.transforms.ParDo;
12 | import com.google.cloud.dataflow.sdk.transforms.Sum;
13 | import com.google.cloud.dataflow.sdk.transforms.View;
14 | import com.google.cloud.dataflow.sdk.values.KV;
15 | import com.google.cloud.dataflow.sdk.values.PCollection;
16 | import com.google.cloud.dataflow.sdk.values.PCollectionView;
17 | import com.google.wave.prototype.dataflow.model.SFConfig;
18 | import com.google.wave.prototype.dataflow.model.SFWaveWriteResult;
19 | import com.google.wave.prototype.dataflow.sf.SFWaveDatasetWriter;
20 | import com.google.wave.prototype.dataflow.util.FileUtil;
21 |
22 |  * PTransform to write the dataset content into Salesforce Wave.
23 |  * This uses the Salesforce SOAP API (Partner WSDL) to publish data into Salesforce Wave.
24 |  * This PTransform requires the following inputs:
25 |  *   {@link SFWaveDatasetWriter} - writer carrying the {@link SFConfig} which will be used
26 |  *       by this transform
27 |  *   sfMetadataFileLocation - location of a Salesforce Wave metadata file describing the data
28 |  *       to be published to Wave; can be a local file or a GCS file
29 |  * Refer https://resources.docs.salesforce.com/sfdc/pdf/bi_dev_guide_ext_data_format.pdf
30 |  * for the metadata format
31 |  */
32 | public class SFWaveWrite extends
33 | PTransform<PCollection<String>, PCollection<SFWaveWriteResult>> {
34 | private static final long serialVersionUID = 5830880169795002498L;
35 | private static final Logger LOG = LoggerFactory
36 | .getLogger(SFWaveWrite.class);
37 |
38 | private final SFWaveDatasetWriter writer;
39 | private final String sfMetadataFileLocation;
40 |
41 | public SFWaveWrite(SFWaveDatasetWriter writer, String sfMetadataFileLocation) {
42 | this.writer = writer;
43 | this.sfMetadataFileLocation = sfMetadataFileLocation;
44 | }
45 |
46 | @Override
47 | public PCollection<SFWaveWriteResult> apply(PCollection<String> rowData) {
48 | LOG.debug("SFWaveWrite starts");
49 | // Number of bundles calculated here
50 | PCollection<Integer> noOfBundles = rowData
51 | .apply(new CalculateNoOfBundles());
52 | PCollectionView<Integer> sideInput = noOfBundles.apply(View
53 | .<Integer> asSingleton());
54 | // Making KV with hash modulo as key and CSV row as value
55 | PCollection<KV<Integer, String>> kvData = rowData
56 | .apply(ParDo.withSideInputs(sideInput).of(
57 | new DistributeRowData(sideInput)));
58 | // Creating bundles using GroupByKey
59 | PCollection<KV<Integer, Iterable<String>>> groupedRows = kvData
60 | .apply(GroupByKey.<Integer, String> create());
61 | // Writing Data into Salesforce Wave
62 | PCollection<SFWaveWriteResult> writeResult = groupedRows.apply(ParDo
63 | .of(new Write(writer, sfMetadataFileLocation)));
64 |
65 | LOG.debug("SFWaveWrite ends");
66 | return writeResult;
67 | }
68 |
69 | /**
70 |  * Calculates the number of bundles to be created. The calculation is based on
71 |  * the size of the data to be sent to Salesforce Wave. The size of the data is
72 |  * calculated using {@code String.length()} and then {@code Sum.SumLongFn}
73 |  */
74 | public static class CalculateNoOfBundles extends
75 | PTransform<PCollection<String>, PCollection<Integer>> {
76 | private static final long serialVersionUID = -7383871712471335638L;
77 | private static final String INDIVIDUAL_SIZE_PAR_DO_NAME = "IndividualSize";
78 | private static final String NO_OF_BUNDLES_PAR_DO_NAME = "NoOfBundles";
79 |
80 | @Override
81 | public PCollection<Integer> apply(PCollection<String> input) {
82 | return input.apply(ParDo.named(INDIVIDUAL_SIZE_PAR_DO_NAME).of(
83 |
84 | new DoFn<String, Long>() {
85 | private static final long serialVersionUID = -6374354958403597940L;
86 |
87 | @Override
88 | public void processElement(ProcessContext c) throws Exception {
89 | // String.length is used to get the size of data for an
90 | // individual row
91 | // As further grouping takes place, the additional size for
92 | // UTF-16 characters are ignored
93 | String rowToBePersisted = c.element();
94 | c.output(Integer.valueOf(rowToBePersisted.length())
95 | .longValue());
96 | }
97 | }))
98 | // Calculating the total size of the data to be persisted into
99 | // Salesforce Wave
100 | .apply(Combine.globally(new Sum.SumLongFn()))
101 | // Number of bundles calculated based on the size of data
102 | .apply(ParDo.named(NO_OF_BUNDLES_PAR_DO_NAME).of(
103 | new BundleCount()));
104 | }
105 | }
106 |
107 | /**
108 |  * Counts the number of bundles to be created. The number of bundles to be created
109 |  * is based on the size of the data to be persisted into Salesforce Wave. At
110 |  * a max Salesforce can accept 10MB, so the size of a bundle should not be more
111 |  * than 10MB
112 |  */
113 | public static class BundleCount extends DoFn<Long, Integer> {
114 | private static final long serialVersionUID = -7446604319456830150L;
115 |
116 | @Override
117 | public void processElement(DoFn<Long, Integer>.ProcessContext c)
118 | throws Exception {
119 | // No of Bundles = totalSize / (1024 * 1024 * 10)
120 | // 1024 * 1024 is to convert into MB
121 | // Maximum support in Salesforce Wave API is 10 MB
122 | // For example, if the size of the data is 335544320, then 33
123 | // bundles will be created
124 | // Math.round(335544320/(1024 * 1024 * 10)) + 1 = 33
125 | Long totalDataSize = c.element();
126 | Long maxBundleSize = 1024 * 1024 * 10l;
127 | if (totalDataSize > maxBundleSize) {
128 | c.output(Math.round(totalDataSize / maxBundleSize) + 1);
129 | } else {
130 | // As the size less than 10MB the data can be handled in single
131 | // bundle itself
132 | c.output(1);
133 | }
134 | }
135 |
136 | }
137 |
138 | /**
139 |  * Distributes the data evenly across bundles. If the data is of size 32 MB then
140 |  * the data will be distributed to 4 bundles of 8MB each
141 |  */
142 | public static class DistributeRowData extends
143 | DoFn<String, KV<Integer, String>> {
144 | private static final long serialVersionUID = 3917848069436988535L;
145 | private PCollectionView<Integer> noOfBundlesPCol;
146 |
147 | // Number of bundles is calculated in CalculateNoOfBundles and
148 | // provided here as sideInput
149 | public DistributeRowData(PCollectionView<Integer> noOfBundles) {
150 | this.noOfBundlesPCol = noOfBundles;
151 | }
152 |
153 | @Override
154 | public void processElement(
155 | DoFn<String, KV<Integer, String>>.ProcessContext c)
156 | throws Exception {
157 | // Getting the number of bundles from sideInput
158 | Integer noOfBundles = c.sideInput(noOfBundlesPCol);
159 | String waveCSVData = c.element();
160 | // Using hash modulo to evenly distribute data across bundles
161 | int hash = Math.abs(waveCSVData.hashCode() % noOfBundles);
162 | // Using the hash as key which can be grouped later to create
163 | // bundles
164 | c.output(KV.of(hash, waveCSVData));
165 | }
166 | }
167 |
168 | /**
169 |  * DoFn which takes care of writing the datasets into Salesforce Wave. This
170 |  * uses {@link SFWaveDatasetWriter}
171 |  */
172 | public static class Write extends
173 | DoFn<KV<Integer, Iterable<String>>, SFWaveWriteResult> {
174 | private static final long serialVersionUID = -1875427181542264934L;
175 |
176 | private final SFWaveDatasetWriter writer;
177 | private final String sfMetadataFileLocation;
178 |
179 | public Write(SFWaveDatasetWriter writer, String sfMetadataFileLocation) {
180 | this.writer = writer;
181 | this.sfMetadataFileLocation = sfMetadataFileLocation;
182 | }
183 |
184 | @Override
185 | public void processElement(
186 | DoFn<KV<Integer, Iterable<String>>, SFWaveWriteResult>.ProcessContext c)
187 | throws Exception {
188 |
189 | // Converting the grouped records into bytes
190 | KV<Integer, Iterable<String>> groupedRecords = c.element();
191 | Iterable<String> csvRows = groupedRecords.getValue();
192 | byte[] datasetData = getAsBytes(csvRows);
193 |
194 | String sfObjId = writer.write(
195 | getMetadataContent(c.getPipelineOptions()), datasetData);
196 | SFWaveWriteResult sfWaveWriteResult = new SFWaveWriteResult(sfObjId);
197 | c.output(sfWaveWriteResult);
198 | }
199 |
200 | private byte[] getMetadataContent(PipelineOptions options)
201 | throws Exception {
202 | String content = FileUtil.getContent(sfMetadataFileLocation,
203 | options);
204 | return content.getBytes();
205 | }
206 |
207 | private byte[] getAsBytes(Iterable<String> waveRows) {
208 | // Converting all CSV rows into single String which will be
209 | // published to Salesforce WAVE
210 | StringBuilder csvRows = new StringBuilder();
211 | // Row may be like
212 | // AccountId,OpportunityId,ClickCount,ImpressionCount
213 | for (String individualRow : waveRows) {
214 | csvRows.append(individualRow);
215 | csvRows.append('\n');
216 | }
217 |
218 | return csvRows.toString().getBytes();
219 | }
220 |
221 | }
222 |
223 | }
224 |
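A minimal wiring sketch (not part of the repository), assuming enrichedCsvRows is a PCollection<String> of CSV rows produced earlier in the pipeline and that the dataset name and metadata file location shown here are placeholders:

    // The writer publishes via the Partner WSDL using the credentials held in SFConfig
    SFWaveDatasetWriter writer = new SFWaveDatasetWriter(sfConfig, "EnrichedAdData");
    // The metadata file can be a local file:// path or a GCS gs:// path
    PCollection<SFWaveWriteResult> writeResults = enrichedCsvRows
        .apply(new SFWaveWrite(writer, "gs://my-bucket/wave_metadata.json"));
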
--------------------------------------------------------------------------------
/src/main/java/com/google/wave/prototype/dataflow/util/CSVUtil.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.util;
2 |
3 | import java.io.ByteArrayInputStream;
4 | import java.io.ByteArrayOutputStream;
5 | import java.io.IOException;
6 | import java.util.ArrayList;
7 | import java.util.HashMap;
8 | import java.util.List;
9 | import java.util.Map;
10 |
11 | import javax.xml.namespace.QName;
12 | import javax.xml.parsers.DocumentBuilder;
13 | import javax.xml.parsers.DocumentBuilderFactory;
14 | import javax.xml.parsers.ParserConfigurationException;
15 |
16 | import net.sf.jsqlparser.parser.CCJSqlParserUtil;
17 | import net.sf.jsqlparser.schema.Column;
18 | import net.sf.jsqlparser.statement.select.PlainSelect;
19 | import net.sf.jsqlparser.statement.select.Select;
20 | import net.sf.jsqlparser.statement.select.SelectExpressionItem;
21 | import net.sf.jsqlparser.statement.select.SelectItem;
22 |
23 | import org.slf4j.Logger;
24 | import org.slf4j.LoggerFactory;
25 | import org.w3c.dom.Document;
26 | import org.w3c.dom.Node;
27 | import org.w3c.dom.NodeList;
28 | import org.xml.sax.SAXException;
29 |
30 | import com.sforce.soap.enterprise.sobject.SObject;
31 | import com.sforce.ws.bind.TypeMapper;
32 | import com.sforce.ws.parser.XmlOutputStream;
33 |
34 | /**
35 | * Utility to convert Salesforce SObject into CSV
36 |  * It requires the SOQL query to determine the fields queried from Salesforce
37 | */
38 | public class CSVUtil {
39 | private static final Logger LOG = LoggerFactory.getLogger(CSVUtil.class);
40 |
41 | /** Columns queried from Salesforce */
42 | private List<String> columnNames = new ArrayList<String>();
43 |
44 | /**
45 | * @param soqlQuery - SOQL query used to fetch Salesforce Reference data
46 | * @throws Exception
47 | */
48 | public CSVUtil(String soqlQuery) throws Exception {
49 | // Parsing the SOQL Query to get the columns queried from Salesforce
50 | Select stmt = (Select) CCJSqlParserUtil.parse(soqlQuery);
51 | PlainSelect plainSelect = (PlainSelect) stmt.getSelectBody();
52 | // SelectedItems contains the column to be selected
53 | List<SelectItem> selectItems = plainSelect.getSelectItems();
54 | for (SelectItem selectItem : selectItems) {
55 | // We will get only columns as expressions are not supported
56 | Column column = (Column) ((SelectExpressionItem) selectItem).getExpression();
57 | columnNames.add(column.getColumnName());
58 | }
59 |
60 | LOG.debug("Columns from SOQL Query " + columnNames);
61 | }
62 |
63 | /**
64 |  * @param sObject One of the results of executing the SOQL query
65 | * @return Converted CSV data from SObject
66 | * @throws Exception
67 | */
68 | public String getAsCSV(SObject sObject) throws Exception {
69 | StringBuilder csv = new StringBuilder();
70 |
71 | // Reading the SObject as XML Document
72 | Document doc = readDocument(sObject);
73 | // Reading the fields present in XML document
74 | Map<String, String> fieldMap = readFields(doc);
75 | for (int i = 0, size = columnNames.size(); i < size; i++) {
76 | if (i != 0) {
77 | csv.append(',');
78 | }
79 |
80 | // Getting the corresponding value from the fieldMap using columns constructed from SOQL query
81 | String fieldValue = fieldMap.get(columnNames.get(i));
82 | if (fieldValue != null) {
83 | csv.append(fieldValue);
84 | }
85 | }
86 |
87 | // Completing a row
88 | csv.append('\n');
89 |
90 | LOG.debug("Returning CSV " + csv);
91 | return csv.toString();
92 | }
93 |
94 | private Map<String, String> readFields(Document doc) {
95 | // XML will be like
96 | // <result>
97 | //   <sf:AccountId>1233</sf:AccountId>
98 | //   <sf:Id>1234</sf:Id>
99 | //   <sf:ProposalID__c>101</sf:ProposalID__c>
100 | // </result>
101 | //
102 | //
103 | // Here doc is the document rooted at <result>
104 | Node parentElement = doc.getChildNodes().item(0);
105 | // Here parentElement is <result>
106 | NodeList childNodes = parentElement.getChildNodes();
107 | // Child Nodes are <sf:AccountId>, <sf:Id> and <sf:ProposalID__c>
108 | Map<String, String> fieldValueMap = new HashMap<String, String>();
109 | if (childNodes != null && childNodes.getLength() > 0) {
110 | for (int i = 0, size = childNodes.getLength(); i < size; i++) {
111 | Node item = childNodes.item(i);
112 | // Removing prefix as the column name present in SOQL will not have it
113 | // This nodename will be compared with fields queried in SOQL
114 | fieldValueMap.put(stripPrefix(item.getNodeName()), item.getTextContent());
115 | }
116 | }
117 |
118 | return fieldValueMap;
119 | }
120 |
121 | private String stripPrefix(String nodeName) {
122 | return strip(nodeName, ':');
123 | }
124 |
125 | private String strip(String str, char separator) {
126 | int aliasIndex = str.indexOf(separator);
127 | if (aliasIndex != -1) {
128 | return str.substring(aliasIndex + 1);
129 | }
130 |
131 | return str;
132 | }
133 |
134 | private Document readDocument(SObject sObject) throws Exception {
135 | ByteArrayInputStream bis = null;
136 | XmlOutputStream xmlOutputStream = null;
137 |
138 | try {
139 | // Getting the doc as <result>...</result>
140 | // As the Salesforce SOAP API is used, converting to XML is the only option
141 | QName element = new QName("urn:sobject", "result");
142 | ByteArrayOutputStream bos = new ByteArrayOutputStream();
143 |
144 | xmlOutputStream = new XmlOutputStream(bos, false);
145 | xmlOutputStream.startDocument();
146 | // Writes all the fields to outputStream
147 | sObject.write(element, xmlOutputStream, new TypeMapper());
148 | xmlOutputStream.endDocument();
149 |
150 | bis = new ByteArrayInputStream(bos.toByteArray());
151 | // Converting it as DOM object
152 | DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
153 | DocumentBuilder docBuilder = builderFactory.newDocumentBuilder();
154 | return docBuilder.parse(bis);
155 | } catch (ParserConfigurationException | SAXException e) {
156 | throw new Exception(e);
157 | } finally {
158 | if (bis != null) {
159 | try {
160 | bis.close();
161 | } catch (IOException ioe) {
162 | LOG.warn("Error while closing Stream", ioe);
163 | }
164 | }
165 |
166 | if (xmlOutputStream != null) {
167 | // This will make sure the ByteArrayOutputStream provided is also closed
168 | try {
169 | xmlOutputStream.close();
170 | } catch (IOException ioe) {
171 | LOG.warn("Error while closing Stream", ioe);
172 | }
173 | }
174 | }
175 | }
176 |
177 | }
178 |
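An illustrative example (values assumed) of how CSVUtil is used by SFRead.ExecuteSOQL: the utility is built from the SOQL query so it knows the column order, and each SObject returned by the query is rendered as one CSV row (soqlExecutor and soqlQuery are assumed to be in scope):

    CSVUtil csvUtil = new CSVUtil("SELECT AccountId, Id, ProposalID__c FROM Opportunity");
    for (SObject sObject : soqlExecutor.executeQuery(soqlQuery)) {
        // For an Opportunity with AccountId 001B..., Id 006B... and ProposalID__c 101
        // this yields a row such as "001B0000003oYAfIAM,006B0000002ndnpIAA,101\n"
        String csvRow = csvUtil.getAsCSV(sObject);
    }
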
--------------------------------------------------------------------------------
/src/main/java/com/google/wave/prototype/dataflow/util/FileUtil.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.util;
2 |
3 | import java.io.File;
4 |
5 | import org.apache.commons.io.Charsets;
6 | import org.apache.commons.io.FileUtils;
7 | import org.apache.commons.lang3.StringUtils;
8 |
9 | import com.google.cloud.dataflow.sdk.options.PipelineOptions;
10 |
11 | /**
12 |  * Simple utility to read the contents of a file
13 |  * The file can be present in GCS or in the local file system
14 | */
15 | public class FileUtil {
16 |
17 | public static String getContent(String fileLocation, PipelineOptions options) throws Exception {
18 | // Have separate reader for GS files and local files
19 | if (fileLocation.startsWith(SFConstants.GS_FILE_PREFIX)) {
20 | return readFromGCS(fileLocation, options);
21 | } else {
22 | return readFromLocal(fileLocation);
23 | }
24 | }
25 |
26 | private static String readFromLocal(String configFileLocation) throws Exception {
27 | // Removing file:// prefix
28 | String fileLocation = StringUtils.substringAfter(configFileLocation, SFConstants.LOCAL_FILE_PREFIX);
29 | // Using commons-io utility to read the file as String
30 | return FileUtils.readFileToString(new File(fileLocation), Charsets.UTF_8);
31 | }
32 |
33 | private static String readFromGCS(String configFileLocation,
34 | PipelineOptions options) throws Exception {
35 | GCSFileUtil gcsFileUtil = new GCSFileUtil(options);
36 | byte[] contents = gcsFileUtil.read(configFileLocation);
37 | return new String(contents);
38 | }
39 | }
40 |
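A short sketch of the two supported forms, assuming options is a PipelineOptions instance and the paths shown are placeholders:

    // Local file, read with commons-io
    String fromLocal = FileUtil.getContent("file:///home/user/wave_metadata.json", options);
    // GCS file, read through GCSFileUtil (only usable where a GcsUtil can be created)
    String fromGcs = FileUtil.getContent("gs://my-bucket/wave_metadata.json", options);
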
--------------------------------------------------------------------------------
/src/main/java/com/google/wave/prototype/dataflow/util/GCSFileUtil.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.util;
2 |
3 | import java.nio.ByteBuffer;
4 | import java.nio.channels.SeekableByteChannel;
5 |
6 | import com.google.cloud.dataflow.sdk.options.PipelineOptions;
7 | import com.google.cloud.dataflow.sdk.util.GcsUtil;
8 | import com.google.cloud.dataflow.sdk.util.GcsUtil.GcsUtilFactory;
9 | import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
10 |
11 | /**
12 | * A Google Cloud Storage utility which can be used to read the files present in GCS
13 | * This utility can be used only for the Jobs running in Google Dataflow
14 | * This makes use of {@code GcsUtil} and {@code GcsPath} to read the file present in GCS
15 | */
16 | public class GCSFileUtil {
17 | private GcsUtil gcsUtil;
18 |
19 | public GCSFileUtil(PipelineOptions options) {
20 | // PipelineOption is required to create GcsUtil
21 | // hence this can be used only for Google Dataflow jobs
22 | gcsUtil = new GcsUtilFactory().create(options);
23 | }
24 |
25 | public byte[] read(String filePath) throws Exception {
26 | GcsPath gcsPath = GcsPath.fromUri(filePath);
27 | SeekableByteChannel seekableByteChannel = gcsUtil.open(gcsPath);
28 | // Allocating ByteBuffer based on the file size
29 | ByteBuffer fileContent = ByteBuffer.allocate(Long.valueOf(gcsUtil.fileSize(gcsPath)).intValue());
30 | seekableByteChannel.read(fileContent);
31 |
32 | return fileContent.array();
33 | }
34 |
35 | }
36 |
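A small usage sketch, assuming options is a PipelineOptions instance and the bucket and object names are placeholders:

    GCSFileUtil gcsFileUtil = new GCSFileUtil(options);
    byte[] metadataBytes = gcsFileUtil.read("gs://my-bucket/wave_metadata.json");
    String metadataJson = new String(metadataBytes);
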
--------------------------------------------------------------------------------
/src/main/java/com/google/wave/prototype/dataflow/util/JobConstants.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.util;
2 |
3 | public interface JobConstants {
4 | public static final String COL_ACCOUNT_ID = "AccountId";
5 | public static final String COL_OPPORTUNITY_ID = "OpportunityId";
6 | public static final String COL_PROPOSAL_ID = "ProposalId";
7 | public static final String COL_CLICKS = "Clicks";
8 | public static final String COL_IMPRESSIONS = "Impressions";
9 |
10 | public static final String COL_TYPE_STRING = "STRING";
11 | public static final String COL_TYPE_INTEGER = "INTEGER";
12 |
13 | public static final String STR_COMMA = ",";
14 | }
15 |
--------------------------------------------------------------------------------
/src/main/java/com/google/wave/prototype/dataflow/util/SFConstants.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.util;
2 |
3 | public interface SFConstants {
4 | public static String STR_INSIGHTS_EXTERNAL_DATA = "InsightsExternalData";
5 | public static String STR_INSIGHTS_EXTERNAL_DATA_PART = "InsightsExternalDataPart";
6 | public static String STR_INSIGHTS_EXTERNAL_DATA_ID = "InsightsExternalDataId";
7 |
8 | public static String STR_FORMAT = "Format";
9 | public static String STR_DATAFILE = "DataFile";
10 | public static String STR_EDGEMART_ALIAS = "EdgemartAlias";
11 | public static String STR_METADATA_JSON = "MetadataJson";
12 | public static String STR_OPERATION = "Operation";
13 | public static String STR_ACTION = "Action";
14 | public static String STR_PART_NUMBER= "PartNumber";
15 |
16 | public static String STR_CSV_FORMAT = "Csv";
17 | public static String STR_OVERWRITE_OPERATION = "Overwrite";
18 | public static String STR_ACTION_NONE = "None";
19 | public static String STR_ACTION_PROCESS = "Process";
20 |
21 | public static String GS_FILE_PREFIX = "gs://";
22 | public static String LOCAL_FILE_PREFIX = "file://";
23 | }
24 |
--------------------------------------------------------------------------------
/src/test/java/com/google/wave/prototype/dataflow/BaseTest.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 |
6 | import com.google.api.services.bigquery.model.TableRow;
7 | import com.google.wave.prototype.dataflow.model.AggregatedData;
8 | import com.google.wave.prototype.dataflow.util.JobConstants;
9 |
10 | public class BaseTest {
11 | // Test data
12 | protected static final String ACCOUNT_ID_1 = "001B0000003oYAfIAM";
13 | protected static final String OPPOR_ID_1 = "006B0000002ndnpIAA";
14 | protected static final String PROPOSAL_ID_1 = "101";
15 | protected static final int CLICK_COUNT_1 = 100;
16 | protected static final int IMPRESSION_COUNT_1 = 1000;
17 |
18 | protected static final String ACCOUNT_ID_2 = "001B0000003oYAfIAM";
19 | protected static final String OPPOR_ID_2 = "006B0000002ndnpIAF";
20 | protected static final String PROPOSAL_ID_2 = "102";
21 | protected static final int CLICK_COUNT_2 = 200;
22 | protected static final int IMPRESSION_COUNT_2 = 2000;
23 |
24 | protected AggregatedData[] getSampleAggDataWithoutOpporId() {
25 | AggregatedData[] sampleAggData = new AggregatedData[2];
26 |
27 | sampleAggData[0] = new AggregatedData(PROPOSAL_ID_1, CLICK_COUNT_1, IMPRESSION_COUNT_1);
28 | sampleAggData[1] = new AggregatedData(PROPOSAL_ID_2, CLICK_COUNT_2, IMPRESSION_COUNT_2);
29 |
30 | return sampleAggData;
31 | }
32 |
33 | protected AggregatedData[] getSampleAggDataWithOpporId() {
34 | AggregatedData[] sampleAggData = getSampleAggDataWithoutOpporId();
35 |
36 | sampleAggData[0].setOpportunityId(OPPOR_ID_1);
37 | sampleAggData[1].setOpportunityId(OPPOR_ID_2);
38 |
39 | return sampleAggData;
40 | }
41 |
42 | protected String getAsCSV(String... columns) {
43 | StringBuilder csv = new StringBuilder();
44 | for (int i = 0; i < columns.length; i++) {
45 | if (i != 0) {
46 | csv.append(',');
47 | }
48 | csv.append(columns[i]);
49 | }
50 | csv.append('\n');
51 |
52 | return csv.toString();
53 | }
54 |
55 | protected String getAsCSV(String proposalId, String opporId,
56 | int clickCount, int impressionCount) {
57 | return getAsCSV(proposalId, opporId, clickCount + "", impressionCount + "");
58 | }
59 |
60 | protected TableRow getAsTableRow(String accId1, String opporId1,
61 | String proposalId1) {
62 | TableRow row = new TableRow();
63 |
64 | row.set(JobConstants.COL_ACCOUNT_ID, accId1);
65 | row.set(JobConstants.COL_OPPORTUNITY_ID, opporId1);
66 | row.set(JobConstants.COL_PROPOSAL_ID, proposalId1);
67 |
68 | return row;
69 | }
70 |
71 | protected List<TableRow> getSampleSFRefTableRows() {
72 | List<TableRow> sampleSFRefTableRows = new ArrayList<TableRow>(4);
73 |
74 | sampleSFRefTableRows.add(getAsTableRow(ACCOUNT_ID_1, OPPOR_ID_1, PROPOSAL_ID_1));
75 | sampleSFRefTableRows.add(getAsTableRow(ACCOUNT_ID_2, OPPOR_ID_2, PROPOSAL_ID_2));
76 |
77 | return sampleSFRefTableRows;
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/src/test/java/com/google/wave/prototype/dataflow/coder/AggregateDataCoderTest.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.coder;
2 |
3 | import static org.junit.Assert.assertEquals;
4 |
5 | import java.io.ByteArrayInputStream;
6 |
7 | import org.apache.commons.io.output.ByteArrayOutputStream;
8 | import org.junit.Before;
9 | import org.junit.Test;
10 |
11 | import com.google.cloud.dataflow.sdk.coders.Coder.Context;
12 | import com.google.wave.prototype.dataflow.BaseTest;
13 | import com.google.wave.prototype.dataflow.model.AggregatedData;
14 |
15 | public class AggregateDataCoderTest extends BaseTest {
16 | private AggregatedData aggregatedData;
17 |
18 | @Before
19 | public void setup() {
20 | aggregatedData = new AggregatedData(PROPOSAL_ID_1, OPPOR_ID_1, CLICK_COUNT_1, IMPRESSION_COUNT_1);
21 | }
22 |
23 | @Test
24 | public void testCoder() throws Exception {
25 | ByteArrayOutputStream bos = null;
26 | ByteArrayInputStream bis = null;
27 | try {
28 | AggregateDataCoder coder = AggregateDataCoder.getInstance();
29 |
30 | bos = new ByteArrayOutputStream();
31 | coder.encode(aggregatedData, bos, Context.NESTED);
32 |
33 | bis = new ByteArrayInputStream(bos.toByteArray());
34 | AggregatedData decodedAggData = coder.decode(bis, Context.NESTED);
35 |
36 | assertEquals(aggregatedData, decodedAggData);
37 | } finally {
38 | if (bos != null) {
39 | bos.close();
40 | }
41 | }
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/src/test/java/com/google/wave/prototype/dataflow/coder/SFCoderTest.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.coder;
2 |
3 | import static org.junit.Assert.assertEquals;
4 |
5 | import java.io.ByteArrayInputStream;
6 |
7 | import org.apache.commons.io.output.ByteArrayOutputStream;
8 | import org.junit.Before;
9 | import org.junit.Test;
10 |
11 | import com.google.cloud.dataflow.sdk.coders.Coder.Context;
12 | import com.google.wave.prototype.dataflow.BaseTest;
13 | import com.google.wave.prototype.dataflow.model.SFReferenceData;
14 |
15 | public class SFCoderTest extends BaseTest {
16 | private SFReferenceData sfReferenceData;
17 |
18 | @Before
19 | public void setup() {
20 | sfReferenceData = new SFReferenceData(ACCOUNT_ID_1, OPPOR_ID_1, PROPOSAL_ID_1);
21 | }
22 |
23 | @Test
24 | public void testCoder() throws Exception {
25 | ByteArrayOutputStream bos = null;
26 | ByteArrayInputStream bis = null;
27 | try {
28 | SFCoder coder = SFCoder.getInstance();
29 |
30 | bos = new ByteArrayOutputStream();
31 | coder.encode(sfReferenceData, bos, Context.NESTED);
32 |
33 | bis = new ByteArrayInputStream(bos.toByteArray());
34 | SFReferenceData decodedsfData = coder.decode(bis, Context.NESTED);
35 |
36 | assertEquals(sfReferenceData, decodedsfData);
37 | } finally {
38 | if (bos != null) {
39 | bos.close();
40 | }
41 | }
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/src/test/java/com/google/wave/prototype/dataflow/function/AggregateDataEnricherTest.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.function;
2 |
3 | import java.util.List;
4 |
5 | import org.hamcrest.CoreMatchers;
6 | import org.junit.Assert;
7 | import org.junit.Test;
8 |
9 | import com.google.api.services.bigquery.model.TableRow;
10 | import com.google.cloud.dataflow.sdk.testing.TestPipeline;
11 | import com.google.cloud.dataflow.sdk.transforms.Create;
12 | import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
13 | import com.google.cloud.dataflow.sdk.transforms.View;
14 | import com.google.cloud.dataflow.sdk.values.PCollection;
15 | import com.google.cloud.dataflow.sdk.values.PCollectionView;
16 | import com.google.wave.prototype.dataflow.BaseTest;
17 | import com.google.wave.prototype.dataflow.model.AggregatedData;
18 |
19 | /**
20 | * Unit tests for {@link AggregateDataEnricher}
21 | */
22 | public class AggregateDataEnricherTest extends BaseTest {
23 |
24 | @Test
25 | public void enrichTest() {
26 | // Creating pipeline to construct sideInput
27 | TestPipeline testPipeline = TestPipeline.create();
28 | // Constructing sideInput
29 | List<TableRow> sampleSFRefTableRows = getSampleSFRefTableRows();
30 | PCollection<TableRow> sampleSFRefData = testPipeline.apply(Create.of(sampleSFRefTableRows));
31 | PCollectionView<Iterable<TableRow>> sideInput = sampleSFRefData.apply(View.asIterable());
32 |
33 | AggregateDataEnricher enricher = new AggregateDataEnricher(sideInput);
34 | DoFnTester<AggregatedData, AggregatedData> doFnTester = DoFnTester.of(enricher);
35 | doFnTester.setSideInputInGlobalWindow(sideInput, sampleSFRefTableRows);
36 |
37 | // Input Aggregated provided without opportunity Id
38 | List<AggregatedData> results = doFnTester.processBatch(getSampleAggDataWithoutOpporId());
39 |
40 | // Check whether the result has opportunity id populated with it
41 | Assert.assertThat(results, CoreMatchers.hasItems(getSampleAggDataWithOpporId()));
42 | }
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/src/test/java/com/google/wave/prototype/dataflow/function/CSVFormatterTest.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.function;
2 |
3 | import java.util.List;
4 |
5 | import org.hamcrest.CoreMatchers;
6 | import org.junit.Assert;
7 | import org.junit.Test;
8 |
9 | import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
10 | import com.google.wave.prototype.dataflow.BaseTest;
11 | import com.google.wave.prototype.dataflow.model.AggregatedData;
12 |
13 | public class CSVFormatterTest extends BaseTest {
14 |
15 | @Test
16 | public void transformAsCSVTest() {
17 | CSVFormatter csvFormatter = new CSVFormatter();
18 | DoFnTester<AggregatedData, String> dofnTester = DoFnTester.of(csvFormatter);
19 |
20 | List<String> results = dofnTester.processBatch(getSampleAggDataWithOpporId());
21 | Assert.assertThat(results, CoreMatchers.hasItems(getSampleEnrichedDataAsCSV()));
22 | }
23 |
24 | private String[] getSampleEnrichedDataAsCSV() {
25 | String[] sampleEnrichedCSVs= new String[2];
26 |
27 | sampleEnrichedCSVs[0] = getAsCSV(PROPOSAL_ID_1, OPPOR_ID_1, CLICK_COUNT_1, IMPRESSION_COUNT_1);
28 | sampleEnrichedCSVs[1] = getAsCSV(PROPOSAL_ID_2, OPPOR_ID_2, CLICK_COUNT_2, IMPRESSION_COUNT_2);
29 |
30 | return sampleEnrichedCSVs;
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/test/java/com/google/wave/prototype/dataflow/function/TableRowFormatterTest.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.function;
2 |
3 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_ACCOUNT_ID;
4 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_OPPORTUNITY_ID;
5 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_PROPOSAL_ID;
6 |
7 | import java.util.ArrayList;
8 | import java.util.List;
9 |
10 | import org.junit.Assert;
11 | import org.junit.Test;
12 |
13 | import com.google.api.services.bigquery.model.TableRow;
14 | import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
15 | import com.google.wave.prototype.dataflow.BaseTest;
16 |
17 | /**
18 | * Unit test for {@link TableRowFormatter} DoFn
19 | */
20 | public class TableRowFormatterTest extends BaseTest {
21 |
22 | @Test
23 | public void formatSFRefTest() {
24 | TableRowFormatter formatSFRefFn = new TableRowFormatter(getSFRefTableColumns());
25 | DoFnTester<String, TableRow> doFnTester = DoFnTester.of(formatSFRefFn);
26 |
27 | // Mocking SFRead by manually constructing CSV data
28 | List<TableRow> results = doFnTester.processBatch(
29 | getAsCSV(ACCOUNT_ID_1, OPPOR_ID_1, PROPOSAL_ID_1),
30 | getAsCSV(ACCOUNT_ID_2, OPPOR_ID_2, PROPOSAL_ID_2));
31 |
32 | // Converted tableRows are verified here
33 | Assert.assertEquals(getSampleSFRefTableRows(), results);
34 | }
35 |
36 | private List<String> getSFRefTableColumns() {
37 | List<String> columns = new ArrayList<String>(4);
38 |
39 | columns.add(COL_ACCOUNT_ID);
40 | columns.add(COL_OPPORTUNITY_ID);
41 | columns.add(COL_PROPOSAL_ID);
42 |
43 | return columns;
44 | }
45 |
46 | }
47 |
--------------------------------------------------------------------------------
/src/test/java/com/google/wave/prototype/dataflow/model/SFConfigTest.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.model;
2 |
3 | import static org.junit.Assert.assertEquals;
4 | import static org.junit.Assert.fail;
5 |
6 | import org.junit.Test;
7 |
8 | import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
9 | import com.google.wave.prototype.dataflow.model.SFConfig;
10 | import com.google.wave.prototype.dataflow.util.SFConstants;
11 |
12 | /**
13 | * Unit test for SFConfig
14 |  * Reads the config file present in the local file system and asserts the values
15 | */
16 | public class SFConfigTest {
17 | @Test
18 | public void validLocalFile() throws Exception {
19 | // Config files are present in project home
20 | StringBuilder sb = new StringBuilder();
21 | sb.append(SFConstants.LOCAL_FILE_PREFIX);
22 | sb.append(System.getProperty("user.dir"));
23 | sb.append("/test_sf_config.json");
24 |
25 | // This will read the config file and populate SFConfig with userId and password
26 | SFConfig sfConfig = SFConfig.getInstance(sb.toString(), PipelineOptionsFactory.create());
27 |
28 | assertEquals("demo@demo.com", sfConfig.getUserId());
29 | assertEquals("test", sfConfig.getPassword());
30 | }
31 |
32 | @Test
33 | public void invalidLocalFile() throws Exception {
34 | try {
35 | // Providing invalid file path which should throw Exception
36 | SFConfig.getInstance("test_sf_config.json", PipelineOptionsFactory.create());
37 | fail("Expected exception not raised");
38 | } catch (Exception e) {
39 | // Expected exception here
40 | }
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/src/test/java/com/google/wave/prototype/dataflow/pipeline/AdDataJobTest.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.pipeline;
2 |
3 |
4 | /**
5 | * Jobs are not tested as BigQueryIO is not mocked
6 | */
7 | public class AdDataJobTest {
8 |
9 | }
--------------------------------------------------------------------------------
/src/test/java/com/google/wave/prototype/dataflow/pipeline/SFReferenceDataJobTest.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.pipeline;
2 |
3 |
4 | /**
5 | * Jobs are not tested as BigQueryIO is not mocked
6 | */
7 | public class SFReferenceDataJobTest {
8 | }
--------------------------------------------------------------------------------
/src/test/java/com/google/wave/prototype/dataflow/sf/SFSOQLExecutorTest.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.sf;
2 |
3 | import static org.mockito.Mockito.mock;
4 | import static org.mockito.Mockito.when;
5 | import static org.junit.Assert.assertEquals;
6 | import static org.junit.Assert.assertNotNull;
7 |
8 | import java.util.List;
9 |
10 | import org.junit.Before;
11 | import org.junit.Test;
12 |
13 | import com.google.wave.prototype.dataflow.BaseTest;
14 | import com.google.wave.prototype.dataflow.model.SFConfig;
15 | import com.sforce.soap.enterprise.EnterpriseConnection;
16 | import com.sforce.soap.enterprise.QueryResult;
17 | import com.sforce.soap.enterprise.sobject.Opportunity;
18 | import com.sforce.soap.enterprise.sobject.SObject;
19 | import com.sforce.ws.ConnectionException;
20 | import com.sforce.ws.ConnectorConfig;
21 |
22 | /**
23 | * Unit test for {@link SFSOQLExecutor}
24 | */
25 | public class SFSOQLExecutorTest extends BaseTest {
26 | private static final String sfQueryStr = "SELECT AccountId, Id, ProposalID__c FROM Opportunity where ProposalID__c != null";
27 |
28 | private SFConfig sfConfig;
29 |
30 | @Before
31 | public void setup() throws Exception {
32 | sfConfig = mock(SFConfig.class);
33 |
34 | // Returning our EnterpriseConnection which return a single object during query execution
35 | when(sfConfig.createEnterpriseConnection()).thenReturn(EnterpriseConnectionExt.getInstance());
36 | }
37 |
38 | @Test
39 | public void executeQueryTest() throws Exception {
40 | int expectedRecordsCount = 1;
41 | SFSOQLExecutor executor = new SFSOQLExecutor(sfConfig);
42 | List<SObject> results = executor.executeQuery(sfQueryStr);
43 |
44 | assertNotNull(results);
45 | assertEquals(expectedRecordsCount, results.size());
46 | Opportunity opportunity = (Opportunity) results.get(0);
47 |
48 | assertEquals(ACCOUNT_ID_1, opportunity.getAccountId());
49 | assertEquals(OPPOR_ID_1, opportunity.getId());
50 | assertEquals(PROPOSAL_ID_1, opportunity.getProposalID__c());
51 | }
52 |
53 | public static class EnterpriseConnectionExt extends EnterpriseConnection {
54 |
55 | public static EnterpriseConnectionExt getInstance() throws ConnectionException {
56 | ConnectorConfig config = new ConnectorConfig();
57 | config.setUsername("dummy_sf_user");
58 | config.setPassword("dummy_sf_password");
59 | config.setManualLogin(true);
60 | // Salesforce SOAP API checks for /services/Soap/c/
61 | config.setServiceEndpoint("http://dummysgendpoint/services/Soap/c/");
62 | return new EnterpriseConnectionExt(config);
63 | }
64 |
65 | public EnterpriseConnectionExt(ConnectorConfig config)
66 | throws ConnectionException {
67 | super(config);
68 | }
69 |
70 | @Override
71 | public QueryResult query(String queryString) throws ConnectionException {
72 | QueryResult queryResult = new QueryResult();
73 |
74 | Opportunity opportunity = new Opportunity();
75 | opportunity.setAccountId(ACCOUNT_ID_1);
76 | opportunity.setProposalID__c(PROPOSAL_ID_1);
77 | opportunity.setId(OPPOR_ID_1);
78 |
79 | queryResult.setRecords(new SObject[] {opportunity});
80 | queryResult.setDone(true);
81 | return queryResult;
82 | }
83 |
84 | @Override
85 | public void logout() throws ConnectionException {
86 | // no op
87 | }
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/src/test/java/com/google/wave/prototype/dataflow/sf/SFWaveDatasetWriterTest.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.sf;
2 |
3 | import static com.google.wave.prototype.dataflow.util.SFConstants.*;
4 | import static org.junit.Assert.*;
5 | import static org.mockito.Mockito.mock;
6 | import static org.mockito.Mockito.when;
7 |
8 | import org.junit.Before;
9 | import org.junit.Test;
10 |
11 | import com.google.wave.prototype.dataflow.BaseTest;
12 | import com.google.wave.prototype.dataflow.model.SFConfig;
13 | import com.google.wave.prototype.dataflow.util.SFConstants;
14 | import com.sforce.soap.partner.PartnerConnection;
15 | import com.sforce.soap.partner.SaveResult;
16 | import com.sforce.soap.partner.sobject.SObject;
17 | import com.sforce.ws.ConnectionException;
18 | import com.sforce.ws.ConnectorConfig;
19 |
20 | /**
21 | * Unit test for {@link SFWaveDatasetWriter}
22 | */
23 | public class SFWaveDatasetWriterTest extends BaseTest {
24 | private static final String DUMMY_METADATA_CONTENT = "dummy_metadata_content";
25 | private static final String DUMMY_DATASET_CONTENT = "dummy_dataset_content";
26 | private static final String DUMMY_SOBJECT_ID = "dummy_sobject_id";
27 | private static final String DUMMY_DATASET_NAME = "dummy_dataset_name";
28 |
29 | private static int CREATE_CALL_COUNT = 0;
30 | private static int UPDATE_CALL_COUNT = 0;
31 |
32 | private SFConfig sfConfig;
33 |
34 | @Before
35 | public void setup() throws Exception {
36 | StringBuilder metadataFileLocationSB = new StringBuilder();
37 | metadataFileLocationSB.append(SFConstants.LOCAL_FILE_PREFIX);
38 | metadataFileLocationSB.append(System.getProperty("user.dir"));
39 | metadataFileLocationSB.append("/test_metadata.json");
40 |
41 | sfConfig = mock(SFConfig.class);
42 |
43 | when(sfConfig.createPartnerConnection()).thenReturn(PartnerConnectionExt.getInstance());
44 |
45 | CREATE_CALL_COUNT = 0;
46 | UPDATE_CALL_COUNT = 0;
47 | }
48 |
49 | @Test
50 | public void testWrite() throws Exception {
51 | SFWaveDatasetWriter writer = new SFWaveDatasetWriter(sfConfig, DUMMY_DATASET_NAME);
52 | String sfObjId = writer.write(DUMMY_METADATA_CONTENT.getBytes(), DUMMY_DATASET_CONTENT.getBytes());
53 |
54 | assertEquals(DUMMY_SOBJECT_ID, sfObjId);
55 | // Verify that PartnerConnection.create() has been called twice
56 | // metadata publish and datapart publish
57 | assertEquals(2, CREATE_CALL_COUNT);
58 |
59 | // Verify that PartnerConnection.update() has been called only once
60 | // finalize publish
61 | assertEquals(1, UPDATE_CALL_COUNT);
62 | }
63 |
64 | public static class PartnerConnectionExt extends PartnerConnection {
65 |
66 | public static PartnerConnectionExt getInstance() throws ConnectionException {
67 | ConnectorConfig config = new ConnectorConfig();
68 | config.setUsername("dummy_sf_user");
69 | config.setPassword("dummy_sf_password");
70 | config.setManualLogin(true);
71 | // Salesforce SOAP API checks for /services/Soap/u/
72 | config.setServiceEndpoint("http://dummysgendpoint/services/Soap/u/");
73 | return new PartnerConnectionExt(config);
74 | }
75 |
76 | public PartnerConnectionExt(ConnectorConfig config)
77 | throws ConnectionException {
78 | super(config);
79 | }
80 |
81 | @Override
82 | public SaveResult[] update(SObject[] sObjects)
83 | throws ConnectionException {
84 | int expectedSObjectCount = 1;
85 | assertEquals(expectedSObjectCount, sObjects.length);
86 |
87 | String type = sObjects[0].getType();
88 | assertEquals(STR_INSIGHTS_EXTERNAL_DATA, type);
89 |
90 | // verify action
91 | String actualAction = (String) sObjects[0].getField(STR_ACTION);
92 | assertEquals(STR_ACTION_PROCESS, actualAction);
93 |
94 | // verify Sobject Id
95 | assertEquals(DUMMY_SOBJECT_ID, sObjects[0].getId());
96 |
97 | UPDATE_CALL_COUNT++;
98 | return constructSaveResultArray();
99 | }
100 |
101 | @Override
102 | public SaveResult[] create(SObject[] sObjects)
103 | throws ConnectionException {
104 | int expectedSObjectCount = 1;
105 | assertEquals(expectedSObjectCount, sObjects.length);
106 |
107 | String type = sObjects[0].getType();
108 | assertNotNull(type);
109 | // It is metadata publish
110 | if (STR_INSIGHTS_EXTERNAL_DATA.equals(type)) {
111 | // verify dataset name
112 | String actualDatasetName = (String) sObjects[0].getField(STR_EDGEMART_ALIAS);
113 | assertEquals(DUMMY_DATASET_NAME, actualDatasetName);
114 |
115 | // verify metadata content
116 | byte[] actualMetadataContent = (byte[]) sObjects[0].getField(STR_METADATA_JSON);
117 | assertEquals(DUMMY_METADATA_CONTENT, new String(actualMetadataContent));
118 | } else if (STR_INSIGHTS_EXTERNAL_DATA_PART.equals(type)) {
119 | // verify dataset content
120 | byte[] actualDatasetContent = (byte[]) sObjects[0].getField(STR_DATAFILE);
121 | assertEquals(DUMMY_DATASET_CONTENT, new String(actualDatasetContent));
122 |
123 | // verify sobject id
124 | String actualSObjectId = (String) sObjects[0].getField(STR_INSIGHTS_EXTERNAL_DATA_ID);
125 | assertEquals(DUMMY_SOBJECT_ID, actualSObjectId);
126 | } else {
127 | fail("PartnerConnection.create() called with invalid type " + type);
128 | }
129 |
130 | CREATE_CALL_COUNT++;
131 | return constructSaveResultArray();
132 | }
133 |
134 | @Override
135 | public void logout() throws ConnectionException {
136 | // no op
137 | }
138 |
139 | private SaveResult[] constructSaveResultArray() {
140 | SaveResult saveResult = new SaveResult();
141 | saveResult.setId(DUMMY_SOBJECT_ID);
142 | saveResult.setSuccess(true);
143 |
144 | return new SaveResult[] {saveResult};
145 | }
146 | }
147 | }
148 |
--------------------------------------------------------------------------------
/src/test/java/com/google/wave/prototype/dataflow/transform/AggregateEventsTest.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.transform;
2 |
3 | import java.util.Arrays;
4 | import java.util.List;
5 |
6 | import org.hamcrest.CoreMatchers;
7 | import org.junit.Assert;
8 | import org.junit.Test;
9 |
10 | import com.google.cloud.dataflow.sdk.Pipeline;
11 | import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
12 | import com.google.cloud.dataflow.sdk.testing.TestPipeline;
13 | import com.google.cloud.dataflow.sdk.transforms.Create;
14 | import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
15 | import com.google.cloud.dataflow.sdk.values.KV;
16 | import com.google.cloud.dataflow.sdk.values.PCollection;
17 | import com.google.wave.prototype.dataflow.coder.AggregateDataCoder;
18 | import com.google.wave.prototype.dataflow.model.AggregatedData;
19 | import com.google.wave.prototype.dataflow.transform.AggregateEvents;
20 | import com.google.wave.prototype.dataflow.transform.AggregateEvents.CountEvents;
21 | import com.google.wave.prototype.dataflow.transform.AggregateEvents.FilterRawData;
22 |
23 | /**
24 |  * Unit tests for the AggregateEvents PTransform and the DoFns present in it
25 | */
26 | public class AggregateEventsTest {
27 |
28 | @SuppressWarnings("unchecked")
29 | @Test
30 | public void filterRawDataTest() {
31 | FilterRawData filterRawDataDoFn = new AggregateEvents.FilterRawData();
32 | DoFnTester<String, KV<String, String>> doFnTester = DoFnTester.of(filterRawDataDoFn);
33 |
34 | // getAdDataSampleCSVRows() will return raw AdData csv rows
35 | // FilterRawData DoFn will extract ProposalId and event from it
36 | List<KV<String, String>> results = doFnTester.processBatch(getAdDataSampleCSVRows());
37 |
38 | // Based on the input following KV are expected
39 | KV<String, String> expectedValue1 = KV.of("101", "Impression");
40 | KV<String, String> expectedValue2 = KV.of("102", "Click");
41 | KV<String, String> expectedValue3 = KV.of("101", "Click");
42 | Assert.assertThat(results, CoreMatchers.hasItems(expectedValue1, expectedValue2, expectedValue3));
43 | }
44 |
45 | @SuppressWarnings("unchecked")
46 | @Test
47 | public void countEventsDoFnTest() {
48 | CountEvents countEventsDoFn = new AggregateEvents.CountEvents();
49 | DoFnTester<KV<String, Iterable<String>>, AggregatedData> countEventDoFnTester = DoFnTester.of(countEventsDoFn);
50 |
51 | // Input to AggregateEvents.CountEvents
52 | KV<String, Iterable<String>> kvPropsalIdEvents1 = KV.of("101", (Iterable<String>) Arrays.asList("Impression", "Click", "Impression"));
53 | KV<String, Iterable<String>> kvPropsalIdEvents2 = KV.of("102", (Iterable<String>) Arrays.asList("Click", "Impression"));
54 | KV<String, Iterable<String>> kvPropsalIdEvents3 = KV.of("103", (Iterable<String>) Arrays.asList("Click"));
55 |
56 | List<AggregatedData> results = countEventDoFnTester.processBatch(kvPropsalIdEvents1, kvPropsalIdEvents2, kvPropsalIdEvents3);
57 |
58 | // Expected results
59 | // For proposalId 101, there are 1 Click and 2 Impressions in the input
60 | // Hence the expected in new AggregatedData("101", 1, 2)
61 | // For proposalId 102, there are 1 Click and 1 Impression in the input
62 | // For proposalId 103, there are 1 Click and 0 Impression in the input
63 | AggregatedData expectedValue1 = new AggregatedData("101", 1, 2);
64 | AggregatedData expectedValue2 = new AggregatedData("102", 1, 1);
65 | AggregatedData expectedValue3 = new AggregatedData("103", 1, 0);
66 | Assert.assertThat(results, CoreMatchers.hasItems(expectedValue1, expectedValue2, expectedValue3));
67 | }
68 |
69 | @Test
70 | public void aggregateEventsTransformTest() {
71 | Pipeline p = TestPipeline.create();
72 |
73 | PCollection<String> inPCol = p.apply(Create.of(getAdDataSampleCSVRows()));
74 | PCollection<AggregatedData> result = inPCol.apply(new AggregateEvents())
75 | .setCoder(AggregateDataCoder.getInstance());
76 |
77 | // Input data contains 3 rows
78 | // 2 proposal Id present in input 101 and 102
79 | // And proposal Id 101 has 1 Impression and 1 Click
80 | // Proposal Id 102 has 1 Click
81 | // So expected values are new AggregatedData("101", 1, 1) and new AggregatedData("102", 1, 0)
82 | AggregatedData expectedValue1 = new AggregatedData("101", 1, 1);
83 | AggregatedData expectedValue2 = new AggregatedData("102", 1, 0);
84 | DataflowAssert.that(result).containsInAnyOrder(Arrays.asList(expectedValue1, expectedValue2));
85 |
86 | p.run();
87 | }
88 |
89 | private String[] getAdDataSampleCSVRows() {
90 | String[] adDataSampleCSVRows = new String[3];
91 | adDataSampleCSVRows[0] = "1,01-01-14 9:00,ip-10-150-38-122/10.150.38.122,0,70.209.198.223,http://sample.com,3232,Impression,3,1,101";
92 | adDataSampleCSVRows[1] = "2,01-01-14 9:01,ip-10-150-38-122/10.150.38.123,0,70.209.198.223,http://sample.com,3232,Click,3,1,102";
93 | adDataSampleCSVRows[2] = "3,01-01-14 9:00,ip-10-150-38-122/10.150.38.122,0,70.209.198.223,http://sample.com,3232,Click,3,1,101";
94 |
95 | return adDataSampleCSVRows;
96 | }
97 |
98 | }
99 |
--------------------------------------------------------------------------------
/src/test/java/com/google/wave/prototype/dataflow/transform/SFReadTest.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.transform;
2 |
3 | import static org.mockito.Mockito.mock;
4 | import static org.mockito.Mockito.when;
5 | import static org.mockito.Mockito.withSettings;
6 |
7 | import java.io.Serializable;
8 | import java.util.ArrayList;
9 | import java.util.List;
10 |
11 | import org.junit.Before;
12 | import org.junit.Ignore;
13 | import org.junit.Test;
14 |
15 | import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
16 | import com.google.cloud.dataflow.sdk.testing.TestPipeline;
17 | import com.google.cloud.dataflow.sdk.transforms.Create;
18 | import com.google.cloud.dataflow.sdk.values.PCollection;
19 | import com.google.wave.prototype.dataflow.BaseTest;
20 | import com.google.wave.prototype.dataflow.sf.SFSOQLExecutor;
21 | import com.sforce.soap.enterprise.sobject.Opportunity;
22 | import com.sforce.soap.enterprise.sobject.SObject;
23 |
24 | public class SFReadTest extends BaseTest {
25 | private static final String sfQueryStr = "SELECT AccountId, Id, ProposalID__c FROM Opportunity where ProposalID__c != null";
26 |
27 | private SFSOQLExecutor sfSOQLExecutor;
28 |
29 | @Before
30 | public void setup() throws Exception {
31 | sfSOQLExecutor = mock(SFSOQLExecutor.class, withSettings().serializable());
32 |
33 | OpportunityExt oppor = new OpportunityExt();
34 | oppor.setAccountId(ACCOUNT_ID_1);
35 | oppor.setId(OPPOR_ID_1);
36 | oppor.setProposalID__c(PROPOSAL_ID_1);
37 | List<SObject> sobjects = new ArrayList<SObject>();
38 | sobjects.add(oppor);
39 |
40 | when(sfSOQLExecutor.executeQuery(sfQueryStr)).thenReturn(sobjects);
41 | }
42 |
43 | @Ignore("Not able to serialize Opportunity, hence not able to mock it. But the unit test for SFRead is covered as part of SFSOQLExecutor")
44 | @Test
45 | public void pTransformTest() {
46 | TestPipeline pipeline = TestPipeline.create();
47 |
48 | PCollection<String> input = pipeline.apply(Create.of(sfQueryStr));
49 | PCollection<String> results = input.apply(new SFRead(sfSOQLExecutor));
50 |
51 | DataflowAssert.that(results).containsInAnyOrder(getAsCSV(ACCOUNT_ID_1, OPPOR_ID_1, PROPOSAL_ID_1));
52 |
53 | pipeline.run();
54 | }
55 |
56 | public class OpportunityExt extends Opportunity implements Serializable {
57 | private static final long serialVersionUID = -563793703304651268L;
58 |
59 |
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/src/test/java/com/google/wave/prototype/dataflow/transform/SFWaveWriteTest.java:
--------------------------------------------------------------------------------
1 | package com.google.wave.prototype.dataflow.transform;
2 |
3 | import static org.mockito.Mockito.mock;
4 | import static org.mockito.Mockito.when;
5 | import static org.mockito.Mockito.withSettings;
6 |
7 | import java.util.Arrays;
8 | import java.util.HashSet;
9 | import java.util.List;
10 | import java.util.Set;
11 |
12 | import org.hamcrest.CoreMatchers;
13 | import org.junit.Assert;
14 | import org.junit.Before;
15 | import org.junit.Test;
16 |
17 | import com.google.cloud.dataflow.sdk.Pipeline;
18 | import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
19 | import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
20 | import com.google.cloud.dataflow.sdk.testing.TestPipeline;
21 | import com.google.cloud.dataflow.sdk.transforms.Create;
22 | import com.google.cloud.dataflow.sdk.transforms.DoFn;
23 | import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
24 | import com.google.cloud.dataflow.sdk.transforms.PTransform;
25 | import com.google.cloud.dataflow.sdk.transforms.View;
26 | import com.google.cloud.dataflow.sdk.values.KV;
27 | import com.google.cloud.dataflow.sdk.values.PCollection;
28 | import com.google.cloud.dataflow.sdk.values.PCollectionView;
29 | import com.google.wave.prototype.dataflow.model.SFWaveWriteResult;
30 | import com.google.wave.prototype.dataflow.sf.SFWaveDatasetWriter;
31 | import com.google.wave.prototype.dataflow.transform.SFWaveWrite.BundleCount;
32 | import com.google.wave.prototype.dataflow.transform.SFWaveWrite.DistributeRowData;
33 | import com.google.wave.prototype.dataflow.transform.SFWaveWrite.Write;
34 | import com.google.wave.prototype.dataflow.util.FileUtil;
35 | import com.google.wave.prototype.dataflow.util.SFConstants;
36 |
37 | /**
38 | * Simple unit tests for {@link SFWaveWrite} {@link PTransform} and its {@link DoFn}
39 | */
40 | public class SFWaveWriteTest {
41 | private static final String SAMPLE_DATA_TO_BE_WRITTEN = "001B0000003oYAfIAM,006B0000002ndnpIAA,102";
42 | private static final String SAMPLE_SF_OBJ_ID = "testSFOBjId";
43 |
44 | private SFWaveDatasetWriter writer;
45 | private String metadataFileLocation;
46 |
47 | @Before
48 | public void setup() throws Exception {
49 | StringBuilder metadataFileLocationSB = new StringBuilder();
50 | metadataFileLocationSB.append(SFConstants.LOCAL_FILE_PREFIX);
51 | metadataFileLocationSB.append(System.getProperty("user.dir"));
52 | metadataFileLocationSB.append("/test_metadata.json");
53 |
54 | metadataFileLocation = metadataFileLocationSB.toString();
55 |
56 | writer = mock(SFWaveDatasetWriter.class, withSettings().serializable());
57 | when(writer.write(
58 | FileUtil.getContent(metadataFileLocation.toString(), PipelineOptionsFactory.create()).getBytes(),
59 | (SAMPLE_DATA_TO_BE_WRITTEN + "\n").getBytes()))
60 | .thenReturn(SAMPLE_SF_OBJ_ID);
61 | }
62 |
63 | @Test
64 | public void calculateNoOfBundlesDoFnTest() {
65 | BundleCount bundleCtFn = new SFWaveWrite.BundleCount();
66 | DoFnTester<Long, Integer> bundleCtFnTester = DoFnTester.of(bundleCtFn);
67 |
68 | long bundle = 1024 * 1024 * 10l;
69 | // This should create 2 bundles
70 | long input1 = bundle + 1;
71 |
72 | // This should create 32 bundles
73 | long input2 = (bundle * 31) + 1024;
74 |
75 | // These should create 1 bundle
76 | long input3 = 1024l;
77 | long input4 = 0l;
78 |
79 | List<Integer> results = bundleCtFnTester.processBatch(input1, input2, input3, input4);
80 | Assert.assertThat(results, CoreMatchers.hasItems(2, 32, 1, 1));
81 | }
82 |
83 | @Test
84 | public void distributeRowDataDoFnTest() {
85 | int noOfBundles = 2;
86 | Pipeline p = TestPipeline.create();
87 | // Preparing sideInput
88 | PCollection<Integer> bundleCount = p.apply(Create.of(noOfBundles));
89 | PCollectionView<Integer> sideInput = bundleCount.apply(View.<Integer> asSingleton());
90 | DistributeRowData distributeRowDataDoFn = new SFWaveWrite.DistributeRowData(sideInput);
91 |
92 | DoFnTester<String, KV<Integer, String>> doFnTester = DoFnTester.of(distributeRowDataDoFn);
93 | // Providing number of bundles as sideInput
94 | doFnTester.setSideInputInGlobalWindow(sideInput, Arrays.asList(noOfBundles));
95 |
96 | List<KV<Integer, String>> results = doFnTester.processBatch(getSampleSFRefData());
97 | // Result should have 4 KV with 2 unique keys
98 | Assert.assertEquals(4, results.size());
99 | // Checking whether the result has two unique keys as noOfBundles is 2
100 | Set<Integer> keys = new HashSet<Integer>();
101 | for (KV<Integer, String> kv : results) {
102 | keys.add(kv.getKey());
103 | }
104 |
105 | Assert.assertEquals("Proper number of bundles are not created", noOfBundles, keys.size());
106 | }
107 |
108 | @SuppressWarnings("unchecked")
109 | @Test
110 | public void testWriteDoFn() throws Exception {
111 |
112 | KV<Integer, Iterable<String>> input = KV.of(1, (Iterable<String>) Arrays.asList(SAMPLE_DATA_TO_BE_WRITTEN));
113 |
114 | Write writeDoFn = new SFWaveWrite.Write(writer, metadataFileLocation);
115 | DoFnTester<KV<Integer, Iterable<String>>, SFWaveWriteResult> doFnTester = DoFnTester.of(writeDoFn);
116 |
117 | // SFWaveDatasetWriter is mocked
118 | // If proper bytes are sent by SFWaveWrite.Write then it will return SAMPLE_SF_OBJ_ID
119 | // So just checking whether it returns SAMPLE_SF_OBJ_ID or not
120 | List<SFWaveWriteResult> result = doFnTester.processBatch(input);
121 | Assert.assertThat(result, CoreMatchers.hasItems(new SFWaveWriteResult(SAMPLE_SF_OBJ_ID)));
122 | }
123 |
124 | @Test
125 | public void sfWaveWriteTest() {
126 | Pipeline p = TestPipeline.create();
127 |
128 | PCollection<String> inputPCol = p.apply(Create.of(SAMPLE_DATA_TO_BE_WRITTEN));
129 | PCollection<SFWaveWriteResult> output = inputPCol.apply(new SFWaveWrite(writer, metadataFileLocation));
130 |
131 | // SFWaveDatasetWriter is mocked
132 | // If proper bytes are sent by SFWaveWrite.Write then it will return SAMPLE_SF_OBJ_ID
133 | // So just checking whether it returns SAMPLE_SF_OBJ_ID or not
134 | DataflowAssert.that(output).containsInAnyOrder(Arrays.asList(new SFWaveWriteResult(SAMPLE_SF_OBJ_ID)));
135 | p.run();
136 | }
137 |
138 | private String[] getSampleSFRefData() {
139 | String[] sfRefDat = new String[4];
140 | // accountId, opportunityId, proposalId inputs
141 | sfRefDat[0] = "001B0000003oYAfIAM,006B0000002ndnpIAA,102";
142 | sfRefDat[1] = "001B0000003oYAfIAM,006B0000002ndnuIAA,103";
143 | sfRefDat[2] = "001B0000003oYAfIAM,006B0000002ndnkIAA,101";
144 | sfRefDat[3] = "001B0000003oUqJIAU,006B0000002nBrQIAU,0001";
145 |
146 | return sfRefDat;
147 | }
148 |
149 | }
150 |
--------------------------------------------------------------------------------