├── README.md ├── pom.xml └── src ├── main └── java │ └── com │ └── google │ └── wave │ └── prototype │ └── dataflow │ ├── coder │ ├── AggregateDataCoder.java │ └── SFCoder.java │ ├── function │ ├── AggregateDataEnricher.java │ ├── CSVFormatter.java │ └── TableRowFormatter.java │ ├── model │ ├── AggregatedData.java │ ├── SFConfig.java │ ├── SFReferenceData.java │ └── SFWaveWriteResult.java │ ├── pipeline │ ├── AdDataJob.java │ └── SFReferenceDataJob.java │ ├── sf │ ├── SFSOQLExecutor.java │ └── SFWaveDatasetWriter.java │ ├── transform │ ├── AggregateEvents.java │ ├── SFRead.java │ └── SFWaveWrite.java │ └── util │ ├── CSVUtil.java │ ├── FileUtil.java │ ├── GCSFileUtil.java │ ├── JobConstants.java │ └── SFConstants.java └── test └── java └── com └── google └── wave └── prototype └── dataflow ├── BaseTest.java ├── coder ├── AggregateDataCoderTest.java └── SFCoderTest.java ├── function ├── AggregateDataEnricherTest.java ├── CSVFormatterTest.java └── TableRowFormatterTest.java ├── model └── SFConfigTest.java ├── pipeline ├── AdDataJobTest.java └── SFReferenceDataJobTest.java ├── sf ├── SFSOQLExecutorTest.java └── SFWaveDatasetWriterTest.java └── transform ├── AggregateEventsTest.java ├── SFReadTest.java └── SFWaveWriteTest.java /README.md: -------------------------------------------------------------------------------- 1 | # README # 2 | 3 | ### springML Inc Repository ### 4 | 5 | Google Dataflow Jobs 6 | -------------------- 7 | 8 | 9 | The following two classes take care of the Google Cloud Dataflow jobs: 10 | 11 | SFReferenceDataJob - Fetches the reference data from SF (Opportunity) and populates BigQuery 12 | AdDataJob - Fetches the raw Ad data from GCS and the SF reference data from BigQuery, enriches the Ad data and populates BigQuery with the enriched data 13 | 14 | 15 | SFReferenceDataJob 16 | ------------------ 17 | 18 | This job requires the following inputs 19 | 20 | 1. Google Cloud project 21 | 2. Google Cloud staging location 22 | 3. BigQuery output table 23 | 4. SF UserId 24 | 5. SF Password 25 | 26 | On completion of the job, the BigQuery table SFDCReferenceData.SFRef will be populated with the SF reference data 27 | 28 | 29 | AdDataJob 30 | --------- 31 | 32 | This job requires the following inputs 33 | 34 | 1. Google Cloud project 35 | 2. Google Cloud staging location 36 | 3. Ad raw data (CSV) 37 | 4. BigQuery reference data table 38 | 5. BigQuery output table 39 | 40 | On completion of the job, the BigQuery table SFDCReferenceData.EnrichedSample will be populated with the enriched data.
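Both jobs log in to Salesforce with the credentials read from the JSON config file passed via --sfConfigFileLocation (parsed by SFConfig.java; the default location is gs://sam-bucket1/config/sf_source_config.json). A minimal sketch of that file with placeholder values; note that for Salesforce API logins the password value is usually the account password concatenated with the user's security token:

```json
{
    "userId": "user@example.com",
    "password": "accountPasswordPlusSecurityToken"
}
```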
41 | 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.google.wave 6 | wave_connector_prototype 7 | 0.0.2-SNAPSHOT 8 | jar 9 | 10 | 11 | 12 | in-project1 13 | In Project Repo 14 | file://${project.basedir}\lib 15 | 16 | 17 | 18 | wave_connector_prototype 19 | http://maven.apache.org 20 | 21 | 22 | UTF-8 23 | 24 | 25 | 26 | 27 | junit 28 | junit 29 | 4.12 30 | test 31 | 32 | 33 | 34 | com.google.cloud.dataflow 35 | google-cloud-dataflow-java-sdk-all 36 | LATEST 37 | 38 | 39 | 40 | com.google.apis 41 | google-api-services-storage 42 | v1-rev25-1.19.1 43 | 44 | 46 | 47 | com.google.guava 48 | guava-jdk5 49 | 50 | 51 | 52 | 53 | 54 | com.google.apis 55 | google-api-services-bigquery 56 | v2-rev187-1.19.1 57 | 58 | 60 | 61 | com.google.guava 62 | guava-jdk5 63 | 64 | 65 | 66 | 67 | 68 | com.google.http-client 69 | google-http-client-jackson2 70 | 1.19.0 71 | 72 | 74 | 75 | com.google.guava 76 | guava-jdk5 77 | 78 | 79 | 80 | 81 | 82 | com.fasterxml.jackson.core 83 | jackson-core 84 | 2.4.2 85 | 86 | 87 | 88 | com.fasterxml.jackson.core 89 | jackson-annotations 90 | 2.4.2 91 | 92 | 93 | 94 | 95 | org.slf4j 96 | slf4j-api 97 | 1.7.7 98 | 99 | 100 | 101 | org.hamcrest 102 | hamcrest-all 103 | 1.3 104 | test 105 | 106 | 107 | 108 | com.google.appengine.tools 109 | appengine-gcs-client 110 | RELEASE 111 | 112 | 113 | 114 | org.apache.commons 115 | commons-lang3 116 | 3.4 117 | 118 | 119 | 120 | commons-io 121 | commons-io 122 | 2.4 123 | 124 | 125 | 126 | 127 | sf 128 | enterprise 129 | 1 130 | 131 | 132 | sf 133 | partner 134 | 1 135 | 136 | 137 | sf 138 | wsc 139 | 1 140 | 141 | 142 | 143 | org.mockito 144 | mockito-core 145 | 2.0.26-beta 146 | test 147 | 148 | 149 | 150 | com.github.jsqlparser 151 | jsqlparser 152 | 0.9.3 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | maven-compiler-plugin 162 | 163 | 1.7 164 | 1.7 165 | 166 | 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/coder/AggregateDataCoder.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.coder; 2 | 3 | import java.io.IOException; 4 | import java.io.InputStream; 5 | import java.io.OutputStream; 6 | 7 | import com.google.cloud.dataflow.sdk.coders.AtomicCoder; 8 | import com.google.cloud.dataflow.sdk.coders.CoderException; 9 | import com.google.wave.prototype.dataflow.model.AggregatedData; 10 | 11 | /** 12 | * Coder for {@link AggregatedData} 13 | * It just uses AggregatedData.toString() to encode 14 | * AggregatedData.toString() will produce CSV of {@link AggregatedData} 15 | * In decode, 16 | * CSV is separated into fields by String.split(',') and 17 | * {@link AggregatedData} is constructed using the fields 18 | */ 19 | public class AggregateDataCoder extends AtomicCoder { 20 | private static final long serialVersionUID = 4037984240347308918L; 21 | private static final int COL_PROPOSAL_ID = 0; 22 | private static final int COL_OPPORTUNITY_ID = 1; 23 | private static final int COL_CLICK_COUNT = 2; 24 | private static final int COL_IMP_COUNT = 3; 25 | 26 | private static final AggregateDataCoder INSTANCE = new AggregateDataCoder(); 27 | private AggregateDataCoder() { } 28 | 29 | public static AggregateDataCoder getInstance() { 30 | return INSTANCE; 31 | } 32 | 33 | 
@Override 34 | public void encode(AggregatedData value, OutputStream outStream, 35 | com.google.cloud.dataflow.sdk.coders.Coder.Context context) 36 | throws CoderException, IOException { 37 | // Returning bytes of CSV 38 | // AggregatedData.toString() will be a CSV 39 | outStream.write(value.toString().getBytes()); 40 | } 41 | 42 | @Override 43 | public AggregatedData decode(InputStream inStream, 44 | com.google.cloud.dataflow.sdk.coders.Coder.Context context) 45 | throws CoderException, IOException { 46 | int csvRowSize = inStream.available(); 47 | byte[] csvRow = new byte[csvRowSize]; 48 | inStream.read(csvRow); 49 | // Stream is converted into String 50 | // String will be a CSV 51 | // CSV splitted using comma to get the fields 52 | // AggregatedData constructed using the fields 53 | String aggDataStr = new String(csvRow); 54 | String[] addDataFields = aggDataStr.split(","); 55 | 56 | 57 | return new AggregatedData(addDataFields[COL_PROPOSAL_ID], 58 | addDataFields[COL_OPPORTUNITY_ID], 59 | Integer.parseInt(addDataFields[COL_CLICK_COUNT]), 60 | Integer.parseInt(addDataFields[COL_IMP_COUNT])); 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/coder/SFCoder.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.coder; 2 | 3 | import java.io.IOException; 4 | import java.io.InputStream; 5 | import java.io.OutputStream; 6 | 7 | import com.google.cloud.dataflow.sdk.coders.AtomicCoder; 8 | import com.google.cloud.dataflow.sdk.coders.CoderException; 9 | import com.google.wave.prototype.dataflow.model.SFReferenceData; 10 | 11 | /** 12 | * Coder for {@link SFReferenceData} 13 | * It just uses SFReferenceData.toString() to encode 14 | * SFReferenceData.toString() will produce CSV of {@link SFReferenceData} 15 | * In decode, 16 | * CSV is separated into fields by String.split(',') and 17 | * {@link SFReferenceData} is constructed using the fields 18 | */ 19 | public class SFCoder extends AtomicCoder { 20 | private static final long serialVersionUID = 4037984240347308918L; 21 | private static final int COL_ACCOUNT_ID = 0; 22 | private static final int COL_OPPORTUNITY_ID = 1; 23 | private static final int COL_PROPOSAL_ID = 2; 24 | 25 | private static final SFCoder INSTANCE = new SFCoder(); 26 | private SFCoder() { } 27 | 28 | public static SFCoder getInstance() { 29 | return INSTANCE; 30 | } 31 | 32 | @Override 33 | public void encode(SFReferenceData value, OutputStream outStream, 34 | com.google.cloud.dataflow.sdk.coders.Coder.Context context) 35 | throws CoderException, IOException { 36 | // SFReferenceData.toString will provide a String as CSV 37 | outStream.write(value.toString().getBytes()); 38 | } 39 | 40 | @Override 41 | public SFReferenceData decode(InputStream inStream, 42 | com.google.cloud.dataflow.sdk.coders.Coder.Context context) 43 | throws CoderException, IOException { 44 | int size = inStream.available(); 45 | byte[] sfRefBytes = new byte[size]; 46 | inStream.read(sfRefBytes); 47 | String refStr = new String(sfRefBytes); 48 | String[] sfRefDataFields = refStr.split(","); 49 | 50 | String proposalId = null; 51 | // Proposal may be null for some rows and hence adding only if it is present 52 | if (sfRefDataFields.length > 2) { 53 | proposalId = sfRefDataFields[COL_PROPOSAL_ID]; 54 | } 55 | return new SFReferenceData(sfRefDataFields[COL_ACCOUNT_ID], sfRefDataFields[COL_OPPORTUNITY_ID], proposalId); 56 | } 
57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/function/AggregateDataEnricher.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.function; 2 | 3 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_OPPORTUNITY_ID; 4 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_PROPOSAL_ID; 5 | 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | import com.google.api.services.bigquery.model.TableRow; 10 | import com.google.cloud.dataflow.sdk.transforms.DoFn; 11 | import com.google.cloud.dataflow.sdk.values.PCollectionView; 12 | import com.google.wave.prototype.dataflow.model.AggregatedData; 13 | import com.google.wave.prototype.dataflow.pipeline.AdDataJob; 14 | 15 | /** 16 | * Enrich AggregatedData with OpportunityId 17 | * OpportunityId fetched from Google BigQuery for the corresponding ProposalId 18 | * Google BigQuery TableRow should be provided as sideInput 19 | */ 20 | public class AggregateDataEnricher extends DoFn { 21 | private static final long serialVersionUID = -369858616535388252L; 22 | 23 | private static final Logger LOG = LoggerFactory.getLogger(AdDataJob.class); 24 | 25 | private PCollectionView> sfReferenceDataView; 26 | 27 | public AggregateDataEnricher(PCollectionView> sfReferenceDataView) { 28 | this.sfReferenceDataView = sfReferenceDataView; 29 | } 30 | 31 | @Override 32 | public void processElement( 33 | DoFn.ProcessContext c) throws Exception { 34 | AggregatedData aggregatedData = c.element(); 35 | String proposalId = aggregatedData.getProposalId(); 36 | // Since in this case BigQuery table considered to be small 37 | // table rows are passed as sideInput 38 | Iterable sfReferenceData = c.sideInput(sfReferenceDataView); 39 | for (TableRow sfReferenceRow : sfReferenceData) { 40 | String proposalIdFromBigQuery = (String) sfReferenceRow.get(COL_PROPOSAL_ID); 41 | String opportunityId = (String) sfReferenceRow.get(COL_OPPORTUNITY_ID); 42 | // Make sure to fetch the opportunityId for the corresponding proposalId 43 | if (proposalIdFromBigQuery.contains(proposalId)) { 44 | LOG.info("Adding OpportunityId into aggregatedData : " + opportunityId.toString()); 45 | aggregatedData.setOpportunityId((String) sfReferenceRow.get(COL_OPPORTUNITY_ID)); 46 | } 47 | } 48 | 49 | c.output(aggregatedData); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/function/CSVFormatter.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.function; 2 | 3 | import com.google.cloud.dataflow.sdk.transforms.DoFn; 4 | import com.google.wave.prototype.dataflow.model.AggregatedData; 5 | 6 | /** 7 | * A simple DoFn to convert {@link AggregatedData} into CSV Row 8 | */ 9 | public class CSVFormatter extends DoFn { 10 | private static final long serialVersionUID = 398388311953363232L; 11 | 12 | @Override 13 | public void processElement(DoFn.ProcessContext c) 14 | throws Exception { 15 | StringBuffer sb = new StringBuffer(256); 16 | sb.append(c.element().toString()).append('\n'); 17 | c.output(sb.toString()); 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/function/TableRowFormatter.java: 
-------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.function; 2 | 3 | import java.util.List; 4 | 5 | import com.google.api.services.bigquery.model.TableRow; 6 | import com.google.cloud.dataflow.sdk.transforms.DoFn; 7 | 8 | /** 9 | * A Google Dataflow DoFn converts the given CSV row into Google BigQuery TableRow 10 | * Column Names has to be in the order in which the fields are present in CSV 11 | */ 12 | public class TableRowFormatter extends DoFn { 13 | private static final long serialVersionUID = -5798809828662211092L; 14 | 15 | private List columnNames; 16 | 17 | public TableRowFormatter(List columnNames) { 18 | this.columnNames = columnNames; 19 | } 20 | 21 | @Override 22 | public void processElement(ProcessContext c) throws Exception { 23 | TableRow row = new TableRow(); 24 | String sfReferenceData = c.element(); 25 | // CSV will contain \n at end 26 | // \n should be added as column value 27 | sfReferenceData = removeNewlineChar(sfReferenceData); 28 | 29 | String[] individualFields = sfReferenceData.split(","); 30 | // Order is according to the query we provide 31 | // For SELECT AccountId, Id, ProposalID__c FROM Opportunity 32 | // AccountId will be at 0 33 | // OpportunityId will be at 1 34 | // ProposalId will be at 2 35 | 36 | if (columnNames.size() != individualFields.length) { 37 | throw new Exception ("Number of column does not match with the columns present in CSV"); 38 | } 39 | 40 | int col = 0; 41 | for (String columnName : columnNames) { 42 | row.set(columnName, individualFields[col++]); 43 | } 44 | 45 | c.output(row); 46 | } 47 | 48 | private String removeNewlineChar(String sfReferenceData) { 49 | int newlineCharIndex = sfReferenceData.lastIndexOf('\n'); 50 | if (newlineCharIndex != -1) { 51 | sfReferenceData = sfReferenceData.substring(0, newlineCharIndex); 52 | } 53 | 54 | return sfReferenceData; 55 | } 56 | } -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/model/AggregatedData.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.model; 2 | 3 | import com.google.cloud.dataflow.sdk.coders.DefaultCoder; 4 | import com.google.wave.prototype.dataflow.coder.AggregateDataCoder; 5 | 6 | /** 7 | * POJO holding enriched Salesforce wave data 8 | * ProposalId, OpportunityId, ClickCount and ImpressionCount 9 | */ 10 | @DefaultCoder(AggregateDataCoder.class) 11 | public class AggregatedData { 12 | private String proposalId = ""; 13 | private String opportunityId = ""; 14 | private int clickCount = 0; 15 | private int impressionCount = 0; 16 | 17 | // Used before adding OpportunityId 18 | public AggregatedData(String proposalId, int clickCount, 19 | int impressionCount) { 20 | this.proposalId = proposalId; 21 | this.clickCount = clickCount; 22 | this.impressionCount = impressionCount; 23 | } 24 | 25 | public AggregatedData(String proposalId, String opportunityId, int clickCount, 26 | int impressionCount) { 27 | this.proposalId = proposalId; 28 | this.opportunityId = opportunityId; 29 | this.clickCount = clickCount; 30 | this.impressionCount = impressionCount; 31 | } 32 | 33 | public String getProposalId() { 34 | return proposalId; 35 | } 36 | 37 | public void setProposalId(String proposalId) { 38 | this.proposalId = proposalId; 39 | } 40 | 41 | public int getClickCount() { 42 | return clickCount; 43 | } 44 | 45 | public void setClickCount(int 
clicksCount) { 46 | this.clickCount = clicksCount; 47 | } 48 | 49 | public int getImpressionCount() { 50 | return impressionCount; 51 | } 52 | 53 | public void setImpressionCount(int impressionCount) { 54 | this.impressionCount = impressionCount; 55 | } 56 | 57 | public void incrementImpressionCount() { 58 | this.impressionCount++; 59 | } 60 | 61 | public void incrementClickCount() { 62 | this.clickCount++; 63 | } 64 | 65 | public void addImpressionCount(int impressionCount) { 66 | this.impressionCount += impressionCount; 67 | } 68 | 69 | public void addClickCount(int clickCount) { 70 | this.clickCount += clickCount; 71 | } 72 | 73 | public String getOpportunityId() { 74 | return opportunityId; 75 | } 76 | 77 | public void setOpportunityId(String opportunityId) { 78 | this.opportunityId = opportunityId; 79 | } 80 | 81 | @Override 82 | public String toString() { 83 | // Constructs CSV row using fields 84 | return proposalId + "," + opportunityId + "," + clickCount + "," + impressionCount; 85 | } 86 | 87 | @Override 88 | public int hashCode() { 89 | final int prime = 31; 90 | int result = 1; 91 | result = prime * result + clickCount; 92 | result = prime * result + impressionCount; 93 | result = prime * result 94 | + ((opportunityId == null) ? 0 : opportunityId.hashCode()); 95 | result = prime * result 96 | + ((proposalId == null) ? 0 : proposalId.hashCode()); 97 | return result; 98 | } 99 | 100 | @Override 101 | public boolean equals(Object obj) { 102 | if (this == obj) 103 | return true; 104 | if (obj == null) 105 | return false; 106 | if (getClass() != obj.getClass()) 107 | return false; 108 | AggregatedData other = (AggregatedData) obj; 109 | if (clickCount != other.clickCount) 110 | return false; 111 | if (impressionCount != other.impressionCount) 112 | return false; 113 | if (opportunityId == null) { 114 | if (other.opportunityId != null) 115 | return false; 116 | } else if (!opportunityId.equals(other.opportunityId)) 117 | return false; 118 | if (proposalId == null) { 119 | if (other.proposalId != null) 120 | return false; 121 | } else if (!proposalId.equals(other.proposalId)) 122 | return false; 123 | return true; 124 | } 125 | 126 | 127 | 128 | } 129 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/model/SFConfig.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.model; 2 | 3 | import java.io.Serializable; 4 | 5 | import org.apache.commons.lang3.StringUtils; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | import com.google.appengine.repackaged.com.google.gson.Gson; 10 | import com.google.appengine.repackaged.com.google.gson.GsonBuilder; 11 | import com.google.cloud.dataflow.sdk.coders.DefaultCoder; 12 | import com.google.cloud.dataflow.sdk.coders.SerializableCoder; 13 | import com.google.cloud.dataflow.sdk.options.PipelineOptions; 14 | import com.google.wave.prototype.dataflow.util.FileUtil; 15 | import com.google.wave.prototype.dataflow.util.SFConstants; 16 | import com.sforce.soap.enterprise.EnterpriseConnection; 17 | import com.sforce.soap.partner.Connector; 18 | import com.sforce.soap.partner.PartnerConnection; 19 | import com.sforce.ws.ConnectionException; 20 | import com.sforce.ws.ConnectorConfig; 21 | 22 | /** 23 | * Holds the configuration which will be used by SFSource 24 | * Fetches Salesforce user credentials by reading the configuration file present in GS or local 25 | * A config file will have
the below content 26 | * { 27 | * "userId": , 28 | * "password": 29 | * } 30 | */ 31 | @DefaultCoder(SerializableCoder.class) 32 | public class SFConfig implements Serializable { 33 | private static final long serialVersionUID = -5569745252294105529L; 34 | 35 | private static final Logger LOG = LoggerFactory.getLogger(SFConfig.class); 36 | 37 | private String userId; 38 | private String password; 39 | 40 | public static SFConfig getInstance(String configFileLocation, PipelineOptions options) throws Exception { 41 | validate(configFileLocation); 42 | // Content will be in JSON 43 | // So constructing SFConfig bean using GSON 44 | String json = FileUtil.getContent(configFileLocation, options); 45 | Gson gson = new GsonBuilder().create(); 46 | // Unmarshalling file content into SFConfig 47 | return gson.fromJson(json, SFConfig.class); 48 | } 49 | 50 | public String getUserId() { 51 | return userId; 52 | } 53 | 54 | public String getPassword() { 55 | return password; 56 | } 57 | 58 | public PartnerConnection createPartnerConnection() throws Exception { 59 | ConnectorConfig config = new ConnectorConfig(); 60 | LOG.debug("Connecting SF Partner Connection using " + getUserId()); 61 | config.setUsername(getUserId()); 62 | config.setPassword(getPassword()); 63 | 64 | try { 65 | return Connector.newConnection(config); 66 | } catch (ConnectionException ce) { 67 | LOG.error("Exception while creating connection", ce); 68 | throw new Exception(ce); 69 | } 70 | } 71 | 72 | public EnterpriseConnection createEnterpriseConnection() throws Exception { 73 | ConnectorConfig config = new ConnectorConfig(); 74 | LOG.debug("Connecting SF Partner Connection using " + getUserId()); 75 | config.setUsername(getUserId()); 76 | config.setPassword(getPassword()); 77 | 78 | try { 79 | return com.sforce.soap.enterprise.Connector.newConnection(config); 80 | } catch (ConnectionException ce) { 81 | LOG.error("Exception while creating connection", ce); 82 | throw new Exception(ce); 83 | } 84 | } 85 | 86 | private static void validate(String configFileLocation) throws Exception { 87 | // Checking whether the file is provided in proper format 88 | // GS file should start with gs:// 89 | // local file should start with file:// 90 | if (!StringUtils.isEmpty(configFileLocation)) { 91 | if (configFileLocation.startsWith(SFConstants.GS_FILE_PREFIX) || 92 | configFileLocation.startsWith(SFConstants.LOCAL_FILE_PREFIX)) { 93 | return; 94 | } 95 | } 96 | 97 | // Provided configFileLocation is not valid 98 | // Stopping the Job 99 | throw new Exception("Invalid Configuration file " + configFileLocation); 100 | } 101 | 102 | } 103 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/model/SFReferenceData.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.model; 2 | 3 | import java.io.Serializable; 4 | 5 | /** 6 | * POJO containing Salesforce reference data 7 | */ 8 | public class SFReferenceData implements Serializable { 9 | private static final long serialVersionUID = -7597520654419284165L; 10 | 11 | private String accountId; 12 | private String opportunityId; 13 | private String proposalId; 14 | 15 | public SFReferenceData(String accountId, String opportunityId, 16 | String proposalId) { 17 | super(); 18 | this.accountId = accountId; 19 | this.opportunityId = opportunityId; 20 | this.proposalId = proposalId; 21 | } 22 | 23 | public String getAccountId() { 24 | return 
accountId; 25 | } 26 | 27 | public void setAccountId(String accountId) { 28 | this.accountId = accountId; 29 | } 30 | 31 | public String getOpportunityId() { 32 | return opportunityId; 33 | } 34 | 35 | public void setOpportunityId(String opportunityId) { 36 | this.opportunityId = opportunityId; 37 | } 38 | 39 | public String getProposalId() { 40 | return proposalId; 41 | } 42 | 43 | public void setProposalId(String proposalId) { 44 | this.proposalId = proposalId; 45 | } 46 | 47 | @Override 48 | public String toString() { 49 | return accountId + ","+ opportunityId + "," + proposalId; 50 | } 51 | 52 | @Override 53 | public int hashCode() { 54 | final int prime = 31; 55 | int result = 1; 56 | result = prime * result 57 | + ((accountId == null) ? 0 : accountId.hashCode()); 58 | result = prime * result 59 | + ((opportunityId == null) ? 0 : opportunityId.hashCode()); 60 | result = prime * result 61 | + ((proposalId == null) ? 0 : proposalId.hashCode()); 62 | return result; 63 | } 64 | 65 | @Override 66 | public boolean equals(Object obj) { 67 | if (this == obj) 68 | return true; 69 | if (obj == null) 70 | return false; 71 | if (getClass() != obj.getClass()) 72 | return false; 73 | SFReferenceData other = (SFReferenceData) obj; 74 | if (accountId == null) { 75 | if (other.accountId != null) 76 | return false; 77 | } else if (!accountId.equals(other.accountId)) 78 | return false; 79 | if (opportunityId == null) { 80 | if (other.opportunityId != null) 81 | return false; 82 | } else if (!opportunityId.equals(other.opportunityId)) 83 | return false; 84 | if (proposalId == null) { 85 | if (other.proposalId != null) 86 | return false; 87 | } else if (!proposalId.equals(other.proposalId)) 88 | return false; 89 | return true; 90 | } 91 | 92 | } 93 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/model/SFWaveWriteResult.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.model; 2 | 3 | import java.io.Serializable; 4 | 5 | /** 6 | * WriteResult class 7 | * This just holds the Salesforce object Id of the persisted data 8 | */ 9 | public class SFWaveWriteResult implements Serializable { 10 | private static final long serialVersionUID = -7451739773848100070L; 11 | 12 | private String sfObjId; 13 | 14 | public SFWaveWriteResult(String sfObjId) { 15 | this.sfObjId = sfObjId; 16 | } 17 | 18 | public String getSfObjId() { 19 | return sfObjId; 20 | } 21 | 22 | @Override 23 | public int hashCode() { 24 | final int prime = 31; 25 | int result = 1; 26 | result = prime * result + ((sfObjId == null) ? 
0 : sfObjId.hashCode()); 27 | return result; 28 | } 29 | 30 | @Override 31 | public boolean equals(Object obj) { 32 | if (this == obj) 33 | return true; 34 | if (obj == null) 35 | return false; 36 | if (getClass() != obj.getClass()) 37 | return false; 38 | SFWaveWriteResult other = (SFWaveWriteResult) obj; 39 | if (sfObjId == null) { 40 | if (other.sfObjId != null) 41 | return false; 42 | } else if (!sfObjId.equals(other.sfObjId)) 43 | return false; 44 | return true; 45 | } 46 | 47 | 48 | } -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/pipeline/AdDataJob.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.pipeline; 2 | 3 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_CLICKS; 4 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_IMPRESSIONS; 5 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_OPPORTUNITY_ID; 6 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_PROPOSAL_ID; 7 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_TYPE_INTEGER; 8 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_TYPE_STRING; 9 | 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | 13 | import com.google.api.services.bigquery.model.TableFieldSchema; 14 | import com.google.api.services.bigquery.model.TableRow; 15 | import com.google.api.services.bigquery.model.TableSchema; 16 | import com.google.cloud.dataflow.sdk.Pipeline; 17 | import com.google.cloud.dataflow.sdk.io.BigQueryIO; 18 | import com.google.cloud.dataflow.sdk.io.TextIO; 19 | import com.google.cloud.dataflow.sdk.options.Default; 20 | import com.google.cloud.dataflow.sdk.options.PipelineOptions; 21 | import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; 22 | import com.google.cloud.dataflow.sdk.options.Validation; 23 | import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner; 24 | import com.google.cloud.dataflow.sdk.transforms.ParDo; 25 | import com.google.cloud.dataflow.sdk.transforms.View; 26 | import com.google.cloud.dataflow.sdk.values.PCollection; 27 | import com.google.cloud.dataflow.sdk.values.PCollectionView; 28 | import com.google.wave.prototype.dataflow.coder.AggregateDataCoder; 29 | import com.google.wave.prototype.dataflow.function.AggregateDataEnricher; 30 | import com.google.wave.prototype.dataflow.function.CSVFormatter; 31 | import com.google.wave.prototype.dataflow.function.TableRowFormatter; 32 | import com.google.wave.prototype.dataflow.model.AggregatedData; 33 | import com.google.wave.prototype.dataflow.model.SFConfig; 34 | import com.google.wave.prototype.dataflow.sf.SFWaveDatasetWriter; 35 | import com.google.wave.prototype.dataflow.transform.AggregateEvents; 36 | import com.google.wave.prototype.dataflow.transform.SFWaveWrite; 37 | 38 | /** 39 | * Google Dataflow Job 40 | * 1. Reads the raw Ad Data from Google cloud storage 41 | * 2. Reads Salesforce Reference data from Google BigQuery 42 | * 3. Enrich Ad Data using Salesforce Reference data 43 | * 4. 
Publish the Enriched data into Salesforce Wave and Google BigQuery 44 | * To execute, provide the following configuration 45 | * --project=YOUR_PROJECT_ID 46 | * --stagingLocation=YOUR_STAGING_LOCATON 47 | * --inputCSV=GCS_LOCATION_OF_YOUR_RAW_AD_DATA 48 | * --inputTable=GOOGLE_BIGQUERY_TABLE_CONTAINING_SALESFORCE_REFERENCE_DATA 49 | * --output=GOOGLE_BIGQUERY_TABLE_TO_WHICH_ENRICHED_DATA_HAS_TO_BE_ADDED 50 | * --dataset=SALESFORCE WAVE DATASET 51 | * --sfMetadataFileLocation=GCS_LOCATION_OF_SALESFORCE_METADATA_FILE 52 | * --sfConfigFileLocation=GCS_LOCATION_OF_SALESFORCE_CONFIG_FILE 53 | */ 54 | public class AdDataJob { 55 | public static interface Options extends PipelineOptions { 56 | @Default.String("gs://sam-bucket1/SampleAdData/ad-server-data1.csv") 57 | String getInputCSV(); 58 | void setInputCSV(String value); 59 | 60 | @Default.String("ace-scarab-94723:SFDCReferenceData.SFRef") 61 | String getInputTable(); 62 | void setInputTable(String value); 63 | 64 | @Validation.Required 65 | @Default.String("ace-scarab-94723:SFDCReferenceData.EnrichedSample") 66 | String getOutput(); 67 | void setOutput(String value); 68 | 69 | @Default.String("SampleAdDataSet") 70 | String getDataset(); 71 | void setDataset(String dataset); 72 | 73 | @Default.String("gs://sam-bucket1/SampleAdData/metadata.json") 74 | String getSfMetadataFileLocation(); 75 | void setSfMetadataFileLocation(String sfMetadataFileLocation); 76 | 77 | @Default.String("gs://sam-bucket1/config/sf_source_config.json") 78 | String getSfConfigFileLocation(); 79 | void setSfConfigFileLocation(String sfConfigFileLocation); 80 | } 81 | 82 | private static TableSchema getSchema() { 83 | List fields = new ArrayList<>(); 84 | fields.add(constructTableFieldSchema(COL_PROPOSAL_ID, COL_TYPE_STRING)); 85 | fields.add(constructTableFieldSchema(COL_OPPORTUNITY_ID, COL_TYPE_STRING)); 86 | fields.add(constructTableFieldSchema(COL_CLICKS, COL_TYPE_INTEGER)); 87 | fields.add(constructTableFieldSchema(COL_IMPRESSIONS, COL_TYPE_INTEGER)); 88 | 89 | TableSchema tableSchema = new TableSchema().setFields(fields); 90 | tableSchema.setFields(fields); 91 | return tableSchema; 92 | } 93 | 94 | private static TableFieldSchema constructTableFieldSchema(String name, String type) { 95 | TableFieldSchema tableFieldSchema = new TableFieldSchema(); 96 | tableFieldSchema.setName(name); 97 | tableFieldSchema.setType(type); 98 | 99 | return tableFieldSchema; 100 | } 101 | 102 | private static List getEnrichedTableColumns() { 103 | List columns = new ArrayList(4); 104 | 105 | columns.add(COL_PROPOSAL_ID); 106 | columns.add(COL_OPPORTUNITY_ID); 107 | columns.add(COL_CLICKS); 108 | columns.add(COL_IMPRESSIONS); 109 | 110 | return columns; 111 | } 112 | 113 | private static SFWaveDatasetWriter createSFWaveDatasetWriter(AdDataJob.Options options) throws Exception { 114 | SFConfig sfConfig = SFConfig.getInstance(options.getSfConfigFileLocation(), options); 115 | return new SFWaveDatasetWriter(sfConfig, options.getDataset()); 116 | } 117 | 118 | public static void main(String[] args) throws Exception { 119 | // Helper if command line options are not provided 120 | if (args.length < 2) { 121 | args = new String[2]; 122 | args[0] = "--project=ace-scarab-94723"; 123 | args[1] = "--stagingLocation=gs://sam-bucket1/staging"; 124 | } 125 | 126 | Options options = PipelineOptionsFactory.fromArgs(args) 127 | .withValidation().as(Options.class); 128 | // Always executing using BlockingDataflowPipelineRunner 129 | options.setRunner(BlockingDataflowPipelineRunner.class); 130 | Pipeline p 
= Pipeline.create(options); 131 | 132 | // Reading the CSV present in GCS 133 | PCollection aggregated = p.apply(TextIO.Read.from(options.getInputCSV())) 134 | .apply(new AggregateEvents()) 135 | .setCoder(AggregateDataCoder.getInstance()); 136 | 137 | // Reading Salesforce reference data from Google BigQuery 138 | PCollection tableColl = p.apply(BigQueryIO.Read.from(options.getInputTable())); 139 | final PCollectionView> sideInput = tableColl.apply(View.asIterable()); 140 | // Salesforce Reference data passed as sideInput 141 | PCollection enriched = aggregated 142 | .apply(ParDo.withSideInputs(sideInput) 143 | .of((new AggregateDataEnricher(sideInput)))) 144 | .setCoder(AggregateDataCoder.getInstance()); 145 | 146 | // Converting into CSV 147 | PCollection enrichedCSV = enriched.apply(ParDo.of(new CSVFormatter())); 148 | // Writing the results into Salesforce Wave 149 | enrichedCSV 150 | .apply(new SFWaveWrite(createSFWaveDatasetWriter(options), options.getSfMetadataFileLocation())); 151 | 152 | // Populated BigQuery with enriched data 153 | enrichedCSV 154 | .apply(ParDo.of(new TableRowFormatter(getEnrichedTableColumns()))) 155 | .apply(BigQueryIO.Write 156 | .to(options.getOutput()) 157 | .withSchema(getSchema()) 158 | .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) 159 | .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)); 160 | p.run(); 161 | } 162 | 163 | } 164 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/pipeline/SFReferenceDataJob.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.pipeline; 2 | 3 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_ACCOUNT_ID; 4 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_OPPORTUNITY_ID; 5 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_PROPOSAL_ID; 6 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_TYPE_STRING; 7 | 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | 11 | import com.google.api.services.bigquery.model.TableFieldSchema; 12 | import com.google.api.services.bigquery.model.TableSchema; 13 | import com.google.cloud.dataflow.sdk.Pipeline; 14 | import com.google.cloud.dataflow.sdk.io.BigQueryIO; 15 | import com.google.cloud.dataflow.sdk.options.Default; 16 | import com.google.cloud.dataflow.sdk.options.Description; 17 | import com.google.cloud.dataflow.sdk.options.PipelineOptions; 18 | import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; 19 | import com.google.cloud.dataflow.sdk.options.Validation; 20 | import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner; 21 | import com.google.cloud.dataflow.sdk.transforms.Create; 22 | import com.google.cloud.dataflow.sdk.transforms.ParDo; 23 | import com.google.wave.prototype.dataflow.function.TableRowFormatter; 24 | import com.google.wave.prototype.dataflow.model.SFConfig; 25 | import com.google.wave.prototype.dataflow.sf.SFSOQLExecutor; 26 | import com.google.wave.prototype.dataflow.transform.SFRead; 27 | 28 | /** 29 | * Google Dataflow Job 30 | * 1. Read Salesforce Reference Data using {@link SFRead} 31 | * 2. 
Populate Google BigQuery Table with Salesforce Reference Data 32 | * To execute, provide the following configuration 33 | * --project=YOUR_PROJECT_ID 34 | * --stagingLocation=YOUR_STAGING_LOCATON 35 | * --output=GOOGLE_BIGQUERY_TABLE_TO_WHICH_SALESFORCE_REFERENCE_DATA_WILL_BE_POPULATED 36 | * --sfConfigFileLocation=GCS_LOCATION_OF_SALESFORCE_CONFIG_FILE 37 | * --sfQuery=SALESFORCE_SOQL_TO_FETCH_SALESFORCE_REFERENCE_DATA 38 | */ 39 | public class SFReferenceDataJob { 40 | 41 | private static interface Options extends PipelineOptions { 42 | @Description("BigQuery table to write to, specified as " 43 | + ":.. The dataset must already exist.") 44 | @Validation.Required 45 | String getOutput(); 46 | void setOutput(String value); 47 | 48 | @Default.String("gs://sam-bucket1/config/sf_source_config.json") 49 | String getSfConfigFileLocation(); 50 | void setSfConfigFileLocation(String sfConfigFileLocation); 51 | 52 | @Default.String("SELECT AccountId, Id, ProposalID__c FROM Opportunity where ProposalID__c != null") 53 | String getSfQuery(); 54 | void setSfQuery(String sfQuery); 55 | } 56 | 57 | private static TableSchema getSchema() { 58 | List fields = new ArrayList<>(); 59 | 60 | fields.add(constructTableFieldSchema(COL_ACCOUNT_ID, COL_TYPE_STRING)); 61 | fields.add(constructTableFieldSchema(COL_OPPORTUNITY_ID, COL_TYPE_STRING)); 62 | fields.add(constructTableFieldSchema(COL_PROPOSAL_ID, COL_TYPE_STRING)); 63 | 64 | TableSchema schema = new TableSchema().setFields(fields); 65 | return schema; 66 | } 67 | 68 | private static TableFieldSchema constructTableFieldSchema(String name, String type) { 69 | TableFieldSchema tableFieldSchema = new TableFieldSchema(); 70 | 71 | tableFieldSchema.setName(name); 72 | tableFieldSchema.setType(type); 73 | 74 | return tableFieldSchema; 75 | } 76 | 77 | private static List getSFRefTableColumns() { 78 | List columns = new ArrayList(4); 79 | 80 | columns.add(COL_ACCOUNT_ID); 81 | columns.add(COL_OPPORTUNITY_ID); 82 | columns.add(COL_PROPOSAL_ID); 83 | 84 | return columns; 85 | } 86 | 87 | public static void main(String args[]) throws Exception { 88 | if (args.length < 3) { 89 | args = new String[3]; 90 | args[0] = "--project=ace-scarab-94723"; 91 | args[1] = "--stagingLocation=gs://sam-bucket1/staging"; 92 | args[2] = "--output=ace-scarab-94723:SFDCReferenceData.SFRef"; 93 | } 94 | 95 | Options options = PipelineOptionsFactory.fromArgs(args) 96 | .withValidation().as(Options.class); 97 | options.setRunner(BlockingDataflowPipelineRunner.class); 98 | Pipeline p = Pipeline.create(options); 99 | 100 | // SFSOQLExecutor which will be used to execute SOQL query 101 | // SFConfig which will be used to create Salesforce Connection 102 | SFSOQLExecutor soqlExecutor = new SFSOQLExecutor(SFConfig.getInstance(options.getSfConfigFileLocation(), options)); 103 | 104 | // Executing pipeline 105 | p.apply(Create.of(options.getSfQuery())) 106 | // Reading from Salesforce 107 | .apply(new SFRead(soqlExecutor)) 108 | // Convert to TableRow 109 | .apply(ParDo.of(new TableRowFormatter(getSFRefTableColumns()))) 110 | // Wiring into BigQuery 111 | .apply(BigQueryIO.Write 112 | .to(options.getOutput()) 113 | .withSchema(getSchema()) 114 | .withCreateDisposition( 115 | BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) 116 | .withWriteDisposition( 117 | // Since all data are fetched from Salesforce, 118 | // we need to overwrite the existing data 119 | BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); 120 | p.run(); 121 | } 122 | 123 | } 124 | 
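In the job above, TableRowFormatter only produces correct rows if the column list is in the same order as the fields of the CSV rows produced by SFRead (AccountId, Id, ProposalID__c for the default SOQL query). A minimal sketch of that mapping outside a full pipeline run, assuming the Dataflow SDK's DoFnTester utility, placeholder column names and made-up Salesforce IDs:

```java
import java.util.Arrays;
import java.util.List;

import com.google.api.services.bigquery.model.TableRow;
import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
import com.google.wave.prototype.dataflow.function.TableRowFormatter;

public class TableRowFormatterSketch {
    public static void main(String[] args) throws Exception {
        // Column names must follow the field order of the SOQL query:
        // SELECT AccountId, Id, ProposalID__c FROM Opportunity
        // The literals below are placeholders; the job itself uses the
        // COL_* constants from JobConstants.
        List<String> columns = Arrays.asList("AccountId", "OpportunityId", "ProposalId");

        // DoFnTester runs a DoFn without building a whole pipeline
        DoFnTester<String, TableRow> tester = DoFnTester.of(new TableRowFormatter(columns));

        // One CSV row as SFRead would emit it (the trailing newline is stripped by the DoFn)
        List<TableRow> rows = tester.processBatch("001B000000H8MxF,006B000000D2VxQ,101\n");

        // Prints a TableRow with AccountId, OpportunityId and ProposalId set
        System.out.println(rows.get(0));
    }
}
```

If the column count does not match the number of fields in the CSV row, TableRowFormatter throws an exception and the job stops.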
-------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/sf/SFSOQLExecutor.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.sf; 2 | 3 | import java.io.Serializable; 4 | import java.util.ArrayList; 5 | import java.util.Arrays; 6 | import java.util.List; 7 | 8 | import org.slf4j.Logger; 9 | import org.slf4j.LoggerFactory; 10 | 11 | import com.google.wave.prototype.dataflow.model.SFConfig; 12 | import com.google.wave.prototype.dataflow.transform.SFRead; 13 | import com.sforce.soap.enterprise.EnterpriseConnection; 14 | import com.sforce.soap.enterprise.QueryResult; 15 | import com.sforce.soap.enterprise.sobject.SObject; 16 | 17 | /** 18 | * Can be used to execute a SF SOQL Query 19 | * It will be executed using the credentials provided in {@link SFConfig} 20 | */ 21 | public class SFSOQLExecutor implements Serializable { 22 | private static final long serialVersionUID = 296485933905679924L; 23 | 24 | private static final Logger LOG = LoggerFactory.getLogger(SFSOQLExecutor.class); 25 | 26 | private SFConfig sfConfig; 27 | 28 | public SFSOQLExecutor(SFConfig sfConfig) { 29 | this.sfConfig = sfConfig; 30 | } 31 | 32 | public List<SObject> executeQuery(String sfQuery) throws Exception { 33 | EnterpriseConnection connection = null; 34 | List<SObject> records = new ArrayList<SObject>(); 35 | 36 | try { 37 | connection = sfConfig.createEnterpriseConnection(); 38 | 39 | QueryResult result = connection.query(sfQuery); 40 | // First call results are added here 41 | records.addAll(Arrays.asList(result.getRecords())); 42 | String queryLocator = result.getQueryLocator(); 43 | LOG.info("Total number of records to be read :" + result.getSize()); 44 | 45 | // Salesforce will not return all the rows in a single shot if the result is huge 46 | // By default it will return 500 rows per call 47 | // To fetch further connection.queryMore is used with the locator of the latest result 48 | // result.isDone() will tell you whether all the records have been read 49 | boolean done = result.isDone(); 50 | while (!done) { 51 | result = connection.queryMore(queryLocator); 52 | records.addAll(Arrays.asList(result.getRecords())); 53 | queryLocator = result.getQueryLocator(); 54 | done = result.isDone(); 55 | } 56 | } finally { 57 | if (connection != null) { 58 | connection.logout(); 59 | } 60 | } 61 | 62 | return records; 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/sf/SFWaveDatasetWriter.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.sf; 2 | 3 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_ACTION; 4 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_ACTION_NONE; 5 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_ACTION_PROCESS; 6 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_CSV_FORMAT; 7 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_DATAFILE; 8 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_EDGEMART_ALIAS; 9 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_FORMAT; 10 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_INSIGHTS_EXTERNAL_DATA; 11 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_INSIGHTS_EXTERNAL_DATA_ID; 12 | import static
com.google.wave.prototype.dataflow.util.SFConstants.STR_INSIGHTS_EXTERNAL_DATA_PART; 13 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_METADATA_JSON; 14 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_OPERATION; 15 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_OVERWRITE_OPERATION; 16 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_PART_NUMBER; 17 | 18 | import java.io.Serializable; 19 | 20 | import org.slf4j.Logger; 21 | import org.slf4j.LoggerFactory; 22 | 23 | import com.google.wave.prototype.dataflow.model.SFConfig; 24 | import com.sforce.soap.partner.Error; 25 | import com.sforce.soap.partner.PartnerConnection; 26 | import com.sforce.soap.partner.SaveResult; 27 | import com.sforce.soap.partner.sobject.SObject; 28 | 29 | /** 30 | * This can be used to write metadata and datasetData into SF Wave 31 | * 1. It creates connection using {@link SFConfig} 32 | * 2. Writes specified Metadata 33 | * 3. Writes Dataset data 34 | * 4. Finalize the write 35 | * This uses Salesforce SOAP API (Partner WSDL) 36 | */ 37 | public class SFWaveDatasetWriter implements Serializable { 38 | private static final long serialVersionUID = 5714980864384207026L; 39 | 40 | private static final Logger LOG = LoggerFactory.getLogger(SFWaveDatasetWriter.class); 41 | 42 | private SFConfig sfConfig; 43 | private String datasetName; 44 | 45 | public SFWaveDatasetWriter(SFConfig sfConfig, String datasetName) { 46 | this.sfConfig = sfConfig; 47 | this.datasetName = datasetName; 48 | } 49 | 50 | public String write(byte[] metadata, byte[] datasetData) throws Exception { 51 | PartnerConnection connection = null; 52 | try { 53 | connection = sfConfig.createPartnerConnection(); 54 | String parentId = publishMetaData(metadata, connection); 55 | publish(datasetData, parentId, connection); 56 | finalizeWavePublish(parentId, connection); 57 | 58 | return parentId; 59 | } finally { 60 | if (connection != null) { 61 | connection.logout(); 62 | } 63 | } 64 | } 65 | 66 | private void publish(byte[] content, String parentId, PartnerConnection connection) throws Exception { 67 | // Contents are being pushed here 68 | SObject dataSObject = new SObject(); 69 | dataSObject.setType(STR_INSIGHTS_EXTERNAL_DATA_PART); 70 | dataSObject.setField(STR_DATAFILE, content); 71 | LOG.trace("Writing this data into WAVE : " + new String(content)); 72 | dataSObject.setField(STR_INSIGHTS_EXTERNAL_DATA_ID, parentId); 73 | // Since the each bundle is max of 10 MB we will have only one part 74 | // Hence part number is always set to 1 75 | dataSObject.setField(STR_PART_NUMBER, 1); 76 | 77 | SaveResult[] dataPartPublishResults = connection.create(new SObject[] { dataSObject }); 78 | checkResults(dataPartPublishResults); 79 | } 80 | 81 | 82 | private void finalizeWavePublish(String parentId, PartnerConnection connection) throws Exception { 83 | SObject metaDataSObject = new SObject(); 84 | metaDataSObject.setType(STR_INSIGHTS_EXTERNAL_DATA); 85 | // Action set to process, which should finalize the DataPart published so on 86 | metaDataSObject.setField(STR_ACTION, STR_ACTION_PROCESS); 87 | // Using the Object Id during metadata publish 88 | metaDataSObject.setId(parentId); 89 | 90 | SaveResult[] metadataPublishResults = connection.update(new SObject[] {metaDataSObject}); 91 | checkResults(metadataPublishResults); 92 | } 93 | 94 | private String publishMetaData(byte[] metadata, PartnerConnection connection) throws Exception { 95 | // Metadata of a dataset is being 
published here 96 | SObject metadataSObject = new SObject(); 97 | metadataSObject.setType(STR_INSIGHTS_EXTERNAL_DATA); 98 | metadataSObject.setField(STR_FORMAT, STR_CSV_FORMAT); 99 | metadataSObject.setField(STR_EDGEMART_ALIAS, datasetName); 100 | metadataSObject.setField(STR_METADATA_JSON, metadata); 101 | metadataSObject.setField(STR_OPERATION, STR_OVERWRITE_OPERATION); 102 | // Action is None here. It will be Process only after all data part has been created 103 | metadataSObject.setField(STR_ACTION, STR_ACTION_NONE); 104 | 105 | SaveResult[] metadataPublishResults = connection.create(new SObject[] { metadataSObject }); 106 | return checkResults(metadataPublishResults); 107 | } 108 | 109 | private String checkResults(SaveResult[] publishResults) throws Exception { 110 | for (SaveResult publishResult : publishResults) { 111 | if (publishResult.isSuccess()) { 112 | LOG.debug("Flushed to wave : " + publishResult.getId()); 113 | return publishResult.getId(); 114 | } else { 115 | StringBuilder sfWaveErrMsg = new StringBuilder(); 116 | sfWaveErrMsg.append("Error while flushing data to wave.\n"); 117 | sfWaveErrMsg.append("Salesforce Job Id : " + publishResult.getId() + "\n"); 118 | sfWaveErrMsg.append("Salesforce error message : "); 119 | // Errors are concatenated to get a meaning message 120 | Error[] errors = publishResult.getErrors(); 121 | for (int i = 0; i < errors.length; i++) { 122 | sfWaveErrMsg.append(errors[i].getMessage()); 123 | } 124 | 125 | LOG.error(sfWaveErrMsg.toString()); 126 | 127 | // Stopping Job if publish fails 128 | throw new Exception(sfWaveErrMsg.toString()); 129 | } 130 | } 131 | 132 | return null; 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/transform/AggregateEvents.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.transform; 2 | 3 | import com.google.cloud.dataflow.sdk.transforms.DoFn; 4 | import com.google.cloud.dataflow.sdk.transforms.GroupByKey; 5 | import com.google.cloud.dataflow.sdk.transforms.PTransform; 6 | import com.google.cloud.dataflow.sdk.transforms.ParDo; 7 | import com.google.cloud.dataflow.sdk.values.KV; 8 | import com.google.cloud.dataflow.sdk.values.PCollection; 9 | import com.google.wave.prototype.dataflow.model.AggregatedData; 10 | import com.google.wave.prototype.dataflow.util.JobConstants; 11 | 12 | /** 13 | * Aggregate the AdData using the proposalId and event present in AdData CSV 14 | * AdData CSV data is with the below headers, 15 | * id,time,local_host,pixel_id,client_ip,request_url,cookie_id,event,version,success_code,proposal_id 16 | * In this event will be either click or Impression. There will be multiple rows with a single proposal_id 17 | * This PTransform will transform such rows into {@link AggregateEvents} 18 | */ 19 | public class AggregateEvents extends 20 | PTransform, PCollection> { 21 | private static final long serialVersionUID = 3238291110118750209L; 22 | 23 | @Override 24 | public PCollection apply(PCollection rawdata) { 25 | // Just selecting ProposalId and events 26 | PCollection> filteredData = rawdata.apply(ParDo 27 | .of(new FilterRawData())); 28 | // Grouping all events for a proposalId 29 | PCollection>> groupedData = filteredData 30 | .apply(GroupByKey. 
create()); 31 | // Counting the number of clicks and impressions for a proposalId 32 | return groupedData.apply(ParDo.of(new CountEvents())); 33 | } 34 | 35 | /** 36 | * Construct KV with proposalId as key and event as value for a given CSV Row (AdData) 37 | * CSV Row will be the input for this DoFn 38 | * Output will be a KV with proposal_id in the row as key and event in the row as value 39 | * For example, for the below input 40 | * 1,01-01-14 9:00,ip-10-150-38-122/10.150.38.122,0,70.209.198.223,http://sample.com,3232,Impression,3,1,101 41 | * output will be 42 | * KV.of(101, Impression) 43 | */ 44 | protected static class FilterRawData extends DoFn> { 45 | private static final long serialVersionUID = 6002612407682561915L; 46 | private static int COL_PROPOSAL_ID = 10; 47 | private static int COL_EVENT = 7; 48 | 49 | @Override 50 | public void processElement( 51 | DoFn>.ProcessContext c) 52 | throws Exception { 53 | // CSVRow will be like 54 | // id,time,local_host,pixel_id,client_ip,request_url,cookie_id,event,version,success_code,proposal_id 55 | // Column 7 and 10. i.e. event and proposal_id 56 | String csvRow = c.element(); 57 | String[] columns = csvRow.split(JobConstants.STR_COMMA); 58 | // Result will be KV with proposal_id as key and event as value 59 | c.output(KV.of(columns[COL_PROPOSAL_ID], columns[COL_EVENT])); 60 | } 61 | 62 | } 63 | 64 | /** 65 | * Count the number of clicks and number of Impressions for a specific ProposalId 66 | * Input for this DoFn will be KV with key as proposalId and value as events. Like, 67 | * KV(101, ("Impression", "Impression", "Click") 68 | * Output will be {@link AggregateEvents} with the proposalId and number of clicks and Impressions 69 | */ 70 | public static class CountEvents extends 71 | DoFn>, AggregatedData> { 72 | private static final long serialVersionUID = 6002612407682561915L; 73 | private static final String STR_IMPRESSION = "impression"; 74 | private static final String STR_CLICK = "click"; 75 | 76 | @Override 77 | public void processElement( 78 | DoFn>, AggregatedData>.ProcessContext c) 79 | throws Exception { 80 | // Element will be like, 81 | // KV(101, ("Impression", "Impression", "Click") 82 | KV> proposalIdEventsKV = c.element(); 83 | // Getting the events alone 84 | // ("Impression", "Impression", "Click") 85 | Iterable events = proposalIdEventsKV.getValue(); 86 | int clicks = 0; 87 | int impressions = 0; 88 | // Iterating events and increasing the click and impression count 89 | for (String event : events) { 90 | if (event.equalsIgnoreCase(STR_IMPRESSION)) { 91 | impressions++; 92 | } else if (event.equalsIgnoreCase(STR_CLICK)) { 93 | clicks++; 94 | } 95 | } 96 | 97 | // Constructing new AggregatedData with proposalId, Click Count and Impression Count 98 | c.output(new AggregatedData(proposalIdEventsKV.getKey(), clicks, impressions)); 99 | } 100 | } 101 | } -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/transform/SFRead.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.transform; 2 | 3 | import java.util.List; 4 | 5 | import com.google.cloud.dataflow.sdk.transforms.DoFn; 6 | import com.google.cloud.dataflow.sdk.transforms.GroupByKey; 7 | import com.google.cloud.dataflow.sdk.transforms.PTransform; 8 | import com.google.cloud.dataflow.sdk.transforms.ParDo; 9 | import com.google.cloud.dataflow.sdk.values.KV; 10 | import 
com.google.cloud.dataflow.sdk.values.PCollection; 11 | import com.google.wave.prototype.dataflow.sf.SFSOQLExecutor; 12 | import com.google.wave.prototype.dataflow.util.CSVUtil; 13 | import com.sforce.soap.enterprise.sobject.SObject; 14 | 15 | /** 16 | * PTransform to read the Salesforce object using SOQL 17 | * SOQL query present in pipeline will be executed and the result will be converted into CSV 18 | * This uses Salesforce SOAP API (Enterprise.wsdl) to execute SOQL 19 | * A Sample SOQL will look like, 20 | * SELECT AccountId, Id FROM Opportunity 21 | */ 22 | public final class SFRead extends PTransform, PCollection>{ 23 | private static final long serialVersionUID = -7168554842895484301L; 24 | 25 | private final int noOfBundles; 26 | private final SFSOQLExecutor soqlExecutor; 27 | 28 | public SFRead(SFSOQLExecutor soqlExecutor) { 29 | // Default to 10 30 | this.noOfBundles = 10; 31 | this.soqlExecutor = soqlExecutor; 32 | } 33 | 34 | public SFRead(SFSOQLExecutor soqlExecutor, int noOfBundles) { 35 | this.noOfBundles = noOfBundles; 36 | this.soqlExecutor = soqlExecutor; 37 | } 38 | 39 | @Override 40 | public PCollection apply(PCollection input) { 41 | return input 42 | // Executing SOQL Query 43 | .apply(ParDo.of(new ExecuteSOQL(soqlExecutor, noOfBundles))) 44 | // Creating bundles based on the key 45 | // Key will be hash modulo 46 | .apply(GroupByKey.create()) 47 | .apply(ParDo.of(new RegroupRecords())); 48 | } 49 | 50 | /** 51 | * Splitting the grouped data as individual records 52 | */ 53 | private class RegroupRecords extends DoFn>, String> { 54 | private static final long serialVersionUID = -2126735721477220174L; 55 | 56 | @Override 57 | public void processElement( 58 | DoFn>, String>.ProcessContext c) 59 | throws Exception { 60 | // Adding the result as individual Salesforce Data 61 | Iterable sfRefData = c.element().getValue(); 62 | for (String csvRow : sfRefData) { 63 | c.output(csvRow); 64 | } 65 | } 66 | 67 | } 68 | 69 | /** 70 | * Executes SOQL Query and provides the result as CSV in bundles 71 | * Result of the SOQL query will be converted into CSV 72 | * Bundles will be created according to the noOfBundles specified 73 | */ 74 | public static class ExecuteSOQL extends DoFn> { 75 | private static final long serialVersionUID = 3227568229914179295L; 76 | 77 | private int noOfBundles; 78 | private SFSOQLExecutor soqlExecutor; 79 | 80 | public ExecuteSOQL(SFSOQLExecutor soqlExecutor, int noOfBundles) { 81 | this.soqlExecutor = soqlExecutor; 82 | this.noOfBundles = noOfBundles; 83 | } 84 | 85 | @Override 86 | public void processElement( 87 | DoFn>.ProcessContext c) 88 | throws Exception { 89 | String sfQuery = c.element(); 90 | // Execute SOQL 91 | List sfResults = soqlExecutor.executeQuery(sfQuery); 92 | // Convert to CSV 93 | CSVUtil csvUtil = new CSVUtil(sfQuery); 94 | for (int i = 0, size = sfResults.size(); i < size; i++) { 95 | String csvRow = csvUtil.getAsCSV(sfResults.get(i)); 96 | // Getting hash Modulo 97 | int hashModulo = Math.abs(csvRow.hashCode() % noOfBundles); 98 | c.output(KV.of(hashModulo, csvRow)); 99 | } 100 | } 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/transform/SFWaveWrite.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.transform; 2 | 3 | import org.slf4j.Logger; 4 | import org.slf4j.LoggerFactory; 5 | 6 | import 
com.google.cloud.dataflow.sdk.options.PipelineOptions; 7 | import com.google.cloud.dataflow.sdk.transforms.Combine; 8 | import com.google.cloud.dataflow.sdk.transforms.DoFn; 9 | import com.google.cloud.dataflow.sdk.transforms.GroupByKey; 10 | import com.google.cloud.dataflow.sdk.transforms.PTransform; 11 | import com.google.cloud.dataflow.sdk.transforms.ParDo; 12 | import com.google.cloud.dataflow.sdk.transforms.Sum; 13 | import com.google.cloud.dataflow.sdk.transforms.View; 14 | import com.google.cloud.dataflow.sdk.values.KV; 15 | import com.google.cloud.dataflow.sdk.values.PCollection; 16 | import com.google.cloud.dataflow.sdk.values.PCollectionView; 17 | import com.google.wave.prototype.dataflow.model.SFConfig; 18 | import com.google.wave.prototype.dataflow.model.SFWaveWriteResult; 19 | import com.google.wave.prototype.dataflow.sf.SFWaveDatasetWriter; 20 | import com.google.wave.prototype.dataflow.util.FileUtil; 21 | 22 | /** 23 | * PTransform to write the dataset content into SF Wave This uses Salesforce 24 | * SOAP API (Partner WSDL) to publish data into Salesforce Wave This PTransform 25 | * requires the following input {@link SFWaveDatasetWriter} - Writer with 26 | * {@link SFConfig} which will be used by this transform sfMetadataFileLocation 27 | * - A Salesforce wave metadata file describing the data to be published to wave 28 | * Can be a local file or GS file Refer 29 | * https://resources.docs.salesforce.com/sfdc 30 | * /pdf/bi_dev_guide_ext_data_format.pdf 31 | */ 32 | public class SFWaveWrite extends 33 | PTransform, PCollection> { 34 | private static final long serialVersionUID = 5830880169795002498L; 35 | private static final Logger LOG = LoggerFactory 36 | .getLogger(SFWaveWrite.class); 37 | 38 | private final SFWaveDatasetWriter writer; 39 | private final String sfMetadataFileLocation; 40 | 41 | public SFWaveWrite(SFWaveDatasetWriter writer, String sfMetadataFileLocation) { 42 | this.writer = writer; 43 | this.sfMetadataFileLocation = sfMetadataFileLocation; 44 | } 45 | 46 | @Override 47 | public PCollection apply(PCollection rowData) { 48 | LOG.debug("SFWaveWrite starts"); 49 | // Number of bundles calculated here 50 | PCollection noOfBundles = rowData 51 | .apply(new CalculateNoOfBundles()); 52 | PCollectionView sideInput = noOfBundles.apply(View 53 | . asSingleton()); 54 | // Making KV with hash modulo as key and CSV row as value 55 | PCollection> kvData = rowData 56 | .apply(ParDo.withSideInputs(sideInput).of( 57 | new DistributeRowData(sideInput))); 58 | // Creating bundles using GroupByKey 59 | PCollection>> groupedRows = kvData 60 | .apply(GroupByKey. 
create()); 61 | // Writing Data into Salesforce Wave 62 | PCollection writeResult = groupedRows.apply(ParDo 63 | .of(new Write(writer, sfMetadataFileLocation))); 64 | 65 | LOG.debug("SFWaveWrite ends"); 66 | return writeResult; 67 | } 68 | 69 | /** 70 | * Calculates the Number of bundles to be created Calculation is based on 71 | * the size of the data to be sent to Salesforce Wave Size of the data is 72 | * calculated using {@code String.length()} and then {@code Sum.SumLongFn} 73 | */ 74 | public static class CalculateNoOfBundles extends 75 | PTransform, PCollection> { 76 | private static final long serialVersionUID = -7383871712471335638L; 77 | private static final String INDIVIDUAL_SIZE_PAR_DO_NAME = "IndividualSize"; 78 | private static final String NO_OF_BUNDLES_PAR_DO_NAME = "NoOfBundles"; 79 | 80 | @Override 81 | public PCollection apply(PCollection input) { 82 | return input.apply(ParDo.named(INDIVIDUAL_SIZE_PAR_DO_NAME).of( 83 | 84 | new DoFn() { 85 | private static final long serialVersionUID = -6374354958403597940L; 86 | 87 | @Override 88 | public void processElement(ProcessContext c) throws Exception { 89 | // String.length is used to get the size of data for an 90 | // individual row 91 | // As further grouping takes place, the additional size for 92 | // UTF-16 characters are ignored 93 | String rowToBePersisted = c.element(); 94 | c.output(Integer.valueOf(rowToBePersisted.length()) 95 | .longValue()); 96 | } 97 | })) 98 | // Calculating the total size of the data to be persisted into 99 | // Salesforce Wave 100 | .apply(Combine.globally(new Sum.SumLongFn())) 101 | // Number of bundles calculated based on the size of data 102 | .apply(ParDo.named(NO_OF_BUNDLES_PAR_DO_NAME).of( 103 | new BundleCount())); 104 | } 105 | } 106 | 107 | /** 108 | * Count the number of bundles to be created Number of bundles to be created 109 | * is based on the size of the data to be persisted into Salesforce wave At 110 | * a max Saleforce can accept 10MB So size of a bundle should not be more 111 | * than 10MB 112 | */ 113 | public static class BundleCount extends DoFn { 114 | private static final long serialVersionUID = -7446604319456830150L; 115 | 116 | @Override 117 | public void processElement(DoFn.ProcessContext c) 118 | throws Exception { 119 | // No of Bundles = totalSize / (1024 * 1024 * 10) 120 | // 1024 * 1024 is to convert into MB 121 | // Maximum support in Salesforce Wave API is 10 MB 122 | // For example, if the size of the data is 335544320, then 33 123 | // bundles will be created 124 | // Math.round(335544320/(1024 * 1024 * 10)) + 1 = 33 125 | Long totalDataSize = c.element(); 126 | Long maxBundleSize = 1024 * 1024 * 10l; 127 | if (totalDataSize > maxBundleSize) { 128 | c.output(Math.round(totalDataSize / maxBundleSize) + 1); 129 | } else { 130 | // As the size less than 10MB the data can be handled in single 131 | // bundle itself 132 | c.output(1); 133 | } 134 | } 135 | 136 | } 137 | 138 | /** 139 | * Distributes the data evenly to bundles If the data is of size 32 MB then 140 | * data will be distributed to 4 bundles of 8MB each 141 | */ 142 | public static class DistributeRowData extends 143 | DoFn> { 144 | private static final long serialVersionUID = 3917848069436988535L; 145 | private PCollectionView noOfBundlesPCol; 146 | 147 | // Number of bundles is calculated in CalculateNoOfBundles and 148 | // provided here as sideInput 149 | public DistributeRowData(PCollectionView noOfBundles) { 150 | this.noOfBundlesPCol = noOfBundles; 151 | } 152 | 153 | @Override 154 | public 
void processElement( 155 | DoFn>.ProcessContext c) 156 | throws Exception { 157 | // Getting the number of bundles from sideInput 158 | Integer noOfBundles = c.sideInput(noOfBundlesPCol); 159 | String waveCSVData = c.element(); 160 | // Using hash modulo to evenly distribute data across bundles 161 | int hash = Math.abs(waveCSVData.hashCode() % noOfBundles); 162 | // Using the hash as key which can be grouped later to create 163 | // bundles 164 | c.output(KV.of(hash, waveCSVData)); 165 | } 166 | } 167 | 168 | /** 169 | * DoFn which takes care of writing the datasets into Salesforce Wave This 170 | * uses {@link SFWaveDatasetWriter} 171 | */ 172 | public static class Write extends 173 | DoFn>, SFWaveWriteResult> { 174 | private static final long serialVersionUID = -1875427181542264934L; 175 | 176 | private final SFWaveDatasetWriter writer; 177 | private final String sfMetadataFileLocation; 178 | 179 | public Write(SFWaveDatasetWriter writer, String sfMetadataFileLocation) { 180 | this.writer = writer; 181 | this.sfMetadataFileLocation = sfMetadataFileLocation; 182 | } 183 | 184 | @Override 185 | public void processElement( 186 | DoFn>, SFWaveWriteResult>.ProcessContext c) 187 | throws Exception { 188 | 189 | // Converting the grouped records into bytes 190 | KV> groupedRecords = c.element(); 191 | Iterable csvRows = groupedRecords.getValue(); 192 | byte[] datasetData = getAsBytes(csvRows); 193 | 194 | String sfObjId = writer.write( 195 | getMetadataContent(c.getPipelineOptions()), datasetData); 196 | SFWaveWriteResult sfWaveWriteResult = new SFWaveWriteResult(sfObjId); 197 | c.output(sfWaveWriteResult); 198 | } 199 | 200 | private byte[] getMetadataContent(PipelineOptions options) 201 | throws Exception { 202 | String content = FileUtil.getContent(sfMetadataFileLocation, 203 | options); 204 | return content.getBytes(); 205 | } 206 | 207 | private byte[] getAsBytes(Iterable waveRows) { 208 | // Converting all CSV rows into single String which will be 209 | // published to Salesforce WAVE 210 | StringBuilder csvRows = new StringBuilder(); 211 | // Row may be like 212 | // AcccountId,OpportunityId,ClickCount,ImpressionCount 213 | for (String individualRow : waveRows) { 214 | csvRows.append(individualRow); 215 | csvRows.append('\n'); 216 | } 217 | 218 | return csvRows.toString().getBytes(); 219 | } 220 | 221 | } 222 | 223 | } 224 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/util/CSVUtil.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.util; 2 | 3 | import java.io.ByteArrayInputStream; 4 | import java.io.ByteArrayOutputStream; 5 | import java.io.IOException; 6 | import java.util.ArrayList; 7 | import java.util.HashMap; 8 | import java.util.List; 9 | import java.util.Map; 10 | 11 | import javax.xml.namespace.QName; 12 | import javax.xml.parsers.DocumentBuilder; 13 | import javax.xml.parsers.DocumentBuilderFactory; 14 | import javax.xml.parsers.ParserConfigurationException; 15 | 16 | import net.sf.jsqlparser.parser.CCJSqlParserUtil; 17 | import net.sf.jsqlparser.schema.Column; 18 | import net.sf.jsqlparser.statement.select.PlainSelect; 19 | import net.sf.jsqlparser.statement.select.Select; 20 | import net.sf.jsqlparser.statement.select.SelectExpressionItem; 21 | import net.sf.jsqlparser.statement.select.SelectItem; 22 | 23 | import org.slf4j.Logger; 24 | import org.slf4j.LoggerFactory; 25 | import org.w3c.dom.Document; 26 | 
import org.w3c.dom.Node; 27 | import org.w3c.dom.NodeList; 28 | import org.xml.sax.SAXException; 29 | 30 | import com.sforce.soap.enterprise.sobject.SObject; 31 | import com.sforce.ws.bind.TypeMapper; 32 | import com.sforce.ws.parser.XmlOutputStream; 33 | 34 | /** 35 | * Utility to convert Salesforce SObject into CSV 36 | * It requires the SOQL query to get the fields queried from Salesforce 37 | */ 38 | public class CSVUtil { 39 | private static final Logger LOG = LoggerFactory.getLogger(CSVUtil.class); 40 | 41 | /** Columns queried from Salesforce */ 42 | private List<String> columnNames = new ArrayList<String>(); 43 | 44 | /** 45 | * @param soqlQuery - SOQL query used to fetch Salesforce Reference data 46 | * @throws Exception 47 | */ 48 | public CSVUtil(String soqlQuery) throws Exception { 49 | // Parsing the SOQL Query to get the columns queried from Salesforce 50 | Select stmt = (Select) CCJSqlParserUtil.parse(soqlQuery); 51 | PlainSelect plainSelect = (PlainSelect) stmt.getSelectBody(); 52 | // SelectItems contains the columns to be selected 53 | List<SelectItem> selectItems = plainSelect.getSelectItems(); 54 | for (SelectItem selectItem : selectItems) { 55 | // We will get only columns as expressions are not supported 56 | Column column = (Column) ((SelectExpressionItem) selectItem).getExpression(); 57 | columnNames.add(column.getColumnName()); 58 | } 59 | 60 | LOG.debug("Columns from SOQL Query " + columnNames); 61 | } 62 | 63 | /** 64 | * @param sObject One of the results of executing the SOQL query 65 | * @return Converted CSV data from SObject 66 | * @throws Exception 67 | */ 68 | public String getAsCSV(SObject sObject) throws Exception { 69 | StringBuilder csv = new StringBuilder(); 70 | 71 | // Reading the SObject as XML Document 72 | Document doc = readDocument(sObject); 73 | // Reading the fields present in XML document 74 | Map<String, String> fieldMap = readFields(doc); 75 | for (int i = 0, size = columnNames.size(); i < size; i++) { 76 | if (i != 0) { 77 | csv.append(','); 78 | } 79 | 80 | // Getting the corresponding value from the fieldMap using columns constructed from SOQL query 81 | String fieldValue = fieldMap.get(columnNames.get(i)); 82 | if (fieldValue != null) { 83 | csv.append(fieldValue); 84 | } 85 | } 86 | 87 | // Completing a row 88 | csv.append('\n'); 89 | 90 | LOG.debug("Returning CSV " + csv); 91 | return csv.toString(); 92 | } 93 | 94 | private Map<String, String> readFields(Document doc) { 95 | // XML will be like 96 | // <result> 97 | //   <sf:AccountId>1233</sf:AccountId> 98 | //   <sf:Id>1234</sf:Id> 99 | //   <sf:ProposalID__c>101</sf:ProposalID__c> 100 | // </result> 101 | // 102 | // 103 | // Here doc is the XML document whose root element is <result> 104 | Node parentElement = doc.getChildNodes().item(0); 105 | // Here parentElement is <result> 106 | NodeList childNodes = parentElement.getChildNodes(); 107 | // Child Nodes are <sf:AccountId>, <sf:Id> and <sf:ProposalID__c> 108 | Map<String, String> fieldValueMap = new HashMap<String, String>(); 109 | if (childNodes != null && childNodes.getLength() > 0) { 110 | for (int i = 0, size = childNodes.getLength(); i < size; i++) { 111 | Node item = childNodes.item(i); 112 | // Removing prefix as the column name present in SOQL will not have it 113 | // This nodename will be compared with fields queried in SOQL 114 | fieldValueMap.put(stripPrefix(item.getNodeName()), item.getTextContent()); 115 | } 116 | } 117 | 118 | return fieldValueMap; 119 | } 120 | 121 | private String stripPrefix(String nodeName) { 122 | return strip(nodeName, ':'); 123 | } 124 | 125 | private String strip(String str, char separator) { 126 | int aliasIndex = str.indexOf(separator); 127 | if (aliasIndex != -1) { 128 | return str.substring(aliasIndex + 1); 129 | } 130 | 131 | return str; 132 | } 133 | 134 | private Document 
readDocument(SObject sObject) throws Exception { 135 | ByteArrayInputStream bis = null; 136 | XmlOutputStream xmlOutputStream = null; 137 | 138 | try { 139 | // Getting the doc as 140 | // As Salesforce SOAP API is used converting to XML is the only option 141 | QName element = new QName("urn:sobject", "result"); 142 | ByteArrayOutputStream bos = new ByteArrayOutputStream(); 143 | 144 | xmlOutputStream = new XmlOutputStream(bos, false); 145 | xmlOutputStream.startDocument(); 146 | // Writes all the fields to outputStream 147 | sObject.write(element, xmlOutputStream, new TypeMapper()); 148 | xmlOutputStream.endDocument(); 149 | 150 | bis = new ByteArrayInputStream(bos.toByteArray()); 151 | // Converting it as DOM object 152 | DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); 153 | DocumentBuilder docBuilder = builderFactory.newDocumentBuilder(); 154 | return docBuilder.parse(bis); 155 | } catch (ParserConfigurationException | SAXException e) { 156 | throw new Exception(e); 157 | } finally { 158 | if (bis != null) { 159 | try { 160 | bis.close(); 161 | } catch (IOException ioe) { 162 | LOG.warn("Error while closing Stream", ioe); 163 | } 164 | 165 | if (xmlOutputStream != null) { 166 | // This will make sure the ByteArrayOutputStream provided is also closed 167 | try { 168 | xmlOutputStream.close(); 169 | } catch (IOException ioe) { 170 | LOG.warn("Error while closing Stream", ioe); 171 | } 172 | } 173 | } 174 | } 175 | } 176 | 177 | } 178 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/util/FileUtil.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.util; 2 | 3 | import java.io.File; 4 | 5 | import org.apache.commons.io.Charsets; 6 | import org.apache.commons.io.FileUtils; 7 | import org.apache.commons.lang3.StringUtils; 8 | 9 | import com.google.cloud.dataflow.sdk.options.PipelineOptions; 10 | 11 | /** 12 | * Simple Utility to read to the contents from file 13 | * File can be present in GCS or from local file system 14 | */ 15 | public class FileUtil { 16 | 17 | public static String getContent(String fileLocation, PipelineOptions options) throws Exception { 18 | // Have separate reader for GS files and local files 19 | if (fileLocation.startsWith(SFConstants.GS_FILE_PREFIX)) { 20 | return readFromGCS(fileLocation, options); 21 | } else { 22 | return readFromLocal(fileLocation); 23 | } 24 | } 25 | 26 | private static String readFromLocal(String configFileLocation) throws Exception { 27 | // Removing file:// prefix 28 | String fileLocation = StringUtils.substringAfter(configFileLocation, SFConstants.LOCAL_FILE_PREFIX); 29 | // Using commons-io utility to read the file as String 30 | return FileUtils.readFileToString(new File(fileLocation), Charsets.UTF_8); 31 | } 32 | 33 | private static String readFromGCS(String configFileLocation, 34 | PipelineOptions options) throws Exception { 35 | GCSFileUtil gcsFileUtil = new GCSFileUtil(options); 36 | byte[] contents = gcsFileUtil.read(configFileLocation); 37 | return new String(contents); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/util/GCSFileUtil.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.util; 2 | 3 | import java.nio.ByteBuffer; 4 | import 
java.nio.channels.SeekableByteChannel; 5 | 6 | import com.google.cloud.dataflow.sdk.options.PipelineOptions; 7 | import com.google.cloud.dataflow.sdk.util.GcsUtil; 8 | import com.google.cloud.dataflow.sdk.util.GcsUtil.GcsUtilFactory; 9 | import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; 10 | 11 | /** 12 | * A Google Cloud Storage utility which can be used to read the files present in GCS 13 | * This utility can be used only for the Jobs running in Google Dataflow 14 | * This makes use of {@code GcsUtil} and {@code GcsPath} to read the file present in GCS 15 | */ 16 | public class GCSFileUtil { 17 | private GcsUtil gcsUtil; 18 | 19 | public GCSFileUtil(PipelineOptions options) { 20 | // PipelineOption is required to create GcsUtil 21 | // hence this can be used only for Google Dataflow jobs 22 | gcsUtil = new GcsUtilFactory().create(options); 23 | } 24 | 25 | public byte[] read(String filePath) throws Exception { 26 | GcsPath gcsPath = GcsPath.fromUri(filePath); 27 | SeekableByteChannel seekableByteChannel = gcsUtil.open(gcsPath); 28 | // Allocating ByteBuffer based on the file size 29 | ByteBuffer fileContent = ByteBuffer.allocate(Long.valueOf(gcsUtil.fileSize(gcsPath)).intValue()); 30 | seekableByteChannel.read(fileContent); 31 | 32 | return fileContent.array(); 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/util/JobConstants.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.util; 2 | 3 | public interface JobConstants { 4 | public static final String COL_ACCOUNT_ID = "AccountId"; 5 | public static final String COL_OPPORTUNITY_ID = "OpportunityId"; 6 | public static final String COL_PROPOSAL_ID = "ProposalId"; 7 | public static final String COL_CLICKS = "Clicks"; 8 | public static final String COL_IMPRESSIONS = "Impressions"; 9 | 10 | public static final String COL_TYPE_STRING = "STRING"; 11 | public static final String COL_TYPE_INTEGER = "INTEGER"; 12 | 13 | public static final String STR_COMMA = ","; 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/util/SFConstants.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.util; 2 | 3 | public interface SFConstants { 4 | public static String STR_INSIGHTS_EXTERNAL_DATA = "InsightsExternalData"; 5 | public static String STR_INSIGHTS_EXTERNAL_DATA_PART = "InsightsExternalDataPart"; 6 | public static String STR_INSIGHTS_EXTERNAL_DATA_ID = "InsightsExternalDataId"; 7 | 8 | public static String STR_FORMAT = "Format"; 9 | public static String STR_DATAFILE = "DataFile"; 10 | public static String STR_EDGEMART_ALIAS = "EdgemartAlias"; 11 | public static String STR_METADATA_JSON = "MetadataJson"; 12 | public static String STR_OPERATION = "Operation"; 13 | public static String STR_ACTION = "Action"; 14 | public static String STR_PART_NUMBER= "PartNumber"; 15 | 16 | public static String STR_CSV_FORMAT = "Csv"; 17 | public static String STR_OVERWRITE_OPERATION = "Overwrite"; 18 | public static String STR_ACTION_NONE = "None"; 19 | public static String STR_ACTION_PROCESS = "Process"; 20 | 21 | public static String GS_FILE_PREFIX = "gs://"; 22 | public static String LOCAL_FILE_PREFIX = "file://"; 23 | } 24 | -------------------------------------------------------------------------------- 
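Before the test sources, here is a rough sketch of how the main-source pieces above could be wired into a single Dataflow pipeline that reads Salesforce reference data and publishes it to Wave. This is illustrative only and is not the actual AdDataJob/SFReferenceDataJob code: the class name, bucket paths, dataset name and SOQL string are placeholders; only the constructors and transforms shown in this repository (SFConfig.getInstance, SFSOQLExecutor, SFRead, SFWaveDatasetWriter, SFWaveWrite) are assumed.

```java
package com.google.wave.prototype.dataflow.pipeline;

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.transforms.Create;
import com.google.cloud.dataflow.sdk.values.PCollection;
import com.google.wave.prototype.dataflow.model.SFConfig;
import com.google.wave.prototype.dataflow.model.SFWaveWriteResult;
import com.google.wave.prototype.dataflow.sf.SFSOQLExecutor;
import com.google.wave.prototype.dataflow.sf.SFWaveDatasetWriter;
import com.google.wave.prototype.dataflow.transform.SFRead;
import com.google.wave.prototype.dataflow.transform.SFWaveWrite;

/** Illustrative wiring only; not part of the repository. */
public class SFReadToWaveExample {
    public static void main(String[] args) throws Exception {
        PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
        Pipeline p = Pipeline.create(options);

        // Salesforce credentials come from a local or GCS JSON file (placeholder path)
        SFConfig sfConfig = SFConfig.getInstance("gs://sample_bucket/sf_config.json", options);

        // Execute a SOQL query and get the result back as CSV rows
        PCollection<String> sfRefCsv = p
                .apply(Create.of("SELECT AccountId, Id FROM Opportunity"))
                .apply(new SFRead(new SFSOQLExecutor(sfConfig)));

        // Publish the CSV rows as a Salesforce Wave dataset (placeholder names)
        SFWaveDatasetWriter writer = new SFWaveDatasetWriter(sfConfig, "SampleWaveDataset");
        PCollection<SFWaveWriteResult> writeResult =
                sfRefCsv.apply(new SFWaveWrite(writer, "gs://sample_bucket/wave_metadata.json"));

        p.run();
    }
}
```

When the same CSV rows are destined for BigQuery rather than Wave, the TableRowFormatter function (exercised in the tests below) plays the analogous formatting role.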
/src/test/java/com/google/wave/prototype/dataflow/BaseTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import com.google.api.services.bigquery.model.TableRow; 7 | import com.google.wave.prototype.dataflow.model.AggregatedData; 8 | import com.google.wave.prototype.dataflow.util.JobConstants; 9 | 10 | public class BaseTest { 11 | // Test data 12 | protected static final String ACCOUNT_ID_1 = "001B0000003oYAfIAM"; 13 | protected static final String OPPOR_ID_1 = "006B0000002ndnpIAA"; 14 | protected static final String PROPOSAL_ID_1 = "101"; 15 | protected static final int CLICK_COUNT_1 = 100; 16 | protected static final int IMPRESSION_COUNT_1 = 1000; 17 | 18 | protected static final String ACCOUNT_ID_2 = "001B0000003oYAfIAM"; 19 | protected static final String OPPOR_ID_2 = "006B0000002ndnpIAF"; 20 | protected static final String PROPOSAL_ID_2 = "102"; 21 | protected static final int CLICK_COUNT_2 = 200; 22 | protected static final int IMPRESSION_COUNT_2 = 2000; 23 | 24 | protected AggregatedData[] getSampleAggDataWithoutOpporId() { 25 | AggregatedData[] sampleAggData = new AggregatedData[2]; 26 | 27 | sampleAggData[0] = new AggregatedData(PROPOSAL_ID_1, CLICK_COUNT_1, IMPRESSION_COUNT_1); 28 | sampleAggData[1] = new AggregatedData(PROPOSAL_ID_2, CLICK_COUNT_2, IMPRESSION_COUNT_2); 29 | 30 | return sampleAggData; 31 | } 32 | 33 | protected AggregatedData[] getSampleAggDataWithOpporId() { 34 | AggregatedData[] sampleAggData = getSampleAggDataWithoutOpporId(); 35 | 36 | sampleAggData[0].setOpportunityId(OPPOR_ID_1); 37 | sampleAggData[1].setOpportunityId(OPPOR_ID_2); 38 | 39 | return sampleAggData; 40 | } 41 | 42 | protected String getAsCSV(String... 
columns) { 43 | StringBuilder csv = new StringBuilder(); 44 | for (int i = 0; i < columns.length; i++) { 45 | if (i != 0) { 46 | csv.append(','); 47 | } 48 | csv.append(columns[i]); 49 | } 50 | csv.append('\n'); 51 | 52 | return csv.toString(); 53 | } 54 | 55 | protected String getAsCSV(String proposalId, String opporId, 56 | int clickCount, int impressionCount) { 57 | return getAsCSV(proposalId, opporId, clickCount + "", impressionCount + ""); 58 | } 59 | 60 | protected TableRow getAsTableRow(String accId1, String opporId1, 61 | String proposalId1) { 62 | TableRow row = new TableRow(); 63 | 64 | row.set(JobConstants.COL_ACCOUNT_ID, accId1); 65 | row.set(JobConstants.COL_OPPORTUNITY_ID, opporId1); 66 | row.set(JobConstants.COL_PROPOSAL_ID, proposalId1); 67 | 68 | return row; 69 | } 70 | 71 | protected List getSampleSFRefTableRows() { 72 | List sampleSFRefTableRows = new ArrayList(4); 73 | 74 | sampleSFRefTableRows.add(getAsTableRow(ACCOUNT_ID_1, OPPOR_ID_1, PROPOSAL_ID_1)); 75 | sampleSFRefTableRows.add(getAsTableRow(ACCOUNT_ID_2, OPPOR_ID_2, PROPOSAL_ID_2)); 76 | 77 | return sampleSFRefTableRows; 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/coder/AggregateDataCoderTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.coder; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import java.io.ByteArrayInputStream; 6 | 7 | import org.apache.commons.io.output.ByteArrayOutputStream; 8 | import org.junit.Before; 9 | import org.junit.Test; 10 | 11 | import com.google.cloud.dataflow.sdk.coders.Coder.Context; 12 | import com.google.wave.prototype.dataflow.BaseTest; 13 | import com.google.wave.prototype.dataflow.model.AggregatedData; 14 | 15 | public class AggregateDataCoderTest extends BaseTest { 16 | private AggregatedData aggregatedData; 17 | 18 | @Before 19 | public void setup() { 20 | aggregatedData = new AggregatedData(PROPOSAL_ID_1, OPPOR_ID_1, CLICK_COUNT_1, IMPRESSION_COUNT_1); 21 | } 22 | 23 | @Test 24 | public void testCoder() throws Exception { 25 | ByteArrayOutputStream bos = null; 26 | ByteArrayInputStream bis = null; 27 | try { 28 | AggregateDataCoder coder = AggregateDataCoder.getInstance(); 29 | 30 | bos = new ByteArrayOutputStream(); 31 | coder.encode(aggregatedData, bos, Context.NESTED); 32 | 33 | bis = new ByteArrayInputStream(bos.toByteArray()); 34 | AggregatedData decodedAggData = coder.decode(bis, Context.NESTED); 35 | 36 | assertEquals(aggregatedData, decodedAggData); 37 | } finally { 38 | if (bos != null) { 39 | bos.close(); 40 | } 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/coder/SFCoderTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.coder; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import java.io.ByteArrayInputStream; 6 | 7 | import org.apache.commons.io.output.ByteArrayOutputStream; 8 | import org.junit.Before; 9 | import org.junit.Test; 10 | 11 | import com.google.cloud.dataflow.sdk.coders.Coder.Context; 12 | import com.google.wave.prototype.dataflow.BaseTest; 13 | import com.google.wave.prototype.dataflow.model.SFReferenceData; 14 | 15 | public class SFCoderTest extends BaseTest { 16 | private SFReferenceData sfReferenceData; 17 | 18 | @Before 19 | public 
void setup() { 20 | sfReferenceData = new SFReferenceData(ACCOUNT_ID_1, OPPOR_ID_1, PROPOSAL_ID_1); 21 | } 22 | 23 | @Test 24 | public void testCoder() throws Exception { 25 | ByteArrayOutputStream bos = null; 26 | ByteArrayInputStream bis = null; 27 | try { 28 | SFCoder coder = SFCoder.getInstance(); 29 | 30 | bos = new ByteArrayOutputStream(); 31 | coder.encode(sfReferenceData, bos, Context.NESTED); 32 | 33 | bis = new ByteArrayInputStream(bos.toByteArray()); 34 | SFReferenceData decodedsfData= coder.decode(bis, Context.NESTED); 35 | 36 | assertEquals(sfReferenceData, decodedsfData); 37 | } finally { 38 | if (bos != null) { 39 | bos.close(); 40 | } 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/function/AggregateDataEnricherTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.function; 2 | 3 | import java.util.List; 4 | 5 | import org.hamcrest.CoreMatchers; 6 | import org.junit.Assert; 7 | import org.junit.Test; 8 | 9 | import com.google.api.services.bigquery.model.TableRow; 10 | import com.google.cloud.dataflow.sdk.testing.TestPipeline; 11 | import com.google.cloud.dataflow.sdk.transforms.Create; 12 | import com.google.cloud.dataflow.sdk.transforms.DoFnTester; 13 | import com.google.cloud.dataflow.sdk.transforms.View; 14 | import com.google.cloud.dataflow.sdk.values.PCollection; 15 | import com.google.cloud.dataflow.sdk.values.PCollectionView; 16 | import com.google.wave.prototype.dataflow.BaseTest; 17 | import com.google.wave.prototype.dataflow.model.AggregatedData; 18 | 19 | /** 20 | * Unit tests for {@link AggregateDataEnricher} 21 | */ 22 | public class AggregateDataEnricherTest extends BaseTest { 23 | 24 | @Test 25 | public void enrichTest() { 26 | // Creating pipeline to construct sideInput 27 | TestPipeline testPipeline = TestPipeline.create(); 28 | // Constructing sideInput 29 | List sampleSFRefTableRows = getSampleSFRefTableRows(); 30 | PCollection sampleSFRefData = testPipeline.apply(Create.of(sampleSFRefTableRows)); 31 | PCollectionView> sideInput = sampleSFRefData.apply(View.asIterable()); 32 | 33 | AggregateDataEnricher enricher = new AggregateDataEnricher(sideInput); 34 | DoFnTester doFnTester = DoFnTester.of(enricher); 35 | doFnTester.setSideInputInGlobalWindow(sideInput, sampleSFRefTableRows); 36 | 37 | // Input Aggregated provided without opportunity Id 38 | List results = doFnTester.processBatch(getSampleAggDataWithoutOpporId()); 39 | 40 | // Check whether the result has opportunity id populated with it 41 | Assert.assertThat(results, CoreMatchers.hasItems(getSampleAggDataWithOpporId())); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/function/CSVFormatterTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.function; 2 | 3 | import java.util.List; 4 | 5 | import org.hamcrest.CoreMatchers; 6 | import org.junit.Assert; 7 | import org.junit.Test; 8 | 9 | import com.google.cloud.dataflow.sdk.transforms.DoFnTester; 10 | import com.google.wave.prototype.dataflow.BaseTest; 11 | import com.google.wave.prototype.dataflow.model.AggregatedData; 12 | 13 | public class CSVFormatterTest extends BaseTest { 14 | 15 | @Test 16 | public void transformAsCSVTest() { 17 | CSVFormatter csvFormatter = new 
CSVFormatter(); 18 | DoFnTester dofnTester = DoFnTester.of(csvFormatter); 19 | 20 | List results = dofnTester.processBatch(getSampleAggDataWithOpporId()); 21 | Assert.assertThat(results, CoreMatchers.hasItems(getSampleEnrichedDataAsCSV())); 22 | } 23 | 24 | private String[] getSampleEnrichedDataAsCSV() { 25 | String[] sampleEnrichedCSVs= new String[2]; 26 | 27 | sampleEnrichedCSVs[0] = getAsCSV(PROPOSAL_ID_1, OPPOR_ID_1, CLICK_COUNT_1, IMPRESSION_COUNT_1); 28 | sampleEnrichedCSVs[1] = getAsCSV(PROPOSAL_ID_2, OPPOR_ID_2, CLICK_COUNT_2, IMPRESSION_COUNT_2); 29 | 30 | return sampleEnrichedCSVs; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/function/TableRowFormatterTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.function; 2 | 3 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_ACCOUNT_ID; 4 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_OPPORTUNITY_ID; 5 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_PROPOSAL_ID; 6 | 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | 10 | import org.junit.Assert; 11 | import org.junit.Test; 12 | 13 | import com.google.api.services.bigquery.model.TableRow; 14 | import com.google.cloud.dataflow.sdk.transforms.DoFnTester; 15 | import com.google.wave.prototype.dataflow.BaseTest; 16 | 17 | /** 18 | * Unit test for {@link TableRowFormatter} DoFn 19 | */ 20 | public class TableRowFormatterTest extends BaseTest { 21 | 22 | @Test 23 | public void formatSFRefTest() { 24 | TableRowFormatter formatSFRefFn = new TableRowFormatter(getSFRefTableColumns()); 25 | DoFnTester doFnTester = DoFnTester.of(formatSFRefFn); 26 | 27 | // Mocking SFRead by manually constructing CSV data 28 | List results = doFnTester.processBatch( 29 | getAsCSV(ACCOUNT_ID_1, OPPOR_ID_1, PROPOSAL_ID_1), 30 | getAsCSV(ACCOUNT_ID_2, OPPOR_ID_2, PROPOSAL_ID_2)); 31 | 32 | // Converted tableRows are verified here 33 | Assert.assertEquals(results, getSampleSFRefTableRows()); 34 | } 35 | 36 | private List getSFRefTableColumns() { 37 | List columns = new ArrayList(4); 38 | 39 | columns.add(COL_ACCOUNT_ID); 40 | columns.add(COL_OPPORTUNITY_ID); 41 | columns.add(COL_PROPOSAL_ID); 42 | 43 | return columns; 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/model/SFConfigTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.model; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.fail; 5 | 6 | import org.junit.Test; 7 | 8 | import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; 9 | import com.google.wave.prototype.dataflow.model.SFConfig; 10 | import com.google.wave.prototype.dataflow.util.SFConstants; 11 | 12 | /** 13 | * Unit test for SFConfig 14 | * Reads the config file present in local and assert the values 15 | */ 16 | public class SFConfigTest { 17 | @Test 18 | public void validLocalFile() throws Exception { 19 | // Config files are present in project home 20 | StringBuilder sb = new StringBuilder(); 21 | sb.append(SFConstants.LOCAL_FILE_PREFIX); 22 | sb.append(System.getProperty("user.dir")); 23 | sb.append("/test_sf_config.json"); 24 | 25 | // This will read the config file and populate 
SFConfig with userId and password 26 | SFConfig sfConfig = SFConfig.getInstance(sb.toString(), PipelineOptionsFactory.create()); 27 | 28 | assertEquals("demo@demo.com", sfConfig.getUserId()); 29 | assertEquals("test", sfConfig.getPassword()); 30 | } 31 | 32 | @Test 33 | public void invalidLocalFile() throws Exception { 34 | try { 35 | // Providing invalid file path which should throw Exception 36 | SFConfig.getInstance("test_sf_config.json", PipelineOptionsFactory.create()); 37 | fail("Expected exception not raised"); 38 | } catch (Exception e) { 39 | // Expected exception here 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/pipeline/AdDataJobTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.pipeline; 2 | 3 | 4 | /** 5 | * Jobs are not tested as BigQueryIO is not mocked 6 | */ 7 | public class AdDataJobTest { 8 | 9 | } -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/pipeline/SFReferenceDataJobTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.pipeline; 2 | 3 | 4 | /** 5 | * Jobs are not tested as BigQueryIO is not mocked 6 | */ 7 | public class SFReferenceDataJobTest { 8 | } -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/sf/SFSOQLExecutorTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.sf; 2 | 3 | import static org.mockito.Mockito.mock; 4 | import static org.mockito.Mockito.when; 5 | import static org.junit.Assert.assertEquals; 6 | import static org.junit.Assert.assertNotNull; 7 | 8 | import java.util.List; 9 | 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | import com.google.wave.prototype.dataflow.BaseTest; 14 | import com.google.wave.prototype.dataflow.model.SFConfig; 15 | import com.sforce.soap.enterprise.EnterpriseConnection; 16 | import com.sforce.soap.enterprise.QueryResult; 17 | import com.sforce.soap.enterprise.sobject.Opportunity; 18 | import com.sforce.soap.enterprise.sobject.SObject; 19 | import com.sforce.ws.ConnectionException; 20 | import com.sforce.ws.ConnectorConfig; 21 | 22 | /** 23 | * Unit test for {@link SFSOQLExecutor} 24 | */ 25 | public class SFSOQLExecutorTest extends BaseTest { 26 | private static final String sfQueryStr = "SELECT AccountId, Id, ProposalID__c FROM Opportunity where ProposalID__c != null"; 27 | 28 | private SFConfig sfConfig; 29 | 30 | @Before 31 | public void setup() throws Exception { 32 | sfConfig = mock(SFConfig.class); 33 | 34 | // Returning our EnterpriseConnection which return a single object during query execution 35 | when(sfConfig.createEnterpriseConnection()).thenReturn(EnterpriseConnectionExt.getInstance()); 36 | } 37 | 38 | @Test 39 | public void executeQueryTest() throws Exception { 40 | int expectedRecordsCount = 1; 41 | SFSOQLExecutor executor = new SFSOQLExecutor(sfConfig); 42 | List results = executor.executeQuery(sfQueryStr); 43 | 44 | assertNotNull(results); 45 | assertEquals(results.size(), expectedRecordsCount); 46 | Opportunity opportunity = (Opportunity) results.get(0); 47 | 48 | assertEquals(ACCOUNT_ID_1, opportunity.getAccountId()); 49 | assertEquals(OPPOR_ID_1, 
opportunity.getId()); 50 | assertEquals(PROPOSAL_ID_1, opportunity.getProposalID__c()); 51 | } 52 | 53 | public static class EnterpriseConnectionExt extends EnterpriseConnection { 54 | 55 | public static EnterpriseConnectionExt getInstance() throws ConnectionException { 56 | ConnectorConfig config = new ConnectorConfig(); 57 | config.setUsername("dummy_sf_user"); 58 | config.setPassword("dummy_sf_password"); 59 | config.setManualLogin(true); 60 | // Salesforce SOAP API checks for /services/Soap/c/ 61 | config.setServiceEndpoint("http://dummysgendpoint/services/Soap/c/"); 62 | return new EnterpriseConnectionExt(config); 63 | } 64 | 65 | public EnterpriseConnectionExt(ConnectorConfig config) 66 | throws ConnectionException { 67 | super(config); 68 | } 69 | 70 | @Override 71 | public QueryResult query(String queryString) throws ConnectionException { 72 | QueryResult queryResult = new QueryResult(); 73 | 74 | Opportunity opportunity = new Opportunity(); 75 | opportunity.setAccountId(ACCOUNT_ID_1); 76 | opportunity.setProposalID__c(PROPOSAL_ID_1); 77 | opportunity.setId(OPPOR_ID_1); 78 | 79 | queryResult.setRecords(new SObject[] {opportunity}); 80 | queryResult.setDone(true); 81 | return queryResult; 82 | } 83 | 84 | @Override 85 | public void logout() throws ConnectionException { 86 | // no op 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/sf/SFWaveDatasetWriterTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.sf; 2 | 3 | import static com.google.wave.prototype.dataflow.util.SFConstants.*; 4 | import static org.junit.Assert.*; 5 | import static org.mockito.Mockito.mock; 6 | import static org.mockito.Mockito.when; 7 | 8 | import org.junit.Before; 9 | import org.junit.Test; 10 | 11 | import com.google.wave.prototype.dataflow.BaseTest; 12 | import com.google.wave.prototype.dataflow.model.SFConfig; 13 | import com.google.wave.prototype.dataflow.util.SFConstants; 14 | import com.sforce.soap.partner.PartnerConnection; 15 | import com.sforce.soap.partner.SaveResult; 16 | import com.sforce.soap.partner.sobject.SObject; 17 | import com.sforce.ws.ConnectionException; 18 | import com.sforce.ws.ConnectorConfig; 19 | 20 | /** 21 | * Unit test for {@link SFWaveDatasetWriter} 22 | */ 23 | public class SFWaveDatasetWriterTest extends BaseTest { 24 | private static final String DUMMY_METADATA_CONTENT = "dummy_metadata_content"; 25 | private static final String DUMMY_DATASET_CONTENT = "dummy_dataset_content"; 26 | private static final String DUMMY_SOBJECT_ID = "dummy_sobject_id"; 27 | private static final String DUMMY_DATASET_NAME = "dummy_dataset_name"; 28 | 29 | private static int CREATE_CALL_COUNT = 0; 30 | private static int UPDATE_CALL_COUNT = 0; 31 | 32 | private SFConfig sfConfig; 33 | 34 | @Before 35 | public void setup() throws Exception { 36 | StringBuilder metadataFileLocationSB = new StringBuilder(); 37 | metadataFileLocationSB.append(SFConstants.LOCAL_FILE_PREFIX); 38 | metadataFileLocationSB.append(System.getProperty("user.dir")); 39 | metadataFileLocationSB.append("/test_metadata.json"); 40 | 41 | sfConfig = mock(SFConfig.class); 42 | 43 | when(sfConfig.createPartnerConnection()).thenReturn(PartnerConnectionExt.getInstance()); 44 | 45 | CREATE_CALL_COUNT = 0; 46 | UPDATE_CALL_COUNT = 0; 47 | } 48 | 49 | @Test 50 | public void testWrite() throws Exception { 51 | SFWaveDatasetWriter writer = new 
SFWaveDatasetWriter(sfConfig, DUMMY_DATASET_NAME); 52 | String sfObjId = writer.write(DUMMY_METADATA_CONTENT.getBytes(), DUMMY_DATASET_CONTENT.getBytes()); 53 | 54 | assertEquals(DUMMY_SOBJECT_ID, sfObjId); 55 | // Verify that PartnerConnection.create() has been called twice 56 | // metadata publish and datapart publish 57 | assertEquals(2, CREATE_CALL_COUNT); 58 | 59 | // Verify that PartnerConnection.update() has been called only once 60 | // finalize publish 61 | assertEquals(1, UPDATE_CALL_COUNT); 62 | } 63 | 64 | public static class PartnerConnectionExt extends PartnerConnection { 65 | 66 | public static PartnerConnectionExt getInstance() throws ConnectionException { 67 | ConnectorConfig config = new ConnectorConfig(); 68 | config.setUsername("dummy_sf_user"); 69 | config.setPassword("dummy_sf_password"); 70 | config.setManualLogin(true); 71 | // Salesforce SOAP API checks for /services/Soap/c/ 72 | config.setServiceEndpoint("http://dummysgendpoint/services/Soap/u/"); 73 | return new PartnerConnectionExt(config); 74 | } 75 | 76 | public PartnerConnectionExt(ConnectorConfig config) 77 | throws ConnectionException { 78 | super(config); 79 | } 80 | 81 | @Override 82 | public SaveResult[] update(SObject[] sObjects) 83 | throws ConnectionException { 84 | int expectedSObjectCount = 1; 85 | assertEquals(expectedSObjectCount, sObjects.length); 86 | 87 | String type = sObjects[0].getType(); 88 | assertEquals(STR_INSIGHTS_EXTERNAL_DATA, type); 89 | 90 | // verify action 91 | String actualAction = (String) sObjects[0].getField(STR_ACTION); 92 | assertEquals(STR_ACTION_PROCESS, actualAction); 93 | 94 | // verify Sobject Id 95 | assertEquals(DUMMY_SOBJECT_ID, sObjects[0].getId()); 96 | 97 | UPDATE_CALL_COUNT++; 98 | return constructSaveResultArray(); 99 | } 100 | 101 | @Override 102 | public SaveResult[] create(SObject[] sObjects) 103 | throws ConnectionException { 104 | int expectedSObjectCount = 1; 105 | assertEquals(expectedSObjectCount, sObjects.length); 106 | 107 | String type = sObjects[0].getType(); 108 | assertNotNull(type); 109 | // It is metadata publish 110 | if (STR_INSIGHTS_EXTERNAL_DATA.equals(type)) { 111 | // verify dataset name 112 | String actualDatasetName = (String) sObjects[0].getField(STR_EDGEMART_ALIAS); 113 | assertEquals(DUMMY_DATASET_NAME, actualDatasetName); 114 | 115 | // verify metadata content 116 | byte[] actualMetadataContent = (byte[]) sObjects[0].getField(STR_METADATA_JSON); 117 | assertEquals(DUMMY_METADATA_CONTENT, new String(actualMetadataContent)); 118 | } else if (STR_INSIGHTS_EXTERNAL_DATA_PART.equals(type)) { 119 | // verify dataset content 120 | byte[] actualDatasetContent = (byte[]) sObjects[0].getField(STR_DATAFILE); 121 | assertEquals(DUMMY_DATASET_CONTENT, new String(actualDatasetContent)); 122 | 123 | // verify sobject id 124 | String actualSObjectId = (String) sObjects[0].getField(STR_INSIGHTS_EXTERNAL_DATA_ID); 125 | assertEquals(DUMMY_SOBJECT_ID, actualSObjectId); 126 | } else { 127 | fail("PartnerConnection.create() called with invalid type " + type); 128 | } 129 | 130 | CREATE_CALL_COUNT++; 131 | return constructSaveResultArray(); 132 | } 133 | 134 | @Override 135 | public void logout() throws ConnectionException { 136 | // no op 137 | } 138 | 139 | private SaveResult[] constructSaveResultArray() { 140 | SaveResult saveResult = new SaveResult(); 141 | saveResult.setId(DUMMY_SOBJECT_ID); 142 | saveResult.setSuccess(true); 143 | 144 | return new SaveResult[] {saveResult}; 145 | } 146 | } 147 | } 148 | 
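The body of SFWaveDatasetWriter itself is not reproduced in this section; pieced together from SFConstants and the expectations coded into SFWaveDatasetWriterTest above (two create() calls, then one update() with Action set to Process), the underlying Wave publish sequence looks roughly like the following sketch. The object and field names come from SFConstants; the class name, method shape and part numbering are illustrative assumptions, and connection setup and error handling are omitted.

```java
import com.sforce.soap.partner.PartnerConnection;
import com.sforce.soap.partner.SaveResult;
import com.sforce.soap.partner.sobject.SObject;

/** Illustrative sketch of the InsightsExternalData publish flow; not part of the repository. */
public class WavePublishSketch {
    public static String publish(PartnerConnection connection, String datasetName,
            byte[] metadata, byte[] datasetData) throws Exception {
        // 1. Create the InsightsExternalData header row carrying the metadata JSON
        SObject header = new SObject();
        header.setType("InsightsExternalData");
        header.setField("EdgemartAlias", datasetName);
        header.setField("MetadataJson", metadata);
        header.setField("Format", "Csv");
        header.setField("Operation", "Overwrite");
        header.setField("Action", "None");
        SaveResult[] headerResult = connection.create(new SObject[] { header });
        String parentId = headerResult[0].getId();

        // 2. Create the InsightsExternalDataPart row carrying the CSV bytes
        SObject part = new SObject();
        part.setType("InsightsExternalDataPart");
        part.setField("InsightsExternalDataId", parentId);
        part.setField("DataFile", datasetData);
        part.setField("PartNumber", 1);
        connection.create(new SObject[] { part });

        // 3. Flip the header's Action to Process so Wave starts loading the dataset
        SObject process = new SObject();
        process.setType("InsightsExternalData");
        process.setId(parentId);
        process.setField("Action", "Process");
        connection.update(new SObject[] { process });

        return parentId;
    }
}
```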
-------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/transform/AggregateEventsTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.transform; 2 | 3 | import java.util.Arrays; 4 | import java.util.List; 5 | 6 | import org.hamcrest.CoreMatchers; 7 | import org.junit.Assert; 8 | import org.junit.Test; 9 | 10 | import com.google.cloud.dataflow.sdk.Pipeline; 11 | import com.google.cloud.dataflow.sdk.testing.DataflowAssert; 12 | import com.google.cloud.dataflow.sdk.testing.TestPipeline; 13 | import com.google.cloud.dataflow.sdk.transforms.Create; 14 | import com.google.cloud.dataflow.sdk.transforms.DoFnTester; 15 | import com.google.cloud.dataflow.sdk.values.KV; 16 | import com.google.cloud.dataflow.sdk.values.PCollection; 17 | import com.google.wave.prototype.dataflow.coder.AggregateDataCoder; 18 | import com.google.wave.prototype.dataflow.model.AggregatedData; 19 | import com.google.wave.prototype.dataflow.transform.AggregateEvents; 20 | import com.google.wave.prototype.dataflow.transform.AggregateEvents.CountEvents; 21 | import com.google.wave.prototype.dataflow.transform.AggregateEvents.FilterRawData; 22 | 23 | /** 24 | * Unit tester for AggregateEvents PTransform and the DoFn present in it 25 | */ 26 | public class AggregateEventsTest { 27 | 28 | @SuppressWarnings("unchecked") 29 | @Test 30 | public void filterRawDataTest() { 31 | FilterRawData filterRawDataDoFn = new AggregateEvents.FilterRawData(); 32 | DoFnTester> doFnTester = DoFnTester.of(filterRawDataDoFn); 33 | 34 | // getAdDataSampleCSVRows() will return raw AdData csv rows 35 | // FilterRawData DoFn will extract ProposalId and event from it 36 | List> results = doFnTester.processBatch(getAdDataSampleCSVRows()); 37 | 38 | // Based on the input following KV are expected 39 | KV expectedValue1 = KV.of("101", "Impression"); 40 | KV expectedValue2 = KV.of("102", "Click"); 41 | KV expectedValue3 = KV.of("101", "Click"); 42 | Assert.assertThat(results, CoreMatchers.hasItems(expectedValue1, expectedValue2, expectedValue3)); 43 | } 44 | 45 | @SuppressWarnings("unchecked") 46 | @Test 47 | public void countEventsDoFnTest() { 48 | CountEvents countEventsDoFn = new AggregateEvents.CountEvents(); 49 | DoFnTester>, AggregatedData> countEventDoFnTester = DoFnTester.of(countEventsDoFn); 50 | 51 | // Input to AggregateEvents.CountEvents 52 | KV> kvPropsalIdEvents1 = KV.of("101", (Iterable) Arrays.asList("Impression", "Click", "Impression")); 53 | KV> kvPropsalIdEvents2 = KV.of("102", (Iterable) Arrays.asList("Click", "Impression")); 54 | KV> kvPropsalIdEvents3 = KV.of("103", (Iterable) Arrays.asList("Click")); 55 | 56 | List results = countEventDoFnTester.processBatch(kvPropsalIdEvents1, kvPropsalIdEvents2, kvPropsalIdEvents3); 57 | 58 | // Expected results 59 | // For proposalId 101, there are 1 Click and 2 Impressions in the input 60 | // Hence the expected in new AggregatedData("101", 1, 2) 61 | // For proposalId 102, there are 1 Click and 1 Impression in the input 62 | // For proposalId 103, there are 1 Click and 0 Impression in the input 63 | AggregatedData expectedValue1 = new AggregatedData("101", 1, 2); 64 | AggregatedData expectedValue2 = new AggregatedData("102", 1, 1); 65 | AggregatedData expectedValue3 = new AggregatedData("103", 1, 0); 66 | Assert.assertThat(results, CoreMatchers.hasItems(expectedValue1, expectedValue2, expectedValue3)); 67 | } 68 | 69 | @Test 70 | public 
void aggregateEventsTransformTest() { 71 | Pipeline p = TestPipeline.create(); 72 | 73 | PCollection inPCol = p.apply(Create.of(getAdDataSampleCSVRows())); 74 | PCollection result = inPCol.apply(new AggregateEvents()) 75 | .setCoder(AggregateDataCoder.getInstance()); 76 | 77 | // Input data contains 3 rows 78 | // 2 proposal Id present in input 101 and 102 79 | // And proposal Id 101 has 1 Impression and 1 Click 80 | // Proposal Id 102 has 1 Click 81 | // So expected values are new AggregatedData("101", 1, 1) and new AggregatedData("102", 1, 0) 82 | AggregatedData expectedValue1 = new AggregatedData("101", 1, 1); 83 | AggregatedData expectedValue2 = new AggregatedData("102", 1, 0); 84 | DataflowAssert.that(result).containsInAnyOrder(Arrays.asList(expectedValue1, expectedValue2)); 85 | 86 | p.run(); 87 | } 88 | 89 | private String[] getAdDataSampleCSVRows() { 90 | String[] adDataSampleCSVRows = new String[3]; 91 | adDataSampleCSVRows[0] = "1,01-01-14 9:00,ip-10-150-38-122/10.150.38.122,0,70.209.198.223,http://sample.com,3232,Impression,3,1,101"; 92 | adDataSampleCSVRows[1] = "2,01-01-14 9:01,ip-10-150-38-122/10.150.38.123,0,70.209.198.223,http://sample.com,3232,Click,3,1,102"; 93 | adDataSampleCSVRows[2] = "3,01-01-14 9:00,ip-10-150-38-122/10.150.38.122,0,70.209.198.223,http://sample.com,3232,Click,3,1,101"; 94 | 95 | return adDataSampleCSVRows; 96 | } 97 | 98 | } 99 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/transform/SFReadTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.transform; 2 | 3 | import static org.mockito.Mockito.mock; 4 | import static org.mockito.Mockito.when; 5 | import static org.mockito.Mockito.withSettings; 6 | 7 | import java.io.Serializable; 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | 11 | import org.junit.Before; 12 | import org.junit.Ignore; 13 | import org.junit.Test; 14 | 15 | import com.google.cloud.dataflow.sdk.testing.DataflowAssert; 16 | import com.google.cloud.dataflow.sdk.testing.TestPipeline; 17 | import com.google.cloud.dataflow.sdk.transforms.Create; 18 | import com.google.cloud.dataflow.sdk.values.PCollection; 19 | import com.google.wave.prototype.dataflow.BaseTest; 20 | import com.google.wave.prototype.dataflow.sf.SFSOQLExecutor; 21 | import com.sforce.soap.enterprise.sobject.Opportunity; 22 | import com.sforce.soap.enterprise.sobject.SObject; 23 | 24 | public class SFReadTest extends BaseTest { 25 | private static final String sfQueryStr = "SELECT AccountId, Id, ProposalID__c FROM Opportunity where ProposalID__c != null"; 26 | 27 | private SFSOQLExecutor sfSOQLExecutor; 28 | 29 | @Before 30 | public void setup() throws Exception { 31 | sfSOQLExecutor = mock(SFSOQLExecutor.class, withSettings().serializable()); 32 | 33 | OpportunityExt oppor = new OpportunityExt(); 34 | oppor.setAccountId(ACCOUNT_ID_1); 35 | oppor.setId(OPPOR_ID_1); 36 | oppor.setProposalID__c(PROPOSAL_ID_1); 37 | List sobjects = new ArrayList(); 38 | sobjects.add(oppor); 39 | 40 | when(sfSOQLExecutor.executeQuery(sfQueryStr)).thenReturn(sobjects); 41 | } 42 | 43 | @Ignore("Not able to serialize Opportunity, hence not able to mock it. 
But unit test for SFRead is covered as part SFSOQLExecutor") 44 | @Test 45 | public void pTransformTest() { 46 | TestPipeline pipeline = TestPipeline.create(); 47 | 48 | PCollection input = pipeline.apply(Create.of(sfQueryStr)); 49 | PCollection results = input.apply(new SFRead(sfSOQLExecutor)); 50 | 51 | DataflowAssert.that(results).containsInAnyOrder(getAsCSV(ACCOUNT_ID_1, OPPOR_ID_1, PROPOSAL_ID_1)); 52 | 53 | pipeline.run(); 54 | } 55 | 56 | public class OpportunityExt extends Opportunity implements Serializable { 57 | private static final long serialVersionUID = -563793703304651268L; 58 | 59 | 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/transform/SFWaveWriteTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.transform; 2 | 3 | import static org.mockito.Mockito.mock; 4 | import static org.mockito.Mockito.when; 5 | import static org.mockito.Mockito.withSettings; 6 | 7 | import java.util.Arrays; 8 | import java.util.HashSet; 9 | import java.util.List; 10 | import java.util.Set; 11 | 12 | import org.hamcrest.CoreMatchers; 13 | import org.junit.Assert; 14 | import org.junit.Before; 15 | import org.junit.Test; 16 | 17 | import com.google.cloud.dataflow.sdk.Pipeline; 18 | import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; 19 | import com.google.cloud.dataflow.sdk.testing.DataflowAssert; 20 | import com.google.cloud.dataflow.sdk.testing.TestPipeline; 21 | import com.google.cloud.dataflow.sdk.transforms.Create; 22 | import com.google.cloud.dataflow.sdk.transforms.DoFn; 23 | import com.google.cloud.dataflow.sdk.transforms.DoFnTester; 24 | import com.google.cloud.dataflow.sdk.transforms.PTransform; 25 | import com.google.cloud.dataflow.sdk.transforms.View; 26 | import com.google.cloud.dataflow.sdk.values.KV; 27 | import com.google.cloud.dataflow.sdk.values.PCollection; 28 | import com.google.cloud.dataflow.sdk.values.PCollectionView; 29 | import com.google.wave.prototype.dataflow.model.SFWaveWriteResult; 30 | import com.google.wave.prototype.dataflow.sf.SFWaveDatasetWriter; 31 | import com.google.wave.prototype.dataflow.transform.SFWaveWrite.BundleCount; 32 | import com.google.wave.prototype.dataflow.transform.SFWaveWrite.DistributeRowData; 33 | import com.google.wave.prototype.dataflow.transform.SFWaveWrite.Write; 34 | import com.google.wave.prototype.dataflow.util.FileUtil; 35 | import com.google.wave.prototype.dataflow.util.SFConstants; 36 | 37 | /** 38 | * Simple unit tests for {@link SFWaveWrite} {@link PTransform} and its {@link DoFn} 39 | */ 40 | public class SFWaveWriteTest { 41 | private static final String SAMPLE_DATA_TO_BE_WRITTEN = "001B0000003oYAfIAM,006B0000002ndnpIAA,102"; 42 | private static final String SAMPLE_SF_OBJ_ID = "testSFOBjId"; 43 | 44 | private SFWaveDatasetWriter writer; 45 | private String metadataFileLocation; 46 | 47 | @Before 48 | public void setup() throws Exception { 49 | StringBuilder metadataFileLocationSB = new StringBuilder(); 50 | metadataFileLocationSB.append(SFConstants.LOCAL_FILE_PREFIX); 51 | metadataFileLocationSB.append(System.getProperty("user.dir")); 52 | metadataFileLocationSB.append("/test_metadata.json"); 53 | 54 | metadataFileLocation = metadataFileLocationSB.toString(); 55 | 56 | writer = mock(SFWaveDatasetWriter.class, withSettings().serializable()); 57 | when(writer.write( 58 | FileUtil.getContent(metadataFileLocation.toString(), 
PipelineOptionsFactory.create()).getBytes(), 59 | (SAMPLE_DATA_TO_BE_WRITTEN + "\n").getBytes())) 60 | .thenReturn(SAMPLE_SF_OBJ_ID); 61 | } 62 | 63 | @Test 64 | public void calculateNoOfBundlesDoFnTest() { 65 | BundleCount bundleCtFn = new SFWaveWrite.BundleCount(); 66 | DoFnTester<Long, Integer> bundleCtFnTester = DoFnTester.of(bundleCtFn); 67 | 68 | long bundle = 1024 * 1024 * 10l; 69 | // This should create 2 bundles 70 | long input1 = bundle + 1; 71 | 72 | // This should create 32 bundles 73 | long input2 = (bundle * 31) + 1024; 74 | 75 | // These should each create 1 bundle 76 | long input3 = 1024l; 77 | long input4 = 0l; 78 | 79 | List<Integer> results = bundleCtFnTester.processBatch(input1, input2, input3, input4); 80 | Assert.assertThat(results, CoreMatchers.hasItems(2, 32, 1, 1)); 81 | } 82 | 83 | @Test 84 | public void distributeRowDataDoFnTest() { 85 | int noOfBundles = 2; 86 | Pipeline p = TestPipeline.create(); 87 | // Preparing sideInput 88 | PCollection<Integer> bundleCount = p.apply(Create.of(noOfBundles)); 89 | PCollectionView<Integer> sideInput = bundleCount.apply(View.<Integer>asSingleton()); 90 | DistributeRowData distributeRowDataDoFn = new SFWaveWrite.DistributeRowData(sideInput); 91 | 92 | DoFnTester<String, KV<Integer, String>> doFnTester = DoFnTester.of(distributeRowDataDoFn); 93 | // Providing number of bundles as sideInput 94 | doFnTester.setSideInputInGlobalWindow(sideInput, Arrays.asList(noOfBundles)); 95 | 96 | List<KV<Integer, String>> results = doFnTester.processBatch(getSampleSFRefData()); 97 | // Result should have 4 KV with 2 unique keys 98 | Assert.assertEquals(4, results.size()); 99 | // Checking whether the result has two unique keys as noOfBundles is 2 100 | Set<Integer> keys = new HashSet<Integer>(); 101 | for (KV<Integer, String> kv : results) { 102 | keys.add(kv.getKey()); 103 | } 104 | 105 | Assert.assertEquals("Proper number of bundles are not created", noOfBundles, keys.size()); 106 | } 107 | 108 | @SuppressWarnings("unchecked") 109 | @Test 110 | public void testWriteDoFn() throws Exception { 111 | 112 | KV<Integer, Iterable<String>> input = KV.of(1, (Iterable<String>) Arrays.asList(SAMPLE_DATA_TO_BE_WRITTEN)); 113 | 114 | Write writeDoFn = new SFWaveWrite.Write(writer, metadataFileLocation); 115 | DoFnTester<KV<Integer, Iterable<String>>, SFWaveWriteResult> doFnTester = DoFnTester.of(writeDoFn); 116 | 117 | // SFWaveDatasetWriter is mocked 118 | // If proper bytes are sent by SFWaveWrite.Write then it will return SAMPLE_SF_OBJ_ID 119 | // So just checking whether it returns SAMPLE_SF_OBJ_ID or not 120 | List<SFWaveWriteResult> result = doFnTester.processBatch(input); 121 | Assert.assertThat(result, CoreMatchers.hasItems(new SFWaveWriteResult(SAMPLE_SF_OBJ_ID))); 122 | } 123 | 124 | @Test 125 | public void sfWaveWriteTest() { 126 | Pipeline p = TestPipeline.create(); 127 | 128 | PCollection<String> inputPCol = p.apply(Create.of(SAMPLE_DATA_TO_BE_WRITTEN)); 129 | PCollection<SFWaveWriteResult> output = inputPCol.apply(new SFWaveWrite(writer, metadataFileLocation)); 130 | 131 | // SFWaveDatasetWriter is mocked 132 | // If proper bytes are sent by SFWaveWrite.Write then it will return SAMPLE_SF_OBJ_ID 133 | // So just checking whether it returns SAMPLE_SF_OBJ_ID or not 134 | DataflowAssert.that(output).containsInAnyOrder(Arrays.asList(new SFWaveWriteResult(SAMPLE_SF_OBJ_ID))); 135 | p.run(); 136 | } 137 | 138 | private String[] getSampleSFRefData() { 139 | String[] sfRefDat = new String[4]; 140 | // accountId, opportunityId, proposalId inputs 141 | sfRefDat[0] = "001B0000003oYAfIAM,006B0000002ndnpIAA,102"; 142 | sfRefDat[1] = "001B0000003oYAfIAM,006B0000002ndnuIAA,103"; 143 | sfRefDat[2] = "001B0000003oYAfIAM,006B0000002ndnkIAA,101"; 144 | sfRefDat[3] = 
"001B0000003oUqJIAU,006B0000002nBrQIAU,0001"; 145 | 146 | return sfRefDat; 147 | } 148 | 149 | } 150 | --------------------------------------------------------------------------------