├── README.md ├── pom.xml └── src ├── main └── java │ └── com │ └── google │ └── wave │ └── prototype │ └── dataflow │ ├── coder │ ├── AggregateDataCoder.java │ └── SFCoder.java │ ├── function │ ├── AggregateDataEnricher.java │ ├── CSVFormatter.java │ └── TableRowFormatter.java │ ├── model │ ├── AggregatedData.java │ ├── SFConfig.java │ ├── SFReferenceData.java │ └── SFWaveWriteResult.java │ ├── pipeline │ ├── AdDataJob.java │ └── SFReferenceDataJob.java │ ├── sf │ ├── SFSOQLExecutor.java │ └── SFWaveDatasetWriter.java │ ├── transform │ ├── AggregateEvents.java │ ├── SFRead.java │ └── SFWaveWrite.java │ └── util │ ├── CSVUtil.java │ ├── FileUtil.java │ ├── GCSFileUtil.java │ ├── JobConstants.java │ └── SFConstants.java └── test └── java └── com └── google └── wave └── prototype └── dataflow ├── BaseTest.java ├── coder ├── AggregateDataCoderTest.java └── SFCoderTest.java ├── function ├── AggregateDataEnricherTest.java ├── CSVFormatterTest.java └── TableRowFormatterTest.java ├── model └── SFConfigTest.java ├── pipeline ├── AdDataJobTest.java └── SFReferenceDataJobTest.java ├── sf ├── SFSOQLExecutorTest.java └── SFWaveDatasetWriterTest.java └── transform ├── AggregateEventsTest.java ├── SFReadTest.java └── SFWaveWriteTest.java /README.md: -------------------------------------------------------------------------------- 1 | # README # 2 | 3 | ### springML Inc Repository ### 4 | 5 | Google Dataflow Jobs 6 | -------------------- 7 | 8 | 9 | The following two classes take care of the Google Cloud Dataflow jobs: 10 | 11 | SFReferenceDataJob - Fetches the reference data from SF (Opportunity) and populates BigQuery 12 | AdDataJob - Fetches the raw Ad data from GCS and the SF reference data from BigQuery, enriches the Ad data and populates BigQuery with the enriched data 13 | 14 | 15 | SFReferenceDataJob 16 | ------------------ 17 | 18 | This job requires the following inputs 19 | 20 | 1. Google Cloud project 21 | 2. Google Cloud staging location 22 | 3. BigQuery output table 23 | 4. SF UserId 24 | 5. SF Password 25 | 26 | On completion of the job, the BigQuery table SFDCReferenceData.SFRef will be populated with the SF reference data 27 | 28 | 29 | AdDataJob 30 | --------- 31 | 32 | This job requires the following inputs 33 | 34 | 1. Google Cloud project 35 | 2. Google Cloud staging location 36 | 3. Ad raw data (CSV) 37 | 4. BigQuery reference data table 38 | 5. BigQuery output table 39 | 40 | On completion of the job, the BigQuery table SFDCReferenceData.EnrichedSample will be populated with the enriched data.
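Both jobs log in to Salesforce with the credentials read from the JSON config file passed via --sfConfigFileLocation (parsed by SFConfig.java; the default location is gs://sam-bucket1/config/sf_source_config.json). A minimal sketch of that file with placeholder values; note that for Salesforce API logins the password value is usually the account password concatenated with the user's security token:

```json
{
    "userId": "user@example.com",
    "password": "accountPasswordPlusSecurityToken"
}
```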
41 | 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.google.wave 6 | wave_connector_prototype 7 | 0.0.2-SNAPSHOT 8 | jar 9 | 10 | 11 | 12 | in-project1 13 | In Project Repo 14 | file://${project.basedir}\lib 15 | 16 | 17 | 18 | wave_connector_prototype 19 | http://maven.apache.org 20 | 21 | 22 | UTF-8 23 | 24 | 25 | 26 | 27 | junit 28 | junit 29 | 4.12 30 | test 31 | 32 | 33 | 34 | com.google.cloud.dataflow 35 | google-cloud-dataflow-java-sdk-all 36 | LATEST 37 | 38 | 39 | 40 | com.google.apis 41 | google-api-services-storage 42 | v1-rev25-1.19.1 43 | 44 | 46 | 47 | com.google.guava 48 | guava-jdk5 49 | 50 | 51 | 52 | 53 | 54 | com.google.apis 55 | google-api-services-bigquery 56 | v2-rev187-1.19.1 57 | 58 | 60 | 61 | com.google.guava 62 | guava-jdk5 63 | 64 | 65 | 66 | 67 | 68 | com.google.http-client 69 | google-http-client-jackson2 70 | 1.19.0 71 | 72 | 74 | 75 | com.google.guava 76 | guava-jdk5 77 | 78 | 79 | 80 | 81 | 82 | com.fasterxml.jackson.core 83 | jackson-core 84 | 2.4.2 85 | 86 | 87 | 88 | com.fasterxml.jackson.core 89 | jackson-annotations 90 | 2.4.2 91 | 92 | 93 | 94 | 95 | org.slf4j 96 | slf4j-api 97 | 1.7.7 98 | 99 | 100 | 101 | org.hamcrest 102 | hamcrest-all 103 | 1.3 104 | test 105 | 106 | 107 | 108 | com.google.appengine.tools 109 | appengine-gcs-client 110 | RELEASE 111 | 112 | 113 | 114 | org.apache.commons 115 | commons-lang3 116 | 3.4 117 | 118 | 119 | 120 | commons-io 121 | commons-io 122 | 2.4 123 | 124 | 125 | 126 | 127 | sf 128 | enterprise 129 | 1 130 | 131 | 132 | sf 133 | partner 134 | 1 135 | 136 | 137 | sf 138 | wsc 139 | 1 140 | 141 | 142 | 143 | org.mockito 144 | mockito-core 145 | 2.0.26-beta 146 | test 147 | 148 | 149 | 150 | com.github.jsqlparser 151 | jsqlparser 152 | 0.9.3 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | maven-compiler-plugin 162 | 163 | 1.7 164 | 1.7 165 | 166 | 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/coder/AggregateDataCoder.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.coder; 2 | 3 | import java.io.IOException; 4 | import java.io.InputStream; 5 | import java.io.OutputStream; 6 | 7 | import com.google.cloud.dataflow.sdk.coders.AtomicCoder; 8 | import com.google.cloud.dataflow.sdk.coders.CoderException; 9 | import com.google.wave.prototype.dataflow.model.AggregatedData; 10 | 11 | /** 12 | * Coder for {@link AggregatedData} 13 | * It just uses AggregatedData.toString() to encode 14 | * AggregatedData.toString() will produce CSV of {@link AggregatedData} 15 | * In decode, 16 | * CSV is separated into fields by String.split(',') and 17 | * {@link AggregatedData} is constructed using the fields 18 | */ 19 | public class AggregateDataCoder extends AtomicCoder { 20 | private static final long serialVersionUID = 4037984240347308918L; 21 | private static final int COL_PROPOSAL_ID = 0; 22 | private static final int COL_OPPORTUNITY_ID = 1; 23 | private static final int COL_CLICK_COUNT = 2; 24 | private static final int COL_IMP_COUNT = 3; 25 | 26 | private static final AggregateDataCoder INSTANCE = new AggregateDataCoder(); 27 | private AggregateDataCoder() { } 28 | 29 | public static AggregateDataCoder getInstance() { 30 | return INSTANCE; 31 | } 32 | 33 | 
@Override 34 | public void encode(AggregatedData value, OutputStream outStream, 35 | com.google.cloud.dataflow.sdk.coders.Coder.Context context) 36 | throws CoderException, IOException { 37 | // Returning bytes of CSV 38 | // AggregatedData.toString() will be a CSV 39 | outStream.write(value.toString().getBytes()); 40 | } 41 | 42 | @Override 43 | public AggregatedData decode(InputStream inStream, 44 | com.google.cloud.dataflow.sdk.coders.Coder.Context context) 45 | throws CoderException, IOException { 46 | int csvRowSize = inStream.available(); 47 | byte[] csvRow = new byte[csvRowSize]; 48 | inStream.read(csvRow); 49 | // Stream is converted into String 50 | // String will be a CSV 51 | // CSV splitted using comma to get the fields 52 | // AggregatedData constructed using the fields 53 | String aggDataStr = new String(csvRow); 54 | String[] addDataFields = aggDataStr.split(","); 55 | 56 | 57 | return new AggregatedData(addDataFields[COL_PROPOSAL_ID], 58 | addDataFields[COL_OPPORTUNITY_ID], 59 | Integer.parseInt(addDataFields[COL_CLICK_COUNT]), 60 | Integer.parseInt(addDataFields[COL_IMP_COUNT])); 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/coder/SFCoder.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.coder; 2 | 3 | import java.io.IOException; 4 | import java.io.InputStream; 5 | import java.io.OutputStream; 6 | 7 | import com.google.cloud.dataflow.sdk.coders.AtomicCoder; 8 | import com.google.cloud.dataflow.sdk.coders.CoderException; 9 | import com.google.wave.prototype.dataflow.model.SFReferenceData; 10 | 11 | /** 12 | * Coder for {@link SFReferenceData} 13 | * It just uses SFReferenceData.toString() to encode 14 | * SFReferenceData.toString() will produce CSV of {@link SFReferenceData} 15 | * In decode, 16 | * CSV is separated into fields by String.split(',') and 17 | * {@link SFReferenceData} is constructed using the fields 18 | */ 19 | public class SFCoder extends AtomicCoder { 20 | private static final long serialVersionUID = 4037984240347308918L; 21 | private static final int COL_ACCOUNT_ID = 0; 22 | private static final int COL_OPPORTUNITY_ID = 1; 23 | private static final int COL_PROPOSAL_ID = 2; 24 | 25 | private static final SFCoder INSTANCE = new SFCoder(); 26 | private SFCoder() { } 27 | 28 | public static SFCoder getInstance() { 29 | return INSTANCE; 30 | } 31 | 32 | @Override 33 | public void encode(SFReferenceData value, OutputStream outStream, 34 | com.google.cloud.dataflow.sdk.coders.Coder.Context context) 35 | throws CoderException, IOException { 36 | // SFReferenceData.toString will provide a String as CSV 37 | outStream.write(value.toString().getBytes()); 38 | } 39 | 40 | @Override 41 | public SFReferenceData decode(InputStream inStream, 42 | com.google.cloud.dataflow.sdk.coders.Coder.Context context) 43 | throws CoderException, IOException { 44 | int size = inStream.available(); 45 | byte[] sfRefBytes = new byte[size]; 46 | inStream.read(sfRefBytes); 47 | String refStr = new String(sfRefBytes); 48 | String[] sfRefDataFields = refStr.split(","); 49 | 50 | String proposalId = null; 51 | // Proposal may be null for some rows and hence adding only if it is present 52 | if (sfRefDataFields.length > 2) { 53 | proposalId = sfRefDataFields[COL_PROPOSAL_ID]; 54 | } 55 | return new SFReferenceData(sfRefDataFields[COL_ACCOUNT_ID], sfRefDataFields[COL_OPPORTUNITY_ID], proposalId); 56 | } 
57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/function/AggregateDataEnricher.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.function; 2 | 3 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_OPPORTUNITY_ID; 4 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_PROPOSAL_ID; 5 | 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | import com.google.api.services.bigquery.model.TableRow; 10 | import com.google.cloud.dataflow.sdk.transforms.DoFn; 11 | import com.google.cloud.dataflow.sdk.values.PCollectionView; 12 | import com.google.wave.prototype.dataflow.model.AggregatedData; 13 | import com.google.wave.prototype.dataflow.pipeline.AdDataJob; 14 | 15 | /** 16 | * Enrich AggregatedData with OpportunityId 17 | * OpportunityId fetched from Google BigQuery for the corresponding ProposalId 18 | * Google BigQuery TableRow should be provided as sideInput 19 | */ 20 | public class AggregateDataEnricher extends DoFn { 21 | private static final long serialVersionUID = -369858616535388252L; 22 | 23 | private static final Logger LOG = LoggerFactory.getLogger(AdDataJob.class); 24 | 25 | private PCollectionView> sfReferenceDataView; 26 | 27 | public AggregateDataEnricher(PCollectionView> sfReferenceDataView) { 28 | this.sfReferenceDataView = sfReferenceDataView; 29 | } 30 | 31 | @Override 32 | public void processElement( 33 | DoFn.ProcessContext c) throws Exception { 34 | AggregatedData aggregatedData = c.element(); 35 | String proposalId = aggregatedData.getProposalId(); 36 | // Since in this case BigQuery table considered to be small 37 | // table rows are passed as sideInput 38 | Iterable sfReferenceData = c.sideInput(sfReferenceDataView); 39 | for (TableRow sfReferenceRow : sfReferenceData) { 40 | String proposalIdFromBigQuery = (String) sfReferenceRow.get(COL_PROPOSAL_ID); 41 | String opportunityId = (String) sfReferenceRow.get(COL_OPPORTUNITY_ID); 42 | // Make sure to fetch the opportunityId for the corresponding proposalId 43 | if (proposalIdFromBigQuery.contains(proposalId)) { 44 | LOG.info("Adding OpportunityId into aggregatedData : " + opportunityId.toString()); 45 | aggregatedData.setOpportunityId((String) sfReferenceRow.get(COL_OPPORTUNITY_ID)); 46 | } 47 | } 48 | 49 | c.output(aggregatedData); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/function/CSVFormatter.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.function; 2 | 3 | import com.google.cloud.dataflow.sdk.transforms.DoFn; 4 | import com.google.wave.prototype.dataflow.model.AggregatedData; 5 | 6 | /** 7 | * A simple DoFn to convert {@link AggregatedData} into CSV Row 8 | */ 9 | public class CSVFormatter extends DoFn { 10 | private static final long serialVersionUID = 398388311953363232L; 11 | 12 | @Override 13 | public void processElement(DoFn.ProcessContext c) 14 | throws Exception { 15 | StringBuffer sb = new StringBuffer(256); 16 | sb.append(c.element().toString()).append('\n'); 17 | c.output(sb.toString()); 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/function/TableRowFormatter.java: 
-------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.function; 2 | 3 | import java.util.List; 4 | 5 | import com.google.api.services.bigquery.model.TableRow; 6 | import com.google.cloud.dataflow.sdk.transforms.DoFn; 7 | 8 | /** 9 | * A Google Dataflow DoFn converts the given CSV row into Google BigQuery TableRow 10 | * Column Names has to be in the order in which the fields are present in CSV 11 | */ 12 | public class TableRowFormatter extends DoFn { 13 | private static final long serialVersionUID = -5798809828662211092L; 14 | 15 | private List columnNames; 16 | 17 | public TableRowFormatter(List columnNames) { 18 | this.columnNames = columnNames; 19 | } 20 | 21 | @Override 22 | public void processElement(ProcessContext c) throws Exception { 23 | TableRow row = new TableRow(); 24 | String sfReferenceData = c.element(); 25 | // CSV will contain \n at end 26 | // \n should be added as column value 27 | sfReferenceData = removeNewlineChar(sfReferenceData); 28 | 29 | String[] individualFields = sfReferenceData.split(","); 30 | // Order is according to the query we provide 31 | // For SELECT AccountId, Id, ProposalID__c FROM Opportunity 32 | // AccountId will be at 0 33 | // OpportunityId will be at 1 34 | // ProposalId will be at 2 35 | 36 | if (columnNames.size() != individualFields.length) { 37 | throw new Exception ("Number of column does not match with the columns present in CSV"); 38 | } 39 | 40 | int col = 0; 41 | for (String columnName : columnNames) { 42 | row.set(columnName, individualFields[col++]); 43 | } 44 | 45 | c.output(row); 46 | } 47 | 48 | private String removeNewlineChar(String sfReferenceData) { 49 | int newlineCharIndex = sfReferenceData.lastIndexOf('\n'); 50 | if (newlineCharIndex != -1) { 51 | sfReferenceData = sfReferenceData.substring(0, newlineCharIndex); 52 | } 53 | 54 | return sfReferenceData; 55 | } 56 | } -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/model/AggregatedData.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.model; 2 | 3 | import com.google.cloud.dataflow.sdk.coders.DefaultCoder; 4 | import com.google.wave.prototype.dataflow.coder.AggregateDataCoder; 5 | 6 | /** 7 | * POJO holding enriched Salesforce wave data 8 | * ProposalId, OpportunityId, ClickCount and ImpressionCount 9 | */ 10 | @DefaultCoder(AggregateDataCoder.class) 11 | public class AggregatedData { 12 | private String proposalId = ""; 13 | private String opportunityId = ""; 14 | private int clickCount = 0; 15 | private int impressionCount = 0; 16 | 17 | // Used before adding OpportunityId 18 | public AggregatedData(String proposalId, int clickCount, 19 | int impressionCount) { 20 | this.proposalId = proposalId; 21 | this.clickCount = clickCount; 22 | this.impressionCount = impressionCount; 23 | } 24 | 25 | public AggregatedData(String proposalId, String opportunityId, int clickCount, 26 | int impressionCount) { 27 | this.proposalId = proposalId; 28 | this.opportunityId = opportunityId; 29 | this.clickCount = clickCount; 30 | this.impressionCount = impressionCount; 31 | } 32 | 33 | public String getProposalId() { 34 | return proposalId; 35 | } 36 | 37 | public void setProposalId(String proposalId) { 38 | this.proposalId = proposalId; 39 | } 40 | 41 | public int getClickCount() { 42 | return clickCount; 43 | } 44 | 45 | public void setClickCount(int 
clicksCount) { 46 | this.clickCount = clicksCount; 47 | } 48 | 49 | public int getImpressionCount() { 50 | return impressionCount; 51 | } 52 | 53 | public void setImpressionCount(int impressionCount) { 54 | this.impressionCount = impressionCount; 55 | } 56 | 57 | public void incrementImpressionCount() { 58 | this.impressionCount++; 59 | } 60 | 61 | public void incrementClickCount() { 62 | this.clickCount++; 63 | } 64 | 65 | public void addImpressionCount(int impressionCount) { 66 | this.impressionCount += impressionCount; 67 | } 68 | 69 | public void addClickCount(int clickCount) { 70 | this.clickCount += clickCount; 71 | } 72 | 73 | public String getOpportunityId() { 74 | return opportunityId; 75 | } 76 | 77 | public void setOpportunityId(String opportunityId) { 78 | this.opportunityId = opportunityId; 79 | } 80 | 81 | @Override 82 | public String toString() { 83 | // Constructs CSV row using fields 84 | return proposalId + "," + opportunityId + "," + clickCount + "," + impressionCount; 85 | } 86 | 87 | @Override 88 | public int hashCode() { 89 | final int prime = 31; 90 | int result = 1; 91 | result = prime * result + clickCount; 92 | result = prime * result + impressionCount; 93 | result = prime * result 94 | + ((opportunityId == null) ? 0 : opportunityId.hashCode()); 95 | result = prime * result 96 | + ((proposalId == null) ? 0 : proposalId.hashCode()); 97 | return result; 98 | } 99 | 100 | @Override 101 | public boolean equals(Object obj) { 102 | if (this == obj) 103 | return true; 104 | if (obj == null) 105 | return false; 106 | if (getClass() != obj.getClass()) 107 | return false; 108 | AggregatedData other = (AggregatedData) obj; 109 | if (clickCount != other.clickCount) 110 | return false; 111 | if (impressionCount != other.impressionCount) 112 | return false; 113 | if (opportunityId == null) { 114 | if (other.opportunityId != null) 115 | return false; 116 | } else if (!opportunityId.equals(other.opportunityId)) 117 | return false; 118 | if (proposalId == null) { 119 | if (other.proposalId != null) 120 | return false; 121 | } else if (!proposalId.equals(other.proposalId)) 122 | return false; 123 | return true; 124 | } 125 | 126 | 127 | 128 | } 129 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/model/SFConfig.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.model; 2 | 3 | import java.io.Serializable; 4 | 5 | import org.apache.commons.lang3.StringUtils; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | import com.google.appengine.repackaged.com.google.gson.Gson; 10 | import com.google.appengine.repackaged.com.google.gson.GsonBuilder; 11 | import com.google.cloud.dataflow.sdk.coders.DefaultCoder; 12 | import com.google.cloud.dataflow.sdk.coders.SerializableCoder; 13 | import com.google.cloud.dataflow.sdk.options.PipelineOptions; 14 | import com.google.wave.prototype.dataflow.util.FileUtil; 15 | import com.google.wave.prototype.dataflow.util.SFConstants; 16 | import com.sforce.soap.enterprise.EnterpriseConnection; 17 | import com.sforce.soap.partner.Connector; 18 | import com.sforce.soap.partner.PartnerConnection; 19 | import com.sforce.ws.ConnectionException; 20 | import com.sforce.ws.ConnectorConfig; 21 | 22 | /** 23 | * Holds the configuration which will be used by SFSource 24 | * Fetches Salesforce user credentials by reading the configuration file present in GS or local 25 | * A config file will have
the below content 26 | * { 27 | * "userId": , 28 | * "password": 29 | * } 30 | */ 31 | @DefaultCoder(SerializableCoder.class) 32 | public class SFConfig implements Serializable { 33 | private static final long serialVersionUID = -5569745252294105529L; 34 | 35 | private static final Logger LOG = LoggerFactory.getLogger(SFConfig.class); 36 | 37 | private String userId; 38 | private String password; 39 | 40 | public static SFConfig getInstance(String configFileLocation, PipelineOptions options) throws Exception { 41 | validate(configFileLocation); 42 | // Content will be in JSON 43 | // So constructing SFConfig bean using GSON 44 | String json = FileUtil.getContent(configFileLocation, options); 45 | Gson gson = new GsonBuilder().create(); 46 | // Unmarshalling file content into SFConfig 47 | return gson.fromJson(json, SFConfig.class); 48 | } 49 | 50 | public String getUserId() { 51 | return userId; 52 | } 53 | 54 | public String getPassword() { 55 | return password; 56 | } 57 | 58 | public PartnerConnection createPartnerConnection() throws Exception { 59 | ConnectorConfig config = new ConnectorConfig(); 60 | LOG.debug("Connecting SF Partner Connection using " + getUserId()); 61 | config.setUsername(getUserId()); 62 | config.setPassword(getPassword()); 63 | 64 | try { 65 | return Connector.newConnection(config); 66 | } catch (ConnectionException ce) { 67 | LOG.error("Exception while creating connection", ce); 68 | throw new Exception(ce); 69 | } 70 | } 71 | 72 | public EnterpriseConnection createEnterpriseConnection() throws Exception { 73 | ConnectorConfig config = new ConnectorConfig(); 74 | LOG.debug("Connecting SF Partner Connection using " + getUserId()); 75 | config.setUsername(getUserId()); 76 | config.setPassword(getPassword()); 77 | 78 | try { 79 | return com.sforce.soap.enterprise.Connector.newConnection(config); 80 | } catch (ConnectionException ce) { 81 | LOG.error("Exception while creating connection", ce); 82 | throw new Exception(ce); 83 | } 84 | } 85 | 86 | private static void validate(String configFileLocation) throws Exception { 87 | // Checking whether the file is provided in proper format 88 | // GS file should start with gs:// 89 | // local file should start with file:// 90 | if (!StringUtils.isEmpty(configFileLocation)) { 91 | if (configFileLocation.startsWith(SFConstants.GS_FILE_PREFIX) || 92 | configFileLocation.startsWith(SFConstants.LOCAL_FILE_PREFIX)) { 93 | return; 94 | } 95 | } 96 | 97 | // Provided configFileLocation is not valid 98 | // Stopping the Job 99 | throw new Exception("Invalid Configuration file " + configFileLocation); 100 | } 101 | 102 | } 103 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/model/SFReferenceData.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.model; 2 | 3 | import java.io.Serializable; 4 | 5 | /** 6 | * POJO containing Salesforce reference data 7 | */ 8 | public class SFReferenceData implements Serializable { 9 | private static final long serialVersionUID = -7597520654419284165L; 10 | 11 | private String accountId; 12 | private String opportunityId; 13 | private String proposalId; 14 | 15 | public SFReferenceData(String accountId, String opportunityId, 16 | String proposalId) { 17 | super(); 18 | this.accountId = accountId; 19 | this.opportunityId = opportunityId; 20 | this.proposalId = proposalId; 21 | } 22 | 23 | public String getAccountId() { 24 | return 
accountId; 25 | } 26 | 27 | public void setAccountId(String accountId) { 28 | this.accountId = accountId; 29 | } 30 | 31 | public String getOpportunityId() { 32 | return opportunityId; 33 | } 34 | 35 | public void setOpportunityId(String opportunityId) { 36 | this.opportunityId = opportunityId; 37 | } 38 | 39 | public String getProposalId() { 40 | return proposalId; 41 | } 42 | 43 | public void setProposalId(String proposalId) { 44 | this.proposalId = proposalId; 45 | } 46 | 47 | @Override 48 | public String toString() { 49 | return accountId + ","+ opportunityId + "," + proposalId; 50 | } 51 | 52 | @Override 53 | public int hashCode() { 54 | final int prime = 31; 55 | int result = 1; 56 | result = prime * result 57 | + ((accountId == null) ? 0 : accountId.hashCode()); 58 | result = prime * result 59 | + ((opportunityId == null) ? 0 : opportunityId.hashCode()); 60 | result = prime * result 61 | + ((proposalId == null) ? 0 : proposalId.hashCode()); 62 | return result; 63 | } 64 | 65 | @Override 66 | public boolean equals(Object obj) { 67 | if (this == obj) 68 | return true; 69 | if (obj == null) 70 | return false; 71 | if (getClass() != obj.getClass()) 72 | return false; 73 | SFReferenceData other = (SFReferenceData) obj; 74 | if (accountId == null) { 75 | if (other.accountId != null) 76 | return false; 77 | } else if (!accountId.equals(other.accountId)) 78 | return false; 79 | if (opportunityId == null) { 80 | if (other.opportunityId != null) 81 | return false; 82 | } else if (!opportunityId.equals(other.opportunityId)) 83 | return false; 84 | if (proposalId == null) { 85 | if (other.proposalId != null) 86 | return false; 87 | } else if (!proposalId.equals(other.proposalId)) 88 | return false; 89 | return true; 90 | } 91 | 92 | } 93 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/model/SFWaveWriteResult.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.model; 2 | 3 | import java.io.Serializable; 4 | 5 | /** 6 | * WriteResult class 7 | * This just holds the Salesforce object Id of the persisted data 8 | */ 9 | public class SFWaveWriteResult implements Serializable { 10 | private static final long serialVersionUID = -7451739773848100070L; 11 | 12 | private String sfObjId; 13 | 14 | public SFWaveWriteResult(String sfObjId) { 15 | this.sfObjId = sfObjId; 16 | } 17 | 18 | public String getSfObjId() { 19 | return sfObjId; 20 | } 21 | 22 | @Override 23 | public int hashCode() { 24 | final int prime = 31; 25 | int result = 1; 26 | result = prime * result + ((sfObjId == null) ? 
0 : sfObjId.hashCode()); 27 | return result; 28 | } 29 | 30 | @Override 31 | public boolean equals(Object obj) { 32 | if (this == obj) 33 | return true; 34 | if (obj == null) 35 | return false; 36 | if (getClass() != obj.getClass()) 37 | return false; 38 | SFWaveWriteResult other = (SFWaveWriteResult) obj; 39 | if (sfObjId == null) { 40 | if (other.sfObjId != null) 41 | return false; 42 | } else if (!sfObjId.equals(other.sfObjId)) 43 | return false; 44 | return true; 45 | } 46 | 47 | 48 | } -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/pipeline/AdDataJob.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.pipeline; 2 | 3 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_CLICKS; 4 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_IMPRESSIONS; 5 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_OPPORTUNITY_ID; 6 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_PROPOSAL_ID; 7 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_TYPE_INTEGER; 8 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_TYPE_STRING; 9 | 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | 13 | import com.google.api.services.bigquery.model.TableFieldSchema; 14 | import com.google.api.services.bigquery.model.TableRow; 15 | import com.google.api.services.bigquery.model.TableSchema; 16 | import com.google.cloud.dataflow.sdk.Pipeline; 17 | import com.google.cloud.dataflow.sdk.io.BigQueryIO; 18 | import com.google.cloud.dataflow.sdk.io.TextIO; 19 | import com.google.cloud.dataflow.sdk.options.Default; 20 | import com.google.cloud.dataflow.sdk.options.PipelineOptions; 21 | import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; 22 | import com.google.cloud.dataflow.sdk.options.Validation; 23 | import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner; 24 | import com.google.cloud.dataflow.sdk.transforms.ParDo; 25 | import com.google.cloud.dataflow.sdk.transforms.View; 26 | import com.google.cloud.dataflow.sdk.values.PCollection; 27 | import com.google.cloud.dataflow.sdk.values.PCollectionView; 28 | import com.google.wave.prototype.dataflow.coder.AggregateDataCoder; 29 | import com.google.wave.prototype.dataflow.function.AggregateDataEnricher; 30 | import com.google.wave.prototype.dataflow.function.CSVFormatter; 31 | import com.google.wave.prototype.dataflow.function.TableRowFormatter; 32 | import com.google.wave.prototype.dataflow.model.AggregatedData; 33 | import com.google.wave.prototype.dataflow.model.SFConfig; 34 | import com.google.wave.prototype.dataflow.sf.SFWaveDatasetWriter; 35 | import com.google.wave.prototype.dataflow.transform.AggregateEvents; 36 | import com.google.wave.prototype.dataflow.transform.SFWaveWrite; 37 | 38 | /** 39 | * Google Dataflow Job 40 | * 1. Reads the raw Ad Data from Google cloud storage 41 | * 2. Reads Salesforce Reference data from Google BigQuery 42 | * 3. Enrich Ad Data using Salesforce Reference data 43 | * 4. 
Publish the Enriched data into Salesforce Wave and Google BigQuery 44 | * To execute, provide the following configuration 45 | * --project=YOUR_PROJECT_ID 46 | * --stagingLocation=YOUR_STAGING_LOCATON 47 | * --inputCSV=GCS_LOCATION_OF_YOUR_RAW_AD_DATA 48 | * --inputTable=GOOGLE_BIGQUERY_TABLE_CONTAINING_SALESFORCE_REFERENCE_DATA 49 | * --output=GOOGLE_BIGQUERY_TABLE_TO_WHICH_ENRICHED_DATA_HAS_TO_BE_ADDED 50 | * --dataset=SALESFORCE WAVE DATASET 51 | * --sfMetadataFileLocation=GCS_LOCATION_OF_SALESFORCE_METADATA_FILE 52 | * --sfConfigFileLocation=GCS_LOCATION_OF_SALESFORCE_CONFIG_FILE 53 | */ 54 | public class AdDataJob { 55 | public static interface Options extends PipelineOptions { 56 | @Default.String("gs://sam-bucket1/SampleAdData/ad-server-data1.csv") 57 | String getInputCSV(); 58 | void setInputCSV(String value); 59 | 60 | @Default.String("ace-scarab-94723:SFDCReferenceData.SFRef") 61 | String getInputTable(); 62 | void setInputTable(String value); 63 | 64 | @Validation.Required 65 | @Default.String("ace-scarab-94723:SFDCReferenceData.EnrichedSample") 66 | String getOutput(); 67 | void setOutput(String value); 68 | 69 | @Default.String("SampleAdDataSet") 70 | String getDataset(); 71 | void setDataset(String dataset); 72 | 73 | @Default.String("gs://sam-bucket1/SampleAdData/metadata.json") 74 | String getSfMetadataFileLocation(); 75 | void setSfMetadataFileLocation(String sfMetadataFileLocation); 76 | 77 | @Default.String("gs://sam-bucket1/config/sf_source_config.json") 78 | String getSfConfigFileLocation(); 79 | void setSfConfigFileLocation(String sfConfigFileLocation); 80 | } 81 | 82 | private static TableSchema getSchema() { 83 | List fields = new ArrayList<>(); 84 | fields.add(constructTableFieldSchema(COL_PROPOSAL_ID, COL_TYPE_STRING)); 85 | fields.add(constructTableFieldSchema(COL_OPPORTUNITY_ID, COL_TYPE_STRING)); 86 | fields.add(constructTableFieldSchema(COL_CLICKS, COL_TYPE_INTEGER)); 87 | fields.add(constructTableFieldSchema(COL_IMPRESSIONS, COL_TYPE_INTEGER)); 88 | 89 | TableSchema tableSchema = new TableSchema().setFields(fields); 90 | tableSchema.setFields(fields); 91 | return tableSchema; 92 | } 93 | 94 | private static TableFieldSchema constructTableFieldSchema(String name, String type) { 95 | TableFieldSchema tableFieldSchema = new TableFieldSchema(); 96 | tableFieldSchema.setName(name); 97 | tableFieldSchema.setType(type); 98 | 99 | return tableFieldSchema; 100 | } 101 | 102 | private static List getEnrichedTableColumns() { 103 | List columns = new ArrayList(4); 104 | 105 | columns.add(COL_PROPOSAL_ID); 106 | columns.add(COL_OPPORTUNITY_ID); 107 | columns.add(COL_CLICKS); 108 | columns.add(COL_IMPRESSIONS); 109 | 110 | return columns; 111 | } 112 | 113 | private static SFWaveDatasetWriter createSFWaveDatasetWriter(AdDataJob.Options options) throws Exception { 114 | SFConfig sfConfig = SFConfig.getInstance(options.getSfConfigFileLocation(), options); 115 | return new SFWaveDatasetWriter(sfConfig, options.getDataset()); 116 | } 117 | 118 | public static void main(String[] args) throws Exception { 119 | // Helper if command line options are not provided 120 | if (args.length < 2) { 121 | args = new String[2]; 122 | args[0] = "--project=ace-scarab-94723"; 123 | args[1] = "--stagingLocation=gs://sam-bucket1/staging"; 124 | } 125 | 126 | Options options = PipelineOptionsFactory.fromArgs(args) 127 | .withValidation().as(Options.class); 128 | // Always executing using BlockingDataflowPipelineRunner 129 | options.setRunner(BlockingDataflowPipelineRunner.class); 130 | Pipeline p 
= Pipeline.create(options); 131 | 132 | // Reading the CSV present in GCS 133 | PCollection aggregated = p.apply(TextIO.Read.from(options.getInputCSV())) 134 | .apply(new AggregateEvents()) 135 | .setCoder(AggregateDataCoder.getInstance()); 136 | 137 | // Reading Salesforce reference data from Google BigQuery 138 | PCollection tableColl = p.apply(BigQueryIO.Read.from(options.getInputTable())); 139 | final PCollectionView> sideInput = tableColl.apply(View.asIterable()); 140 | // Salesforce Reference data passed as sideInput 141 | PCollection enriched = aggregated 142 | .apply(ParDo.withSideInputs(sideInput) 143 | .of((new AggregateDataEnricher(sideInput)))) 144 | .setCoder(AggregateDataCoder.getInstance()); 145 | 146 | // Converting into CSV 147 | PCollection enrichedCSV = enriched.apply(ParDo.of(new CSVFormatter())); 148 | // Writing the results into Salesforce Wave 149 | enrichedCSV 150 | .apply(new SFWaveWrite(createSFWaveDatasetWriter(options), options.getSfMetadataFileLocation())); 151 | 152 | // Populated BigQuery with enriched data 153 | enrichedCSV 154 | .apply(ParDo.of(new TableRowFormatter(getEnrichedTableColumns()))) 155 | .apply(BigQueryIO.Write 156 | .to(options.getOutput()) 157 | .withSchema(getSchema()) 158 | .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) 159 | .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)); 160 | p.run(); 161 | } 162 | 163 | } 164 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/pipeline/SFReferenceDataJob.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.pipeline; 2 | 3 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_ACCOUNT_ID; 4 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_OPPORTUNITY_ID; 5 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_PROPOSAL_ID; 6 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_TYPE_STRING; 7 | 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | 11 | import com.google.api.services.bigquery.model.TableFieldSchema; 12 | import com.google.api.services.bigquery.model.TableSchema; 13 | import com.google.cloud.dataflow.sdk.Pipeline; 14 | import com.google.cloud.dataflow.sdk.io.BigQueryIO; 15 | import com.google.cloud.dataflow.sdk.options.Default; 16 | import com.google.cloud.dataflow.sdk.options.Description; 17 | import com.google.cloud.dataflow.sdk.options.PipelineOptions; 18 | import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; 19 | import com.google.cloud.dataflow.sdk.options.Validation; 20 | import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner; 21 | import com.google.cloud.dataflow.sdk.transforms.Create; 22 | import com.google.cloud.dataflow.sdk.transforms.ParDo; 23 | import com.google.wave.prototype.dataflow.function.TableRowFormatter; 24 | import com.google.wave.prototype.dataflow.model.SFConfig; 25 | import com.google.wave.prototype.dataflow.sf.SFSOQLExecutor; 26 | import com.google.wave.prototype.dataflow.transform.SFRead; 27 | 28 | /** 29 | * Google Dataflow Job 30 | * 1. Read Salesforce Reference Data using {@link SFRead} 31 | * 2. 
Populate Google BigQuery Table with Salesforce Reference Data 32 | * To execute, provide the following configuration 33 | * --project=YOUR_PROJECT_ID 34 | * --stagingLocation=YOUR_STAGING_LOCATON 35 | * --output=GOOGLE_BIGQUERY_TABLE_TO_WHICH_SALESFORCE_REFERENCE_DATA_WILL_BE_POPULATED 36 | * --sfConfigFileLocation=GCS_LOCATION_OF_SALESFORCE_CONFIG_FILE 37 | * --sfQuery=SALESFORCE_SOQL_TO_FETCH_SALESFORCE_REFERENCE_DATA 38 | */ 39 | public class SFReferenceDataJob { 40 | 41 | private static interface Options extends PipelineOptions { 42 | @Description("BigQuery table to write to, specified as " 43 | + ":.. The dataset must already exist.") 44 | @Validation.Required 45 | String getOutput(); 46 | void setOutput(String value); 47 | 48 | @Default.String("gs://sam-bucket1/config/sf_source_config.json") 49 | String getSfConfigFileLocation(); 50 | void setSfConfigFileLocation(String sfConfigFileLocation); 51 | 52 | @Default.String("SELECT AccountId, Id, ProposalID__c FROM Opportunity where ProposalID__c != null") 53 | String getSfQuery(); 54 | void setSfQuery(String sfQuery); 55 | } 56 | 57 | private static TableSchema getSchema() { 58 | List fields = new ArrayList<>(); 59 | 60 | fields.add(constructTableFieldSchema(COL_ACCOUNT_ID, COL_TYPE_STRING)); 61 | fields.add(constructTableFieldSchema(COL_OPPORTUNITY_ID, COL_TYPE_STRING)); 62 | fields.add(constructTableFieldSchema(COL_PROPOSAL_ID, COL_TYPE_STRING)); 63 | 64 | TableSchema schema = new TableSchema().setFields(fields); 65 | return schema; 66 | } 67 | 68 | private static TableFieldSchema constructTableFieldSchema(String name, String type) { 69 | TableFieldSchema tableFieldSchema = new TableFieldSchema(); 70 | 71 | tableFieldSchema.setName(name); 72 | tableFieldSchema.setType(type); 73 | 74 | return tableFieldSchema; 75 | } 76 | 77 | private static List getSFRefTableColumns() { 78 | List columns = new ArrayList(4); 79 | 80 | columns.add(COL_ACCOUNT_ID); 81 | columns.add(COL_OPPORTUNITY_ID); 82 | columns.add(COL_PROPOSAL_ID); 83 | 84 | return columns; 85 | } 86 | 87 | public static void main(String args[]) throws Exception { 88 | if (args.length < 3) { 89 | args = new String[3]; 90 | args[0] = "--project=ace-scarab-94723"; 91 | args[1] = "--stagingLocation=gs://sam-bucket1/staging"; 92 | args[2] = "--output=ace-scarab-94723:SFDCReferenceData.SFRef"; 93 | } 94 | 95 | Options options = PipelineOptionsFactory.fromArgs(args) 96 | .withValidation().as(Options.class); 97 | options.setRunner(BlockingDataflowPipelineRunner.class); 98 | Pipeline p = Pipeline.create(options); 99 | 100 | // SFSOQLExecutor which will be used to execute SOQL query 101 | // SFConfig which will be used to create Salesforce Connection 102 | SFSOQLExecutor soqlExecutor = new SFSOQLExecutor(SFConfig.getInstance(options.getSfConfigFileLocation(), options)); 103 | 104 | // Executing pipeline 105 | p.apply(Create.of(options.getSfQuery())) 106 | // Reading from Salesforce 107 | .apply(new SFRead(soqlExecutor)) 108 | // Convert to TableRow 109 | .apply(ParDo.of(new TableRowFormatter(getSFRefTableColumns()))) 110 | // Wiring into BigQuery 111 | .apply(BigQueryIO.Write 112 | .to(options.getOutput()) 113 | .withSchema(getSchema()) 114 | .withCreateDisposition( 115 | BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) 116 | .withWriteDisposition( 117 | // Since all data are fetched from Salesforce, 118 | // we need to overwrite the existing data 119 | BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); 120 | p.run(); 121 | } 122 | 123 | } 124 | 
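In the job above, TableRowFormatter only produces correct rows if the column list is in the same order as the fields of the CSV rows produced by SFRead (AccountId, Id, ProposalID__c for the default SOQL query). A minimal sketch of that mapping outside a full pipeline run, assuming the Dataflow SDK's DoFnTester utility, placeholder column names and made-up Salesforce IDs:

```java
import java.util.Arrays;
import java.util.List;

import com.google.api.services.bigquery.model.TableRow;
import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
import com.google.wave.prototype.dataflow.function.TableRowFormatter;

public class TableRowFormatterSketch {
    public static void main(String[] args) throws Exception {
        // Column names must follow the field order of the SOQL query:
        // SELECT AccountId, Id, ProposalID__c FROM Opportunity
        // The literals below are placeholders; the job itself uses the
        // COL_* constants from JobConstants.
        List<String> columns = Arrays.asList("AccountId", "OpportunityId", "ProposalId");

        // DoFnTester runs a DoFn without building a whole pipeline
        DoFnTester<String, TableRow> tester = DoFnTester.of(new TableRowFormatter(columns));

        // One CSV row as SFRead would emit it (the trailing newline is stripped by the DoFn)
        List<TableRow> rows = tester.processBatch("001B000000H8MxF,006B000000D2VxQ,101\n");

        // Prints a TableRow with AccountId, OpportunityId and ProposalId set
        System.out.println(rows.get(0));
    }
}
```

If the column count does not match the number of fields in the CSV row, TableRowFormatter throws an exception and the job stops.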
-------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/sf/SFSOQLExecutor.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.sf; 2 | 3 | import java.io.Serializable; 4 | import java.util.ArrayList; 5 | import java.util.Arrays; 6 | import java.util.List; 7 | 8 | import org.slf4j.Logger; 9 | import org.slf4j.LoggerFactory; 10 | 11 | import com.google.wave.prototype.dataflow.model.SFConfig; 12 | import com.google.wave.prototype.dataflow.transform.SFRead; 13 | import com.sforce.soap.enterprise.EnterpriseConnection; 14 | import com.sforce.soap.enterprise.QueryResult; 15 | import com.sforce.soap.enterprise.sobject.SObject; 16 | 17 | /** 18 | * Can be used to execute a SF SOQL Query 19 | * It will be executed using the credentials provided in {@link SFConfig} 20 | */ 21 | public class SFSOQLExecutor implements Serializable { 22 | private static final long serialVersionUID = 296485933905679924L; 23 | 24 | private static final Logger LOG = LoggerFactory.getLogger(SFSOQLExecutor.class); 25 | 26 | private SFConfig sfConfig; 27 | 28 | public SFSOQLExecutor(SFConfig sfConfig) { 29 | this.sfConfig = sfConfig; 30 | } 31 | 32 | public List<SObject> executeQuery(String sfQuery) throws Exception { 33 | EnterpriseConnection connection = null; 34 | List<SObject> records = new ArrayList<SObject>(); 35 | 36 | try { 37 | connection = sfConfig.createEnterpriseConnection(); 38 | 39 | QueryResult result = connection.query(sfQuery); 40 | // First call results are added here 41 | records.addAll(Arrays.asList(result.getRecords())); 42 | String queryLocator = result.getQueryLocator(); 43 | LOG.info("Total number of records to be read :" + result.getSize()); 44 | 45 | // Salesforce will not return all the rows in a single shot if the result is huge 46 | // By default it will return 500 rows per call 47 | // To fetch further connection.queryMore is used with the locator of the latest result 48 | // result.isDone() will tell you whether all the records have been read 49 | boolean done = result.isDone(); 50 | while (!done) { 51 | result = connection.queryMore(queryLocator); 52 | records.addAll(Arrays.asList(result.getRecords())); 53 | queryLocator = result.getQueryLocator(); 54 | done = result.isDone(); 55 | } 56 | } finally { 57 | if (connection != null) { 58 | connection.logout(); 59 | } 60 | } 61 | 62 | return records; 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/sf/SFWaveDatasetWriter.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.sf; 2 | 3 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_ACTION; 4 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_ACTION_NONE; 5 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_ACTION_PROCESS; 6 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_CSV_FORMAT; 7 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_DATAFILE; 8 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_EDGEMART_ALIAS; 9 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_FORMAT; 10 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_INSIGHTS_EXTERNAL_DATA; 11 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_INSIGHTS_EXTERNAL_DATA_ID; 12 | import static
com.google.wave.prototype.dataflow.util.SFConstants.STR_INSIGHTS_EXTERNAL_DATA_PART; 13 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_METADATA_JSON; 14 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_OPERATION; 15 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_OVERWRITE_OPERATION; 16 | import static com.google.wave.prototype.dataflow.util.SFConstants.STR_PART_NUMBER; 17 | 18 | import java.io.Serializable; 19 | 20 | import org.slf4j.Logger; 21 | import org.slf4j.LoggerFactory; 22 | 23 | import com.google.wave.prototype.dataflow.model.SFConfig; 24 | import com.sforce.soap.partner.Error; 25 | import com.sforce.soap.partner.PartnerConnection; 26 | import com.sforce.soap.partner.SaveResult; 27 | import com.sforce.soap.partner.sobject.SObject; 28 | 29 | /** 30 | * This can be used to write metadata and datasetData into SF Wave 31 | * 1. It creates connection using {@link SFConfig} 32 | * 2. Writes specified Metadata 33 | * 3. Writes Dataset data 34 | * 4. Finalize the write 35 | * This uses Salesforce SOAP API (Partner WSDL) 36 | */ 37 | public class SFWaveDatasetWriter implements Serializable { 38 | private static final long serialVersionUID = 5714980864384207026L; 39 | 40 | private static final Logger LOG = LoggerFactory.getLogger(SFWaveDatasetWriter.class); 41 | 42 | private SFConfig sfConfig; 43 | private String datasetName; 44 | 45 | public SFWaveDatasetWriter(SFConfig sfConfig, String datasetName) { 46 | this.sfConfig = sfConfig; 47 | this.datasetName = datasetName; 48 | } 49 | 50 | public String write(byte[] metadata, byte[] datasetData) throws Exception { 51 | PartnerConnection connection = null; 52 | try { 53 | connection = sfConfig.createPartnerConnection(); 54 | String parentId = publishMetaData(metadata, connection); 55 | publish(datasetData, parentId, connection); 56 | finalizeWavePublish(parentId, connection); 57 | 58 | return parentId; 59 | } finally { 60 | if (connection != null) { 61 | connection.logout(); 62 | } 63 | } 64 | } 65 | 66 | private void publish(byte[] content, String parentId, PartnerConnection connection) throws Exception { 67 | // Contents are being pushed here 68 | SObject dataSObject = new SObject(); 69 | dataSObject.setType(STR_INSIGHTS_EXTERNAL_DATA_PART); 70 | dataSObject.setField(STR_DATAFILE, content); 71 | LOG.trace("Writing this data into WAVE : " + new String(content)); 72 | dataSObject.setField(STR_INSIGHTS_EXTERNAL_DATA_ID, parentId); 73 | // Since the each bundle is max of 10 MB we will have only one part 74 | // Hence part number is always set to 1 75 | dataSObject.setField(STR_PART_NUMBER, 1); 76 | 77 | SaveResult[] dataPartPublishResults = connection.create(new SObject[] { dataSObject }); 78 | checkResults(dataPartPublishResults); 79 | } 80 | 81 | 82 | private void finalizeWavePublish(String parentId, PartnerConnection connection) throws Exception { 83 | SObject metaDataSObject = new SObject(); 84 | metaDataSObject.setType(STR_INSIGHTS_EXTERNAL_DATA); 85 | // Action set to process, which should finalize the DataPart published so on 86 | metaDataSObject.setField(STR_ACTION, STR_ACTION_PROCESS); 87 | // Using the Object Id during metadata publish 88 | metaDataSObject.setId(parentId); 89 | 90 | SaveResult[] metadataPublishResults = connection.update(new SObject[] {metaDataSObject}); 91 | checkResults(metadataPublishResults); 92 | } 93 | 94 | private String publishMetaData(byte[] metadata, PartnerConnection connection) throws Exception { 95 | // Metadata of a dataset is being 
published here 96 | SObject metadataSObject = new SObject(); 97 | metadataSObject.setType(STR_INSIGHTS_EXTERNAL_DATA); 98 | metadataSObject.setField(STR_FORMAT, STR_CSV_FORMAT); 99 | metadataSObject.setField(STR_EDGEMART_ALIAS, datasetName); 100 | metadataSObject.setField(STR_METADATA_JSON, metadata); 101 | metadataSObject.setField(STR_OPERATION, STR_OVERWRITE_OPERATION); 102 | // Action is None here. It will be Process only after all data part has been created 103 | metadataSObject.setField(STR_ACTION, STR_ACTION_NONE); 104 | 105 | SaveResult[] metadataPublishResults = connection.create(new SObject[] { metadataSObject }); 106 | return checkResults(metadataPublishResults); 107 | } 108 | 109 | private String checkResults(SaveResult[] publishResults) throws Exception { 110 | for (SaveResult publishResult : publishResults) { 111 | if (publishResult.isSuccess()) { 112 | LOG.debug("Flushed to wave : " + publishResult.getId()); 113 | return publishResult.getId(); 114 | } else { 115 | StringBuilder sfWaveErrMsg = new StringBuilder(); 116 | sfWaveErrMsg.append("Error while flushing data to wave.\n"); 117 | sfWaveErrMsg.append("Salesforce Job Id : " + publishResult.getId() + "\n"); 118 | sfWaveErrMsg.append("Salesforce error message : "); 119 | // Errors are concatenated to get a meaning message 120 | Error[] errors = publishResult.getErrors(); 121 | for (int i = 0; i < errors.length; i++) { 122 | sfWaveErrMsg.append(errors[i].getMessage()); 123 | } 124 | 125 | LOG.error(sfWaveErrMsg.toString()); 126 | 127 | // Stopping Job if publish fails 128 | throw new Exception(sfWaveErrMsg.toString()); 129 | } 130 | } 131 | 132 | return null; 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/transform/AggregateEvents.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.transform; 2 | 3 | import com.google.cloud.dataflow.sdk.transforms.DoFn; 4 | import com.google.cloud.dataflow.sdk.transforms.GroupByKey; 5 | import com.google.cloud.dataflow.sdk.transforms.PTransform; 6 | import com.google.cloud.dataflow.sdk.transforms.ParDo; 7 | import com.google.cloud.dataflow.sdk.values.KV; 8 | import com.google.cloud.dataflow.sdk.values.PCollection; 9 | import com.google.wave.prototype.dataflow.model.AggregatedData; 10 | import com.google.wave.prototype.dataflow.util.JobConstants; 11 | 12 | /** 13 | * Aggregate the AdData using the proposalId and event present in AdData CSV 14 | * AdData CSV data is with the below headers, 15 | * id,time,local_host,pixel_id,client_ip,request_url,cookie_id,event,version,success_code,proposal_id 16 | * In this event will be either click or Impression. There will be multiple rows with a single proposal_id 17 | * This PTransform will transform such rows into {@link AggregateEvents} 18 | */ 19 | public class AggregateEvents extends 20 | PTransform, PCollection> { 21 | private static final long serialVersionUID = 3238291110118750209L; 22 | 23 | @Override 24 | public PCollection apply(PCollection rawdata) { 25 | // Just selecting ProposalId and events 26 | PCollection> filteredData = rawdata.apply(ParDo 27 | .of(new FilterRawData())); 28 | // Grouping all events for a proposalId 29 | PCollection>> groupedData = filteredData 30 | .apply(GroupByKey. 
create()); 31 | // Counting the number of clicks and impressions for a proposalId 32 | return groupedData.apply(ParDo.of(new CountEvents())); 33 | } 34 | 35 | /** 36 | * Construct KV with proposalId as key and event as value for a given CSV Row (AdData) 37 | * CSV Row will be the input for this DoFn 38 | * Output will be a KV with proposal_id in the row as key and event in the row as value 39 | * For example, for the below input 40 | * 1,01-01-14 9:00,ip-10-150-38-122/10.150.38.122,0,70.209.198.223,http://sample.com,3232,Impression,3,1,101 41 | * output will be 42 | * KV.of(101, Impression) 43 | */ 44 | protected static class FilterRawData extends DoFn> { 45 | private static final long serialVersionUID = 6002612407682561915L; 46 | private static int COL_PROPOSAL_ID = 10; 47 | private static int COL_EVENT = 7; 48 | 49 | @Override 50 | public void processElement( 51 | DoFn>.ProcessContext c) 52 | throws Exception { 53 | // CSVRow will be like 54 | // id,time,local_host,pixel_id,client_ip,request_url,cookie_id,event,version,success_code,proposal_id 55 | // Column 7 and 10. i.e. event and proposal_id 56 | String csvRow = c.element(); 57 | String[] columns = csvRow.split(JobConstants.STR_COMMA); 58 | // Result will be KV with proposal_id as key and event as value 59 | c.output(KV.of(columns[COL_PROPOSAL_ID], columns[COL_EVENT])); 60 | } 61 | 62 | } 63 | 64 | /** 65 | * Count the number of clicks and number of Impressions for a specific ProposalId 66 | * Input for this DoFn will be KV with key as proposalId and value as events. Like, 67 | * KV(101, ("Impression", "Impression", "Click") 68 | * Output will be {@link AggregateEvents} with the proposalId and number of clicks and Impressions 69 | */ 70 | public static class CountEvents extends 71 | DoFn>, AggregatedData> { 72 | private static final long serialVersionUID = 6002612407682561915L; 73 | private static final String STR_IMPRESSION = "impression"; 74 | private static final String STR_CLICK = "click"; 75 | 76 | @Override 77 | public void processElement( 78 | DoFn>, AggregatedData>.ProcessContext c) 79 | throws Exception { 80 | // Element will be like, 81 | // KV(101, ("Impression", "Impression", "Click") 82 | KV> proposalIdEventsKV = c.element(); 83 | // Getting the events alone 84 | // ("Impression", "Impression", "Click") 85 | Iterable events = proposalIdEventsKV.getValue(); 86 | int clicks = 0; 87 | int impressions = 0; 88 | // Iterating events and increasing the click and impression count 89 | for (String event : events) { 90 | if (event.equalsIgnoreCase(STR_IMPRESSION)) { 91 | impressions++; 92 | } else if (event.equalsIgnoreCase(STR_CLICK)) { 93 | clicks++; 94 | } 95 | } 96 | 97 | // Constructing new AggregatedData with proposalId, Click Count and Impression Count 98 | c.output(new AggregatedData(proposalIdEventsKV.getKey(), clicks, impressions)); 99 | } 100 | } 101 | } -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/transform/SFRead.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.transform; 2 | 3 | import java.util.List; 4 | 5 | import com.google.cloud.dataflow.sdk.transforms.DoFn; 6 | import com.google.cloud.dataflow.sdk.transforms.GroupByKey; 7 | import com.google.cloud.dataflow.sdk.transforms.PTransform; 8 | import com.google.cloud.dataflow.sdk.transforms.ParDo; 9 | import com.google.cloud.dataflow.sdk.values.KV; 10 | import 
com.google.cloud.dataflow.sdk.values.PCollection; 11 | import com.google.wave.prototype.dataflow.sf.SFSOQLExecutor; 12 | import com.google.wave.prototype.dataflow.util.CSVUtil; 13 | import com.sforce.soap.enterprise.sobject.SObject; 14 | 15 | /** 16 | * PTransform to read the Salesforce object using SOQL 17 | * SOQL query present in pipeline will be executed and the result will be converted into CSV 18 | * This uses Salesforce SOAP API (Enterprise.wsdl) to execute SOQL 19 | * A Sample SOQL will look like, 20 | * SELECT AccountId, Id FROM Opportunity 21 | */ 22 | public final class SFRead extends PTransform, PCollection>{ 23 | private static final long serialVersionUID = -7168554842895484301L; 24 | 25 | private final int noOfBundles; 26 | private final SFSOQLExecutor soqlExecutor; 27 | 28 | public SFRead(SFSOQLExecutor soqlExecutor) { 29 | // Default to 10 30 | this.noOfBundles = 10; 31 | this.soqlExecutor = soqlExecutor; 32 | } 33 | 34 | public SFRead(SFSOQLExecutor soqlExecutor, int noOfBundles) { 35 | this.noOfBundles = noOfBundles; 36 | this.soqlExecutor = soqlExecutor; 37 | } 38 | 39 | @Override 40 | public PCollection apply(PCollection input) { 41 | return input 42 | // Executing SOQL Query 43 | .apply(ParDo.of(new ExecuteSOQL(soqlExecutor, noOfBundles))) 44 | // Creating bundles based on the key 45 | // Key will be hash modulo 46 | .apply(GroupByKey.create()) 47 | .apply(ParDo.of(new RegroupRecords())); 48 | } 49 | 50 | /** 51 | * Splitting the grouped data as individual records 52 | */ 53 | private class RegroupRecords extends DoFn>, String> { 54 | private static final long serialVersionUID = -2126735721477220174L; 55 | 56 | @Override 57 | public void processElement( 58 | DoFn>, String>.ProcessContext c) 59 | throws Exception { 60 | // Adding the result as individual Salesforce Data 61 | Iterable sfRefData = c.element().getValue(); 62 | for (String csvRow : sfRefData) { 63 | c.output(csvRow); 64 | } 65 | } 66 | 67 | } 68 | 69 | /** 70 | * Executes SOQL Query and provides the result as CSV in bundles 71 | * Result of the SOQL query will be converted into CSV 72 | * Bundles will be created according to the noOfBundles specified 73 | */ 74 | public static class ExecuteSOQL extends DoFn> { 75 | private static final long serialVersionUID = 3227568229914179295L; 76 | 77 | private int noOfBundles; 78 | private SFSOQLExecutor soqlExecutor; 79 | 80 | public ExecuteSOQL(SFSOQLExecutor soqlExecutor, int noOfBundles) { 81 | this.soqlExecutor = soqlExecutor; 82 | this.noOfBundles = noOfBundles; 83 | } 84 | 85 | @Override 86 | public void processElement( 87 | DoFn>.ProcessContext c) 88 | throws Exception { 89 | String sfQuery = c.element(); 90 | // Execute SOQL 91 | List sfResults = soqlExecutor.executeQuery(sfQuery); 92 | // Convert to CSV 93 | CSVUtil csvUtil = new CSVUtil(sfQuery); 94 | for (int i = 0, size = sfResults.size(); i < size; i++) { 95 | String csvRow = csvUtil.getAsCSV(sfResults.get(i)); 96 | // Getting hash Modulo 97 | int hashModulo = Math.abs(csvRow.hashCode() % noOfBundles); 98 | c.output(KV.of(hashModulo, csvRow)); 99 | } 100 | } 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/transform/SFWaveWrite.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.transform; 2 | 3 | import org.slf4j.Logger; 4 | import org.slf4j.LoggerFactory; 5 | 6 | import 
com.google.cloud.dataflow.sdk.options.PipelineOptions; 7 | import com.google.cloud.dataflow.sdk.transforms.Combine; 8 | import com.google.cloud.dataflow.sdk.transforms.DoFn; 9 | import com.google.cloud.dataflow.sdk.transforms.GroupByKey; 10 | import com.google.cloud.dataflow.sdk.transforms.PTransform; 11 | import com.google.cloud.dataflow.sdk.transforms.ParDo; 12 | import com.google.cloud.dataflow.sdk.transforms.Sum; 13 | import com.google.cloud.dataflow.sdk.transforms.View; 14 | import com.google.cloud.dataflow.sdk.values.KV; 15 | import com.google.cloud.dataflow.sdk.values.PCollection; 16 | import com.google.cloud.dataflow.sdk.values.PCollectionView; 17 | import com.google.wave.prototype.dataflow.model.SFConfig; 18 | import com.google.wave.prototype.dataflow.model.SFWaveWriteResult; 19 | import com.google.wave.prototype.dataflow.sf.SFWaveDatasetWriter; 20 | import com.google.wave.prototype.dataflow.util.FileUtil; 21 | 22 | /** 23 | * PTransform to write the dataset content into SF Wave This uses Salesforce 24 | * SOAP API (Partner WSDL) to publish data into Salesforce Wave This PTransform 25 | * requires the following input {@link SFWaveDatasetWriter} - Writer with 26 | * {@link SFConfig} which will be used by this transform sfMetadataFileLocation 27 | * - A Salesforce wave metadata file describing the data to be published to wave 28 | * Can be a local file or GS file Refer 29 | * https://resources.docs.salesforce.com/sfdc 30 | * /pdf/bi_dev_guide_ext_data_format.pdf 31 | */ 32 | public class SFWaveWrite extends 33 | PTransform, PCollection> { 34 | private static final long serialVersionUID = 5830880169795002498L; 35 | private static final Logger LOG = LoggerFactory 36 | .getLogger(SFWaveWrite.class); 37 | 38 | private final SFWaveDatasetWriter writer; 39 | private final String sfMetadataFileLocation; 40 | 41 | public SFWaveWrite(SFWaveDatasetWriter writer, String sfMetadataFileLocation) { 42 | this.writer = writer; 43 | this.sfMetadataFileLocation = sfMetadataFileLocation; 44 | } 45 | 46 | @Override 47 | public PCollection apply(PCollection rowData) { 48 | LOG.debug("SFWaveWrite starts"); 49 | // Number of bundles calculated here 50 | PCollection noOfBundles = rowData 51 | .apply(new CalculateNoOfBundles()); 52 | PCollectionView sideInput = noOfBundles.apply(View 53 | . asSingleton()); 54 | // Making KV with hash modulo as key and CSV row as value 55 | PCollection> kvData = rowData 56 | .apply(ParDo.withSideInputs(sideInput).of( 57 | new DistributeRowData(sideInput))); 58 | // Creating bundles using GroupByKey 59 | PCollection>> groupedRows = kvData 60 | .apply(GroupByKey. 
create()); 61 | // Writing Data into Salesforce Wave 62 | PCollection writeResult = groupedRows.apply(ParDo 63 | .of(new Write(writer, sfMetadataFileLocation))); 64 | 65 | LOG.debug("SFWaveWrite ends"); 66 | return writeResult; 67 | } 68 | 69 | /** 70 | * Calculates the Number of bundles to be created Calculation is based on 71 | * the size of the data to be sent to Salesforce Wave Size of the data is 72 | * calculated using {@code String.length()} and then {@code Sum.SumLongFn} 73 | */ 74 | public static class CalculateNoOfBundles extends 75 | PTransform, PCollection> { 76 | private static final long serialVersionUID = -7383871712471335638L; 77 | private static final String INDIVIDUAL_SIZE_PAR_DO_NAME = "IndividualSize"; 78 | private static final String NO_OF_BUNDLES_PAR_DO_NAME = "NoOfBundles"; 79 | 80 | @Override 81 | public PCollection apply(PCollection input) { 82 | return input.apply(ParDo.named(INDIVIDUAL_SIZE_PAR_DO_NAME).of( 83 | 84 | new DoFn() { 85 | private static final long serialVersionUID = -6374354958403597940L; 86 | 87 | @Override 88 | public void processElement(ProcessContext c) throws Exception { 89 | // String.length is used to get the size of data for an 90 | // individual row 91 | // As further grouping takes place, the additional size for 92 | // UTF-16 characters are ignored 93 | String rowToBePersisted = c.element(); 94 | c.output(Integer.valueOf(rowToBePersisted.length()) 95 | .longValue()); 96 | } 97 | })) 98 | // Calculating the total size of the data to be persisted into 99 | // Salesforce Wave 100 | .apply(Combine.globally(new Sum.SumLongFn())) 101 | // Number of bundles calculated based on the size of data 102 | .apply(ParDo.named(NO_OF_BUNDLES_PAR_DO_NAME).of( 103 | new BundleCount())); 104 | } 105 | } 106 | 107 | /** 108 | * Count the number of bundles to be created Number of bundles to be created 109 | * is based on the size of the data to be persisted into Salesforce wave At 110 | * a max Saleforce can accept 10MB So size of a bundle should not be more 111 | * than 10MB 112 | */ 113 | public static class BundleCount extends DoFn { 114 | private static final long serialVersionUID = -7446604319456830150L; 115 | 116 | @Override 117 | public void processElement(DoFn.ProcessContext c) 118 | throws Exception { 119 | // No of Bundles = totalSize / (1024 * 1024 * 10) 120 | // 1024 * 1024 is to convert into MB 121 | // Maximum support in Salesforce Wave API is 10 MB 122 | // For example, if the size of the data is 335544320, then 33 123 | // bundles will be created 124 | // Math.round(335544320/(1024 * 1024 * 10)) + 1 = 33 125 | Long totalDataSize = c.element(); 126 | Long maxBundleSize = 1024 * 1024 * 10l; 127 | if (totalDataSize > maxBundleSize) { 128 | c.output(Math.round(totalDataSize / maxBundleSize) + 1); 129 | } else { 130 | // As the size less than 10MB the data can be handled in single 131 | // bundle itself 132 | c.output(1); 133 | } 134 | } 135 | 136 | } 137 | 138 | /** 139 | * Distributes the data evenly to bundles If the data is of size 32 MB then 140 | * data will be distributed to 4 bundles of 8MB each 141 | */ 142 | public static class DistributeRowData extends 143 | DoFn> { 144 | private static final long serialVersionUID = 3917848069436988535L; 145 | private PCollectionView noOfBundlesPCol; 146 | 147 | // Number of bundles is calculated in CalculateNoOfBundles and 148 | // provided here as sideInput 149 | public DistributeRowData(PCollectionView noOfBundles) { 150 | this.noOfBundlesPCol = noOfBundles; 151 | } 152 | 153 | @Override 154 | public 
void processElement( 155 | DoFn>.ProcessContext c) 156 | throws Exception { 157 | // Getting the number of bundles from sideInput 158 | Integer noOfBundles = c.sideInput(noOfBundlesPCol); 159 | String waveCSVData = c.element(); 160 | // Using hash modulo to evenly distribute data across bundles 161 | int hash = Math.abs(waveCSVData.hashCode() % noOfBundles); 162 | // Using the hash as key which can be grouped later to create 163 | // bundles 164 | c.output(KV.of(hash, waveCSVData)); 165 | } 166 | } 167 | 168 | /** 169 | * DoFn which takes care of writing the datasets into Salesforce Wave This 170 | * uses {@link SFWaveDatasetWriter} 171 | */ 172 | public static class Write extends 173 | DoFn>, SFWaveWriteResult> { 174 | private static final long serialVersionUID = -1875427181542264934L; 175 | 176 | private final SFWaveDatasetWriter writer; 177 | private final String sfMetadataFileLocation; 178 | 179 | public Write(SFWaveDatasetWriter writer, String sfMetadataFileLocation) { 180 | this.writer = writer; 181 | this.sfMetadataFileLocation = sfMetadataFileLocation; 182 | } 183 | 184 | @Override 185 | public void processElement( 186 | DoFn>, SFWaveWriteResult>.ProcessContext c) 187 | throws Exception { 188 | 189 | // Converting the grouped records into bytes 190 | KV> groupedRecords = c.element(); 191 | Iterable csvRows = groupedRecords.getValue(); 192 | byte[] datasetData = getAsBytes(csvRows); 193 | 194 | String sfObjId = writer.write( 195 | getMetadataContent(c.getPipelineOptions()), datasetData); 196 | SFWaveWriteResult sfWaveWriteResult = new SFWaveWriteResult(sfObjId); 197 | c.output(sfWaveWriteResult); 198 | } 199 | 200 | private byte[] getMetadataContent(PipelineOptions options) 201 | throws Exception { 202 | String content = FileUtil.getContent(sfMetadataFileLocation, 203 | options); 204 | return content.getBytes(); 205 | } 206 | 207 | private byte[] getAsBytes(Iterable waveRows) { 208 | // Converting all CSV rows into single String which will be 209 | // published to Salesforce WAVE 210 | StringBuilder csvRows = new StringBuilder(); 211 | // Row may be like 212 | // AcccountId,OpportunityId,ClickCount,ImpressionCount 213 | for (String individualRow : waveRows) { 214 | csvRows.append(individualRow); 215 | csvRows.append('\n'); 216 | } 217 | 218 | return csvRows.toString().getBytes(); 219 | } 220 | 221 | } 222 | 223 | } 224 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/util/CSVUtil.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.util; 2 | 3 | import java.io.ByteArrayInputStream; 4 | import java.io.ByteArrayOutputStream; 5 | import java.io.IOException; 6 | import java.util.ArrayList; 7 | import java.util.HashMap; 8 | import java.util.List; 9 | import java.util.Map; 10 | 11 | import javax.xml.namespace.QName; 12 | import javax.xml.parsers.DocumentBuilder; 13 | import javax.xml.parsers.DocumentBuilderFactory; 14 | import javax.xml.parsers.ParserConfigurationException; 15 | 16 | import net.sf.jsqlparser.parser.CCJSqlParserUtil; 17 | import net.sf.jsqlparser.schema.Column; 18 | import net.sf.jsqlparser.statement.select.PlainSelect; 19 | import net.sf.jsqlparser.statement.select.Select; 20 | import net.sf.jsqlparser.statement.select.SelectExpressionItem; 21 | import net.sf.jsqlparser.statement.select.SelectItem; 22 | 23 | import org.slf4j.Logger; 24 | import org.slf4j.LoggerFactory; 25 | import org.w3c.dom.Document; 26 | 
import org.w3c.dom.Node; 27 | import org.w3c.dom.NodeList; 28 | import org.xml.sax.SAXException; 29 | 30 | import com.sforce.soap.enterprise.sobject.SObject; 31 | import com.sforce.ws.bind.TypeMapper; 32 | import com.sforce.ws.parser.XmlOutputStream; 33 | 34 | /** 35 | * Utility to convert Salesforce SObject into CSV 36 | * It requires the SOQL query to get the fields queried from Salesforce 37 | */ 38 | public class CSVUtil { 39 | private static final Logger LOG = LoggerFactory.getLogger(CSVUtil.class); 40 | 41 | /** Columns queried from Salesforce */ 42 | private List<String> columnNames = new ArrayList<String>(); 43 | 44 | /** 45 | * @param soqlQuery - SOQL query used to fetch Salesforce Reference data 46 | * @throws Exception 47 | */ 48 | public CSVUtil(String soqlQuery) throws Exception { 49 | // Parsing the SOQL Query to get the columns queried from Salesforce 50 | Select stmt = (Select) CCJSqlParserUtil.parse(soqlQuery); 51 | PlainSelect plainSelect = (PlainSelect) stmt.getSelectBody(); 52 | // SelectItems contains the columns to be selected 53 | List<SelectItem> selectItems = plainSelect.getSelectItems(); 54 | for (SelectItem selectItem : selectItems) { 55 | // We will get only columns as expressions are not supported 56 | Column column = (Column) ((SelectExpressionItem) selectItem).getExpression(); 57 | columnNames.add(column.getColumnName()); 58 | } 59 | 60 | LOG.debug("Columns from SOQL Query " + columnNames); 61 | } 62 | 63 | /** 64 | * @param sObject One of the results of executing the SOQL query 65 | * @return Converted CSV data from SObject 66 | * @throws Exception 67 | */ 68 | public String getAsCSV(SObject sObject) throws Exception { 69 | StringBuilder csv = new StringBuilder(); 70 | 71 | // Reading the SObject as XML Document 72 | Document doc = readDocument(sObject); 73 | // Reading the fields present in XML document 74 | Map<String, String> fieldMap = readFields(doc); 75 | for (int i = 0, size = columnNames.size(); i < size; i++) { 76 | if (i != 0) { 77 | csv.append(','); 78 | } 79 | 80 | // Getting the corresponding value from the fieldMap using columns constructed from SOQL query 81 | String fieldValue = fieldMap.get(columnNames.get(i)); 82 | if (fieldValue != null) { 83 | csv.append(fieldValue); 84 | } 85 | } 86 | 87 | // Completing a row 88 | csv.append('\n'); 89 | 90 | LOG.debug("Returning CSV " + csv); 91 | return csv.toString(); 92 | } 93 | 94 | private Map<String, String> readFields(Document doc) { 95 | // XML will be like 96 | // <result> 97 | //   <sf:AccountId>1233</sf:AccountId> 98 | //   <sf:Id>1234</sf:Id> 99 | //   <sf:ProposalID__c>101</sf:ProposalID__c> 100 | // </result> 101 | // 102 | // 103 | // Here doc is the XML document whose root element is <result> 104 | Node parentElement = doc.getChildNodes().item(0); 105 | // Here parentElement is <result> 106 | NodeList childNodes = parentElement.getChildNodes(); 107 | // Child Nodes are <sf:AccountId>, <sf:Id> and <sf:ProposalID__c> 108 | Map<String, String> fieldValueMap = new HashMap<String, String>(); 109 | if (childNodes != null && childNodes.getLength() > 0) { 110 | for (int i = 0, size = childNodes.getLength(); i < size; i++) { 111 | Node item = childNodes.item(i); 112 | // Removing prefix as the column name present in SOQL will not have it 113 | // This nodename will be compared with fields queried in SOQL 114 | fieldValueMap.put(stripPrefix(item.getNodeName()), item.getTextContent()); 115 | } 116 | } 117 | 118 | return fieldValueMap; 119 | } 120 | 121 | private String stripPrefix(String nodeName) { 122 | return strip(nodeName, ':'); 123 | } 124 | 125 | private String strip(String str, char separator) { 126 | int aliasIndex = str.indexOf(separator); 127 | if (aliasIndex != -1) { 128 | return str.substring(aliasIndex + 1); 129 | } 130 | 131 | return str; 132 | } 133 | 134 | private Document 
readDocument(SObject sObject) throws Exception { 135 | ByteArrayInputStream bis = null; 136 | XmlOutputStream xmlOutputStream = null; 137 | 138 | try { 139 | // Getting the doc as 140 | // As Salesforce SOAP API is used converting to XML is the only option 141 | QName element = new QName("urn:sobject", "result"); 142 | ByteArrayOutputStream bos = new ByteArrayOutputStream(); 143 | 144 | xmlOutputStream = new XmlOutputStream(bos, false); 145 | xmlOutputStream.startDocument(); 146 | // Writes all the fields to outputStream 147 | sObject.write(element, xmlOutputStream, new TypeMapper()); 148 | xmlOutputStream.endDocument(); 149 | 150 | bis = new ByteArrayInputStream(bos.toByteArray()); 151 | // Converting it as DOM object 152 | DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); 153 | DocumentBuilder docBuilder = builderFactory.newDocumentBuilder(); 154 | return docBuilder.parse(bis); 155 | } catch (ParserConfigurationException | SAXException e) { 156 | throw new Exception(e); 157 | } finally { 158 | if (bis != null) { 159 | try { 160 | bis.close(); 161 | } catch (IOException ioe) { 162 | LOG.warn("Error while closing Stream", ioe); 163 | } 164 | 165 | if (xmlOutputStream != null) { 166 | // This will make sure the ByteArrayOutputStream provided is also closed 167 | try { 168 | xmlOutputStream.close(); 169 | } catch (IOException ioe) { 170 | LOG.warn("Error while closing Stream", ioe); 171 | } 172 | } 173 | } 174 | } 175 | } 176 | 177 | } 178 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/util/FileUtil.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.util; 2 | 3 | import java.io.File; 4 | 5 | import org.apache.commons.io.Charsets; 6 | import org.apache.commons.io.FileUtils; 7 | import org.apache.commons.lang3.StringUtils; 8 | 9 | import com.google.cloud.dataflow.sdk.options.PipelineOptions; 10 | 11 | /** 12 | * Simple Utility to read to the contents from file 13 | * File can be present in GCS or from local file system 14 | */ 15 | public class FileUtil { 16 | 17 | public static String getContent(String fileLocation, PipelineOptions options) throws Exception { 18 | // Have separate reader for GS files and local files 19 | if (fileLocation.startsWith(SFConstants.GS_FILE_PREFIX)) { 20 | return readFromGCS(fileLocation, options); 21 | } else { 22 | return readFromLocal(fileLocation); 23 | } 24 | } 25 | 26 | private static String readFromLocal(String configFileLocation) throws Exception { 27 | // Removing file:// prefix 28 | String fileLocation = StringUtils.substringAfter(configFileLocation, SFConstants.LOCAL_FILE_PREFIX); 29 | // Using commons-io utility to read the file as String 30 | return FileUtils.readFileToString(new File(fileLocation), Charsets.UTF_8); 31 | } 32 | 33 | private static String readFromGCS(String configFileLocation, 34 | PipelineOptions options) throws Exception { 35 | GCSFileUtil gcsFileUtil = new GCSFileUtil(options); 36 | byte[] contents = gcsFileUtil.read(configFileLocation); 37 | return new String(contents); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/util/GCSFileUtil.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.util; 2 | 3 | import java.nio.ByteBuffer; 4 | import 
java.nio.channels.SeekableByteChannel; 5 | 6 | import com.google.cloud.dataflow.sdk.options.PipelineOptions; 7 | import com.google.cloud.dataflow.sdk.util.GcsUtil; 8 | import com.google.cloud.dataflow.sdk.util.GcsUtil.GcsUtilFactory; 9 | import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; 10 | 11 | /** 12 | * A Google Cloud Storage utility which can be used to read the files present in GCS 13 | * This utility can be used only for the Jobs running in Google Dataflow 14 | * This makes use of {@code GcsUtil} and {@code GcsPath} to read the file present in GCS 15 | */ 16 | public class GCSFileUtil { 17 | private GcsUtil gcsUtil; 18 | 19 | public GCSFileUtil(PipelineOptions options) { 20 | // PipelineOption is required to create GcsUtil 21 | // hence this can be used only for Google Dataflow jobs 22 | gcsUtil = new GcsUtilFactory().create(options); 23 | } 24 | 25 | public byte[] read(String filePath) throws Exception { 26 | GcsPath gcsPath = GcsPath.fromUri(filePath); 27 | SeekableByteChannel seekableByteChannel = gcsUtil.open(gcsPath); 28 | // Allocating ByteBuffer based on the file size 29 | ByteBuffer fileContent = ByteBuffer.allocate(Long.valueOf(gcsUtil.fileSize(gcsPath)).intValue()); 30 | seekableByteChannel.read(fileContent); 31 | 32 | return fileContent.array(); 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/util/JobConstants.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.util; 2 | 3 | public interface JobConstants { 4 | public static final String COL_ACCOUNT_ID = "AccountId"; 5 | public static final String COL_OPPORTUNITY_ID = "OpportunityId"; 6 | public static final String COL_PROPOSAL_ID = "ProposalId"; 7 | public static final String COL_CLICKS = "Clicks"; 8 | public static final String COL_IMPRESSIONS = "Impressions"; 9 | 10 | public static final String COL_TYPE_STRING = "STRING"; 11 | public static final String COL_TYPE_INTEGER = "INTEGER"; 12 | 13 | public static final String STR_COMMA = ","; 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/com/google/wave/prototype/dataflow/util/SFConstants.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.util; 2 | 3 | public interface SFConstants { 4 | public static String STR_INSIGHTS_EXTERNAL_DATA = "InsightsExternalData"; 5 | public static String STR_INSIGHTS_EXTERNAL_DATA_PART = "InsightsExternalDataPart"; 6 | public static String STR_INSIGHTS_EXTERNAL_DATA_ID = "InsightsExternalDataId"; 7 | 8 | public static String STR_FORMAT = "Format"; 9 | public static String STR_DATAFILE = "DataFile"; 10 | public static String STR_EDGEMART_ALIAS = "EdgemartAlias"; 11 | public static String STR_METADATA_JSON = "MetadataJson"; 12 | public static String STR_OPERATION = "Operation"; 13 | public static String STR_ACTION = "Action"; 14 | public static String STR_PART_NUMBER= "PartNumber"; 15 | 16 | public static String STR_CSV_FORMAT = "Csv"; 17 | public static String STR_OVERWRITE_OPERATION = "Overwrite"; 18 | public static String STR_ACTION_NONE = "None"; 19 | public static String STR_ACTION_PROCESS = "Process"; 20 | 21 | public static String GS_FILE_PREFIX = "gs://"; 22 | public static String LOCAL_FILE_PREFIX = "file://"; 23 | } 24 | -------------------------------------------------------------------------------- 
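Before the test sources, here is a rough sketch of how the main-source pieces above could be wired into a single Dataflow pipeline that reads Salesforce reference data and publishes it to Wave. This is illustrative only and is not the actual AdDataJob/SFReferenceDataJob code: the class name, bucket paths, dataset name and SOQL string are placeholders; only the constructors and transforms shown in this repository (SFConfig.getInstance, SFSOQLExecutor, SFRead, SFWaveDatasetWriter, SFWaveWrite) are assumed.

```java
package com.google.wave.prototype.dataflow.pipeline;

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.transforms.Create;
import com.google.cloud.dataflow.sdk.values.PCollection;
import com.google.wave.prototype.dataflow.model.SFConfig;
import com.google.wave.prototype.dataflow.model.SFWaveWriteResult;
import com.google.wave.prototype.dataflow.sf.SFSOQLExecutor;
import com.google.wave.prototype.dataflow.sf.SFWaveDatasetWriter;
import com.google.wave.prototype.dataflow.transform.SFRead;
import com.google.wave.prototype.dataflow.transform.SFWaveWrite;

/** Illustrative wiring only; not part of the repository. */
public class SFReadToWaveExample {
    public static void main(String[] args) throws Exception {
        PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
        Pipeline p = Pipeline.create(options);

        // Salesforce credentials come from a local or GCS JSON file (placeholder path)
        SFConfig sfConfig = SFConfig.getInstance("gs://sample_bucket/sf_config.json", options);

        // Execute a SOQL query and get the result back as CSV rows
        PCollection<String> sfRefCsv = p
                .apply(Create.of("SELECT AccountId, Id FROM Opportunity"))
                .apply(new SFRead(new SFSOQLExecutor(sfConfig)));

        // Publish the CSV rows as a Salesforce Wave dataset (placeholder names)
        SFWaveDatasetWriter writer = new SFWaveDatasetWriter(sfConfig, "SampleWaveDataset");
        PCollection<SFWaveWriteResult> writeResult =
                sfRefCsv.apply(new SFWaveWrite(writer, "gs://sample_bucket/wave_metadata.json"));

        p.run();
    }
}
```

When the same CSV rows are destined for BigQuery rather than Wave, the TableRowFormatter function (exercised in the tests below) plays the analogous formatting role.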
/src/test/java/com/google/wave/prototype/dataflow/BaseTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import com.google.api.services.bigquery.model.TableRow; 7 | import com.google.wave.prototype.dataflow.model.AggregatedData; 8 | import com.google.wave.prototype.dataflow.util.JobConstants; 9 | 10 | public class BaseTest { 11 | // Test data 12 | protected static final String ACCOUNT_ID_1 = "001B0000003oYAfIAM"; 13 | protected static final String OPPOR_ID_1 = "006B0000002ndnpIAA"; 14 | protected static final String PROPOSAL_ID_1 = "101"; 15 | protected static final int CLICK_COUNT_1 = 100; 16 | protected static final int IMPRESSION_COUNT_1 = 1000; 17 | 18 | protected static final String ACCOUNT_ID_2 = "001B0000003oYAfIAM"; 19 | protected static final String OPPOR_ID_2 = "006B0000002ndnpIAF"; 20 | protected static final String PROPOSAL_ID_2 = "102"; 21 | protected static final int CLICK_COUNT_2 = 200; 22 | protected static final int IMPRESSION_COUNT_2 = 2000; 23 | 24 | protected AggregatedData[] getSampleAggDataWithoutOpporId() { 25 | AggregatedData[] sampleAggData = new AggregatedData[2]; 26 | 27 | sampleAggData[0] = new AggregatedData(PROPOSAL_ID_1, CLICK_COUNT_1, IMPRESSION_COUNT_1); 28 | sampleAggData[1] = new AggregatedData(PROPOSAL_ID_2, CLICK_COUNT_2, IMPRESSION_COUNT_2); 29 | 30 | return sampleAggData; 31 | } 32 | 33 | protected AggregatedData[] getSampleAggDataWithOpporId() { 34 | AggregatedData[] sampleAggData = getSampleAggDataWithoutOpporId(); 35 | 36 | sampleAggData[0].setOpportunityId(OPPOR_ID_1); 37 | sampleAggData[1].setOpportunityId(OPPOR_ID_2); 38 | 39 | return sampleAggData; 40 | } 41 | 42 | protected String getAsCSV(String... 
columns) { 43 | StringBuilder csv = new StringBuilder(); 44 | for (int i = 0; i < columns.length; i++) { 45 | if (i != 0) { 46 | csv.append(','); 47 | } 48 | csv.append(columns[i]); 49 | } 50 | csv.append('\n'); 51 | 52 | return csv.toString(); 53 | } 54 | 55 | protected String getAsCSV(String proposalId, String opporId, 56 | int clickCount, int impressionCount) { 57 | return getAsCSV(proposalId, opporId, clickCount + "", impressionCount + ""); 58 | } 59 | 60 | protected TableRow getAsTableRow(String accId1, String opporId1, 61 | String proposalId1) { 62 | TableRow row = new TableRow(); 63 | 64 | row.set(JobConstants.COL_ACCOUNT_ID, accId1); 65 | row.set(JobConstants.COL_OPPORTUNITY_ID, opporId1); 66 | row.set(JobConstants.COL_PROPOSAL_ID, proposalId1); 67 | 68 | return row; 69 | } 70 | 71 | protected List getSampleSFRefTableRows() { 72 | List sampleSFRefTableRows = new ArrayList(4); 73 | 74 | sampleSFRefTableRows.add(getAsTableRow(ACCOUNT_ID_1, OPPOR_ID_1, PROPOSAL_ID_1)); 75 | sampleSFRefTableRows.add(getAsTableRow(ACCOUNT_ID_2, OPPOR_ID_2, PROPOSAL_ID_2)); 76 | 77 | return sampleSFRefTableRows; 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/coder/AggregateDataCoderTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.coder; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import java.io.ByteArrayInputStream; 6 | 7 | import org.apache.commons.io.output.ByteArrayOutputStream; 8 | import org.junit.Before; 9 | import org.junit.Test; 10 | 11 | import com.google.cloud.dataflow.sdk.coders.Coder.Context; 12 | import com.google.wave.prototype.dataflow.BaseTest; 13 | import com.google.wave.prototype.dataflow.model.AggregatedData; 14 | 15 | public class AggregateDataCoderTest extends BaseTest { 16 | private AggregatedData aggregatedData; 17 | 18 | @Before 19 | public void setup() { 20 | aggregatedData = new AggregatedData(PROPOSAL_ID_1, OPPOR_ID_1, CLICK_COUNT_1, IMPRESSION_COUNT_1); 21 | } 22 | 23 | @Test 24 | public void testCoder() throws Exception { 25 | ByteArrayOutputStream bos = null; 26 | ByteArrayInputStream bis = null; 27 | try { 28 | AggregateDataCoder coder = AggregateDataCoder.getInstance(); 29 | 30 | bos = new ByteArrayOutputStream(); 31 | coder.encode(aggregatedData, bos, Context.NESTED); 32 | 33 | bis = new ByteArrayInputStream(bos.toByteArray()); 34 | AggregatedData decodedAggData = coder.decode(bis, Context.NESTED); 35 | 36 | assertEquals(aggregatedData, decodedAggData); 37 | } finally { 38 | if (bos != null) { 39 | bos.close(); 40 | } 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/coder/SFCoderTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.coder; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import java.io.ByteArrayInputStream; 6 | 7 | import org.apache.commons.io.output.ByteArrayOutputStream; 8 | import org.junit.Before; 9 | import org.junit.Test; 10 | 11 | import com.google.cloud.dataflow.sdk.coders.Coder.Context; 12 | import com.google.wave.prototype.dataflow.BaseTest; 13 | import com.google.wave.prototype.dataflow.model.SFReferenceData; 14 | 15 | public class SFCoderTest extends BaseTest { 16 | private SFReferenceData sfReferenceData; 17 | 18 | @Before 19 | public 
void setup() { 20 | sfReferenceData = new SFReferenceData(ACCOUNT_ID_1, OPPOR_ID_1, PROPOSAL_ID_1); 21 | } 22 | 23 | @Test 24 | public void testCoder() throws Exception { 25 | ByteArrayOutputStream bos = null; 26 | ByteArrayInputStream bis = null; 27 | try { 28 | SFCoder coder = SFCoder.getInstance(); 29 | 30 | bos = new ByteArrayOutputStream(); 31 | coder.encode(sfReferenceData, bos, Context.NESTED); 32 | 33 | bis = new ByteArrayInputStream(bos.toByteArray()); 34 | SFReferenceData decodedsfData= coder.decode(bis, Context.NESTED); 35 | 36 | assertEquals(sfReferenceData, decodedsfData); 37 | } finally { 38 | if (bos != null) { 39 | bos.close(); 40 | } 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/function/AggregateDataEnricherTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.function; 2 | 3 | import java.util.List; 4 | 5 | import org.hamcrest.CoreMatchers; 6 | import org.junit.Assert; 7 | import org.junit.Test; 8 | 9 | import com.google.api.services.bigquery.model.TableRow; 10 | import com.google.cloud.dataflow.sdk.testing.TestPipeline; 11 | import com.google.cloud.dataflow.sdk.transforms.Create; 12 | import com.google.cloud.dataflow.sdk.transforms.DoFnTester; 13 | import com.google.cloud.dataflow.sdk.transforms.View; 14 | import com.google.cloud.dataflow.sdk.values.PCollection; 15 | import com.google.cloud.dataflow.sdk.values.PCollectionView; 16 | import com.google.wave.prototype.dataflow.BaseTest; 17 | import com.google.wave.prototype.dataflow.model.AggregatedData; 18 | 19 | /** 20 | * Unit tests for {@link AggregateDataEnricher} 21 | */ 22 | public class AggregateDataEnricherTest extends BaseTest { 23 | 24 | @Test 25 | public void enrichTest() { 26 | // Creating pipeline to construct sideInput 27 | TestPipeline testPipeline = TestPipeline.create(); 28 | // Constructing sideInput 29 | List sampleSFRefTableRows = getSampleSFRefTableRows(); 30 | PCollection sampleSFRefData = testPipeline.apply(Create.of(sampleSFRefTableRows)); 31 | PCollectionView> sideInput = sampleSFRefData.apply(View.asIterable()); 32 | 33 | AggregateDataEnricher enricher = new AggregateDataEnricher(sideInput); 34 | DoFnTester doFnTester = DoFnTester.of(enricher); 35 | doFnTester.setSideInputInGlobalWindow(sideInput, sampleSFRefTableRows); 36 | 37 | // Input Aggregated provided without opportunity Id 38 | List results = doFnTester.processBatch(getSampleAggDataWithoutOpporId()); 39 | 40 | // Check whether the result has opportunity id populated with it 41 | Assert.assertThat(results, CoreMatchers.hasItems(getSampleAggDataWithOpporId())); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/function/CSVFormatterTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.function; 2 | 3 | import java.util.List; 4 | 5 | import org.hamcrest.CoreMatchers; 6 | import org.junit.Assert; 7 | import org.junit.Test; 8 | 9 | import com.google.cloud.dataflow.sdk.transforms.DoFnTester; 10 | import com.google.wave.prototype.dataflow.BaseTest; 11 | import com.google.wave.prototype.dataflow.model.AggregatedData; 12 | 13 | public class CSVFormatterTest extends BaseTest { 14 | 15 | @Test 16 | public void transformAsCSVTest() { 17 | CSVFormatter csvFormatter = new 
CSVFormatter(); 18 | DoFnTester dofnTester = DoFnTester.of(csvFormatter); 19 | 20 | List results = dofnTester.processBatch(getSampleAggDataWithOpporId()); 21 | Assert.assertThat(results, CoreMatchers.hasItems(getSampleEnrichedDataAsCSV())); 22 | } 23 | 24 | private String[] getSampleEnrichedDataAsCSV() { 25 | String[] sampleEnrichedCSVs= new String[2]; 26 | 27 | sampleEnrichedCSVs[0] = getAsCSV(PROPOSAL_ID_1, OPPOR_ID_1, CLICK_COUNT_1, IMPRESSION_COUNT_1); 28 | sampleEnrichedCSVs[1] = getAsCSV(PROPOSAL_ID_2, OPPOR_ID_2, CLICK_COUNT_2, IMPRESSION_COUNT_2); 29 | 30 | return sampleEnrichedCSVs; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/function/TableRowFormatterTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.function; 2 | 3 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_ACCOUNT_ID; 4 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_OPPORTUNITY_ID; 5 | import static com.google.wave.prototype.dataflow.util.JobConstants.COL_PROPOSAL_ID; 6 | 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | 10 | import org.junit.Assert; 11 | import org.junit.Test; 12 | 13 | import com.google.api.services.bigquery.model.TableRow; 14 | import com.google.cloud.dataflow.sdk.transforms.DoFnTester; 15 | import com.google.wave.prototype.dataflow.BaseTest; 16 | 17 | /** 18 | * Unit test for {@link TableRowFormatter} DoFn 19 | */ 20 | public class TableRowFormatterTest extends BaseTest { 21 | 22 | @Test 23 | public void formatSFRefTest() { 24 | TableRowFormatter formatSFRefFn = new TableRowFormatter(getSFRefTableColumns()); 25 | DoFnTester doFnTester = DoFnTester.of(formatSFRefFn); 26 | 27 | // Mocking SFRead by manually constructing CSV data 28 | List results = doFnTester.processBatch( 29 | getAsCSV(ACCOUNT_ID_1, OPPOR_ID_1, PROPOSAL_ID_1), 30 | getAsCSV(ACCOUNT_ID_2, OPPOR_ID_2, PROPOSAL_ID_2)); 31 | 32 | // Converted tableRows are verified here 33 | Assert.assertEquals(results, getSampleSFRefTableRows()); 34 | } 35 | 36 | private List getSFRefTableColumns() { 37 | List columns = new ArrayList(4); 38 | 39 | columns.add(COL_ACCOUNT_ID); 40 | columns.add(COL_OPPORTUNITY_ID); 41 | columns.add(COL_PROPOSAL_ID); 42 | 43 | return columns; 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/model/SFConfigTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.model; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.fail; 5 | 6 | import org.junit.Test; 7 | 8 | import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; 9 | import com.google.wave.prototype.dataflow.model.SFConfig; 10 | import com.google.wave.prototype.dataflow.util.SFConstants; 11 | 12 | /** 13 | * Unit test for SFConfig 14 | * Reads the config file present in local and assert the values 15 | */ 16 | public class SFConfigTest { 17 | @Test 18 | public void validLocalFile() throws Exception { 19 | // Config files are present in project home 20 | StringBuilder sb = new StringBuilder(); 21 | sb.append(SFConstants.LOCAL_FILE_PREFIX); 22 | sb.append(System.getProperty("user.dir")); 23 | sb.append("/test_sf_config.json"); 24 | 25 | // This will read the config file and populate 
SFConfig with userId and password 26 | SFConfig sfConfig = SFConfig.getInstance(sb.toString(), PipelineOptionsFactory.create()); 27 | 28 | assertEquals("demo@demo.com", sfConfig.getUserId()); 29 | assertEquals("test", sfConfig.getPassword()); 30 | } 31 | 32 | @Test 33 | public void invalidLocalFile() throws Exception { 34 | try { 35 | // Providing invalid file path which should throw Exception 36 | SFConfig.getInstance("test_sf_config.json", PipelineOptionsFactory.create()); 37 | fail("Expected exception not raised"); 38 | } catch (Exception e) { 39 | // Expected exception here 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/pipeline/AdDataJobTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.pipeline; 2 | 3 | 4 | /** 5 | * Jobs are not tested as BigQueryIO is not mocked 6 | */ 7 | public class AdDataJobTest { 8 | 9 | } -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/pipeline/SFReferenceDataJobTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.pipeline; 2 | 3 | 4 | /** 5 | * Jobs are not tested as BigQueryIO is not mocked 6 | */ 7 | public class SFReferenceDataJobTest { 8 | } -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/sf/SFSOQLExecutorTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.sf; 2 | 3 | import static org.mockito.Mockito.mock; 4 | import static org.mockito.Mockito.when; 5 | import static org.junit.Assert.assertEquals; 6 | import static org.junit.Assert.assertNotNull; 7 | 8 | import java.util.List; 9 | 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | import com.google.wave.prototype.dataflow.BaseTest; 14 | import com.google.wave.prototype.dataflow.model.SFConfig; 15 | import com.sforce.soap.enterprise.EnterpriseConnection; 16 | import com.sforce.soap.enterprise.QueryResult; 17 | import com.sforce.soap.enterprise.sobject.Opportunity; 18 | import com.sforce.soap.enterprise.sobject.SObject; 19 | import com.sforce.ws.ConnectionException; 20 | import com.sforce.ws.ConnectorConfig; 21 | 22 | /** 23 | * Unit test for {@link SFSOQLExecutor} 24 | */ 25 | public class SFSOQLExecutorTest extends BaseTest { 26 | private static final String sfQueryStr = "SELECT AccountId, Id, ProposalID__c FROM Opportunity where ProposalID__c != null"; 27 | 28 | private SFConfig sfConfig; 29 | 30 | @Before 31 | public void setup() throws Exception { 32 | sfConfig = mock(SFConfig.class); 33 | 34 | // Returning our EnterpriseConnection which return a single object during query execution 35 | when(sfConfig.createEnterpriseConnection()).thenReturn(EnterpriseConnectionExt.getInstance()); 36 | } 37 | 38 | @Test 39 | public void executeQueryTest() throws Exception { 40 | int expectedRecordsCount = 1; 41 | SFSOQLExecutor executor = new SFSOQLExecutor(sfConfig); 42 | List results = executor.executeQuery(sfQueryStr); 43 | 44 | assertNotNull(results); 45 | assertEquals(results.size(), expectedRecordsCount); 46 | Opportunity opportunity = (Opportunity) results.get(0); 47 | 48 | assertEquals(ACCOUNT_ID_1, opportunity.getAccountId()); 49 | assertEquals(OPPOR_ID_1, 
opportunity.getId()); 50 | assertEquals(PROPOSAL_ID_1, opportunity.getProposalID__c()); 51 | } 52 | 53 | public static class EnterpriseConnectionExt extends EnterpriseConnection { 54 | 55 | public static EnterpriseConnectionExt getInstance() throws ConnectionException { 56 | ConnectorConfig config = new ConnectorConfig(); 57 | config.setUsername("dummy_sf_user"); 58 | config.setPassword("dummy_sf_password"); 59 | config.setManualLogin(true); 60 | // Salesforce SOAP API checks for /services/Soap/c/ 61 | config.setServiceEndpoint("http://dummysgendpoint/services/Soap/c/"); 62 | return new EnterpriseConnectionExt(config); 63 | } 64 | 65 | public EnterpriseConnectionExt(ConnectorConfig config) 66 | throws ConnectionException { 67 | super(config); 68 | } 69 | 70 | @Override 71 | public QueryResult query(String queryString) throws ConnectionException { 72 | QueryResult queryResult = new QueryResult(); 73 | 74 | Opportunity opportunity = new Opportunity(); 75 | opportunity.setAccountId(ACCOUNT_ID_1); 76 | opportunity.setProposalID__c(PROPOSAL_ID_1); 77 | opportunity.setId(OPPOR_ID_1); 78 | 79 | queryResult.setRecords(new SObject[] {opportunity}); 80 | queryResult.setDone(true); 81 | return queryResult; 82 | } 83 | 84 | @Override 85 | public void logout() throws ConnectionException { 86 | // no op 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/sf/SFWaveDatasetWriterTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.sf; 2 | 3 | import static com.google.wave.prototype.dataflow.util.SFConstants.*; 4 | import static org.junit.Assert.*; 5 | import static org.mockito.Mockito.mock; 6 | import static org.mockito.Mockito.when; 7 | 8 | import org.junit.Before; 9 | import org.junit.Test; 10 | 11 | import com.google.wave.prototype.dataflow.BaseTest; 12 | import com.google.wave.prototype.dataflow.model.SFConfig; 13 | import com.google.wave.prototype.dataflow.util.SFConstants; 14 | import com.sforce.soap.partner.PartnerConnection; 15 | import com.sforce.soap.partner.SaveResult; 16 | import com.sforce.soap.partner.sobject.SObject; 17 | import com.sforce.ws.ConnectionException; 18 | import com.sforce.ws.ConnectorConfig; 19 | 20 | /** 21 | * Unit test for {@link SFWaveDatasetWriter} 22 | */ 23 | public class SFWaveDatasetWriterTest extends BaseTest { 24 | private static final String DUMMY_METADATA_CONTENT = "dummy_metadata_content"; 25 | private static final String DUMMY_DATASET_CONTENT = "dummy_dataset_content"; 26 | private static final String DUMMY_SOBJECT_ID = "dummy_sobject_id"; 27 | private static final String DUMMY_DATASET_NAME = "dummy_dataset_name"; 28 | 29 | private static int CREATE_CALL_COUNT = 0; 30 | private static int UPDATE_CALL_COUNT = 0; 31 | 32 | private SFConfig sfConfig; 33 | 34 | @Before 35 | public void setup() throws Exception { 36 | StringBuilder metadataFileLocationSB = new StringBuilder(); 37 | metadataFileLocationSB.append(SFConstants.LOCAL_FILE_PREFIX); 38 | metadataFileLocationSB.append(System.getProperty("user.dir")); 39 | metadataFileLocationSB.append("/test_metadata.json"); 40 | 41 | sfConfig = mock(SFConfig.class); 42 | 43 | when(sfConfig.createPartnerConnection()).thenReturn(PartnerConnectionExt.getInstance()); 44 | 45 | CREATE_CALL_COUNT = 0; 46 | UPDATE_CALL_COUNT = 0; 47 | } 48 | 49 | @Test 50 | public void testWrite() throws Exception { 51 | SFWaveDatasetWriter writer = new 
SFWaveDatasetWriter(sfConfig, DUMMY_DATASET_NAME); 52 | String sfObjId = writer.write(DUMMY_METADATA_CONTENT.getBytes(), DUMMY_DATASET_CONTENT.getBytes()); 53 | 54 | assertEquals(DUMMY_SOBJECT_ID, sfObjId); 55 | // Verify that PartnerConnection.create() has been called twice 56 | // metadata publish and datapart publish 57 | assertEquals(2, CREATE_CALL_COUNT); 58 | 59 | // Verify that PartnerConnection.update() has been called only once 60 | // finalize publish 61 | assertEquals(1, UPDATE_CALL_COUNT); 62 | } 63 | 64 | public static class PartnerConnectionExt extends PartnerConnection { 65 | 66 | public static PartnerConnectionExt getInstance() throws ConnectionException { 67 | ConnectorConfig config = new ConnectorConfig(); 68 | config.setUsername("dummy_sf_user"); 69 | config.setPassword("dummy_sf_password"); 70 | config.setManualLogin(true); 71 | // Salesforce SOAP API checks for /services/Soap/c/ 72 | config.setServiceEndpoint("http://dummysgendpoint/services/Soap/u/"); 73 | return new PartnerConnectionExt(config); 74 | } 75 | 76 | public PartnerConnectionExt(ConnectorConfig config) 77 | throws ConnectionException { 78 | super(config); 79 | } 80 | 81 | @Override 82 | public SaveResult[] update(SObject[] sObjects) 83 | throws ConnectionException { 84 | int expectedSObjectCount = 1; 85 | assertEquals(expectedSObjectCount, sObjects.length); 86 | 87 | String type = sObjects[0].getType(); 88 | assertEquals(STR_INSIGHTS_EXTERNAL_DATA, type); 89 | 90 | // verify action 91 | String actualAction = (String) sObjects[0].getField(STR_ACTION); 92 | assertEquals(STR_ACTION_PROCESS, actualAction); 93 | 94 | // verify Sobject Id 95 | assertEquals(DUMMY_SOBJECT_ID, sObjects[0].getId()); 96 | 97 | UPDATE_CALL_COUNT++; 98 | return constructSaveResultArray(); 99 | } 100 | 101 | @Override 102 | public SaveResult[] create(SObject[] sObjects) 103 | throws ConnectionException { 104 | int expectedSObjectCount = 1; 105 | assertEquals(expectedSObjectCount, sObjects.length); 106 | 107 | String type = sObjects[0].getType(); 108 | assertNotNull(type); 109 | // It is metadata publish 110 | if (STR_INSIGHTS_EXTERNAL_DATA.equals(type)) { 111 | // verify dataset name 112 | String actualDatasetName = (String) sObjects[0].getField(STR_EDGEMART_ALIAS); 113 | assertEquals(DUMMY_DATASET_NAME, actualDatasetName); 114 | 115 | // verify metadata content 116 | byte[] actualMetadataContent = (byte[]) sObjects[0].getField(STR_METADATA_JSON); 117 | assertEquals(DUMMY_METADATA_CONTENT, new String(actualMetadataContent)); 118 | } else if (STR_INSIGHTS_EXTERNAL_DATA_PART.equals(type)) { 119 | // verify dataset content 120 | byte[] actualDatasetContent = (byte[]) sObjects[0].getField(STR_DATAFILE); 121 | assertEquals(DUMMY_DATASET_CONTENT, new String(actualDatasetContent)); 122 | 123 | // verify sobject id 124 | String actualSObjectId = (String) sObjects[0].getField(STR_INSIGHTS_EXTERNAL_DATA_ID); 125 | assertEquals(DUMMY_SOBJECT_ID, actualSObjectId); 126 | } else { 127 | fail("PartnerConnection.create() called with invalid type " + type); 128 | } 129 | 130 | CREATE_CALL_COUNT++; 131 | return constructSaveResultArray(); 132 | } 133 | 134 | @Override 135 | public void logout() throws ConnectionException { 136 | // no op 137 | } 138 | 139 | private SaveResult[] constructSaveResultArray() { 140 | SaveResult saveResult = new SaveResult(); 141 | saveResult.setId(DUMMY_SOBJECT_ID); 142 | saveResult.setSuccess(true); 143 | 144 | return new SaveResult[] {saveResult}; 145 | } 146 | } 147 | } 148 | 
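The body of SFWaveDatasetWriter itself is not reproduced in this section; pieced together from SFConstants and the expectations coded into SFWaveDatasetWriterTest above (two create() calls, then one update() with Action set to Process), the underlying Wave publish sequence looks roughly like the following sketch. The object and field names come from SFConstants; the class name, method shape and part numbering are illustrative assumptions, and connection setup and error handling are omitted.

```java
import com.sforce.soap.partner.PartnerConnection;
import com.sforce.soap.partner.SaveResult;
import com.sforce.soap.partner.sobject.SObject;

/** Illustrative sketch of the InsightsExternalData publish flow; not part of the repository. */
public class WavePublishSketch {
    public static String publish(PartnerConnection connection, String datasetName,
            byte[] metadata, byte[] datasetData) throws Exception {
        // 1. Create the InsightsExternalData header row carrying the metadata JSON
        SObject header = new SObject();
        header.setType("InsightsExternalData");
        header.setField("EdgemartAlias", datasetName);
        header.setField("MetadataJson", metadata);
        header.setField("Format", "Csv");
        header.setField("Operation", "Overwrite");
        header.setField("Action", "None");
        SaveResult[] headerResult = connection.create(new SObject[] { header });
        String parentId = headerResult[0].getId();

        // 2. Create the InsightsExternalDataPart row carrying the CSV bytes
        SObject part = new SObject();
        part.setType("InsightsExternalDataPart");
        part.setField("InsightsExternalDataId", parentId);
        part.setField("DataFile", datasetData);
        part.setField("PartNumber", 1);
        connection.create(new SObject[] { part });

        // 3. Flip the header's Action to Process so Wave starts loading the dataset
        SObject process = new SObject();
        process.setType("InsightsExternalData");
        process.setId(parentId);
        process.setField("Action", "Process");
        connection.update(new SObject[] { process });

        return parentId;
    }
}
```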
-------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/transform/AggregateEventsTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.transform; 2 | 3 | import java.util.Arrays; 4 | import java.util.List; 5 | 6 | import org.hamcrest.CoreMatchers; 7 | import org.junit.Assert; 8 | import org.junit.Test; 9 | 10 | import com.google.cloud.dataflow.sdk.Pipeline; 11 | import com.google.cloud.dataflow.sdk.testing.DataflowAssert; 12 | import com.google.cloud.dataflow.sdk.testing.TestPipeline; 13 | import com.google.cloud.dataflow.sdk.transforms.Create; 14 | import com.google.cloud.dataflow.sdk.transforms.DoFnTester; 15 | import com.google.cloud.dataflow.sdk.values.KV; 16 | import com.google.cloud.dataflow.sdk.values.PCollection; 17 | import com.google.wave.prototype.dataflow.coder.AggregateDataCoder; 18 | import com.google.wave.prototype.dataflow.model.AggregatedData; 19 | import com.google.wave.prototype.dataflow.transform.AggregateEvents; 20 | import com.google.wave.prototype.dataflow.transform.AggregateEvents.CountEvents; 21 | import com.google.wave.prototype.dataflow.transform.AggregateEvents.FilterRawData; 22 | 23 | /** 24 | * Unit tester for AggregateEvents PTransform and the DoFn present in it 25 | */ 26 | public class AggregateEventsTest { 27 | 28 | @SuppressWarnings("unchecked") 29 | @Test 30 | public void filterRawDataTest() { 31 | FilterRawData filterRawDataDoFn = new AggregateEvents.FilterRawData(); 32 | DoFnTester> doFnTester = DoFnTester.of(filterRawDataDoFn); 33 | 34 | // getAdDataSampleCSVRows() will return raw AdData csv rows 35 | // FilterRawData DoFn will extract ProposalId and event from it 36 | List> results = doFnTester.processBatch(getAdDataSampleCSVRows()); 37 | 38 | // Based on the input following KV are expected 39 | KV expectedValue1 = KV.of("101", "Impression"); 40 | KV expectedValue2 = KV.of("102", "Click"); 41 | KV expectedValue3 = KV.of("101", "Click"); 42 | Assert.assertThat(results, CoreMatchers.hasItems(expectedValue1, expectedValue2, expectedValue3)); 43 | } 44 | 45 | @SuppressWarnings("unchecked") 46 | @Test 47 | public void countEventsDoFnTest() { 48 | CountEvents countEventsDoFn = new AggregateEvents.CountEvents(); 49 | DoFnTester>, AggregatedData> countEventDoFnTester = DoFnTester.of(countEventsDoFn); 50 | 51 | // Input to AggregateEvents.CountEvents 52 | KV> kvPropsalIdEvents1 = KV.of("101", (Iterable) Arrays.asList("Impression", "Click", "Impression")); 53 | KV> kvPropsalIdEvents2 = KV.of("102", (Iterable) Arrays.asList("Click", "Impression")); 54 | KV> kvPropsalIdEvents3 = KV.of("103", (Iterable) Arrays.asList("Click")); 55 | 56 | List results = countEventDoFnTester.processBatch(kvPropsalIdEvents1, kvPropsalIdEvents2, kvPropsalIdEvents3); 57 | 58 | // Expected results 59 | // For proposalId 101, there are 1 Click and 2 Impressions in the input 60 | // Hence the expected in new AggregatedData("101", 1, 2) 61 | // For proposalId 102, there are 1 Click and 1 Impression in the input 62 | // For proposalId 103, there are 1 Click and 0 Impression in the input 63 | AggregatedData expectedValue1 = new AggregatedData("101", 1, 2); 64 | AggregatedData expectedValue2 = new AggregatedData("102", 1, 1); 65 | AggregatedData expectedValue3 = new AggregatedData("103", 1, 0); 66 | Assert.assertThat(results, CoreMatchers.hasItems(expectedValue1, expectedValue2, expectedValue3)); 67 | } 68 | 69 | @Test 70 | public 
void aggregateEventsTransformTest() { 71 | Pipeline p = TestPipeline.create(); 72 | 73 | PCollection inPCol = p.apply(Create.of(getAdDataSampleCSVRows())); 74 | PCollection result = inPCol.apply(new AggregateEvents()) 75 | .setCoder(AggregateDataCoder.getInstance()); 76 | 77 | // Input data contains 3 rows 78 | // 2 proposal Id present in input 101 and 102 79 | // And proposal Id 101 has 1 Impression and 1 Click 80 | // Proposal Id 102 has 1 Click 81 | // So expected values are new AggregatedData("101", 1, 1) and new AggregatedData("102", 1, 0) 82 | AggregatedData expectedValue1 = new AggregatedData("101", 1, 1); 83 | AggregatedData expectedValue2 = new AggregatedData("102", 1, 0); 84 | DataflowAssert.that(result).containsInAnyOrder(Arrays.asList(expectedValue1, expectedValue2)); 85 | 86 | p.run(); 87 | } 88 | 89 | private String[] getAdDataSampleCSVRows() { 90 | String[] adDataSampleCSVRows = new String[3]; 91 | adDataSampleCSVRows[0] = "1,01-01-14 9:00,ip-10-150-38-122/10.150.38.122,0,70.209.198.223,http://sample.com,3232,Impression,3,1,101"; 92 | adDataSampleCSVRows[1] = "2,01-01-14 9:01,ip-10-150-38-122/10.150.38.123,0,70.209.198.223,http://sample.com,3232,Click,3,1,102"; 93 | adDataSampleCSVRows[2] = "3,01-01-14 9:00,ip-10-150-38-122/10.150.38.122,0,70.209.198.223,http://sample.com,3232,Click,3,1,101"; 94 | 95 | return adDataSampleCSVRows; 96 | } 97 | 98 | } 99 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/transform/SFReadTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.transform; 2 | 3 | import static org.mockito.Mockito.mock; 4 | import static org.mockito.Mockito.when; 5 | import static org.mockito.Mockito.withSettings; 6 | 7 | import java.io.Serializable; 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | 11 | import org.junit.Before; 12 | import org.junit.Ignore; 13 | import org.junit.Test; 14 | 15 | import com.google.cloud.dataflow.sdk.testing.DataflowAssert; 16 | import com.google.cloud.dataflow.sdk.testing.TestPipeline; 17 | import com.google.cloud.dataflow.sdk.transforms.Create; 18 | import com.google.cloud.dataflow.sdk.values.PCollection; 19 | import com.google.wave.prototype.dataflow.BaseTest; 20 | import com.google.wave.prototype.dataflow.sf.SFSOQLExecutor; 21 | import com.sforce.soap.enterprise.sobject.Opportunity; 22 | import com.sforce.soap.enterprise.sobject.SObject; 23 | 24 | public class SFReadTest extends BaseTest { 25 | private static final String sfQueryStr = "SELECT AccountId, Id, ProposalID__c FROM Opportunity where ProposalID__c != null"; 26 | 27 | private SFSOQLExecutor sfSOQLExecutor; 28 | 29 | @Before 30 | public void setup() throws Exception { 31 | sfSOQLExecutor = mock(SFSOQLExecutor.class, withSettings().serializable()); 32 | 33 | OpportunityExt oppor = new OpportunityExt(); 34 | oppor.setAccountId(ACCOUNT_ID_1); 35 | oppor.setId(OPPOR_ID_1); 36 | oppor.setProposalID__c(PROPOSAL_ID_1); 37 | List sobjects = new ArrayList(); 38 | sobjects.add(oppor); 39 | 40 | when(sfSOQLExecutor.executeQuery(sfQueryStr)).thenReturn(sobjects); 41 | } 42 | 43 | @Ignore("Not able to serialize Opportunity, hence not able to mock it. 
But unit test for SFRead is covered as part SFSOQLExecutor") 44 | @Test 45 | public void pTransformTest() { 46 | TestPipeline pipeline = TestPipeline.create(); 47 | 48 | PCollection input = pipeline.apply(Create.of(sfQueryStr)); 49 | PCollection results = input.apply(new SFRead(sfSOQLExecutor)); 50 | 51 | DataflowAssert.that(results).containsInAnyOrder(getAsCSV(ACCOUNT_ID_1, OPPOR_ID_1, PROPOSAL_ID_1)); 52 | 53 | pipeline.run(); 54 | } 55 | 56 | public class OpportunityExt extends Opportunity implements Serializable { 57 | private static final long serialVersionUID = -563793703304651268L; 58 | 59 | 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/test/java/com/google/wave/prototype/dataflow/transform/SFWaveWriteTest.java: -------------------------------------------------------------------------------- 1 | package com.google.wave.prototype.dataflow.transform; 2 | 3 | import static org.mockito.Mockito.mock; 4 | import static org.mockito.Mockito.when; 5 | import static org.mockito.Mockito.withSettings; 6 | 7 | import java.util.Arrays; 8 | import java.util.HashSet; 9 | import java.util.List; 10 | import java.util.Set; 11 | 12 | import org.hamcrest.CoreMatchers; 13 | import org.junit.Assert; 14 | import org.junit.Before; 15 | import org.junit.Test; 16 | 17 | import com.google.cloud.dataflow.sdk.Pipeline; 18 | import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; 19 | import com.google.cloud.dataflow.sdk.testing.DataflowAssert; 20 | import com.google.cloud.dataflow.sdk.testing.TestPipeline; 21 | import com.google.cloud.dataflow.sdk.transforms.Create; 22 | import com.google.cloud.dataflow.sdk.transforms.DoFn; 23 | import com.google.cloud.dataflow.sdk.transforms.DoFnTester; 24 | import com.google.cloud.dataflow.sdk.transforms.PTransform; 25 | import com.google.cloud.dataflow.sdk.transforms.View; 26 | import com.google.cloud.dataflow.sdk.values.KV; 27 | import com.google.cloud.dataflow.sdk.values.PCollection; 28 | import com.google.cloud.dataflow.sdk.values.PCollectionView; 29 | import com.google.wave.prototype.dataflow.model.SFWaveWriteResult; 30 | import com.google.wave.prototype.dataflow.sf.SFWaveDatasetWriter; 31 | import com.google.wave.prototype.dataflow.transform.SFWaveWrite.BundleCount; 32 | import com.google.wave.prototype.dataflow.transform.SFWaveWrite.DistributeRowData; 33 | import com.google.wave.prototype.dataflow.transform.SFWaveWrite.Write; 34 | import com.google.wave.prototype.dataflow.util.FileUtil; 35 | import com.google.wave.prototype.dataflow.util.SFConstants; 36 | 37 | /** 38 | * Simple unit tests for {@link SFWaveWrite} {@link PTransform} and its {@link DoFn} 39 | */ 40 | public class SFWaveWriteTest { 41 | private static final String SAMPLE_DATA_TO_BE_WRITTEN = "001B0000003oYAfIAM,006B0000002ndnpIAA,102"; 42 | private static final String SAMPLE_SF_OBJ_ID = "testSFOBjId"; 43 | 44 | private SFWaveDatasetWriter writer; 45 | private String metadataFileLocation; 46 | 47 | @Before 48 | public void setup() throws Exception { 49 | StringBuilder metadataFileLocationSB = new StringBuilder(); 50 | metadataFileLocationSB.append(SFConstants.LOCAL_FILE_PREFIX); 51 | metadataFileLocationSB.append(System.getProperty("user.dir")); 52 | metadataFileLocationSB.append("/test_metadata.json"); 53 | 54 | metadataFileLocation = metadataFileLocationSB.toString(); 55 | 56 | writer = mock(SFWaveDatasetWriter.class, withSettings().serializable()); 57 | when(writer.write( 58 | FileUtil.getContent(metadataFileLocation.toString(), 
PipelineOptionsFactory.create()).getBytes(), 59 | (SAMPLE_DATA_TO_BE_WRITTEN + "\n").getBytes())) 60 | .thenReturn(SAMPLE_SF_OBJ_ID); 61 | } 62 | 63 | @Test 64 | public void calculateNoOfBundlesDoFnTest() { 65 | BundleCount bundleCtFn = new SFWaveWrite.BundleCount(); 66 | DoFnTester<Long, Integer> bundleCtFnTester = DoFnTester.of(bundleCtFn); 67 | 68 | long bundle = 1024 * 1024 * 10l; 69 | // This should create 2 bundles 70 | long input1 = bundle + 1; 71 | 72 | // This should create 32 bundles 73 | long input2 = (bundle * 31) + 1024; 74 | 75 | // These should each create 1 bundle 76 | long input3 = 1024l; 77 | long input4 = 0l; 78 | 79 | List<Integer> results = bundleCtFnTester.processBatch(input1, input2, input3, input4); 80 | Assert.assertThat(results, CoreMatchers.hasItems(2, 32, 1, 1)); 81 | } 82 | 83 | @Test 84 | public void distributeRowDataDoFnTest() { 85 | int noOfBundles = 2; 86 | Pipeline p = TestPipeline.create(); 87 | // Preparing sideInput 88 | PCollection<Integer> bundleCount = p.apply(Create.of(noOfBundles)); 89 | PCollectionView<Integer> sideInput = bundleCount.apply(View.<Integer>asSingleton()); 90 | DistributeRowData distributeRowDataDoFn = new SFWaveWrite.DistributeRowData(sideInput); 91 | 92 | DoFnTester<String, KV<Integer, String>> doFnTester = DoFnTester.of(distributeRowDataDoFn); 93 | // Providing number of bundles as sideInput 94 | doFnTester.setSideInputInGlobalWindow(sideInput, Arrays.asList(noOfBundles)); 95 | 96 | List<KV<Integer, String>> results = doFnTester.processBatch(getSampleSFRefData()); 97 | // Result should have 4 KV with 2 unique keys 98 | Assert.assertEquals(4, results.size()); 99 | // Checking whether the result has two unique keys as noOfBundles is 2 100 | Set<Integer> keys = new HashSet<Integer>(); 101 | for (KV<Integer, String> kv : results) { 102 | keys.add(kv.getKey()); 103 | } 104 | 105 | Assert.assertEquals("Proper number of bundles are not created", noOfBundles, keys.size()); 106 | } 107 | 108 | @SuppressWarnings("unchecked") 109 | @Test 110 | public void testWriteDoFn() throws Exception { 111 | 112 | KV<Integer, Iterable<String>> input = KV.of(1, (Iterable<String>) Arrays.asList(SAMPLE_DATA_TO_BE_WRITTEN)); 113 | 114 | Write writeDoFn = new SFWaveWrite.Write(writer, metadataFileLocation); 115 | DoFnTester<KV<Integer, Iterable<String>>, SFWaveWriteResult> doFnTester = DoFnTester.of(writeDoFn); 116 | 117 | // SFWaveDatasetWriter is mocked 118 | // If proper bytes are sent by SFWaveWrite.Write then it will return SAMPLE_SF_OBJ_ID 119 | // So just checking whether it returns SAMPLE_SF_OBJ_ID or not 120 | List<SFWaveWriteResult> result = doFnTester.processBatch(input); 121 | Assert.assertThat(result, CoreMatchers.hasItems(new SFWaveWriteResult(SAMPLE_SF_OBJ_ID))); 122 | } 123 | 124 | @Test 125 | public void sfWaveWriteTest() { 126 | Pipeline p = TestPipeline.create(); 127 | 128 | PCollection<String> inputPCol = p.apply(Create.of(SAMPLE_DATA_TO_BE_WRITTEN)); 129 | PCollection<SFWaveWriteResult> output = inputPCol.apply(new SFWaveWrite(writer, metadataFileLocation)); 130 | 131 | // SFWaveDatasetWriter is mocked 132 | // If proper bytes are sent by SFWaveWrite.Write then it will return SAMPLE_SF_OBJ_ID 133 | // So just checking whether it returns SAMPLE_SF_OBJ_ID or not 134 | DataflowAssert.that(output).containsInAnyOrder(Arrays.asList(new SFWaveWriteResult(SAMPLE_SF_OBJ_ID))); 135 | p.run(); 136 | } 137 | 138 | private String[] getSampleSFRefData() { 139 | String[] sfRefDat = new String[4]; 140 | // accountId, opportunityId, proposalId inputs 141 | sfRefDat[0] = "001B0000003oYAfIAM,006B0000002ndnpIAA,102"; 142 | sfRefDat[1] = "001B0000003oYAfIAM,006B0000002ndnuIAA,103"; 143 | sfRefDat[2] = "001B0000003oYAfIAM,006B0000002ndnkIAA,101"; 144 | sfRefDat[3] = 
"001B0000003oUqJIAU,006B0000002nBrQIAU,0001"; 145 | 146 | return sfRefDat; 147 | } 148 | 149 | } 150 | --------------------------------------------------------------------------------