├── settings.gradle ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── lgtm.yml ├── sample-config.csv ├── .gitignore ├── AUTHORS ├── src └── main │ └── java │ └── dswebquerytobigquery │ ├── CommandLinePromptReceiver.java │ ├── ColumnHeader.java │ ├── StorageServiceFactory.java │ ├── BigQueryFactory.java │ ├── BigQueryConfig.java │ ├── TransferConfig.java │ ├── Constants.java │ ├── WqToBqDataTypeMapper.java │ ├── ConfigReader.java │ ├── Main.java │ ├── StorageController.java │ ├── BigQueryProcessor.java │ ├── WebQuery.java │ ├── TransferRunner.java │ └── Html2CsvParseHandler.java ├── CONTRIBUTING.md ├── gradlew.bat ├── README.md ├── gradlew └── LICENSE /settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * This file was generated by the Gradle 'init' task. 3 | */ 4 | 5 | rootProject.name = 'dswqtobq' 6 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/sa360-webquery-bigquery/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /lgtm.yml: -------------------------------------------------------------------------------- 1 | extraction: 2 | java: 3 | before_index: "export BUILD_MODE=release" 4 | index: 5 | gradle: 6 | version: "6.8.1" 7 | java_version: "11" 8 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.5-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | 
-------------------------------------------------------------------------------- /sample-config.csv: -------------------------------------------------------------------------------- 1 | gcsBucketName,projectId,datasetId,tableId,webQueryUrl 2 | dswqbq-gcs-bucket,bigquery-project-id,bigquery_dataset_id,webquery_report,https://searchads.google.com/ds/reports/download?ay=20700000000000451&av=21700000000010411&rid=290537&of=webqueryphtml&authuser=0 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .project 2 | target 3 | .classpath 4 | .factorypath 5 | .settings 6 | .idea 7 | datastore 8 | *.iml 9 | .gradle 10 | **/build/ 11 | !src/**/build/ 12 | 13 | # Ignore Gradle GUI config 14 | gradle-app.setting 15 | 16 | # Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored) 17 | !gradle-wrapper.jar 18 | 19 | # Cache of project 20 | .gradletasknamecache 21 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # This is the list of tool authors for copyright purposes. 2 | # 3 | # This does not necessarily list everyone who has contributed code, since in 4 | # some cases, their employer may be the copyright holder. To see the full list 5 | # of contributors, see the revision history in source control. 
6 | Meera Youn - Original Idea and concept 7 | Anant Damle - Overall solution design and build 8 | -------------------------------------------------------------------------------- /src/main/java/dswebquerytobigquery/CommandLinePromptReceiver.java: -------------------------------------------------------------------------------- 1 | package dswebquerytobigquery; 2 | 3 | import com.google.api.client.extensions.java6.auth.oauth2.AbstractPromptReceiver; 4 | 5 | /** 6 | * Provides Prompt receiver for command line prompt to allow user to supply Authorization code. 7 | */ 8 | public class CommandLinePromptReceiver extends AbstractPromptReceiver { 9 | 10 | public static CommandLinePromptReceiver newReceiver() { 11 | return new CommandLinePromptReceiver(); 12 | } 13 | 14 | @Override 15 | public String getRedirectUri() { 16 | return "urn:ietf:wg:oauth:2.0:oob"; 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/main/java/dswebquerytobigquery/ColumnHeader.java: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package dswebquerytobigquery; 16 | 17 | import com.google.auto.value.AutoValue; 18 | 19 | /** Model to store information on the WebQuery's columns. 
*/ 20 | @AutoValue 21 | public abstract class ColumnHeader { 22 | 23 | public abstract String name(); 24 | public abstract String bqType(); 25 | 26 | public static ColumnHeader create(String name, String bqType) { 27 | return new AutoValue_ColumnHeader(name, bqType); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows 28 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). 
29 | -------------------------------------------------------------------------------- /src/main/java/dswebquerytobigquery/StorageServiceFactory.java: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package dswebquerytobigquery; 16 | 17 | import com.google.auth.Credentials; 18 | import com.google.cloud.storage.Storage; 19 | import com.google.cloud.storage.StorageOptions; 20 | 21 | /** 22 | * Factory to build Google Cloud Storage client. 23 | */ 24 | public interface StorageServiceFactory { 25 | 26 | static StorageServiceFactory getDefaultInstance(Credentials credential) { 27 | return () -> StorageOptions.newBuilder() 28 | .setCredentials(credential) 29 | .build().getService(); 30 | } 31 | 32 | Storage buildStorageService(); 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/dswebquerytobigquery/BigQueryFactory.java: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package dswebquerytobigquery; 16 | 17 | import com.google.auth.Credentials; 18 | import com.google.cloud.bigquery.BigQuery; 19 | import com.google.cloud.bigquery.BigQueryOptions; 20 | 21 | /** 22 | * Factory to build BigQuery client. 23 | */ 24 | public interface BigQueryFactory { 25 | 26 | static BigQueryFactory getDefaultInstance(Credentials credentials) { 27 | return projectId -> BigQueryOptions.newBuilder().setCredentials(credentials).setProjectId(projectId).build().getService(); 28 | } 29 | 30 | BigQuery getBigQueryService(String projectId); 31 | 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/dswebquerytobigquery/BigQueryConfig.java: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package dswebquerytobigquery; 16 | 17 | import com.google.auto.value.AutoValue; 18 | 19 | /** Configuration for BigQuery Table information. */ 20 | @AutoValue 21 | public abstract class BigQueryConfig { 22 | 23 | public static Builder builder() { 24 | return new AutoValue_BigQueryConfig.Builder(); 25 | } 26 | 27 | public abstract String getProjectId(); 28 | 29 | public abstract String getDatasetId(); 30 | 31 | public abstract String getTableId(); 32 | 33 | @AutoValue.Builder 34 | public abstract static class Builder { 35 | 36 | public abstract Builder setProjectId(String newProjectId); 37 | 38 | public abstract Builder setDatasetId(String newDatasetId); 39 | 40 | public abstract Builder setTableId(String newTableId); 41 | 42 | public abstract BigQueryConfig build(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/dswebquerytobigquery/TransferConfig.java: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package dswebquerytobigquery; 16 | 17 | import com.google.auto.value.AutoValue; 18 | 19 | /** Model to denote a configuration to load WebQuery into BigQuery. 
*/ 20 | @AutoValue 21 | public abstract class TransferConfig { 22 | 23 | static Builder builder() { 24 | return new AutoValue_TransferConfig.Builder(); 25 | } 26 | 27 | public abstract BigQueryConfig getBigQueryConfig(); 28 | 29 | public abstract String getTempGcsBucketName(); 30 | 31 | public abstract String getWebQueryUrl(); 32 | 33 | @AutoValue.Builder 34 | public abstract static class Builder { 35 | 36 | public abstract Builder setBigQueryConfig(BigQueryConfig newBigQueryConfig); 37 | 38 | public abstract Builder setTempGcsBucketName(String newTempGcsBucketName); 39 | 40 | public abstract Builder setWebQueryUrl(String newWebQueryUrl); 41 | 42 | public abstract TransferConfig build(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/dswebquerytobigquery/Constants.java: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package dswebquerytobigquery; 16 | 17 | import com.google.api.services.bigquery.BigqueryScopes; 18 | import com.google.api.services.storage.StorageScopes; 19 | import com.google.common.collect.ImmutableSet; 20 | 21 | /** Application Configuration Properties.*/ 22 | final class Constants { 23 | 24 | public static final String CSV_FILE_PREFIX = "dswq_"; 25 | 26 | public static final int MAX_THREADS = 10; 27 | public static final String REPORT_PULL_TIMESTAMP_COLUMN_NAME = "reporting_date"; 28 | 29 | public static final ImmutableSet REQUIRED_SCOPES = 30 | ImmutableSet.of( 31 | BigqueryScopes.BIGQUERY_INSERTDATA, 32 | BigqueryScopes.BIGQUERY, 33 | StorageScopes.DEVSTORAGE_READ_WRITE, 34 | "https://www.googleapis.com/auth/doubleclicksearch"); 35 | 36 | public static final String APPLICATION_NAME = "WebQueryToBigQueryv1"; 37 | public static final String CREDENTIAL_DATASTORE_FOLDER = "datastore"; 38 | 39 | private Constants() {} 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/dswebquerytobigquery/WqToBqDataTypeMapper.java: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package dswebquerytobigquery; 16 | 17 | import java.util.HashMap; 18 | 19 | /** Mapper for WebQuery column type to BigQuery Column type. 
*/ 20 | class WqToBqDataTypeMapper { 21 | 22 | static final String TIMESTAMP = "TIMESTAMP"; 23 | static final String INTEGER = "INTEGER"; 24 | static final String TEXT = "STRING"; 25 | static final String DEFAULT_TYPE = "STRING"; 26 | private static final HashMap TYPE_MAPPER; 27 | 28 | static { 29 | TYPE_MAPPER = new HashMap<>(5); 30 | TYPE_MAPPER.put("date", "DATE"); 31 | TYPE_MAPPER.put("text", "STRING"); 32 | TYPE_MAPPER.put("integral", "INTEGER"); 33 | TYPE_MAPPER.put("decimal", "FLOAT"); 34 | TYPE_MAPPER.put("percent", "FLOAT"); 35 | } 36 | 37 | /** 38 | * Translate Data-types from DS WebQuery type to BigQuery Column type. If no mapping found, returns 39 | * {@code DEFAULT_TYPE}. 40 | * 41 | * @param type WebQuery type 42 | * @return BigQueryType 43 | */ 44 | public static String translateWebQueryTypeToBigQueryType(String type) { 45 | 46 | String bqType = TYPE_MAPPER.get(type); 47 | return (bqType != null) ? bqType : DEFAULT_TYPE; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/dswebquerytobigquery/ConfigReader.java: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package dswebquerytobigquery; 16 | 17 | import java.io.File; 18 | import java.io.FileReader; 19 | import java.io.IOException; 20 | import org.apache.commons.csv.CSVFormat; 21 | 22 | /** Service to read CSV Configurations for loading multiple WebQuery to BQ jobs. */ 23 | class ConfigReader { 24 | 25 | /** 26 | * Retrieve configuration from CSV File. 27 | * 28 | * @return Array of WebQuery to BigQuery Transfer configuration items 29 | */ 30 | public static TransferConfig[] loadConfig(File configFile) throws IOException { 31 | return CSVFormat.DEFAULT.withFirstRecordAsHeader().parse(new FileReader(configFile)) 32 | .getRecords() 33 | .stream() 34 | .map( 35 | // Build TransferConfig objects from CSV rows 36 | record -> 37 | TransferConfig.builder() 38 | .setBigQueryConfig( 39 | BigQueryConfig.builder() 40 | .setProjectId(record.get("projectId")) 41 | .setDatasetId(record.get("datasetId")) 42 | .setTableId(record.get("tableId")) 43 | .build()) 44 | .setWebQueryUrl(record.get("webQueryUrl")) 45 | .setTempGcsBucketName(record.get("gcsBucketName")) 46 | .build() 47 | ) 48 | .toArray(TransferConfig[]::new); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/dswebquerytobigquery/Main.java: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package dswebquerytobigquery; 16 | 17 | import static com.google.common.base.Preconditions.checkArgument; 18 | import static dswebquerytobigquery.Constants.MAX_THREADS; 19 | 20 | import com.google.api.services.bigquery.BigqueryScopes; 21 | import com.google.api.services.storage.StorageScopes; 22 | import com.google.auth.oauth2.GoogleCredentials; 23 | import com.google.common.collect.ImmutableList; 24 | import com.google.common.flogger.GoogleLogger; 25 | import java.io.File; 26 | import java.io.IOException; 27 | import java.util.concurrent.Executors; 28 | import java.util.concurrent.ThreadPoolExecutor; 29 | import java.util.stream.Stream; 30 | 31 | class Main { 32 | 33 | private static final GoogleLogger logger = GoogleLogger.forEnclosingClass(); 34 | 35 | public static void main(String[] args) throws IOException { 36 | 37 | checkArgument(args.length == 2, "Provide Configuration CSV and location for csv reports"); 38 | // Load Config File 39 | logger.atInfo().log("config file: %s", args[0]); 40 | var configFile = new File(args[0]); 41 | 42 | var tmpFolder = new File(args[1]); 43 | checkArgument(tmpFolder.isDirectory(), "provided URI is not a folder: %s", args[1]); 44 | TransferConfig[] transferConfigs = ConfigReader.loadConfig(configFile); 45 | 46 | logger.atFine().log("Loaded %s configurations", transferConfigs.length); 47 | 48 | var executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(MAX_THREADS); 49 | 50 | var serviceAccountCredentials = GoogleCredentials.getApplicationDefault(); 51 | serviceAccountCredentials.refresh(); 52 | // Run all configs 53 | Stream.of(transferConfigs) 54 | .map(config -> 55 | new TransferRunner( 56 | config, 57 | serviceAccountCredentials, 58 | BigQueryFactory.getDefaultInstance(serviceAccountCredentials), 59 | StorageServiceFactory.getDefaultInstance(serviceAccountCredentials), 60 | tmpFolder)) 61 | .forEach(executor::execute); 62 | 63 | executor.shutdown(); 64 | logger.atInfo().log("Waiting for workers to complete."); 
65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/java/dswebquerytobigquery/StorageController.java: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package dswebquerytobigquery; 16 | 17 | import static com.google.common.base.Preconditions.checkArgument; 18 | import static com.google.common.base.Preconditions.checkNotNull; 19 | 20 | import com.google.cloud.storage.Blob; 21 | import com.google.cloud.storage.BlobId; 22 | import com.google.cloud.storage.BlobInfo; 23 | import com.google.cloud.storage.Storage; 24 | import com.google.common.flogger.GoogleLogger; 25 | import java.io.File; 26 | import java.io.IOException; 27 | import java.nio.file.Files; 28 | import java.nio.file.Paths; 29 | 30 | /** 31 | * Service to store files into Google Cloud Storage. 32 | */ 33 | class StorageController { 34 | 35 | private static final GoogleLogger logger = GoogleLogger.forEnclosingClass(); 36 | 37 | private final Storage storageService; 38 | 39 | public StorageController(Storage storageService) { 40 | this.storageService = storageService; 41 | } 42 | 43 | /** 44 | * Stores the provided local file to Google Cloud Storage. 45 | * 46 | * @param file the local file to be uploaded. 47 | * @param gcsBucketName the Cloud Storage Bucket name. 
48 | * @param folder the name of the folder to store the file on GCS. 49 | * @return the {@link Blob} of the stored object. 50 | * @throws IOException in case there is an error uploading the file. 51 | */ 52 | public Blob uploadFile(File file, String gcsBucketName, String folder) throws IOException { 53 | checkNotNull(file, "Null/Empty file"); 54 | checkNotNull(folder, "Null Folder name"); 55 | checkArgument(gcsBucketName != null && !gcsBucketName.isEmpty(), "Null Bucket Name"); 56 | 57 | var gcsObject = 58 | storageService 59 | .create( 60 | BlobInfo 61 | .newBuilder(BlobId.of(gcsBucketName, String.format("%s/%s", folder, file.getName()))) 62 | .setContentType("text/csv") 63 | .build(), 64 | Files.readAllBytes(Paths.get(file.getAbsolutePath()))); 65 | 66 | logger.atInfo().log("GCS File Id: %s", gcsObject.getBlobId()); 67 | 68 | // create a gs link to the file 69 | return gcsObject; 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2021 Google LLC 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 
15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 33 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 34 | 35 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 36 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 37 | 38 | @rem Find java.exe 39 | if defined JAVA_HOME goto findJavaFromJavaHome 40 | 41 | set JAVA_EXE=java.exe 42 | %JAVA_EXE% -version >NUL 2>&1 43 | if "%ERRORLEVEL%" == "0" goto execute 44 | 45 | echo. 46 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 47 | echo. 48 | echo Please set the JAVA_HOME variable in your environment to match the 49 | echo location of your Java installation. 50 | 51 | goto fail 52 | 53 | :findJavaFromJavaHome 54 | set JAVA_HOME=%JAVA_HOME:"=% 55 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 56 | 57 | if exist "%JAVA_EXE%" goto execute 58 | 59 | echo. 60 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 61 | echo. 62 | echo Please set the JAVA_HOME variable in your environment to match the 63 | echo location of your Java installation. 
64 | 65 | goto fail 66 | 67 | :execute 68 | @rem Setup the command line 69 | 70 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 71 | 72 | 73 | @rem Execute Gradle 74 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 75 | 76 | :end 77 | @rem End local scope for the variables with windows NT shell 78 | if "%ERRORLEVEL%"=="0" goto mainEnd 79 | 80 | :fail 81 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 82 | rem the _cmd.exe /c_ return code! 83 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 84 | exit /b 1 85 | 86 | :mainEnd 87 | if "%OS%"=="Windows_NT" endlocal 88 | 89 | :omega 90 | -------------------------------------------------------------------------------- /src/main/java/dswebquerytobigquery/BigQueryProcessor.java: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package dswebquerytobigquery; 16 | 17 | import static com.google.common.collect.ImmutableList.toImmutableList; 18 | 19 | import com.google.cloud.bigquery.BigQuery; 20 | import com.google.cloud.bigquery.FormatOptions; 21 | import com.google.cloud.bigquery.Job; 22 | import com.google.cloud.bigquery.JobInfo; 23 | import com.google.cloud.bigquery.LoadJobConfiguration; 24 | import com.google.cloud.bigquery.TableId; 25 | import com.google.cloud.storage.Blob; 26 | import java.io.IOException; 27 | import java.time.LocalDate; 28 | import java.time.ZoneOffset; 29 | import java.time.format.DateTimeFormatter; 30 | import java.util.stream.Stream; 31 | 32 | /** 33 | * BigQuery client to load the CSV file from Cloud Storage into a table. 34 | */ 35 | class BigQueryProcessor { 36 | 37 | private final BigQueryConfig outputTableInfo; 38 | private final BigQuery bigQueryService; 39 | 40 | public BigQueryProcessor(BigQueryConfig outputTableInfo, 41 | BigQuery bigQueryService) { 42 | this.outputTableInfo = outputTableInfo; 43 | this.bigQueryService = bigQueryService; 44 | } 45 | 46 | /** 47 | * Creates a Load job on BigQuery for the provided CSV file on Cloud Bucket. 48 | * 49 | * @param gcsBlobs the list of GCS blobs for CSV files to be uploaded into the given table. 50 | * First file should contain headers. 51 | * @return BigQuery load job id. 52 | */ 53 | public Job loadFileInTable(Blob... 
gcsBlobs) { 54 | var gcsFileNames = 55 | Stream.of(gcsBlobs) 56 | .map(blob -> String.format("gs://%s/%s", blob.getBucket(), blob.getName())) 57 | .collect(toImmutableList()); 58 | 59 | return bigQueryService.create( 60 | JobInfo.newBuilder( 61 | LoadJobConfiguration.newBuilder( 62 | TableId.of( 63 | outputTableInfo.getProjectId(), 64 | outputTableInfo.getDatasetId(), 65 | outputTableInfo.getTableId() + "_" + getDateSuffix()), 66 | gcsFileNames, 67 | FormatOptions.csv() 68 | .toBuilder() 69 | .setSkipLeadingRows(1) 70 | .setAllowQuotedNewLines(true) 71 | .build()) 72 | .setAutodetect(true) 73 | .setWriteDisposition(JobInfo.WriteDisposition.WRITE_TRUNCATE) 74 | .build()) 75 | .build()); 76 | } 77 | 78 | /** 79 | * Returns today's date at UTC in YYYYMMDD format to be used as a suffix for the table name. 80 | */ 81 | private static String getDateSuffix() { 82 | return DateTimeFormatter.ofPattern("yyyyMMdd").format(LocalDate.now(ZoneOffset.UTC)); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/main/java/dswebquerytobigquery/WebQuery.java: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
package dswebquerytobigquery;

import static com.google.common.base.Preconditions.checkNotNull;

import com.google.auth.oauth2.GoogleCredentials;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.ccil.cowan.tagsoup.Parser;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * A Service to denote and read SA360's WebQuery.
 */
public class WebQuery {

  /** Matches the numeric report id carried in the {@code rid=} part of a WebQuery URL. */
  private static final Pattern REPORT_ID_PATTERN = Pattern.compile("rid=(\\d+)");

  private final String queryUrl;
  private final String reportId;
  private final GoogleCredentials credential;

  /**
   * @param queryUrl the WebQuery URL; must not be {@code null}.
   * @param credential credentials used to authorize the download; validated when {@link #read()}
   *     is called.
   */
  public WebQuery(String queryUrl, GoogleCredentials credential) {
    this.queryUrl = checkNotNull(queryUrl);
    this.reportId = extractReportId(queryUrl);
    this.credential = credential;
  }

  /**
   * Returns a reader for this WebQuery. No connection is opened until the reader is asked for
   * data.
   *
   * @throws NullPointerException if this WebQuery was created without credentials.
   */
  public WebQueryReader read() {
    return new WebQueryReader(credential);
  }

  /**
   * Returns the report id found in the URL, or {@code null} when the URL has no {@code rid=}
   * followed by digits.
   */
  public String getReportId() {
    return reportId;
  }

  public String getQueryUrl() {
    return queryUrl;
  }

  private static String extractReportId(String queryUrl) {
    Matcher matcher = REPORT_ID_PATTERN.matcher(queryUrl);
    return matcher.find() ? matcher.group(1) : null;
  }

  /**
   * A service to open a connection to SA360 for reading the webquery.
   */
  public class WebQueryReader {

    private final GoogleCredentials credential;

    private WebQueryReader(GoogleCredentials credential) {
      this.credential = checkNotNull(credential);
    }

    /**
     * Returns a stream by opening a connection to the WebQuery URL, providing the OAuth token in
     * the {@code Authorization} header.
     *
     * @throws IOException if the token cannot be obtained or the request fails.
     */
    InputStream getStream() throws IOException {
      // getAccessToken() only returns the cached token, which is null before the first refresh
      // and may have expired since; make sure a usable token is present before using it.
      credential.refreshIfExpired();
      var accessToken = credential.getAccessToken();
      if (accessToken == null) {
        throw new IOException("No OAuth access token available for WebQuery: " + queryUrl);
      }

      HttpURLConnection conn = (HttpURLConnection) new URL(queryUrl).openConnection();
      conn.setRequestMethod("GET");
      conn.setRequestProperty("Authorization", "Bearer " + accessToken.getTokenValue());

      return conn.getInputStream();
    }

    /**
     * Parses and outputs the WebQuery HTML to a given CSV File.
     *
     * @param outputFile the CSV file to write.
     * @throws IOException if the report cannot be downloaded or read.
     * @throws SAXException if parsing the HTML or writing the CSV fails.
     */
    public void writeAsCsv(File outputFile) throws IOException, SAXException {
      try (Reader htmlFileReader = new InputStreamReader(getStream(), StandardCharsets.UTF_8)) {
        Parser xmlParser = new Parser();
        xmlParser.setContentHandler(Html2CsvParseHandler.forFile(outputFile));
        xmlParser.parse(new InputSource(htmlFileReader));
      }
    }
  }
}
-------------------------------------------------------------------------------- /src/main/java/dswebquerytobigquery/TransferRunner.java: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package dswebquerytobigquery;

import static dswebquerytobigquery.Constants.CSV_FILE_PREFIX;

import com.google.auth.oauth2.GoogleCredentials;
import com.google.common.flogger.GoogleLogger;
import java.io.File;

/**
 * Runs the transfer described by one configuration entry: downloads the WebQuery report as a
 * local CSV file, uploads it to Cloud Storage and starts a BigQuery load job for it.
 */
class TransferRunner implements Runnable {

  private static final GoogleLogger logger = GoogleLogger.forEnclosingClass();

  private final TransferConfig xferConfig;
  private final GoogleCredentials credential;
  private final BigQueryFactory bigQueryFactory;
  private final StorageServiceFactory storageServiceFactory;
  private final File csvFolder;

  public TransferRunner(
      TransferConfig xferConfig,
      GoogleCredentials credential,
      BigQueryFactory bigQueryFactory,
      StorageServiceFactory storageServiceFactory,
      File csvFolder) {
    this.xferConfig = xferConfig;
    this.credential = credential;
    this.bigQueryFactory = bigQueryFactory;
    this.storageServiceFactory = storageServiceFactory;
    this.csvFolder = csvFolder;
  }

  /**
   * Executes the transfer. Any exception raised while converting, uploading or loading is logged
   * at SEVERE level and not rethrown; the local CSV file is always registered for deletion at
   * JVM exit.
   */
  @Override
  public void run() {
    logger.atInfo().log("Processing: %s", xferConfig);

    var webQuery = new WebQuery(xferConfig.getWebQueryUrl(), credential);
    var reportId = webQuery.getReportId();
    // Local file that receives the report converted to CSV.
    var tempCsvFile = createCsvFile(reportId);

    try {
      logger.atInfo().log("[Report %s] starting: url: %s", reportId, webQuery.getQueryUrl());
      logger.atInfo().log("[Report %s] localFile: %s", reportId, tempCsvFile.getAbsolutePath());

      // Download the report and convert it to CSV.
      webQuery.read().writeAsCsv(tempCsvFile);

      // Stage the CSV file on Cloud Storage.
      var storageController = new StorageController(storageServiceFactory.buildStorageService());
      var gcsLink =
          storageController.uploadFile(tempCsvFile, xferConfig.getTempGcsBucketName(), "sa360tmp");
      logger.atInfo().log("GCS Link: %s", gcsLink);

      // The actual registration for deletion happens in the finally block below.
      logger.atInfo().log("marking file for deletion: %s", tempCsvFile.getAbsolutePath());

      // Ask BigQuery to load the staged file into the destination table.
      var bigQueryConfig = xferConfig.getBigQueryConfig();
      var bigQueryService = bigQueryFactory.getBigQueryService(bigQueryConfig.getProjectId());
      var bqJob = new BigQueryProcessor(bigQueryConfig, bigQueryService).loadFileInTable(gcsLink);

      var jobId = bqJob.getJobId();
      logger.atInfo().log(
          "[Report %s] BQ JobId:%s%nBigquery Job link: https://console.cloud.google.com/bigquery?project=%s&page=jobs",
          reportId,
          jobId.getJob(),
          jobId.getProject());
      logger.atInfo().log("[Report %s] finished %s", reportId, xferConfig);
    } catch (Exception exception) {
      logger.atSevere().withCause(exception).log("[Report %s] Error Processing", reportId);
    } finally {
      tempCsvFile.deleteOnExit();
    }
  }

  /**
   * Builds the local CSV file location inside {@code csvFolder}; the name combines the CSV
   * prefix, the current thread id and the report id.
   */
  private File createCsvFile(String reportId) {
    var fileName =
        String.format(
            "%s_%s_%s.csv", CSV_FILE_PREFIX, Thread.currentThread().getId(), reportId);
    return new File(csvFolder.getAbsolutePath() + "/" + fileName);
  }
}
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Total alerts](https://img.shields.io/lgtm/alerts/g/google/sa360-webquery-bigquery.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/google/sa360-webquery-bigquery/alerts/) [![Language grade: Java](https://img.shields.io/lgtm/grade/java/g/google/sa360-webquery-bigquery.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/google/sa360-webquery-bigquery/context:java) 2 | 3 | # SA360 WebQuery to BigQuery Exporter 4 | 5 | ## Background 6 | Large SA360 customers want
to build custom reports by combining their 1st party data with paid-search data. 7 | 8 | The easiest way to achieve that is by combining the data in BigQuery. 9 | There are two ways to programmatically import SA360 data into BigQuery: 10 | 1. [API](https://developers.google.com/search-ads/v2/how-tos/reporting) 11 | 2. [Web Query](https://support.google.com/searchads/answer/2870738?hl=en) 12 | 13 | WebQuery makes reporting easier compared to the API (with fewer steps), as it allows adding additional entity data (e.g. Business data) to the report, which keeps the report simpler than with the API, where this stitching has to be done in the user's program. 14 | 15 | ### How does it work? 16 | The tool uses [Service account](https://cloud.google.com/iam/docs/service-accounts) credentials to extract reports from SA360 and to send the data into BigQuery. 17 | First the system extracts the report (in HTML format) from SA360 and converts it into CSV on the fly (using SAX parsing); this file is then staged (copied) to GCS, followed by calling the BQ API to `load` the data into a separate table. 18 | 19 | > Support for [User account](https://cloud.google.com/docs/authentication/end-user) credentials has been dropped due to security issues arising from storing user credentials locally. 20 | 21 | ## Video Tutorial 22 | [![Part-1](https://img.youtube.com/vi/xEMe5CRy6BQ/0.jpg)](https://www.youtube.com/watch?v=xEMe5CRy6BQ) 23 | 24 | ## Usage 25 | 26 | ### Create Google Cloud Project 27 | 1. Create a Google Cloud Project and enable billing. 28 | 1. [Enable APIs](https://console.cloud.google.com/flows/enableapi?apiid=doubleclicksearch,bigquery.googleapis.com,storage.googleapis.com). 29 | 1. [Create a Service Account](https://cloud.google.com/iam/docs/creating-managing-service-accounts#creating) to be used for running this project. 30 | 1. Grant the service account permissions 31 | 1. SA360 permission to retrieve webquery report 32 | 1. BigQuery and Storage permission 33 | 1.
Set Variables 34 | ```shell 35 | export PROJECT_ID="" 36 | export GCS_BUCKET_NAME="" 37 | export BIGQUERY_DATASET_NAME="" 38 | export SERVICE_ACCOUNT_NAME="" 39 | export COMPUTE_ENGINE_INSTANCE_NAME="" 40 | ``` 41 | 42 | ### Create Resources 43 | 1. Create Cloud Storage bucket to stage the reports. 44 | ```shell 45 | gsutil mb -p ${PROJECT_ID} "gs://${GCS_BUCKET_NAME}" 46 | ``` 47 | 48 | 1. Create BigQuery dataset to store the reports 49 | ```shell 50 | bq mk --project_id="${PROJECT_ID}" ${BIGQUERY_DATASET_NAME} 51 | ``` 52 | 53 | 1. Create Service account 54 | ```shell 55 | gcloud iam service-accounts create ${SERVICE_ACCOUNT_NAME} \ 56 | --description="Service account to run SA360 webquery to BigQuery" 57 | ``` 58 | 59 | 1. Create [Compute Engine](https://cloud.google.com/compute) instance, [Set default zone and region](https://cloud.google.com/compute/docs/instances/create-start-instance#before-you-begin) 60 | ```shell 61 | gcloud compute instances create ${COMPUTE_ENGINE_INSTANCE_NAME} \ 62 | --service-account="${SERVICE_ACCOUNT_NAME}@${PROJECT_ID}.iam.gserviceaccount.com" \ 63 | --scopes=https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/doubleclicksearch \ 64 | --image-project debian-cloud \ 65 | --image-family debian-10 66 | ``` 67 | 68 | > Ensure that the user/serviceAccount has at least **READ** permissions for SA360 and **EDITOR** permissions for BigQuery. 69 | 70 | ### Compile and run 71 | 1. Create a [Configuration file (csv)](#csv-file-format) with specified headers. (consider `sample-config.csv` as a reference) 72 | 73 | 1. Compile and package source code into an executable JAR. 74 | ```shell 75 | ./gradlew clean shadowJar 76 | ``` 77 | 1. Copy the fat JAR and the configuration file to the Compute Engine instance. 78 | ```shell 79 | gcloud compute scp build/libs/dswqtobq-1.1-all.jar ${COMPUTE_ENGINE_INSTANCE_NAME}:~/ 80 | gcloud compute scp <configuration-csv-file> ${COMPUTE_ENGINE_INSTANCE_NAME}:~/ 81 | ``` 82 | 83 | 1.
SSH into the Compute Engine instance 84 | ```shell 85 | gcloud compute ssh ${COMPUTE_ENGINE_INSTANCE_NAME} 86 | ``` 87 | > Install Java 11 on the VM if required: `sudo apt install -y openjdk-11-jdk` 88 | 89 | 1. Run the jar file 90 | ```shell 91 | # run the JAR file by specifying the configuration file as first parameter 92 | java -jar dswqtobq-1.1-all.jar <configuration-csv-file> 93 | ``` 94 | > You can schedule to run it automatically using cron, after this step. 95 | 96 | ### CSV File Format 97 | The CSV configuration file must contain the following headers; the order does not matter. 98 | * `gcsBucketName` - The GCS Bucket to be used for staging CSV file for BQ upload. 99 | * `projectId` - GCP Project Id to use for billing as well as for BQ Table location. 100 | * `datasetId` - BQ Dataset id/name belonging to given _projectId_. 101 | * `tableId` - Prefix to be used for the BigQuery Table (the current UTC date is appended as a `_YYYYMMDD` suffix) 102 | * `webQueryUrl` - SearchAds 360 WebQuery link 103 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | # 3 | # Copyright 2021 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License.
#

##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
# Remember the caller's working directory; it is restored once APP_HOME has been resolved.
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

# Prints all arguments as one message on stdout.
warn () {
    echo "$*"
}

# Prints all arguments as one message surrounded by blank lines, then exits with status 1.
die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    # Re-set the positional parameters from the converted args0..args8 variables;
    # only argument counts 0 through 9 have a matching branch.
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
# Prints every argument wrapped in single quotes (embedded single quotes escaped) and followed
# by a backslash, so that the output can be re-parsed by the `eval` below.
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"
-------------------------------------------------------------------------------- /src/main/java/dswebquerytobigquery/Html2CsvParseHandler.java: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0
// (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package dswebquerytobigquery;

import static dswebquerytobigquery.WqToBqDataTypeMapper.translateWebQueryTypeToBigQueryType;

import com.google.common.collect.ImmutableList;
import com.google.common.flogger.GoogleLogger;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler for WebQuery HTML.
 *
 * <p>Reads the WebQuery HTML and transforms it into a CSV file: column types are collected from
 * {@code <col>} elements, column names from {@code <th>} elements, and every {@code <tr>} inside
 * {@code <tbody>} becomes one CSV record. A report-pull timestamp column is appended to the
 * header and to every record.
 */
public class Html2CsvParseHandler extends DefaultHandler {

  private static final GoogleLogger logger = GoogleLogger.forEnclosingClass();

  private final File outputFile;
  // NOTE(review): formatted in the JVM default time zone; confirm this matches how the
  // timestamp column is interpreted on load.
  private final String processingDateString =
      new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
  private CSVPrinter csvPrinter;
  // NOTE(review): element type assumed to be String (a BigQuery type name); confirm against
  // WqToBqDataTypeMapper and ColumnHeader.create.
  private ImmutableList.Builder<String> columnTypeInfo;
  private ImmutableList.Builder<String> columnNameInfo;
  private StringBuilder textAccumulator;
  private ImmutableList.Builder<String> rowAccumulator;
  private boolean bodyElementStarted = false;
  private int bodyRowCounter = 0;

  /**
   * Initializes the Parser with Output CSV File location.
   *
   * @param outputFile name and location of the output CSV File to store parsed output.
   */
  private Html2CsvParseHandler(File outputFile) {
    this.outputFile = outputFile;
  }

  /**
   * Factory method to build the parser for a given output file.
   *
   * @param outputFile name and location of the output CSV File to store parsed output.
   * @return the Parser which will store the output to the given File.
   */
  public static Html2CsvParseHandler forFile(File outputFile) {
    return new Html2CsvParseHandler(outputFile);
  }

  /**
   * Pairs the collected column names with the collected column types.
   *
   * @return the column headers, or an empty list when no {@code <tbody>} was seen. When the
   *     number of names and types differ, only the leading pairs present in both are returned.
   */
  private ImmutableList<ColumnHeader> getColumnHeaders() {
    if (!bodyElementStarted) {
      return ImmutableList.of();
    }

    List<String> columnTypes = columnTypeInfo.build();
    List<String> columnNames = columnNameInfo.build();
    // This list is only used for logging; a <col>/<th> count mismatch must not fail the parse.
    int pairCount = Math.min(columnNames.size(), columnTypes.size());

    ImmutableList.Builder<ColumnHeader> columnHeaderBuilder = ImmutableList.builder();
    for (int index = 0; index < pairCount; index++) {
      columnHeaderBuilder.add(
          ColumnHeader.create(columnNames.get(index), columnTypes.get(index)));
    }
    return columnHeaderBuilder.build();
  }

  /**
   * Opens the output file and writes the CSV header record.
   *
   * @throws SAXException if the file cannot be created or the header cannot be written.
   */
  private void createCsvWriter(String[] headers) throws SAXException {
    OutputStreamWriter fileWriter = null;
    try {
      fileWriter = new OutputStreamWriter(new FileOutputStream(outputFile), StandardCharsets.UTF_8);
      csvPrinter = new CSVPrinter(fileWriter, CSVFormat.DEFAULT.withHeader(headers));
    } catch (IOException ioexception) {
      // The printer never took ownership of the stream, so release it here.
      if (fileWriter != null) {
        try {
          fileWriter.close();
        } catch (IOException closeFailure) {
          ioexception.addSuppressed(closeFailure);
        }
      }
      throw new SAXException("error creating outputfile", ioexception);
    }
  }

  /**
   * Writes one record to the CSV file.
   *
   * @throws SAXException if no header has been written yet or the record cannot be written.
   */
  private void writeRowToCsv(ImmutableList<String> rowElements) throws SAXException {
    if (csvPrinter == null) {
      throw new SAXException("data row encountered before the table header (<thead>) was parsed");
    }
    try {
      csvPrinter.printRecord(rowElements);
    } catch (IOException ioException) {
      throw new SAXException("error writing row " + rowElements, ioException);
    }
  }

  @Override
  public void startDocument() {
    columnTypeInfo = ImmutableList.builder();
    columnNameInfo = ImmutableList.builder();
  }

  /**
   * Flushes and closes the CSV file.
   *
   * @throws SAXException if the document contained no {@code <thead>} (so no CSV file was
   *     created) or the file cannot be closed.
   */
  @Override
  public void endDocument() throws SAXException {
    if (csvPrinter == null) {
      throw new SAXException(
          "no table header (<thead>) found in the WebQuery response; no CSV file was created");
    }
    try {
      csvPrinter.flush();
      csvPrinter.close();
    } catch (IOException ioexception) {
      throw new SAXException("error closing file", ioexception);
    }
    logger.atFine().log("headers: %s", getColumnHeaders());
    logger.atInfo().log("parsed rows: %s", bodyRowCounter);
  }

  @Override
  public void startElement(String uri, String localName, String qName, Attributes attributes) {

    // Start collecting Column Type Information
    if (qName.equals("col")) {
      String colClassValue = attributes.getValue("class");
      columnTypeInfo.add(
          (colClassValue == null)
              ? WqToBqDataTypeMapper.DEFAULT_TYPE
              : translateWebQueryTypeToBigQueryType(colClassValue));
    }

    // Every header or data cell starts with an empty text buffer.
    if (qName.equals("th") || qName.equals("td")) {
      textAccumulator = new StringBuilder();
    }

    if (qName.equals("tbody")) {
      bodyElementStarted = true;
    }

    // New Datarow begins
    if (bodyElementStarted && qName.equals("tr")) {
      rowAccumulator = ImmutableList.builder();
    }
  }

  @Override
  public void endElement(String uri, String localName, String qName) throws SAXException {

    // All <col> elements have been read; append the type of the timestamp column.
    if (qName.equals("colgroup")) {
      columnTypeInfo.add(WqToBqDataTypeMapper.TIMESTAMP);
    }

    if (qName.equals("th")) {
      columnNameInfo.add(formatHeader(textAccumulator.toString()));
    }

    // Header is complete: start the CSV Writer
    if (qName.equals("thead")) {
      createCsvWriter(
          columnNameInfo
              .add(Constants.REPORT_PULL_TIMESTAMP_COLUMN_NAME) // add partition column
              .build()
              .toArray(new String[0]));
    }

    if (qName.equals("td")) {
      rowAccumulator.add(sanitizeForCsv(textAccumulator.toString()));
    }

    if (bodyElementStarted && qName.equals("tr")) {
      bodyRowCounter++;
      writeRowToCsv(
          rowAccumulator
              .add(processingDateString) // Add partition timestamp to each row
              .build());
    }
  }

  @Override
  public void characters(char[] ch, int start, int length) {
    if (textAccumulator != null) {
      textAccumulator.append(ch, start, length);
    }
  }

  /**
   * Closes the CSV file (best effort) before delegating to the default handling, which rethrows
   * the parse error.
   */
  @Override
  public void fatalError(SAXParseException e) throws SAXException {
    try {
      if (csvPrinter != null) {
        csvPrinter.close();
      }
    } catch (IOException ioexception) {
      // The parse error below is the failure that matters; only record the close problem.
      logger.atWarning().withCause(ioexception).log("error closing file");
    }
    super.fatalError(e);
  }

  /**
   * Turns a header text into a column name: every character outside {@code [A-Za-z0-9_]} becomes
   * an underscore, runs of underscores collapse into one, and the result is lower-cased.
   */
  private static String formatHeader(String headerValue) {

    if (headerValue == null) {
      return "";
    }

    return headerValue
        .replaceAll("[^A-Za-z0-9_]", "_") // Replace all non-Alphanumeric chars
        .replaceAll("_{2,}", "_") // Replace multiple underscores with one
        .toLowerCase(Locale.ROOT); // locale-independent, so 'I' always becomes 'i'
  }

  /**
   * Normalizes a cell value before it is handed to the CSV printer.
   *
   * <p>Quoting and escaping are left to the CSV printer: escaping here as well would escape the
   * value twice and leave literal quote characters in the written data.
   *
   * @return an empty string for {@code null}, empty or {@code ""} cells, otherwise the cell text
   *     unchanged.
   */
  private static String sanitizeForCsv(String cellData) {
    if (cellData == null || cellData.equals("\"\"")) {
      return "";
    }
    return cellData;
  }
}
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions.
9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | Apache License 204 | Version 2.0, January 2004 205 | http://www.apache.org/licenses/ 206 | 207 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 208 | 209 | 1. Definitions. 210 | 211 | "License" shall mean the terms and conditions for use, reproduction, 212 | and distribution as defined by Sections 1 through 9 of this document. 213 | 214 | "Licensor" shall mean the copyright owner or entity authorized by 215 | the copyright owner that is granting the License. 216 | 217 | "Legal Entity" shall mean the union of the acting entity and all 218 | other entities that control, are controlled by, or are under common 219 | control with that entity. For the purposes of this definition, 220 | "control" means (i) the power, direct or indirect, to cause the 221 | direction or management of such entity, whether by contract or 222 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 223 | outstanding shares, or (iii) beneficial ownership of such entity. 224 | 225 | "You" (or "Your") shall mean an individual or Legal Entity 226 | exercising permissions granted by this License. 
227 | 228 | "Source" form shall mean the preferred form for making modifications, 229 | including but not limited to software source code, documentation 230 | source, and configuration files. 231 | 232 | "Object" form shall mean any form resulting from mechanical 233 | transformation or translation of a Source form, including but 234 | not limited to compiled object code, generated documentation, 235 | and conversions to other media types. 236 | 237 | "Work" shall mean the work of authorship, whether in Source or 238 | Object form, made available under the License, as indicated by a 239 | copyright notice that is included in or attached to the work 240 | (an example is provided in the Appendix below). 241 | 242 | "Derivative Works" shall mean any work, whether in Source or Object 243 | form, that is based on (or derived from) the Work and for which the 244 | editorial revisions, annotations, elaborations, or other modifications 245 | represent, as a whole, an original work of authorship. For the purposes 246 | of this License, Derivative Works shall not include works that remain 247 | separable from, or merely link (or bind by name) to the interfaces of, 248 | the Work and Derivative Works thereof. 249 | 250 | "Contribution" shall mean any work of authorship, including 251 | the original version of the Work and any modifications or additions 252 | to that Work or Derivative Works thereof, that is intentionally 253 | submitted to Licensor for inclusion in the Work by the copyright owner 254 | or by an individual or Legal Entity authorized to submit on behalf of 255 | the copyright owner. 
For the purposes of this definition, "submitted" 256 | means any form of electronic, verbal, or written communication sent 257 | to the Licensor or its representatives, including but not limited to 258 | communication on electronic mailing lists, source code control systems, 259 | and issue tracking systems that are managed by, or on behalf of, the 260 | Licensor for the purpose of discussing and improving the Work, but 261 | excluding communication that is conspicuously marked or otherwise 262 | designated in writing by the copyright owner as "Not a Contribution." 263 | 264 | "Contributor" shall mean Licensor and any individual or Legal Entity 265 | on behalf of whom a Contribution has been received by Licensor and 266 | subsequently incorporated within the Work. 267 | 268 | 2. Grant of Copyright License. Subject to the terms and conditions of 269 | this License, each Contributor hereby grants to You a perpetual, 270 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 271 | copyright license to reproduce, prepare Derivative Works of, 272 | publicly display, publicly perform, sublicense, and distribute the 273 | Work and such Derivative Works in Source or Object form. 274 | 275 | 3. Grant of Patent License. Subject to the terms and conditions of 276 | this License, each Contributor hereby grants to You a perpetual, 277 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 278 | (except as stated in this section) patent license to make, have made, 279 | use, offer to sell, sell, import, and otherwise transfer the Work, 280 | where such license applies only to those patent claims licensable 281 | by such Contributor that are necessarily infringed by their 282 | Contribution(s) alone or by combination of their Contribution(s) 283 | with the Work to which such Contribution(s) was submitted. 
If You 284 | institute patent litigation against any entity (including a 285 | cross-claim or counterclaim in a lawsuit) alleging that the Work 286 | or a Contribution incorporated within the Work constitutes direct 287 | or contributory patent infringement, then any patent licenses 288 | granted to You under this License for that Work shall terminate 289 | as of the date such litigation is filed. 290 | 291 | 4. Redistribution. You may reproduce and distribute copies of the 292 | Work or Derivative Works thereof in any medium, with or without 293 | modifications, and in Source or Object form, provided that You 294 | meet the following conditions: 295 | 296 | (a) You must give any other recipients of the Work or 297 | Derivative Works a copy of this License; and 298 | 299 | (b) You must cause any modified files to carry prominent notices 300 | stating that You changed the files; and 301 | 302 | (c) You must retain, in the Source form of any Derivative Works 303 | that You distribute, all copyright, patent, trademark, and 304 | attribution notices from the Source form of the Work, 305 | excluding those notices that do not pertain to any part of 306 | the Derivative Works; and 307 | 308 | (d) If the Work includes a "NOTICE" text file as part of its 309 | distribution, then any Derivative Works that You distribute must 310 | include a readable copy of the attribution notices contained 311 | within such NOTICE file, excluding those notices that do not 312 | pertain to any part of the Derivative Works, in at least one 313 | of the following places: within a NOTICE text file distributed 314 | as part of the Derivative Works; within the Source form or 315 | documentation, if provided along with the Derivative Works; or, 316 | within a display generated by the Derivative Works, if and 317 | wherever such third-party notices normally appear. The contents 318 | of the NOTICE file are for informational purposes only and 319 | do not modify the License. 
You may add Your own attribution 320 | notices within Derivative Works that You distribute, alongside 321 | or as an addendum to the NOTICE text from the Work, provided 322 | that such additional attribution notices cannot be construed 323 | as modifying the License. 324 | 325 | You may add Your own copyright statement to Your modifications and 326 | may provide additional or different license terms and conditions 327 | for use, reproduction, or distribution of Your modifications, or 328 | for any such Derivative Works as a whole, provided Your use, 329 | reproduction, and distribution of the Work otherwise complies with 330 | the conditions stated in this License. 331 | 332 | 5. Submission of Contributions. Unless You explicitly state otherwise, 333 | any Contribution intentionally submitted for inclusion in the Work 334 | by You to the Licensor shall be under the terms and conditions of 335 | this License, without any additional terms or conditions. 336 | Notwithstanding the above, nothing herein shall supersede or modify 337 | the terms of any separate license agreement you may have executed 338 | with Licensor regarding such Contributions. 339 | 340 | 6. Trademarks. This License does not grant permission to use the trade 341 | names, trademarks, service marks, or product names of the Licensor, 342 | except as required for reasonable and customary use in describing the 343 | origin of the Work and reproducing the content of the NOTICE file. 344 | 345 | 7. Disclaimer of Warranty. Unless required by applicable law or 346 | agreed to in writing, Licensor provides the Work (and each 347 | Contributor provides its Contributions) on an "AS IS" BASIS, 348 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 349 | implied, including, without limitation, any warranties or conditions 350 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 351 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 352 | appropriateness of using or redistributing the Work and assume any 353 | risks associated with Your exercise of permissions under this License. 354 | 355 | 8. Limitation of Liability. In no event and under no legal theory, 356 | whether in tort (including negligence), contract, or otherwise, 357 | unless required by applicable law (such as deliberate and grossly 358 | negligent acts) or agreed to in writing, shall any Contributor be 359 | liable to You for damages, including any direct, indirect, special, 360 | incidental, or consequential damages of any character arising as a 361 | result of this License or out of the use or inability to use the 362 | Work (including but not limited to damages for loss of goodwill, 363 | work stoppage, computer failure or malfunction, or any and all 364 | other commercial damages or losses), even if such Contributor 365 | has been advised of the possibility of such damages. 366 | 367 | 9. Accepting Warranty or Additional Liability. While redistributing 368 | the Work or Derivative Works thereof, You may choose to offer, 369 | and charge a fee for, acceptance of support, warranty, indemnity, 370 | or other liability obligations and/or rights consistent with this 371 | License. However, in accepting such obligations, You may act only 372 | on Your own behalf and on Your sole responsibility, not on behalf 373 | of any other Contributor, and only if You agree to indemnify, 374 | defend, and hold each Contributor harmless for any liability 375 | incurred by, or claims asserted against, such Contributor by reason 376 | of your accepting any such warranty or additional liability. 377 | 378 | END OF TERMS AND CONDITIONS 379 | 380 | APPENDIX: How to apply the Apache License to your work. 381 | 382 | To apply the Apache License to your work, attach the following 383 | boilerplate notice, with the fields enclosed by brackets "[]" 384 | replaced with your own identifying information. 
(Don't include 385 | the brackets!) The text should be enclosed in the appropriate 386 | comment syntax for the file format. We also recommend that a 387 | file or class name and description of purpose be included on the 388 | same "printed page" as the copyright notice for easier 389 | identification within third-party archives. 390 | 391 | Copyright [yyyy] [name of copyright owner] 392 | 393 | Licensed under the Apache License, Version 2.0 (the "License"); 394 | you may not use this file except in compliance with the License. 395 | You may obtain a copy of the License at 396 | 397 | http://www.apache.org/licenses/LICENSE-2.0 398 | 399 | Unless required by applicable law or agreed to in writing, software 400 | distributed under the License is distributed on an "AS IS" BASIS, 401 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 402 | See the License for the specific language governing permissions and 403 | limitations under the License. 404 | --------------------------------------------------------------------------------