├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── bigquery_samples.key.json.enc ├── favicon.ico ├── java ├── pom.xml └── src │ ├── main │ └── java │ │ └── com │ │ └── google │ │ └── cloud │ │ └── bigquery │ │ └── samples │ │ ├── AsyncQuerySample.java │ │ ├── BigqueryServiceFactory.java │ │ ├── BigqueryUtils.java │ │ ├── ExportDataCloudStorageSample.java │ │ ├── LoadDataCSVSample.java │ │ ├── StreamingSample.java │ │ └── SyncQuerySample.java │ └── test │ └── java │ └── com │ └── google │ └── cloud │ └── bigquery │ └── samples │ └── test │ ├── AsyncQuerySampleTest.java │ ├── BigquerySampleTest.java │ ├── ExportDataCloudStorageSampleTest.java │ ├── LoadDataCSVSampleTest.java │ ├── StreamingSampleTest.java │ └── SyncQuerySampleTest.java ├── python ├── dump │ ├── __init__.py │ └── load-data-by-POST.py ├── requirements.txt ├── samples │ ├── __init__.py │ ├── async_query.py │ ├── discovery_doc.py │ ├── export_data_to_cloud_storage.py │ ├── load_data_by_post.py │ ├── load_data_from_csv.py │ ├── streaming.py │ ├── sync_query.py │ └── utils.py ├── test │ ├── __init__.py │ ├── base_test.py │ ├── test_async_query.py │ ├── test_export_data_to_cloud_storage.py │ ├── test_load_data_from_csv.py │ ├── test_streaming.py │ └── test_sync_query.py └── tox.ini └── resources ├── constants.json ├── data.csv ├── schema.json └── streamrows.json /.gitignore: -------------------------------------------------------------------------------- 1 | # VIM tmp files 2 | *.swp 3 | 4 | *.pyc 5 | 6 | .tox 7 | 8 | *.key.json 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | python: 4 | - "2.7" 5 | cache: 6 | directories: 7 | - $HOME/gcloud/ 8 | - $HOME/.pip-cache 9 | 10 | env: 11 | - GCLOUD=$HOME/gcloud/google-cloud-sdk/bin 12 | 13 | before_install: 14 | #Install gcloud 15 | - if [ ! -d $HOME/gcloud/google-cloud-sdk ]; then 16 | mkdir -p $HOME/gcloud && 17 | wget https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.tar.gz --directory-prefix=$HOME/gcloud && 18 | cd $HOME/gcloud && 19 | tar xzf google-cloud-sdk.tar.gz && 20 | printf '\ny\n\ny\ny\n' | ./google-cloud-sdk/install.sh && 21 | cd $TRAVIS_BUILD_DIR; 22 | fi 23 | 24 | # Auth 25 | - openssl aes-256-cbc -K $encrypted_51ede52ea9aa_key -iv $encrypted_51ede52ea9aa_iv -in bigquery_samples.key.json.enc -out bigquery_samples.key.json -d 26 | 27 | install: 28 | - pip install tox --download-cache $HOME/.pip-cache 29 | 30 | script: 31 | - cd python/ 32 | - tox 33 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to become a contributor and submit your own code 2 | 3 | ## Contributor License Agreements 4 | 5 | We'd love to accept your sample apps and patches! Before we can take them, we 6 | have to jump a couple of legal hurdles. 7 | 8 | Please fill out either the individual or corporate Contributor License Agreement 9 | (CLA). 10 | 11 | * If you are an individual writing original source code and you're sure you 12 | own the intellectual property, then you'll need to sign an [individual CLA] 13 | (https://developers.google.com/open-source/cla/individual). 
14 | * If you work for a company that wants to allow you to contribute your work, 15 | then you'll need to sign a [corporate CLA] 16 | (https://developers.google.com/open-source/cla/corporate). 17 | 18 | Follow either of the two links above to access the appropriate CLA and 19 | instructions for how to sign and return it. Once we receive it, we'll be able to 20 | accept your pull requests. 21 | 22 | ## Contributing A Patch 23 | 24 | 1. Submit an issue describing your proposed change to the repo in question. 25 | 1. The repo owner will respond to your issue promptly. 26 | 1. If your proposed change is accepted, and you haven't already done so, sign a 27 | Contributor License Agreement (see details above). 28 | 1. Fork the desired repo, develop and test your code changes. 29 | 1. Ensure that your code adheres to the existing style in the sample to which 30 | you are contributing. Refer to the 31 | [Google Cloud Platform Samples Style Guide] 32 | (https://github.com/GoogleCloudPlatform/Template/wiki/style.html) for the 33 | recommended coding standards for this organization. 34 | 1. Ensure that your code has an appropriate set of unit tests which all pass. 35 | 1. Submit a pull request. 36 | 37 | ## Contributing A New Sample App 38 | 39 | 1. Submit an issue to the GoogleCloudPlatform/Template repo describing your 40 | proposed sample app. 41 | 1. The Template repo owner will respond to your enhancement issue promptly. 42 | Instructional value is the top priority when evaluating new app proposals for 43 | this collection of repos. 44 | 1. If your proposal is accepted, and you haven't already done so, sign a 45 | Contributor License Agreement (see details above). 46 | 1. Create your own repo for your app following this naming convention: 47 | * {product}-{app-name}-{language} 48 | * products: appengine, compute, storage, bigquery, prediction, cloudsql 49 | * example: appengine-guestbook-python 50 | * For multi-product apps, concatenate the primary products, like this: 51 | compute-appengine-demo-suite-python. 52 | * For multi-language apps, concatenate the primary languages like this: 53 | appengine-sockets-python-java-go. 54 | 55 | 1. Clone the README.md, CONTRIB.md and LICENSE files from the 56 | GoogleCloudPlatform/Template repo. 57 | 1. Ensure that your code adheres to the existing style in the sample to which 58 | you are contributing. Refer to the 59 | [Google Cloud Platform Samples Style Guide] 60 | (https://github.com/GoogleCloudPlatform/Template/wiki/style.html) for the 61 | recommended coding standards for this organization. 62 | 1. Ensure that your code has an appropriate set of unit tests which all pass. 63 | 1. Submit a request to fork your repo in GoogleCloudPlatform organization via 64 | your proposal issue. 65 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2013 Google Inc. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | ![status: inactive](https://img.shields.io/badge/status-inactive-red.svg)
2 | 
3 | This project is no longer actively developed or maintained.
4 | 
5 | For new work in this area, check out the [Java](https://github.com/GoogleCloudPlatform/java-docs-samples/tree/master/bigquery) or [Python](https://github.com/GoogleCloudPlatform/python-docs-samples/tree/master/bigquery) documentation samples repositories.
6 | 
7 | 
8 | ## bigquery-samples
9 | 
10 | Sample code demonstrating usage of [Google BigQuery](https://cloud.google.com/bigquery/) via the [BigQuery Client Libraries](https://cloud.google.com/bigquery/client-libraries)
11 | 
12 | [![Build Status](https://travis-ci.org/GoogleCloudPlatform/bigquery-samples-python.svg)](https://travis-ci.org/GoogleCloudPlatform/bigquery-samples-python)
13 | 
14 | ## Setup
15 | 
16 | 1. [Set up a project to use Google BigQuery](https://cloud.google.com/bigquery/sign-up) if you haven't already.
17 | 
18 | 2. [Generate a private key (JSON format)](https://cloud.google.com/storage/docs/authentication#generating-a-private-key) for your account.
19 | 
20 | 3. Set an environment variable pointing at the key:
21 | ```
22 | export GOOGLE_APPLICATION_CREDENTIALS=<path to your JSON key file>
23 | ```
24 | 
25 | 
26 | ## Run
27 | 
28 | ### Python
29 | 
30 | From the `python/` directory run
31 | 
32 | ```
33 | pip install -r requirements.txt
34 | ```
35 | 
36 | Then to run a sample, simply use the interpreter
37 | 
38 | ```
39 | $ python
40 | >>> from samples import <sample_module>
41 | >>> <sample_module>.main()
42 | ```
43 | 
44 | ### Java
45 | 
46 | Install [Maven version 3.1.0 or later](http://maven.apache.org/index.html) and from `java/` run
47 | 
48 | ```
49 | mvn clean install -Dmaven.test.skip=true
50 | java src/main/java/com/google/cloud/bigquery/samples/<SampleName>.java
51 | ```
52 | 
53 | 
54 | ## Contributing changes
55 | 
56 | * See [CONTRIBUTING.md](CONTRIBUTING.md)
57 | 
58 | 
59 | ## Licensing
60 | 
61 | * See [LICENSE](LICENSE)
62 | 
-------------------------------------------------------------------------------- /bigquery_samples.key.json.enc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googlearchive/bigquery-samples-python/d7a840407d07947eb19e8c88646eed5233948dc4/bigquery_samples.key.json.enc
-------------------------------------------------------------------------------- /favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googlearchive/bigquery-samples-python/d7a840407d07947eb19e8c88646eed5233948dc4/favicon.ico
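To make the README's Python instructions concrete, an interactive session could look like the following; `sync_query` stands in for whichever sample module you choose, and the prompt text is illustrative rather than quoted from the samples:

```
$ python
>>> from samples import sync_query
>>> sync_query.main()
Enter the project ID: my-project-id
Enter the Bigquery SQL Query: SELECT corpus FROM [publicdata:samples.shakespeare] GROUP BY corpus
```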
-------------------------------------------------------------------------------- /java/pom.xml: --------------------------------------------------------------------------------
1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2 |     xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3 |   <modelVersion>4.0.0</modelVersion>
4 |   <groupId>com.google.cloud.bigquery.samples</groupId>
5 |   <artifactId>bq-gettingstarted</artifactId>
6 |   <packaging>jar</packaging>
7 |   <version>1.0-SNAPSHOT</version>
8 |   <name>bq-gettingstarted</name>
9 |   <url>http://maven.apache.org</url>
10 | 
11 |   <repositories>
12 |     <repository>
13 |       <id>googleapis</id>
14 |       <url>https://google-api-client-libraries.appspot.com/mavenrepo</url>
15 |     </repository>
16 |   </repositories>
17 | 
18 |   <dependencies>
19 |     <dependency>
20 |       <groupId>com.google.apis</groupId>
21 |       <artifactId>google-api-services-bigquery</artifactId>
22 |       <version>v2-rev158-1.19.0</version>
23 |     </dependency>
24 |     <dependency>
25 |       <groupId>com.google.oauth-client</groupId>
26 |       <artifactId>google-oauth-client</artifactId>
27 |       <version>${project.oauth.version}</version>
28 |     </dependency>
29 |     <dependency>
30 |       <groupId>com.google.http-client</groupId>
31 |       <artifactId>google-http-client-jackson2</artifactId>
32 |       <version>${project.http.version}</version>
33 |     </dependency>
34 |     <dependency>
35 |       <groupId>com.google.oauth-client</groupId>
36 |       <artifactId>google-oauth-client-jetty</artifactId>
37 |       <version>${project.oauth.version}</version>
38 |     </dependency>
39 |     <dependency>
40 |       <groupId>junit</groupId>
41 |       <artifactId>junit</artifactId>
42 |       <version>4.12</version>
43 |       <scope>test</scope>
44 |     </dependency>
45 |     <dependency>
46 |       <groupId>com.google.code.gson</groupId>
47 |       <artifactId>gson</artifactId>
48 |       <version>2.3.1</version>
49 |     </dependency>
50 |   </dependencies>
51 | 
52 |   <properties>
53 |     <project.oauth.version>1.19.0</project.oauth.version>
54 |     <project.http.version>1.19.0</project.http.version>
55 |     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
56 |   </properties>
57 | 
58 |   <build>
59 |     <plugins>
60 |       <plugin>
61 |         <groupId>org.apache.maven.plugins</groupId>
62 |         <artifactId>maven-compiler-plugin</artifactId>
63 |         <version>3.2</version>
64 |         <configuration>
65 |           <source>5</source>
66 |           <target>5</target>
67 |         </configuration>
68 |       </plugin>
69 |     </plugins>
70 |   </build>
71 | 
72 | </project>
73 | 
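The README's `java src/.../<SampleName>.java` invocation is preserved from the source, but a compiled class is usually launched through Maven instead. A hedged alternative, assuming you add the standard exec-maven-plugin (it is not declared in the pom.xml above):

```
mvn clean install -Dmaven.test.skip=true
mvn exec:java -Dexec.mainClass=com.google.cloud.bigquery.samples.AsyncQuerySample
```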
-------------------------------------------------------------------------------- /java/src/main/java/com/google/cloud/bigquery/samples/AsyncQuerySample.java: --------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2012 Google Inc.
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5 |  * in compliance with the License. You may obtain a copy of the License at
6 |  *
7 |  * http://www.apache.org/licenses/LICENSE-2.0
8 |  *
9 |  * Unless required by applicable law or agreed to in writing, software distributed under the License
10 |  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11 |  * or implied. See the License for the specific language governing permissions and limitations under
12 |  * the License.
13 |  */
14 | 
15 | package com.google.cloud.bigquery.samples;
16 | 
17 | 
18 | import com.google.api.services.bigquery.Bigquery;
19 | import com.google.api.services.bigquery.Bigquery.Jobs.GetQueryResults;
20 | import com.google.api.services.bigquery.model.GetQueryResultsResponse;
21 | import com.google.api.services.bigquery.model.Job;
22 | import com.google.api.services.bigquery.model.JobConfiguration;
23 | import com.google.api.services.bigquery.model.JobConfigurationQuery;
24 | 
25 | import java.io.IOException;
26 | import java.util.Iterator;
27 | import java.util.Scanner;
28 | 
29 | 
30 | /**
31 |  * Example of authorizing with BigQuery and reading from a public dataset.
32 |  */
33 | public class AsyncQuerySample extends BigqueryUtils{
34 | 
35 | 
36 |   // [START main]
37 |   /**
38 |    * @param args
39 |    * @throws IOException
40 |    * @throws InterruptedException
41 |    */
42 |   public static void main(String[] args)
43 |       throws IOException, InterruptedException {
44 | 
45 |     Scanner scanner = new Scanner(System.in);
46 |     System.out.println("Enter your project id: ");
47 |     String projectId = scanner.nextLine();
48 |     System.out.println("Enter your query string: ");
49 |     String queryString = scanner.nextLine();
50 |     System.out.println("Run query in batch mode? [true|false] ");
51 |     boolean batch = Boolean.valueOf(scanner.nextLine());
52 |     System.out.println("Enter how often to check if your job is complete (milliseconds): ");
53 |     long waitTime = scanner.nextLong();
54 |     scanner.close();
55 |     Iterator<GetQueryResultsResponse> pages = run(projectId, queryString, batch, waitTime);
56 |     while(pages.hasNext()){
57 |       printRows(pages.next().getRows(), System.out);
58 |     }
59 | 
60 |   }
61 |   // [END main]
62 | 
63 |   // [START run]
64 |   public static Iterator<GetQueryResultsResponse> run(String projectId,
65 |       String queryString,
66 |       boolean batch,
67 |       long waitTime)
68 |       throws IOException, InterruptedException{
69 | 
70 |     Bigquery bigquery = BigqueryServiceFactory.getService();
71 | 
72 |     Job query = asyncQuery(bigquery, projectId, queryString, batch);
73 |     Bigquery.Jobs.Get getRequest = bigquery.jobs().get(
74 |         projectId, query.getJobReference().getJobId());
75 | 
76 |     //Poll every waitTime milliseconds,
77 |     //retrying at most retries times if there are errors
78 |     pollJob(getRequest, waitTime);
79 | 
80 |     GetQueryResults resultsRequest = bigquery.jobs().getQueryResults(
81 |         projectId, query.getJobReference().getJobId());
82 | 
83 |     return getPages(resultsRequest);
84 |   }
85 |   // [END run]
86 | 
87 |   // [START asyncQuery]
88 |   /**
89 |    * Inserts an asynchronous query Job for a particular query
90 |    *
91 |    * @param bigquery  an authorized BigQuery client
92 |    * @param projectId a String containing the project ID
93 |    * @param querySql  the actual query string
94 |    * @return a reference to the inserted query job
95 |    * @throws IOException
96 |    */
97 |   public static Job asyncQuery(Bigquery bigquery,
98 |       String projectId,
99 |       String querySql,
100 |       boolean batch) throws IOException {
101 | 
102 |     JobConfigurationQuery query_config = new JobConfigurationQuery()
103 |         .setQuery(querySql);
104 | 
105 |     if(batch){
106 |       query_config.setPriority("BATCH");
107 |     }
108 | 
109 |     Job job = new Job().setConfiguration(
110 |         new JobConfiguration().setQuery(query_config));
111 | 
112 |     return bigquery.jobs().insert(projectId, job).execute();
113 |   }
114 |   // [END asyncQuery]
115 | 
116 | }
117 | 
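When AsyncQuerySample above prompts for a query string, any legacy-SQL statement against a public dataset works. This example, in the style of Google's classic BigQuery samples rather than taken from this repo, returns the ten most frequent corpus values:

```
SELECT TOP(corpus, 10) AS title, COUNT(*) AS unique_words FROM [publicdata:samples.shakespeare]
```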
-------------------------------------------------------------------------------- /java/src/main/java/com/google/cloud/bigquery/samples/BigqueryServiceFactory.java: --------------------------------------------------------------------------------
1 | package com.google.cloud.bigquery.samples;
2 | 
3 | import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
4 | import com.google.api.client.http.HttpTransport;
5 | import com.google.api.client.http.javanet.NetHttpTransport;
6 | import com.google.api.client.json.JsonFactory;
7 | import com.google.api.client.json.jackson2.JacksonFactory;
8 | import com.google.api.services.bigquery.Bigquery;
9 | import com.google.api.services.bigquery.BigqueryScopes;
10 | 
11 | import java.io.IOException;
12 | import java.util.Collection;
13 | 
14 | /**
15 |  * TODO: Insert description here. (generated by elibixby)
16 |  */
17 | public class BigqueryServiceFactory {
18 | 
19 |   private static Bigquery service = null;
20 |   private static Object service_lock = new Object();
21 | 
22 |   public static Bigquery getService() throws IOException{
23 |     if(service==null){
24 |       synchronized(service_lock){
25 |         if(service==null){
26 |           service=createAuthorizedClient();
27 |         }
28 |       }
29 |     }
30 |     return service;
31 |   }
32 | 
33 |   // [START get_service]
34 |   private static Bigquery createAuthorizedClient() throws IOException {
35 |     Collection<String> BIGQUERY_SCOPES = BigqueryScopes.all();
36 |     HttpTransport TRANSPORT = new NetHttpTransport();
37 |     JsonFactory JSON_FACTORY = new JacksonFactory();
38 |     GoogleCredential credential = GoogleCredential.getApplicationDefault(TRANSPORT, JSON_FACTORY);
39 |     if(credential.createScopedRequired()){
40 |       credential = credential.createScoped(BIGQUERY_SCOPES);
41 |     }
42 |     return new Bigquery.Builder(TRANSPORT, JSON_FACTORY, credential).setApplicationName("BigQuery Samples").build();
43 |   }
44 |   // [END get_service]
45 | 
46 | }
47 | 
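A minimal driver tying the factory above to the `listDatasets` helper defined in BigqueryUtils below; this class is not part of the repo, and the project ID is a placeholder:

```java
import com.google.cloud.bigquery.samples.BigqueryServiceFactory;
import com.google.cloud.bigquery.samples.BigqueryUtils;

import java.io.IOException;

public class ListDatasetsDemo {
  public static void main(String[] args) throws IOException {
    // Builds (or reuses) the singleton client authorized with
    // application-default credentials, then prints the visible datasets.
    BigqueryUtils.listDatasets(
        BigqueryServiceFactory.getService(), "your-project-id");
  }
}
```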
-------------------------------------------------------------------------------- /java/src/main/java/com/google/cloud/bigquery/samples/BigqueryUtils.java: --------------------------------------------------------------------------------
1 | package com.google.cloud.bigquery.samples;
2 | 
3 | import com.google.api.client.json.GenericJson;
4 | import com.google.api.services.bigquery.Bigquery;
5 | import com.google.api.services.bigquery.Bigquery.Datasets;
6 | import com.google.api.services.bigquery.BigqueryRequest;
7 | import com.google.api.services.bigquery.model.DatasetList;
8 | import com.google.api.services.bigquery.model.Job;
9 | import com.google.api.services.bigquery.model.TableCell;
10 | import com.google.api.services.bigquery.model.TableFieldSchema;
11 | import com.google.api.services.bigquery.model.TableRow;
12 | import com.google.api.services.bigquery.model.TableSchema;
13 | import com.google.gson.Gson;
14 | 
15 | import java.io.IOException;
16 | import java.io.PrintStream;
17 | import java.io.Reader;
18 | import java.util.ArrayList;
19 | import java.util.Iterator;
20 | import java.util.List;
21 | import java.util.NoSuchElementException;
22 | 
23 | /**
24 |  * TODO: Insert description here. (generated by elibixby)
25 |  */
26 | public class BigqueryUtils {
27 | 
28 |   // [START print_rows]
29 |   public static void printRows(List<TableRow> rows, PrintStream out){
30 |     for (TableRow row : rows) {
31 |       for (TableCell field : row.getF()) {
32 |         out.printf("%-50s", field.getV());
33 |       }
34 |       out.println();
35 |     }
36 |   }
37 |   // [END print_rows]
38 | 
39 |   // [START poll_job]
40 |   public static Job pollJob(Bigquery.Jobs.Get request, long interval)
41 |       throws IOException, InterruptedException{
42 |     Job job = request.execute();
43 |     while(!job.getStatus().getState().equals("DONE")) {
44 |       System.out.println("Job is "
45 |           + job.getStatus().getState()
46 |           + " waiting " + interval + " milliseconds...");
47 |       Thread.sleep(interval);
48 |       job = request.execute();
49 |     }
50 |     return job;
51 |   }
52 |   // [END poll_job]
53 | 
54 |   // [START paging]
55 |   public static <T extends GenericJson> Iterator<T> getPages(BigqueryRequest<T> request_template){
56 | 
57 |     class PageIterator implements Iterator<T>{
58 | 
59 |       BigqueryRequest<T> request;
60 |       boolean hasNext = true;
61 | 
62 |       public PageIterator(BigqueryRequest<T> request_template){
63 |         this.request = request_template;
64 |       }
65 | 
66 |       public boolean hasNext() {
67 |         return hasNext ;
68 |       }
69 | 
70 |       public T next() {
71 |         if(!hasNext){
72 |           throw new NoSuchElementException();
73 |         }
74 |         try {
75 |           T response = request.execute();
76 |           if (response.containsKey("pageToken")) {
77 |             request = request.set("pageToken", response.get("pageToken"));
78 |           } else {
79 |             hasNext = false;
80 |           }
81 |           return response;
82 |         } catch (IOException e) {
83 |           e.printStackTrace();
84 |           return null;
85 |         }
86 |       }
87 |       public void remove() {
88 |         this.next();
89 |       }
90 |     }
91 | 
92 |     return new PageIterator(request_template);
93 |   }
94 |   // [END paging]
95 | 
96 |   // [START load_schema]
97 |   public static TableSchema loadSchema(Reader schemaSource){
98 |     TableSchema sourceSchema = new TableSchema();
99 | 
100 |     List<TableFieldSchema> fields = (new Gson()).<List<TableFieldSchema>>fromJson(
101 |         schemaSource,
102 |         (new ArrayList<TableFieldSchema>()).getClass());
103 | 
104 |     sourceSchema.setFields(fields);
105 | 
106 |     return sourceSchema;
107 |   }
108 |   // [END load_schema]
109 | 
110 |   // [START list_datasets]
111 |   /**
112 |    * Display all BigQuery datasets associated with a project
113 |    *
114 |    * @param bigquery  an authorized BigQuery client
115 |    * @param projectId a string containing the current project ID
116 |    * @throws IOException
117 |    */
118 |   public static void listDatasets(Bigquery bigquery, String projectId)
119 |       throws IOException {
120 |     Datasets.List datasetRequest = bigquery.datasets().list(projectId);
121 |     DatasetList datasetList = datasetRequest.execute();
122 |     if (datasetList.getDatasets() != null) {
123 |       List<DatasetList.Datasets> datasets = datasetList.getDatasets();
124 |       System.out.println("Available datasets\n----------------");
125 |       System.out.println(datasets.toString());
126 |       for (DatasetList.Datasets dataset : datasets) {
127 |         System.out.format("%s\n", dataset.getDatasetReference().getDatasetId());
128 |       }
129 |     }
130 |   }
131 |   // [END list_datasets]
132 | 
133 | }
-------------------------------------------------------------------------------- /java/src/main/java/com/google/cloud/bigquery/samples/ExportDataCloudStorageSample.java: --------------------------------------------------------------------------------
1 | package com.google.cloud.bigquery.samples;
2 | 
3 | import com.google.api.services.bigquery.Bigquery;
4 | import com.google.api.services.bigquery.model.Job;
5 | import com.google.api.services.bigquery.model.JobConfiguration;
6 | import 
com.google.api.services.bigquery.model.JobConfigurationExtract; 7 | import com.google.api.services.bigquery.model.TableReference; 8 | 9 | import java.io.IOException; 10 | import java.util.Scanner; 11 | 12 | /** 13 | * TODO: Insert description here. (generated by elibixby) 14 | */ 15 | public class ExportDataCloudStorageSample extends BigqueryUtils { 16 | 17 | 18 | 19 | // [START main] 20 | public static void main(String[] args) throws IOException, InterruptedException{ 21 | Scanner scanner = new Scanner(System.in); 22 | System.out.println("Enter your project id: "); 23 | String projectId = scanner.nextLine(); 24 | System.out.println("Enter your dataset id: "); 25 | String datasetId = scanner.nextLine(); 26 | System.out.println("Enter your table id: "); 27 | String tableId = scanner.nextLine(); 28 | System.out.println("Enter the Google Cloud Storage Path to which you'd like to export: "); 29 | String cloudStoragePath = scanner.nextLine(); 30 | System.out.println("Enter how often to check if your job is complete (milliseconds): "); 31 | long interval = scanner.nextLong(); 32 | scanner.close(); 33 | 34 | run(cloudStoragePath, projectId, datasetId, tableId, interval); 35 | 36 | } 37 | // [END main] 38 | 39 | // [START run] 40 | public static void run( 41 | String cloudStoragePath, 42 | String projectId, 43 | String datasetId, 44 | String tableId, 45 | long interval) throws IOException, InterruptedException{ 46 | 47 | Bigquery bigquery = BigqueryServiceFactory.getService(); 48 | 49 | Job extractJob = extractJob( 50 | bigquery, 51 | cloudStoragePath, 52 | new TableReference() 53 | .setProjectId(projectId) 54 | .setDatasetId(datasetId) 55 | .setTableId(tableId)); 56 | 57 | Bigquery.Jobs.Get get_job = bigquery.jobs().get( 58 | extractJob.getJobReference().getProjectId(), 59 | extractJob.getJobReference().getJobId()); 60 | 61 | pollJob(get_job, interval); 62 | 63 | System.out.println("Export is Done!"); 64 | 65 | } 66 | // [END run] 67 | 68 | 69 | // [START extract_job] 70 | public static Job extractJob( 71 | Bigquery bigquery, 72 | String cloudStoragePath, 73 | TableReference table) throws IOException{ 74 | 75 | JobConfigurationExtract extract = new JobConfigurationExtract() 76 | .setSourceTable(table) 77 | .setDestinationUri(cloudStoragePath); 78 | 79 | return bigquery.jobs().insert(table.getProjectId(), 80 | new Job().setConfiguration(new JobConfiguration().setExtract(extract))) 81 | .execute(); 82 | } 83 | // [END extract_job] 84 | 85 | 86 | 87 | 88 | } 89 | -------------------------------------------------------------------------------- /java/src/main/java/com/google/cloud/bigquery/samples/LoadDataCSVSample.java: -------------------------------------------------------------------------------- 1 | package com.google.cloud.bigquery.samples; 2 | 3 | import com.google.api.services.bigquery.Bigquery; 4 | import com.google.api.services.bigquery.model.Job; 5 | import com.google.api.services.bigquery.model.JobConfiguration; 6 | import com.google.api.services.bigquery.model.JobConfigurationLoad; 7 | import com.google.api.services.bigquery.model.TableReference; 8 | import com.google.api.services.bigquery.model.TableSchema; 9 | 10 | import java.io.File; 11 | import java.io.FileReader; 12 | import java.io.IOException; 13 | import java.io.Reader; 14 | import java.util.Collections; 15 | import java.util.Scanner; 16 | 17 | /** 18 | * TODO: Insert description here. 
(generated by elibixby) 19 | */ 20 | public class LoadDataCSVSample extends BigqueryUtils { 21 | 22 | 23 | // [START main] 24 | public static void main(String[] args) throws IOException, InterruptedException{ 25 | Scanner scanner = new Scanner(System.in); 26 | System.out.println("Enter your project id: "); 27 | String projectId = scanner.nextLine(); 28 | System.out.println("Enter your dataset id: "); 29 | String datasetId = scanner.nextLine(); 30 | System.out.println("Enter your table id: "); 31 | String tableId = scanner.nextLine(); 32 | System.out.println("Enter the Google Cloud Storage Path to the data you'd like to load: "); 33 | String cloudStoragePath = scanner.nextLine(); 34 | System.out.println("Enter the filepath to your schema: "); 35 | String sourceSchemaPath = scanner.nextLine(); 36 | 37 | 38 | System.out.println("Enter how often to check if your job is complete (milliseconds): "); 39 | long interval = scanner.nextLong(); 40 | scanner.close(); 41 | 42 | run(cloudStoragePath, 43 | projectId, 44 | datasetId, 45 | tableId, 46 | new FileReader(new File(sourceSchemaPath)), 47 | interval); 48 | 49 | } 50 | // [END main] 51 | 52 | // [START run] 53 | public static void run( 54 | String cloudStoragePath, 55 | String projectId, 56 | String datasetId, 57 | String tableId, 58 | Reader schemaSource, 59 | long interval) throws IOException, InterruptedException{ 60 | 61 | Bigquery bigquery = BigqueryServiceFactory.getService(); 62 | 63 | 64 | Job loadJob = loadJob( 65 | bigquery, 66 | cloudStoragePath, 67 | new TableReference() 68 | .setProjectId(projectId) 69 | .setDatasetId(datasetId) 70 | .setTableId(tableId), 71 | loadSchema(schemaSource)); 72 | 73 | Bigquery.Jobs.Get get_job = bigquery.jobs().get( 74 | loadJob.getJobReference().getProjectId(), 75 | loadJob.getJobReference().getJobId()); 76 | 77 | pollJob(get_job, interval); 78 | 79 | System.out.println("Load is Done!"); 80 | 81 | } 82 | // [END run] 83 | 84 | // [START load_job] 85 | public static Job loadJob( 86 | Bigquery bigquery, 87 | String cloudStoragePath, 88 | TableReference table, 89 | TableSchema schema) throws IOException{ 90 | 91 | JobConfigurationLoad load = new JobConfigurationLoad() 92 | .setDestinationTable(table) 93 | .setSchema(schema) 94 | .setSourceUris(Collections.singletonList(cloudStoragePath)); 95 | 96 | return bigquery.jobs().insert(table.getProjectId(), 97 | new Job().setConfiguration(new JobConfiguration().setLoad(load))) 98 | .execute(); 99 | } 100 | // [END load_job] 101 | 102 | 103 | } 104 | -------------------------------------------------------------------------------- /java/src/main/java/com/google/cloud/bigquery/samples/StreamingSample.java: -------------------------------------------------------------------------------- 1 | package com.google.cloud.bigquery.samples; 2 | 3 | import com.google.api.services.bigquery.Bigquery; 4 | import com.google.api.services.bigquery.model.TableDataInsertAllRequest; 5 | import com.google.api.services.bigquery.model.TableDataInsertAllResponse; 6 | import com.google.gson.Gson; 7 | import com.google.gson.JsonSyntaxException; 8 | import com.google.gson.stream.JsonReader; 9 | 10 | import java.io.IOException; 11 | import java.io.InputStreamReader; 12 | import java.util.Collections; 13 | import java.util.HashMap; 14 | import java.util.Iterator; 15 | import java.util.Map; 16 | import java.util.Scanner; 17 | 18 | 19 | /** 20 | * TODO: Insert description here. 
(generated by elibixby)
21 |  */
22 | public class StreamingSample extends BigqueryUtils {
23 | 
24 | 
25 | 
26 |   // [START main]
27 |   public static void main(String[] args) throws IOException{
28 |     final Scanner scanner = new Scanner(System.in);
29 |     System.out.println("Enter your project id: ");
30 |     String projectId = scanner.nextLine();
31 |     System.out.println("Enter your dataset id: ");
32 |     String datasetId = scanner.nextLine();
33 |     System.out.println("Enter your table id: ");
34 |     String tableId = scanner.nextLine();
35 |     scanner.close();
36 | 
37 |     System.out.println("Enter JSON to stream to BigQuery: \n"
38 |         + "Press End-of-stream (CTRL-D) to stop");
39 | 
40 |     JsonReader fromCLI = new JsonReader(new InputStreamReader(System.in));
41 | 
42 |     Iterator<TableDataInsertAllResponse> responses = run(projectId,
43 |         datasetId,
44 |         tableId,
45 |         fromCLI);
46 | 
47 |     while(responses.hasNext()){
48 |       System.out.println(responses.next());
49 |     }
50 | 
51 |     fromCLI.close();
52 |   }
53 |   // [END main]
54 | 
55 | 
56 | 
57 |   // [START run]
58 |   public static Iterator<TableDataInsertAllResponse> run(final String projectId,
59 |       final String datasetId,
60 |       final String tableId,
61 |       final JsonReader rows) throws IOException{
62 | 
63 | 
64 |     final Bigquery bigquery = BigqueryServiceFactory.getService();
65 |     final Gson gson = new Gson();
66 |     rows.beginArray();
67 | 
68 |     return new Iterator<TableDataInsertAllResponse>(){
69 | 
70 |       public boolean hasNext() {
71 |         try {
72 |           return rows.hasNext();
73 |         } catch (IOException e) {
74 |           // TODO(elibixby): Auto-generated catch block
75 |           e.printStackTrace();
76 |         }
77 |         return false;
78 |       }
79 | 
80 |       public TableDataInsertAllResponse next() {
81 |         try {
82 |           Map<String, Object> rowData = gson.<Map<String, Object>>fromJson(
83 |               rows,
84 |               (new HashMap<String, Object>()).getClass());
85 |           return streamRow(bigquery,
86 |               projectId,
87 |               datasetId,
88 |               tableId,
89 |               new TableDataInsertAllRequest.Rows().setJson(rowData));
90 |         } catch (JsonSyntaxException e) {
91 |           e.printStackTrace();
92 |         } catch (IOException e) {
93 |           e.printStackTrace();
94 |         }
95 |         return null;
96 |       }
97 | 
98 |       public void remove() {
99 |         this.next();
100 |       }
101 | 
102 |     };
103 | 
104 |   }
105 |   // [END run]
106 | 
107 |   // [START streamRow]
108 |   public static TableDataInsertAllResponse streamRow(Bigquery bigquery,
109 |       String projectId,
110 |       String datasetId,
111 |       String tableId,
112 |       TableDataInsertAllRequest.Rows row) throws IOException{
113 | 
114 |     return bigquery.tabledata().insertAll(
115 |         projectId,
116 |         datasetId,
117 |         tableId,
118 |         new TableDataInsertAllRequest().setRows(Collections.singletonList(row))).execute();
119 | 
120 |   }
121 |   // [END streamRow]
122 | }
123 | 
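StreamingSample consumes a JSON array of row objects (from stdin here, and from resources/streamrows.json in its test). That resource file's contents are not reproduced in this dump; a plausible shape, assuming resources/schema.json declares two string columns named `name` and `value`, would be:

```
[
  {"name": "test-row-1", "value": "42"},
  {"name": "test-row-2", "value": "24"}
]
```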
-------------------------------------------------------------------------------- /java/src/main/java/com/google/cloud/bigquery/samples/SyncQuerySample.java: --------------------------------------------------------------------------------
1 | package com.google.cloud.bigquery.samples;
2 | 
3 | import com.google.api.services.bigquery.Bigquery;
4 | import com.google.api.services.bigquery.Bigquery.Jobs.GetQueryResults;
5 | import com.google.api.services.bigquery.model.GetQueryResultsResponse;
6 | import com.google.api.services.bigquery.model.QueryRequest;
7 | import com.google.api.services.bigquery.model.QueryResponse;
8 | 
9 | import java.io.IOException;
10 | import java.util.Iterator;
11 | import java.util.Scanner;
12 | /**
13 |  * TODO: Insert description here. (generated by elibixby)
14 |  */
15 | public class SyncQuerySample extends BigqueryUtils{
16 | 
17 | 
18 |   //[START main]
19 |   /**
20 |    * @param args
21 |    * @throws IOException
22 |    */
23 |   public static void main(String[] args)
24 |       throws IOException{
25 | 
26 | 
27 |     Scanner scanner = new Scanner(System.in);
28 |     System.out.println("Enter your project id: ");
29 |     String projectId = scanner.nextLine();
30 |     System.out.println("Enter your query string: ");
31 |     String queryString = scanner.nextLine();
32 |     System.out.println("Enter how long to wait for the query to complete (in milliseconds):\n " +
33 |         "(if longer than 10 seconds, use an asynchronous query)");
34 |     long waitTime = scanner.nextLong();
35 |     scanner.close();
36 |     Iterator<GetQueryResultsResponse> pages = run(projectId, queryString, waitTime);
37 |     while(pages.hasNext()){
38 |       printRows(pages.next().getRows(), System.out);
39 |     }
40 |   }
41 |   // [END main]
42 | 
43 | 
44 |   // [START run]
45 |   public static Iterator<GetQueryResultsResponse> run(String projectId,
46 |       String queryString,
47 |       long waitTime) throws IOException{
48 |     Bigquery bigquery = BigqueryServiceFactory.getService();
49 |     //Wait until query is done with 10 second timeout, at most 5 retries on error
50 |     QueryResponse query = bigquery.jobs().query(
51 |         projectId,
52 |         new QueryRequest().setTimeoutMs(waitTime).setQuery(queryString)).execute();
53 | 
54 |     //Make a request to get the results of the query
55 |     //(timeout is zero since job should be complete)
56 | 
57 |     GetQueryResults getRequest = bigquery.jobs().getQueryResults(
58 |         query.getJobReference().getProjectId(),
59 |         query.getJobReference().getJobId());
60 | 
61 | 
62 |     return getPages(getRequest);
63 |   }
64 |   // [END run]
65 | 
66 | 
67 | }
68 | 
-------------------------------------------------------------------------------- /java/src/test/java/com/google/cloud/bigquery/samples/test/AsyncQuerySampleTest.java: --------------------------------------------------------------------------------
1 | package com.google.cloud.bigquery.samples.test;
2 | 
3 | import com.google.api.services.bigquery.model.GetQueryResultsResponse;
4 | import com.google.cloud.bigquery.samples.AsyncQuerySample;
5 | import com.google.gson.JsonIOException;
6 | import com.google.gson.JsonSyntaxException;
7 | 
8 | import org.junit.*;
9 | 
10 | import static org.junit.Assert.*;
11 | 
12 | import java.io.FileNotFoundException;
13 | import java.io.IOException;
14 | import java.util.Iterator;
15 | 
16 | public class AsyncQuerySampleTest extends BigquerySampleTest{
17 | 
18 |   /**
19 |    * @throws JsonSyntaxException
20 |    * @throws JsonIOException
21 |    * @throws FileNotFoundException
22 |    */
23 |   public AsyncQuerySampleTest() throws JsonSyntaxException, JsonIOException,
24 |       FileNotFoundException {
25 |     super();
26 |     // TODO(elibixby): Auto-generated constructor stub
27 |   }
28 | 
29 | 
30 |   @Test
31 |   public void testInteractive() throws IOException, InterruptedException{
32 |     Iterator<GetQueryResultsResponse> pages = AsyncQuerySample.run(CONSTANTS.getProjectId(), CONSTANTS.getQuery(), false, 5000);
33 |     while(pages.hasNext()){
34 |       assertTrue(!pages.next().getRows().isEmpty());
35 |     }
36 |   }
37 | 
38 | 
39 |   @Test
40 |   @Ignore // Batches can take up to 3 hours to run, probably shouldn't use this
41 |   public void testBatch() throws IOException, InterruptedException{
42 |     Iterator<GetQueryResultsResponse> pages = AsyncQuerySample.run(CONSTANTS.getProjectId(), CONSTANTS.getQuery(), true, 5000);
43 |     while(pages.hasNext()){
44 |       assertTrue(!pages.next().getRows().isEmpty());
45 |     }
46 |   }
47 | 
48 | 
49 | }
50 | 
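The test harness that follows (BigquerySampleTest) maps resources/constants.json onto its Constants class with Gson. The real file is not included in this dump, but its keys must match the Constants fields one-for-one; all values below are placeholders:

```
{
  "projectId": "your-project-id",
  "datasetId": "test_dataset",
  "currentTableId": "test_table",
  "newTableId": "test_table2",
  "cloudStorageInputURI": "gs://your-bucket/data.csv",
  "cloudStorageOutputURI": "gs://your-bucket/output.csv",
  "query": "SELECT corpus FROM [publicdata:samples.shakespeare] GROUP BY corpus"
}
```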
-------------------------------------------------------------------------------- /java/src/test/java/com/google/cloud/bigquery/samples/test/BigquerySampleTest.java: -------------------------------------------------------------------------------- 1 | package com.google.cloud.bigquery.samples.test; 2 | 3 | import com.google.cloud.bigquery.samples.BigqueryUtils; 4 | import com.google.gson.Gson; 5 | import com.google.gson.JsonIOException; 6 | import com.google.gson.JsonSyntaxException; 7 | 8 | import java.io.File; 9 | import java.io.FileNotFoundException; 10 | import java.io.FileReader; 11 | import java.nio.file.Path; 12 | import java.nio.file.Paths; 13 | 14 | 15 | /** 16 | * TODO: Insert description here. (generated by elibixby) 17 | */ 18 | public class BigquerySampleTest extends BigqueryUtils{ 19 | 20 | protected static class Constants{ 21 | private String projectId; 22 | private String datasetId; 23 | private String currentTableId; 24 | private String newTableId; 25 | private String cloudStorageInputURI; 26 | private String cloudStorageOutputURI; 27 | private String query; 28 | /** 29 | * @return the projectId 30 | */ 31 | public String getProjectId() { 32 | return projectId; 33 | } 34 | /** 35 | * @return the datasetId 36 | */ 37 | public String getDatasetId() { 38 | return datasetId; 39 | } 40 | /** 41 | * @return the currentTableId 42 | */ 43 | public String getCurrentTableId() { 44 | return currentTableId; 45 | } 46 | /** 47 | * @return the newTableId 48 | */ 49 | public String getNewTableId() { 50 | return newTableId; 51 | } 52 | /** 53 | * @return the query 54 | */ 55 | public String getQuery() { 56 | return query; 57 | } 58 | /** 59 | * @return the cloudStorageOutputURI 60 | */ 61 | public String getCloudStorageOutputURI() { 62 | return cloudStorageOutputURI; 63 | } 64 | /** 65 | * @return the cloudStorageInputURI 66 | */ 67 | public String getCloudStorageInputURI() { 68 | return cloudStorageInputURI; 69 | } 70 | } 71 | 72 | 73 | protected static final Path RESOURCE_PATH = Paths.get(System.getProperty("user.dir")) 74 | .getParent().resolve("resources"); 75 | 76 | protected static final File CONSTANTS_FILE = new File(RESOURCE_PATH.resolve("constants.json").toString()); 77 | 78 | protected static Constants CONSTANTS = null ; 79 | 80 | protected BigquerySampleTest() throws JsonSyntaxException, JsonIOException, FileNotFoundException{ 81 | if(CONSTANTS == null){ 82 | CONSTANTS = (new Gson()).fromJson( 83 | new FileReader(CONSTANTS_FILE), 84 | Constants.class); 85 | 86 | } 87 | } 88 | 89 | 90 | 91 | } 92 | -------------------------------------------------------------------------------- /java/src/test/java/com/google/cloud/bigquery/samples/test/ExportDataCloudStorageSampleTest.java: -------------------------------------------------------------------------------- 1 | package com.google.cloud.bigquery.samples.test; 2 | 3 | import com.google.cloud.bigquery.samples.ExportDataCloudStorageSample; 4 | import com.google.gson.JsonIOException; 5 | import com.google.gson.JsonSyntaxException; 6 | 7 | import org.junit.Test; 8 | 9 | import java.io.FileNotFoundException; 10 | import java.io.IOException; 11 | 12 | /** 13 | * TODO: Insert description here. 
(generated by elibixby) 14 | */ 15 | public class ExportDataCloudStorageSampleTest extends BigquerySampleTest { 16 | 17 | /** 18 | * @throws JsonSyntaxException 19 | * @throws JsonIOException 20 | * @throws FileNotFoundException 21 | */ 22 | public ExportDataCloudStorageSampleTest() throws JsonSyntaxException, JsonIOException, 23 | FileNotFoundException { 24 | super(); 25 | } 26 | 27 | @Test 28 | public void testExportData() throws IOException, InterruptedException{ 29 | ExportDataCloudStorageSample.run(CONSTANTS.getCloudStorageOutputURI(), 30 | CONSTANTS.getProjectId(), 31 | CONSTANTS.getDatasetId(), 32 | CONSTANTS.getCurrentTableId(), 33 | 5000L); 34 | } 35 | 36 | 37 | 38 | } 39 | -------------------------------------------------------------------------------- /java/src/test/java/com/google/cloud/bigquery/samples/test/LoadDataCSVSampleTest.java: -------------------------------------------------------------------------------- 1 | package com.google.cloud.bigquery.samples.test; 2 | 3 | import com.google.gson.JsonIOException; 4 | import com.google.gson.JsonSyntaxException; 5 | import com.google.cloud.bigquery.samples.LoadDataCSVSample; 6 | 7 | import org.junit.Test; 8 | 9 | import java.io.File; 10 | import java.io.FileNotFoundException; 11 | import java.io.FileReader; 12 | import java.io.IOException; 13 | 14 | /** 15 | * TODO: Insert description here. (generated by elibixby) 16 | */ 17 | public class LoadDataCSVSampleTest extends BigquerySampleTest { 18 | 19 | /** 20 | * @throws JsonSyntaxException 21 | * @throws JsonIOException 22 | * @throws FileNotFoundException 23 | */ 24 | public LoadDataCSVSampleTest() throws JsonSyntaxException, JsonIOException, FileNotFoundException { 25 | // TODO(elibixby): Auto-generated constructor stub 26 | super(); 27 | } 28 | 29 | @Test 30 | public void testLoadData() throws IOException, InterruptedException{ 31 | LoadDataCSVSample.run( 32 | CONSTANTS.getCloudStorageInputURI(), 33 | CONSTANTS.getProjectId(), 34 | CONSTANTS.getDatasetId(), 35 | CONSTANTS.getNewTableId(), 36 | new FileReader(new File(RESOURCE_PATH.resolve("schema.json").toString())), 37 | 5000L); 38 | } 39 | 40 | 41 | } 42 | -------------------------------------------------------------------------------- /java/src/test/java/com/google/cloud/bigquery/samples/test/StreamingSampleTest.java: -------------------------------------------------------------------------------- 1 | package com.google.cloud.bigquery.samples.test; 2 | 3 | import static org.junit.Assert.*; 4 | 5 | import com.google.api.services.bigquery.model.TableDataInsertAllResponse; 6 | import com.google.cloud.bigquery.samples.StreamingSample; 7 | import com.google.gson.JsonIOException; 8 | import com.google.gson.JsonSyntaxException; 9 | import com.google.gson.stream.JsonReader; 10 | 11 | import org.junit.Test; 12 | 13 | import java.io.File; 14 | import java.io.FileNotFoundException; 15 | import java.io.FileReader; 16 | import java.io.IOException; 17 | import java.util.Iterator; 18 | 19 | /** 20 | * TODO: Insert description here. 
(generated by elibixby)
21 |  */
22 | public class StreamingSampleTest extends BigquerySampleTest {
23 | 
24 |   /**
25 |    * @throws JsonSyntaxException
26 |    * @throws JsonIOException
27 |    * @throws FileNotFoundException
28 |    */
29 |   public StreamingSampleTest() throws JsonSyntaxException, JsonIOException, FileNotFoundException {
30 |     super();
31 |   }
32 | 
33 |   @Test
34 |   public void testStream() throws IOException{
35 |     JsonReader json = new JsonReader(
36 |         new FileReader(
37 |             new File(RESOURCE_PATH.resolve("streamrows.json").toString())));
38 |     Iterator<TableDataInsertAllResponse> response = StreamingSample.run(
39 |         CONSTANTS.getProjectId(),
40 |         CONSTANTS.getDatasetId(),
41 |         CONSTANTS.getCurrentTableId(),
42 |         json);
43 | 
44 |     while(response.hasNext()){
45 |       assertTrue(!response.next().isEmpty());
46 |     }
47 | 
48 |   }
49 | 
50 | }
51 | 
-------------------------------------------------------------------------------- /java/src/test/java/com/google/cloud/bigquery/samples/test/SyncQuerySampleTest.java: --------------------------------------------------------------------------------
1 | package com.google.cloud.bigquery.samples.test;
2 | 
3 | import static org.junit.Assert.*;
4 | 
5 | import com.google.api.services.bigquery.model.GetQueryResultsResponse;
6 | import com.google.cloud.bigquery.samples.SyncQuerySample;
7 | import com.google.gson.JsonIOException;
8 | import com.google.gson.JsonSyntaxException;
9 | 
10 | import org.junit.Test;
11 | 
12 | import java.io.FileNotFoundException;
13 | import java.io.IOException;
14 | import java.util.Iterator;
15 | 
16 | 
17 | /**
18 |  * TODO: Insert description here. (generated by elibixby)
19 |  */
20 | public class SyncQuerySampleTest extends BigquerySampleTest {
21 | 
22 |   /**
23 |    * @throws JsonSyntaxException
24 |    * @throws JsonIOException
25 |    * @throws FileNotFoundException
26 |    */
27 |   public SyncQuerySampleTest() throws JsonSyntaxException, JsonIOException,
28 |       FileNotFoundException {
29 |     super();
30 |   }
31 | 
32 |   @Test
33 |   public void testSyncQuery() throws IOException{
34 |     Iterator<GetQueryResultsResponse> pages = SyncQuerySample.run(
35 |         CONSTANTS.getProjectId(),
36 |         CONSTANTS.getQuery(),
37 |         10000);
38 |     while(pages.hasNext()){
39 |       assertTrue(!pages.next().getRows().isEmpty());
40 |     }
41 |   }
42 | 
43 | }
44 | 
-------------------------------------------------------------------------------- /python/dump/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googlearchive/bigquery-samples-python/d7a840407d07947eb19e8c88646eed5233948dc4/python/dump/__init__.py
-------------------------------------------------------------------------------- /python/dump/load-data-by-POST.py: --------------------------------------------------------------------------------
1 | import sys
2 | import json
3 | 
4 | from apiclient.discovery import build
5 | from oauth2client.file import Storage
6 | from oauth2client.client import OAuth2WebServerFlow
7 | from oauth2client.tools import run
8 | import httplib2
9 | 
10 | # for python3 compat
11 | raw_input = vars(__builtins__).get('raw_input', input)
12 | 
13 | FLOW = OAuth2WebServerFlow(
14 |     client_id='xxxxxxx.apps.googleusercontent.com',
15 |     client_secret='shhhhhhhhhhhh',
16 |     scope='https://www.googleapis.com/auth/bigquery',
17 |     user_agent='my-program-name/1.0')
18 | 
19 | 
20 | def loadTable(http, service):
21 |     projectId = raw_input('Choose your project ID: ')
22 |     datasetId = raw_input('Choose a dataset ID: ')
23 |     tableId = raw_input('Choose a table name to load the data to: ')
24 | 
25 |     url = 
('https://www.googleapis.com/upload/bigquery/v2/projects/' + 26 | projectId + '/jobs') 27 | newSchemaFile = raw_input('What is your schema? ') 28 | schema = open(newSchemaFile, 'r') 29 | 30 | # Create the body of the request, separated by a boundary of xxx 31 | newresource = ('--xxx\n' + 32 | 'Content-Type: application/json; charset=UTF-8\n' + '\n' + 33 | '{\n' + 34 | ' "configuration": {\n' + 35 | ' "load": {\n' + 36 | ' "schema": {\n' 37 | ' "fields": ' + schema.read() + '\n' + 38 | ' },\n' + 39 | ' "destinationTable": {\n' + 40 | ' "projectId": "' + projectId + '",\n' + 41 | ' "datasetId": "' + datasetId + '",\n' + 42 | ' "tableId": "' + tableId + '"\n' + 43 | ' }\n' + 44 | ' }\n' + 45 | ' }\n' + 46 | '}\n' + 47 | '--xxx\n' + 48 | 'Content-Type: application/octet-stream\n' + 49 | '\n') 50 | newDataFile = raw_input('What is your data? ') 51 | 52 | # Append data from the specified file to the request body 53 | f = open(newDataFile, 'r') 54 | newresource += f.read() 55 | 56 | # Signify the end of the body 57 | newresource += ('--xxx--\n') 58 | 59 | headers = {'Content-Type': 'multipart/related; boundary=xxx'} 60 | resp, content = http.request(url, method='POST', 61 | body=newresource, headers=headers) 62 | 63 | if resp.status == 200: 64 | jsonResponse = json.loads(content) 65 | jobReference = jsonResponse['jobReference']['jobId'] 66 | import time 67 | while True: 68 | jobCollection = service.jobs() 69 | getJob = jobCollection.get(projectId=projectId, 70 | jobId=jobReference).execute() 71 | currentStatus = getJob['status']['state'] 72 | 73 | if 'DONE' == currentStatus: 74 | print('Done Loading!') 75 | return 76 | else: 77 | print('Waiting to load...') 78 | print('Current status: ' + currentStatus) 79 | print(time.ctime()) 80 | time.sleep(10) 81 | 82 | 83 | def main(argv): 84 | # If the credentials don't exist or are invalid, run the native client 85 | # auth flow. The Storage object will ensure that if successful the good 86 | # credentials will get written back to a file. 87 | # 88 | # Choose a file name to store the credentials. 89 | storage = Storage('bigquery2.dat') 90 | credentials = storage.get() 91 | if credentials is None or credentials.invalid: 92 | credentials = run(FLOW, storage) 93 | 94 | # Create an httplib2.Http object to handle our HTTP requests 95 | # and authorize it with our good credentials. 
96 |     http = httplib2.Http() 97 |     http = credentials.authorize(http) 98 | 99 |     service = build('bigquery', 'v2', http=http) 100 | 101 |     loadTable(http, service) 102 | 103 | if __name__ == '__main__': 104 |     main(sys.argv) 105 |
--------------------------------------------------------------------------------
/python/requirements.txt:
--------------------------------------------------------------------------------
1 | argparse==1.2.1 2 | google-api-python-client==1.3.2 3 | httplib2==0.9 4 | oauth2client==1.4.6 5 | py==1.4.26 6 | pyasn1==0.1.7 7 | pyasn1-modules==0.0.5 8 | rsa==3.1.4 9 | simplejson==3.6.5 10 | six==1.9.0 11 | tox==1.9.0 12 | uritemplate==0.6 13 | virtualenv==12.0.7 14 | wsgiref==0.1.2 15 |
--------------------------------------------------------------------------------
/python/samples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/googlearchive/bigquery-samples-python/d7a840407d07947eb19e8c88646eed5233948dc4/python/samples/__init__.py
--------------------------------------------------------------------------------
/python/samples/async_query.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function  # For python 2/3 interoperability 2 | from samples.utils import get_service, paging, poll_job 3 | import uuid 4 | import json 5 | 6 | 7 | # [START async_query] 8 | def async_query(service, project_id, query, batch=False, num_retries=5): 9 |     # Generate a unique job ID so retries 10 |     # don't accidentally duplicate the query 11 |     job_data = { 12 |         'jobReference': { 13 |             'projectId': project_id, 14 |             'jobId': str(uuid.uuid4()) 15 |         }, 16 |         'configuration': { 17 |             'query': { 18 |                 'query': query, 19 |                 'priority': 'BATCH' if batch else 'INTERACTIVE', 20 |             }, 21 |         } 22 |     } 23 |     return service.jobs().insert( 24 |         projectId=project_id, 25 |         body=job_data).execute(num_retries=num_retries) 26 | # [END async_query] 27 | 28 | 29 | # [START run] 30 | def run(project_id, query_string, batch, num_retries, interval): 31 |     service = get_service() 32 | 33 |     query_job = async_query(service, 34 |                             project_id, 35 |                             query_string, 36 |                             batch, 37 |                             num_retries) 38 | 39 |     poll_job(service, 40 |              query_job['jobReference']['projectId'], 41 |              query_job['jobReference']['jobId'], 42 |              interval, 43 |              num_retries) 44 | 45 | 46 |     for page in paging(service, 47 |                        service.jobs().getQueryResults, 48 |                        num_retries=num_retries, 49 |                        **query_job['jobReference']): 50 | 51 |         yield json.dumps(page['rows']) 52 | # [END run] 53 | 54 | 55 | # [START main] 56 | def main(): 57 |     project_id = raw_input("Enter the project ID: ") 58 |     query_string = raw_input("Enter the Bigquery SQL Query: ") 59 |     batch = raw_input("Run query as batch (y/n)?: ") in ('True', 60 |                                                          'true', 61 |                                                          'y', 62 |                                                          'Y', 63 |                                                          'yes', 64 |                                                          'Yes') 65 | 66 | 67 |     num_retries = int(raw_input( 68 |         "Enter number of times to retry in case of 500 error: ")) 69 |     interval = int(raw_input( 70 |         "Enter how often to poll the query for completion (seconds): ")) 71 | 72 |     for result in run(project_id, query_string, batch, num_retries, interval): 73 |         print(result) 74 | # [END main] 75 |
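A minimal non-interactive sketch of driving this sample, assuming the working directory is python/ and application-default credentials are configured; 'my-project' and the query are placeholders, not part of the sample:

from samples.async_query import run

# run() is a generator; each iteration yields one page of results
# as a JSON string.
for page in run('my-project', 'SELECT 17', batch=False,
                num_retries=5, interval=2):
    print(page)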
--------------------------------------------------------------------------------
/python/samples/discovery_doc.py:
--------------------------------------------------------------------------------
1 | import os 2 | import json 3 | import httplib2 4 | import time 5 | 6 | # [START build_and_update] 7 | 8 | RESOURCE_PATH = '..'  # Look for discovery docs in the parent folder 9 | MAX_AGE = 86400  # Update discovery docs older than a day 10 | 11 | # A module that takes care of caching and updating discovery docs 12 | # for google-api-python-clients (until such a feature is integrated) 13 | 14 | 15 | def build_and_update(api, version): 16 |     from oauth2client.client import GoogleCredentials 17 |     from googleapiclient.discovery import build_from_document 18 | 19 | 20 |     path = os.path.join(RESOURCE_PATH, '{}.{}'.format(api, version)) 21 |     try: 22 |         age = time.time() - os.path.getmtime(path) 23 |         if age > MAX_AGE: 24 |             _update_discovery_doc(api, version, path) 25 |     except os.error: 26 |         _update_discovery_doc(api, version, path) 27 | 28 |     with open(path, 'r') as discovery_doc: 29 |         return build_from_document(discovery_doc.read(), 30 |                                    http=httplib2.Http(), 31 |                                    credentials=GoogleCredentials 32 |                                    .get_application_default()) 33 | 34 | def _update_discovery_doc(api, version, path): 35 |     from apiclient.discovery import DISCOVERY_URI 36 |     from apiclient.errors import HttpError 37 |     from apiclient.errors import InvalidJsonError 38 |     import uritemplate 39 | 40 |     requested_url = uritemplate.expand(DISCOVERY_URI, 41 |                                        {'api': api, 'apiVersion': version}) 42 |     resp, content = httplib2.Http().request(requested_url) 43 |     if resp.status >= 400: 44 |         raise HttpError(resp, content, uri=requested_url) 45 |     try: 46 |         with open(path, 'w') as discovery_doc: 47 |             discovery_json = json.loads(content) 48 |             json.dump(discovery_json, discovery_doc) 49 |     except ValueError: 50 |         raise InvalidJsonError( 51 |             'Bad JSON: %s from %s.' % (content, requested_url)) 52 | # [END build_and_update] 53 |
--------------------------------------------------------------------------------
/python/samples/export_data_to_cloud_storage.py:
--------------------------------------------------------------------------------
1 | from samples.utils import get_service, poll_job 2 | import uuid 3 | 4 | 5 | # [START export_table] 6 | def export_table(service, cloud_storage_path, 7 |                  projectId, datasetId, tableId, 8 |                  num_retries=5): 9 |     # Generate a unique job ID so retries 10 |     # don't accidentally duplicate the export 11 |     job_data = { 12 |         'jobReference': { 13 |             'projectId': projectId, 14 |             'jobId': str(uuid.uuid4()) 15 |         }, 16 |         'configuration': { 17 |             'extract': { 18 |                 'sourceTable': { 19 |                     'projectId': projectId, 20 |                     'datasetId': datasetId, 21 |                     'tableId': tableId, 22 |                 }, 23 |                 'destinationUris': [cloud_storage_path], 24 |             } 25 |         } 26 |     } 27 |     return service.jobs().insert( 28 |         projectId=projectId, 29 |         body=job_data).execute(num_retries=num_retries) 30 | # [END export_table] 31 | 32 | 33 | # [START run] 34 | def run(cloud_storage_path, 35 |         projectId, datasetId, tableId, 36 |         num_retries, interval): 37 | 38 |     bigquery = get_service() 39 |     resource = export_table(bigquery, cloud_storage_path, 40 |                             projectId, datasetId, tableId, num_retries) 41 |     poll_job(bigquery, 42 |              resource['jobReference']['projectId'], 43 |              resource['jobReference']['jobId'], 44 |              interval, 45 |              num_retries) 46 | # [END run] 47 | 48 | 49 | # [START main] 50 | def main(): 51 |     projectId = raw_input("Enter the project ID: ") 52 |     datasetId = raw_input("Enter a dataset ID: ") 53 |     tableId = raw_input("Enter a table name to copy: ") 54 |     cloud_storage_path = raw_input( 55 |         "Enter a Google Cloud Storage URI: ") 56 |     interval = int(raw_input( 57 |         "Enter how often to poll the job (in seconds): ")) 58 |     num_retries = int(raw_input( 59 |         "Enter the number of retries in case of 500 error: ")) 60 | 61 |     run(cloud_storage_path, 62 |         projectId, datasetId, tableId, 63 |         num_retries, interval) 64 | 65 |     print('Done exporting!') 66 | # [END main] 67 |
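A sketch of calling the export sample non-interactively; the gs:// path and the project, dataset, and table IDs below are placeholders:

from samples.export_data_to_cloud_storage import run

# Exports the table to the given Cloud Storage URI, retrying up to
# 5 times and polling the job every 5 seconds until it is DONE.
run('gs://my-bucket/export.csv', 'my-project', 'my_dataset', 'my_table', 5, 5)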
--------------------------------------------------------------------------------
/python/samples/load_data_by_post.py:
--------------------------------------------------------------------------------
1 | import json 2 | import httplib2 3 | from samples.utils import get_service, poll_job 4 | from oauth2client.client import GoogleCredentials 5 | 6 | 7 | # [START make_post] 8 | def make_post(http, schema, data, projectId, datasetId, tableId): 9 |     url = ('https://www.googleapis.com/upload/bigquery/v2/projects/' + 10 |            projectId + '/jobs') 11 |     # Create the body of the request, separated by a boundary of xxx 12 |     resource = ('--xxx\n' + 13 |                 'Content-Type: application/json; charset=UTF-8\n' + '\n' + 14 |                 '{\n' + 15 |                 '   "configuration": {\n' + 16 |                 '     "load": {\n' + 17 |                 '       "schema": {\n' 18 |                 '         "fields": ' + str(schema) + '\n' + 19 |                 '      },\n' + 20 |                 '      "destinationTable": {\n' + 21 |                 '        "projectId": "' + projectId + '",\n' + 22 |                 '        "datasetId": "' + datasetId + '",\n' + 23 |                 '        "tableId": "' + tableId + '"\n' + 24 |                 '      }\n' + 25 |                 '    }\n' + 26 |                 '  }\n' + 27 |                 '}\n' + 28 |                 '--xxx\n' + 29 |                 'Content-Type: application/octet-stream\n' + 30 |                 '\n') 31 |     # Append data to the request body 32 |     resource += data 33 | 34 |     # Signify the end of the body 35 |     resource += ('--xxx--\n') 36 | 37 |     headers = {'Content-Type': 'multipart/related; boundary=xxx'} 38 | 39 |     return http.request(url, 40 |                         method='POST', 41 |                         body=resource, 42 |                         headers=headers) 43 | # [END make_post] 44 | 45 | 46 | # [START main] 47 | def main(): 48 |     credentials = GoogleCredentials.get_application_default() 49 |     http = credentials.authorize(httplib2.Http()) 50 |     projectId = raw_input('Enter the project ID: ') 51 |     datasetId = raw_input('Enter a dataset ID: ') 52 |     tableId = raw_input('Enter a table name to load the data to: ') 53 |     schema_path = raw_input( 54 |         'Enter the path to the schema file for the table: ') 55 | 56 |     with open(schema_path, 'r') as schema_file: 57 |         schema = schema_file.read() 58 | 59 |     data_path = raw_input('Enter the path to the data file: ') 60 | 61 |     with open(data_path, 'r') as data_file: 62 |         data = data_file.read() 63 | 64 |     resp, content = make_post(http, 65 |                               schema, 66 |                               data, 67 |                               projectId, 68 |                               datasetId, 69 |                               tableId) 70 | 71 |     if resp.status == 200: 72 |         job_resource = json.loads(content) 73 |         service = get_service() 74 |         poll_job(service, **job_resource['jobReference']) 75 |         print("Success!") 76 |     else: 77 |         print("Http error code: {}".format(resp.status)) 78 | # [END main] 79 | 80 | if __name__ == '__main__': 81 |     main() 82 |
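For reference, a sketch of calling make_post directly with an in-memory schema and data, loosely mirroring resources/schema.json and resources/data.csv; the project, dataset, and table IDs are placeholders:

import httplib2
from oauth2client.client import GoogleCredentials
from samples.load_data_by_post import make_post

credentials = GoogleCredentials.get_application_default()
http = credentials.authorize(httplib2.Http())

schema = '[{"type": "STRING", "name": "Name"}, {"type": "INTEGER", "name": "Age"}]'
data = 'Gandalf, 2000\n'
# resp.status is 200 on success; content holds the job resource as JSON.
resp, content = make_post(http, schema, data,
                          'my-project', 'my_dataset', 'my_table')
print(resp.status)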
--------------------------------------------------------------------------------
/python/samples/load_data_from_csv.py:
--------------------------------------------------------------------------------
1 | from samples.utils import get_service, poll_job 2 | import json 3 | import uuid 4 | 5 | 6 | # [START load_table] 7 | def load_table(service, source_schema, source_csv, 8 |                projectId, datasetId, tableId, num_retries=5): 9 |     # Generate a unique job ID so retries 10 |     # don't accidentally duplicate the load job 11 |     job_data = { 12 |         'jobReference': { 13 |             'projectId': projectId, 14 |             'jobId': str(uuid.uuid4()) 15 |         }, 16 |         'configuration': { 17 |             'load': { 18 |                 'sourceUris': [source_csv], 19 |                 'schema': { 20 |                     'fields': source_schema 21 |                 }, 22 |                 'destinationTable': { 23 |                     'projectId': projectId, 24 |                     'datasetId': datasetId, 25 |                     'tableId': tableId 26 |                 }, 27 |             } 28 |         } 29 |     } 30 | 31 |     return service.jobs().insert( 32 |         projectId=projectId, 33 |         body=job_data).execute(num_retries=num_retries) 34 | # [END load_table] 35 | 36 | 37 | # [START run] 38 | def run(source_schema, source_csv, 39 |         projectId, datasetId, tableId, interval, num_retries): 40 |     service = get_service() 41 | 42 |     job = load_table(service, source_schema, source_csv, 43 |                      projectId, datasetId, tableId, num_retries) 44 | 45 |     poll_job(service, 46 |              job['jobReference']['projectId'], 47 |              job['jobReference']['jobId'], 48 |              interval, 49 |              num_retries) 50 | # [END run] 51 | 52 | 53 | # [START main] 54 | def main(): 55 |     projectId = raw_input("Enter the project ID: ") 56 |     datasetId = raw_input("Enter a dataset ID: ") 57 |     tableId = raw_input("Enter a destination table name: ") 58 | 59 |     schema_file_path = raw_input( 60 |         "Enter the path to the table schema: ") 61 |     with open(schema_file_path, 'r') as schema_file: 62 |         schema = json.load(schema_file) 63 | 64 |     data_file_path = raw_input( 65 |         "Enter the Cloud Storage path for the CSV file: ") 66 |     num_retries = int(raw_input( 67 |         "Enter number of times to retry in case of 500 error: ")) 68 |     interval = int(raw_input( 69 |         "Enter how often to poll the query for completion (seconds): ")) 70 |     run(schema, 71 |         data_file_path, 72 |         projectId, 73 |         datasetId, 74 |         tableId, 75 |         interval, 76 |         num_retries) 77 | 78 |     print("Job complete!") 79 | # [END main] 80 |
--------------------------------------------------------------------------------
/python/samples/streaming.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function 2 | 3 | from samples.utils import get_service 4 | import ast 5 | import uuid 6 | import json 7 | 8 | 9 | # [START stream_row_to_bigquery] 10 | def stream_row_to_bigquery(service, 11 |                            project_id, 12 |                            dataset_id, 13 |                            table_id, 14 |                            row, 15 |                            num_retries=5): 16 |     # Give each row a unique insertId so retries 17 |     # don't accidentally duplicate the insert 18 |     insert_all_data = { 19 |         'rows': [{'insertId': str(uuid.uuid4()), 20 |                   'json': row}] 21 |     } 22 |     return service.tabledata().insertAll( 23 |         projectId=project_id, 24 |         datasetId=dataset_id, 25 |         tableId=table_id, 26 |         body=insert_all_data).execute(num_retries=num_retries) 27 | # [END stream_row_to_bigquery] 28 | 29 | 30 | # [START run] 31 | def run(project_id, dataset_id, table_id, rows, num_retries): 32 |     service = get_service() 33 |     for row in rows: 34 |         response = stream_row_to_bigquery(service, 35 |                                           project_id, 36 |                                           dataset_id, 37 |                                           table_id, 38 |                                           row, 39 |                                           num_retries) 40 |         yield json.dumps(response) 41 | # [END run] 42 | 43 | 44 | # [START main] 45 | def get_rows(): 46 |     line = raw_input("Enter a row (python dict) into the table: ") 47 |     while line: 48 |         yield ast.literal_eval(line) 49 |         line = raw_input( 50 |             "Enter another row into the table \n" + 51 |             "[hit enter to stop]: ") 52 | 53 | 54 | def main(): 55 |     project_id = raw_input("Enter the project ID: ") 56 |     dataset_id = raw_input("Enter a dataset ID: ") 57 |     table_id = raw_input("Enter a table ID: ") 58 |     num_retries = int(raw_input( 59 |         "Enter number of times to retry in case of 500 error: ")) 60 | 61 |     for result in run(project_id, dataset_id, table_id, 62 |                       get_rows(), num_retries): 63 |         print(result) 64 | # [END main] 65 |
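A sketch of feeding the streaming sample from resources/streamrows.json instead of stdin; the IDs are placeholders, and the relative path assumes the python/ working directory the tests use:

import json
from samples.streaming import run

with open('../resources/streamrows.json', 'r') as f:
    rows = json.load(f)

# Each yielded value is the insertAll response for one row,
# serialized as a JSON string.
for response in run('my-project', 'my_dataset', 'my_table', rows, 5):
    print(response)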
--------------------------------------------------------------------------------
/python/samples/sync_query.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function  # For python 2/3 interoperability 2 | from samples.utils import get_service, paging 3 | import json 4 | 5 | 6 | # [START sync_query] 7 | def sync_query(service, project_id, query, timeout=10000, num_retries=5): 8 |     query_data = { 9 |         'query': query, 10 |         'timeoutMs': timeout, 11 |     } 12 |     return service.jobs().query( 13 |         projectId=project_id, 14 |         body=query_data).execute(num_retries=num_retries) 15 | # [END sync_query] 16 | 17 | 18 | # [START run] 19 | def run(project_id, query, timeout, num_retries): 20 |     service = get_service() 21 |     response = sync_query(service, 22 |                           project_id, 23 |                           query, 24 |                           timeout, 25 |                           num_retries) 26 | 27 |     for page in paging(service, 28 |                        service.jobs().getQueryResults, 29 |                        num_retries=num_retries, 30 |                        **response['jobReference']): 31 |         yield json.dumps(page['rows']) 32 | # [END run] 33 | 34 | 35 | # [START main] 36 | def main(): 37 |     project_id = raw_input("Enter the project ID: ") 38 |     query_string = raw_input("Enter the Bigquery SQL Query: ") 39 |     timeout = int(raw_input( 40 |         "Enter how long to wait for the query to complete in milliseconds" 41 |         "\n (if longer than 10 seconds, use an asynchronous query): ")) 42 |     num_retries = int(raw_input( 43 |         "Enter how many times to retry in case of server error: ")) 44 | 45 |     for result in run(project_id, query_string, timeout, num_retries): 46 |         print(result) 47 | 48 | 49 | # [END main] 50 |
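A sketch of a non-interactive synchronous query, reusing the query string from resources/constants.json; the project ID is a placeholder:

from samples.sync_query import run

# Pages of results come back as JSON strings.
for page in run('my-project',
                'SELECT corpus FROM publicdata:samples.shakespeare GROUP BY corpus;',
                timeout=10000, num_retries=5):
    print(page)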
--------------------------------------------------------------------------------
/python/samples/utils.py:
--------------------------------------------------------------------------------
1 | 2 | # [START get_service] 3 | def get_service(): 4 |     from samples.discovery_doc import build_and_update 5 |     return build_and_update('bigquery', 'v2') 6 | # [END get_service] 7 | 8 | # [START poll_job] 9 | def poll_job(service, projectId, jobId, interval=5, num_retries=5): 10 |     import time 11 | 12 |     job_get = service.jobs().get( 13 |         projectId=projectId, 14 |         jobId=jobId) 15 |     job_resource = job_get.execute(num_retries=num_retries) 16 | 17 |     while job_resource['status']['state'] != 'DONE': 18 |         print('Job is {}, waiting {} seconds...' 19 |               .format(job_resource['status']['state'], interval)) 20 |         time.sleep(interval) 21 |         job_resource = job_get.execute(num_retries=num_retries) 22 | 23 |     return job_resource 24 | # [END poll_job] 25 | 26 | 27 | # [START paging] 28 | def paging(service, request_func, num_retries=5, **kwargs): 29 |     has_next = True 30 |     while has_next: 31 |         response = request_func(**kwargs).execute(num_retries=num_retries) 32 |         if 'pageToken' in response: 33 |             kwargs['pageToken'] = response['pageToken'] 34 |         else: 35 |             has_next = False 36 |         yield response 37 | # [END paging] 38 |
--------------------------------------------------------------------------------
/python/test/__init__.py:
--------------------------------------------------------------------------------
1 | import os 2 | 3 | RESOURCE_PATH = os.path.join(os.path.dirname(__file__), 4 |                              '..', 5 |                              '..', 6 |                              'resources') 7 |
--------------------------------------------------------------------------------
/python/test/base_test.py:
--------------------------------------------------------------------------------
1 | import unittest 2 | from test import RESOURCE_PATH 3 | import json 4 | import os 5 | 6 | 7 | class BaseBigqueryTest(unittest.TestCase): 8 | 9 |     def setUp(self): 10 |         with open( 11 |                 os.path.join(RESOURCE_PATH, 'constants.json'), 12 |                 'r') as constants_file: 13 | 14 |             self.constants = json.load(constants_file) 15 |
--------------------------------------------------------------------------------
/python/test/test_async_query.py:
--------------------------------------------------------------------------------
1 | from test.base_test import BaseBigqueryTest 2 | from samples.async_query import run 3 | import json 4 | import unittest 5 | 6 | 7 | class TestAsyncQuery(BaseBigqueryTest): 8 | 9 |     def test_async_query(self): 10 |         for result in run(self.constants['projectId'], 11 |                           self.constants['query'], 12 |                           False, 13 |                           5, 14 |                           5): 15 |             self.assertIsNotNone(json.loads(result)) 16 | 17 | 18 | if __name__ == '__main__': 19 |     unittest.main() 20 |
--------------------------------------------------------------------------------
/python/test/test_export_data_to_cloud_storage.py:
--------------------------------------------------------------------------------
1 | """Tests for export_data_to_cloud_storage.""" 2 | from test.base_test import BaseBigqueryTest 3 | from samples.export_data_to_cloud_storage import run 4 | import unittest 5 | 6 | 7 | class TestExportTableToGCS(BaseBigqueryTest): 8 | 9 |     def test_export_table(self): 10 |         run(self.constants['cloudStorageInputURI'], 11 |             self.constants['projectId'], 12 |             self.constants['datasetId'], 13 |             self.constants['newTableId'], 14 |             5, 15 |             5) 16 | 17 | if __name__ == '__main__': 18 |     unittest.main() 19 |
--------------------------------------------------------------------------------
/python/test/test_load_data_from_csv.py:
--------------------------------------------------------------------------------
1 | """Tests for load_data_from_csv.""" 2 | 3 | from test.base_test import BaseBigqueryTest 4 | from test import RESOURCE_PATH 5 | from samples.load_data_from_csv import run 6 | import os 7 | import json 8 | import unittest 9 | 10 | 11 | class TestLoadDataFromCSV(BaseBigqueryTest): 12 | 13 |     def setUp(self): 14 |         super(TestLoadDataFromCSV, self).setUp() 15 |         with open( 16 |                 os.path.join(RESOURCE_PATH, 'schema.json'), 17 |                 'r') as schema_file: 18 |             self.schema = json.load(schema_file) 19 | 20 |     def test_load_table(self): 21 |         run(self.schema, 22 |             self.constants['cloudStorageInputURI'], 23 |             self.constants['projectId'], 24 |             self.constants['datasetId'], 25 |             self.constants['newTableId'], 26 |             5, 27 |             5) 28 | 29 | 30 | if __name__ == '__main__': 31 |     unittest.main() 32 |
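The tests read resources/constants.json and expect GOOGLE_APPLICATION_CREDENTIALS to point at a service-account key, as tox.ini arranges; a sketch of running one module directly from the python/ directory:

# python -m unittest test.test_load_data_from_csv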
--------------------------------------------------------------------------------
/python/test/test_streaming.py:
--------------------------------------------------------------------------------
1 | """Tests for streaming.""" 2 | from samples.streaming import run 3 | from test.base_test import BaseBigqueryTest 4 | from test import RESOURCE_PATH 5 | import json 6 | import os 7 | import unittest 8 | 9 | 10 | class TestStreaming(BaseBigqueryTest): 11 | 12 |     def test_stream_row_to_bigquery(self): 13 | 14 |         with open( 15 |                 os.path.join(RESOURCE_PATH, 'streamrows.json'), 16 |                 'r') as rows_file: 17 | 18 |             rows = json.load(rows_file) 19 | 20 |         for result in run(self.constants['projectId'], 21 |                           self.constants['datasetId'], 22 |                           self.constants['newTableId'], 23 |                           rows, 24 |                           5): 25 |             self.assertIsNotNone(json.loads(result)) 26 | 27 | 28 | if __name__ == '__main__': 29 |     unittest.main() 30 |
--------------------------------------------------------------------------------
/python/test/test_sync_query.py:
--------------------------------------------------------------------------------
1 | import unittest 2 | 3 | from samples.sync_query import run 4 | from test.base_test import BaseBigqueryTest 5 | import json 6 | 7 | 8 | class TestSyncQuery(BaseBigqueryTest): 9 | 10 |     def test_sync_query(self): 11 |         for result in run(self.constants['projectId'], 12 |                           self.constants['query'], 13 |                           5000, 14 |                           5): 15 | 16 |             self.assertIsNotNone(json.loads(result)) 17 | 18 | 19 | if __name__ == '__main__': 20 |     unittest.main() 21 |
--------------------------------------------------------------------------------
/python/tox.ini:
--------------------------------------------------------------------------------
1 | [tox] 2 | skipsdist = True 3 | envlist = flake8-py2 4 | 5 | [testenv] 6 | deps = 7 |     flake8 8 |     google-api-python-client 9 |     oauth2client 10 | setenv = 11 |     GOOGLE_APPLICATION_CREDENTIALS=../bigquery_samples.key.json 12 | commands = 13 |     python -m unittest discover 14 | 15 | [testenv:flake8-py2] 16 |
--------------------------------------------------------------------------------
/resources/constants.json:
--------------------------------------------------------------------------------
1 | { 2 |     "projectId": "bigquery-devrel-samples", 3 |     "datasetId": "test_dataset", 4 |     "currentTableId": "test_table", 5 |     "newTableId": "test_table2", 6 |     "cloudStorageInputURI": "gs://bigquery-devrel-samples-bucket/data.csv", 7 |     "cloudStorageOutputURI": "gs://bigquery-devrel-samples-bucket/output.csv", 8 |     "query": "SELECT corpus FROM publicdata:samples.shakespeare GROUP BY corpus;" 9 | } 10 |
--------------------------------------------------------------------------------
/resources/data.csv:
--------------------------------------------------------------------------------
1 | Gandalf, 2000, 140.0, 1 2 |
--------------------------------------------------------------------------------
/resources/schema.json:
--------------------------------------------------------------------------------
1 | [{"type": "STRING", "name": "Name"}, {"type": "INTEGER", "name": "Age"}, {"type": "FLOAT", "name": "Weight"}, {"type": "BOOLEAN", "name": "IsMagic"}]
--------------------------------------------------------------------------------
/resources/streamrows.json:
--------------------------------------------------------------------------------
1 | [ 2 |   {"Name": "test", "Age": 0, "Weight":
100.0, "IsMagic": false}, 3 | {"Name": "test", "Age": 1, "Weight": 100.0, "IsMagic": false}, 4 | {"Name": "test", "Age": 2, "Weight": 100.0, "IsMagic": false}, 5 | {"Name": "test", "Age": 3, "Weight": 100.0, "IsMagic": false}, 6 | {"Name": "test", "Age": 0, "Weight": 100.0, "IsMagic": false} 7 | ] 8 | --------------------------------------------------------------------------------