├── .credentials.json.enc
├── .github
│   ├── issue_template.md
│   ├── pull_request_template.md
│   ├── stale.yml
│   └── workflows
│       └── release.yml
├── .gitignore
├── .travis.yml
├── LEAD.md
├── LICENSE.md
├── MANIFEST.in
├── Makefile
├── README.md
├── data
│   ├── articles.csv
│   └── articles.json
├── examples
│   ├── __init__.py
│   └── storage.py
├── pylama.ini
├── pytest.ini
├── setup.cfg
├── setup.py
├── tableschema_bigquery
│   ├── VERSION
│   ├── __init__.py
│   ├── mapper.py
│   └── storage.py
├── tests
│   ├── __init__.py
│   ├── test_mapper.py
│   └── test_storage.py
└── tox.ini

--------------------------------------------------------------------------------
/.credentials.json.enc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frictionlessdata/tableschema-bigquery-py/7ed9d002620619a819f73d97e03257dcc715c7a4/.credentials.json.enc

--------------------------------------------------------------------------------
/.github/issue_template.md:
--------------------------------------------------------------------------------
# Overview

Please replace this line with full information about your idea or problem. If it's a bug, share as much information as possible to reproduce it.

---

Please preserve this line to notify @roll (lead of this repository)

--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
# Overview

Please replace this line with full information about your pull request. Make sure that tests pass before publishing it.

---

Please preserve this line to notify @roll (lead of this repository)

--------------------------------------------------------------------------------
/.github/stale.yml:
--------------------------------------------------------------------------------
# Number of days of inactivity before an issue becomes stale
daysUntilStale: 90

# Number of days of inactivity before a stale issue is closed
daysUntilClose: 30

# Issues with these labels will never be considered stale
exemptLabels:
  - feature
  - enhancement
  - bug

# Label to use when marking an issue as stale
staleLabel: wontfix

# Comment to post when marking an issue as stale. Set to `false` to disable
markComment: >
  This issue has been automatically marked as stale because it has not had
  recent activity. It will be closed if no further activity occurs. Thank you
  for your contributions.

# Comment to post when closing a stale issue. Set to `false` to disable
closeComment: false
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
name: release

on:
  push:
    tags:
      - 'v*.*.*'

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v1
      - name: Release
        uses: softprops/action-gh-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Project
.credentials.json
tmp/

--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
dist:
  xenial

sudo:
  false

language:
  python

python:
  - 2.7
  - 3.6
  - 3.7
  - 3.8

env:
  global:
    - TOXENV="py${PYTHON_VERSION//./}"

before_install:
  - openssl aes-256-cbc -K $encrypted_3343fdf878cf_key -iv $encrypted_3343fdf878cf_iv -in .credentials.json.enc -out .credentials.json -d

install:
  - make install
  - pip install coveralls

script:
  - make test

after_success:
  - coveralls

jobs:
  include:
    - stage: release
      if: tag IS present
      python: 3.8
      deploy:
        provider: pypi
        user: roll
        distributions: sdist bdist_wheel
        skip_cleanup: true
        on:
          tags: true
        password:
          secure: hmjc1R3LVbPZ0HK73QlW3HfGAAxuAXTRSZtn/hR1aIda5CACyfa67s4P7dLE8sV4tKO52L/hqxgpm+Tp1ssp3GnlFedcoROvqkm22JCHjDlSyu9VpYt/lTwCY1OKBennGjY9TfvfAJWup+e+kIDmLOBFTtCOvvEhHD7agtob14SbV65ELmzpPsP/GlG7n6hNN6B97HsjXhjdlDC6wR2yfAodkjIHMR2l5g5BnySVZ0QNIzIlpMILUXHb0mm2PfZU2mr2fsTDkpj/k95yCANoC0gOFzYnewohriEX6NA4xiD4R/sxsCVLcfOOrZPoUGtFt42K+wyMiJG3/N6quR4rPoLkaKVt5yHcrhxGboYDXs6hPEnf3CHJ4ENxi6xDZI4RvYCrEbTb9OqGqv8ci2C44H27c/qBTjulm4sb9mUIJnurR+D+U7TO6GNj52xrIS8wvjl2EQ0srrZll2BKhXzsLtn7qa24pyyrHW+AU3NZmEq+1nJG+X1F8e1VEBZKb3P+ft3lJTMl7bnOlc95sTWFn/CMiZb3itGT0pEbFWGw3Nrl1JAUfJSCr6kEkjqMvRyiWqCMsjKltAfQ7KqV6mW6zptOKn1xgyPysmQJqL+qFWRR3hagzPqIy/EoMbaQDJ9ISx8hbDroCxDDEHtxRy2og0oVgbZTk+7j4aKEi3XrDUk=
--------------------------------------------------------------------------------
/LEAD.md:
--------------------------------------------------------------------------------
roll

--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2015 Open Knowledge

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
global-include VERSION
include LICENSE.md
include Makefile
include pylama.ini
include pytest.ini
include README.md
include tox.ini

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
.PHONY: all install list readme release templates test version


PACKAGE := $(shell grep '^PACKAGE =' setup.py | cut -d "'" -f2)
VERSION := $(shell head -n 1 $(PACKAGE)/VERSION)
LEAD := $(shell head -n 1 LEAD.md)


all: list

install:
	pip install --upgrade -e .[develop]

list:
	@grep '^\.PHONY' Makefile | cut -d' ' -f2- | tr ' ' '\n'

lint:

readme:
	pip install md-toc
	pip install referencer
	referencer $(PACKAGE) README.md --in-place
	md_toc -p README.md github --header-levels 3
	sed -i '/(#tableschema-bigquery-py)/,+2d' README.md

release:
	git checkout master && git pull origin && git fetch -p && git diff
	@echo "\nContinuing in 10 seconds. Press CTRL+C to abort\n" && sleep 10
	@git log --pretty=format:"%C(yellow)%h%Creset %s%Cgreen%d" --reverse -20
	@echo "\nReleasing v$(VERSION) in 10 seconds. Press CTRL+C to abort\n" && sleep 10
	git commit -a -m 'v$(VERSION)' && git tag -a v$(VERSION) -m 'v$(VERSION)'
	git push --follow-tags

templates:
	sed -i -E "s/@(\w*)/@$(LEAD)/" .github/issue_template.md
	sed -i -E "s/@(\w*)/@$(LEAD)/" .github/pull_request_template.md

test:
	pylama $(PACKAGE)
	tox

version:
	@echo $(VERSION)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# tableschema-bigquery-py

[![Travis](https://img.shields.io/travis/frictionlessdata/tableschema-bigquery-py/master.svg)](https://travis-ci.org/frictionlessdata/tableschema-bigquery-py)
[![Coveralls](http://img.shields.io/coveralls/frictionlessdata/tableschema-bigquery-py.svg?branch=master)](https://coveralls.io/r/frictionlessdata/tableschema-bigquery-py?branch=master)
[![PyPi](https://img.shields.io/pypi/v/tableschema-bigquery.svg)](https://pypi.python.org/pypi/tableschema-bigquery)
[![Github](https://img.shields.io/badge/github-master-brightgreen)](https://github.com/frictionlessdata/tableschema-bigquery-py)
[![Gitter](https://img.shields.io/gitter/room/frictionlessdata/chat.svg)](https://gitter.im/frictionlessdata/chat)

Generate and load BigQuery tables based on [Table Schema](http://specs.frictionlessdata.io/table-schema/) descriptors.

## Features

- implements the `tableschema.Storage` interface

## Contents

- [Getting Started](#getting-started)
  - [Installation](#installation)
  - [Prepare BigQuery](#prepare-bigquery)
- [Documentation](#documentation)
- [API Reference](#api-reference)
  - [`Storage`](#storage)
- [Contributing](#contributing)
- [Changelog](#changelog)

## Getting Started

### Installation

The package uses semantic versioning. It means that major versions could include breaking changes. It's highly recommended to specify a `package` version range in your `setup/requirements` file, e.g. `package>=1.0,<2.0`.

```bash
pip install tableschema-bigquery
```
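Following the version-range advice above, a pinned install could look like the line below (the exact bounds are illustrative, not a maintainer recommendation):

```bash
pip install 'tableschema-bigquery>=1.0,<2.0'
```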
### Prepare BigQuery

To start using the Google BigQuery service:
- Create a new project - [link](https://console.developers.google.com/home/dashboard)
- Create a service key - [link](https://console.developers.google.com/apis/credentials)
- Download the JSON credentials and set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable

## Documentation

```python
import io
import os
import json
from datapackage import Package
from apiclient.discovery import build
from oauth2client.client import GoogleCredentials

# Prepare BigQuery credentials
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '.credentials.json'
credentials = GoogleCredentials.get_application_default()
service = build('bigquery', 'v2', credentials=credentials)
project = json.load(io.open('.credentials.json', encoding='utf-8'))['project_id']

# Save package to BigQuery
package = Package('datapackage.json')
package.save(storage='bigquery', service=service, project=project, dataset='dataset')

# Load package from BigQuery
package = Package(storage='bigquery', service=service, project=project, dataset='dataset')
package.resources
```

## API Reference

### `Storage`
```python
Storage(self, service, project, dataset, prefix='')
```
BigQuery storage

Package implements
[Tabular Storage](https://github.com/frictionlessdata/tableschema-py#storage)
interface (see full documentation on the link):

![Storage](https://i.imgur.com/RQgrxqp.png)

> Only additional API is documented

__Arguments__
- __service (object)__: BigQuery `Service` object
- __project (str)__: BigQuery project name
- __dataset (str)__: BigQuery dataset name
- __prefix (str)__: prefix for all buckets

## Contributing

> The project follows the [Open Knowledge International coding standards](https://github.com/okfn/coding-standards).

The recommended way to get started is to create and activate a project virtual environment.
To install the package and development dependencies into the active environment:

```bash
$ make install
```

To run tests with linting and coverage:

```bash
$ make test
```

## Changelog

Only breaking and the most important changes are described here. The full changelog and documentation for all released versions can be found in the nicely formatted [commit history](https://github.com/frictionlessdata/tableschema-bigquery-py/commits/master).

#### v1.0

- Initial driver release
--------------------------------------------------------------------------------
/data/articles.csv:
--------------------------------------------------------------------------------
id,name,current,rating,created_year,created_date,created_datetime
1,Taxes,True,9.5,2015,2015-01-01,2015-01-01T03:00:00Z
2,中国人,False,7,2015,2015-12-31,2015-12-31T15:45:33Z

--------------------------------------------------------------------------------
/data/articles.json:
--------------------------------------------------------------------------------
{
  "fields": [
    {
      "name": "id",
      "type": "integer",
      "constraints": {
        "required": true
      }
    },
    {
      "name": "name",
      "type": "string"
    },
    {
      "name": "current",
      "type": "boolean"
    },
    {
      "name": "rating",
      "type": "number"
    },
    {
      "name": "created_year",
      "type": "date",
      "format": "fmt:%Y"
    },
    {
      "name": "created_date",
      "type": "date"
    },
    {
      "name": "created_datetime",
      "type": "datetime"
    }
  ]
}

--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frictionlessdata/tableschema-bigquery-py/7ed9d002620619a819f73d97e03257dcc715c7a4/examples/__init__.py

--------------------------------------------------------------------------------
/examples/storage.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
from __future__ import unicode_literals

import os
import io
import json
import uuid
from tabulator import topen
from apiclient.discovery import build
from oauth2client.client import GoogleCredentials

from tableschema_bigquery import Storage


# Get resources
articles_schema = json.load(io.open('data/articles.json', encoding='utf-8'))
articles_data = topen('data/articles.csv', with_headers=True).read()

# Prepare BigQuery
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '.credentials.json'
credentials = GoogleCredentials.get_application_default()
service = build('bigquery', 'v2', credentials=credentials)
project = json.load(io.open('.credentials.json', encoding='utf-8'))['project_id']
dataset = 'resource'
prefix = '%s_' % uuid.uuid4().hex

# Storage (the Storage class exposes a `buckets` property, not `tables`)
storage = Storage(service, project, dataset, prefix=prefix)

# Delete buckets
for bucket in reversed(storage.buckets):
    storage.delete(bucket)

# Create buckets
storage.create('articles', articles_schema)

# Write data to buckets
storage.write('articles', articles_data)

# List buckets
print(storage.buckets)

# Describe buckets
print(storage.describe('articles'))

# Read data from buckets
print(list(storage.read('articles')))

# Delete buckets
for bucket in reversed(storage.buckets):
    storage.delete(bucket)

--------------------------------------------------------------------------------
/pylama.ini:
--------------------------------------------------------------------------------
[pylama]
linters = pyflakes,mccabe,pep8

[pylama:pep8]
max_line_length = 100

[pylama:*/__init__.py]
ignore = W0611
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
[pytest]
testpaths = tests

--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
[bdist_wheel]
universal=1

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

import os
import io
from setuptools import setup, find_packages


# Helpers
def read(*paths):
    """Read a text file."""
    basedir = os.path.dirname(__file__)
    fullpath = os.path.join(basedir, *paths)
    contents = io.open(fullpath, encoding='utf-8').read().strip()
    return contents


# Prepare
PACKAGE = 'tableschema_bigquery'
NAME = PACKAGE.replace('_', '-')
INSTALL_REQUIRES = [
    'six>=1.9',
    'rsa<=4.0',  # for py2
    'python-slugify>=1.2',
    'google-api-python-client>=1.5',
    'unicodecsv>=0.14',
    'tableschema>=1.0',
    'tabulator>=1.0',
]
TESTS_REQUIRE = [
    'mock',
    'pylama',
    'pytest',
    'pytest-cov',
    'oauth2client',
    'tox',
]
README = read('README.md')
VERSION = read(PACKAGE, 'VERSION')
PACKAGES = find_packages(exclude=['examples', 'tests'])


# Run
setup(
    name=NAME,
    version=VERSION,
    packages=PACKAGES,
    include_package_data=True,
    install_requires=INSTALL_REQUIRES,
    tests_require=TESTS_REQUIRE,
    extras_require={'develop': TESTS_REQUIRE},
    zip_safe=False,
    long_description=README,
    long_description_content_type='text/markdown',
    description='Generate BigQuery tables, load and extract data, based on JSON Table Schema descriptors.',
    author='Open Knowledge Foundation',
    author_email='info@okfn.org',
    url='https://github.com/frictionlessdata/jsontableschema-bigquery-py',
    license='MIT',
    keywords=[
        'frictionless data',
    ],
    classifiers=[
        'Development Status :: 4 - Beta',
        'Environment :: Web Environment',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Topic :: Internet :: WWW/HTTP :: Dynamic Content',
        'Topic :: Software Development :: Libraries :: Python Modules'
    ],
)

--------------------------------------------------------------------------------
/tableschema_bigquery/VERSION:
--------------------------------------------------------------------------------
1.0.1

--------------------------------------------------------------------------------
/tableschema_bigquery/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# pylama:skip=1
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
from __future__ import unicode_literals


# Module API

from .storage import Storage


# Version

import io
import os
__version__ = io.open(
    os.path.join(os.path.dirname(__file__), 'VERSION'),
    encoding='utf-8').read().strip()
--------------------------------------------------------------------------------
/tableschema_bigquery/mapper.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
from __future__ import unicode_literals

import re
import json
import tableschema
from slugify import slugify
from dateutil.parser import parse


# Module API

class Mapper(object):

    # Public

    def __init__(self, prefix):
        """Mapper to convert/restore FD entities to/from BigQuery entities
        """
        self.__prefix = prefix

    def convert_bucket(self, bucket):
        """Convert bucket to BigQuery
        """
        return self.__prefix + bucket

    def convert_descriptor(self, descriptor):
        """Convert descriptor to BigQuery
        """

        # Fields
        fields = []
        fallbacks = []
        schema = tableschema.Schema(descriptor)
        for index, field in enumerate(schema.fields):
            converted_type = self.convert_type(field.type)
            if not converted_type:
                converted_type = 'STRING'
                fallbacks.append(index)
            mode = 'NULLABLE'
            if field.required:
                mode = 'REQUIRED'
            fields.append({
                'name': _slugify_field_name(field.name),
                'type': converted_type,
                'mode': mode,
            })

        # Descriptor
        converted_descriptor = {
            'fields': fields,
        }

        return (converted_descriptor, fallbacks)

    def convert_row(self, row, schema, fallbacks):
        """Convert row to BigQuery
        """
        for index, field in enumerate(schema.fields):
            value = row[index]
            if index in fallbacks:
                value = _uncast_value(value, field=field)
            else:
                value = field.cast_value(value)
            row[index] = value
        return row

    def convert_type(self, type):
        """Convert type to BigQuery
        """

        # Mapping
        mapping = {
            'any': 'STRING',
            'array': None,
            'boolean': 'BOOLEAN',
            'date': 'DATE',
            'datetime': 'DATETIME',
            'duration': None,
            'geojson': None,
            'geopoint': None,
            'integer': 'INTEGER',
            'number': 'FLOAT',
            'object': None,
            'string': 'STRING',
            'time': 'TIME',
            'year': 'INTEGER',
            'yearmonth': None,
        }

        # Not supported type
        if type not in mapping:
            message = 'Type %s is not supported' % type
            raise tableschema.exceptions.StorageError(message)

        return mapping[type]

    def restore_bucket(self, table_name):
        """Restore bucket from BigQuery
        """
        if table_name.startswith(self.__prefix):
            return table_name.replace(self.__prefix, '', 1)
        return None

    def restore_descriptor(self, converted_descriptor):
        """Restore descriptor from BigQuery
        """

        # Convert
        fields = []
        for field in converted_descriptor['fields']:
            field_type = self.restore_type(field['type'])
            resfield = {
                'name': field['name'],
                'type': field_type,
            }
            if field.get('mode', 'NULLABLE') != 'NULLABLE':
                resfield['constraints'] = {'required': True}
            fields.append(resfield)
        descriptor = {'fields': fields}

        return descriptor

    def restore_row(self, row, schema):
        """Restore row from BigQuery
        """
        for index, field in enumerate(schema.fields):
            if field.type == 'datetime':
                row[index] = parse(row[index])
            if field.type == 'date':
                row[index] = parse(row[index]).date()
            if field.type == 'time':
                row[index] = parse(row[index]).time()
        return schema.cast_row(row)

    def restore_type(self, type):
        """Restore type from BigQuery
        """

        # Mapping
        mapping = {
            'BOOLEAN': 'boolean',
            'DATE': 'date',
            'DATETIME': 'datetime',
            'INTEGER': 'integer',
            'FLOAT': 'number',
            'STRING': 'string',
            'TIME': 'time',
        }

        # Not supported type
        if type not in mapping:
            message = 'Type %s is not supported' % type
            raise tableschema.exceptions.StorageError(message)

        return mapping[type]


# Internal

def _slugify_field_name(name):

    # Reference:
    # https://cloud.google.com/bigquery/docs/reference/v2/tables
    MAX_LENGTH = 128
    VALID_NAME = r'^[a-zA-Z_]\w{0,%d}$' % (MAX_LENGTH-1)

    # Convert
    if not re.match(VALID_NAME, name):
        name = slugify(name, separator='_')
        if not re.match('^[a-zA-Z_]', name):
            name = '_' + name

    return name[:MAX_LENGTH]


def _uncast_value(value, field):
    # Eventually should be moved to:
    # https://github.com/frictionlessdata/tableschema-py/issues/161
    if isinstance(value, (list, dict)):
        value = json.dumps(value)
    else:
        value = str(value)
    return value
--------------------------------------------------------------------------------
/tableschema_bigquery/storage.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
from __future__ import unicode_literals

import io
import six
import time
import unicodecsv
import tableschema
from apiclient.http import MediaIoBaseUpload
from .mapper import Mapper


# Module API

class Storage(tableschema.Storage):
    """BigQuery storage

    Package implements
    [Tabular Storage](https://github.com/frictionlessdata/tableschema-py#storage)
    interface (see full documentation on the link):

    ![Storage](https://i.imgur.com/RQgrxqp.png)

    > Only additional API is documented

    # Arguments
        service (object): BigQuery `Service` object
        project (str): BigQuery project name
        dataset (str): BigQuery dataset name
        prefix (str): prefix for all buckets

    """

    # Public

    def __init__(self, service, project, dataset, prefix=''):

        # Set attributes
        self.__service = service
        self.__project = project
        self.__dataset = dataset
        self.__prefix = prefix
        self.__buckets = None
        self.__descriptors = {}
        self.__fallbacks = {}

        # Create mapper
        self.__mapper = Mapper(prefix=prefix)

    def __repr__(self):

        # Template and format
        template = 'Storage <{service}/{project}-{dataset}>'
        text = template.format(
            service=self.__service,
            project=self.__project,
            dataset=self.__dataset)

        return text

    @property
    def buckets(self):

        # No cached value
        if self.__buckets is None:

            # Get response
            response = self.__service.tables().list(
                projectId=self.__project,
                datasetId=self.__dataset).execute()

            # Extract buckets
            self.__buckets = []
            for table in response.get('tables', []):
                table_name = table['tableReference']['tableId']
                bucket = self.__mapper.restore_bucket(table_name)
                if bucket is not None:
                    self.__buckets.append(bucket)

        return self.__buckets

    def create(self, bucket, descriptor, force=False):

        # Make lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        descriptors = descriptor
        if isinstance(descriptor, dict):
            descriptors = [descriptor]

        # Iterate over buckets/descriptors
        for bucket, descriptor in zip(buckets, descriptors):

            # Existent bucket
            if bucket in self.buckets:
                if not force:
                    message = 'Bucket "%s" already exists' % bucket
                    raise tableschema.exceptions.StorageError(message)
                self.delete(bucket)

            # Prepare job body
            tableschema.validate(descriptor)
            table_name = self.__mapper.convert_bucket(bucket)
            converted_descriptor, fallbacks = self.__mapper.convert_descriptor(descriptor)
            body = {
                'tableReference': {
                    'projectId': self.__project,
                    'datasetId': self.__dataset,
                    'tableId': table_name,
                },
                'schema': converted_descriptor,
            }

            # Make request
            self.__service.tables().insert(
                projectId=self.__project,
                datasetId=self.__dataset,
                body=body).execute()

            # Add to descriptors/fallbacks
            self.__descriptors[bucket] = descriptor
            self.__fallbacks[bucket] = fallbacks

        # Remove buckets cache
        self.__buckets = None

    def delete(self, bucket=None, ignore=False):

        # Make lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        elif bucket is None:
            buckets = reversed(self.buckets)

        # Iterate over buckets
        for bucket in buckets:

            # Non-existent bucket
            if bucket not in self.buckets:
                if not ignore:
                    message = 'Bucket "%s" doesn\'t exist.' % bucket
                    raise tableschema.exceptions.StorageError(message)
                return

            # Remove from descriptors
            if bucket in self.__descriptors:
                del self.__descriptors[bucket]

            # Make delete request
            table_name = self.__mapper.convert_bucket(bucket)
            self.__service.tables().delete(
                projectId=self.__project,
                datasetId=self.__dataset,
                tableId=table_name).execute()

        # Remove buckets cache
        self.__buckets = None
    def describe(self, bucket, descriptor=None):

        # Set descriptor
        if descriptor is not None:
            self.__descriptors[bucket] = descriptor

        # Get descriptor
        else:
            descriptor = self.__descriptors.get(bucket)
            if descriptor is None:
                table_name = self.__mapper.convert_bucket(bucket)
                response = self.__service.tables().get(
                    projectId=self.__project,
                    datasetId=self.__dataset,
                    tableId=table_name).execute()
                converted_descriptor = response['schema']
                descriptor = self.__mapper.restore_descriptor(converted_descriptor)

        return descriptor

    def iter(self, bucket):

        # Get schema/data
        schema = tableschema.Schema(self.describe(bucket))
        table_name = self.__mapper.convert_bucket(bucket)
        response = self.__service.tabledata().list(
            projectId=self.__project,
            datasetId=self.__dataset,
            tableId=table_name).execute()

        # Collect rows
        rows = []
        for fields in response['rows']:
            row = [field['v'] for field in fields['f']]
            rows.append(row)

        # Sort rows
        # TODO: provide proper sorting solution
        rows = sorted(rows, key=lambda row: row[0] if row[0] is not None else 'null')

        # Emit rows
        for row in rows:
            row = self.__mapper.restore_row(row, schema=schema)
            yield row

    def read(self, bucket):
        rows = list(self.iter(bucket))
        return rows

    def write(self, bucket, rows):

        # Write buffer
        BUFFER_SIZE = 10000

        # Prepare schema, fallbacks
        schema = tableschema.Schema(self.describe(bucket))
        fallbacks = self.__fallbacks.get(bucket, [])

        # Write data
        rows_buffer = []
        for row in rows:
            row = self.__mapper.convert_row(row, schema=schema, fallbacks=fallbacks)
            rows_buffer.append(row)
            if len(rows_buffer) > BUFFER_SIZE:
                self.__write_rows_buffer(bucket, rows_buffer)
                rows_buffer = []
        if len(rows_buffer) > 0:
            self.__write_rows_buffer(bucket, rows_buffer)
    # Private

    def __write_rows_buffer(self, bucket, rows_buffer):

        # Process data to byte stream csv
        bytes = io.BufferedRandom(io.BytesIO())
        writer = unicodecsv.writer(bytes, encoding='utf-8')
        for row in rows_buffer:
            writer.writerow(row)
        bytes.seek(0)

        # Prepare job body
        table_name = self.__mapper.convert_bucket(bucket)
        body = {
            'configuration': {
                'load': {
                    'destinationTable': {
                        'projectId': self.__project,
                        'datasetId': self.__dataset,
                        'tableId': table_name
                    },
                    'sourceFormat': 'CSV',
                }
            }
        }

        # Prepare job media body
        mimetype = 'application/octet-stream'
        media_body = MediaIoBaseUpload(bytes, mimetype=mimetype)

        # Make request to BigQuery
        response = self.__service.jobs().insert(
            projectId=self.__project,
            body=body,
            media_body=media_body).execute()
        self.__wait_response(response)

    def __wait_response(self, response):

        # Get job instance
        job = self.__service.jobs().get(
            projectId=response['jobReference']['projectId'],
            jobId=response['jobReference']['jobId'])

        # Wait done
        while True:
            result = job.execute(num_retries=1)
            if result['status']['state'] == 'DONE':
                if result['status'].get('errors'):
                    errors = result['status']['errors']
                    message = '\n'.join(error['message'] for error in errors)
                    raise tableschema.exceptions.StorageError(message)
                break
            time.sleep(1)
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frictionlessdata/tableschema-bigquery-py/7ed9d002620619a819f73d97e03257dcc715c7a4/tests/__init__.py

--------------------------------------------------------------------------------
/tests/test_mapper.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
from __future__ import unicode_literals

import pytest
from tableschema_bigquery.mapper import Mapper


# Tests

def test_mapper_convert_bucket():
    mapper = Mapper('prefix_')
    assert mapper.convert_bucket('bucket') == 'prefix_bucket'


def test_mapper_restore_bucket():
    mapper = Mapper('prefix_')
    assert mapper.restore_bucket('prefix_bucket') == 'bucket'
    assert mapper.restore_bucket('xxxxxx_bucket') is None
--------------------------------------------------------------------------------
/tests/test_storage.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
from __future__ import unicode_literals

import os
import io
import json
import uuid
import pytest
import datetime
import tableschema
from copy import deepcopy
from decimal import Decimal
from tabulator import Stream
from apiclient.discovery import build
from oauth2client.client import GoogleCredentials
from tableschema_bigquery import Storage


# Resources

ARTICLES = {
    'schema': {
        'fields': [
            {'name': 'id', 'type': 'integer', 'constraints': {'required': True}},
            {'name': 'parent', 'type': 'integer'},
            {'name': 'name', 'type': 'string'},
            {'name': 'current', 'type': 'boolean'},
            {'name': 'rating', 'type': 'number'},
        ],
        # 'primaryKey': 'id',
        # 'foreignKeys': [
        #     {'fields': 'parent', 'reference': {'resource': '', 'fields': 'id'}},
        # ],
    },
    'data': [
        ['1', '', 'Taxes', 'True', '9.5'],
        ['2', '1', '中国人', 'False', '7'],
    ],
}
COMMENTS = {
    'schema': {
        'fields': [
            {'name': 'entry_id', 'type': 'integer', 'constraints': {'required': True}},
            {'name': 'comment', 'type': 'string'},
            {'name': 'note', 'type': 'any'},
        ],
        # 'primaryKey': 'entry_id',
        # 'foreignKeys': [
        #     {'fields': 'entry_id', 'reference': {'resource': 'articles', 'fields': 'id'}},
        # ],
    },
    'data': [
        ['1', 'good', 'note1'],
        ['2', 'bad', 'note2'],
    ],
}
TEMPORAL = {
    'schema': {
        'fields': [
            {'name': 'date', 'type': 'date'},
            {'name': 'date_year', 'type': 'date', 'format': '%Y'},
            {'name': 'datetime', 'type': 'datetime'},
            {'name': 'duration', 'type': 'duration'},
            {'name': 'time', 'type': 'time'},
            {'name': 'year', 'type': 'year'},
            {'name': 'yearmonth', 'type': 'yearmonth'},
        ],
    },
    'data': [
        ['2015-01-01', '2015', '2015-01-01T03:00:00Z', 'P1Y1M', '03:00:00', '2015', '2015-01'],
        ['2015-12-31', '2015', '2015-12-31T15:45:33Z', 'P2Y2M', '15:45:33', '2015', '2015-01'],
    ],
}
LOCATION = {
    'schema': {
        'fields': [
            {'name': 'location', 'type': 'geojson'},
            {'name': 'geopoint', 'type': 'geopoint'},
        ],
    },
    'data': [
        ['{"type": "Point","coordinates":[33.33,33.33]}', '30,75'],
        ['{"type": "Point","coordinates":[50.00,50.00]}', '90,45'],
    ],
}
COMPOUND = {
    'schema': {
        'fields': [
            {'name': 'stats', 'type': 'object'},
            {'name': 'persons', 'type': 'array'},
        ],
    },
    'data': [
        ['{"chars":560}', '["Mike", "John"]'],
        ['{"chars":970}', '["Paul", "Alex"]'],
    ],
}


# Credentials

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '.credentials.json'
CREDENTIALS = GoogleCredentials.get_application_default()
SERVICE = build('bigquery', 'v2', credentials=CREDENTIALS)
PROJECT = json.load(io.open('.credentials.json', encoding='utf-8'))['project_id']
DATASET = 'resource'
PREFIX = '%s_' % uuid.uuid4().hex


# Tests

def test_storage():

    # Create storage
    storage = Storage(SERVICE, project=PROJECT, dataset=DATASET, prefix=PREFIX)

    # Delete buckets
    storage.delete()

    # Create buckets
    storage.create(['articles', 'comments'], [ARTICLES['schema'], COMMENTS['schema']])
    # TODO: investigate why it fails
    # storage.create('comments', COMMENTS['schema'], force=True)
    storage.create('temporal', TEMPORAL['schema'])
    storage.create('location', LOCATION['schema'])
    storage.create('compound', COMPOUND['schema'])

    # Write data
    storage.write('articles', ARTICLES['data'])
    storage.write('comments', COMMENTS['data'])
    storage.write('temporal', TEMPORAL['data'])
    storage.write('location', LOCATION['data'])
    storage.write('compound', COMPOUND['data'])

    # Create new storage to use reflection only
    storage = Storage(SERVICE, project=PROJECT, dataset=DATASET, prefix=PREFIX)

    # Create existent bucket
    # TODO: investigate why it fails
    # with pytest.raises(tableschema.exceptions.StorageError):
    #     storage.create('articles', ARTICLES['schema'])

    # Assert buckets
    assert storage.buckets == ['articles', 'comments', 'compound', 'location', 'temporal']

    # Assert schemas
    assert storage.describe('articles') == ARTICLES['schema']
    assert storage.describe('comments') == {
        'fields': [
            {'name': 'entry_id', 'type': 'integer', 'constraints': {'required': True}},
            {'name': 'comment', 'type': 'string'},
            {'name': 'note', 'type': 'string'},  # type downgrade
        ],
    }
    assert storage.describe('temporal') == {
        'fields': [
            {'name': 'date', 'type': 'date'},
            {'name': 'date_year', 'type': 'date'},  # format removal
            {'name': 'datetime', 'type': 'datetime'},
            {'name': 'duration', 'type': 'string'},  # type fallback
            {'name': 'time', 'type': 'time'},
            {'name': 'year', 'type': 'integer'},  # type downgrade
            {'name': 'yearmonth', 'type': 'string'},  # type fallback
        ],
    }
    assert storage.describe('location') == {
        'fields': [
            {'name': 'location', 'type': 'string'},  # type fallback
            {'name': 'geopoint', 'type': 'string'},  # type fallback
        ],
    }
    assert storage.describe('compound') == {
        'fields': [
            {'name': 'stats', 'type': 'string'},  # type fallback
            {'name': 'persons', 'type': 'string'},  # type fallback
        ],
    }

    # Assert data
    assert storage.read('articles') == cast(ARTICLES)['data']
    assert storage.read('comments') == cast(COMMENTS)['data']
    assert storage.read('temporal') == cast(TEMPORAL, skip=['duration', 'yearmonth'])['data']
    assert storage.read('location') == cast(LOCATION, skip=['geojson', 'geopoint'])['data']
    assert storage.read('compound') == cast(COMPOUND, skip=['array', 'object'])['data']

    # Assert data with forced schema
    storage.describe('compound', COMPOUND['schema'])
    assert storage.read('compound') == cast(COMPOUND)['data']

    # Delete non existent bucket
    with pytest.raises(tableschema.exceptions.StorageError):
        storage.delete('non_existent')

    # Delete buckets
    storage.delete()


def test_storage_bigdata():
    RESOURCE = {
        'schema': {
            'fields': [
                {'name': 'id', 'type': 'integer'}
            ]
        },
        'data': [[value,] for value in range(0, 15000)]
    }

    # Write data
    storage = Storage(SERVICE, project=PROJECT, dataset=DATASET, prefix=PREFIX)
    storage.create('bucket', RESOURCE['schema'], force=True)
    storage.write('bucket', RESOURCE['data'])

    # Pull rows
    # TODO: remove sorting after proper sorting solution implementation
    assert sorted(storage.read('bucket'), key=lambda row: row[0]) == RESOURCE['data']


# Helpers

def cast(resource, skip=[]):
    resource = deepcopy(resource)
    schema = tableschema.Schema(resource['schema'])
    for row in resource['data']:
        for index, field in enumerate(schema.fields):
            if field.type not in skip:
                row[index] = field.cast_value(row[index])
    return resource
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
[tox]
package=tableschema_bigquery
skip_missing_interpreters=true
envlist=
    py27
    py36
    py37
    py38

[testenv]
deps=
    mock
    pytest
    pytest-cov
    coverage
    oauth2client
passenv=
    CI
    TRAVIS
    TRAVIS_JOB_ID
    TRAVIS_BRANCH
commands=
    py.test \
        --cov {[tox]package} \
        --cov-config tox.ini \
        --cov-report term-missing \
        {posargs}

--------------------------------------------------------------------------------
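As a closing note, here is a minimal sketch of the `Mapper` conversion implemented in `tableschema_bigquery/mapper.py` above. The descriptor is an arbitrary example, and the commented output is inferred from the type mapping in `convert_type` rather than captured from a live run:

```python
# A self-contained sketch of the Mapper conversion (see mapper.py above)
from tableschema_bigquery.mapper import Mapper

mapper = Mapper(prefix='test_')

# Buckets map to prefixed BigQuery table names (and back)
assert mapper.convert_bucket('articles') == 'test_articles'
assert mapper.restore_bucket('test_articles') == 'articles'

# Descriptors map to BigQuery schemas; unsupported types such as
# geojson fall back to STRING and their indexes are reported back
descriptor = {
    'fields': [
        {'name': 'id', 'type': 'integer', 'constraints': {'required': True}},
        {'name': 'location', 'type': 'geojson'},
    ],
}
converted, fallbacks = mapper.convert_descriptor(descriptor)
print(converted)
# {'fields': [{'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED'},
#             {'name': 'location', 'type': 'STRING', 'mode': 'NULLABLE'}]}
print(fallbacks)
# [1]
```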