├── server ├── static │ ├── robots.txt │ ├── google2ed1af765c529f57.html │ ├── google76d23419c98fdd46.html │ ├── favicon.ico │ └── dart-logo.png ├── index.yaml ├── scripts │ ├── __init__.py │ ├── README.txt │ ├── cloudstorage │ │ ├── test_utils.py │ │ ├── __init__.py │ │ ├── errors.py │ │ ├── rest_api.py │ │ ├── api_utils.py │ │ ├── common.py │ │ ├── cloudstorage_api.py │ │ └── storage_api.py │ └── redirector.py └── app.yaml ├── .gitignore ├── AUTHORS ├── README.md └── LICENSE /server/static/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /build 3 | Disallow: /dev 4 | -------------------------------------------------------------------------------- /server/static/google2ed1af765c529f57.html: -------------------------------------------------------------------------------- 1 | google-site-verification: google2ed1af765c529f57.html -------------------------------------------------------------------------------- /server/static/google76d23419c98fdd46.html: -------------------------------------------------------------------------------- 1 | google-site-verification: google76d23419c98fdd46.html -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | packages 2 | out 3 | pubspec.lock 4 | .project 5 | tags 6 | .DS_Store 7 | *.pyc 8 | .buildlog 9 | -------------------------------------------------------------------------------- /server/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dart-archive/api.dart.dev/master/server/static/favicon.ico -------------------------------------------------------------------------------- /server/static/dart-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dart-archive/api.dart.dev/master/server/static/dart-logo.png -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # Below is a list of people and organizations that have contributed 2 | # to the project. Names should be added to the list like so: 3 | # 4 | # Name/Organization 5 | 6 | Google Inc. 7 | -------------------------------------------------------------------------------- /server/index.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 2 | # for details. All rights reserved. Use of this source code is governed by a 3 | # BSD-style license that can be found in the LICENSE file. -------------------------------------------------------------------------------- /server/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 2 | # for details. All rights reserved. Use of this source code is governed by a 3 | # BSD-style license that can be found in the LICENSE file. -------------------------------------------------------------------------------- /server/scripts/README.txt: -------------------------------------------------------------------------------- 1 | This contains the scripts used to handle requests to the web site. 
2 | 3 | - redirector.py: The main script, redirects packages to dartdocs.org 4 | and handles cloud storage requests for the main pages. 5 | 6 | - cloudstorage: The cloud storage API code, downloaded from 7 | https://cloud.google.com/appengine/docs/python/googlecloudstorageclient/download 8 | -------------------------------------------------------------------------------- /server/scripts/cloudstorage/test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2013 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific 13 | # language governing permissions and limitations under the License. 14 | 15 | """Utils for testing.""" 16 | 17 | 18 | class MockUrlFetchResult(object): 19 | 20 | def __init__(self, status, headers, body): 21 | self.status_code = status 22 | self.headers = headers 23 | self.content = body 24 | self.content_was_truncated = False 25 | self.final_url = None 26 | -------------------------------------------------------------------------------- /server/scripts/cloudstorage/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2014 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific 13 | # language governing permissions and limitations under the License. 14 | 15 | """Client Library for Google Cloud Storage.""" 16 | 17 | 18 | 19 | 20 | from .api_utils import RetryParams 21 | from .api_utils import set_default_retry_params 22 | from cloudstorage_api import * 23 | from .common import CSFileStat 24 | from .common import GCSFileStat 25 | from .common import validate_bucket_name 26 | from .common import validate_bucket_path 27 | from .common import validate_file_path 28 | from errors import * 29 | from storage_api import * 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # api.dart.dev server 2 | 3 | An App Engine server that fronts a Google Cloud Storage 4 | repository of Dart API docs. 5 | 6 | See LICENSE. 7 | 8 | ## Link structure 9 | 10 | First, read how 11 | [dartdoc structures links](https://github.com/dart-lang/dartdoc/blob/master/README.md#link-structure). 12 | 13 | The api.dart.dev server expands some of the link structure from dartdoc.
14 | 15 | ``` 16 | / ==> /stable 17 | /stable ==> /<latest-stable-version>/index.html 18 | /beta ==> /<latest-beta-version>/index.html 19 | /dev ==> /<latest-dev-version>/index.html 20 | /main ==> /<latest-main-version>/index.html 21 | 22 | /stable/dart-async/Future-class.html ==> /<latest-stable-version>/dart-async/Future-class.html 23 | (same for beta, dev, and main) 24 | ``` 25 | 26 | ## Deployment 27 | 28 | 1. Install the [Google Cloud SDK][gcloud]. 29 | 30 | 1. Run `gcloud auth login` 31 | 32 | 1. Run `gcloud config set app/promote_by_default false` to avoid accidentally 33 | deploying a test version. 34 | 35 | 1. Run `gcloud config set project dartlang-api` 36 | 37 | 1. Run `gcloud app deploy -v name-of-new-version server/app.yaml` and test 38 | 39 | 1. Run `gcloud app deploy -v name-of-new-version --promote server/app.yaml` to 40 | make this version the default 41 | 42 | 43 | [gcloud]: https://cloud.google.com/sdk/downloads 44 | -------------------------------------------------------------------------------- /server/app.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 2 | # for details. All rights reserved. Use of this source code is governed by a 3 | # BSD-style license that can be found in the LICENSE file. 4 | 5 | runtime: python27 6 | api_version: 1 7 | threadsafe: true 8 | 9 | handlers: 10 | # It seems considerably cleaner to write (stable|dev|be) instead of .*, but it 11 | # doesn't work for some reason. 12 | 13 | - url: /apidocs/channels/.*/dartdoc-viewer/home 14 | script: scripts.redirector.application 15 | 16 | - url: /apidocs/channels/.*/dartdoc-viewer/.*static 17 | script: scripts.redirector.application 18 | 19 | - url: /apidocs/channels/.*/dartdoc-viewer/.*packages 20 | script: scripts.redirector.application 21 | 22 | - url: /favicon.ico 23 | static_files: static/favicon.ico 24 | upload: static/favicon.ico 25 | 26 | - url: /robots.txt 27 | static_files: static/robots.txt 28 | upload: static/robots.txt 29 | 30 | - url: /google76d23419c98fdd46.html 31 | static_files: static/google76d23419c98fdd46.html 32 | upload: static/google76d23419c98fdd46.html 33 | 34 | - url: /google2ed1af765c529f57.html 35 | static_files: static/google2ed1af765c529f57.html 36 | upload: static/google2ed1af765c529f57.html 37 | 38 | - url: /apidocs/channels/.*/docs/.* 39 | script: scripts.redirector.application 40 | 41 | - url: /.* 42 | script: scripts.redirector.application 43 | secure: always 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2014, the Dart project authors. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following 11 | disclaimer in the documentation and/or other materials provided 12 | with the distribution. 13 | * Neither the name of Google LLC nor the names of its 14 | contributors may be used to endorse or promote products derived 15 | from this software without specific prior written permission.
16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /server/scripts/cloudstorage/errors.py: -------------------------------------------------------------------------------- 1 | # Copyright 2012 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific 13 | # language governing permissions and limitations under the License. 14 | 15 | """Google Cloud Storage specific Files API calls.""" 16 | 17 | 18 | 19 | 20 | 21 | __all__ = ['AuthorizationError', 22 | 'check_status', 23 | 'Error', 24 | 'FatalError', 25 | 'FileClosedError', 26 | 'ForbiddenError', 27 | 'InvalidRange', 28 | 'NotFoundError', 29 | 'ServerError', 30 | 'TimeoutError', 31 | 'TransientError', 32 | ] 33 | 34 | import httplib 35 | 36 | 37 | class Error(Exception): 38 | """Base error for all gcs operations. 39 | 40 | Error can happen on GAE side or GCS server side. 41 | For details on a particular GCS HTTP response code, see 42 | https://developers.google.com/storage/docs/reference-status#standardcodes 43 | """ 44 | 45 | 46 | class TransientError(Error): 47 | """TransientError could be retried.""" 48 | 49 | 50 | class TimeoutError(TransientError): 51 | """HTTP 408 timeout.""" 52 | 53 | 54 | class FatalError(Error): 55 | """FatalError shouldn't be retried.""" 56 | 57 | 58 | class FileClosedError(FatalError): 59 | """File is already closed. 60 | 61 | This can happen when the upload has finished but 'write' is called on 62 | a stale upload handle. 63 | """ 64 | 65 | 66 | class NotFoundError(FatalError): 67 | """HTTP 404 resource not found.""" 68 | 69 | 70 | class ForbiddenError(FatalError): 71 | """HTTP 403 Forbidden. 72 | 73 | While GCS replies with a 403 error for many reasons, the most common one 74 | is due to bucket permission not correctly setup for your app to access. 75 | """ 76 | 77 | 78 | class AuthorizationError(FatalError): 79 | """HTTP 401 authentication required. 80 | 81 | Unauthorized request has been received by GCS. 82 | 83 | This error is mostly handled by GCS client. GCS client will request 84 | a new access token and retry the request. 
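    Example (an illustrative sketch, not part of the original docstring;
    '/bucket/obj' is a hypothetical path):

      try:
        check_status(status, [200], '/bucket/obj')
      except AuthorizationError:
        # Callers such as the GCS client typically fetch a fresh access
        # token and retry the request.
        raise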
85 | """ 86 | 87 | 88 | class InvalidRange(FatalError): 89 | """HTTP 416 RequestRangeNotSatifiable.""" 90 | 91 | 92 | class ServerError(TransientError): 93 | """HTTP >= 500 server side error.""" 94 | 95 | 96 | def check_status(status, expected, path, headers=None, 97 | resp_headers=None, body=None, extras=None): 98 | """Check HTTP response status is expected. 99 | 100 | Args: 101 | status: HTTP response status. int. 102 | expected: a list of expected statuses. A list of ints. 103 | path: filename or a path prefix. 104 | headers: HTTP request headers. 105 | resp_headers: HTTP response headers. 106 | body: HTTP response body. 107 | extras: extra info to be logged verbatim if error occurs. 108 | 109 | Raises: 110 | AuthorizationError: if authorization failed. 111 | NotFoundError: if an object that's expected to exist doesn't. 112 | TimeoutError: if HTTP request timed out. 113 | ServerError: if server experienced some errors. 114 | FatalError: if any other unexpected errors occurred. 115 | """ 116 | if status in expected: 117 | return 118 | 119 | msg = ('Expect status %r from Google Storage. But got status %d.\n' 120 | 'Path: %r.\n' 121 | 'Request headers: %r.\n' 122 | 'Response headers: %r.\n' 123 | 'Body: %r.\n' 124 | 'Extra info: %r.\n' % 125 | (expected, status, path, headers, resp_headers, body, extras)) 126 | 127 | if status == httplib.UNAUTHORIZED: 128 | raise AuthorizationError(msg) 129 | elif status == httplib.FORBIDDEN: 130 | raise ForbiddenError(msg) 131 | elif status == httplib.NOT_FOUND: 132 | raise NotFoundError(msg) 133 | elif status == httplib.REQUEST_TIMEOUT: 134 | raise TimeoutError(msg) 135 | elif status == httplib.REQUESTED_RANGE_NOT_SATISFIABLE: 136 | raise InvalidRange(msg) 137 | elif (status == httplib.OK and 308 in expected and 138 | httplib.OK not in expected): 139 | raise FileClosedError(msg) 140 | elif status >= 500: 141 | raise ServerError(msg) 142 | else: 143 | raise FatalError(msg) 144 | -------------------------------------------------------------------------------- /server/scripts/cloudstorage/rest_api.py: -------------------------------------------------------------------------------- 1 | # Copyright 2012 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific 13 | # language governing permissions and limitations under the License. 14 | 15 | """Base and helper classes for Google RESTful APIs.""" 16 | 17 | 18 | 19 | 20 | 21 | __all__ = ['add_sync_methods'] 22 | 23 | import random 24 | import time 25 | 26 | from . import api_utils 27 | 28 | try: 29 | from google.appengine.api import app_identity 30 | from google.appengine.ext import ndb 31 | except ImportError: 32 | from google.appengine.api import app_identity 33 | from google.appengine.ext import ndb 34 | 35 | 36 | 37 | def _make_sync_method(name): 38 | """Helper to synthesize a synchronous method from an async method name. 39 | 40 | Used by the @add_sync_methods class decorator below. 41 | 42 | Args: 43 | name: The name of the synchronous method. 
44 | 45 | Returns: 46 | A method (with first argument 'self') that retrieves and calls 47 | self., passing its own arguments, expects it to return a 48 | Future, and then waits for and returns that Future's result. 49 | """ 50 | 51 | def sync_wrapper(self, *args, **kwds): 52 | method = getattr(self, name) 53 | future = method(*args, **kwds) 54 | return future.get_result() 55 | 56 | return sync_wrapper 57 | 58 | 59 | def add_sync_methods(cls): 60 | """Class decorator to add synchronous methods corresponding to async methods. 61 | 62 | This modifies the class in place, adding additional methods to it. 63 | If a synchronous method of a given name already exists it is not 64 | replaced. 65 | 66 | Args: 67 | cls: A class. 68 | 69 | Returns: 70 | The same class, modified in place. 71 | """ 72 | for name in cls.__dict__.keys(): 73 | if name.endswith('_async'): 74 | sync_name = name[:-6] 75 | if not hasattr(cls, sync_name): 76 | setattr(cls, sync_name, _make_sync_method(name)) 77 | return cls 78 | 79 | 80 | class _AE_TokenStorage_(ndb.Model): 81 | """Entity to store app_identity tokens in memcache.""" 82 | 83 | token = ndb.StringProperty() 84 | expires = ndb.FloatProperty() 85 | 86 | 87 | @ndb.tasklet 88 | def _make_token_async(scopes, service_account_id): 89 | """Get a fresh authentication token. 90 | 91 | Args: 92 | scopes: A list of scopes. 93 | service_account_id: Internal-use only. 94 | 95 | Raises: 96 | An ndb.Return with a tuple (token, expiration_time) where expiration_time is 97 | seconds since the epoch. 98 | """ 99 | rpc = app_identity.create_rpc() 100 | app_identity.make_get_access_token_call(rpc, scopes, service_account_id) 101 | token, expires_at = yield rpc 102 | raise ndb.Return((token, expires_at)) 103 | 104 | 105 | class _RestApi(object): 106 | """Base class for REST-based API wrapper classes. 107 | 108 | This class manages authentication tokens and request retries. All 109 | APIs are available as synchronous and async methods; synchronous 110 | methods are synthesized from async ones by the add_sync_methods() 111 | function in this module. 112 | 113 | WARNING: Do NOT directly use this api. It's an implementation detail 114 | and is subject to change at any release. 115 | """ 116 | 117 | def __init__(self, scopes, service_account_id=None, token_maker=None, 118 | retry_params=None): 119 | """Constructor. 120 | 121 | Args: 122 | scopes: A scope or a list of scopes. 123 | service_account_id: Internal use only. 124 | token_maker: An asynchronous function of the form 125 | (scopes, service_account_id) -> (token, expires). 126 | retry_params: An instance of api_utils.RetryParams. If None, the 127 | default for current thread will be used. 
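    Example (an illustrative sketch only; per the warning in the class
    docstring this API is an implementation detail, so the snippet is for
    understanding rather than production use. The scope shown is the standard
    GCS read/write scope; the URL is a placeholder. do_request is the
    synchronous wrapper that add_sync_methods() synthesizes from
    do_request_async):

      api = _RestApi('https://www.googleapis.com/auth/devstorage.read_write',
                     retry_params=api_utils.RetryParams(max_retries=3))
      status, headers, body = api.do_request(
          'https://storage.googleapis.com/some-bucket/some-object')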
128 | """ 129 | 130 | if isinstance(scopes, basestring): 131 | scopes = [scopes] 132 | self.scopes = scopes 133 | self.service_account_id = service_account_id 134 | self.make_token_async = token_maker or _make_token_async 135 | if not retry_params: 136 | retry_params = api_utils._get_default_retry_params() 137 | self.retry_params = retry_params 138 | self.user_agent = {'User-Agent': retry_params._user_agent} 139 | self.expiration_headroom = random.randint(60, 240) 140 | 141 | def __getstate__(self): 142 | """Store state as part of serialization/pickling.""" 143 | return {'scopes': self.scopes, 144 | 'id': self.service_account_id, 145 | 'a_maker': (None if self.make_token_async == _make_token_async 146 | else self.make_token_async), 147 | 'retry_params': self.retry_params, 148 | 'expiration_headroom': self.expiration_headroom} 149 | 150 | def __setstate__(self, state): 151 | """Restore state as part of deserialization/unpickling.""" 152 | self.__init__(state['scopes'], 153 | service_account_id=state['id'], 154 | token_maker=state['a_maker'], 155 | retry_params=state['retry_params']) 156 | self.expiration_headroom = state['expiration_headroom'] 157 | 158 | @ndb.tasklet 159 | def do_request_async(self, url, method='GET', headers=None, payload=None, 160 | deadline=None, callback=None): 161 | """Issue one HTTP request. 162 | 163 | It performs async retries using tasklets. 164 | 165 | Args: 166 | url: the url to fetch. 167 | method: the method in which to fetch. 168 | headers: the http headers. 169 | payload: the data to submit in the fetch. 170 | deadline: the deadline in which to make the call. 171 | callback: the call to make once completed. 172 | 173 | Yields: 174 | The async fetch of the url. 175 | """ 176 | retry_wrapper = api_utils._RetryWrapper( 177 | self.retry_params, 178 | retriable_exceptions=api_utils._RETRIABLE_EXCEPTIONS, 179 | should_retry=api_utils._should_retry) 180 | resp = yield retry_wrapper.run( 181 | self.urlfetch_async, 182 | url=url, 183 | method=method, 184 | headers=headers, 185 | payload=payload, 186 | deadline=deadline, 187 | callback=callback, 188 | follow_redirects=False) 189 | raise ndb.Return((resp.status_code, resp.headers, resp.content)) 190 | 191 | @ndb.tasklet 192 | def get_token_async(self, refresh=False): 193 | """Get an authentication token. 194 | 195 | The token is cached in memcache, keyed by the scopes argument. 196 | Uses a random token expiration headroom value generated in the constructor 197 | to eliminate a burst of GET_ACCESS_TOKEN API requests. 198 | 199 | Args: 200 | refresh: If True, ignore a cached token; default False. 201 | 202 | Yields: 203 | An authentication token. This token is guaranteed to be non-expired. 
204 | """ 205 | key = '%s,%s' % (self.service_account_id, ','.join(self.scopes)) 206 | ts = yield _AE_TokenStorage_.get_by_id_async( 207 | key, use_cache=True, use_memcache=True, 208 | use_datastore=self.retry_params.save_access_token) 209 | if refresh or ts is None or ts.expires < ( 210 | time.time() + self.expiration_headroom): 211 | token, expires_at = yield self.make_token_async( 212 | self.scopes, self.service_account_id) 213 | timeout = int(expires_at - time.time()) 214 | ts = _AE_TokenStorage_(id=key, token=token, expires=expires_at) 215 | if timeout > 0: 216 | yield ts.put_async(memcache_timeout=timeout, 217 | use_datastore=self.retry_params.save_access_token, 218 | use_cache=True, use_memcache=True) 219 | raise ndb.Return(ts.token) 220 | 221 | @ndb.tasklet 222 | def urlfetch_async(self, url, method='GET', headers=None, 223 | payload=None, deadline=None, callback=None, 224 | follow_redirects=False): 225 | """Make an async urlfetch() call. 226 | 227 | This is an async wrapper around urlfetch(). It adds an authentication 228 | header. 229 | 230 | Args: 231 | url: the url to fetch. 232 | method: the method in which to fetch. 233 | headers: the http headers. 234 | payload: the data to submit in the fetch. 235 | deadline: the deadline in which to make the call. 236 | callback: the call to make once completed. 237 | follow_redirects: whether or not to follow redirects. 238 | 239 | Yields: 240 | This returns a Future despite not being decorated with @ndb.tasklet! 241 | """ 242 | headers = {} if headers is None else dict(headers) 243 | headers.update(self.user_agent) 244 | self.token = yield self.get_token_async() 245 | if self.token: 246 | headers['authorization'] = 'OAuth ' + self.token 247 | 248 | deadline = deadline or self.retry_params.urlfetch_timeout 249 | 250 | ctx = ndb.get_context() 251 | resp = yield ctx.urlfetch( 252 | url, payload=payload, method=method, 253 | headers=headers, follow_redirects=follow_redirects, 254 | deadline=deadline, callback=callback) 255 | raise ndb.Return(resp) 256 | 257 | 258 | _RestApi = add_sync_methods(_RestApi) 259 | -------------------------------------------------------------------------------- /server/scripts/cloudstorage/api_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2013 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific 13 | # language governing permissions and limitations under the License. 
14 | 15 | """Util functions and classes for cloudstorage_api.""" 16 | 17 | 18 | 19 | __all__ = ['set_default_retry_params', 20 | 'RetryParams', 21 | ] 22 | 23 | import copy 24 | import httplib 25 | import logging 26 | import math 27 | import os 28 | import threading 29 | import time 30 | import urllib 31 | 32 | 33 | try: 34 | from google.appengine.api import app_identity 35 | from google.appengine.api import urlfetch 36 | from google.appengine.datastore import datastore_rpc 37 | from google.appengine.ext import ndb 38 | from google.appengine.ext.ndb import eventloop 39 | from google.appengine.ext.ndb import tasklets 40 | from google.appengine.ext.ndb import utils 41 | from google.appengine import runtime 42 | from google.appengine.runtime import apiproxy_errors 43 | except ImportError: 44 | from google.appengine.api import app_identity 45 | from google.appengine.api import urlfetch 46 | from google.appengine.datastore import datastore_rpc 47 | from google.appengine import runtime 48 | from google.appengine.runtime import apiproxy_errors 49 | from google.appengine.ext import ndb 50 | from google.appengine.ext.ndb import eventloop 51 | from google.appengine.ext.ndb import tasklets 52 | from google.appengine.ext.ndb import utils 53 | 54 | 55 | _RETRIABLE_EXCEPTIONS = (urlfetch.DownloadError, 56 | apiproxy_errors.Error, 57 | app_identity.InternalError, 58 | app_identity.BackendDeadlineExceeded) 59 | 60 | _thread_local_settings = threading.local() 61 | _thread_local_settings.default_retry_params = None 62 | 63 | 64 | def set_default_retry_params(retry_params): 65 | """Set a default RetryParams for current thread current request.""" 66 | _thread_local_settings.default_retry_params = copy.copy(retry_params) 67 | 68 | 69 | def _get_default_retry_params(): 70 | """Get default RetryParams for current request and current thread. 71 | 72 | Returns: 73 | A new instance of the default RetryParams. 74 | """ 75 | default = getattr(_thread_local_settings, 'default_retry_params', None) 76 | if default is None or not default.belong_to_current_request(): 77 | return RetryParams() 78 | else: 79 | return copy.copy(default) 80 | 81 | 82 | def _quote_filename(filename): 83 | """Quotes filename to use as a valid URI path. 84 | 85 | Args: 86 | filename: user provided filename. /bucket/filename. 87 | 88 | Returns: 89 | The filename properly quoted to use as URI's path component. 90 | """ 91 | return urllib.quote(filename) 92 | 93 | 94 | def _unquote_filename(filename): 95 | """Unquotes a valid URI path back to its filename. 96 | 97 | This is the opposite of _quote_filename. 98 | 99 | Args: 100 | filename: a quoted filename. /bucket/some%20filename. 101 | 102 | Returns: 103 | The filename unquoted. 104 | """ 105 | return urllib.unquote(filename) 106 | 107 | 108 | def _should_retry(resp): 109 | """Given a urlfetch response, decide whether to retry that request.""" 110 | return (resp.status_code == httplib.REQUEST_TIMEOUT or 111 | (resp.status_code >= 500 and 112 | resp.status_code < 600)) 113 | 114 | 115 | class _RetryWrapper(object): 116 | """A wrapper that wraps retry logic around any tasklet.""" 117 | 118 | def __init__(self, 119 | retry_params, 120 | retriable_exceptions=_RETRIABLE_EXCEPTIONS, 121 | should_retry=lambda r: False): 122 | """Init. 123 | 124 | Args: 125 | retry_params: an RetryParams instance. 126 | retriable_exceptions: a list of exception classes that are retriable. 127 | should_retry: a function that takes a result from the tasklet and returns 128 | a boolean. True if the result should be retried. 
129 | """ 130 | self.retry_params = retry_params 131 | self.retriable_exceptions = retriable_exceptions 132 | self.should_retry = should_retry 133 | 134 | @ndb.tasklet 135 | def run(self, tasklet, **kwds): 136 | """Run a tasklet with retry. 137 | 138 | The retry should be transparent to the caller: if no results 139 | are successful, the exception or result from the last retry is returned 140 | to the caller. 141 | 142 | Args: 143 | tasklet: the tasklet to run. 144 | **kwds: keywords arguments to run the tasklet. 145 | 146 | Raises: 147 | The exception from running the tasklet. 148 | 149 | Returns: 150 | The result from running the tasklet. 151 | """ 152 | start_time = time.time() 153 | n = 1 154 | 155 | while True: 156 | e = None 157 | result = None 158 | got_result = False 159 | 160 | try: 161 | result = yield tasklet(**kwds) 162 | got_result = True 163 | if not self.should_retry(result): 164 | raise ndb.Return(result) 165 | except runtime.DeadlineExceededError: 166 | logging.debug( 167 | 'Tasklet has exceeded request deadline after %s seconds total', 168 | time.time() - start_time) 169 | raise 170 | except self.retriable_exceptions, e: 171 | pass 172 | 173 | if n == 1: 174 | logging.debug('Tasklet is %r', tasklet) 175 | 176 | delay = self.retry_params.delay(n, start_time) 177 | 178 | if delay <= 0: 179 | logging.debug( 180 | 'Tasklet failed after %s attempts and %s seconds in total', 181 | n, time.time() - start_time) 182 | if got_result: 183 | raise ndb.Return(result) 184 | elif e is not None: 185 | raise e 186 | else: 187 | assert False, 'Should never reach here.' 188 | 189 | if got_result: 190 | logging.debug( 191 | 'Got result %r from tasklet.', result) 192 | else: 193 | logging.debug( 194 | 'Got exception "%r" from tasklet.', e) 195 | logging.debug('Retry in %s seconds.', delay) 196 | n += 1 197 | yield tasklets.sleep(delay) 198 | 199 | 200 | class RetryParams(object): 201 | """Retry configuration parameters.""" 202 | 203 | _DEFAULT_USER_AGENT = 'App Engine Python GCS Client' 204 | 205 | @datastore_rpc._positional(1) 206 | def __init__(self, 207 | backoff_factor=2.0, 208 | initial_delay=0.1, 209 | max_delay=10.0, 210 | min_retries=3, 211 | max_retries=6, 212 | max_retry_period=30.0, 213 | urlfetch_timeout=None, 214 | save_access_token=False, 215 | _user_agent=None): 216 | """Init. 217 | 218 | This object is unique per request per thread. 219 | 220 | Library will retry according to this setting when App Engine Server 221 | can't call urlfetch, urlfetch timed out, or urlfetch got a 408 or 222 | 500-600 response. 223 | 224 | Args: 225 | backoff_factor: exponential backoff multiplier. 226 | initial_delay: seconds to delay for the first retry. 227 | max_delay: max seconds to delay for every retry. 228 | min_retries: min number of times to retry. This value is automatically 229 | capped by max_retries. 230 | max_retries: max number of times to retry. Set this to 0 for no retry. 231 | max_retry_period: max total seconds spent on retry. Retry stops when 232 | this period passed AND min_retries has been attempted. 233 | urlfetch_timeout: timeout for urlfetch in seconds. Could be None, 234 | in which case the value will be chosen by urlfetch module. 235 | save_access_token: persist access token to datastore to avoid 236 | excessive usage of GetAccessToken API. Usually the token is cached 237 | in process and in memcache. In some cases, memcache isn't very 238 | reliable. 239 | _user_agent: The user agent string that you want to use in your requests. 
240 | """ 241 | self.backoff_factor = self._check('backoff_factor', backoff_factor) 242 | self.initial_delay = self._check('initial_delay', initial_delay) 243 | self.max_delay = self._check('max_delay', max_delay) 244 | self.max_retry_period = self._check('max_retry_period', max_retry_period) 245 | self.max_retries = self._check('max_retries', max_retries, True, int) 246 | self.min_retries = self._check('min_retries', min_retries, True, int) 247 | if self.min_retries > self.max_retries: 248 | self.min_retries = self.max_retries 249 | 250 | self.urlfetch_timeout = None 251 | if urlfetch_timeout is not None: 252 | self.urlfetch_timeout = self._check('urlfetch_timeout', urlfetch_timeout) 253 | self.save_access_token = self._check('save_access_token', save_access_token, 254 | True, bool) 255 | self._user_agent = _user_agent or self._DEFAULT_USER_AGENT 256 | 257 | self._request_id = os.getenv('REQUEST_LOG_ID') 258 | 259 | def __eq__(self, other): 260 | if not isinstance(other, self.__class__): 261 | return False 262 | return self.__dict__ == other.__dict__ 263 | 264 | def __ne__(self, other): 265 | return not self.__eq__(other) 266 | 267 | @classmethod 268 | def _check(cls, name, val, can_be_zero=False, val_type=float): 269 | """Check init arguments. 270 | 271 | Args: 272 | name: name of the argument. For logging purpose. 273 | val: value. Value has to be non negative number. 274 | can_be_zero: whether value can be zero. 275 | val_type: Python type of the value. 276 | 277 | Returns: 278 | The value. 279 | 280 | Raises: 281 | ValueError: when invalid value is passed in. 282 | TypeError: when invalid value type is passed in. 283 | """ 284 | valid_types = [val_type] 285 | if val_type is float: 286 | valid_types.append(int) 287 | 288 | if type(val) not in valid_types: 289 | raise TypeError( 290 | 'Expect type %s for parameter %s' % (val_type.__name__, name)) 291 | if val < 0: 292 | raise ValueError( 293 | 'Value for parameter %s has to be greater than 0' % name) 294 | if not can_be_zero and val == 0: 295 | raise ValueError( 296 | 'Value for parameter %s can not be 0' % name) 297 | return val 298 | 299 | def belong_to_current_request(self): 300 | return os.getenv('REQUEST_LOG_ID') == self._request_id 301 | 302 | def delay(self, n, start_time): 303 | """Calculate delay before the next retry. 304 | 305 | Args: 306 | n: the number of current attempt. The first attempt should be 1. 307 | start_time: the time when retry started in unix time. 308 | 309 | Returns: 310 | Number of seconds to wait before next retry. -1 if retry should give up. 311 | """ 312 | if (n > self.max_retries or 313 | (n > self.min_retries and 314 | time.time() - start_time > self.max_retry_period)): 315 | return -1 316 | return min( 317 | math.pow(self.backoff_factor, n-1) * self.initial_delay, 318 | self.max_delay) 319 | 320 | 321 | def _run_until_rpc(): 322 | """Eagerly evaluate tasklets until it is blocking on some RPC. 323 | 324 | Usually ndb eventloop el isn't run until some code calls future.get_result(). 325 | 326 | When an async tasklet is called, the tasklet wrapper evaluates the tasklet 327 | code into a generator, enqueues a callback _help_tasklet_along onto 328 | the el.current queue, and returns a future. 329 | 330 | _help_tasklet_along, when called by the el, will 331 | get one yielded value from the generator. If the value if another future, 332 | set up a callback _on_future_complete to invoke _help_tasklet_along 333 | when the dependent future fulfills. 
If the value if a RPC, set up a 334 | callback _on_rpc_complete to invoke _help_tasklet_along when the RPC fulfills. 335 | Thus _help_tasklet_along drills down 336 | the chain of futures until some future is blocked by RPC. El runs 337 | all callbacks and constantly check pending RPC status. 338 | """ 339 | el = eventloop.get_event_loop() 340 | while el.current: 341 | el.run0() 342 | 343 | 344 | def _eager_tasklet(tasklet): 345 | """Decorator to turn tasklet to run eagerly.""" 346 | 347 | @utils.wrapping(tasklet) 348 | def eager_wrapper(*args, **kwds): 349 | fut = tasklet(*args, **kwds) 350 | _run_until_rpc() 351 | return fut 352 | 353 | return eager_wrapper 354 | -------------------------------------------------------------------------------- /server/scripts/cloudstorage/common.py: -------------------------------------------------------------------------------- 1 | # Copyright 2012 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific 13 | # language governing permissions and limitations under the License. 14 | 15 | """Helpers shared by cloudstorage_stub and cloudstorage_api.""" 16 | 17 | 18 | 19 | 20 | 21 | __all__ = ['CS_XML_NS', 22 | 'CSFileStat', 23 | 'dt_str_to_posix', 24 | 'local_api_url', 25 | 'LOCAL_GCS_ENDPOINT', 26 | 'local_run', 27 | 'get_access_token', 28 | 'get_stored_content_length', 29 | 'get_metadata', 30 | 'GCSFileStat', 31 | 'http_time_to_posix', 32 | 'memory_usage', 33 | 'posix_time_to_http', 34 | 'posix_to_dt_str', 35 | 'set_access_token', 36 | 'validate_options', 37 | 'validate_bucket_name', 38 | 'validate_bucket_path', 39 | 'validate_file_path', 40 | ] 41 | 42 | 43 | import calendar 44 | import datetime 45 | from email import utils as email_utils 46 | import logging 47 | import os 48 | import re 49 | 50 | try: 51 | from google.appengine.api import runtime 52 | except ImportError: 53 | from google.appengine.api import runtime 54 | 55 | 56 | _GCS_BUCKET_REGEX_BASE = r'[a-z0-9\.\-_]{3,63}' 57 | _GCS_BUCKET_REGEX = re.compile(_GCS_BUCKET_REGEX_BASE + r'$') 58 | _GCS_BUCKET_PATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'$') 59 | _GCS_PATH_PREFIX_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'.*') 60 | _GCS_FULLPATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'/.*') 61 | _GCS_METADATA = ['x-goog-meta-', 62 | 'content-disposition', 63 | 'cache-control', 64 | 'content-encoding'] 65 | _GCS_OPTIONS = _GCS_METADATA + ['x-goog-acl'] 66 | CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01' 67 | LOCAL_GCS_ENDPOINT = '/_ah/gcs' 68 | _access_token = '' 69 | 70 | 71 | _MAX_GET_BUCKET_RESULT = 1000 72 | 73 | 74 | def set_access_token(access_token): 75 | """Set the shared access token to authenticate with Google Cloud Storage. 76 | 77 | When set, the library will always attempt to communicate with the 78 | real Google Cloud Storage with this token even when running on dev appserver. 79 | Note the token could expire so it's up to you to renew it. 
80 | 81 | When absent, the library will automatically request and refresh a token 82 | on appserver, or when on dev appserver, talk to a Google Cloud Storage 83 | stub. 84 | 85 | Args: 86 | access_token: you can get one by run 'gsutil -d ls' and copy the 87 | str after 'Bearer'. 88 | """ 89 | global _access_token 90 | _access_token = access_token 91 | 92 | 93 | def get_access_token(): 94 | """Returns the shared access token.""" 95 | return _access_token 96 | 97 | 98 | class GCSFileStat(object): 99 | """Container for GCS file stat.""" 100 | 101 | def __init__(self, 102 | filename, 103 | st_size, 104 | etag, 105 | st_ctime, 106 | content_type=None, 107 | metadata=None, 108 | is_dir=False): 109 | """Initialize. 110 | 111 | For files, the non optional arguments are always set. 112 | For directories, only filename and is_dir is set. 113 | 114 | Args: 115 | filename: a Google Cloud Storage filename of form '/bucket/filename'. 116 | st_size: file size in bytes. long compatible. 117 | etag: hex digest of the md5 hash of the file's content. str. 118 | st_ctime: posix file creation time. float compatible. 119 | content_type: content type. str. 120 | metadata: a str->str dict of user specified options when creating 121 | the file. Possible keys are x-goog-meta-, content-disposition, 122 | content-encoding, and cache-control. 123 | is_dir: True if this represents a directory. False if this is a real file. 124 | """ 125 | self.filename = filename 126 | self.is_dir = is_dir 127 | self.st_size = None 128 | self.st_ctime = None 129 | self.etag = None 130 | self.content_type = content_type 131 | self.metadata = metadata 132 | 133 | if not is_dir: 134 | self.st_size = long(st_size) 135 | self.st_ctime = float(st_ctime) 136 | if etag[0] == '"' and etag[-1] == '"': 137 | etag = etag[1:-1] 138 | self.etag = etag 139 | 140 | def __repr__(self): 141 | if self.is_dir: 142 | return '(directory: %s)' % self.filename 143 | 144 | return ( 145 | '(filename: %(filename)s, st_size: %(st_size)s, ' 146 | 'st_ctime: %(st_ctime)s, etag: %(etag)s, ' 147 | 'content_type: %(content_type)s, ' 148 | 'metadata: %(metadata)s)' % 149 | dict(filename=self.filename, 150 | st_size=self.st_size, 151 | st_ctime=self.st_ctime, 152 | etag=self.etag, 153 | content_type=self.content_type, 154 | metadata=self.metadata)) 155 | 156 | def __cmp__(self, other): 157 | if not isinstance(other, self.__class__): 158 | raise ValueError('Argument to cmp must have the same type. ' 159 | 'Expect %s, got %s', self.__class__.__name__, 160 | other.__class__.__name__) 161 | if self.filename > other.filename: 162 | return 1 163 | elif self.filename < other.filename: 164 | return -1 165 | return 0 166 | 167 | def __hash__(self): 168 | if self.etag: 169 | return hash(self.etag) 170 | return hash(self.filename) 171 | 172 | 173 | CSFileStat = GCSFileStat 174 | 175 | 176 | def get_stored_content_length(headers): 177 | """Return the content length (in bytes) of the object as stored in GCS. 178 | 179 | x-goog-stored-content-length should always be present except when called via 180 | the local dev_appserver. Therefore if it is not present we default to the 181 | standard content-length header. 182 | 183 | Args: 184 | headers: a dict of headers from the http response. 185 | 186 | Returns: 187 | the stored content length. 
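    Example (an illustrative sketch, not part of the original docstring):

      get_stored_content_length({'x-goog-stored-content-length': '1234'})
      # returns '1234'
      get_stored_content_length({'content-length': '1234'})
      # returns '1234' via the dev_appserver fallback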
188 | """ 189 | length = headers.get('x-goog-stored-content-length') 190 | if length is None: 191 | length = headers.get('content-length') 192 | return length 193 | 194 | 195 | def get_metadata(headers): 196 | """Get user defined options from HTTP response headers.""" 197 | return dict((k, v) for k, v in headers.iteritems() 198 | if any(k.lower().startswith(valid) for valid in _GCS_METADATA)) 199 | 200 | 201 | def validate_bucket_name(name): 202 | """Validate a Google Storage bucket name. 203 | 204 | Args: 205 | name: a Google Storage bucket name with no prefix or suffix. 206 | 207 | Raises: 208 | ValueError: if name is invalid. 209 | """ 210 | _validate_path(name) 211 | if not _GCS_BUCKET_REGEX.match(name): 212 | raise ValueError('Bucket should be 3-63 characters long using only a-z,' 213 | '0-9, underscore, dash or dot but got %s' % name) 214 | 215 | 216 | def validate_bucket_path(path): 217 | """Validate a Google Cloud Storage bucket path. 218 | 219 | Args: 220 | path: a Google Storage bucket path. It should have form '/bucket'. 221 | 222 | Raises: 223 | ValueError: if path is invalid. 224 | """ 225 | _validate_path(path) 226 | if not _GCS_BUCKET_PATH_REGEX.match(path): 227 | raise ValueError('Bucket should have format /bucket ' 228 | 'but got %s' % path) 229 | 230 | 231 | def validate_file_path(path): 232 | """Validate a Google Cloud Storage file path. 233 | 234 | Args: 235 | path: a Google Storage file path. It should have form '/bucket/filename'. 236 | 237 | Raises: 238 | ValueError: if path is invalid. 239 | """ 240 | _validate_path(path) 241 | if not _GCS_FULLPATH_REGEX.match(path): 242 | raise ValueError('Path should have format /bucket/filename ' 243 | 'but got %s' % path) 244 | 245 | 246 | def _process_path_prefix(path_prefix): 247 | """Validate and process a Google Cloud Stoarge path prefix. 248 | 249 | Args: 250 | path_prefix: a Google Cloud Storage path prefix of format '/bucket/prefix' 251 | or '/bucket/' or '/bucket'. 252 | 253 | Raises: 254 | ValueError: if path is invalid. 255 | 256 | Returns: 257 | a tuple of /bucket and prefix. prefix can be None. 258 | """ 259 | _validate_path(path_prefix) 260 | if not _GCS_PATH_PREFIX_REGEX.match(path_prefix): 261 | raise ValueError('Path prefix should have format /bucket, /bucket/, ' 262 | 'or /bucket/prefix but got %s.' % path_prefix) 263 | bucket_name_end = path_prefix.find('/', 1) 264 | bucket = path_prefix 265 | prefix = None 266 | if bucket_name_end != -1: 267 | bucket = path_prefix[:bucket_name_end] 268 | prefix = path_prefix[bucket_name_end + 1:] or None 269 | return bucket, prefix 270 | 271 | 272 | def _validate_path(path): 273 | """Basic validation of Google Storage paths. 274 | 275 | Args: 276 | path: a Google Storage path. It should have form '/bucket/filename' 277 | or '/bucket'. 278 | 279 | Raises: 280 | ValueError: if path is invalid. 281 | TypeError: if path is not of type basestring. 282 | """ 283 | if not path: 284 | raise ValueError('Path is empty') 285 | if not isinstance(path, basestring): 286 | raise TypeError('Path should be a string but is %s (%s).' % 287 | (path.__class__, path)) 288 | 289 | 290 | def validate_options(options): 291 | """Validate Google Cloud Storage options. 292 | 293 | Args: 294 | options: a str->basestring dict of options to pass to Google Cloud Storage. 295 | 296 | Raises: 297 | ValueError: if option is not supported. 298 | TypeError: if option is not of type str or value of an option 299 | is not of type basestring. 
300 | """ 301 | if not options: 302 | return 303 | 304 | for k, v in options.iteritems(): 305 | if not isinstance(k, str): 306 | raise TypeError('option %r should be a str.' % k) 307 | if not any(k.lower().startswith(valid) for valid in _GCS_OPTIONS): 308 | raise ValueError('option %s is not supported.' % k) 309 | if not isinstance(v, basestring): 310 | raise TypeError('value %r for option %s should be of type basestring.' % 311 | (v, k)) 312 | 313 | 314 | def http_time_to_posix(http_time): 315 | """Convert HTTP time format to posix time. 316 | 317 | See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1 318 | for http time format. 319 | 320 | Args: 321 | http_time: time in RFC 2616 format. e.g. 322 | "Mon, 20 Nov 1995 19:12:08 GMT". 323 | 324 | Returns: 325 | A float of secs from unix epoch. 326 | """ 327 | if http_time is not None: 328 | return email_utils.mktime_tz(email_utils.parsedate_tz(http_time)) 329 | 330 | 331 | def posix_time_to_http(posix_time): 332 | """Convert posix time to HTML header time format. 333 | 334 | Args: 335 | posix_time: unix time. 336 | 337 | Returns: 338 | A datatime str in RFC 2616 format. 339 | """ 340 | if posix_time: 341 | return email_utils.formatdate(posix_time, usegmt=True) 342 | 343 | 344 | _DT_FORMAT = '%Y-%m-%dT%H:%M:%S' 345 | 346 | 347 | def dt_str_to_posix(dt_str): 348 | """format str to posix. 349 | 350 | datetime str is of format %Y-%m-%dT%H:%M:%S.%fZ, 351 | e.g. 2013-04-12T00:22:27.978Z. According to ISO 8601, T is a separator 352 | between date and time when they are on the same line. 353 | Z indicates UTC (zero meridian). 354 | 355 | A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html 356 | 357 | This is used to parse LastModified node from GCS's GET bucket XML response. 358 | 359 | Args: 360 | dt_str: A datetime str. 361 | 362 | Returns: 363 | A float of secs from unix epoch. By posix definition, epoch is midnight 364 | 1970/1/1 UTC. 365 | """ 366 | parsable, _ = dt_str.split('.') 367 | dt = datetime.datetime.strptime(parsable, _DT_FORMAT) 368 | return calendar.timegm(dt.utctimetuple()) 369 | 370 | 371 | def posix_to_dt_str(posix): 372 | """Reverse of str_to_datetime. 373 | 374 | This is used by GCS stub to generate GET bucket XML response. 375 | 376 | Args: 377 | posix: A float of secs from unix epoch. 378 | 379 | Returns: 380 | A datetime str. 
381 | """ 382 | dt = datetime.datetime.utcfromtimestamp(posix) 383 | dt_str = dt.strftime(_DT_FORMAT) 384 | return dt_str + '.000Z' 385 | 386 | 387 | def local_run(): 388 | """Whether we should hit GCS dev appserver stub.""" 389 | server_software = os.environ.get('SERVER_SOFTWARE') 390 | if server_software is None: 391 | return True 392 | if 'remote_api' in server_software: 393 | return False 394 | if server_software.startswith(('Development', 'testutil')): 395 | return True 396 | return False 397 | 398 | 399 | def local_api_url(): 400 | """Return URL for GCS emulation on dev appserver.""" 401 | return 'http://%s%s' % (os.environ.get('HTTP_HOST'), LOCAL_GCS_ENDPOINT) 402 | 403 | 404 | def memory_usage(method): 405 | """Log memory usage before and after a method.""" 406 | def wrapper(*args, **kwargs): 407 | logging.info('Memory before method %s is %s.', 408 | method.__name__, runtime.memory_usage().current()) 409 | result = method(*args, **kwargs) 410 | logging.info('Memory after method %s is %s', 411 | method.__name__, runtime.memory_usage().current()) 412 | return result 413 | return wrapper 414 | 415 | 416 | def _add_ns(tagname): 417 | return '{%(ns)s}%(tag)s' % {'ns': CS_XML_NS, 418 | 'tag': tagname} 419 | 420 | 421 | _T_CONTENTS = _add_ns('Contents') 422 | _T_LAST_MODIFIED = _add_ns('LastModified') 423 | _T_ETAG = _add_ns('ETag') 424 | _T_KEY = _add_ns('Key') 425 | _T_SIZE = _add_ns('Size') 426 | _T_PREFIX = _add_ns('Prefix') 427 | _T_COMMON_PREFIXES = _add_ns('CommonPrefixes') 428 | _T_NEXT_MARKER = _add_ns('NextMarker') 429 | _T_IS_TRUNCATED = _add_ns('IsTruncated') 430 | -------------------------------------------------------------------------------- /server/scripts/cloudstorage/cloudstorage_api.py: -------------------------------------------------------------------------------- 1 | # Copyright 2012 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific 13 | # language governing permissions and limitations under the License. 14 | 15 | """File Interface for Google Cloud Storage.""" 16 | 17 | 18 | 19 | from __future__ import with_statement 20 | 21 | 22 | 23 | __all__ = ['delete', 24 | 'listbucket', 25 | 'open', 26 | 'stat', 27 | ] 28 | 29 | import logging 30 | import StringIO 31 | import urllib 32 | import xml.etree.cElementTree as ET 33 | from . import api_utils 34 | from . import common 35 | from . import errors 36 | from . import storage_api 37 | 38 | 39 | 40 | def open(filename, 41 | mode='r', 42 | content_type=None, 43 | options=None, 44 | read_buffer_size=storage_api.ReadBuffer.DEFAULT_BUFFER_SIZE, 45 | retry_params=None, 46 | _account_id=None): 47 | """Opens a Google Cloud Storage file and returns it as a File-like object. 48 | 49 | Args: 50 | filename: A Google Cloud Storage filename of form '/bucket/filename'. 51 | mode: 'r' for reading mode. 'w' for writing mode. 52 | In reading mode, the file must exist. In writing mode, a file will 53 | be created or be overrode. 54 | content_type: The MIME type of the file. str. Only valid in writing mode. 
55 | options: A str->basestring dict to specify additional headers to pass to 56 | GCS e.g. {'x-goog-acl': 'private', 'x-goog-meta-foo': 'foo'}. 57 | Supported options are x-goog-acl, x-goog-meta-, cache-control, 58 | content-disposition, and content-encoding. 59 | Only valid in writing mode. 60 | See https://developers.google.com/storage/docs/reference-headers 61 | for details. 62 | read_buffer_size: The buffer size for read. Read keeps a buffer 63 | and prefetches another one. To minimize blocking for large files, 64 | always read by buffer size. To minimize number of RPC requests for 65 | small files, set a large buffer size. Max is 30MB. 66 | retry_params: An instance of api_utils.RetryParams for subsequent calls 67 | to GCS from this file handle. If None, the default one is used. 68 | _account_id: Internal-use only. 69 | 70 | Returns: 71 | A reading or writing buffer that supports File-like interface. Buffer 72 | must be closed after operations are done. 73 | 74 | Raises: 75 | errors.AuthorizationError: if authorization failed. 76 | errors.NotFoundError: if an object that's expected to exist doesn't. 77 | ValueError: invalid open mode or if content_type or options are specified 78 | in reading mode. 79 | """ 80 | common.validate_file_path(filename) 81 | api = storage_api._get_storage_api(retry_params=retry_params, 82 | account_id=_account_id) 83 | filename = api_utils._quote_filename(filename) 84 | 85 | if mode == 'w': 86 | common.validate_options(options) 87 | return storage_api.StreamingBuffer(api, filename, content_type, options) 88 | elif mode == 'r': 89 | if content_type or options: 90 | raise ValueError('Options and content_type can only be specified ' 91 | 'for writing mode.') 92 | return storage_api.ReadBuffer(api, 93 | filename, 94 | buffer_size=read_buffer_size) 95 | else: 96 | raise ValueError('Invalid mode %s.' % mode) 97 | 98 | 99 | def delete(filename, retry_params=None, _account_id=None): 100 | """Delete a Google Cloud Storage file. 101 | 102 | Args: 103 | filename: A Google Cloud Storage filename of form '/bucket/filename'. 104 | retry_params: An api_utils.RetryParams for this call to GCS. If None, 105 | the default one is used. 106 | _account_id: Internal-use only. 107 | 108 | Raises: 109 | errors.NotFoundError: if the file doesn't exist prior to deletion. 110 | """ 111 | api = storage_api._get_storage_api(retry_params=retry_params, 112 | account_id=_account_id) 113 | common.validate_file_path(filename) 114 | filename = api_utils._quote_filename(filename) 115 | status, resp_headers, content = api.delete_object(filename) 116 | errors.check_status(status, [204], filename, resp_headers=resp_headers, 117 | body=content) 118 | 119 | 120 | def stat(filename, retry_params=None, _account_id=None): 121 | """Get GCSFileStat of a Google Cloud storage file. 122 | 123 | Args: 124 | filename: A Google Cloud Storage filename of form '/bucket/filename'. 125 | retry_params: An api_utils.RetryParams for this call to GCS. If None, 126 | the default one is used. 127 | _account_id: Internal-use only. 128 | 129 | Returns: 130 | a GCSFileStat object containing info about this file. 131 | 132 | Raises: 133 | errors.AuthorizationError: if authorization failed. 134 | errors.NotFoundError: if an object that's expected to exist doesn't. 
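    Example (an illustrative sketch; the path is hypothetical):

      info = stat('/bucket/docs/index.html')
      logging.info('%s: %s bytes, content-type %s',
                   info.filename, info.st_size, info.content_type)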
135 | """ 136 | common.validate_file_path(filename) 137 | api = storage_api._get_storage_api(retry_params=retry_params, 138 | account_id=_account_id) 139 | status, headers, content = api.head_object( 140 | api_utils._quote_filename(filename)) 141 | errors.check_status(status, [200], filename, resp_headers=headers, 142 | body=content) 143 | file_stat = common.GCSFileStat( 144 | filename=filename, 145 | st_size=common.get_stored_content_length(headers), 146 | st_ctime=common.http_time_to_posix(headers.get('last-modified')), 147 | etag=headers.get('etag'), 148 | content_type=headers.get('content-type'), 149 | metadata=common.get_metadata(headers)) 150 | 151 | return file_stat 152 | 153 | 154 | def _copy2(src, dst, metadata=None, retry_params=None): 155 | """Copy the file content from src to dst. 156 | 157 | Internal use only! 158 | 159 | Args: 160 | src: /bucket/filename 161 | dst: /bucket/filename 162 | metadata: a dict of metadata for this copy. If None, old metadata is copied. 163 | For example, {'x-goog-meta-foo': 'bar'}. 164 | retry_params: An api_utils.RetryParams for this call to GCS. If None, 165 | the default one is used. 166 | 167 | Raises: 168 | errors.AuthorizationError: if authorization failed. 169 | errors.NotFoundError: if an object that's expected to exist doesn't. 170 | """ 171 | common.validate_file_path(src) 172 | common.validate_file_path(dst) 173 | 174 | if metadata is None: 175 | metadata = {} 176 | copy_meta = 'COPY' 177 | else: 178 | copy_meta = 'REPLACE' 179 | metadata.update({'x-goog-copy-source': src, 180 | 'x-goog-metadata-directive': copy_meta}) 181 | 182 | api = storage_api._get_storage_api(retry_params=retry_params) 183 | status, resp_headers, content = api.put_object( 184 | api_utils._quote_filename(dst), headers=metadata) 185 | errors.check_status(status, [200], src, metadata, resp_headers, body=content) 186 | 187 | 188 | def listbucket(path_prefix, marker=None, prefix=None, max_keys=None, 189 | delimiter=None, retry_params=None, _account_id=None): 190 | """Returns a GCSFileStat iterator over a bucket. 191 | 192 | Optional arguments can limit the result to a subset of files under bucket. 193 | 194 | This function has two modes: 195 | 1. List bucket mode: Lists all files in the bucket without any concept of 196 | hierarchy. GCS doesn't have real directory hierarchies. 197 | 2. Directory emulation mode: If you specify the 'delimiter' argument, 198 | it is used as a path separator to emulate a hierarchy of directories. 199 | In this mode, the "path_prefix" argument should end in the delimiter 200 | specified (thus designates a logical directory). The logical directory's 201 | contents, both files and subdirectories, are listed. The names of 202 | subdirectories returned will end with the delimiter. So listbucket 203 | can be called with the subdirectory name to list the subdirectory's 204 | contents. 205 | 206 | Args: 207 | path_prefix: A Google Cloud Storage path of format "/bucket" or 208 | "/bucket/prefix". Only objects whose fullpath starts with the 209 | path_prefix will be returned. 210 | marker: Another path prefix. Only objects whose fullpath starts 211 | lexicographically after marker will be returned (exclusive). 212 | prefix: Deprecated. Use path_prefix. 213 | max_keys: The limit on the number of objects to return. int. 214 | For best performance, specify max_keys only if you know how many objects 215 | you want. Otherwise, this method requests large batches and handles 216 | pagination for you. 217 | delimiter: Use to turn on directory mode. 
str of one or multiple chars 218 | that your bucket uses as its directory separator. 219 | retry_params: An api_utils.RetryParams for this call to GCS. If None, 220 | the default one is used. 221 | _account_id: Internal-use only. 222 | 223 | Examples: 224 | For files "/bucket/a", 225 | "/bucket/bar/1" 226 | "/bucket/foo", 227 | "/bucket/foo/1", "/bucket/foo/2/1", "/bucket/foo/3/1", 228 | 229 | Regular mode: 230 | listbucket("/bucket/f", marker="/bucket/foo/1") 231 | will match "/bucket/foo/2/1", "/bucket/foo/3/1". 232 | 233 | Directory mode: 234 | listbucket("/bucket/", delimiter="/") 235 | will match "/bucket/a, "/bucket/bar/" "/bucket/foo", "/bucket/foo/". 236 | listbucket("/bucket/foo/", delimiter="/") 237 | will match "/bucket/foo/1", "/bucket/foo/2/", "/bucket/foo/3/" 238 | 239 | Returns: 240 | Regular mode: 241 | A GCSFileStat iterator over matched files ordered by filename. 242 | The iterator returns GCSFileStat objects. filename, etag, st_size, 243 | st_ctime, and is_dir are set. 244 | 245 | Directory emulation mode: 246 | A GCSFileStat iterator over matched files and directories ordered by 247 | name. The iterator returns GCSFileStat objects. For directories, 248 | only the filename and is_dir fields are set. 249 | 250 | The last name yielded can be used as next call's marker. 251 | """ 252 | if prefix: 253 | common.validate_bucket_path(path_prefix) 254 | bucket = path_prefix 255 | else: 256 | bucket, prefix = common._process_path_prefix(path_prefix) 257 | 258 | if marker and marker.startswith(bucket): 259 | marker = marker[len(bucket) + 1:] 260 | 261 | api = storage_api._get_storage_api(retry_params=retry_params, 262 | account_id=_account_id) 263 | options = {} 264 | if marker: 265 | options['marker'] = marker 266 | if max_keys: 267 | options['max-keys'] = max_keys 268 | if prefix: 269 | options['prefix'] = prefix 270 | if delimiter: 271 | options['delimiter'] = delimiter 272 | 273 | return _Bucket(api, bucket, options) 274 | 275 | 276 | class _Bucket(object): 277 | """A wrapper for a GCS bucket as the return value of listbucket.""" 278 | 279 | def __init__(self, api, path, options): 280 | """Initialize. 281 | 282 | Args: 283 | api: storage_api instance. 284 | path: bucket path of form '/bucket'. 285 | options: a dict of listbucket options. Please see listbucket doc. 286 | """ 287 | self._init(api, path, options) 288 | 289 | def _init(self, api, path, options): 290 | self._api = api 291 | self._path = path 292 | self._options = options.copy() 293 | self._get_bucket_fut = self._api.get_bucket_async( 294 | self._path + '?' + urllib.urlencode(self._options)) 295 | self._last_yield = None 296 | self._new_max_keys = self._options.get('max-keys') 297 | 298 | def __getstate__(self): 299 | options = self._options 300 | if self._last_yield: 301 | options['marker'] = self._last_yield.filename[len(self._path) + 1:] 302 | if self._new_max_keys is not None: 303 | options['max-keys'] = self._new_max_keys 304 | return {'api': self._api, 305 | 'path': self._path, 306 | 'options': options} 307 | 308 | def __setstate__(self, state): 309 | self._init(state['api'], state['path'], state['options']) 310 | 311 | def __iter__(self): 312 | """Iter over the bucket. 313 | 314 | Yields: 315 | GCSFileStat: a GCSFileStat for an object in the bucket. 316 | They are ordered by GCSFileStat.filename. 
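    Example (illustrative sketch only; '/my-bucket' is a placeholder bucket):

      import cloudstorage

      names = []
      # Directory emulation mode: list the top level of the bucket.
      for entry in cloudstorage.listbucket('/my-bucket/', delimiter='/'):
        # Subdirectory entries have is_dir set and a filename that ends with
        # the delimiter; file entries also carry st_size, etag and st_ctime.
        names.append(entry.filename)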
317 | """ 318 | total = 0 319 | max_keys = self._options.get('max-keys') 320 | 321 | while self._get_bucket_fut: 322 | status, resp_headers, content = self._get_bucket_fut.get_result() 323 | errors.check_status(status, [200], self._path, resp_headers=resp_headers, 324 | body=content, extras=self._options) 325 | 326 | if self._should_get_another_batch(content): 327 | self._get_bucket_fut = self._api.get_bucket_async( 328 | self._path + '?' + urllib.urlencode(self._options)) 329 | else: 330 | self._get_bucket_fut = None 331 | 332 | root = ET.fromstring(content) 333 | dirs = self._next_dir_gen(root) 334 | files = self._next_file_gen(root) 335 | next_file = files.next() 336 | next_dir = dirs.next() 337 | 338 | while ((max_keys is None or total < max_keys) and 339 | not (next_file is None and next_dir is None)): 340 | total += 1 341 | if next_file is None: 342 | self._last_yield = next_dir 343 | next_dir = dirs.next() 344 | elif next_dir is None: 345 | self._last_yield = next_file 346 | next_file = files.next() 347 | elif next_dir < next_file: 348 | self._last_yield = next_dir 349 | next_dir = dirs.next() 350 | elif next_file < next_dir: 351 | self._last_yield = next_file 352 | next_file = files.next() 353 | else: 354 | logging.error( 355 | 'Should never reach. next file is %r. next dir is %r.', 356 | next_file, next_dir) 357 | if self._new_max_keys: 358 | self._new_max_keys -= 1 359 | yield self._last_yield 360 | 361 | def _next_file_gen(self, root): 362 | """Generator for next file element in the document. 363 | 364 | Args: 365 | root: root element of the XML tree. 366 | 367 | Yields: 368 | GCSFileStat for the next file. 369 | """ 370 | for e in root.getiterator(common._T_CONTENTS): 371 | st_ctime, size, etag, key = None, None, None, None 372 | for child in e.getiterator('*'): 373 | if child.tag == common._T_LAST_MODIFIED: 374 | st_ctime = common.dt_str_to_posix(child.text) 375 | elif child.tag == common._T_ETAG: 376 | etag = child.text 377 | elif child.tag == common._T_SIZE: 378 | size = child.text 379 | elif child.tag == common._T_KEY: 380 | key = child.text 381 | yield common.GCSFileStat(self._path + '/' + key, 382 | size, etag, st_ctime) 383 | e.clear() 384 | yield None 385 | 386 | def _next_dir_gen(self, root): 387 | """Generator for next directory element in the document. 388 | 389 | Args: 390 | root: root element in the XML tree. 391 | 392 | Yields: 393 | GCSFileStat for the next directory. 394 | """ 395 | for e in root.getiterator(common._T_COMMON_PREFIXES): 396 | yield common.GCSFileStat( 397 | self._path + '/' + e.find(common._T_PREFIX).text, 398 | st_size=None, etag=None, st_ctime=None, is_dir=True) 399 | e.clear() 400 | yield None 401 | 402 | def _should_get_another_batch(self, content): 403 | """Whether to issue another GET bucket call. 404 | 405 | Args: 406 | content: response XML. 407 | 408 | Returns: 409 | True if should, also update self._options for the next request. 410 | False otherwise. 
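    Example (illustrative sketch of the pagination handshake; the element
    names follow the GCS XML listing response):

      # A truncated listing carries a continuation marker, e.g.
      #   <IsTruncated>true</IsTruncated>
      #   <NextMarker>foo/2</NextMarker>
      # In that case self._options['marker'] is set to 'foo/2' and the next
      # get_bucket_async request resumes the listing after that key.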
411 | """ 412 | if ('max-keys' in self._options and 413 | self._options['max-keys'] <= common._MAX_GET_BUCKET_RESULT): 414 | return False 415 | 416 | elements = self._find_elements( 417 | content, set([common._T_IS_TRUNCATED, 418 | common._T_NEXT_MARKER])) 419 | if elements.get(common._T_IS_TRUNCATED, 'false').lower() != 'true': 420 | return False 421 | 422 | next_marker = elements.get(common._T_NEXT_MARKER) 423 | if next_marker is None: 424 | self._options.pop('marker', None) 425 | return False 426 | self._options['marker'] = next_marker 427 | return True 428 | 429 | def _find_elements(self, result, elements): 430 | """Find interesting elements from XML. 431 | 432 | This function tries to only look for specified elements 433 | without parsing the entire XML. The specified elements is better 434 | located near the beginning. 435 | 436 | Args: 437 | result: response XML. 438 | elements: a set of interesting element tags. 439 | 440 | Returns: 441 | A dict from element tag to element value. 442 | """ 443 | element_mapping = {} 444 | result = StringIO.StringIO(result) 445 | for _, e in ET.iterparse(result, events=('end',)): 446 | if not elements: 447 | break 448 | if e.tag in elements: 449 | element_mapping[e.tag] = e.text 450 | elements.remove(e.tag) 451 | return element_mapping 452 | -------------------------------------------------------------------------------- /server/scripts/redirector.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 2 | # for details. All rights reserved. Use of this source code is governed by a 3 | # BSD-style license that can be found in the LICENSE file. 4 | 5 | import logging 6 | import re 7 | import json 8 | from webapp2 import * 9 | from webapp2_extras.routes import DomainRoute 10 | from datetime import datetime, timedelta 11 | from google.appengine.ext import blobstore 12 | from google.appengine.ext.webapp import blobstore_handlers 13 | from google.appengine.api import memcache 14 | import cloudstorage 15 | 16 | ONE_HOUR = 60 * 60 17 | ONE_DAY = ONE_HOUR * 24 18 | ONE_WEEK = ONE_DAY * 7 19 | 20 | # for redirects below 21 | ONLY_DART_LIB = re.compile("^dart:([a-zA-Z0-9_]+)$") 22 | LIB_NAME_AND_CLASS_NAME = re.compile("^dart[:-]([^\.]+)\.(.+)$") 23 | 24 | class VersionInfo(object): 25 | """Small helper class holding information about the last version seen and the 26 | last time the version was checked for.""" 27 | def __init__(self, update_interval): 28 | # The most recent version for this channel. 29 | self.version = None 30 | # The time this version was found. 31 | self.last_check = None 32 | self.update_interval = update_interval 33 | 34 | def should_update(self): 35 | """Tests to see if the last check was long enough past the update interval 36 | that we should update the version.""" 37 | return datetime.now() > self.last_check + self.update_interval 38 | 39 | class ApiDocs(blobstore_handlers.BlobstoreDownloadHandler): 40 | GOOGLE_STORAGE = '/dartlang-api-docs/channels' 41 | GOOGLE_STORAGE_NEW = '/dartlang-api-docs/gen-dartdocs' 42 | 43 | def version_file_loc(self, channel): 44 | return '%s/%s/latest.txt' % (ApiDocs.GOOGLE_STORAGE, channel) 45 | 46 | # Dictionary of versions holding version information of the latest recorded 47 | # version number and the time when it was recorded. 
48 | latest_versions = { 49 | 'main': VersionInfo(timedelta(minutes=30)), 50 | 'dev': VersionInfo(timedelta(hours=6)), 51 | 'beta': VersionInfo(timedelta(hours=12)), 52 | 'stable': VersionInfo(timedelta(days=1)), 53 | } 54 | 55 | def recheck_latest_version(self, channel): 56 | """Check Google storage to determine the latest version file in a given 57 | channel.""" 58 | data = None 59 | version_file_location = self.version_file_loc(channel) 60 | with cloudstorage.open(version_file_location, 'r') as f: 61 | line = f.readline() 62 | data = line.replace('\x00', '') 63 | revision = data 64 | ApiDocs.latest_versions[channel].version = revision 65 | ApiDocs.latest_versions[channel].last_check = datetime.now() 66 | return revision 67 | 68 | def get_latest_version(self, channel): 69 | """Determine what the latest version number is for this particular channel. 70 | We do a bit of caching so that we're not constantly pinging for the latest 71 | version of stable, for example.""" 72 | forced_reload = (self.request and self.request.get('force_reload')) 73 | version_info = ApiDocs.latest_versions[channel] 74 | if (forced_reload or 75 | version_info.version is None or version_info.should_update()): 76 | return self.recheck_latest_version(channel) 77 | else: 78 | return version_info.version 79 | 80 | def get_cache_age(self, path): 81 | if re.search(r'(png|jpg)$', path): 82 | age = ONE_DAY 83 | elif path.endswith('.ico'): 84 | age = ONE_WEEK 85 | else: 86 | age = ONE_HOUR 87 | return age 88 | 89 | def build_gcs_path(self, version_num, postfix, channel): 90 | """Build the path to the information on Google Storage.""" 91 | suffix = channel 92 | # Support for bleeding edge versions before git hashes (October 26, 2022). 93 | if channel == 'be' and version_num.isdigit() and len(version_num) != 40: 94 | suffix = 'builds' 95 | index = version_num.find('.') 96 | if index != -1: 97 | nums = version_num.split('.') 98 | release_num = nums[1] 99 | if nums[0] == '1' and int(release_num) < 15: 100 | return '%s/%s/%s' % (ApiDocs.GOOGLE_STORAGE_NEW, version_num, postfix) 101 | return '%s/%s/%s/%s' % (ApiDocs.GOOGLE_STORAGE_NEW, suffix, version_num, postfix) 102 | 103 | def resolve_doc_path(self, channel): 104 | """Given the request URL, determine what specific docs version we should 105 | actually display.""" 106 | path = None 107 | 108 | if channel: 109 | length = len(channel) + 2 110 | else: 111 | length = 1 112 | postfix = self.request.path[length:] 113 | index = postfix.find('/') 114 | if index != -1: 115 | version_num = postfix[:index] 116 | postfix = postfix[index+1:] 117 | if postfix.startswith('/'): 118 | postfix = postfix[1:] 119 | else: 120 | if channel: 121 | version_num = self.get_latest_version(channel) 122 | else: 123 | channel = 'stable' 124 | version_num = self.get_latest_version(channel) 125 | postfix = 'index.html' 126 | path = self.build_gcs_path(version_num, postfix, channel) 127 | logging.debug('build_gcs_path("%s", "%s", "%s") -> "%s"' 128 | % (version_num, postfix, channel, path)) 129 | return path 130 | 131 | def get_channel(self): 132 | """Quick accessor to examine a request and determine what channel 133 | (main/beta/dev/stable) we're looking at. Return None if we have a weird 134 | unexpected URL.""" 135 | parts = self.request.path.split('/') 136 | if len(parts) > 0: 137 | if len(parts) > 3 and self.request.path.startswith('/apidocs/channels/'): 138 | channel = parts[3] # ['', 'apidocs', 'channels', '', ...] 139 | else: 140 | channel = parts[1] # ['', '', ...] 
141 | if channel in ApiDocs.latest_versions: 142 | return channel 143 | return None 144 | 145 | def get(self, *args, **kwargs): 146 | """The main entry point for handling the URL for those with ApiDocs as the 147 | handler. See http://webapp-improved.appspot.com/api/webapp2.html?highlight= 148 | redirecthandler#webapp2.RedirectHandler.get. 149 | 150 | Arguments: 151 | - args: Positional arguments passed to this URL handler 152 | - kwargs: Dictionary arguments passed to the hander; expecting at least one 153 | item in the dictionary with a key of 'path', which was populated from the 154 | regular expression matching in Route.""" 155 | channel = self.get_channel() 156 | 157 | # this is serving all paths, so check to make sure version is valid pattern 158 | # else redirect to stable 159 | # /dev/1.15.0-dev.5.1/index.html 160 | if channel: 161 | length = len(channel) + 2 162 | else: 163 | length = 1 164 | request = self.request.path[length:] 165 | 166 | index = request.find('/') 167 | if index != -1: 168 | version_num = request[:index] 169 | match = re.match(r'^-?([0-9]+|[0-9a-z]{40})$', version_num) 170 | if match: 171 | if len(version_num) == 40 or int(version_num) > 136051: 172 | path = request[index+1:] 173 | if not channel: 174 | return self.redirect('/main/%s/%s' % (version_num, path)) 175 | else: 176 | return self.redirect('/stable') 177 | else: 178 | match = re.match(r'(\d+\.){2}\d+([\+-]([\.a-zA-Z0-9-\+])*)?', version_num) 179 | latest = self.get_latest_version(channel or 'stable') 180 | if match: 181 | if not channel: 182 | return self.redirect('/stable/%s/index.html' % latest) 183 | else: 184 | return self.redirect('/%s/%s/%s' % (channel or 'stable', latest, request)) 185 | else: 186 | match = re.match(r'(\d+\.){2}\d+([\+-]([\.a-zA-Z0-9-\+])*)?', request) 187 | if match: 188 | return self.redirect('/%s/index.html' % request) 189 | else: 190 | return self.redirect('/stable') 191 | 192 | my_path = self.resolve_doc_path(channel) 193 | 194 | gcs_path = '/gs%s' % my_path 195 | if not gcs_path: 196 | self.error(404) 197 | return 198 | 199 | gs_key = blobstore.create_gs_key(gcs_path) 200 | age = self.get_cache_age(gcs_path) 201 | 202 | self.response.headers['Cache-Control'] = 'max-age=' + \ 203 | str(age) + ',s-maxage=' + str(age) 204 | 205 | self.response.headers['Access-Control-Allow-Origin'] = '*' 206 | 207 | # is there a better way to check if a file exists in cloud storage? 208 | # AE will serve a 500 if the file doesn't exist, but that should 209 | # be a 404 210 | 211 | path_exists = memcache.get(gcs_path) 212 | if path_exists == "1": 213 | self.send_blob(gs_key) 214 | else: 215 | try: 216 | # just check for existence 217 | cloudstorage.open(my_path, 'r').close() 218 | memcache.add(key=gcs_path, value="1", time=ONE_DAY) 219 | self.send_blob(gs_key) 220 | except Exception: 221 | memcache.add(key=gcs_path, value="0", time=ONE_DAY) 222 | logging.debug('Could not open ' + gcs_path + ', sending 404') 223 | self.error(404) 224 | 225 | def redir_dom(handler, *args, **kwargs): 226 | return '/stable/dart-html/index.html' 227 | 228 | def redir_pkgs(handler, *args, **kwargs): 229 | return 'http://www.dartdocs.org/documentation/' + kwargs['pkg'] + '/latest' 230 | 231 | # Redirect old apidoc URIs 232 | def redir_old(kwargs, channel): 233 | """Crufty old code that hasn't been touched in a long time. Still here for 234 | legacy reasons to not break old links. 
:-/ Let sleeping dogs lie?""" 235 | old_path = kwargs['path'][1:] 236 | if (old_path == ''): 237 | return '/apidocs/channels/stable/dartdoc-viewer/home' 238 | split = old_path.split('/') 239 | firstPart = split[0] 240 | if (len(split) > 1): 241 | secondPart = '.' + split[1] 242 | else: 243 | secondPart = '' 244 | packages = ['args', 'crypto', 'custom_element', 'fixnum', 'http_server', 245 | 'intl', 'json', 'logging', 'matcher', 'mime', 'mock', 'observe', 'path', 246 | 'polymer', 'polymer_expressions', 'sequence_zip', 'serialization', 247 | 'source_maps', 'template_binding', 'unittest', 'unmodifiable_collection', 248 | 'utf'] 249 | withNoDot = firstPart.split('.')[0] 250 | if withNoDot in packages: 251 | prefix = firstPart + '/' + firstPart 252 | else: 253 | prefix = firstPart.replace('_', ':', 1).replace('.html', '') 254 | # For old URLs like core/String.html. We know it's not a package, so 255 | # it ought to start with a dart: library 256 | if (not prefix.startswith("dart:")): 257 | prefix = "dart:" + prefix 258 | new_path = prefix + secondPart.replace('.html','') 259 | # Should be #! if we use that scheme 260 | return '/' + channel 261 | 262 | def redir_old_be(handler, *args, **kwargs): 263 | return redir_old(kwargs, 'be') 264 | 265 | def redir_old_dev(handler, *args, **kwargs): 266 | return redir_old(kwargs, 'dev') 267 | 268 | def redir_old_stable(handler, *args, **kwargs): 269 | return redir_old(kwargs, 'dev') 270 | 271 | def redir_channel_latest(channel, postfix): 272 | apidocs = ApiDocs() 273 | version_num = apidocs.get_latest_version('%s' % channel) 274 | return '/%s/%s/%s' % (channel, version_num, postfix) 275 | 276 | def redir_stable_latest(handler, *args, **kwargs): 277 | return redir_channel_latest('stable', 'index.html') 278 | 279 | def redir_dev_latest(handler, *args, **kwargs): 280 | return redir_channel_latest('dev', 'index.html') 281 | 282 | def redir_beta_latest(handler, *args, **kwargs): 283 | return redir_channel_latest('beta', 'index.html') 284 | 285 | def redir_main_latest(handler, *args, **kwargs): 286 | return redir_channel_latest('main', 'index.html') 287 | 288 | def redir_stable_path(handler, *args, **kwargs): 289 | postfix = kwargs['path'][1:] 290 | return redir_channel_latest('stable', postfix) 291 | 292 | def redir_dev_path(handler, *args, **kwargs): 293 | postfix = kwargs['path'][1:] 294 | return redir_channel_latest('dev', postfix) 295 | 296 | def redir_beta_path(handler, *args, **kwargs): 297 | postfix = kwargs['path'][1:] 298 | return redir_channel_latest('beta', postfix) 299 | 300 | # /apidocs/channels/stable/dartdoc-viewer/home => /stable 301 | # /apidocs/channels/stable/dartdoc-viewer/dart:math => /stable/dart-math/dart-math-library.html 302 | # /apidocs/channels/stable/dartdoc-viewer/dart[:-]async.Future => /stable/dart-async/Future-class.html 303 | def redir_name(handler, *args, **kwargs): 304 | channel = kwargs['channel'] 305 | postfix = kwargs['path'][1:] 306 | 307 | # /apidocs/channels/stable/dartdoc-viewer/home => /stable 308 | # /apidocs/channels/stable/dartdoc-viewer/ => /stable 309 | # /apidocs/channels/stable/dartdoc-viewer => /stable 310 | if postfix == 'home' or postfix == '': 311 | return '/%s' % (channel) 312 | 313 | # /apidocs/channels/stable/dartdoc-viewer/dart:math => /stable/dart-math/dart-math-library.html 314 | is_lib_page = ONLY_DART_LIB.match(postfix) 315 | if is_lib_page: 316 | name = postfix.replace(':', '-') 317 | return '/%s/%s/%s-library.html' % (channel, name, name) 318 | 319 | # 
/apidocs/channels/stable/dartdoc-viewer/dart[:-]async.Future => /stable/dart-async/Future-class.html 320 | is_lib_and_class = LIB_NAME_AND_CLASS_NAME.match(postfix) 321 | if is_lib_and_class: 322 | lib_name = 'dart-' + is_lib_and_class.group(1) 323 | class_name = is_lib_and_class.group(2) 324 | return '/%s/%s/%s-class.html' % (channel, lib_name, class_name) 325 | 326 | abort(404) 327 | 328 | def redir_bare_lib_name(handler, *args, **kwargs): 329 | version = kwargs['version'] 330 | libname = kwargs['libname'] 331 | 332 | # /1.12.0/dart-async => /1.12.0/dart-async/dart-async-library.html 333 | return '/%s/dart-%s/dart-%s-library.html' % (version, libname, libname) 334 | 335 | # /dart_core.html => /stable/dart-core/dart-core-library.html 336 | def redir_legacy_lib(handler, *args, **kwargs): 337 | libname = kwargs['libname'] 338 | return '/stable/dart-%s/dart-%s-library.html' % (libname, libname) 339 | 340 | # /dart_core/Iterable.html => /stable/dart-core/Iterable-class.html 341 | def redir_legacy_lib_class(handler, *args, **kwargs): 342 | libname = kwargs['libname'] 343 | classname = kwargs['classname'] 344 | return '/stable/dart-%s/%s-class.html' % (libname, classname) 345 | 346 | def redir_apidartdev(handler, *args, **kwargs): 347 | return 'https://api.dart.dev/%s' % (kwargs['path']) 348 | 349 | application = WSGIApplication( 350 | [ 351 | # Legacy domain name, redirect to new domain 352 | DomainRoute('api.dartlang.org', [ 353 | Route('/', RedirectHandler, 354 | defaults={'_uri': redir_apidartdev}), 355 | ]), 356 | # Legacy URL redirection schemes. 357 | # Redirect all old URL package requests to our updated URL scheme. 358 | # TODO(efortuna): Remove this line when pkg gets moved off of 359 | # api.dartlang.org. 360 | Route('/docs/pkg/<:/?>', 364 | RedirectHandler, defaults={'_uri': redir_pkgs, '_code': 302}), 365 | Route('/dom', RedirectHandler, defaults={'_uri': redir_dom}), 366 | Route('/docs/bleeding_edge', RedirectHandler, 367 | defaults={'_uri': '/be'}), 368 | 369 | # Data requests go to cloud storage 370 | Route('/apidocs/channels/be/docs', RedirectHandler, 371 | defaults={'_uri': '/be'}), 372 | Route('/apidocs/channels/beta/docs', RedirectHandler, 373 | defaults={'_uri': '/beta'}), 374 | Route('/apidocs/channels/dev/docs', RedirectHandler, 375 | defaults={'_uri': '/dev'}), 376 | Route('/apidocs/channels/stable/docs', RedirectHandler, 377 | defaults={'_uri': '/stable'}), 378 | 379 | Route('/stable/', RedirectHandler, 380 | defaults={'_uri': '/stable'}), 381 | Route('/latest', RedirectHandler, 382 | defaults={'_uri': '/stable'}), 383 | Route('/dev/', RedirectHandler, 384 | defaults={'_uri': '/dev'}), 385 | Route('/beta/', RedirectHandler, 386 | defaults={'_uri': '/beta'}), 387 | Route('/be/', RedirectHandler, 388 | defaults={'_uri': '/be'}), 389 | Route('/bleeding_edge', RedirectHandler, 390 | defaults={'_uri': '/be'}), 391 | Route('/be', RedirectHandler, 392 | defaults={'_uri': '/main'}), 393 | Route('/main/', RedirectHandler, 394 | defaults={'_uri': '/main'}), 395 | 396 | Route('/stable/latest', RedirectHandler, 397 | defaults={'_uri': '/stable'}), 398 | Route('/dev/latest', RedirectHandler, 399 | defaults={'_uri': '/dev'}), 400 | Route('/beta/latest', RedirectHandler, 401 | defaults={'_uri': '/beta'}), 402 | Route('/be/latest', RedirectHandler, 403 | defaults={'_uri': '/be'}), 404 | Route('/main/latest', RedirectHandler, 405 | defaults={'_uri': '/main'}), 406 | 407 | Route('/dart_.html', RedirectHandler, 408 | defaults={'_uri': redir_legacy_lib}), 409 | 410 | Route('/dart_/.html', 
RedirectHandler, 411 | defaults={'_uri': redir_legacy_lib_class}), 412 | 413 | # temp routing till stable docs are rolled out 414 | Route('/stable', RedirectHandler, 415 | defaults={'_uri': redir_stable_latest}), #ApiDocs), 416 | Route('/dev', RedirectHandler, 417 | defaults={'_uri': redir_dev_latest}), #ApiDocs), 418 | Route('/beta', RedirectHandler, 419 | defaults={'_uri': redir_beta_latest}),#ApiDocs), 420 | Route('/main', RedirectHandler, 421 | defaults={'_uri': redir_main_latest}),#ApiDocs), 422 | 423 | Route('/apidocs/channels//dartdoc-viewer', 424 | RedirectHandler, 425 | defaults={'_uri': redir_name}), 426 | 427 | Route('/docs/continuous', RedirectHandler, 428 | defaults={'_uri': '/be'}), 429 | Route('/docs/releases/latest', RedirectHandler, 430 | defaults={'_uri': '/stable'}), 431 | 432 | # Legacy handling: redirect old doc links to apidoc. 433 | Route('/docs/channels/be/latest', RedirectHandler, 434 | defaults={'_uri': redir_old_be}), 435 | Route('/docs/channels/dev/latest', RedirectHandler, 436 | defaults={'_uri': redir_old_dev}), 437 | Route('/docs/channels/stable/latest', RedirectHandler, 438 | defaults={'_uri': redir_old_stable}), 439 | Route('/docs/channels/be', RedirectHandler, 440 | defaults={'_uri': '/be'}), 441 | Route('/docs/channels/dev', RedirectHandler, 442 | defaults={'_uri': '/dev'}), 443 | Route('/docs/channels/stable', RedirectHandler, 444 | defaults={'_uri': '/stable'}), 445 | 446 | Route('//dart-', RedirectHandler, 447 | defaults={'_uri': redir_bare_lib_name}), 448 | 449 | Route('/', RedirectHandler, defaults={'_uri': '/stable'}), 450 | 451 | Route('', ApiDocs) 452 | ], 453 | debug=True) 454 | -------------------------------------------------------------------------------- /server/scripts/cloudstorage/storage_api.py: -------------------------------------------------------------------------------- 1 | # Copyright 2012 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific 13 | # language governing permissions and limitations under the License. 14 | 15 | """Python wrappers for the Google Storage RESTful API.""" 16 | 17 | 18 | 19 | 20 | 21 | __all__ = ['ReadBuffer', 22 | 'StreamingBuffer', 23 | ] 24 | 25 | import collections 26 | import os 27 | import urlparse 28 | 29 | from . import api_utils 30 | from . import common 31 | from . import errors 32 | from . import rest_api 33 | 34 | try: 35 | from google.appengine.api import urlfetch 36 | from google.appengine.ext import ndb 37 | except ImportError: 38 | from google.appengine.api import urlfetch 39 | from google.appengine.ext import ndb 40 | 41 | 42 | 43 | def _get_storage_api(retry_params, account_id=None): 44 | """Returns storage_api instance for API methods. 45 | 46 | Args: 47 | retry_params: An instance of api_utils.RetryParams. If none, 48 | thread's default will be used. 49 | account_id: Internal-use only. 50 | 51 | Returns: 52 | A storage_api instance to handle urlfetch work to GCS. 53 | On dev appserver, this instance by default will talk to a local stub 54 | unless common.ACCESS_TOKEN is set. 
That token will be used to talk 55 | to the real GCS. 56 | """ 57 | 58 | 59 | api = _StorageApi(_StorageApi.full_control_scope, 60 | service_account_id=account_id, 61 | retry_params=retry_params) 62 | if common.local_run() and not common.get_access_token(): 63 | api.api_url = common.local_api_url() 64 | if common.get_access_token(): 65 | api.token = common.get_access_token() 66 | return api 67 | 68 | 69 | class _StorageApi(rest_api._RestApi): 70 | """A simple wrapper for the Google Storage RESTful API. 71 | 72 | WARNING: Do NOT directly use this api. It's an implementation detail 73 | and is subject to change at any release. 74 | 75 | All async methods have similar args and returns. 76 | 77 | Args: 78 | path: The path to the Google Storage object or bucket, e.g. 79 | '/mybucket/myfile' or '/mybucket'. 80 | **kwd: Options for urlfetch. e.g. 81 | headers={'content-type': 'text/plain'}, payload='blah'. 82 | 83 | Returns: 84 | A ndb Future. When fulfilled, future.get_result() should return 85 | a tuple of (status, headers, content) that represents a HTTP response 86 | of Google Cloud Storage XML API. 87 | """ 88 | 89 | api_url = 'https://storage.googleapis.com' 90 | read_only_scope = 'https://www.googleapis.com/auth/devstorage.read_only' 91 | read_write_scope = 'https://www.googleapis.com/auth/devstorage.read_write' 92 | full_control_scope = 'https://www.googleapis.com/auth/devstorage.full_control' 93 | 94 | def __getstate__(self): 95 | """Store state as part of serialization/pickling. 96 | 97 | Returns: 98 | A tuple (of dictionaries) with the state of this object 99 | """ 100 | return (super(_StorageApi, self).__getstate__(), {'api_url': self.api_url}) 101 | 102 | def __setstate__(self, state): 103 | """Restore state as part of deserialization/unpickling. 104 | 105 | Args: 106 | state: the tuple from a __getstate__ call 107 | """ 108 | superstate, localstate = state 109 | super(_StorageApi, self).__setstate__(superstate) 110 | self.api_url = localstate['api_url'] 111 | 112 | @api_utils._eager_tasklet 113 | @ndb.tasklet 114 | def do_request_async(self, url, method='GET', headers=None, payload=None, 115 | deadline=None, callback=None): 116 | """Inherit docs. 117 | 118 | This method translates urlfetch exceptions to more service specific ones. 119 | """ 120 | if headers is None: 121 | headers = {} 122 | if 'x-goog-api-version' not in headers: 123 | headers['x-goog-api-version'] = '2' 124 | headers['accept-encoding'] = 'gzip, *' 125 | try: 126 | resp_tuple = yield super(_StorageApi, self).do_request_async( 127 | url, method=method, headers=headers, payload=payload, 128 | deadline=deadline, callback=callback) 129 | except urlfetch.DownloadError, e: 130 | raise errors.TimeoutError( 131 | 'Request to Google Cloud Storage timed out.', e) 132 | 133 | raise ndb.Return(resp_tuple) 134 | 135 | 136 | def post_object_async(self, path, **kwds): 137 | """POST to an object.""" 138 | return self.do_request_async(self.api_url + path, 'POST', **kwds) 139 | 140 | def put_object_async(self, path, **kwds): 141 | """PUT an object.""" 142 | return self.do_request_async(self.api_url + path, 'PUT', **kwds) 143 | 144 | def get_object_async(self, path, **kwds): 145 | """GET an object. 146 | 147 | Note: No payload argument is supported. 148 | """ 149 | return self.do_request_async(self.api_url + path, 'GET', **kwds) 150 | 151 | def delete_object_async(self, path, **kwds): 152 | """DELETE an object. 153 | 154 | Note: No payload argument is supported. 
155 | """ 156 | return self.do_request_async(self.api_url + path, 'DELETE', **kwds) 157 | 158 | def head_object_async(self, path, **kwds): 159 | """HEAD an object. 160 | 161 | Depending on request headers, HEAD returns various object properties, 162 | e.g. Content-Length, Last-Modified, and ETag. 163 | 164 | Note: No payload argument is supported. 165 | """ 166 | return self.do_request_async(self.api_url + path, 'HEAD', **kwds) 167 | 168 | def get_bucket_async(self, path, **kwds): 169 | """GET a bucket.""" 170 | return self.do_request_async(self.api_url + path, 'GET', **kwds) 171 | 172 | 173 | _StorageApi = rest_api.add_sync_methods(_StorageApi) 174 | 175 | 176 | class ReadBuffer(object): 177 | """A class for reading Google storage files.""" 178 | 179 | DEFAULT_BUFFER_SIZE = 1024 * 1024 180 | MAX_REQUEST_SIZE = 30 * DEFAULT_BUFFER_SIZE 181 | 182 | def __init__(self, 183 | api, 184 | path, 185 | buffer_size=DEFAULT_BUFFER_SIZE, 186 | max_request_size=MAX_REQUEST_SIZE): 187 | """Constructor. 188 | 189 | Args: 190 | api: A StorageApi instance. 191 | path: Quoted/escaped path to the object, e.g. /mybucket/myfile 192 | buffer_size: buffer size. The ReadBuffer keeps 193 | one buffer. But there may be a pending future that contains 194 | a second buffer. This size must be less than max_request_size. 195 | max_request_size: Max bytes to request in one urlfetch. 196 | """ 197 | self._api = api 198 | self._path = path 199 | self.name = api_utils._unquote_filename(path) 200 | self.closed = False 201 | 202 | assert buffer_size <= max_request_size 203 | self._buffer_size = buffer_size 204 | self._max_request_size = max_request_size 205 | self._offset = 0 206 | self._buffer = _Buffer() 207 | self._etag = None 208 | 209 | get_future = self._get_segment(0, self._buffer_size, check_response=False) 210 | 211 | status, headers, content = self._api.head_object(path) 212 | errors.check_status(status, [200], path, resp_headers=headers, body=content) 213 | self._file_size = long(common.get_stored_content_length(headers)) 214 | self._check_etag(headers.get('etag')) 215 | 216 | self._buffer_future = None 217 | 218 | if self._file_size != 0: 219 | content, check_response_closure = get_future.get_result() 220 | check_response_closure() 221 | self._buffer.reset(content) 222 | self._request_next_buffer() 223 | 224 | def __getstate__(self): 225 | """Store state as part of serialization/pickling. 226 | 227 | The contents of the read buffer are not stored, only the current offset for 228 | data read by the client. A new read buffer is established at unpickling. 229 | The head information for the object (file size and etag) are stored to 230 | reduce startup and ensure the file has not changed. 231 | 232 | Returns: 233 | A dictionary with the state of this object 234 | """ 235 | return {'api': self._api, 236 | 'path': self._path, 237 | 'buffer_size': self._buffer_size, 238 | 'request_size': self._max_request_size, 239 | 'etag': self._etag, 240 | 'size': self._file_size, 241 | 'offset': self._offset, 242 | 'closed': self.closed} 243 | 244 | def __setstate__(self, state): 245 | """Restore state as part of deserialization/unpickling. 246 | 247 | Args: 248 | state: the dictionary from a __getstate__ call 249 | 250 | Along with restoring the state, pre-fetch the next read buffer. 
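    Example (illustrative sketch; pickling is what allows an open reader to be
    handed from one request or task to another; the path is a placeholder):

      import pickle
      import cloudstorage

      f = cloudstorage.open('/my-bucket/big.log', 'r')
      f.read(1024)
      state = pickle.dumps(f)    # stores offset, etag and size, not the buffer
      f2 = pickle.loads(state)   # restores the reader and prefetches new data
      rest = f2.read()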
251 | """ 252 | self._api = state['api'] 253 | self._path = state['path'] 254 | self.name = api_utils._unquote_filename(self._path) 255 | self._buffer_size = state['buffer_size'] 256 | self._max_request_size = state['request_size'] 257 | self._etag = state['etag'] 258 | self._file_size = state['size'] 259 | self._offset = state['offset'] 260 | self._buffer = _Buffer() 261 | self.closed = state['closed'] 262 | self._buffer_future = None 263 | if self._remaining() and not self.closed: 264 | self._request_next_buffer() 265 | 266 | def __iter__(self): 267 | """Iterator interface. 268 | 269 | Note the ReadBuffer container itself is the iterator. It's 270 | (quote PEP0234) 271 | 'destructive: they consumes all the values and a second iterator 272 | cannot easily be created that iterates independently over the same values. 273 | You could open the file for the second time, or seek() to the beginning.' 274 | 275 | Returns: 276 | Self. 277 | """ 278 | return self 279 | 280 | def next(self): 281 | line = self.readline() 282 | if not line: 283 | raise StopIteration() 284 | return line 285 | 286 | def readline(self, size=-1): 287 | """Read one line delimited by '\n' from the file. 288 | 289 | A trailing newline character is kept in the string. It may be absent when a 290 | file ends with an incomplete line. If the size argument is non-negative, 291 | it specifies the maximum string size (counting the newline) to return. 292 | A negative size is the same as unspecified. Empty string is returned 293 | only when EOF is encountered immediately. 294 | 295 | Args: 296 | size: Maximum number of bytes to read. If not specified, readline stops 297 | only on '\n' or EOF. 298 | 299 | Returns: 300 | The data read as a string. 301 | 302 | Raises: 303 | IOError: When this buffer is closed. 304 | """ 305 | self._check_open() 306 | if size == 0 or not self._remaining(): 307 | return '' 308 | 309 | data_list = [] 310 | newline_offset = self._buffer.find_newline(size) 311 | while newline_offset < 0: 312 | data = self._buffer.read(size) 313 | size -= len(data) 314 | self._offset += len(data) 315 | data_list.append(data) 316 | if size == 0 or not self._remaining(): 317 | return ''.join(data_list) 318 | self._buffer.reset(self._buffer_future.get_result()) 319 | self._request_next_buffer() 320 | newline_offset = self._buffer.find_newline(size) 321 | 322 | data = self._buffer.read_to_offset(newline_offset + 1) 323 | self._offset += len(data) 324 | data_list.append(data) 325 | 326 | return ''.join(data_list) 327 | 328 | def read(self, size=-1): 329 | """Read data from RAW file. 330 | 331 | Args: 332 | size: Number of bytes to read as integer. Actual number of bytes 333 | read is always equal to size unless EOF is reached. If size is 334 | negative or unspecified, read the entire file. 335 | 336 | Returns: 337 | data read as str. 338 | 339 | Raises: 340 | IOError: When this buffer is closed. 
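    Example (illustrative sketch; the path is a placeholder):

      import cloudstorage

      with cloudstorage.open('/my-bucket/data.txt', 'r') as f:
        head = f.read(1024)   # at most 1024 bytes
        rest = f.read()       # everything remaining up to EOF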
341 | """ 342 | self._check_open() 343 | if not self._remaining(): 344 | return '' 345 | 346 | data_list = [] 347 | while True: 348 | remaining = self._buffer.remaining() 349 | if size >= 0 and size < remaining: 350 | data_list.append(self._buffer.read(size)) 351 | self._offset += size 352 | break 353 | else: 354 | size -= remaining 355 | self._offset += remaining 356 | data_list.append(self._buffer.read()) 357 | 358 | if self._buffer_future is None: 359 | if size < 0 or size >= self._remaining(): 360 | needs = self._remaining() 361 | else: 362 | needs = size 363 | data_list.extend(self._get_segments(self._offset, needs)) 364 | self._offset += needs 365 | break 366 | 367 | if self._buffer_future: 368 | self._buffer.reset(self._buffer_future.get_result()) 369 | self._buffer_future = None 370 | 371 | if self._buffer_future is None: 372 | self._request_next_buffer() 373 | return ''.join(data_list) 374 | 375 | def _remaining(self): 376 | return self._file_size - self._offset 377 | 378 | def _request_next_buffer(self): 379 | """Request next buffer. 380 | 381 | Requires self._offset and self._buffer are in consistent state. 382 | """ 383 | self._buffer_future = None 384 | next_offset = self._offset + self._buffer.remaining() 385 | if next_offset != self._file_size: 386 | self._buffer_future = self._get_segment(next_offset, 387 | self._buffer_size) 388 | 389 | def _get_segments(self, start, request_size): 390 | """Get segments of the file from Google Storage as a list. 391 | 392 | A large request is broken into segments to avoid hitting urlfetch 393 | response size limit. Each segment is returned from a separate urlfetch. 394 | 395 | Args: 396 | start: start offset to request. Inclusive. Have to be within the 397 | range of the file. 398 | request_size: number of bytes to request. 399 | 400 | Returns: 401 | A list of file segments in order 402 | """ 403 | if not request_size: 404 | return [] 405 | 406 | end = start + request_size 407 | futures = [] 408 | 409 | while request_size > self._max_request_size: 410 | futures.append(self._get_segment(start, self._max_request_size)) 411 | request_size -= self._max_request_size 412 | start += self._max_request_size 413 | if start < end: 414 | futures.append(self._get_segment(start, end-start)) 415 | return [fut.get_result() for fut in futures] 416 | 417 | @ndb.tasklet 418 | def _get_segment(self, start, request_size, check_response=True): 419 | """Get a segment of the file from Google Storage. 420 | 421 | Args: 422 | start: start offset of the segment. Inclusive. Have to be within the 423 | range of the file. 424 | request_size: number of bytes to request. Have to be small enough 425 | for a single urlfetch request. May go over the logical range of the 426 | file. 427 | check_response: True to check the validity of GCS response automatically 428 | before the future returns. False otherwise. See Yields section. 429 | 430 | Yields: 431 | If check_response is True, the segment [start, start + request_size) 432 | of the file. 433 | Otherwise, a tuple. The first element is the unverified file segment. 434 | The second element is a closure that checks response. Caller should 435 | first invoke the closure before consuing the file segment. 436 | 437 | Raises: 438 | ValueError: if the file has changed while reading. 
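    Example (illustrative sketch of the deferred-check pattern, mirroring its
    use in __init__):

      fut = self._get_segment(0, self._buffer_size, check_response=False)
      # ... issue other RPCs, e.g. the HEAD request for the file size ...
      content, check = fut.get_result()
      check()  # raises if the status code or etag was not acceptable
      self._buffer.reset(content)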
439 | """ 440 | end = start + request_size - 1 441 | content_range = '%d-%d' % (start, end) 442 | headers = {'Range': 'bytes=' + content_range} 443 | status, resp_headers, content = yield self._api.get_object_async( 444 | self._path, headers=headers) 445 | def _checker(): 446 | errors.check_status(status, [200, 206], self._path, headers, 447 | resp_headers, body=content) 448 | self._check_etag(resp_headers.get('etag')) 449 | if check_response: 450 | _checker() 451 | raise ndb.Return(content) 452 | raise ndb.Return(content, _checker) 453 | 454 | def _check_etag(self, etag): 455 | """Check if etag is the same across requests to GCS. 456 | 457 | If self._etag is None, set it. If etag is set, check that the new 458 | etag equals the old one. 459 | 460 | In the __init__ method, we fire one HEAD and one GET request using 461 | ndb tasklet. One of them would return first and set the first value. 462 | 463 | Args: 464 | etag: etag from a GCS HTTP response. None if etag is not part of the 465 | response header. It could be None for example in the case of GCS 466 | composite file. 467 | 468 | Raises: 469 | ValueError: if two etags are not equal. 470 | """ 471 | if etag is None: 472 | return 473 | elif self._etag is None: 474 | self._etag = etag 475 | elif self._etag != etag: 476 | raise ValueError('File on GCS has changed while reading.') 477 | 478 | def close(self): 479 | self.closed = True 480 | self._buffer = None 481 | self._buffer_future = None 482 | 483 | def __enter__(self): 484 | return self 485 | 486 | def __exit__(self, atype, value, traceback): 487 | self.close() 488 | return False 489 | 490 | def seek(self, offset, whence=os.SEEK_SET): 491 | """Set the file's current offset. 492 | 493 | Note if the new offset is out of bound, it is adjusted to either 0 or EOF. 494 | 495 | Args: 496 | offset: seek offset as number. 497 | whence: seek mode. Supported modes are os.SEEK_SET (absolute seek), 498 | os.SEEK_CUR (seek relative to the current position), and os.SEEK_END 499 | (seek relative to the end, offset should be negative). 500 | 501 | Raises: 502 | IOError: When this buffer is closed. 503 | ValueError: When whence is invalid. 504 | """ 505 | self._check_open() 506 | 507 | self._buffer.reset() 508 | self._buffer_future = None 509 | 510 | if whence == os.SEEK_SET: 511 | self._offset = offset 512 | elif whence == os.SEEK_CUR: 513 | self._offset += offset 514 | elif whence == os.SEEK_END: 515 | self._offset = self._file_size + offset 516 | else: 517 | raise ValueError('Whence mode %s is invalid.' % str(whence)) 518 | 519 | self._offset = min(self._offset, self._file_size) 520 | self._offset = max(self._offset, 0) 521 | if self._remaining(): 522 | self._request_next_buffer() 523 | 524 | def tell(self): 525 | """Tell the file's current offset. 526 | 527 | Returns: 528 | current offset in reading this file. 529 | 530 | Raises: 531 | IOError: When this buffer is closed. 
532 | """ 533 | self._check_open() 534 | return self._offset 535 | 536 | def _check_open(self): 537 | if self.closed: 538 | raise IOError('Buffer is closed.') 539 | 540 | def seekable(self): 541 | return True 542 | 543 | def readable(self): 544 | return True 545 | 546 | def writable(self): 547 | return False 548 | 549 | 550 | class _Buffer(object): 551 | """In memory buffer.""" 552 | 553 | def __init__(self): 554 | self.reset() 555 | 556 | def reset(self, content='', offset=0): 557 | self._buffer = content 558 | self._offset = offset 559 | 560 | def read(self, size=-1): 561 | """Returns bytes from self._buffer and update related offsets. 562 | 563 | Args: 564 | size: number of bytes to read starting from current offset. 565 | Read the entire buffer if negative. 566 | 567 | Returns: 568 | Requested bytes from buffer. 569 | """ 570 | if size < 0: 571 | offset = len(self._buffer) 572 | else: 573 | offset = self._offset + size 574 | return self.read_to_offset(offset) 575 | 576 | def read_to_offset(self, offset): 577 | """Returns bytes from self._buffer and update related offsets. 578 | 579 | Args: 580 | offset: read from current offset to this offset, exclusive. 581 | 582 | Returns: 583 | Requested bytes from buffer. 584 | """ 585 | assert offset >= self._offset 586 | result = self._buffer[self._offset: offset] 587 | self._offset += len(result) 588 | return result 589 | 590 | def remaining(self): 591 | return len(self._buffer) - self._offset 592 | 593 | def find_newline(self, size=-1): 594 | """Search for newline char in buffer starting from current offset. 595 | 596 | Args: 597 | size: number of bytes to search. -1 means all. 598 | 599 | Returns: 600 | offset of newline char in buffer. -1 if doesn't exist. 601 | """ 602 | if size < 0: 603 | return self._buffer.find('\n', self._offset) 604 | return self._buffer.find('\n', self._offset, self._offset + size) 605 | 606 | 607 | class StreamingBuffer(object): 608 | """A class for creating large objects using the 'resumable' API. 609 | 610 | The API is a subset of the Python writable stream API sufficient to 611 | support writing zip files using the zipfile module. 612 | 613 | The exact sequence of calls and use of headers is documented at 614 | https://developers.google.com/storage/docs/developer-guide#unknownresumables 615 | """ 616 | 617 | _blocksize = 256 * 1024 618 | 619 | _flushsize = 8 * _blocksize 620 | 621 | _maxrequestsize = 9 * 4 * _blocksize 622 | 623 | def __init__(self, 624 | api, 625 | path, 626 | content_type=None, 627 | gcs_headers=None): 628 | """Constructor. 629 | 630 | Args: 631 | api: A StorageApi instance. 632 | path: Quoted/escaped path to the object, e.g. /mybucket/myfile 633 | content_type: Optional content-type; Default value is 634 | delegate to Google Cloud Storage. 635 | gcs_headers: additional gs headers as a str->str dict, e.g 636 | {'x-goog-acl': 'private', 'x-goog-meta-foo': 'foo'}. 637 | Raises: 638 | IOError: When this location can not be found. 
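    Example (illustrative sketch; a StreamingBuffer is normally obtained via
    cloudstorage.open(..., 'w') rather than constructed directly, and the
    bucket, path and header values below are placeholders):

      import cloudstorage

      with cloudstorage.open('/my-bucket/report.csv', 'w',
                             content_type='text/csv',
                             options={'x-goog-acl': 'private'}) as f:
        f.write('a,b,c\n')
        f.write('1,2,3\n')
      # close() finalizes the resumable upload; the object is then readable.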
639 | """ 640 | assert self._maxrequestsize > self._blocksize 641 | assert self._maxrequestsize % self._blocksize == 0 642 | assert self._maxrequestsize >= self._flushsize 643 | 644 | self._api = api 645 | self._path = path 646 | 647 | self.name = api_utils._unquote_filename(path) 648 | self.closed = False 649 | 650 | self._buffer = collections.deque() 651 | self._buffered = 0 652 | self._written = 0 653 | self._offset = 0 654 | 655 | headers = {'x-goog-resumable': 'start'} 656 | if content_type: 657 | headers['content-type'] = content_type 658 | if gcs_headers: 659 | headers.update(gcs_headers) 660 | status, resp_headers, content = self._api.post_object(path, headers=headers) 661 | errors.check_status(status, [201], path, headers, resp_headers, 662 | body=content) 663 | loc = resp_headers.get('location') 664 | if not loc: 665 | raise IOError('No location header found in 201 response') 666 | parsed = urlparse.urlparse(loc) 667 | self._path_with_token = '%s?%s' % (self._path, parsed.query) 668 | 669 | def __getstate__(self): 670 | """Store state as part of serialization/pickling. 671 | 672 | The contents of the write buffer are stored. Writes to the underlying 673 | storage are required to be on block boundaries (_blocksize) except for the 674 | last write. In the worst case the pickled version of this object may be 675 | slightly larger than the blocksize. 676 | 677 | Returns: 678 | A dictionary with the state of this object 679 | 680 | """ 681 | return {'api': self._api, 682 | 'path': self._path, 683 | 'path_token': self._path_with_token, 684 | 'buffer': self._buffer, 685 | 'buffered': self._buffered, 686 | 'written': self._written, 687 | 'offset': self._offset, 688 | 'closed': self.closed} 689 | 690 | def __setstate__(self, state): 691 | """Restore state as part of deserialization/unpickling. 692 | 693 | Args: 694 | state: the dictionary from a __getstate__ call 695 | """ 696 | self._api = state['api'] 697 | self._path_with_token = state['path_token'] 698 | self._buffer = state['buffer'] 699 | self._buffered = state['buffered'] 700 | self._written = state['written'] 701 | self._offset = state['offset'] 702 | self.closed = state['closed'] 703 | self._path = state['path'] 704 | self.name = api_utils._unquote_filename(self._path) 705 | 706 | def write(self, data): 707 | """Write some bytes. 708 | 709 | Args: 710 | data: data to write. str. 711 | 712 | Raises: 713 | TypeError: if data is not of type str. 714 | """ 715 | self._check_open() 716 | if not isinstance(data, str): 717 | raise TypeError('Expected str but got %s.' % type(data)) 718 | if not data: 719 | return 720 | self._buffer.append(data) 721 | self._buffered += len(data) 722 | self._offset += len(data) 723 | if self._buffered >= self._flushsize: 724 | self._flush() 725 | 726 | def flush(self): 727 | """Flush as much as possible to GCS. 728 | 729 | GCS *requires* that all writes except for the final one align on 730 | 256KB boundaries. So the internal buffer may still have < 256KB bytes left 731 | after flush. 732 | """ 733 | self._check_open() 734 | self._flush(finish=False) 735 | 736 | def tell(self): 737 | """Return the total number of bytes passed to write() so far. 738 | 739 | (There is no seek() method.) 740 | """ 741 | return self._offset 742 | 743 | def close(self): 744 | """Flush the buffer and finalize the file. 745 | 746 | When this returns the new file is available for reading. 
747 | """ 748 | if not self.closed: 749 | self.closed = True 750 | self._flush(finish=True) 751 | self._buffer = None 752 | 753 | def __enter__(self): 754 | return self 755 | 756 | def __exit__(self, atype, value, traceback): 757 | self.close() 758 | return False 759 | 760 | def _flush(self, finish=False): 761 | """Internal API to flush. 762 | 763 | Buffer is flushed to GCS only when the total amount of buffered data is at 764 | least self._blocksize, or to flush the final (incomplete) block of 765 | the file with finish=True. 766 | """ 767 | while ((finish and self._buffered >= 0) or 768 | (not finish and self._buffered >= self._blocksize)): 769 | tmp_buffer = [] 770 | tmp_buffer_len = 0 771 | 772 | excess = 0 773 | while self._buffer: 774 | buf = self._buffer.popleft() 775 | size = len(buf) 776 | self._buffered -= size 777 | tmp_buffer.append(buf) 778 | tmp_buffer_len += size 779 | if tmp_buffer_len >= self._maxrequestsize: 780 | excess = tmp_buffer_len - self._maxrequestsize 781 | break 782 | if not finish and ( 783 | tmp_buffer_len % self._blocksize + self._buffered < 784 | self._blocksize): 785 | excess = tmp_buffer_len % self._blocksize 786 | break 787 | 788 | if excess: 789 | over = tmp_buffer.pop() 790 | size = len(over) 791 | assert size >= excess 792 | tmp_buffer_len -= size 793 | head, tail = over[:-excess], over[-excess:] 794 | self._buffer.appendleft(tail) 795 | self._buffered += len(tail) 796 | if head: 797 | tmp_buffer.append(head) 798 | tmp_buffer_len += len(head) 799 | 800 | data = ''.join(tmp_buffer) 801 | file_len = '*' 802 | if finish and not self._buffered: 803 | file_len = self._written + len(data) 804 | self._send_data(data, self._written, file_len) 805 | self._written += len(data) 806 | if file_len != '*': 807 | break 808 | 809 | def _send_data(self, data, start_offset, file_len): 810 | """Send the block to the storage service. 811 | 812 | This is a utility method that does not modify self. 813 | 814 | Args: 815 | data: data to send in str. 816 | start_offset: start offset of the data in relation to the file. 817 | file_len: an int if this is the last data to append to the file. 818 | Otherwise '*'. 819 | """ 820 | headers = {} 821 | end_offset = start_offset + len(data) - 1 822 | 823 | if data: 824 | headers['content-range'] = ('bytes %d-%d/%s' % 825 | (start_offset, end_offset, file_len)) 826 | else: 827 | headers['content-range'] = ('bytes */%s' % file_len) 828 | 829 | status, response_headers, content = self._api.put_object( 830 | self._path_with_token, payload=data, headers=headers) 831 | if file_len == '*': 832 | expected = 308 833 | else: 834 | expected = 200 835 | errors.check_status(status, [expected], self._path, headers, 836 | response_headers, content, 837 | {'upload_path': self._path_with_token}) 838 | 839 | def _get_offset_from_gcs(self): 840 | """Get the last offset that has been written to GCS. 841 | 842 | This is a utility method that does not modify self. 843 | 844 | Returns: 845 | an int of the last offset written to GCS by this upload, inclusive. 846 | -1 means nothing has been written. 
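    Example (illustrative sketch of the probe this method performs):

      # The method PUTs an empty body with the header
      #   content-range: bytes */*
      # A 308 response with  range: bytes=0-524287  means 524287 is the last
      # offset persisted by GCS, so 524288 bytes have been written so far.
      # A 308 response without a range header maps to a return value of -1.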
847 | """ 848 | headers = {'content-range': 'bytes */*'} 849 | status, response_headers, content = self._api.put_object( 850 | self._path_with_token, headers=headers) 851 | errors.check_status(status, [308], self._path, headers, 852 | response_headers, content, 853 | {'upload_path': self._path_with_token}) 854 | val = response_headers.get('range') 855 | if val is None: 856 | return -1 857 | _, offset = val.rsplit('-', 1) 858 | return int(offset) 859 | 860 | def _force_close(self, file_length=None): 861 | """Close this buffer on file_length. 862 | 863 | Finalize this upload immediately on file_length. 864 | Contents that are still in memory will not be uploaded. 865 | 866 | This is a utility method that does not modify self. 867 | 868 | Args: 869 | file_length: file length. Must match what has been uploaded. If None, 870 | it will be queried from GCS. 871 | """ 872 | if file_length is None: 873 | file_length = self._get_offset_from_gcs() + 1 874 | self._send_data('', 0, file_length) 875 | 876 | def _check_open(self): 877 | if self.closed: 878 | raise IOError('Buffer is closed.') 879 | 880 | def seekable(self): 881 | return False 882 | 883 | def readable(self): 884 | return False 885 | 886 | def writable(self): 887 | return True 888 | --------------------------------------------------------------------------------