├── .gitignore ├── README.md ├── app.yaml ├── appengine_config.py ├── blob_files.py ├── blob_serve.py ├── blob_upload.py ├── cloudstorage ├── __init__.py ├── api_utils.py ├── cloudstorage_api.py ├── common.py ├── errors.py ├── rest_api.py ├── storage_api.py └── test_utils.py ├── index.yaml ├── markdown ├── __init__.py ├── __main__.py ├── __version__.py ├── blockparser.py ├── blockprocessors.py ├── extensions │ ├── __init__.py │ ├── abbr.py │ ├── admonition.py │ ├── attr_list.py │ ├── codehilite.py │ ├── def_list.py │ ├── extra.py │ ├── fenced_code.py │ ├── footnotes.py │ ├── headerid.py │ ├── meta.py │ ├── nl2br.py │ ├── sane_lists.py │ ├── smart_strong.py │ ├── smarty.py │ ├── tables.py │ ├── toc.py │ └── wikilinks.py ├── inlinepatterns.py ├── odict.py ├── postprocessors.py ├── preprocessors.py ├── serializers.py ├── treeprocessors.py └── util.py ├── static └── favicon.ico └── templates ├── blob_links.html └── blob_upload.html /.gitignore: -------------------------------------------------------------------------------- 1 | # https://code.google.com/p/webapp-improved/source/browse/.gitignore 2 | 3 | *.py[co] 4 | 5 | # When this repository is included as a 6 | # submodule within a buildout project, 7 | # buildout will dump a .egg-info directory 8 | # into this directory causing the submodule 9 | # to become "dirty", and this shows up 10 | # as a change to track in the parent repository. 11 | # Therefore, we ignore *.egg-info at the 12 | # repository level. 13 | *.egg-info 14 | 15 | 16 | # OSX 17 | .DS_Store 18 | 19 | #PyCharm 20 | .idea -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## App Engine cloudstorage blobstore replacement using SDK and GAE production 2 | 3 | This code shows how to read and write blobs and how to create a blob serving url (GCS host or blobkey). 4 | Writing blob files to GCS is a replacement for the deprecated blobstore.Files API. 5 | 6 | The blob files can be images or other files like html, css, js and pdf. 7 | The free default bucket in Google Cloud Storage (GCS) is used to store the blobs. 8 | From the docs: An application can use the [default GCS bucket](https://developers.google.com/appengine/docs/python/googlecloudstorageclient/activate#Using_the_Default_GCS_Bucket), which provides an already configured bucket with [free quota](https://developers.google.com/appengine/docs/quotas#Default_Gcs_Bucket). 9 | 10 | The code always uses the Images get_serving_url for images (gif/png/jpg). 11 | This image serving url allows dynamic resizing and cropping. 12 | The use_blobstore option configures the serving_url type for non-images. 
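A minimal usage sketch of blob_files.BlobFiles (the filename and payload below are illustrative):

    import blob_files
    bf = blob_files.BlobFiles.new('logo.png', folder='/upload')
    if bf:  # new() returns None when the name already exists in another bucket/folder
        bf.blob_write(image_bytes)  # image_bytes: your blob data, written to GCS
        bf.put()  # the ndb pre-put hook computes bf.serving_url
        # bf.serving_url is an Images API url here, because 'png' is an image extension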
13 | The use_blobstore default (= True) can be overridden in appengine_config.py 14 | 15 | blob_upload contains the code to upload a file to cloudstorage: 16 | 17 | upload: https://<your-app-id>.appspot.com/blob_upload 18 | or: http://localhost:8080/blob_upload 19 | 20 | To serve the data, you can use in your Jinja HTML template: 21 | 22 | js: <script src="{{ serving_url }}"></script> 23 | css: <link rel="stylesheet" href="{{ serving_url }}"> 24 | pdf: <a href="{{ serving_url }}">Test PDF</a> 25 | img: <img src="{{ serving_url }}" alt="{{ filename }}"> 26 | 27 | In GAE production the serving url looks like: 28 | 29 | images: https://lhN.ggpht.com/NlCARAtN.........3NQW9ZxYpms=s698 30 | other: https://storage.googleapis.com/default_bucket/file_name 31 | or a blobstore-like url, when use_blobstore = True 32 | 33 | And in the SDK: 34 | 35 | images: http://localhost:8080/_ah/img/encoded_gs_file:YXBwX2R......Y3Nz 36 | other: http://localhost:8080/_ah/gcs/default_bucket/file_name 37 | or a blobstore-like url, when use_blobstore = True 38 | Note: The SDK encoded_gs_file id = base64.urlsafe_b64encode(app_default_bucket/filename) 39 | 40 | The benefits of use_blobstore = False (GCS host): 41 | 42 | - Cheaper and probably significantly faster. 43 | - Google will serve the GCS files for you. The BlobstoreDownloadHandler is not used. 44 | - The filename is short and part of the serving_url. 45 | - But the hostname of the serving url is always https://storage.googleapis.com/... (because of HTTPS) 46 | 47 | This code was tested using App Engine SDK 1.9.13 and the GCS client library. 48 | 49 | GCS client library installation on Windows 7: 50 | 51 | C:\Python27\scripts>pip install setuptools --no-use-wheel --upgrade 52 | C:\Python27\scripts>pip install GoogleAppEngineCloudStorageClient -t <path-to-your-app> -------------------------------------------------------------------------------- /app.yaml: -------------------------------------------------------------------------------- 1 | application: gcs-blobstore 2 | version: 3 3 | runtime: python27 4 | api_version: 1 5 | threadsafe: yes 6 | 7 | handlers: 8 | 9 | - url: /favicon.ico 10 | static_files: static/favicon.ico 11 | upload: static/favicon.ico 12 | 13 | - url: /.* 14 | script: blob_upload.app 15 | 16 | libraries: 17 | - name: webapp2 18 | version: latest 19 | 20 | - name: jinja2 21 | version: latest -------------------------------------------------------------------------------- /appengine_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import unicode_literals 4 | 5 | # override lib_config defaults 6 | blob_files_USE_BLOBSTORE = False -------------------------------------------------------------------------------- /blob_files.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import unicode_literals 4 | 5 | import webapp2 6 | from google.appengine.ext import blobstore 7 | from google.appengine.api import app_identity, images 8 | from google.appengine.api import lib_config 9 | import cloudstorage as gcs 10 | from google.appengine.ext import ndb 11 | import os 12 | import mimetypes 13 | import zipfile 14 | import logging 15 | 16 | # to use cloudstorage include appengine-gcs-client-python-r127.zip in your project 17 | 18 | config = lib_config.register('blob_files', { 19 | 'USE_BLOBSTORE': True, 20 | 'ARCHIVE_PATH': '/archives/BlobFiles.zip', 21 | 'UTF_8_FILE_EXTENSIONS': ['js', 'css', 'html', 'txt', 'text', 'py', 'xml'] 22 | }) 23 | 24 | 25 | class BlobFiles(ndb.Model): 26 | """ Contains GCS file names and serving urls for the
app_default_bucket 27 | GCS files can have a blobkey. A GCS blobkey does NOT have a BlobInfo object. 28 | A Blobfile entity is like a blobstore.BlobInfo object 29 | """ 30 | 31 | filename = ndb.StringProperty() # unique (folder not part of filename, key and id) 32 | extension = ndb.ComputedProperty(lambda self: self.filename.rsplit('.', 1)[1].lower()) 33 | folder = ndb.StringProperty(default='/') 34 | gcs_filename = ndb.StringProperty(required=True) # //self.filename 35 | blobkey = ndb.ComputedProperty(lambda self: blobstore.create_gs_key('/gs' + self.gcs_filename)) 36 | serving_url = ndb.StringProperty(required=True) 37 | modified = ndb.DateTimeProperty(auto_now=True) 38 | created = ndb.DateTimeProperty(auto_now_add=True) 39 | 40 | @classmethod 41 | def new(cls, filename, bucket=None, folder='/'): 42 | """ filename is the key, which makes an entity unique. But it's not allowed to overwrite a 43 | BlobFiles entity, if the new gcs_filename is not equal to the existing gcs path 44 | use_blobstore controls the type of serving_url. True: use Blobkey; False: use gcs_filename 45 | """ 46 | 47 | gcs_filename = '/%s%s/%s' % (bucket or app_identity.get_default_gcs_bucket_name(), folder, filename) 48 | bf = cls.get_by_id(filename) 49 | if bf and gcs_filename != bf.gcs_filename: 50 | logging.error('new gcs_filename: %s already exists as gcs_filename: %s' % (gcs_filename, bf.gcs_filename)) 51 | return None 52 | 53 | return BlobFiles(id=filename, filename=filename, folder=folder, gcs_filename=gcs_filename) 54 | 55 | def properties(self): 56 | 57 | return gcs.stat(self.gcs_filename) 58 | 59 | def blob_read(self): 60 | """ read binary blob from google cloud storage """ 61 | 62 | try: 63 | with gcs.open(self.gcs_filename) as f: 64 | return f.read() 65 | except gcs.NotFoundError, e: 66 | logging.warning('GCS file %s NOT FOUND : %s' % (self.gcs_filename, e)) 67 | return None 68 | 69 | def blob_reader(self): 70 | """ a BlobInfo like open returns a BlobReader """ 71 | 72 | return blobstore.BlobReader(blobstore.BlobKey(self.blobkey)) 73 | 74 | def blob_write(self, blob): 75 | """ update google cloud storage bf entity """ 76 | 77 | content_type = mimetypes.guess_type(self.filename)[0] 78 | if not content_type: 79 | logging.warning('Mimetype not guessed for: %s', self.filename) 80 | 81 | if content_type and self.extension in config.UTF_8_FILE_EXTENSIONS: 82 | content_type += b'; charset=utf-8' 83 | try: 84 | with gcs.open(self.gcs_filename, 'w', content_type=content_type or b'binary/octet-stream', 85 | options={b'x-goog-acl': b'public-read'}) as f: 86 | f.write(blob) 87 | return self.gcs_filename 88 | except Exception, e: 89 | raise Exception('Blob write failed for %s, exception: %s. 
Additional info was logged' % (self.filename, str(e))) 90 | 91 | @classmethod 92 | def list_gcs_file_names(cls, bucket=None, folder='/'): 93 | """ Example usage : for gcs_filename, filename in BlobFiles.list_gcs_file_names(folder='/upload') """ 94 | 95 | for obj in gcs.listbucket('/%s%s' % (bucket or app_identity.get_default_gcs_bucket_name(), folder)): 96 | pbf = cls._query(cls.gcs_filename == obj.filename).get(projection=cls.filename) 97 | # yield result: the gcs_filename from GCS and the corresponding filename from BlobFiles 98 | yield obj.filename, (pbf.filename if pbf else '') 99 | 100 | def delete(self): 101 | """ delete filename in GCS and BlobFiles """ 102 | 103 | try: 104 | gcs.delete(self.gcs_filename) 105 | except gcs.NotFoundError, e: 106 | logging.warning('GCS file %s NOT FOUND : %s' % (self.gcs_filename, e)) 107 | return self.key.delete() 108 | 109 | def _pre_put_hook(self): 110 | """ ndb hook to save serving_url """ 111 | 112 | if self.extension in ['jpeg', 'jpg', 'png', 'gif', 'bmp', 'tiff', 'ico']: # image API supported formats 113 | # High-performance dynamic image serving 114 | self.serving_url = images.get_serving_url(self.blobkey, secure_url=True) 115 | elif webapp2.get_request().get('use_blobstore', default_value=config.USE_BLOBSTORE) in ['T', True]: 116 | # Blobstore: GCS blob keys do not have a BlobInfo filename 117 | self.serving_url = '/use_blobstore/%s?save_as=%s' % (self.blobkey, self.filename) 118 | # bf.serving_url = '/use_blobstore/%s?save_as=%s' % (blobstore.create_gs_key('/gs' + gcs_file_name), bf.filename) 119 | elif os.environ['SERVER_SOFTWARE'].startswith('Development'): 120 | # GCS url: this SDK feature has not been documented yet !!! 121 | self.serving_url = '/_ah/gcs%s' % self.gcs_filename 122 | else: 123 | # GCS url: because of HTTPS we cannot use a cname redirect or use the use_blobstore option 124 | self.serving_url = 'https://storage.googleapis.com%s' % self.gcs_filename 125 | 126 | 127 | def blob_archive(new_bf=None): 128 | """ bonus: save all BlobFiles in a zip archive """ 129 | 130 | @ndb.tasklet 131 | def callback(bf_key): 132 | """ key_only query and get() lookup for entity consistency """ 133 | 134 | bf = yield bf_key.get_async() 135 | raise ndb.Return(bf) 136 | 137 | def blobfiles(insert, archive_key): 138 | """ We do not use ancestor queries. 
This Generator takes care of index and entity inconsistencies 139 | https://cloud.google.com/developers/articles/balancing-strong-and-eventual-consistency-with-google-cloud-datastore/ 140 | """ 141 | 142 | for bf in BlobFiles.query().filter(BlobFiles.key != archive_key).map(callback, keys_only=True): 143 | if insert and new_bf.key == bf.key: 144 | insert = False # no index inconsistency 145 | yield bf 146 | 147 | # if the new_bf entity is not yet present in BlobFiles (due to index inconsistencies), it will be inserted here 148 | if insert: 149 | yield new_bf 150 | 151 | # add all files to archive, except the archive zipfile itself which has a reserved name (BlobFiles key) 152 | (archive_folder, _, archive_file) = config.ARCHIVE_PATH.rpartition('/') 153 | 154 | if new_bf and new_bf.filename != archive_file: 155 | 156 | new_zf = BlobFiles.new(archive_file, folder=archive_folder) 157 | with gcs.open(new_zf.gcs_filename, 'w', content_type=b'multipart/x-zip', 158 | options={b'x-goog-acl': b'public-read', b'cache-control': b'private, max-age=0, no-cache'}) as nzf: 159 | 160 | # nzf is a cloudstorage.storage_api.StreamingBuffer, which can be pickled to append data in a chained task 161 | with zipfile.ZipFile(nzf, 'w') as zf: 162 | for each in blobfiles(new_bf is not None, new_zf.key): 163 | # We also could have used : each.blob_read() 164 | logging.info(each.filename) 165 | blob = each.blob_reader().read() 166 | zf.writestr(each.filename.encode('utf-8'), blob) 167 | 168 | new_zf.put_async() 169 | else: 170 | new_zf = new_bf 171 | 172 | return new_zf -------------------------------------------------------------------------------- /blob_serve.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import unicode_literals 4 | 5 | from google.appengine.ext.webapp import blobstore_handlers 6 | import urllib 7 | import logging 8 | 9 | 10 | class UseBlobstore(blobstore_handlers.BlobstoreDownloadHandler): 11 | """ use the blobstore to download a GCS blobfile """ 12 | 13 | def get(self, resource): 14 | """ Example: /use_blobstore/?save_as= 15 | GCS files cannot use the BlobInfo class. We have to use: save_as= 16 | """ 17 | 18 | save_as = self.request.get('save_as', default_value=None) 19 | logging.info('UseBlobstore download blob : ' + save_as) 20 | 21 | blob_key = str(urllib.unquote(resource)) 22 | self.send_blob(blob_key, save_as=save_as) -------------------------------------------------------------------------------- /blob_upload.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import unicode_literals 4 | 5 | import webapp2 6 | from webapp2_extras import jinja2 7 | from google.appengine.ext import ndb 8 | import blob_files 9 | import blob_serve 10 | import markdown 11 | import logging 12 | import os 13 | 14 | GCS_UPLOAD_FOLDER = '/upload' 15 | README = os.path.join(os.path.dirname(__file__), 'README.md') 16 | 17 | 18 | class BaseHandler(webapp2.RequestHandler): 19 | 20 | def handle_exception(self, exception, debug): 21 | 22 | logging.exception(exception) 23 | self.response.write('
An error occurred.
') 24 | 25 | if isinstance(exception, webapp2.HTTPException): 26 | self.response.set_status(exception.code) 27 | else: 28 | self.response.set_status(500) 29 | 30 | @webapp2.cached_property 31 | def jinja2(self): 32 | return jinja2.get_jinja2(app=self.app) 33 | 34 | def render_template(self, template, **template_args): 35 | self.response.write(self.jinja2.render_template(template, **template_args)) 36 | 37 | 38 | class BlobUpload(BaseHandler): 39 | """ upload to cloudstorage and save serving_url in BlobFiles """ 40 | 41 | def get(self): 42 | """ upload form """ 43 | 44 | self.render_template('blob_upload.html', use_blobstore=blob_files.config.USE_BLOBSTORE) 45 | 46 | def readme(self): 47 | """ readme.md to html in base template """ 48 | 49 | if self.request.method == 'GET': 50 | use_blobstore = blob_files.config.USE_BLOBSTORE 51 | else: # POST 52 | use_blobstore = (True if self.request.get('use_blobstore') == 'T' else False) 53 | readme = markdown.markdown(open(README, 'r').read(), output_format='html5') # options: markdown.__init__ 54 | self.render_template('blob_upload.html', use_blobstore=use_blobstore, readme=readme) 55 | 56 | @ndb.synctasklet 57 | def post(self): 58 | """ upload the file. Result: show file and archive links """ 59 | 60 | context = dict(failed='No file data', use_blobstore=(True if self.request.get('use_blobstore') == 'T' else False)) 61 | 62 | # read upload data, save it in GCS and a zip archive 63 | file_data = self.request.get("file", default_value=None) 64 | if file_data: 65 | 66 | filename = self.request.POST["file"].filename 67 | bf = blob_files.BlobFiles.new(filename, folder=GCS_UPLOAD_FOLDER) 68 | if bf: 69 | bf.blob_write(file_data) 70 | bf.put_async() 71 | logging.info('Uploaded and saved in default GCS bucket : ' + bf.gcs_filename) 72 | 73 | # update zip archive. make sure this (new) bf will be archived 74 | bzf = blob_files.blob_archive(new_bf=bf) 75 | 76 | context.update(dict(failed=None, bzf_url=bzf.serving_url, bzf_name=bzf.filename, 77 | bf_url=bf.serving_url, bf_name=bf.filename)) 78 | else: 79 | context.update(dict(failed='Overwrite blocked. The GCS file already exists in another bucket and/or folder')) 80 | else: 81 | logging.warning('No file data') 82 | 83 | self.render_template('blob_links.html', **context) 84 | 85 | routes = [ 86 | webapp2.Route(r'/blob_upload', handler=BlobUpload), 87 | webapp2.Route(r'/readme', handler='blob_upload.BlobUpload:readme'), 88 | ('/use_blobstore/([^/]+)?', blob_serve.UseBlobstore), 89 | ] 90 | app = ndb.toplevel(webapp2.WSGIApplication(routes=routes, debug=True)) -------------------------------------------------------------------------------- /cloudstorage/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2014 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific 13 | # language governing permissions and limitations under the License. 
14 | 15 | """Client Library for Google Cloud Storage.""" 16 | 17 | 18 | 19 | 20 | from .api_utils import RetryParams 21 | from .api_utils import set_default_retry_params 22 | from cloudstorage_api import * 23 | from .common import CSFileStat 24 | from .common import GCSFileStat 25 | from .common import validate_bucket_name 26 | from .common import validate_bucket_path 27 | from .common import validate_file_path 28 | from errors import * 29 | from storage_api import * 30 | -------------------------------------------------------------------------------- /cloudstorage/api_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2013 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific 13 | # language governing permissions and limitations under the License. 14 | 15 | """Util functions and classes for cloudstorage_api.""" 16 | 17 | 18 | 19 | __all__ = ['set_default_retry_params', 20 | 'RetryParams', 21 | ] 22 | 23 | import copy 24 | import httplib 25 | import logging 26 | import math 27 | import os 28 | import threading 29 | import time 30 | import urllib 31 | 32 | 33 | try: 34 | from google.appengine.api import app_identity 35 | from google.appengine.api import urlfetch 36 | from google.appengine.datastore import datastore_rpc 37 | from google.appengine.ext import ndb 38 | from google.appengine.ext.ndb import eventloop 39 | from google.appengine.ext.ndb import tasklets 40 | from google.appengine.ext.ndb import utils 41 | from google.appengine import runtime 42 | from google.appengine.runtime import apiproxy_errors 43 | except ImportError: 44 | from google.appengine.api import app_identity 45 | from google.appengine.api import urlfetch 46 | from google.appengine.datastore import datastore_rpc 47 | from google.appengine import runtime 48 | from google.appengine.runtime import apiproxy_errors 49 | from google.appengine.ext import ndb 50 | from google.appengine.ext.ndb import eventloop 51 | from google.appengine.ext.ndb import tasklets 52 | from google.appengine.ext.ndb import utils 53 | 54 | 55 | _RETRIABLE_EXCEPTIONS = (urlfetch.DownloadError, 56 | apiproxy_errors.Error, 57 | app_identity.InternalError, 58 | app_identity.BackendDeadlineExceeded) 59 | 60 | _thread_local_settings = threading.local() 61 | _thread_local_settings.default_retry_params = None 62 | 63 | 64 | def set_default_retry_params(retry_params): 65 | """Set a default RetryParams for current thread current request.""" 66 | _thread_local_settings.default_retry_params = copy.copy(retry_params) 67 | 68 | 69 | def _get_default_retry_params(): 70 | """Get default RetryParams for current request and current thread. 71 | 72 | Returns: 73 | A new instance of the default RetryParams. 74 | """ 75 | default = getattr(_thread_local_settings, 'default_retry_params', None) 76 | if default is None or not default.belong_to_current_request(): 77 | return RetryParams() 78 | else: 79 | return copy.copy(default) 80 | 81 | 82 | def _quote_filename(filename): 83 | """Quotes filename to use as a valid URI path. 
84 | 85 | Args: 86 | filename: user provided filename. /bucket/filename. 87 | 88 | Returns: 89 | The filename properly quoted to use as URI's path component. 90 | """ 91 | return urllib.quote(filename) 92 | 93 | 94 | def _unquote_filename(filename): 95 | """Unquotes a valid URI path back to its filename. 96 | 97 | This is the opposite of _quote_filename. 98 | 99 | Args: 100 | filename: a quoted filename. /bucket/some%20filename. 101 | 102 | Returns: 103 | The filename unquoted. 104 | """ 105 | return urllib.unquote(filename) 106 | 107 | 108 | def _should_retry(resp): 109 | """Given a urlfetch response, decide whether to retry that request.""" 110 | return (resp.status_code == httplib.REQUEST_TIMEOUT or 111 | (resp.status_code >= 500 and 112 | resp.status_code < 600)) 113 | 114 | 115 | class _RetryWrapper(object): 116 | """A wrapper that wraps retry logic around any tasklet.""" 117 | 118 | def __init__(self, 119 | retry_params, 120 | retriable_exceptions=_RETRIABLE_EXCEPTIONS, 121 | should_retry=lambda r: False): 122 | """Init. 123 | 124 | Args: 125 | retry_params: an RetryParams instance. 126 | retriable_exceptions: a list of exception classes that are retriable. 127 | should_retry: a function that takes a result from the tasklet and returns 128 | a boolean. True if the result should be retried. 129 | """ 130 | self.retry_params = retry_params 131 | self.retriable_exceptions = retriable_exceptions 132 | self.should_retry = should_retry 133 | 134 | @ndb.tasklet 135 | def run(self, tasklet, **kwds): 136 | """Run a tasklet with retry. 137 | 138 | The retry should be transparent to the caller: if no results 139 | are successful, the exception or result from the last retry is returned 140 | to the caller. 141 | 142 | Args: 143 | tasklet: the tasklet to run. 144 | **kwds: keywords arguments to run the tasklet. 145 | 146 | Raises: 147 | The exception from running the tasklet. 148 | 149 | Returns: 150 | The result from running the tasklet. 151 | """ 152 | start_time = time.time() 153 | n = 1 154 | 155 | while True: 156 | e = None 157 | result = None 158 | got_result = False 159 | 160 | try: 161 | result = yield tasklet(**kwds) 162 | got_result = True 163 | if not self.should_retry(result): 164 | raise ndb.Return(result) 165 | except runtime.DeadlineExceededError: 166 | logging.debug( 167 | 'Tasklet has exceeded request deadline after %s seconds total', 168 | time.time() - start_time) 169 | raise 170 | except self.retriable_exceptions, e: 171 | pass 172 | 173 | if n == 1: 174 | logging.debug('Tasklet is %r', tasklet) 175 | 176 | delay = self.retry_params.delay(n, start_time) 177 | 178 | if delay <= 0: 179 | logging.debug( 180 | 'Tasklet failed after %s attempts and %s seconds in total', 181 | n, time.time() - start_time) 182 | if got_result: 183 | raise ndb.Return(result) 184 | elif e is not None: 185 | raise e 186 | else: 187 | assert False, 'Should never reach here.' 
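      # Reaching this point means delay > 0: the attempt failed but a retry
      # is still allowed. With the default RetryParams (backoff_factor=2.0,
      # initial_delay=0.1, max_delay=10.0) the sleep below lasts 0.1s, 0.2s,
      # 0.4s, ... for attempts 1, 2, 3, ..., capped at 10.0 seconds.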
188 | 189 | if got_result: 190 | logging.debug( 191 | 'Got result %r from tasklet.', result) 192 | else: 193 | logging.debug( 194 | 'Got exception "%r" from tasklet.', e) 195 | logging.debug('Retry in %s seconds.', delay) 196 | n += 1 197 | yield tasklets.sleep(delay) 198 | 199 | 200 | class RetryParams(object): 201 | """Retry configuration parameters.""" 202 | 203 | _DEFAULT_USER_AGENT = 'App Engine Python GCS Client' 204 | 205 | @datastore_rpc._positional(1) 206 | def __init__(self, 207 | backoff_factor=2.0, 208 | initial_delay=0.1, 209 | max_delay=10.0, 210 | min_retries=3, 211 | max_retries=6, 212 | max_retry_period=30.0, 213 | urlfetch_timeout=None, 214 | save_access_token=False, 215 | _user_agent=None): 216 | """Init. 217 | 218 | This object is unique per request per thread. 219 | 220 | Library will retry according to this setting when App Engine Server 221 | can't call urlfetch, urlfetch timed out, or urlfetch got a 408 or 222 | 500-600 response. 223 | 224 | Args: 225 | backoff_factor: exponential backoff multiplier. 226 | initial_delay: seconds to delay for the first retry. 227 | max_delay: max seconds to delay for every retry. 228 | min_retries: min number of times to retry. This value is automatically 229 | capped by max_retries. 230 | max_retries: max number of times to retry. Set this to 0 for no retry. 231 | max_retry_period: max total seconds spent on retry. Retry stops when 232 | this period passed AND min_retries has been attempted. 233 | urlfetch_timeout: timeout for urlfetch in seconds. Could be None, 234 | in which case the value will be chosen by urlfetch module. 235 | save_access_token: persist access token to datastore to avoid 236 | excessive usage of GetAccessToken API. Usually the token is cached 237 | in process and in memcache. In some cases, memcache isn't very 238 | reliable. 239 | _user_agent: The user agent string that you want to use in your requests. 240 | """ 241 | self.backoff_factor = self._check('backoff_factor', backoff_factor) 242 | self.initial_delay = self._check('initial_delay', initial_delay) 243 | self.max_delay = self._check('max_delay', max_delay) 244 | self.max_retry_period = self._check('max_retry_period', max_retry_period) 245 | self.max_retries = self._check('max_retries', max_retries, True, int) 246 | self.min_retries = self._check('min_retries', min_retries, True, int) 247 | if self.min_retries > self.max_retries: 248 | self.min_retries = self.max_retries 249 | 250 | self.urlfetch_timeout = None 251 | if urlfetch_timeout is not None: 252 | self.urlfetch_timeout = self._check('urlfetch_timeout', urlfetch_timeout) 253 | self.save_access_token = self._check('save_access_token', save_access_token, 254 | True, bool) 255 | self._user_agent = _user_agent or self._DEFAULT_USER_AGENT 256 | 257 | self._request_id = os.getenv('REQUEST_LOG_ID') 258 | 259 | def __eq__(self, other): 260 | if not isinstance(other, self.__class__): 261 | return False 262 | return self.__dict__ == other.__dict__ 263 | 264 | def __ne__(self, other): 265 | return not self.__eq__(other) 266 | 267 | @classmethod 268 | def _check(cls, name, val, can_be_zero=False, val_type=float): 269 | """Check init arguments. 270 | 271 | Args: 272 | name: name of the argument. For logging purpose. 273 | val: value. Value has to be non negative number. 274 | can_be_zero: whether value can be zero. 275 | val_type: Python type of the value. 276 | 277 | Returns: 278 | The value. 279 | 280 | Raises: 281 | ValueError: when invalid value is passed in. 
282 | TypeError: when invalid value type is passed in. 283 | """ 284 | valid_types = [val_type] 285 | if val_type is float: 286 | valid_types.append(int) 287 | 288 | if type(val) not in valid_types: 289 | raise TypeError( 290 | 'Expect type %s for parameter %s' % (val_type.__name__, name)) 291 | if val < 0: 292 | raise ValueError( 293 | 'Value for parameter %s has to be greater than 0' % name) 294 | if not can_be_zero and val == 0: 295 | raise ValueError( 296 | 'Value for parameter %s can not be 0' % name) 297 | return val 298 | 299 | def belong_to_current_request(self): 300 | return os.getenv('REQUEST_LOG_ID') == self._request_id 301 | 302 | def delay(self, n, start_time): 303 | """Calculate delay before the next retry. 304 | 305 | Args: 306 | n: the number of current attempt. The first attempt should be 1. 307 | start_time: the time when retry started in unix time. 308 | 309 | Returns: 310 | Number of seconds to wait before next retry. -1 if retry should give up. 311 | """ 312 | if (n > self.max_retries or 313 | (n > self.min_retries and 314 | time.time() - start_time > self.max_retry_period)): 315 | return -1 316 | return min( 317 | math.pow(self.backoff_factor, n-1) * self.initial_delay, 318 | self.max_delay) 319 | 320 | 321 | def _run_until_rpc(): 322 | """Eagerly evaluate tasklets until it is blocking on some RPC. 323 | 324 | Usually ndb eventloop el isn't run until some code calls future.get_result(). 325 | 326 | When an async tasklet is called, the tasklet wrapper evaluates the tasklet 327 | code into a generator, enqueues a callback _help_tasklet_along onto 328 | the el.current queue, and returns a future. 329 | 330 | _help_tasklet_along, when called by the el, will 331 | get one yielded value from the generator. If the value if another future, 332 | set up a callback _on_future_complete to invoke _help_tasklet_along 333 | when the dependent future fulfills. If the value if a RPC, set up a 334 | callback _on_rpc_complete to invoke _help_tasklet_along when the RPC fulfills. 335 | Thus _help_tasklet_along drills down 336 | the chain of futures until some future is blocked by RPC. El runs 337 | all callbacks and constantly check pending RPC status. 338 | """ 339 | el = eventloop.get_event_loop() 340 | while el.current: 341 | el.run0() 342 | 343 | 344 | def _eager_tasklet(tasklet): 345 | """Decorator to turn tasklet to run eagerly.""" 346 | 347 | @utils.wrapping(tasklet) 348 | def eager_wrapper(*args, **kwds): 349 | fut = tasklet(*args, **kwds) 350 | _run_until_rpc() 351 | return fut 352 | 353 | return eager_wrapper 354 | -------------------------------------------------------------------------------- /cloudstorage/common.py: -------------------------------------------------------------------------------- 1 | # Copyright 2012 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific 13 | # language governing permissions and limitations under the License. 
14 | 15 | """Helpers shared by cloudstorage_stub and cloudstorage_api.""" 16 | 17 | 18 | 19 | 20 | 21 | __all__ = ['CS_XML_NS', 22 | 'CSFileStat', 23 | 'dt_str_to_posix', 24 | 'local_api_url', 25 | 'LOCAL_GCS_ENDPOINT', 26 | 'local_run', 27 | 'get_access_token', 28 | 'get_stored_content_length', 29 | 'get_metadata', 30 | 'GCSFileStat', 31 | 'http_time_to_posix', 32 | 'memory_usage', 33 | 'posix_time_to_http', 34 | 'posix_to_dt_str', 35 | 'set_access_token', 36 | 'validate_options', 37 | 'validate_bucket_name', 38 | 'validate_bucket_path', 39 | 'validate_file_path', 40 | ] 41 | 42 | 43 | import calendar 44 | import datetime 45 | from email import utils as email_utils 46 | import logging 47 | import os 48 | import re 49 | 50 | try: 51 | from google.appengine.api import runtime 52 | except ImportError: 53 | from google.appengine.api import runtime 54 | 55 | 56 | _GCS_BUCKET_REGEX_BASE = r'[a-z0-9\.\-_]{3,63}' 57 | _GCS_BUCKET_REGEX = re.compile(_GCS_BUCKET_REGEX_BASE + r'$') 58 | _GCS_BUCKET_PATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'$') 59 | _GCS_PATH_PREFIX_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'.*') 60 | _GCS_FULLPATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'/.*') 61 | _GCS_METADATA = ['x-goog-meta-', 62 | 'content-disposition', 63 | 'cache-control', 64 | 'content-encoding'] 65 | _GCS_OPTIONS = _GCS_METADATA + ['x-goog-acl'] 66 | CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01' 67 | LOCAL_GCS_ENDPOINT = '/_ah/gcs' 68 | _access_token = '' 69 | 70 | 71 | _MAX_GET_BUCKET_RESULT = 1000 72 | 73 | 74 | def set_access_token(access_token): 75 | """Set the shared access token to authenticate with Google Cloud Storage. 76 | 77 | When set, the library will always attempt to communicate with the 78 | real Google Cloud Storage with this token even when running on dev appserver. 79 | Note the token could expire so it's up to you to renew it. 80 | 81 | When absent, the library will automatically request and refresh a token 82 | on appserver, or when on dev appserver, talk to a Google Cloud Storage 83 | stub. 84 | 85 | Args: 86 | access_token: you can get one by run 'gsutil -d ls' and copy the 87 | str after 'Bearer'. 88 | """ 89 | global _access_token 90 | _access_token = access_token 91 | 92 | 93 | def get_access_token(): 94 | """Returns the shared access token.""" 95 | return _access_token 96 | 97 | 98 | class GCSFileStat(object): 99 | """Container for GCS file stat.""" 100 | 101 | def __init__(self, 102 | filename, 103 | st_size, 104 | etag, 105 | st_ctime, 106 | content_type=None, 107 | metadata=None, 108 | is_dir=False): 109 | """Initialize. 110 | 111 | For files, the non optional arguments are always set. 112 | For directories, only filename and is_dir is set. 113 | 114 | Args: 115 | filename: a Google Cloud Storage filename of form '/bucket/filename'. 116 | st_size: file size in bytes. long compatible. 117 | etag: hex digest of the md5 hash of the file's content. str. 118 | st_ctime: posix file creation time. float compatible. 119 | content_type: content type. str. 120 | metadata: a str->str dict of user specified options when creating 121 | the file. Possible keys are x-goog-meta-, content-disposition, 122 | content-encoding, and cache-control. 123 | is_dir: True if this represents a directory. False if this is a real file. 
124 | """ 125 | self.filename = filename 126 | self.is_dir = is_dir 127 | self.st_size = None 128 | self.st_ctime = None 129 | self.etag = None 130 | self.content_type = content_type 131 | self.metadata = metadata 132 | 133 | if not is_dir: 134 | self.st_size = long(st_size) 135 | self.st_ctime = float(st_ctime) 136 | if etag[0] == '"' and etag[-1] == '"': 137 | etag = etag[1:-1] 138 | self.etag = etag 139 | 140 | def __repr__(self): 141 | if self.is_dir: 142 | return '(directory: %s)' % self.filename 143 | 144 | return ( 145 | '(filename: %(filename)s, st_size: %(st_size)s, ' 146 | 'st_ctime: %(st_ctime)s, etag: %(etag)s, ' 147 | 'content_type: %(content_type)s, ' 148 | 'metadata: %(metadata)s)' % 149 | dict(filename=self.filename, 150 | st_size=self.st_size, 151 | st_ctime=self.st_ctime, 152 | etag=self.etag, 153 | content_type=self.content_type, 154 | metadata=self.metadata)) 155 | 156 | def __cmp__(self, other): 157 | if not isinstance(other, self.__class__): 158 | raise ValueError('Argument to cmp must have the same type. ' 159 | 'Expect %s, got %s', self.__class__.__name__, 160 | other.__class__.__name__) 161 | if self.filename > other.filename: 162 | return 1 163 | elif self.filename < other.filename: 164 | return -1 165 | return 0 166 | 167 | def __hash__(self): 168 | if self.etag: 169 | return hash(self.etag) 170 | return hash(self.filename) 171 | 172 | 173 | CSFileStat = GCSFileStat 174 | 175 | 176 | def get_stored_content_length(headers): 177 | """Return the content length (in bytes) of the object as stored in GCS. 178 | 179 | x-goog-stored-content-length should always be present except when called via 180 | the local dev_appserver. Therefore if it is not present we default to the 181 | standard content-length header. 182 | 183 | Args: 184 | headers: a dict of headers from the http response. 185 | 186 | Returns: 187 | the stored content length. 188 | """ 189 | length = headers.get('x-goog-stored-content-length') 190 | if length is None: 191 | length = headers.get('content-length') 192 | return length 193 | 194 | 195 | def get_metadata(headers): 196 | """Get user defined options from HTTP response headers.""" 197 | return dict((k, v) for k, v in headers.iteritems() 198 | if any(k.lower().startswith(valid) for valid in _GCS_METADATA)) 199 | 200 | 201 | def validate_bucket_name(name): 202 | """Validate a Google Storage bucket name. 203 | 204 | Args: 205 | name: a Google Storage bucket name with no prefix or suffix. 206 | 207 | Raises: 208 | ValueError: if name is invalid. 209 | """ 210 | _validate_path(name) 211 | if not _GCS_BUCKET_REGEX.match(name): 212 | raise ValueError('Bucket should be 3-63 characters long using only a-z,' 213 | '0-9, underscore, dash or dot but got %s' % name) 214 | 215 | 216 | def validate_bucket_path(path): 217 | """Validate a Google Cloud Storage bucket path. 218 | 219 | Args: 220 | path: a Google Storage bucket path. It should have form '/bucket'. 221 | 222 | Raises: 223 | ValueError: if path is invalid. 224 | """ 225 | _validate_path(path) 226 | if not _GCS_BUCKET_PATH_REGEX.match(path): 227 | raise ValueError('Bucket should have format /bucket ' 228 | 'but got %s' % path) 229 | 230 | 231 | def validate_file_path(path): 232 | """Validate a Google Cloud Storage file path. 233 | 234 | Args: 235 | path: a Google Storage file path. It should have form '/bucket/filename'. 236 | 237 | Raises: 238 | ValueError: if path is invalid. 
239 | """ 240 | _validate_path(path) 241 | if not _GCS_FULLPATH_REGEX.match(path): 242 | raise ValueError('Path should have format /bucket/filename ' 243 | 'but got %s' % path) 244 | 245 | 246 | def _process_path_prefix(path_prefix): 247 | """Validate and process a Google Cloud Stoarge path prefix. 248 | 249 | Args: 250 | path_prefix: a Google Cloud Storage path prefix of format '/bucket/prefix' 251 | or '/bucket/' or '/bucket'. 252 | 253 | Raises: 254 | ValueError: if path is invalid. 255 | 256 | Returns: 257 | a tuple of /bucket and prefix. prefix can be None. 258 | """ 259 | _validate_path(path_prefix) 260 | if not _GCS_PATH_PREFIX_REGEX.match(path_prefix): 261 | raise ValueError('Path prefix should have format /bucket, /bucket/, ' 262 | 'or /bucket/prefix but got %s.' % path_prefix) 263 | bucket_name_end = path_prefix.find('/', 1) 264 | bucket = path_prefix 265 | prefix = None 266 | if bucket_name_end != -1: 267 | bucket = path_prefix[:bucket_name_end] 268 | prefix = path_prefix[bucket_name_end + 1:] or None 269 | return bucket, prefix 270 | 271 | 272 | def _validate_path(path): 273 | """Basic validation of Google Storage paths. 274 | 275 | Args: 276 | path: a Google Storage path. It should have form '/bucket/filename' 277 | or '/bucket'. 278 | 279 | Raises: 280 | ValueError: if path is invalid. 281 | TypeError: if path is not of type basestring. 282 | """ 283 | if not path: 284 | raise ValueError('Path is empty') 285 | if not isinstance(path, basestring): 286 | raise TypeError('Path should be a string but is %s (%s).' % 287 | (path.__class__, path)) 288 | 289 | 290 | def validate_options(options): 291 | """Validate Google Cloud Storage options. 292 | 293 | Args: 294 | options: a str->basestring dict of options to pass to Google Cloud Storage. 295 | 296 | Raises: 297 | ValueError: if option is not supported. 298 | TypeError: if option is not of type str or value of an option 299 | is not of type basestring. 300 | """ 301 | if not options: 302 | return 303 | 304 | for k, v in options.iteritems(): 305 | if not isinstance(k, str): 306 | raise TypeError('option %r should be a str.' % k) 307 | if not any(k.lower().startswith(valid) for valid in _GCS_OPTIONS): 308 | raise ValueError('option %s is not supported.' % k) 309 | if not isinstance(v, basestring): 310 | raise TypeError('value %r for option %s should be of type basestring.' % 311 | (v, k)) 312 | 313 | 314 | def http_time_to_posix(http_time): 315 | """Convert HTTP time format to posix time. 316 | 317 | See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1 318 | for http time format. 319 | 320 | Args: 321 | http_time: time in RFC 2616 format. e.g. 322 | "Mon, 20 Nov 1995 19:12:08 GMT". 323 | 324 | Returns: 325 | A float of secs from unix epoch. 326 | """ 327 | if http_time is not None: 328 | return email_utils.mktime_tz(email_utils.parsedate_tz(http_time)) 329 | 330 | 331 | def posix_time_to_http(posix_time): 332 | """Convert posix time to HTML header time format. 333 | 334 | Args: 335 | posix_time: unix time. 336 | 337 | Returns: 338 | A datatime str in RFC 2616 format. 339 | """ 340 | if posix_time: 341 | return email_utils.formatdate(posix_time, usegmt=True) 342 | 343 | 344 | _DT_FORMAT = '%Y-%m-%dT%H:%M:%S' 345 | 346 | 347 | def dt_str_to_posix(dt_str): 348 | """format str to posix. 349 | 350 | datetime str is of format %Y-%m-%dT%H:%M:%S.%fZ, 351 | e.g. 2013-04-12T00:22:27.978Z. According to ISO 8601, T is a separator 352 | between date and time when they are on the same line. 353 | Z indicates UTC (zero meridian). 
354 | 355 | A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html 356 | 357 | This is used to parse LastModified node from GCS's GET bucket XML response. 358 | 359 | Args: 360 | dt_str: A datetime str. 361 | 362 | Returns: 363 | A float of secs from unix epoch. By posix definition, epoch is midnight 364 | 1970/1/1 UTC. 365 | """ 366 | parsable, _ = dt_str.split('.') 367 | dt = datetime.datetime.strptime(parsable, _DT_FORMAT) 368 | return calendar.timegm(dt.utctimetuple()) 369 | 370 | 371 | def posix_to_dt_str(posix): 372 | """Reverse of str_to_datetime. 373 | 374 | This is used by GCS stub to generate GET bucket XML response. 375 | 376 | Args: 377 | posix: A float of secs from unix epoch. 378 | 379 | Returns: 380 | A datetime str. 381 | """ 382 | dt = datetime.datetime.utcfromtimestamp(posix) 383 | dt_str = dt.strftime(_DT_FORMAT) 384 | return dt_str + '.000Z' 385 | 386 | 387 | def local_run(): 388 | """Whether we should hit GCS dev appserver stub.""" 389 | server_software = os.environ.get('SERVER_SOFTWARE') 390 | if server_software is None: 391 | return True 392 | if 'remote_api' in server_software: 393 | return False 394 | if server_software.startswith(('Development', 'testutil')): 395 | return True 396 | return False 397 | 398 | 399 | def local_api_url(): 400 | """Return URL for GCS emulation on dev appserver.""" 401 | return 'http://%s%s' % (os.environ.get('HTTP_HOST'), LOCAL_GCS_ENDPOINT) 402 | 403 | 404 | def memory_usage(method): 405 | """Log memory usage before and after a method.""" 406 | def wrapper(*args, **kwargs): 407 | logging.info('Memory before method %s is %s.', 408 | method.__name__, runtime.memory_usage().current()) 409 | result = method(*args, **kwargs) 410 | logging.info('Memory after method %s is %s', 411 | method.__name__, runtime.memory_usage().current()) 412 | return result 413 | return wrapper 414 | 415 | 416 | def _add_ns(tagname): 417 | return '{%(ns)s}%(tag)s' % {'ns': CS_XML_NS, 418 | 'tag': tagname} 419 | 420 | 421 | _T_CONTENTS = _add_ns('Contents') 422 | _T_LAST_MODIFIED = _add_ns('LastModified') 423 | _T_ETAG = _add_ns('ETag') 424 | _T_KEY = _add_ns('Key') 425 | _T_SIZE = _add_ns('Size') 426 | _T_PREFIX = _add_ns('Prefix') 427 | _T_COMMON_PREFIXES = _add_ns('CommonPrefixes') 428 | _T_NEXT_MARKER = _add_ns('NextMarker') 429 | _T_IS_TRUNCATED = _add_ns('IsTruncated') 430 | -------------------------------------------------------------------------------- /cloudstorage/errors.py: -------------------------------------------------------------------------------- 1 | # Copyright 2012 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific 13 | # language governing permissions and limitations under the License. 
14 | 15 | """Google Cloud Storage specific Files API calls.""" 16 | 17 | 18 | 19 | 20 | 21 | __all__ = ['AuthorizationError', 22 | 'check_status', 23 | 'Error', 24 | 'FatalError', 25 | 'FileClosedError', 26 | 'ForbiddenError', 27 | 'InvalidRange', 28 | 'NotFoundError', 29 | 'ServerError', 30 | 'TimeoutError', 31 | 'TransientError', 32 | ] 33 | 34 | import httplib 35 | 36 | 37 | class Error(Exception): 38 | """Base error for all gcs operations. 39 | 40 | Error can happen on GAE side or GCS server side. 41 | For details on a particular GCS HTTP response code, see 42 | https://developers.google.com/storage/docs/reference-status#standardcodes 43 | """ 44 | 45 | 46 | class TransientError(Error): 47 | """TransientError could be retried.""" 48 | 49 | 50 | class TimeoutError(TransientError): 51 | """HTTP 408 timeout.""" 52 | 53 | 54 | class FatalError(Error): 55 | """FatalError shouldn't be retried.""" 56 | 57 | 58 | class FileClosedError(FatalError): 59 | """File is already closed. 60 | 61 | This can happen when the upload has finished but 'write' is called on 62 | a stale upload handle. 63 | """ 64 | 65 | 66 | class NotFoundError(FatalError): 67 | """HTTP 404 resource not found.""" 68 | 69 | 70 | class ForbiddenError(FatalError): 71 | """HTTP 403 Forbidden. 72 | 73 | While GCS replies with a 403 error for many reasons, the most common one 74 | is due to bucket permission not correctly setup for your app to access. 75 | """ 76 | 77 | 78 | class AuthorizationError(FatalError): 79 | """HTTP 401 authentication required. 80 | 81 | Unauthorized request has been received by GCS. 82 | 83 | This error is mostly handled by GCS client. GCS client will request 84 | a new access token and retry the request. 85 | """ 86 | 87 | 88 | class InvalidRange(FatalError): 89 | """HTTP 416 RequestRangeNotSatifiable.""" 90 | 91 | 92 | class ServerError(TransientError): 93 | """HTTP >= 500 server side error.""" 94 | 95 | 96 | def check_status(status, expected, path, headers=None, 97 | resp_headers=None, body=None, extras=None): 98 | """Check HTTP response status is expected. 99 | 100 | Args: 101 | status: HTTP response status. int. 102 | expected: a list of expected statuses. A list of ints. 103 | path: filename or a path prefix. 104 | headers: HTTP request headers. 105 | resp_headers: HTTP response headers. 106 | body: HTTP response body. 107 | extras: extra info to be logged verbatim if error occurs. 108 | 109 | Raises: 110 | AuthorizationError: if authorization failed. 111 | NotFoundError: if an object that's expected to exist doesn't. 112 | TimeoutError: if HTTP request timed out. 113 | ServerError: if server experienced some errors. 114 | FatalError: if any other unexpected errors occurred. 115 | """ 116 | if status in expected: 117 | return 118 | 119 | msg = ('Expect status %r from Google Storage. 
But got status %d.\n' 120 | 'Path: %r.\n' 121 | 'Request headers: %r.\n' 122 | 'Response headers: %r.\n' 123 | 'Body: %r.\n' 124 | 'Extra info: %r.\n' % 125 | (expected, status, path, headers, resp_headers, body, extras)) 126 | 127 | if status == httplib.UNAUTHORIZED: 128 | raise AuthorizationError(msg) 129 | elif status == httplib.FORBIDDEN: 130 | raise ForbiddenError(msg) 131 | elif status == httplib.NOT_FOUND: 132 | raise NotFoundError(msg) 133 | elif status == httplib.REQUEST_TIMEOUT: 134 | raise TimeoutError(msg) 135 | elif status == httplib.REQUESTED_RANGE_NOT_SATISFIABLE: 136 | raise InvalidRange(msg) 137 | elif (status == httplib.OK and 308 in expected and 138 | httplib.OK not in expected): 139 | raise FileClosedError(msg) 140 | elif status >= 500: 141 | raise ServerError(msg) 142 | else: 143 | raise FatalError(msg) 144 | -------------------------------------------------------------------------------- /cloudstorage/rest_api.py: -------------------------------------------------------------------------------- 1 | # Copyright 2012 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific 13 | # language governing permissions and limitations under the License. 14 | 15 | """Base and helper classes for Google RESTful APIs.""" 16 | 17 | 18 | 19 | 20 | 21 | __all__ = ['add_sync_methods'] 22 | 23 | import random 24 | import time 25 | 26 | from . import api_utils 27 | 28 | try: 29 | from google.appengine.api import app_identity 30 | from google.appengine.ext import ndb 31 | except ImportError: 32 | from google.appengine.api import app_identity 33 | from google.appengine.ext import ndb 34 | 35 | 36 | 37 | def _make_sync_method(name): 38 | """Helper to synthesize a synchronous method from an async method name. 39 | 40 | Used by the @add_sync_methods class decorator below. 41 | 42 | Args: 43 | name: The name of the synchronous method. 44 | 45 | Returns: 46 | A method (with first argument 'self') that retrieves and calls 47 | self., passing its own arguments, expects it to return a 48 | Future, and then waits for and returns that Future's result. 49 | """ 50 | 51 | def sync_wrapper(self, *args, **kwds): 52 | method = getattr(self, name) 53 | future = method(*args, **kwds) 54 | return future.get_result() 55 | 56 | return sync_wrapper 57 | 58 | 59 | def add_sync_methods(cls): 60 | """Class decorator to add synchronous methods corresponding to async methods. 61 | 62 | This modifies the class in place, adding additional methods to it. 63 | If a synchronous method of a given name already exists it is not 64 | replaced. 65 | 66 | Args: 67 | cls: A class. 68 | 69 | Returns: 70 | The same class, modified in place. 
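  Illustrative example:

      @add_sync_methods
      class Api(object):
          def fetch_async(self, url):
              pass  # stub; a real method would return an ndb.Future

  Api now also has a synchronous Api.fetch() that calls fetch_async() and
  waits for the returned Future's result.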
71 | """ 72 | for name in cls.__dict__.keys(): 73 | if name.endswith('_async'): 74 | sync_name = name[:-6] 75 | if not hasattr(cls, sync_name): 76 | setattr(cls, sync_name, _make_sync_method(name)) 77 | return cls 78 | 79 | 80 | class _AE_TokenStorage_(ndb.Model): 81 | """Entity to store app_identity tokens in memcache.""" 82 | 83 | token = ndb.StringProperty() 84 | expires = ndb.FloatProperty() 85 | 86 | 87 | @ndb.tasklet 88 | def _make_token_async(scopes, service_account_id): 89 | """Get a fresh authentication token. 90 | 91 | Args: 92 | scopes: A list of scopes. 93 | service_account_id: Internal-use only. 94 | 95 | Raises: 96 | An ndb.Return with a tuple (token, expiration_time) where expiration_time is 97 | seconds since the epoch. 98 | """ 99 | rpc = app_identity.create_rpc() 100 | app_identity.make_get_access_token_call(rpc, scopes, service_account_id) 101 | token, expires_at = yield rpc 102 | raise ndb.Return((token, expires_at)) 103 | 104 | 105 | class _RestApi(object): 106 | """Base class for REST-based API wrapper classes. 107 | 108 | This class manages authentication tokens and request retries. All 109 | APIs are available as synchronous and async methods; synchronous 110 | methods are synthesized from async ones by the add_sync_methods() 111 | function in this module. 112 | 113 | WARNING: Do NOT directly use this api. It's an implementation detail 114 | and is subject to change at any release. 115 | """ 116 | 117 | def __init__(self, scopes, service_account_id=None, token_maker=None, 118 | retry_params=None): 119 | """Constructor. 120 | 121 | Args: 122 | scopes: A scope or a list of scopes. 123 | service_account_id: Internal use only. 124 | token_maker: An asynchronous function of the form 125 | (scopes, service_account_id) -> (token, expires). 126 | retry_params: An instance of api_utils.RetryParams. If None, the 127 | default for current thread will be used. 128 | """ 129 | 130 | if isinstance(scopes, basestring): 131 | scopes = [scopes] 132 | self.scopes = scopes 133 | self.service_account_id = service_account_id 134 | self.make_token_async = token_maker or _make_token_async 135 | if not retry_params: 136 | retry_params = api_utils._get_default_retry_params() 137 | self.retry_params = retry_params 138 | self.user_agent = {'User-Agent': retry_params._user_agent} 139 | self.expiration_headroom = random.randint(60, 240) 140 | 141 | def __getstate__(self): 142 | """Store state as part of serialization/pickling.""" 143 | return {'scopes': self.scopes, 144 | 'id': self.service_account_id, 145 | 'a_maker': (None if self.make_token_async == _make_token_async 146 | else self.make_token_async), 147 | 'retry_params': self.retry_params, 148 | 'expiration_headroom': self.expiration_headroom} 149 | 150 | def __setstate__(self, state): 151 | """Restore state as part of deserialization/unpickling.""" 152 | self.__init__(state['scopes'], 153 | service_account_id=state['id'], 154 | token_maker=state['a_maker'], 155 | retry_params=state['retry_params']) 156 | self.expiration_headroom = state['expiration_headroom'] 157 | 158 | @ndb.tasklet 159 | def do_request_async(self, url, method='GET', headers=None, payload=None, 160 | deadline=None, callback=None): 161 | """Issue one HTTP request. 162 | 163 | It performs async retries using tasklets. 164 | 165 | Args: 166 | url: the url to fetch. 167 | method: the method in which to fetch. 168 | headers: the http headers. 169 | payload: the data to submit in the fetch. 170 | deadline: the deadline in which to make the call. 
171 | callback: the call to make once completed. 172 | 173 | Yields: 174 | The async fetch of the url. 175 | """ 176 | retry_wrapper = api_utils._RetryWrapper( 177 | self.retry_params, 178 | retriable_exceptions=api_utils._RETRIABLE_EXCEPTIONS, 179 | should_retry=api_utils._should_retry) 180 | resp = yield retry_wrapper.run( 181 | self.urlfetch_async, 182 | url=url, 183 | method=method, 184 | headers=headers, 185 | payload=payload, 186 | deadline=deadline, 187 | callback=callback, 188 | follow_redirects=False) 189 | raise ndb.Return((resp.status_code, resp.headers, resp.content)) 190 | 191 | @ndb.tasklet 192 | def get_token_async(self, refresh=False): 193 | """Get an authentication token. 194 | 195 | The token is cached in memcache, keyed by the scopes argument. 196 | Uses a random token expiration headroom value generated in the constructor 197 | to eliminate a burst of GET_ACCESS_TOKEN API requests. 198 | 199 | Args: 200 | refresh: If True, ignore a cached token; default False. 201 | 202 | Yields: 203 | An authentication token. This token is guaranteed to be non-expired. 204 | """ 205 | key = '%s,%s' % (self.service_account_id, ','.join(self.scopes)) 206 | ts = yield _AE_TokenStorage_.get_by_id_async( 207 | key, use_cache=True, use_memcache=True, 208 | use_datastore=self.retry_params.save_access_token) 209 | if refresh or ts is None or ts.expires < ( 210 | time.time() + self.expiration_headroom): 211 | token, expires_at = yield self.make_token_async( 212 | self.scopes, self.service_account_id) 213 | timeout = int(expires_at - time.time()) 214 | ts = _AE_TokenStorage_(id=key, token=token, expires=expires_at) 215 | if timeout > 0: 216 | yield ts.put_async(memcache_timeout=timeout, 217 | use_datastore=self.retry_params.save_access_token, 218 | use_cache=True, use_memcache=True) 219 | raise ndb.Return(ts.token) 220 | 221 | @ndb.tasklet 222 | def urlfetch_async(self, url, method='GET', headers=None, 223 | payload=None, deadline=None, callback=None, 224 | follow_redirects=False): 225 | """Make an async urlfetch() call. 226 | 227 | This is an async wrapper around urlfetch(). It adds an authentication 228 | header. 229 | 230 | Args: 231 | url: the url to fetch. 232 | method: the method in which to fetch. 233 | headers: the http headers. 234 | payload: the data to submit in the fetch. 235 | deadline: the deadline in which to make the call. 236 | callback: the call to make once completed. 237 | follow_redirects: whether or not to follow redirects. 238 | 239 | Yields: 240 | This returns a Future despite not being decorated with @ndb.tasklet! 241 | """ 242 | headers = {} if headers is None else dict(headers) 243 | headers.update(self.user_agent) 244 | self.token = yield self.get_token_async() 245 | if self.token: 246 | headers['authorization'] = 'OAuth ' + self.token 247 | 248 | deadline = deadline or self.retry_params.urlfetch_timeout 249 | 250 | ctx = ndb.get_context() 251 | resp = yield ctx.urlfetch( 252 | url, payload=payload, method=method, 253 | headers=headers, follow_redirects=follow_redirects, 254 | deadline=deadline, callback=callback) 255 | raise ndb.Return(resp) 256 | 257 | 258 | _RestApi = add_sync_methods(_RestApi) 259 | -------------------------------------------------------------------------------- /cloudstorage/test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2013 Google Inc. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific 13 | # language governing permissions and limitations under the License. 14 | 15 | """Utils for testing.""" 16 | 17 | 18 | class MockUrlFetchResult(object): 19 | 20 | def __init__(self, status, headers, body): 21 | self.status_code = status 22 | self.headers = headers 23 | self.content = body 24 | self.content_was_truncated = False 25 | self.final_url = None 26 | -------------------------------------------------------------------------------- /index.yaml: -------------------------------------------------------------------------------- 1 | indexes: 2 | 3 | # AUTOGENERATED 4 | 5 | # This index.yaml is automatically updated whenever the dev_appserver 6 | # detects that a new type of query is run. If you want to manage the 7 | # index.yaml file manually, remove the above marker line (the line 8 | # saying "# AUTOGENERATED"). If you want to manage some indexes 9 | # manually, move them above the marker line. The index.yaml file is 10 | # automatically uploaded to the admin console when you next deploy 11 | # your application using appcfg.py. 12 | 13 | - kind: BlobFiles 14 | properties: 15 | - name: gcs_filename 16 | - name: filename 17 | -------------------------------------------------------------------------------- /markdown/__main__.py: -------------------------------------------------------------------------------- 1 | """ 2 | COMMAND-LINE SPECIFIC STUFF 3 | ============================================================================= 4 | 5 | """ 6 | 7 | import markdown 8 | import sys 9 | import optparse 10 | import codecs 11 | try: 12 | import yaml 13 | except ImportError: #pragma: no cover 14 | import json as yaml 15 | 16 | import logging 17 | from logging import DEBUG, INFO, CRITICAL 18 | 19 | logger = logging.getLogger('MARKDOWN') 20 | 21 | def parse_options(args=None, values=None): 22 | """ 23 | Define and parse `optparse` options for command-line usage. 24 | """ 25 | usage = """%prog [options] [INPUTFILE] 26 | (STDIN is assumed if no INPUTFILE is given)""" 27 | desc = "A Python implementation of John Gruber's Markdown. " \ 28 | "https://pythonhosted.org/Markdown/" 29 | ver = "%%prog %s" % markdown.version 30 | 31 | parser = optparse.OptionParser(usage=usage, description=desc, version=ver) 32 | parser.add_option("-f", "--file", dest="filename", default=None, 33 | help="Write output to OUTPUT_FILE. Defaults to STDOUT.", 34 | metavar="OUTPUT_FILE") 35 | parser.add_option("-e", "--encoding", dest="encoding", 36 | help="Encoding for input and output files.",) 37 | parser.add_option("-s", "--safe", dest="safe", default=False, 38 | metavar="SAFE_MODE", 39 | help="Deprecated! 
'replace', 'remove' or 'escape' HTML tags in input") 40 | parser.add_option("-o", "--output_format", dest="output_format", 41 | default='xhtml1', metavar="OUTPUT_FORMAT", 42 | help="'xhtml1' (default), 'html4' or 'html5'.") 43 | parser.add_option("-n", "--no_lazy_ol", dest="lazy_ol", 44 | action='store_false', default=True, 45 | help="Observe number of first item of ordered lists.") 46 | parser.add_option("-x", "--extension", action="append", dest="extensions", 47 | help = "Load extension EXTENSION.", metavar="EXTENSION") 48 | parser.add_option("-c", "--extension_configs", dest="configfile", default=None, 49 | help="Read extension configurations from CONFIG_FILE. " 50 | "CONFIG_FILE must be of JSON or YAML format. YAML format requires " 51 | "that a python YAML library be installed. The parsed JSON or YAML " 52 | "must result in a python dictionary which would be accepted by the " 53 | "'extension_configs' keyword on the markdown.Markdown class. " 54 | "The extensions must also be loaded with the `--extension` option.", 55 | metavar="CONFIG_FILE") 56 | parser.add_option("-q", "--quiet", default = CRITICAL, 57 | action="store_const", const=CRITICAL+10, dest="verbose", 58 | help="Suppress all warnings.") 59 | parser.add_option("-v", "--verbose", 60 | action="store_const", const=INFO, dest="verbose", 61 | help="Print all warnings.") 62 | parser.add_option("--noisy", 63 | action="store_const", const=DEBUG, dest="verbose", 64 | help="Print debug messages.") 65 | 66 | (options, args) = parser.parse_args(args, values) 67 | 68 | if len(args) == 0: 69 | input_file = None 70 | else: 71 | input_file = args[0] 72 | 73 | if not options.extensions: 74 | options.extensions = [] 75 | 76 | extension_configs = {} 77 | if options.configfile: 78 | with codecs.open(options.configfile, mode="r", encoding=options.encoding) as fp: 79 | try: 80 | extension_configs = yaml.load(fp) 81 | except Exception as e: 82 | message = "Failed parsing extension config file: %s" % options.configfile 83 | e.args = (message,) + e.args[1:] 84 | raise 85 | 86 | return {'input': input_file, 87 | 'output': options.filename, 88 | 'safe_mode': options.safe, 89 | 'extensions': options.extensions, 90 | 'extension_configs': extension_configs, 91 | 'encoding': options.encoding, 92 | 'output_format': options.output_format, 93 | 'lazy_ol': options.lazy_ol}, options.verbose 94 | 95 | def run(): #pragma: no cover 96 | """Run Markdown from the command line.""" 97 | 98 | # Parse options and adjust logging level if necessary 99 | options, logging_level = parse_options() 100 | if not options: sys.exit(2) 101 | logger.setLevel(logging_level) 102 | logger.addHandler(logging.StreamHandler()) 103 | 104 | # Run 105 | markdown.markdownFromFile(**options) 106 | 107 | if __name__ == '__main__': #pragma: no cover 108 | # Support running module as a commandline command. 109 | # Python 2.5 & 2.6 do: `python -m markdown.__main__ [options] [args]`. 110 | # Python 2.7 & 3.x do: `python -m markdown [options] [args]`. 
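# For example, a hypothetical invocation (input.md and out.html are
# placeholder names, not files shipped with this project):
#     python -m markdown -f out.html input.md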
111 | run() 112 | -------------------------------------------------------------------------------- /markdown/__version__.py: -------------------------------------------------------------------------------- 1 | # 2 | # markdown/__version__.py 3 | # 4 | # version_info should conform to PEP 386 5 | # (major, minor, micro, alpha/beta/rc/final, #) 6 | # (1, 1, 2, 'alpha', 0) => "1.1.2.dev" 7 | # (1, 2, 0, 'beta', 2) => "1.2b2" 8 | version_info = (2, 5, 1, 'final', 0) 9 | 10 | def _get_version(): 11 | " Returns a PEP 386-compliant version number from version_info. " 12 | assert len(version_info) == 5 13 | assert version_info[3] in ('alpha', 'beta', 'rc', 'final') 14 | 15 | parts = 2 if version_info[2] == 0 else 3 16 | main = '.'.join(map(str, version_info[:parts])) 17 | 18 | sub = '' 19 | if version_info[3] == 'alpha' and version_info[4] == 0: 20 | # maybe append some sort of git info here?? 21 | sub = '.dev' 22 | elif version_info[3] != 'final': 23 | mapping = {'alpha': 'a', 'beta': 'b', 'rc': 'c'} 24 | sub = mapping[version_info[3]] + str(version_info[4]) 25 | 26 | return str(main + sub) 27 | 28 | version = _get_version() 29 | -------------------------------------------------------------------------------- /markdown/blockparser.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import absolute_import 3 | from . import util 4 | from . import odict 5 | 6 | class State(list): 7 | """ Track the current and nested state of the parser. 8 | 9 | This utility class is used to track the state of the BlockParser and 10 | support multiple levels of nesting. It's just a simple API wrapped around 11 | a list. Each time a state is set, that state is appended to the end of the 12 | list. Each time a state is reset, that state is removed from the end of 13 | the list. 14 | 15 | Therefore, each time a state is set for a nested block, that state must be 16 | reset when we back out of that level of nesting or the state could be 17 | corrupted. 18 | 19 | While all the methods of a list object are available, only the three 20 | defined below need be used. 21 | 22 | """ 23 | 24 | def set(self, state): 25 | """ Set a new state. """ 26 | self.append(state) 27 | 28 | def reset(self): 29 | """ Step back one step in nested state. """ 30 | self.pop() 31 | 32 | def isstate(self, state): 33 | """ Test that top (current) level is of given state. """ 34 | if len(self): 35 | return self[-1] == state 36 | else: 37 | return False 38 | 39 | class BlockParser: 40 | """ Parse Markdown blocks into an ElementTree object. 41 | 42 | A wrapper class that stitches the various BlockProcessors together, 43 | looping through them and creating an ElementTree object. 44 | """ 45 | 46 | def __init__(self, markdown): 47 | self.blockprocessors = odict.OrderedDict() 48 | self.state = State() 49 | self.markdown = markdown 50 | 51 | def parseDocument(self, lines): 52 | """ Parse a markdown document into an ElementTree. 53 | 54 | Given a list of lines, an ElementTree object (not just a parent Element) 55 | is created and the root element is passed to the parser as the parent. 56 | The ElementTree object is returned. 57 | 58 | This should only be called on an entire document, not pieces. 
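A minimal usage sketch (an assumption for illustration, not part of the
original docs; ``md.parser`` is the BlockParser of a default instance
and the input lines are placeholders):

    import markdown
    md = markdown.Markdown()
    tree = md.parser.parseDocument(['# Title', '', 'Some text.'])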
59 | 60 | """ 61 | # Create an ElementTree from the lines 62 | self.root = util.etree.Element(self.markdown.doc_tag) 63 | self.parseChunk(self.root, '\n'.join(lines)) 64 | return util.etree.ElementTree(self.root) 65 | 66 | def parseChunk(self, parent, text): 67 | """ Parse a chunk of markdown text and attach to given etree node. 68 | 69 | While the ``text`` argument is generally assumed to contain multiple 70 | blocks which will be split on blank lines, it could contain only one 71 | block. Generally, this method would be called by extensions when 72 | block parsing is required. 73 | 74 | The ``parent`` etree Element passed in is altered in place. 75 | Nothing is returned. 76 | 77 | """ 78 | self.parseBlocks(parent, text.split('\n\n')) 79 | 80 | def parseBlocks(self, parent, blocks): 81 | """ Process blocks of markdown text and attach to given etree node. 82 | 83 | Given a list of ``blocks``, each blockprocessor is stepped through 84 | until there are no blocks left. While an extension could potentially 85 | call this method directly, it's generally expected to be used internally. 86 | 87 | This is a public method as an extension may need to add/alter additional 88 | BlockProcessors which call this method to recursively parse a nested 89 | block. 90 | 91 | """ 92 | while blocks: 93 | for processor in self.blockprocessors.values(): 94 | if processor.test(parent, blocks[0]): 95 | if processor.run(parent, blocks) is not False: 96 | # run returns True or None 97 | break 98 | 99 | 100 | -------------------------------------------------------------------------------- /markdown/extensions/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Extensions 3 | ----------------------------------------------------------------------------- 4 | """ 5 | 6 | from __future__ import unicode_literals 7 | from ..util import parseBoolValue 8 | import warnings 9 | 10 | class Extension(object): 11 | """ Base class for extensions to subclass. """ 12 | 13 | # Default config -- to be overridden by a subclass 14 | # Must be of the following format: 15 | # { 16 | # 'key': ['value', 'description'] 17 | # } 18 | # Note that Extension.setConfig will raise a KeyError 19 | # if a default is not set here. 20 | config = {} 21 | 22 | def __init__(self, *args, **kwargs): 23 | """ Initialize Extension and set up configs. """ 24 | 25 | # check for configs arg for backward compat. 26 | # (there only ever used to be one so we use args[0]) 27 | if len(args): 28 | self.setConfigs(args[0]) 29 | warnings.warn('Extension classes accepting positional args is pending Deprecation. ' 30 | 'Each setting should be passed into the Class as a keyword. Positional ' 31 | 'args will be deprecated in version 2.6 and raise an error in version ' 32 | '2.7. See the Release Notes for Python-Markdown version 2.5 for more info.', 33 | PendingDeprecationWarning) 34 | # check for configs kwarg for backward compat. 35 | if 'configs' in kwargs.keys(): 36 | self.setConfigs(kwargs.pop('configs', {})) 37 | warnings.warn('Extension classes accepting a dict on the single keyword "configs" is ' 38 | 'pending Deprecation. Each setting should be passed into the Class as ' 39 | 'a keyword directly. The "configs" keyword will be deprecated in version ' 40 | '2.6 and raise an error in version 2.7. 
See the Release Notes for ' 41 | 'Python-Markdown version 2.5 for more info.', 42 | PendingDeprecationWarning) 43 | # finally, use kwargs 44 | self.setConfigs(kwargs) 45 | 46 | 47 | def getConfig(self, key, default=''): 48 | """ Return the setting for the given key, or the given default (an empty string unless specified). """ 49 | if key in self.config: 50 | return self.config[key][0] 51 | else: 52 | return default 53 | 54 | def getConfigs(self): 55 | """ Return all config settings as a dict. """ 56 | return dict([(key, self.getConfig(key)) for key in self.config.keys()]) 57 | 58 | def getConfigInfo(self): 59 | """ Return all config descriptions as a list of tuples. """ 60 | return [(key, self.config[key][1]) for key in self.config.keys()] 61 | 62 | def setConfig(self, key, value): 63 | """ Set a config setting for `key` with the given `value`. """ 64 | if isinstance(self.config[key][0], bool): 65 | value = parseBoolValue(value) 66 | if self.config[key][0] is None: 67 | value = parseBoolValue(value, preserve_none=True) 68 | self.config[key][0] = value 69 | 70 | def setConfigs(self, items): 71 | """ Set multiple config settings given a dict or list of tuples. """ 72 | if hasattr(items, 'items'): 73 | # it's a dict 74 | items = items.items() 75 | for key, value in items: 76 | self.setConfig(key, value) 77 | 78 | def extendMarkdown(self, md, md_globals): 79 | """ 80 | Add the various processors and patterns to the Markdown instance. 81 | 82 | This method must be overridden by every extension. 83 | 84 | Keyword arguments: 85 | 86 | * md: The Markdown instance. 87 | 88 | * md_globals: Global variables in the markdown module namespace. 89 | 90 | """ 91 | raise NotImplementedError('Extension "%s.%s" must define an "extendMarkdown" ' \ 92 | 'method.' % (self.__class__.__module__, self.__class__.__name__)) 93 | 94 | -------------------------------------------------------------------------------- /markdown/extensions/abbr.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Abbreviation Extension for Python-Markdown 3 | ========================================== 4 | 5 | This extension adds abbreviation handling to Python-Markdown. 6 | 7 | See <https://pythonhosted.org/Markdown/extensions/abbreviations.html> 8 | for documentation. 9 | 10 | Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/) and 11 | [Seemant Kulleen](http://www.kulleen.org/) 12 | 13 | All changes Copyright 2008-2014 The Python Markdown Project 14 | 15 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 16 | 17 | ''' 18 | 19 | from __future__ import absolute_import 20 | from __future__ import unicode_literals 21 | from . import Extension 22 | from ..preprocessors import Preprocessor 23 | from ..inlinepatterns import Pattern 24 | from ..util import etree, AtomicString 25 | import re 26 | 27 | # Global Vars 28 | ABBR_REF_RE = re.compile(r'[*]\[(?P<abbr>[^\]]*)\][ ]?:\s*(?P<title>.*)') 29 | 30 | class AbbrExtension(Extension): 31 | """ Abbreviation Extension for Python-Markdown. """ 32 | 33 | def extendMarkdown(self, md, md_globals): 34 | """ Insert AbbrPreprocessor before ReferencePreprocessor. """ 35 | md.preprocessors.add('abbr', AbbrPreprocessor(md), '<reference') 36 | 37 | 38 | class AbbrPreprocessor(Preprocessor): 39 | """ Abbreviation Preprocessor - parse text for abbr references. """ 40 | 41 | def run(self, lines): 42 | ''' 43 | Find and remove all Abbreviation references from the text. 44 | Each reference is set as a new AbbrPattern in the markdown instance. 
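For example (an illustrative input, not from the original docstring),
given the reference line

    *[HTML]: Hyper Text Markup Language

that line is removed, and later occurrences of HTML are rendered as
<abbr title="Hyper Text Markup Language">HTML</abbr>.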
45 | 46 | ''' 47 | new_text = [] 48 | for line in lines: 49 | m = ABBR_REF_RE.match(line) 50 | if m: 51 | abbr = m.group('abbr').strip() 52 | title = m.group('title').strip() 53 | self.markdown.inlinePatterns['abbr-%s'%abbr] = \ 54 | AbbrPattern(self._generate_pattern(abbr), title) 55 | else: 56 | new_text.append(line) 57 | return new_text 58 | 59 | def _generate_pattern(self, text): 60 | ''' 61 | Given a string, returns a regex pattern to match that string. 62 | 63 | 'HTML' -> r'(?P<abbr>[H][T][M][L])' 64 | 65 | Note: we force each char as a literal match (in brackets) as we don't 66 | know what they will be beforehand. 67 | 68 | ''' 69 | chars = list(text) 70 | for i in range(len(chars)): 71 | chars[i] = r'[%s]' % chars[i] 72 | return r'(?P<abbr>\b%s\b)' % (r''.join(chars)) 73 | 74 | 75 | class AbbrPattern(Pattern): 76 | """ Abbreviation inline pattern. """ 77 | 78 | def __init__(self, pattern, title): 79 | super(AbbrPattern, self).__init__(pattern) 80 | self.title = title 81 | 82 | def handleMatch(self, m): 83 | abbr = etree.Element('abbr') 84 | abbr.text = AtomicString(m.group('abbr')) 85 | abbr.set('title', self.title) 86 | return abbr 87 | 88 | def makeExtension(*args, **kwargs): 89 | return AbbrExtension(*args, **kwargs) 90 | -------------------------------------------------------------------------------- /markdown/extensions/admonition.py: -------------------------------------------------------------------------------- 1 | """ 2 | Admonition extension for Python-Markdown 3 | ======================================== 4 | 5 | Adds rST-style admonitions. Inspired by the [rST][] feature of the same name. 6 | 7 | [rST]: http://docutils.sourceforge.net/docs/ref/rst/directives.html#specific-admonitions 8 | 9 | See <https://pythonhosted.org/Markdown/extensions/admonition.html> 10 | for documentation. 11 | 12 | Original code Copyright [Tiago Serafim](http://www.tiagoserafim.com/). 13 | 14 | All changes Copyright The Python Markdown Project 15 | 16 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 17 | 18 | """ 19 | 20 | from __future__ import absolute_import 21 | from __future__ import unicode_literals 22 | from . import Extension 23 | from ..blockprocessors import BlockProcessor 24 | from ..util import etree 25 | import re 26 | 27 | 28 | class AdmonitionExtension(Extension): 29 | """ Admonition extension for Python-Markdown. """ 30 | 31 | def extendMarkdown(self, md, md_globals): 32 | """ Add Admonition to Markdown instance. 
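A minimal usage sketch (the input text is illustrative, not from the
original docs):

    import markdown
    html = markdown.markdown('!!! note "Heads up"\n    Body text.',
                             extensions=['markdown.extensions.admonition'])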
""" 33 | md.registerExtension(self) 34 | 35 | md.parser.blockprocessors.add('admonition', 36 | AdmonitionProcessor(md.parser), 37 | '_begin') 38 | 39 | 40 | class AdmonitionProcessor(BlockProcessor): 41 | 42 | CLASSNAME = 'admonition' 43 | CLASSNAME_TITLE = 'admonition-title' 44 | RE = re.compile(r'(?:^|\n)!!!\ ?([\w\-]+)(?:\ "(.*?)")?') 45 | 46 | def test(self, parent, block): 47 | sibling = self.lastChild(parent) 48 | return self.RE.search(block) or \ 49 | (block.startswith(' ' * self.tab_length) and sibling and \ 50 | sibling.get('class', '').find(self.CLASSNAME) != -1) 51 | 52 | def run(self, parent, blocks): 53 | sibling = self.lastChild(parent) 54 | block = blocks.pop(0) 55 | m = self.RE.search(block) 56 | 57 | if m: 58 | block = block[m.end() + 1:] # removes the first line 59 | 60 | block, theRest = self.detab(block) 61 | 62 | if m: 63 | klass, title = self.get_class_and_title(m) 64 | div = etree.SubElement(parent, 'div') 65 | div.set('class', '%s %s' % (self.CLASSNAME, klass)) 66 | if title: 67 | p = etree.SubElement(div, 'p') 68 | p.text = title 69 | p.set('class', self.CLASSNAME_TITLE) 70 | else: 71 | div = sibling 72 | 73 | self.parser.parseChunk(div, block) 74 | 75 | if theRest: 76 | # This block contained unindented line(s) after the first indented 77 | # line. Insert these lines as the first block of the master blocks 78 | # list for future processing. 79 | blocks.insert(0, theRest) 80 | 81 | def get_class_and_title(self, match): 82 | klass, title = match.group(1).lower(), match.group(2) 83 | if title is None: 84 | # no title was provided, use the capitalized classname as title 85 | # e.g.: `!!! note` will render `<p class="admonition-title">Note</p>` 86 | title = klass.capitalize() 87 | elif title == '': 88 | # an explicit blank title should not be rendered 89 | # e.g.: `!!! warning ""` will *not* render `p` with a title 90 | title = None 91 | return klass, title 92 | 93 | 94 | def makeExtension(*args, **kwargs): 95 | return AdmonitionExtension(*args, **kwargs) 96 | 97 | -------------------------------------------------------------------------------- /markdown/extensions/attr_list.py: -------------------------------------------------------------------------------- 1 | """ 2 | Attribute List Extension for Python-Markdown 3 | ============================================ 4 | 5 | Adds attribute list syntax. Inspired by 6 | [maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s 7 | feature of the same name. 8 | 9 | See <https://pythonhosted.org/Markdown/extensions/attr_list.html> 10 | for documentation. 11 | 12 | Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/). 13 | 14 | All changes Copyright 2011-2014 The Python Markdown Project 15 | 16 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 17 | 18 | """ 19 | 20 | from __future__ import absolute_import 21 | from __future__ import unicode_literals 22 | from . 
import Extension 23 | from ..treeprocessors import Treeprocessor 24 | from ..util import isBlockLevel 25 | import re 26 | 27 | try: 28 | Scanner = re.Scanner 29 | except AttributeError: #pragma: no cover 30 | # must be on Python 2.4 31 | from sre import Scanner 32 | 33 | def _handle_double_quote(s, t): 34 | k, v = t.split('=') 35 | return k, v.strip('"') 36 | 37 | def _handle_single_quote(s, t): 38 | k, v = t.split('=') 39 | return k, v.strip("'") 40 | 41 | def _handle_key_value(s, t): 42 | return t.split('=') 43 | 44 | def _handle_word(s, t): 45 | if t.startswith('.'): 46 | return '.', t[1:] 47 | if t.startswith('#'): 48 | return 'id', t[1:] 49 | return t, t 50 | 51 | _scanner = Scanner([ 52 | (r'[^ ]+=".*?"', _handle_double_quote), 53 | (r"[^ ]+='.*?'", _handle_single_quote), 54 | (r'[^ ]+=[^ ]*', _handle_key_value), 55 | (r'[^ ]+', _handle_word), 56 | (r' ', None) 57 | ]) 58 | 59 | def get_attrs(str): 60 | """ Parse attribute list and return a list of attribute tuples. """ 61 | return _scanner.scan(str)[0] 62 | 63 | def isheader(elem): 64 | return elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] 65 | 66 | class AttrListTreeprocessor(Treeprocessor): 67 | 68 | BASE_RE = r'\{\:?([^\}]*)\}' 69 | HEADER_RE = re.compile(r'[ ]+%s[ ]*$' % BASE_RE) 70 | BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE) 71 | INLINE_RE = re.compile(r'^%s' % BASE_RE) 72 | NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d' 73 | r'\u037f-\u1fff\u200c-\u200d\u2070-\u218f\u2c00-\u2fef' 74 | r'\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd' 75 | r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+') 76 | 77 | def run(self, doc): 78 | for elem in doc.getiterator(): 79 | if isBlockLevel(elem.tag): 80 | # Block level: check for attrs on last line of text 81 | RE = self.BLOCK_RE 82 | if isheader(elem) or elem.tag == 'dt': 83 | # header or def-term: check for attrs at end of line 84 | RE = self.HEADER_RE 85 | if len(elem) and elem.tag == 'li': 86 | # special case list items. children may include a ul or ol. 87 | pos = None 88 | # find the ul or ol position 89 | for i, child in enumerate(elem): 90 | if child.tag in ['ul', 'ol']: 91 | pos = i 92 | break 93 | if pos is None and elem[-1].tail: 94 | # use tail of last child. no ul or ol. 95 | m = RE.search(elem[-1].tail) 96 | if m: 97 | self.assign_attrs(elem, m.group(1)) 98 | elem[-1].tail = elem[-1].tail[:m.start()] 99 | elif pos is not None and pos > 0 and elem[pos-1].tail: 100 | # use tail of last child before ul or ol 101 | m = RE.search(elem[pos-1].tail) 102 | if m: 103 | self.assign_attrs(elem, m.group(1)) 104 | elem[pos-1].tail = elem[pos-1].tail[:m.start()] 105 | elif elem.text: 106 | # use text. ul is first child. 107 | m = RE.search(elem.text) 108 | if m: 109 | self.assign_attrs(elem, m.group(1)) 110 | elem.text = elem.text[:m.start()] 111 | elif len(elem) and elem[-1].tail: 112 | # has children. Get from tail of last child 113 | m = RE.search(elem[-1].tail) 114 | if m: 115 | self.assign_attrs(elem, m.group(1)) 116 | elem[-1].tail = elem[-1].tail[:m.start()] 117 | if isheader(elem): 118 | # clean up trailing #s 119 | elem[-1].tail = elem[-1].tail.rstrip('#').rstrip() 120 | elif elem.text: 121 | # no children. Get from text. 
122 | m = RE.search(elem.text) 123 | if not m and elem.tag == 'td': 124 | m = re.search(self.BASE_RE, elem.text) 125 | if m: 126 | self.assign_attrs(elem, m.group(1)) 127 | elem.text = elem.text[:m.start()] 128 | if isheader(elem): 129 | # clean up trailing #s 130 | elem.text = elem.text.rstrip('#').rstrip() 131 | else: 132 | # inline: check for attrs at start of tail 133 | if elem.tail: 134 | m = self.INLINE_RE.match(elem.tail) 135 | if m: 136 | self.assign_attrs(elem, m.group(1)) 137 | elem.tail = elem.tail[m.end():] 138 | 139 | def assign_attrs(self, elem, attrs): 140 | """ Assign attrs to element. """ 141 | for k, v in get_attrs(attrs): 142 | if k == '.': 143 | # add to class 144 | cls = elem.get('class') 145 | if cls: 146 | elem.set('class', '%s %s' % (cls, v)) 147 | else: 148 | elem.set('class', v) 149 | else: 150 | # assign attr k with v 151 | elem.set(self.sanitize_name(k), v) 152 | 153 | def sanitize_name(self, name): 154 | """ 155 | Sanitize name as 'an XML Name, minus the ":"'. 156 | See http://www.w3.org/TR/REC-xml-names/#NT-NCName 157 | """ 158 | return self.NAME_RE.sub('_', name) 159 | 160 | 161 | class AttrListExtension(Extension): 162 | def extendMarkdown(self, md, md_globals): 163 | md.treeprocessors.add('attr_list', AttrListTreeprocessor(md), '>prettify') 164 | 165 | 166 | def makeExtension(*args, **kwargs): 167 | return AttrListExtension(*args, **kwargs) 168 | -------------------------------------------------------------------------------- /markdown/extensions/codehilite.py: -------------------------------------------------------------------------------- 1 | """ 2 | CodeHilite Extension for Python-Markdown 3 | ======================================== 4 | 5 | Adds code/syntax highlighting to standard Python-Markdown code blocks. 6 | 7 | See <https://pythonhosted.org/Markdown/extensions/code_hilite.html> 8 | for documentation. 9 | 10 | Original code Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/). 11 | 12 | All changes Copyright 2008-2014 The Python Markdown Project 13 | 14 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 15 | 16 | """ 17 | 18 | from __future__ import absolute_import 19 | from __future__ import unicode_literals 20 | from . import Extension 21 | from ..treeprocessors import Treeprocessor 22 | import warnings 23 | try: 24 | from pygments import highlight 25 | from pygments.lexers import get_lexer_by_name, guess_lexer, TextLexer 26 | from pygments.formatters import HtmlFormatter 27 | pygments = True 28 | except ImportError: 29 | pygments = False 30 | 31 | 32 | def parse_hl_lines(expr): 33 | """Support our syntax for emphasizing certain lines of code. 34 | 35 | expr should be like '1 2' to emphasize lines 1 and 2 of a code block. 36 | Returns a list of ints, the line numbers to emphasize. 37 | """ 38 | if not expr: 39 | return [] 40 | 41 | try: 42 | return list(map(int, expr.split())) 43 | except ValueError: 44 | return [] 45 | 46 | 47 | # ------------------ The Main CodeHilite Class ---------------------- 48 | class CodeHilite(object): 49 | """ 50 | Determine language of source code, and pass it into the Pygments highlighter. 51 | 52 | Basic Usage: 53 | >>> code = CodeHilite(src = 'some text') 54 | >>> html = code.hilite() 55 | 56 | * src: Source string or any object with a .readline attribute. 57 | 58 | * linenums: (Boolean) Set line numbering to 'on' (True), 'off' (False) or 'auto' (None). 59 | Set to 'auto' by default. 60 | 61 | * guess_lang: (Boolean) Turn language auto-detection 'on' or 'off' (on by default). 
62 | 63 | * css_class: Set class name of wrapper div ('codehilite' by default). 64 | 65 | * hl_lines: (List of integers) Lines to emphasize, 1-indexed. 66 | 67 | Low Level Usage: 68 | >>> code = CodeHilite() 69 | >>> code.src = 'some text' # String or anything with a .readline attr. 70 | >>> code.linenos = True # True or False; Turns line numbering on or off. 71 | >>> html = code.hilite() 72 | 73 | """ 74 | 75 | def __init__(self, src=None, linenums=None, guess_lang=True, 76 | css_class="codehilite", lang=None, style='default', 77 | noclasses=False, tab_length=4, hl_lines=None): 78 | self.src = src 79 | self.lang = lang 80 | self.linenums = linenums 81 | self.guess_lang = guess_lang 82 | self.css_class = css_class 83 | self.style = style 84 | self.noclasses = noclasses 85 | self.tab_length = tab_length 86 | self.hl_lines = hl_lines or [] 87 | 88 | def hilite(self): 89 | """ 90 | Pass code to the [Pygments](http://pygments.pocoo.org/) highlighter with 91 | optional line numbers. The output should then be styled with CSS to 92 | your liking. No styles are applied by default - only styling hooks 93 | (i.e.: <span class="k">). 94 | 95 | returns : A string of HTML. 96 | 97 | """ 98 | 99 | self.src = self.src.strip('\n') 100 | 101 | if self.lang is None: 102 | self._parseHeader() 103 | 104 | if pygments: 105 | try: 106 | lexer = get_lexer_by_name(self.lang) 107 | except ValueError: 108 | try: 109 | if self.guess_lang: 110 | lexer = guess_lexer(self.src) 111 | else: 112 | lexer = TextLexer() 113 | except ValueError: 114 | lexer = TextLexer() 115 | formatter = HtmlFormatter(linenos=self.linenums, 116 | cssclass=self.css_class, 117 | style=self.style, 118 | noclasses=self.noclasses, 119 | hl_lines=self.hl_lines) 120 | return highlight(self.src, lexer, formatter) 121 | else: 122 | # just escape and build markup usable by JS highlighting libs 123 | txt = self.src.replace('&', '&amp;') 124 | txt = txt.replace('<', '&lt;') 125 | txt = txt.replace('>', '&gt;') 126 | txt = txt.replace('"', '&quot;') 127 | classes = [] 128 | if self.lang: 129 | classes.append('language-%s' % self.lang) 130 | if self.linenums: 131 | classes.append('linenums') 132 | class_str = '' 133 | if classes: 134 | class_str = ' class="%s"' % ' '.join(classes) 135 | return '<pre class="%s"><code%s>%s</code></pre>\n'% \ 136 | (self.css_class, class_str, txt) 137 | 138 | def _parseHeader(self): 139 | """ 140 | Determines language of a code block from a shebang line and whether said 141 | line should be removed or left in place. If the shebang line contains a 142 | path (even a single /) then it is assumed to be a real shebang line and 143 | left alone. However, if no path is given (e.g.: #!python or :::python) 144 | then it is assumed to be a mock shebang for language identification of a 145 | code fragment and removed from the code block prior to processing for 146 | code highlighting. When a mock shebang (e.g.: #!python) is found, line 147 | numbering is turned on. When colons are found in place of a shebang 148 | (e.g.: :::python), line numbering is left in the current state - off 149 | by default. 150 | 151 | Also parses optional list of highlight lines, like: 152 | 153 | :::python hl_lines="1 3" 154 | """ 155 | 156 | import re 157 | 158 | # split text into lines 159 | lines = self.src.split("\n") 160 | # pull first line to examine 161 | fl = lines.pop(0) 162 | 163 | c = re.compile(r''' 164 | (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons 165 | (?P<path>(?:/\w+)*[/ ])? 
# Zero or 1 path 166 | (?P<lang>[\w+-]*) # The language 167 | \s* # Arbitrary whitespace 168 | # Optional highlight lines, single- or double-quote-delimited 169 | (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))? 170 | ''', re.VERBOSE) 171 | # search first line for shebang 172 | m = c.search(fl) 173 | if m: 174 | # we have a match 175 | try: 176 | self.lang = m.group('lang').lower() 177 | except IndexError: 178 | self.lang = None 179 | if m.group('path'): 180 | # path exists - restore first line 181 | lines.insert(0, fl) 182 | if self.linenums is None and m.group('shebang'): 183 | # Overridable and Shebang exists - use line numbers 184 | self.linenums = True 185 | 186 | self.hl_lines = parse_hl_lines(m.group('hl_lines')) 187 | else: 188 | # No match 189 | lines.insert(0, fl) 190 | 191 | self.src = "\n".join(lines).strip("\n") 192 | 193 | 194 | 195 | # ------------------ The Markdown Extension ------------------------------- 196 | class HiliteTreeprocessor(Treeprocessor): 197 | """ Highlight source code in code blocks. """ 198 | 199 | def run(self, root): 200 | """ Find code blocks and store in htmlStash. """ 201 | blocks = root.getiterator('pre') 202 | for block in blocks: 203 | children = block.getchildren() 204 | if len(children) == 1 and children[0].tag == 'code': 205 | code = CodeHilite(children[0].text, 206 | linenums=self.config['linenums'], 207 | guess_lang=self.config['guess_lang'], 208 | css_class=self.config['css_class'], 209 | style=self.config['pygments_style'], 210 | noclasses=self.config['noclasses'], 211 | tab_length=self.markdown.tab_length) 212 | placeholder = self.markdown.htmlStash.store(code.hilite(), 213 | safe=True) 214 | # Clear codeblock in etree instance 215 | block.clear() 216 | # Change to p element which will later 217 | # be removed when inserting raw html 218 | block.tag = 'p' 219 | block.text = placeholder 220 | 221 | 222 | class CodeHiliteExtension(Extension): 223 | """ Add source code highlighting to markdown codeblocks. """ 224 | 225 | def __init__(self, *args, **kwargs): 226 | # define default configs 227 | self.config = { 228 | 'linenums': [None, "Use line numbers. True=yes, False=no, None=auto"], 229 | 'force_linenos' : [False, "Deprecated! Use 'linenums' instead. Force line numbers - Default: False"], 230 | 'guess_lang' : [True, "Automatic language detection - Default: True"], 231 | 'css_class' : ["codehilite", 232 | "Set class name for wrapper <div> - Default: codehilite"], 233 | 'pygments_style' : ['default', 'Pygments HTML Formatter Style (Colorscheme) - Default: default'], 234 | 'noclasses': [False, 'Use inline styles instead of CSS classes - Default false'] 235 | } 236 | 237 | super(CodeHiliteExtension, self).__init__(*args, **kwargs) 238 | 239 | def extendMarkdown(self, md, md_globals): 240 | """ Add HiliteTreeprocessor to Markdown instance. """ 241 | hiliter = HiliteTreeprocessor(md) 242 | hiliter.config = self.getConfigs() 243 | md.treeprocessors.add("hilite", hiliter, "<inline") 244 | 245 | md.registerExtension(self) 246 | 247 | 248 | def makeExtension(*args, **kwargs): 249 | return CodeHiliteExtension(*args, **kwargs) 250 | 251 | -------------------------------------------------------------------------------- /markdown/extensions/def_list.py: -------------------------------------------------------------------------------- 1 | """ 2 | Definition List Extension for Python-Markdown 3 | ============================================= 4 | 5 | Adds parsing of Definition Lists to Python-Markdown. 
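For example (an illustrative input, not from the original docstring),
the text

    Apple
    :   A red fruit.

is rendered as a <dl> element containing a <dt>/<dd> pair.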
6 | 7 | See <https://pythonhosted.org/Markdown/extensions/definition_lists.html> 8 | for documentation. 9 | 10 | Original code Copyright 2008 [Waylan Limberg](http://achinghead.com) 11 | 12 | All changes Copyright 2008-2014 The Python Markdown Project 13 | 14 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 15 | 16 | """ 17 | 18 | from __future__ import absolute_import 19 | from __future__ import unicode_literals 20 | from . import Extension 21 | from ..blockprocessors import BlockProcessor, ListIndentProcessor 22 | from ..util import etree 23 | import re 24 | 25 | 26 | class DefListProcessor(BlockProcessor): 27 | """ Process Definition Lists. """ 28 | 29 | RE = re.compile(r'(^|\n)[ ]{0,3}:[ ]{1,3}(.*?)(\n|$)') 30 | NO_INDENT_RE = re.compile(r'^[ ]{0,3}[^ :]') 31 | 32 | def test(self, parent, block): 33 | return bool(self.RE.search(block)) 34 | 35 | def run(self, parent, blocks): 36 | 37 | raw_block = blocks.pop(0) 38 | m = self.RE.search(raw_block) 39 | terms = [l.strip() for l in raw_block[:m.start()].split('\n') if l.strip()] 40 | block = raw_block[m.end():] 41 | no_indent = self.NO_INDENT_RE.match(block) 42 | if no_indent: 43 | d, theRest = (block, None) 44 | else: 45 | d, theRest = self.detab(block) 46 | if d: 47 | d = '%s\n%s' % (m.group(2), d) 48 | else: 49 | d = m.group(2) 50 | sibling = self.lastChild(parent) 51 | if not terms and sibling is None: 52 | # This is not a definition item. Most likely a paragraph that 53 | # starts with a colon at the beginning of a document or list. 54 | blocks.insert(0, raw_block) 55 | return False 56 | if not terms and sibling.tag == 'p': 57 | # The previous paragraph contains the terms 58 | state = 'looselist' 59 | terms = sibling.text.split('\n') 60 | parent.remove(sibling) 61 | # Acquire new sibling 62 | sibling = self.lastChild(parent) 63 | else: 64 | state = 'list' 65 | 66 | if sibling and sibling.tag == 'dl': 67 | # This is another item on an existing list 68 | dl = sibling 69 | if not terms and len(dl) and dl[-1].tag == 'dd' and len(dl[-1]): 70 | state = 'looselist' 71 | else: 72 | # This is a new list 73 | dl = etree.SubElement(parent, 'dl') 74 | # Add terms 75 | for term in terms: 76 | dt = etree.SubElement(dl, 'dt') 77 | dt.text = term 78 | # Add definition 79 | self.parser.state.set(state) 80 | dd = etree.SubElement(dl, 'dd') 81 | self.parser.parseBlocks(dd, [d]) 82 | self.parser.state.reset() 83 | 84 | if theRest: 85 | blocks.insert(0, theRest) 86 | 87 | class DefListIndentProcessor(ListIndentProcessor): 88 | """ Process indented children of definition list items. """ 89 | 90 | ITEM_TYPES = ['dd'] 91 | LIST_TYPES = ['dl'] 92 | 93 | def create_item(self, parent, block): 94 | """ Create a new dd and parse the block with it as the parent. """ 95 | dd = etree.SubElement(parent, 'dd') 96 | self.parser.parseBlocks(dd, [block]) 97 | 98 | 99 | 100 | class DefListExtension(Extension): 101 | """ Add definition lists to Markdown. """ 102 | 103 | def extendMarkdown(self, md, md_globals): 104 | """ Add an instance of DefListProcessor to BlockParser. 
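A minimal usage sketch (``text`` is a placeholder for input containing
definition-list markup):

    import markdown
    html = markdown.markdown(text, extensions=['markdown.extensions.def_list'])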
""" 105 | md.parser.blockprocessors.add('defindent', 106 | DefListIndentProcessor(md.parser), 107 | '>indent') 108 | md.parser.blockprocessors.add('deflist', 109 | DefListProcessor(md.parser), 110 | '>ulist') 111 | 112 | 113 | def makeExtension(*args, **kwargs): 114 | return DefListExtension(*args, **kwargs) 115 | 116 | -------------------------------------------------------------------------------- /markdown/extensions/extra.py: -------------------------------------------------------------------------------- 1 | """ 2 | Python-Markdown Extra Extension 3 | =============================== 4 | 5 | A compilation of various Python-Markdown extensions that imitates 6 | [PHP Markdown Extra](http://michelf.com/projects/php-markdown/extra/). 7 | 8 | Note that each of the individual extensions still need to be available 9 | on your PYTHONPATH. This extension simply wraps them all up as a 10 | convenience so that only one extension needs to be listed when 11 | initiating Markdown. See the documentation for each individual 12 | extension for specifics about that extension. 13 | 14 | There may be additional extensions that are distributed with 15 | Python-Markdown that are not included here in Extra. Those extensions 16 | are not part of PHP Markdown Extra, and therefore, not part of 17 | Python-Markdown Extra. If you really would like Extra to include 18 | additional extensions, we suggest creating your own clone of Extra 19 | under a differant name. You could also edit the `extensions` global 20 | variable defined below, but be aware that such changes may be lost 21 | when you upgrade to any future version of Python-Markdown. 22 | 23 | See <https://pythonhosted.org/Markdown/extensions/extra.html> 24 | for documentation. 25 | 26 | Copyright The Python Markdown Project 27 | 28 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 29 | 30 | """ 31 | 32 | from __future__ import absolute_import 33 | from __future__ import unicode_literals 34 | from . import Extension 35 | from ..blockprocessors import BlockProcessor 36 | from .. import util 37 | import re 38 | 39 | extensions = [ 40 | 'markdown.extensions.smart_strong', 41 | 'markdown.extensions.fenced_code', 42 | 'markdown.extensions.footnotes', 43 | 'markdown.extensions.attr_list', 44 | 'markdown.extensions.def_list', 45 | 'markdown.extensions.tables', 46 | 'markdown.extensions.abbr' 47 | ] 48 | 49 | 50 | class ExtraExtension(Extension): 51 | """ Add various extensions to Markdown class.""" 52 | 53 | def __init__(self, *args, **kwargs): 54 | """ config is just a dumb holder which gets passed to actual ext later. """ 55 | self.config = kwargs.pop('configs', {}) 56 | self.config.update(kwargs) 57 | 58 | def extendMarkdown(self, md, md_globals): 59 | """ Register extension instances. 
""" 60 | md.registerExtensions(extensions, self.config) 61 | if not md.safeMode: 62 | # Turn on processing of markdown text within raw html 63 | md.preprocessors['html_block'].markdown_in_raw = True 64 | md.parser.blockprocessors.add('markdown_block', 65 | MarkdownInHtmlProcessor(md.parser), 66 | '_begin') 67 | md.parser.blockprocessors.tag_counter = -1 68 | md.parser.blockprocessors.contain_span_tags = re.compile( 69 | r'^(p|h[1-6]|li|dd|dt|td|th|legend|address)$', re.IGNORECASE) 70 | 71 | 72 | def makeExtension(*args, **kwargs): 73 | return ExtraExtension(*args, **kwargs) 74 | 75 | 76 | class MarkdownInHtmlProcessor(BlockProcessor): 77 | """Process Markdown Inside HTML Blocks.""" 78 | def test(self, parent, block): 79 | return block == util.TAG_PLACEHOLDER % \ 80 | str(self.parser.blockprocessors.tag_counter + 1) 81 | 82 | def _process_nests(self, element, block): 83 | """Process the element's child elements in self.run.""" 84 | # Build list of indexes of each nest within the parent element. 85 | nest_index = [] # a list of tuples: (left index, right index) 86 | i = self.parser.blockprocessors.tag_counter + 1 87 | while len(self._tag_data) > i and self._tag_data[i]['left_index']: 88 | left_child_index = self._tag_data[i]['left_index'] 89 | right_child_index = self._tag_data[i]['right_index'] 90 | nest_index.append((left_child_index - 1, right_child_index)) 91 | i += 1 92 | 93 | # Create each nest subelement. 94 | for i, (left_index, right_index) in enumerate(nest_index[:-1]): 95 | self.run(element, block[left_index:right_index], 96 | block[right_index:nest_index[i + 1][0]], True) 97 | self.run(element, block[nest_index[-1][0]:nest_index[-1][1]], # last 98 | block[nest_index[-1][1]:], True) # nest 99 | 100 | def run(self, parent, blocks, tail=None, nest=False): 101 | self._tag_data = self.parser.markdown.htmlStash.tag_data 102 | 103 | self.parser.blockprocessors.tag_counter += 1 104 | tag = self._tag_data[self.parser.blockprocessors.tag_counter] 105 | 106 | # Create Element 107 | markdown_value = tag['attrs'].pop('markdown') 108 | element = util.etree.SubElement(parent, tag['tag'], tag['attrs']) 109 | 110 | # Slice Off Block 111 | if nest: 112 | self.parser.parseBlocks(parent, tail) # Process Tail 113 | block = blocks[1:] 114 | else: # includes nests since a third level of nesting isn't supported 115 | block = blocks[tag['left_index'] + 1: tag['right_index']] 116 | del blocks[:tag['right_index']] 117 | 118 | # Process Text 119 | if (self.parser.blockprocessors.contain_span_tags.match( # Span Mode 120 | tag['tag']) and markdown_value != 'block') or \ 121 | markdown_value == 'span': 122 | element.text = '\n'.join(block) 123 | else: # Block Mode 124 | i = self.parser.blockprocessors.tag_counter + 1 125 | if len(self._tag_data) > i and self._tag_data[i]['left_index']: 126 | first_subelement_index = self._tag_data[i]['left_index'] - 1 127 | self.parser.parseBlocks( 128 | element, block[:first_subelement_index]) 129 | if not nest: 130 | block = self._process_nests(element, block) 131 | else: 132 | self.parser.parseBlocks(element, block) 133 | -------------------------------------------------------------------------------- /markdown/extensions/fenced_code.py: -------------------------------------------------------------------------------- 1 | """ 2 | Fenced Code Extension for Python Markdown 3 | ========================================= 4 | 5 | This extension adds Fenced Code Blocks to Python-Markdown. 
6 | 7 | See <https://pythonhosted.org/Markdown/extensions/fenced_code_blocks.html> 8 | for documentation. 9 | 10 | Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/). 11 | 12 | 13 | All changes Copyright 2008-2014 The Python Markdown Project 14 | 15 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 16 | """ 17 | 18 | from __future__ import absolute_import 19 | from __future__ import unicode_literals 20 | from . import Extension 21 | from ..preprocessors import Preprocessor 22 | from .codehilite import CodeHilite, CodeHiliteExtension, parse_hl_lines 23 | import re 24 | 25 | 26 | class FencedCodeExtension(Extension): 27 | 28 | def extendMarkdown(self, md, md_globals): 29 | """ Add FencedBlockPreprocessor to the Markdown instance. """ 30 | md.registerExtension(self) 31 | 32 | md.preprocessors.add('fenced_code_block', 33 | FencedBlockPreprocessor(md), 34 | ">normalize_whitespace") 35 | 36 | 37 | class FencedBlockPreprocessor(Preprocessor): 38 | FENCED_BLOCK_RE = re.compile(r''' 39 | (?P<fence>^(?:~{3,}|`{3,}))[ ]* # Opening ``` or ~~~ 40 | (\{?\.?(?P<lang>[a-zA-Z0-9_+-]*))?[ ]* # Optional {, and lang 41 | # Optional highlight lines, single- or double-quote-delimited 42 | (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?[ ]* 43 | }?[ ]*\n # Optional closing } 44 | (?P<code>.*?)(?<=\n) 45 | (?P=fence)[ ]*$''', re.MULTILINE | re.DOTALL | re.VERBOSE) 46 | CODE_WRAP = '<pre><code%s>%s</code></pre>' 47 | LANG_TAG = ' class="%s"' 48 | 49 | def __init__(self, md): 50 | super(FencedBlockPreprocessor, self).__init__(md) 51 | 52 | self.checked_for_codehilite = False 53 | self.codehilite_conf = {} 54 | 55 | def run(self, lines): 56 | """ Match and store Fenced Code Blocks in the HtmlStash. """ 57 | 58 | # Check for code hilite extension 59 | if not self.checked_for_codehilite: 60 | for ext in self.markdown.registeredExtensions: 61 | if isinstance(ext, CodeHiliteExtension): 62 | self.codehilite_conf = ext.config 63 | break 64 | 65 | self.checked_for_codehilite = True 66 | 67 | text = "\n".join(lines) 68 | while 1: 69 | m = self.FENCED_BLOCK_RE.search(text) 70 | if m: 71 | lang = '' 72 | if m.group('lang'): 73 | lang = self.LANG_TAG % m.group('lang') 74 | 75 | # If config is not empty, then the codehilite extension 76 | # is enabled, so we call it to highlight the code 77 | if self.codehilite_conf: 78 | highlighter = CodeHilite(m.group('code'), 79 | linenums=self.codehilite_conf['linenums'][0], 80 | guess_lang=self.codehilite_conf['guess_lang'][0], 81 | css_class=self.codehilite_conf['css_class'][0], 82 | style=self.codehilite_conf['pygments_style'][0], 83 | lang=(m.group('lang') or None), 84 | noclasses=self.codehilite_conf['noclasses'][0], 85 | hl_lines=parse_hl_lines(m.group('hl_lines'))) 86 | 87 | code = highlighter.hilite() 88 | else: 89 | code = self.CODE_WRAP % (lang, self._escape(m.group('code'))) 90 | 91 | placeholder = self.markdown.htmlStash.store(code, safe=True) 92 | text = '%s\n%s\n%s'% (text[:m.start()], placeholder, text[m.end():]) 93 | else: 94 | break 95 | return text.split("\n") 96 | 97 | def _escape(self, txt): 98 | """ basic html escaping """ 99 | txt = txt.replace('&', '&amp;') 100 | txt = txt.replace('<', '&lt;') 101 | txt = txt.replace('>', '&gt;') 102 | txt = txt.replace('"', '&quot;') 103 | return txt 104 | 105 | 106 | def makeExtension(*args, **kwargs): 107 | return FencedCodeExtension(*args, **kwargs) 108 | 109 | -------------------------------------------------------------------------------- /markdown/extensions/footnotes.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Footnotes Extension for Python-Markdown 3 | ======================================= 4 | 5 | Adds footnote handling to Python-Markdown. 6 | 7 | See <https://pythonhosted.org/Markdown/extensions/footnotes.html> 8 | for documentation. 9 | 10 | Copyright The Python Markdown Project 11 | 12 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 13 | 14 | """ 15 | 16 | from __future__ import absolute_import 17 | from __future__ import unicode_literals 18 | from . import Extension 19 | from ..preprocessors import Preprocessor 20 | from ..inlinepatterns import Pattern 21 | from ..treeprocessors import Treeprocessor 22 | from ..postprocessors import Postprocessor 23 | from ..util import etree, text_type 24 | from ..odict import OrderedDict 25 | import re 26 | 27 | FN_BACKLINK_TEXT = "zz1337820767766393qq" 28 | NBSP_PLACEHOLDER = "qq3936677670287331zz" 29 | DEF_RE = re.compile(r'[ ]{0,3}\[\^([^\]]*)\]:\s*(.*)') 30 | TABBED_RE = re.compile(r'((\t)|( ))(.*)') 31 | 32 | class FootnoteExtension(Extension): 33 | """ Footnote Extension. """ 34 | 35 | def __init__ (self, *args, **kwargs): 36 | """ Setup configs. """ 37 | 38 | self.config = { 39 | 'PLACE_MARKER': 40 | ["///Footnotes Go Here///", 41 | "The text string that marks where the footnotes go"], 42 | 'UNIQUE_IDS': 43 | [False, 44 | "Avoid name collisions across " 45 | "multiple calls to reset()."], 46 | "BACKLINK_TEXT": 47 | ["&#8617;", 48 | "The text string that links from the footnote to the reader's place."] 49 | } 50 | super(FootnoteExtension, self).__init__(*args, **kwargs) 51 | 52 | # In multiple invocations, emit links that don't get tangled. 53 | self.unique_prefix = 0 54 | 55 | self.reset() 56 | 57 | def extendMarkdown(self, md, md_globals): 58 | """ Add pieces to Markdown. """ 59 | md.registerExtension(self) 60 | self.parser = md.parser 61 | self.md = md 62 | # Insert a preprocessor before ReferencePreprocessor 63 | md.preprocessors.add("footnote", FootnotePreprocessor(self), 64 | "<reference") 65 | # Insert an inline pattern before ImageReferencePattern 66 | FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah 67 | md.inlinePatterns.add("footnote", FootnotePattern(FOOTNOTE_RE, self), 68 | "<reference") 69 | # Insert a tree-processor that would actually add the footnote div 70 | # This must be before all other treeprocessors (i.e., inline and 71 | # codehilite) so they can run on the contents of the div. 72 | md.treeprocessors.add("footnote", FootnoteTreeprocessor(self), 73 | "_begin") 74 | # Insert a postprocessor after the amp_substitute postprocessor 75 | md.postprocessors.add("footnote", FootnotePostprocessor(self), 76 | ">amp_substitute") 77 | 78 | def reset(self): 79 | """ Clear the footnotes on reset, and prepare for a distinct document. """ 80 | self.footnotes = OrderedDict() 81 | self.unique_prefix += 1 82 | 83 | def findFootnotesPlaceholder(self, root): 84 | """ Return ElementTree Element that contains Footnote placeholder. """ 85 | def finder(element): 86 | for child in element: 87 | if child.text: 88 | if child.text.find(self.getConfig("PLACE_MARKER")) > -1: 89 | return child, element, True 90 | if child.tail: 91 | if child.tail.find(self.getConfig("PLACE_MARKER")) > -1: 92 | return child, element, False 93 | finder(child) 94 | return None 95 | 96 | res = finder(root) 97 | return res 98 | 99 | def setFootnote(self, id, text): 100 | """ Store a footnote for later retrieval. 
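For example (an illustrative call, not from the original docstring),
self.setFootnote('1', 'Footnote text.') stores the text under the
label '1'; makeFootnotesDiv later renders each stored footnote as an
<li> in the footnote <div>.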
""" 101 | self.footnotes[id] = text 102 | 103 | def get_separator(self): 104 | if self.md.output_format in ['html5', 'xhtml5']: 105 | return '-' 106 | return ':' 107 | 108 | def makeFootnoteId(self, id): 109 | """ Return footnote link id. """ 110 | if self.getConfig("UNIQUE_IDS"): 111 | return 'fn%s%d-%s' % (self.get_separator(), self.unique_prefix, id) 112 | else: 113 | return 'fn%s%s' % (self.get_separator(), id) 114 | 115 | def makeFootnoteRefId(self, id): 116 | """ Return footnote back-link id. """ 117 | if self.getConfig("UNIQUE_IDS"): 118 | return 'fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id) 119 | else: 120 | return 'fnref%s%s' % (self.get_separator(), id) 121 | 122 | def makeFootnotesDiv(self, root): 123 | """ Return div of footnotes as et Element. """ 124 | 125 | if not list(self.footnotes.keys()): 126 | return None 127 | 128 | div = etree.Element("div") 129 | div.set('class', 'footnote') 130 | etree.SubElement(div, "hr") 131 | ol = etree.SubElement(div, "ol") 132 | 133 | for id in self.footnotes.keys(): 134 | li = etree.SubElement(ol, "li") 135 | li.set("id", self.makeFootnoteId(id)) 136 | self.parser.parseChunk(li, self.footnotes[id]) 137 | backlink = etree.Element("a") 138 | backlink.set("href", "#" + self.makeFootnoteRefId(id)) 139 | if self.md.output_format not in ['html5', 'xhtml5']: 140 | backlink.set("rev", "footnote") # Invalid in HTML5 141 | backlink.set("class", "footnote-backref") 142 | backlink.set("title", "Jump back to footnote %d in the text" % \ 143 | (self.footnotes.index(id)+1)) 144 | backlink.text = FN_BACKLINK_TEXT 145 | 146 | if li.getchildren(): 147 | node = li[-1] 148 | if node.tag == "p": 149 | node.text = node.text + NBSP_PLACEHOLDER 150 | node.append(backlink) 151 | else: 152 | p = etree.SubElement(li, "p") 153 | p.append(backlink) 154 | return div 155 | 156 | 157 | class FootnotePreprocessor(Preprocessor): 158 | """ Find all footnote references and store for later use. """ 159 | 160 | def __init__ (self, footnotes): 161 | self.footnotes = footnotes 162 | 163 | def run(self, lines): 164 | """ 165 | Loop through lines and find, set, and remove footnote definitions. 166 | 167 | Keywords: 168 | 169 | * lines: A list of lines of text 170 | 171 | Return: A list of lines of text with footnote definitions removed. 172 | 173 | """ 174 | newlines = [] 175 | i = 0 176 | while True: 177 | m = DEF_RE.match(lines[i]) 178 | if m: 179 | fn, _i = self.detectTabbed(lines[i+1:]) 180 | fn.insert(0, m.group(2)) 181 | i += _i-1 # skip past footnote 182 | self.footnotes.setFootnote(m.group(1), "\n".join(fn)) 183 | else: 184 | newlines.append(lines[i]) 185 | if len(lines) > i+1: 186 | i += 1 187 | else: 188 | break 189 | return newlines 190 | 191 | def detectTabbed(self, lines): 192 | """ Find indented text and remove indent before further proccesing. 193 | 194 | Keyword arguments: 195 | 196 | * lines: an array of strings 197 | 198 | Returns: a list of post processed items and the index of last line. 199 | 200 | """ 201 | items = [] 202 | blank_line = False # have we encountered a blank line yet? 203 | i = 0 # to keep track of where we are 204 | 205 | def detab(line): 206 | match = TABBED_RE.match(line) 207 | if match: 208 | return match.group(4) 209 | 210 | for line in lines: 211 | if line.strip(): # Non-blank line 212 | detabbed_line = detab(line) 213 | if detabbed_line: 214 | items.append(detabbed_line) 215 | i += 1 216 | continue 217 | elif not blank_line and not DEF_RE.match(line): 218 | # not tabbed but still part of first par. 
219 | items.append(line) 220 | i += 1 221 | continue 222 | else: 223 | return items, i+1 224 | 225 | else: # Blank line: _maybe_ we are done. 226 | blank_line = True 227 | i += 1 # advance 228 | 229 | # Find the next non-blank line 230 | for j in range(i, len(lines)): 231 | if lines[j].strip(): 232 | next_line = lines[j]; break 233 | else: 234 | break # There is no more text; we are done. 235 | 236 | # Check if the next non-blank line is tabbed 237 | if detab(next_line): # Yes, more work to do. 238 | items.append("") 239 | continue 240 | else: 241 | break # No, we are done. 242 | else: 243 | i += 1 244 | 245 | return items, i 246 | 247 | 248 | class FootnotePattern(Pattern): 249 | """ InlinePattern for footnote markers in a document's body text. """ 250 | 251 | def __init__(self, pattern, footnotes): 252 | super(FootnotePattern, self).__init__(pattern) 253 | self.footnotes = footnotes 254 | 255 | def handleMatch(self, m): 256 | id = m.group(2) 257 | if id in self.footnotes.footnotes.keys(): 258 | sup = etree.Element("sup") 259 | a = etree.SubElement(sup, "a") 260 | sup.set('id', self.footnotes.makeFootnoteRefId(id)) 261 | a.set('href', '#' + self.footnotes.makeFootnoteId(id)) 262 | if self.footnotes.md.output_format not in ['html5', 'xhtml5']: 263 | a.set('rel', 'footnote') # invalid in HTML5 264 | a.set('class', 'footnote-ref') 265 | a.text = text_type(self.footnotes.footnotes.index(id) + 1) 266 | return sup 267 | else: 268 | return None 269 | 270 | 271 | class FootnoteTreeprocessor(Treeprocessor): 272 | """ Build and append footnote div to end of document. """ 273 | 274 | def __init__ (self, footnotes): 275 | self.footnotes = footnotes 276 | 277 | def run(self, root): 278 | footnotesDiv = self.footnotes.makeFootnotesDiv(root) 279 | if footnotesDiv: 280 | result = self.footnotes.findFootnotesPlaceholder(root) 281 | if result: 282 | child, parent, isText = result 283 | ind = parent.getchildren().index(child) 284 | if isText: 285 | parent.remove(child) 286 | parent.insert(ind, footnotesDiv) 287 | else: 288 | parent.insert(ind + 1, footnotesDiv) 289 | child.tail = None 290 | else: 291 | root.append(footnotesDiv) 292 | 293 | class FootnotePostprocessor(Postprocessor): 294 | """ Replace placeholders with HTML entities. """ 295 | def __init__(self, footnotes): 296 | self.footnotes = footnotes 297 | 298 | def run(self, text): 299 | text = text.replace(FN_BACKLINK_TEXT, self.footnotes.getConfig("BACKLINK_TEXT")) 300 | return text.replace(NBSP_PLACEHOLDER, "&#160;") 301 | 302 | def makeExtension(*args, **kwargs): 303 | """ Return an instance of the FootnoteExtension """ 304 | return FootnoteExtension(*args, **kwargs) 305 | 306 | -------------------------------------------------------------------------------- /markdown/extensions/headerid.py: -------------------------------------------------------------------------------- 1 | """ 2 | HeaderID Extension for Python-Markdown 3 | ====================================== 4 | 5 | Auto-generate id attributes for HTML headers. 6 | 7 | See <https://pythonhosted.org/Markdown/extensions/header_id.html> 8 | for documentation. 9 | 10 | Original code Copyright 2007-2011 [Waylan Limberg](http://achinghead.com/). 11 | 12 | All changes Copyright 2011-2014 The Python Markdown Project 13 | 14 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 15 | 16 | """ 17 | 18 | from __future__ import absolute_import 19 | from __future__ import unicode_literals 20 | from . 
import Extension 21 | from ..treeprocessors import Treeprocessor 22 | from ..util import HTML_PLACEHOLDER_RE, parseBoolValue 23 | import re 24 | import logging 25 | import unicodedata 26 | 27 | logger = logging.getLogger('MARKDOWN') 28 | 29 | IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$') 30 | 31 | 32 | def slugify(value, separator): 33 | """ Slugify a string, to make it URL friendly. """ 34 | value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') 35 | value = re.sub('[^\w\s-]', '', value.decode('ascii')).strip().lower() 36 | return re.sub('[%s\s]+' % separator, separator, value) 37 | 38 | 39 | def unique(id, ids): 40 | """ Ensure id is unique in set of ids. Append '_1', '_2'... if not """ 41 | while id in ids or not id: 42 | m = IDCOUNT_RE.match(id) 43 | if m: 44 | id = '%s_%d'% (m.group(1), int(m.group(2))+1) 45 | else: 46 | id = '%s_%d'% (id, 1) 47 | ids.add(id) 48 | return id 49 | 50 | 51 | def itertext(elem): 52 | """ Loop through all children and return text only. 53 | 54 | Reimplements method of same name added to ElementTree in Python 2.7 55 | 56 | """ 57 | if elem.text: 58 | yield elem.text 59 | for e in elem: 60 | for s in itertext(e): 61 | yield s 62 | if e.tail: 63 | yield e.tail 64 | 65 | 66 | def stashedHTML2text(text, md): 67 | """ Extract raw HTML, reduce to plain text and swap with placeholder. """ 68 | def _html_sub(m): 69 | """ Substitute raw html with plain text. """ 70 | try: 71 | raw, safe = md.htmlStash.rawHtmlBlocks[int(m.group(1))] 72 | except (IndexError, TypeError): 73 | return m.group(0) 74 | if md.safeMode and not safe: 75 | return '' 76 | # Strip out tags and entities - leaveing text 77 | return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw) 78 | 79 | return HTML_PLACEHOLDER_RE.sub(_html_sub, text) 80 | 81 | 82 | class HeaderIdTreeprocessor(Treeprocessor): 83 | """ Assign IDs to headers. 
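For a header like '## Some Header!' the run() method below boils down
to roughly this pipeline (a sketch, not the exact call sequence):

    text = stashedHTML2text('Some Header!', self.md)  # drop stashed raw HTML
    id = slugify(text, '-')                           # -> 'some-header'
    elem.set('id', unique(id, self.IDs))              # -> 'some-header_1' on a repeat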
""" 84 | 85 | IDs = set() 86 | 87 | def run(self, doc): 88 | start_level, force_id = self._get_meta() 89 | slugify = self.config['slugify'] 90 | sep = self.config['separator'] 91 | for elem in doc.getiterator(): 92 | if elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']: 93 | if force_id: 94 | if "id" in elem.attrib: 95 | id = elem.get('id') 96 | else: 97 | id = stashedHTML2text(''.join(itertext(elem)), self.md) 98 | id = slugify(id, sep) 99 | elem.set('id', unique(id, self.IDs)) 100 | if start_level: 101 | level = int(elem.tag[-1]) + start_level 102 | if level > 6: 103 | level = 6 104 | elem.tag = 'h%d' % level 105 | 106 | 107 | def _get_meta(self): 108 | """ Return meta data suported by this ext as a tuple """ 109 | level = int(self.config['level']) - 1 110 | force = parseBoolValue(self.config['forceid']) 111 | if hasattr(self.md, 'Meta'): 112 | if 'header_level' in self.md.Meta: 113 | level = int(self.md.Meta['header_level'][0]) - 1 114 | if 'header_forceid' in self.md.Meta: 115 | force = parseBoolValue(self.md.Meta['header_forceid'][0]) 116 | return level, force 117 | 118 | 119 | class HeaderIdExtension(Extension): 120 | def __init__(self, *args, **kwargs): 121 | # set defaults 122 | self.config = { 123 | 'level' : ['1', 'Base level for headers.'], 124 | 'forceid' : ['True', 'Force all headers to have an id.'], 125 | 'separator' : ['-', 'Word separator.'], 126 | 'slugify' : [slugify, 'Callable to generate anchors'], 127 | } 128 | 129 | super(HeaderIdExtension, self).__init__(*args, **kwargs) 130 | 131 | def extendMarkdown(self, md, md_globals): 132 | md.registerExtension(self) 133 | self.processor = HeaderIdTreeprocessor() 134 | self.processor.md = md 135 | self.processor.config = self.getConfigs() 136 | if 'attr_list' in md.treeprocessors.keys(): 137 | # insert after attr_list treeprocessor 138 | md.treeprocessors.add('headerid', self.processor, '>attr_list') 139 | else: 140 | # insert after 'prettify' treeprocessor. 141 | md.treeprocessors.add('headerid', self.processor, '>prettify') 142 | 143 | def reset(self): 144 | self.processor.IDs = set() 145 | 146 | 147 | def makeExtension(*args, **kwargs): 148 | return HeaderIdExtension(*args, **kwargs) 149 | 150 | -------------------------------------------------------------------------------- /markdown/extensions/meta.py: -------------------------------------------------------------------------------- 1 | """ 2 | Meta Data Extension for Python-Markdown 3 | ======================================= 4 | 5 | This extension adds Meta Data handling to markdown. 6 | 7 | See <https://pythonhosted.org/Markdown/extensions/meta_data.html> 8 | for documentation. 9 | 10 | Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com). 11 | 12 | All changes Copyright 2008-2014 The Python Markdown Project 13 | 14 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 15 | 16 | """ 17 | 18 | from __future__ import absolute_import 19 | from __future__ import unicode_literals 20 | from . import Extension 21 | from ..preprocessors import Preprocessor 22 | import re 23 | 24 | # Global Vars 25 | META_RE = re.compile(r'^[ ]{0,3}(?P<key>[A-Za-z0-9_-]+):\s*(?P<value>.*)') 26 | META_MORE_RE = re.compile(r'^[ ]{4,}(?P<value>.*)') 27 | 28 | class MetaExtension (Extension): 29 | """ Meta-Data extension for Python-Markdown. """ 30 | 31 | def extendMarkdown(self, md, md_globals): 32 | """ Add MetaPreprocessor to Markdown instance. 
""" 33 | 34 | md.preprocessors.add("meta", MetaPreprocessor(md), ">normalize_whitespace") 35 | 36 | 37 | class MetaPreprocessor(Preprocessor): 38 | """ Get Meta-Data. """ 39 | 40 | def run(self, lines): 41 | """ Parse Meta-Data and store in Markdown.Meta. """ 42 | meta = {} 43 | key = None 44 | while lines: 45 | line = lines.pop(0) 46 | if line.strip() == '': 47 | break # blank line - done 48 | m1 = META_RE.match(line) 49 | if m1: 50 | key = m1.group('key').lower().strip() 51 | value = m1.group('value').strip() 52 | try: 53 | meta[key].append(value) 54 | except KeyError: 55 | meta[key] = [value] 56 | else: 57 | m2 = META_MORE_RE.match(line) 58 | if m2 and key: 59 | # Add another line to existing key 60 | meta[key].append(m2.group('value').strip()) 61 | else: 62 | lines.insert(0, line) 63 | break # no meta data - done 64 | self.markdown.Meta = meta 65 | return lines 66 | 67 | 68 | def makeExtension(*args, **kwargs): 69 | return MetaExtension(*args, **kwargs) 70 | 71 | -------------------------------------------------------------------------------- /markdown/extensions/nl2br.py: -------------------------------------------------------------------------------- 1 | """ 2 | NL2BR Extension 3 | =============== 4 | 5 | A Python-Markdown extension to treat newlines as hard breaks; like 6 | GitHub-flavored Markdown does. 7 | 8 | See <https://pythonhosted.org/Markdown/extensions/nl2br.html> 9 | for documentation. 10 | 11 | Oringinal code Copyright 2011 [Brian Neal](http://deathofagremmie.com/) 12 | 13 | All changes Copyright 2011-2014 The Python Markdown Project 14 | 15 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 16 | 17 | """ 18 | 19 | from __future__ import absolute_import 20 | from __future__ import unicode_literals 21 | from . import Extension 22 | from ..inlinepatterns import SubstituteTagPattern 23 | 24 | BR_RE = r'\n' 25 | 26 | class Nl2BrExtension(Extension): 27 | 28 | def extendMarkdown(self, md, md_globals): 29 | br_tag = SubstituteTagPattern(BR_RE, 'br') 30 | md.inlinePatterns.add('nl', br_tag, '_end') 31 | 32 | 33 | def makeExtension(*args, **kwargs): 34 | return Nl2BrExtension(*args, **kwargs) 35 | 36 | -------------------------------------------------------------------------------- /markdown/extensions/sane_lists.py: -------------------------------------------------------------------------------- 1 | """ 2 | Sane List Extension for Python-Markdown 3 | ======================================= 4 | 5 | Modify the behavior of Lists in Python-Markdown to act in a sane manor. 6 | 7 | See <https://pythonhosted.org/Markdown/extensions/sane_lists.html> 8 | for documentation. 9 | 10 | Original code Copyright 2011 [Waylan Limberg](http://achinghead.com) 11 | 12 | All changes Copyright 2011-2014 The Python Markdown Project 13 | 14 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 15 | 16 | """ 17 | 18 | from __future__ import absolute_import 19 | from __future__ import unicode_literals 20 | from . import Extension 21 | from ..blockprocessors import OListProcessor, UListProcessor 22 | import re 23 | 24 | 25 | class SaneOListProcessor(OListProcessor): 26 | 27 | CHILD_RE = re.compile(r'^[ ]{0,3}((\d+\.))[ ]+(.*)') 28 | SIBLING_TAGS = ['ol'] 29 | 30 | 31 | class SaneUListProcessor(UListProcessor): 32 | 33 | CHILD_RE = re.compile(r'^[ ]{0,3}(([*+-]))[ ]+(.*)') 34 | SIBLING_TAGS = ['ul'] 35 | 36 | 37 | class SaneListExtension(Extension): 38 | """ Add sane lists to Markdown. 
""" 39 | 40 | def extendMarkdown(self, md, md_globals): 41 | """ Override existing Processors. """ 42 | md.parser.blockprocessors['olist'] = SaneOListProcessor(md.parser) 43 | md.parser.blockprocessors['ulist'] = SaneUListProcessor(md.parser) 44 | 45 | 46 | def makeExtension(*args, **kwargs): 47 | return SaneListExtension(*args, **kwargs) 48 | 49 | -------------------------------------------------------------------------------- /markdown/extensions/smart_strong.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Smart_Strong Extension for Python-Markdown 3 | ========================================== 4 | 5 | This extention adds smarter handling of double underscores within words. 6 | 7 | See <https://pythonhosted.org/Markdown/extensions/smart_strong.html> 8 | for documentation. 9 | 10 | Original code Copyright 2011 [Waylan Limberg](http://achinghead.com) 11 | 12 | All changes Copyright 2011-2014 The Python Markdown Project 13 | 14 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 15 | 16 | ''' 17 | 18 | from __future__ import absolute_import 19 | from __future__ import unicode_literals 20 | from . import Extension 21 | from ..inlinepatterns import SimpleTagPattern 22 | 23 | SMART_STRONG_RE = r'(?<!\w)(_{2})(?!_)(.+?)(?<!_)\2(?!\w)' 24 | STRONG_RE = r'(\*{2})(.+?)\2' 25 | 26 | class SmartEmphasisExtension(Extension): 27 | """ Add smart_emphasis extension to Markdown class.""" 28 | 29 | def extendMarkdown(self, md, md_globals): 30 | """ Modify inline patterns. """ 31 | md.inlinePatterns['strong'] = SimpleTagPattern(STRONG_RE, 'strong') 32 | md.inlinePatterns.add('strong2', SimpleTagPattern(SMART_STRONG_RE, 'strong'), '>emphasis2') 33 | 34 | def makeExtension(*args, **kwargs): 35 | return SmartEmphasisExtension(*args, **kwargs) 36 | -------------------------------------------------------------------------------- /markdown/extensions/smarty.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | Smarty extension for Python-Markdown 4 | ==================================== 5 | 6 | Adds conversion of ASCII dashes, quotes and ellipses to their HTML 7 | entity equivalents. 8 | 9 | See <https://pythonhosted.org/Markdown/extensions/smarty.html> 10 | for documentation. 11 | 12 | Author: 2013, Dmitry Shachnev <mitya57@gmail.com> 13 | 14 | All changes Copyright 2013-2014 The Python Markdown Project 15 | 16 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 17 | 18 | SmartyPants license: 19 | 20 | Copyright (c) 2003 John Gruber <http://daringfireball.net/> 21 | All rights reserved. 22 | 23 | Redistribution and use in source and binary forms, with or without 24 | modification, are permitted provided that the following conditions are 25 | met: 26 | 27 | * Redistributions of source code must retain the above copyright 28 | notice, this list of conditions and the following disclaimer. 29 | 30 | * Redistributions in binary form must reproduce the above copyright 31 | notice, this list of conditions and the following disclaimer in 32 | the documentation and/or other materials provided with the 33 | distribution. 34 | 35 | * Neither the name "SmartyPants" nor the names of its contributors 36 | may be used to endorse or promote products derived from this 37 | software without specific prior written permission. 
38 | 39 | This software is provided by the copyright holders and contributors "as 40 | is" and any express or implied warranties, including, but not limited 41 | to, the implied warranties of merchantability and fitness for a 42 | particular purpose are disclaimed. In no event shall the copyright 43 | owner or contributors be liable for any direct, indirect, incidental, 44 | special, exemplary, or consequential damages (including, but not 45 | limited to, procurement of substitute goods or services; loss of use, 46 | data, or profits; or business interruption) however caused and on any 47 | theory of liability, whether in contract, strict liability, or tort 48 | (including negligence or otherwise) arising in any way out of the use 49 | of this software, even if advised of the possibility of such damage. 50 | 51 | 52 | smartypants.py license: 53 | 54 | smartypants.py is a derivative work of SmartyPants. 55 | Copyright (c) 2004, 2007 Chad Miller <http://web.chad.org/> 56 | 57 | Redistribution and use in source and binary forms, with or without 58 | modification, are permitted provided that the following conditions are 59 | met: 60 | 61 | * Redistributions of source code must retain the above copyright 62 | notice, this list of conditions and the following disclaimer. 63 | 64 | * Redistributions in binary form must reproduce the above copyright 65 | notice, this list of conditions and the following disclaimer in 66 | the documentation and/or other materials provided with the 67 | distribution. 68 | 69 | This software is provided by the copyright holders and contributors "as 70 | is" and any express or implied warranties, including, but not limited 71 | to, the implied warranties of merchantability and fitness for a 72 | particular purpose are disclaimed. In no event shall the copyright 73 | owner or contributors be liable for any direct, indirect, incidental, 74 | special, exemplary, or consequential damages (including, but not 75 | limited to, procurement of substitute goods or services; loss of use, 76 | data, or profits; or business interruption) however caused and on any 77 | theory of liability, whether in contract, strict liability, or tort 78 | (including negligence or otherwise) arising in any way out of the use 79 | of this software, even if advised of the possibility of such damage. 80 | 81 | ''' 82 | 83 | 84 | from __future__ import unicode_literals 85 | from . import Extension 86 | from ..inlinepatterns import HtmlPattern 87 | from ..odict import OrderedDict 88 | from ..treeprocessors import InlineProcessor 89 | from ..util import parseBoolValue 90 | 91 | # Constants for quote education. 92 | punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" 93 | endOfWordClass = r"[\s.,;:!?)]" 94 | closeClass = "[^\ \t\r\n\[\{\(\-\u0002\u0003]" 95 | 96 | openingQuotesBase = ( 97 | '(\s' # a whitespace char 98 | '|&nbsp;' # or a non-breaking space entity 99 | '|--' # or dashes 100 | '|–|—' # or unicode 101 | '|&[mn]dash;' # or named dash entities 102 | '|&#8211;|&#8212;' # or decimal entities 103 | ')' 104 | ) 105 | 106 | substitutions = { 107 | 'mdash': '&mdash;', 108 | 'ndash': '&ndash;', 109 | 'ellipsis': '&hellip;', 110 | 'left-angle-quote': '&laquo;', 111 | 'right-angle-quote': '&raquo;', 112 | 'left-single-quote': '&lsquo;', 113 | 'right-single-quote': '&rsquo;', 114 | 'left-double-quote': '&ldquo;', 115 | 'right-double-quote': '&rdquo;', 116 | } 117 | 118 | 119 | # Special case if the very first character is a quote 120 | # followed by punctuation at a non-word-break.
Close the quotes by brute force: 121 | singleQuoteStartRe = r"^'(?=%s\B)" % punctClass 122 | doubleQuoteStartRe = r'^"(?=%s\B)' % punctClass 123 | 124 | # Special case for double sets of quotes, e.g.: 125 | # <p>He said, "'Quoted' words in a larger quote."</p> 126 | doubleQuoteSetsRe = r""""'(?=\w)""" 127 | singleQuoteSetsRe = r"""'"(?=\w)""" 128 | 129 | # Get most opening double quotes: 130 | openingDoubleQuotesRegex = r'%s"(?=\w)' % openingQuotesBase 131 | 132 | # Double closing quotes: 133 | closingDoubleQuotesRegex = r'"(?=\s)' 134 | closingDoubleQuotesRegex2 = '(?<=%s)"' % closeClass 135 | 136 | # Get most opening single quotes: 137 | openingSingleQuotesRegex = r"%s'(?=\w)" % openingQuotesBase 138 | 139 | # Single closing quotes: 140 | closingSingleQuotesRegex = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass 141 | closingSingleQuotesRegex2 = r"(?<=%s)'(\s|s\b)" % closeClass 142 | 143 | # All remaining quotes should be opening ones 144 | remainingSingleQuotesRegex = "'" 145 | remainingDoubleQuotesRegex = '"' 146 | 147 | class SubstituteTextPattern(HtmlPattern): 148 | def __init__(self, pattern, replace, markdown_instance): 149 | """ Replaces matches with some text. """ 150 | HtmlPattern.__init__(self, pattern) 151 | self.replace = replace 152 | self.markdown = markdown_instance 153 | 154 | def handleMatch(self, m): 155 | result = '' 156 | for part in self.replace: 157 | if isinstance(part, int): 158 | result += m.group(part) 159 | else: 160 | result += self.markdown.htmlStash.store(part, safe=True) 161 | return result 162 | 163 | class SmartyExtension(Extension): 164 | def __init__(self, *args, **kwargs): 165 | self.config = { 166 | 'smart_quotes': [True, 'Educate quotes'], 167 | 'smart_angled_quotes': [False, 'Educate angled quotes'], 168 | 'smart_dashes': [True, 'Educate dashes'], 169 | 'smart_ellipses': [True, 'Educate ellipses'], 170 | 'substitutions' : [{}, 'Overwrite default substitutions'], 171 | } 172 | super(SmartyExtension, self).__init__(*args, **kwargs) 173 | self.substitutions = dict(substitutions) 174 | self.substitutions.update(self.getConfig('substitutions', default={})) 175 | 176 | def _addPatterns(self, md, patterns, serie): 177 | for ind, pattern in enumerate(patterns): 178 | pattern += (md,) 179 | pattern = SubstituteTextPattern(*pattern) 180 | after = ('>smarty-%s-%d' % (serie, ind - 1) if ind else '_begin') 181 | name = 'smarty-%s-%d' % (serie, ind) 182 | self.inlinePatterns.add(name, pattern, after) 183 | 184 | def educateDashes(self, md): 185 | emDashesPattern = SubstituteTextPattern(r'(?<!-)---(?!-)', 186 | (self.substitutions['mdash'],), md) 187 | enDashesPattern = SubstituteTextPattern(r'(?<!-)--(?!-)', 188 | (self.substitutions['ndash'],), md) 189 | self.inlinePatterns.add('smarty-em-dashes', emDashesPattern, '_begin') 190 | self.inlinePatterns.add('smarty-en-dashes', enDashesPattern, 191 | '>smarty-em-dashes') 192 | 193 | def educateEllipses(self, md): 194 | ellipsesPattern = SubstituteTextPattern(r'(?<!\.)\.{3}(?!\.)', 195 | (self.substitutions['ellipsis'],), md) 196 | self.inlinePatterns.add('smarty-ellipses', ellipsesPattern, '_begin') 197 | 198 | def educateAngledQuotes(self, md): 199 | leftAngledQuotePattern = SubstituteTextPattern(r'\<\<', 200 | (self.substitutions['left-angle-quote'],), md) 201 | rightAngledQuotePattern = SubstituteTextPattern(r'\>\>', 202 | (self.substitutions['right-angle-quote'],), md) 203 | self.inlinePatterns.add('smarty-left-angle-quotes', 204 | leftAngledQuotePattern, '_begin') 205 | self.inlinePatterns.add('smarty-right-angle-quotes', 
206 | rightAngledQuotePattern, '>smarty-left-angle-quotes') 207 | 208 | def educateQuotes(self, md): 209 | configs = self.getConfigs() 210 | lsquo = self.substitutions['left-single-quote'] 211 | rsquo = self.substitutions['right-single-quote'] 212 | ldquo = self.substitutions['left-double-quote'] 213 | rdquo = self.substitutions['right-double-quote'] 214 | patterns = ( 215 | (singleQuoteStartRe, (rsquo,)), 216 | (doubleQuoteStartRe, (rdquo,)), 217 | (doubleQuoteSetsRe, (ldquo + lsquo,)), 218 | (singleQuoteSetsRe, (lsquo + ldquo,)), 219 | (openingSingleQuotesRegex, (2, lsquo)), 220 | (closingSingleQuotesRegex, (rsquo,)), 221 | (closingSingleQuotesRegex2, (rsquo, 2)), 222 | (remainingSingleQuotesRegex, (lsquo,)), 223 | (openingDoubleQuotesRegex, (2, ldquo)), 224 | (closingDoubleQuotesRegex, (rdquo,)), 225 | (closingDoubleQuotesRegex2, (rdquo,)), 226 | (remainingDoubleQuotesRegex, (ldquo,)) 227 | ) 228 | self._addPatterns(md, patterns, 'quotes') 229 | 230 | def extendMarkdown(self, md, md_globals): 231 | configs = self.getConfigs() 232 | self.inlinePatterns = OrderedDict() 233 | if configs['smart_ellipses']: 234 | self.educateEllipses(md) 235 | if configs['smart_quotes']: 236 | self.educateQuotes(md) 237 | if configs['smart_angled_quotes']: 238 | self.educateAngledQuotes(md) 239 | if configs['smart_dashes']: 240 | self.educateDashes(md) 241 | inlineProcessor = InlineProcessor(md) 242 | inlineProcessor.inlinePatterns = self.inlinePatterns 243 | md.treeprocessors.add('smarty', inlineProcessor, '_end') 244 | md.ESCAPED_CHARS.extend(['"', "'"]) 245 | 246 | def makeExtension(*args, **kwargs): 247 | return SmartyExtension(*args, **kwargs) 248 | -------------------------------------------------------------------------------- /markdown/extensions/tables.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tables Extension for Python-Markdown 3 | ==================================== 4 | 5 | Added parsing of tables to Python-Markdown. 6 | 7 | See <https://pythonhosted.org/Markdown/extensions/tables.html> 8 | for documentation. 9 | 10 | Original code Copyright 2009 [Waylan Limberg](http://achinghead.com) 11 | 12 | All changes Copyright 2008-2014 The Python Markdown Project 13 | 14 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 15 | 16 | """ 17 | 18 | from __future__ import absolute_import 19 | from __future__ import unicode_literals 20 | from . import Extension 21 | from ..blockprocessors import BlockProcessor 22 | from ..util import etree 23 | 24 | class TableProcessor(BlockProcessor): 25 | """ Process Tables. """ 26 | 27 | def test(self, parent, block): 28 | rows = block.split('\n') 29 | return (len(rows) > 2 and '|' in rows[0] and 30 | '|' in rows[1] and '-' in rows[1] and 31 | rows[1].strip()[0] in ['|', ':', '-']) 32 | 33 | def run(self, parent, blocks): 34 | """ Parse a table block and build table. 
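A block that passes test() above looks like (a hypothetical sketch):

    | Item  | Qty |
    |:------|----:|
    | nails |  32 |

The second row is the separator: a leading ':' left-aligns a column,
a trailing ':' right-aligns it, and ':' on both ends centers it.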
""" 35 | block = blocks.pop(0).split('\n') 36 | header = block[0].strip() 37 | seperator = block[1].strip() 38 | rows = block[2:] 39 | # Get format type (bordered by pipes or not) 40 | border = False 41 | if header.startswith('|'): 42 | border = True 43 | # Get alignment of columns 44 | align = [] 45 | for c in self._split_row(seperator, border): 46 | if c.startswith(':') and c.endswith(':'): 47 | align.append('center') 48 | elif c.startswith(':'): 49 | align.append('left') 50 | elif c.endswith(':'): 51 | align.append('right') 52 | else: 53 | align.append(None) 54 | # Build table 55 | table = etree.SubElement(parent, 'table') 56 | thead = etree.SubElement(table, 'thead') 57 | self._build_row(header, thead, align, border) 58 | tbody = etree.SubElement(table, 'tbody') 59 | for row in rows: 60 | self._build_row(row.strip(), tbody, align, border) 61 | 62 | def _build_row(self, row, parent, align, border): 63 | """ Given a row of text, build table cells. """ 64 | tr = etree.SubElement(parent, 'tr') 65 | tag = 'td' 66 | if parent.tag == 'thead': 67 | tag = 'th' 68 | cells = self._split_row(row, border) 69 | # We use align here rather than cells to ensure every row 70 | # contains the same number of columns. 71 | for i, a in enumerate(align): 72 | c = etree.SubElement(tr, tag) 73 | try: 74 | c.text = cells[i].strip() 75 | except IndexError: #pragma: no cover 76 | c.text = "" 77 | if a: 78 | c.set('align', a) 79 | 80 | def _split_row(self, row, border): 81 | """ split a row of text into list of cells. """ 82 | if border: 83 | if row.startswith('|'): 84 | row = row[1:] 85 | if row.endswith('|'): 86 | row = row[:-1] 87 | return row.split('|') 88 | 89 | 90 | class TableExtension(Extension): 91 | """ Add tables to Markdown. """ 92 | 93 | def extendMarkdown(self, md, md_globals): 94 | """ Add an instance of TableProcessor to BlockParser. """ 95 | md.parser.blockprocessors.add('table', 96 | TableProcessor(md.parser), 97 | '<hashheader') 98 | 99 | 100 | def makeExtension(*args, **kwargs): 101 | return TableExtension(*args, **kwargs) 102 | 103 | -------------------------------------------------------------------------------- /markdown/extensions/toc.py: -------------------------------------------------------------------------------- 1 | """ 2 | Table of Contents Extension for Python-Markdown 3 | =============================================== 4 | 5 | See <https://pythonhosted.org/Markdown/extensions/toc.html> 6 | for documentation. 7 | 8 | Oringinal code Copyright 2008 [Jack Miller](http://codezen.org) 9 | 10 | All changes Copyright 2008-2014 The Python Markdown Project 11 | 12 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 13 | 14 | """ 15 | 16 | from __future__ import absolute_import 17 | from __future__ import unicode_literals 18 | from . import Extension 19 | from ..treeprocessors import Treeprocessor 20 | from ..util import etree, parseBoolValue, AMP_SUBSTITUTE 21 | from .headerid import slugify, unique, itertext, stashedHTML2text 22 | import re 23 | 24 | 25 | def order_toc_list(toc_list): 26 | """Given an unsorted list with errors and skips, return a nested one. 
27 | [{'level': 1}, {'level': 2}] 28 | => 29 | [{'level': 1, 'children': [{'level': 2, 'children': []}]}] 30 | 31 | A wrong list is also converted: 32 | [{'level': 2}, {'level': 1}] 33 | => 34 | [{'level': 2, 'children': []}, {'level': 1, 'children': []}] 35 | """ 36 | 37 | def build_correct(remaining_list, prev_elements=[{'level': 1000}]): 38 | 39 | if not remaining_list: 40 | return [], [] 41 | 42 | current = remaining_list.pop(0) 43 | if not 'children' in current.keys(): 44 | current['children'] = [] 45 | 46 | if not prev_elements: 47 | # This happens for instance with [8, 1, 1], ie. when some 48 | # header level is outside a scope. We treat it as a 49 | # top-level 50 | next_elements, children = build_correct(remaining_list, [current]) 51 | current['children'].append(children) 52 | return [current] + next_elements, [] 53 | 54 | prev_element = prev_elements.pop() 55 | children = [] 56 | next_elements = [] 57 | # Is current part of the child list or next list? 58 | if current['level'] > prev_element['level']: 59 | #print "%d is a child of %d" % (current['level'], prev_element['level']) 60 | prev_elements.append(prev_element) 61 | prev_elements.append(current) 62 | prev_element['children'].append(current) 63 | next_elements2, children2 = build_correct(remaining_list, prev_elements) 64 | children += children2 65 | next_elements += next_elements2 66 | else: 67 | #print "%d is ancestor of %d" % (current['level'], prev_element['level']) 68 | if not prev_elements: 69 | #print "No previous elements, so appending to the next set" 70 | next_elements.append(current) 71 | prev_elements = [current] 72 | next_elements2, children2 = build_correct(remaining_list, prev_elements) 73 | current['children'].extend(children2) 74 | else: 75 | #print "Previous elements, comparing to those first" 76 | remaining_list.insert(0, current) 77 | next_elements2, children2 = build_correct(remaining_list, prev_elements) 78 | children.extend(children2) 79 | next_elements += next_elements2 80 | 81 | return next_elements, children 82 | 83 | ordered_list, __ = build_correct(toc_list) 84 | return ordered_list 85 | 86 | 87 | class TocTreeprocessor(Treeprocessor): 88 | 89 | # Iterator wrapper to get parent and child all at once 90 | def iterparent(self, root): 91 | for parent in root.getiterator(): 92 | for child in parent: 93 | yield parent, child 94 | 95 | def add_anchor(self, c, elem_id): #@ReservedAssignment 96 | anchor = etree.Element("a") 97 | anchor.text = c.text 98 | anchor.attrib["href"] = "#" + elem_id 99 | anchor.attrib["class"] = "toclink" 100 | c.text = "" 101 | for elem in c.getchildren(): 102 | anchor.append(elem) 103 | c.remove(elem) 104 | c.append(anchor) 105 | 106 | def add_permalink(self, c, elem_id): 107 | permalink = etree.Element("a") 108 | permalink.text = ("%spara;" % AMP_SUBSTITUTE 109 | if self.use_permalinks is True else self.use_permalinks) 110 | permalink.attrib["href"] = "#" + elem_id 111 | permalink.attrib["class"] = "headerlink" 112 | permalink.attrib["title"] = "Permanent link" 113 | c.append(permalink) 114 | 115 | def build_toc_etree(self, div, toc_list): 116 | # Add title to the div 117 | if self.config["title"]: 118 | header = etree.SubElement(div, "span") 119 | header.attrib["class"] = "toctitle" 120 | header.text = self.config["title"] 121 | 122 | def build_etree_ul(toc_list, parent): 123 | ul = etree.SubElement(parent, "ul") 124 | for item in toc_list: 125 | # List item link, to be inserted into the toc div 126 | li = etree.SubElement(ul, "li") 127 | link = etree.SubElement(li, "a") 128 | 
link.text = item.get('name', '') 129 | link.attrib["href"] = '#' + item.get('id', '') 130 | if item['children']: 131 | build_etree_ul(item['children'], li) 132 | return ul 133 | 134 | return build_etree_ul(toc_list, div) 135 | 136 | def run(self, doc): 137 | 138 | div = etree.Element("div") 139 | div.attrib["class"] = "toc" 140 | header_rgx = re.compile("[Hh][123456]") 141 | 142 | self.use_anchors = parseBoolValue(self.config["anchorlink"]) 143 | self.use_permalinks = parseBoolValue(self.config["permalink"], False) 144 | if self.use_permalinks is None: 145 | self.use_permalinks = self.config["permalink"] 146 | 147 | # Get a list of id attributes 148 | used_ids = set() 149 | for c in doc.getiterator(): 150 | if "id" in c.attrib: 151 | used_ids.add(c.attrib["id"]) 152 | 153 | toc_list = [] 154 | marker_found = False 155 | for (p, c) in self.iterparent(doc): 156 | text = ''.join(itertext(c)).strip() 157 | if not text: 158 | continue 159 | 160 | # To keep the output from screwing up the 161 | # validation by putting a <div> inside of a <p> 162 | # we actually replace the <p> in its entirety. 163 | # We do not allow the marker inside a header as that 164 | # would causes an enless loop of placing a new TOC 165 | # inside previously generated TOC. 166 | if c.text and c.text.strip() == self.config["marker"] and \ 167 | not header_rgx.match(c.tag) and c.tag not in ['pre', 'code']: 168 | for i in range(len(p)): 169 | if p[i] == c: 170 | p[i] = div 171 | break 172 | marker_found = True 173 | 174 | if header_rgx.match(c.tag): 175 | 176 | # Do not override pre-existing ids 177 | if not "id" in c.attrib: 178 | elem_id = stashedHTML2text(text, self.markdown) 179 | elem_id = unique(self.config["slugify"](elem_id, '-'), used_ids) 180 | c.attrib["id"] = elem_id 181 | else: 182 | elem_id = c.attrib["id"] 183 | 184 | tag_level = int(c.tag[-1]) 185 | 186 | toc_list.append({'level': tag_level, 187 | 'id': elem_id, 188 | 'name': text}) 189 | 190 | if self.use_anchors: 191 | self.add_anchor(c, elem_id) 192 | if self.use_permalinks: 193 | self.add_permalink(c, elem_id) 194 | 195 | toc_list_nested = order_toc_list(toc_list) 196 | self.build_toc_etree(div, toc_list_nested) 197 | prettify = self.markdown.treeprocessors.get('prettify') 198 | if prettify: prettify.run(div) 199 | if not marker_found: 200 | # serialize and attach to markdown instance. 201 | toc = self.markdown.serializer(div) 202 | for pp in self.markdown.postprocessors.values(): 203 | toc = pp.run(toc) 204 | self.markdown.toc = toc 205 | 206 | 207 | class TocExtension(Extension): 208 | 209 | TreeProcessorClass = TocTreeprocessor 210 | 211 | def __init__(self, *args, **kwargs): 212 | self.config = { 213 | "marker" : ["[TOC]", 214 | "Text to find and replace with Table of Contents - " 215 | "Defaults to \"[TOC]\""], 216 | "slugify" : [slugify, 217 | "Function to generate anchors based on header text - " 218 | "Defaults to the headerid ext's slugify function."], 219 | "title" : ["", 220 | "Title to insert into TOC <div> - " 221 | "Defaults to an empty string"], 222 | "anchorlink" : [0, 223 | "1 if header should be a self link - " 224 | "Defaults to 0"], 225 | "permalink" : [0, 226 | "1 or link text if a Sphinx-style permalink should be added - " 227 | "Defaults to 0"] 228 | } 229 | 230 | super(TocExtension, self).__init__(*args, **kwargs) 231 | 232 | def extendMarkdown(self, md, md_globals): 233 | tocext = self.TreeProcessorClass(md) 234 | tocext.config = self.getConfigs() 235 | # Headerid ext is set to '>prettify'. 
With this set to '_end', 236 | # it should always come after headerid ext (and honor ids assinged 237 | # by the header id extension) if both are used. Same goes for 238 | # attr_list extension. This must come last because we don't want 239 | # to redefine ids after toc is created. But we do want toc prettified. 240 | md.treeprocessors.add("toc", tocext, "_end") 241 | 242 | 243 | def makeExtension(*args, **kwargs): 244 | return TocExtension(*args, **kwargs) 245 | -------------------------------------------------------------------------------- /markdown/extensions/wikilinks.py: -------------------------------------------------------------------------------- 1 | ''' 2 | WikiLinks Extension for Python-Markdown 3 | ====================================== 4 | 5 | Converts [[WikiLinks]] to relative links. 6 | 7 | See <https://pythonhosted.org/Markdown/extensions/wikilinks.html> 8 | for documentation. 9 | 10 | Original code Copyright [Waylan Limberg](http://achinghead.com/). 11 | 12 | All changes Copyright The Python Markdown Project 13 | 14 | License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 15 | 16 | ''' 17 | 18 | from __future__ import absolute_import 19 | from __future__ import unicode_literals 20 | from . import Extension 21 | from ..inlinepatterns import Pattern 22 | from ..util import etree 23 | import re 24 | 25 | def build_url(label, base, end): 26 | """ Build a url from the label, a base, and an end. """ 27 | clean_label = re.sub(r'([ ]+_)|(_[ ]+)|([ ]+)', '_', label) 28 | return '%s%s%s'% (base, clean_label, end) 29 | 30 | 31 | class WikiLinkExtension(Extension): 32 | 33 | def __init__ (self, *args, **kwargs): 34 | self.config = { 35 | 'base_url' : ['/', 'String to append to beginning or URL.'], 36 | 'end_url' : ['/', 'String to append to end of URL.'], 37 | 'html_class' : ['wikilink', 'CSS hook. Leave blank for none.'], 38 | 'build_url' : [build_url, 'Callable formats URL from label.'], 39 | } 40 | 41 | super(WikiLinkExtension, self).__init__(*args, **kwargs) 42 | 43 | def extendMarkdown(self, md, md_globals): 44 | self.md = md 45 | 46 | # append to end of inline patterns 47 | WIKILINK_RE = r'\[\[([\w0-9_ -]+)\]\]' 48 | wikilinkPattern = WikiLinks(WIKILINK_RE, self.getConfigs()) 49 | wikilinkPattern.md = md 50 | md.inlinePatterns.add('wikilink', wikilinkPattern, "<not_strong") 51 | 52 | 53 | class WikiLinks(Pattern): 54 | def __init__(self, pattern, config): 55 | super(WikiLinks, self).__init__(pattern) 56 | self.config = config 57 | 58 | def handleMatch(self, m): 59 | if m.group(2).strip(): 60 | base_url, end_url, html_class = self._getMeta() 61 | label = m.group(2).strip() 62 | url = self.config['build_url'](label, base_url, end_url) 63 | a = etree.Element('a') 64 | a.text = label 65 | a.set('href', url) 66 | if html_class: 67 | a.set('class', html_class) 68 | else: 69 | a = '' 70 | return a 71 | 72 | def _getMeta(self): 73 | """ Return meta data or config data. 
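With the meta extension enabled, a page can override the configured
values, e.g. (a hypothetical sketch):

    wiki_base_url: /wiki/
    wiki_end_url: .html

    See [[Front Page]].

which would render the link as
<a class="wikilink" href="/wiki/Front_Page.html">Front Page</a>.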
""" 74 | base_url = self.config['base_url'] 75 | end_url = self.config['end_url'] 76 | html_class = self.config['html_class'] 77 | if hasattr(self.md, 'Meta'): 78 | if 'wiki_base_url' in self.md.Meta: 79 | base_url = self.md.Meta['wiki_base_url'][0] 80 | if 'wiki_end_url' in self.md.Meta: 81 | end_url = self.md.Meta['wiki_end_url'][0] 82 | if 'wiki_html_class' in self.md.Meta: 83 | html_class = self.md.Meta['wiki_html_class'][0] 84 | return base_url, end_url, html_class 85 | 86 | 87 | def makeExtension(*args, **kwargs) : 88 | return WikiLinkExtension(*args, **kwargs) 89 | -------------------------------------------------------------------------------- /markdown/odict.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import absolute_import 3 | from . import util 4 | 5 | from copy import deepcopy 6 | 7 | class OrderedDict(dict): 8 | """ 9 | A dictionary that keeps its keys in the order in which they're inserted. 10 | 11 | Copied from Django's SortedDict with some modifications. 12 | 13 | """ 14 | def __new__(cls, *args, **kwargs): 15 | instance = super(OrderedDict, cls).__new__(cls, *args, **kwargs) 16 | instance.keyOrder = [] 17 | return instance 18 | 19 | def __init__(self, data=None): 20 | if data is None or isinstance(data, dict): 21 | data = data or [] 22 | super(OrderedDict, self).__init__(data) 23 | self.keyOrder = list(data) if data else [] 24 | else: 25 | super(OrderedDict, self).__init__() 26 | super_set = super(OrderedDict, self).__setitem__ 27 | for key, value in data: 28 | # Take the ordering from first key 29 | if key not in self: 30 | self.keyOrder.append(key) 31 | # But override with last value in data (dict() does this) 32 | super_set(key, value) 33 | 34 | def __deepcopy__(self, memo): 35 | return self.__class__([(key, deepcopy(value, memo)) 36 | for key, value in self.items()]) 37 | 38 | def __copy__(self): 39 | # The Python's default copy implementation will alter the state 40 | # of self. The reason for this seems complex but is likely related to 41 | # subclassing dict. 42 | return self.copy() 43 | 44 | def __setitem__(self, key, value): 45 | if key not in self: 46 | self.keyOrder.append(key) 47 | super(OrderedDict, self).__setitem__(key, value) 48 | 49 | def __delitem__(self, key): 50 | super(OrderedDict, self).__delitem__(key) 51 | self.keyOrder.remove(key) 52 | 53 | def __iter__(self): 54 | return iter(self.keyOrder) 55 | 56 | def __reversed__(self): 57 | return reversed(self.keyOrder) 58 | 59 | def pop(self, k, *args): 60 | result = super(OrderedDict, self).pop(k, *args) 61 | try: 62 | self.keyOrder.remove(k) 63 | except ValueError: 64 | # Key wasn't in the dictionary in the first place. No problem. 
65 | pass 66 | return result 67 | 68 | def popitem(self): 69 | result = super(OrderedDict, self).popitem() 70 | self.keyOrder.remove(result[0]) 71 | return result 72 | 73 | def _iteritems(self): 74 | for key in self.keyOrder: 75 | yield key, self[key] 76 | 77 | def _iterkeys(self): 78 | for key in self.keyOrder: 79 | yield key 80 | 81 | def _itervalues(self): 82 | for key in self.keyOrder: 83 | yield self[key] 84 | 85 | if util.PY3: #pragma: no cover 86 | items = _iteritems 87 | keys = _iterkeys 88 | values = _itervalues 89 | else: #pragma: no cover 90 | iteritems = _iteritems 91 | iterkeys = _iterkeys 92 | itervalues = _itervalues 93 | 94 | def items(self): 95 | return [(k, self[k]) for k in self.keyOrder] 96 | 97 | def keys(self): 98 | return self.keyOrder[:] 99 | 100 | def values(self): 101 | return [self[k] for k in self.keyOrder] 102 | 103 | def update(self, dict_): 104 | for k in dict_: 105 | self[k] = dict_[k] 106 | 107 | def setdefault(self, key, default): 108 | if key not in self: 109 | self.keyOrder.append(key) 110 | return super(OrderedDict, self).setdefault(key, default) 111 | 112 | def value_for_index(self, index): 113 | """Returns the value of the item at the given zero-based index.""" 114 | return self[self.keyOrder[index]] 115 | 116 | def insert(self, index, key, value): 117 | """Inserts the key, value pair before the item with the given index.""" 118 | if key in self.keyOrder: 119 | n = self.keyOrder.index(key) 120 | del self.keyOrder[n] 121 | if n < index: 122 | index -= 1 123 | self.keyOrder.insert(index, key) 124 | super(OrderedDict, self).__setitem__(key, value) 125 | 126 | def copy(self): 127 | """Returns a copy of this object.""" 128 | # This way of initializing the copy means it works for subclasses, too. 129 | return self.__class__(self) 130 | 131 | def __repr__(self): 132 | """ 133 | Replaces the normal dict.__repr__ with a version that returns the keys 134 | in their Ordered order. 135 | """ 136 | return '{%s}' % ', '.join(['%r: %r' % (k, v) for k, v in self._iteritems()]) 137 | 138 | def clear(self): 139 | super(OrderedDict, self).clear() 140 | self.keyOrder = [] 141 | 142 | def index(self, key): 143 | """ Return the index of a given key. """ 144 | try: 145 | return self.keyOrder.index(key) 146 | except ValueError: 147 | raise ValueError("Element '%s' was not found in OrderedDict" % key) 148 | 149 | def index_for_location(self, location): 150 | """ Return index or None for a given location. """ 151 | if location == '_begin': 152 | i = 0 153 | elif location == '_end': 154 | i = None 155 | elif location.startswith('<') or location.startswith('>'): 156 | i = self.index(location[1:]) 157 | if location.startswith('>'): 158 | if i >= len(self): 159 | # last item 160 | i = None 161 | else: 162 | i += 1 163 | else: 164 | raise ValueError('Not a valid location: "%s". Location key ' 165 | 'must start with a ">" or "<".' % location) 166 | return i 167 | 168 | def add(self, key, value, location): 169 | """ Insert by key location. """ 170 | i = self.index_for_location(location) 171 | if i is not None: 172 | self.insert(i, key, value) 173 | else: 174 | self.__setitem__(key, value) 175 | 176 | def link(self, key, location): 177 | """ Change location of an existing item. 
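Locations use the same mini-language as index_for_location():
'_begin', '_end', '<key' (before key) or '>key' (after key).
A rough sketch with throwaway values:

    od = OrderedDict()
    od['a'] = 1
    od['c'] = 3
    od.add('b', 2, '>a')   # keyOrder is now ['a', 'b', 'c']
    od.link('c', '<a')     # keyOrder is now ['c', 'a', 'b']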
""" 178 | n = self.keyOrder.index(key) 179 | del self.keyOrder[n] 180 | try: 181 | i = self.index_for_location(location) 182 | if i is not None: 183 | self.keyOrder.insert(i, key) 184 | else: 185 | self.keyOrder.append(key) 186 | except Exception as e: 187 | # restore to prevent data loss and reraise 188 | self.keyOrder.insert(n, key) 189 | raise e 190 | -------------------------------------------------------------------------------- /markdown/postprocessors.py: -------------------------------------------------------------------------------- 1 | """ 2 | POST-PROCESSORS 3 | ============================================================================= 4 | 5 | Markdown also allows post-processors, which are similar to preprocessors in 6 | that they need to implement a "run" method. However, they are run after core 7 | processing. 8 | 9 | """ 10 | 11 | from __future__ import absolute_import 12 | from __future__ import unicode_literals 13 | from . import util 14 | from . import odict 15 | import re 16 | 17 | 18 | def build_postprocessors(md_instance, **kwargs): 19 | """ Build the default postprocessors for Markdown. """ 20 | postprocessors = odict.OrderedDict() 21 | postprocessors["raw_html"] = RawHtmlPostprocessor(md_instance) 22 | postprocessors["amp_substitute"] = AndSubstitutePostprocessor() 23 | postprocessors["unescape"] = UnescapePostprocessor() 24 | return postprocessors 25 | 26 | 27 | class Postprocessor(util.Processor): 28 | """ 29 | Postprocessors are run after the ElementTree it converted back into text. 30 | 31 | Each Postprocessor implements a "run" method that takes a pointer to a 32 | text string, modifies it as necessary and returns a text string. 33 | 34 | Postprocessors must extend markdown.Postprocessor. 35 | 36 | """ 37 | 38 | def run(self, text): 39 | """ 40 | Subclasses of Postprocessor should implement a `run` method, which 41 | takes the html document as a single text string and returns a 42 | (possibly modified) string. 43 | 44 | """ 45 | pass #pragma: no cover 46 | 47 | 48 | class RawHtmlPostprocessor(Postprocessor): 49 | """ Restore raw html to the document. """ 50 | 51 | def run(self, text): 52 | """ Iterate over html stash and restore "safe" html. """ 53 | for i in range(self.markdown.htmlStash.html_counter): 54 | html, safe = self.markdown.htmlStash.rawHtmlBlocks[i] 55 | if self.markdown.safeMode and not safe: 56 | if str(self.markdown.safeMode).lower() == 'escape': 57 | html = self.escape(html) 58 | elif str(self.markdown.safeMode).lower() == 'remove': 59 | html = '' 60 | else: 61 | html = self.markdown.html_replacement_text 62 | if self.isblocklevel(html) and (safe or not self.markdown.safeMode): 63 | text = text.replace("<p>%s</p>" % 64 | (self.markdown.htmlStash.get_placeholder(i)), 65 | html + "\n") 66 | text = text.replace(self.markdown.htmlStash.get_placeholder(i), 67 | html) 68 | return text 69 | 70 | def escape(self, html): 71 | """ Basic html escaping """ 72 | html = html.replace('&', '&') 73 | html = html.replace('<', '<') 74 | html = html.replace('>', '>') 75 | return html.replace('"', '"') 76 | 77 | def isblocklevel(self, html): 78 | m = re.match(r'^\<\/?([^ >]+)', html) 79 | if m: 80 | if m.group(1)[0] in ('!', '?', '@', '%'): 81 | # Comment, php etc... 
82 | return True 83 | return util.isBlockLevel(m.group(1)) 84 | return False 85 | 86 | 87 | class AndSubstitutePostprocessor(Postprocessor): 88 | """ Restore valid entities """ 89 | 90 | def run(self, text): 91 | text = text.replace(util.AMP_SUBSTITUTE, "&") 92 | return text 93 | 94 | 95 | class UnescapePostprocessor(Postprocessor): 96 | """ Restore escaped chars """ 97 | 98 | RE = re.compile('%s(\d+)%s' % (util.STX, util.ETX)) 99 | 100 | def unescape(self, m): 101 | return util.int2str(int(m.group(1))) 102 | 103 | def run(self, text): 104 | return self.RE.sub(self.unescape, text) 105 | -------------------------------------------------------------------------------- /markdown/preprocessors.py: -------------------------------------------------------------------------------- 1 | """ 2 | PRE-PROCESSORS 3 | ============================================================================= 4 | 5 | Preprocessors work on source text before we start doing anything too 6 | complicated. 7 | """ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import unicode_literals 11 | from . import util 12 | from . import odict 13 | import re 14 | 15 | 16 | def build_preprocessors(md_instance, **kwargs): 17 | """ Build the default set of preprocessors used by Markdown. """ 18 | preprocessors = odict.OrderedDict() 19 | preprocessors['normalize_whitespace'] = NormalizeWhitespace(md_instance) 20 | if md_instance.safeMode != 'escape': 21 | preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance) 22 | preprocessors["reference"] = ReferencePreprocessor(md_instance) 23 | return preprocessors 24 | 25 | 26 | class Preprocessor(util.Processor): 27 | """ 28 | Preprocessors are run after the text is broken into lines. 29 | 30 | Each preprocessor implements a "run" method that takes a pointer to a 31 | list of lines of the document, modifies it as necessary and returns 32 | either the same pointer or a pointer to a new list. 33 | 34 | Preprocessors must extend markdown.Preprocessor. 35 | 36 | """ 37 | def run(self, lines): 38 | """ 39 | Each subclass of Preprocessor should override the `run` method, which 40 | takes the document as a list of strings split by newlines and returns 41 | the (possibly modified) list of lines. 42 | 43 | """ 44 | pass #pragma: no cover 45 | 46 | 47 | class NormalizeWhitespace(Preprocessor): 48 | """ Normalize whitespace for consistant parsing. """ 49 | 50 | def run(self, lines): 51 | source = '\n'.join(lines) 52 | source = source.replace(util.STX, "").replace(util.ETX, "") 53 | source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" 54 | source = source.expandtabs(self.markdown.tab_length) 55 | source = re.sub(r'(?<=\n) +\n', '\n', source) 56 | return source.split('\n') 57 | 58 | 59 | class HtmlBlockPreprocessor(Preprocessor): 60 | """Remove html blocks from the text and store them for later retrieval.""" 61 | 62 | right_tag_patterns = ["</%s>", "%s>"] 63 | attrs_pattern = r""" 64 | \s+(?P<attr>[^>"'/= ]+)=(?P<q>['"])(?P<value>.*?)(?P=q) # attr="value" 65 | | # OR 66 | \s+(?P<attr1>[^>"'/= ]+)=(?P<value1>[^> ]+) # attr=value 67 | | # OR 68 | \s+(?P<attr2>[^>"'/= ]+) # attr 69 | """ 70 | left_tag_pattern = r'^\<(?P<tag>[^> ]+)(?P<attrs>(%s)*)\s*\/?\>?' 
% attrs_pattern 71 | attrs_re = re.compile(attrs_pattern, re.VERBOSE) 72 | left_tag_re = re.compile(left_tag_pattern, re.VERBOSE) 73 | markdown_in_raw = False 74 | 75 | def _get_left_tag(self, block): 76 | m = self.left_tag_re.match(block) 77 | if m: 78 | tag = m.group('tag') 79 | raw_attrs = m.group('attrs') 80 | attrs = {} 81 | if raw_attrs: 82 | for ma in self.attrs_re.finditer(raw_attrs): 83 | if ma.group('attr'): 84 | if ma.group('value'): 85 | attrs[ma.group('attr').strip()] = ma.group('value') 86 | else: 87 | attrs[ma.group('attr').strip()] = "" 88 | elif ma.group('attr1'): 89 | if ma.group('value1'): 90 | attrs[ma.group('attr1').strip()] = ma.group('value1') 91 | else: 92 | attrs[ma.group('attr1').strip()] = "" 93 | elif ma.group('attr2'): 94 | attrs[ma.group('attr2').strip()] = "" 95 | return tag, len(m.group(0)), attrs 96 | else: 97 | tag = block[1:].split(">", 1)[0].lower() 98 | return tag, len(tag)+2, {} 99 | 100 | def _recursive_tagfind(self, ltag, rtag, start_index, block): 101 | while 1: 102 | i = block.find(rtag, start_index) 103 | if i == -1: 104 | return -1 105 | j = block.find(ltag, start_index) 106 | # if no ltag, or rtag found before another ltag, return index 107 | if (j > i or j == -1): 108 | return i + len(rtag) 109 | # another ltag found before rtag, use end of ltag as starting 110 | # point and search again 111 | j = block.find('>', j) 112 | start_index = self._recursive_tagfind(ltag, rtag, j + 1, block) 113 | if start_index == -1: 114 | # HTML potentially malformed- ltag has no corresponding 115 | # rtag 116 | return -1 117 | 118 | def _get_right_tag(self, left_tag, left_index, block): 119 | for p in self.right_tag_patterns: 120 | tag = p % left_tag 121 | i = self._recursive_tagfind("<%s" % left_tag, tag, left_index, block) 122 | if i > 2: 123 | return tag.lstrip("<").rstrip(">"), i 124 | return block.rstrip()[-left_index:-1].lower(), len(block) 125 | 126 | def _equal_tags(self, left_tag, right_tag): 127 | if left_tag[0] in ['?', '@', '%']: # handle PHP, etc. 128 | return True 129 | if ("/" + left_tag) == right_tag: 130 | return True 131 | if (right_tag == "--" and left_tag == "--"): 132 | return True 133 | elif left_tag == right_tag[1:] \ 134 | and right_tag[0] == "/": 135 | return True 136 | else: 137 | return False 138 | 139 | def _is_oneliner(self, tag): 140 | return (tag in ['hr', 'hr/']) 141 | 142 | def _stringindex_to_listindex(self, stringindex, items): 143 | """ 144 | Same effect as concatenating the strings in items, 145 | finding the character to which stringindex refers in that string, 146 | and returning the index of the item in which that character resides. 
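For example (hypothetical values):

    _stringindex_to_listindex(5, ['ab', 'cde', 'fg'])  # -> 2

since 'ab' + 'cde' + 'fg' is 'abcdefg' and character 5 ('f') lives in
the item at list index 2. Note that the method appends a throwaway
'dummy' sentinel to the list it is given.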
147 | """ 148 | items.append('dummy') 149 | i, count = 0, 0 150 | while count <= stringindex: 151 | count += len(items[i]) 152 | i += 1 153 | return i - 1 154 | 155 | def _nested_markdown_in_html(self, items): 156 | """Find and process html child elements of the given element block.""" 157 | for i, item in enumerate(items): 158 | if self.left_tag_re.match(item): 159 | left_tag, left_index, attrs = \ 160 | self._get_left_tag(''.join(items[i:])) 161 | right_tag, data_index = self._get_right_tag( 162 | left_tag, left_index, ''.join(items[i:])) 163 | right_listindex = \ 164 | self._stringindex_to_listindex(data_index, items[i:]) + i 165 | if 'markdown' in attrs.keys(): 166 | items[i] = items[i][left_index:] # remove opening tag 167 | placeholder = self.markdown.htmlStash.store_tag( 168 | left_tag, attrs, i + 1, right_listindex + 1) 169 | items.insert(i, placeholder) 170 | if len(items) - right_listindex <= 1: # last nest, no tail 171 | right_listindex -= 1 172 | items[right_listindex] = items[right_listindex][ 173 | :-len(right_tag) - 2] # remove closing tag 174 | else: # raw html 175 | if len(items) - right_listindex <= 1: # last element 176 | right_listindex -= 1 177 | placeholder = self.markdown.htmlStash.store('\n\n'.join( 178 | items[i:right_listindex + 1])) 179 | del items[i:right_listindex + 1] 180 | items.insert(i, placeholder) 181 | return items 182 | 183 | def run(self, lines): 184 | text = "\n".join(lines) 185 | new_blocks = [] 186 | text = text.rsplit("\n\n") 187 | items = [] 188 | left_tag = '' 189 | right_tag = '' 190 | in_tag = False # flag 191 | 192 | while text: 193 | block = text[0] 194 | if block.startswith("\n"): 195 | block = block[1:] 196 | text = text[1:] 197 | 198 | if block.startswith("\n"): 199 | block = block[1:] 200 | 201 | if not in_tag: 202 | if block.startswith("<") and len(block.strip()) > 1: 203 | 204 | if block[1:4] == "!--": 205 | # is a comment block 206 | left_tag, left_index, attrs = "--", 2, {} 207 | else: 208 | left_tag, left_index, attrs = self._get_left_tag(block) 209 | right_tag, data_index = self._get_right_tag(left_tag, 210 | left_index, 211 | block) 212 | # keep checking conditions below and maybe just append 213 | 214 | if data_index < len(block) \ 215 | and (util.isBlockLevel(left_tag) 216 | or left_tag == '--'): 217 | text.insert(0, block[data_index:]) 218 | block = block[:data_index] 219 | 220 | if not (util.isBlockLevel(left_tag) \ 221 | or block[1] in ["!", "?", "@", "%"]): 222 | new_blocks.append(block) 223 | continue 224 | 225 | if self._is_oneliner(left_tag): 226 | new_blocks.append(block.strip()) 227 | continue 228 | 229 | if block.rstrip().endswith(">") \ 230 | and self._equal_tags(left_tag, right_tag): 231 | if self.markdown_in_raw and 'markdown' in attrs.keys(): 232 | block = block[left_index:-len(right_tag) - 2] 233 | new_blocks.append(self.markdown.htmlStash. 
234 | store_tag(left_tag, attrs, 0, 2)) 235 | new_blocks.extend([block]) 236 | else: 237 | new_blocks.append( 238 | self.markdown.htmlStash.store(block.strip())) 239 | continue 240 | else: 241 | # if is block level tag and is not complete 242 | if (not self._equal_tags(left_tag, right_tag)) and \ 243 | (util.isBlockLevel(left_tag) or left_tag == "--"): 244 | items.append(block.strip()) 245 | in_tag = True 246 | else: 247 | new_blocks.append( 248 | self.markdown.htmlStash.store(block.strip())) 249 | 250 | continue 251 | 252 | else: 253 | new_blocks.append(block) 254 | 255 | else: 256 | items.append(block) 257 | 258 | right_tag, data_index = self._get_right_tag(left_tag, 0, block) 259 | 260 | if self._equal_tags(left_tag, right_tag): 261 | # if find closing tag 262 | 263 | if data_index < len(block): 264 | # we have more text after right_tag 265 | items[-1] = block[:data_index] 266 | text.insert(0, block[data_index:]) 267 | 268 | in_tag = False 269 | if self.markdown_in_raw and 'markdown' in attrs.keys(): 270 | items[0] = items[0][left_index:] 271 | items[-1] = items[-1][:-len(right_tag) - 2] 272 | if items[len(items) - 1]: # not a newline/empty string 273 | right_index = len(items) + 3 274 | else: 275 | right_index = len(items) + 2 276 | new_blocks.append(self.markdown.htmlStash.store_tag( 277 | left_tag, attrs, 0, right_index)) 278 | placeholderslen = len(self.markdown.htmlStash.tag_data) 279 | new_blocks.extend( 280 | self._nested_markdown_in_html(items)) 281 | nests = len(self.markdown.htmlStash.tag_data) - \ 282 | placeholderslen 283 | self.markdown.htmlStash.tag_data[-1 - nests][ 284 | 'right_index'] += nests - 2 285 | else: 286 | new_blocks.append( 287 | self.markdown.htmlStash.store('\n\n'.join(items))) 288 | items = [] 289 | 290 | if items: 291 | if self.markdown_in_raw and 'markdown' in attrs.keys(): 292 | items[0] = items[0][left_index:] 293 | items[-1] = items[-1][:-len(right_tag) - 2] 294 | if items[len(items) - 1]: # not a newline/empty string 295 | right_index = len(items) + 3 296 | else: 297 | right_index = len(items) + 2 298 | new_blocks.append( 299 | self.markdown.htmlStash.store_tag( 300 | left_tag, attrs, 0, right_index)) 301 | placeholderslen = len(self.markdown.htmlStash.tag_data) 302 | new_blocks.extend(self._nested_markdown_in_html(items)) 303 | nests = len(self.markdown.htmlStash.tag_data) - placeholderslen 304 | self.markdown.htmlStash.tag_data[-1 - nests][ 305 | 'right_index'] += nests - 2 306 | else: 307 | new_blocks.append( 308 | self.markdown.htmlStash.store('\n\n'.join(items))) 309 | new_blocks.append('\n') 310 | 311 | new_text = "\n\n".join(new_blocks) 312 | return new_text.split("\n") 313 | 314 | 315 | class ReferencePreprocessor(Preprocessor): 316 | """ Remove reference definitions from text and store for later use. 
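Definitions take the form (a hypothetical sketch):

    [id]: http://example.com/ "Optional Title"

Each id is lower-cased and stored as
self.markdown.references[id] = (link, title); the title may also
appear alone on the following line.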
""" 317 | 318 | TITLE = r'[ ]*(\"(.*)\"|\'(.*)\'|\((.*)\))[ ]*' 319 | RE = re.compile(r'^[ ]{0,3}\[([^\]]*)\]:\s*([^ ]*)[ ]*(%s)?$' % TITLE, re.DOTALL) 320 | TITLE_RE = re.compile(r'^%s$' % TITLE) 321 | 322 | def run (self, lines): 323 | new_text = []; 324 | while lines: 325 | line = lines.pop(0) 326 | m = self.RE.match(line) 327 | if m: 328 | id = m.group(1).strip().lower() 329 | link = m.group(2).lstrip('<').rstrip('>') 330 | t = m.group(5) or m.group(6) or m.group(7) 331 | if not t: 332 | # Check next line for title 333 | tm = self.TITLE_RE.match(lines[0]) 334 | if tm: 335 | lines.pop(0) 336 | t = tm.group(2) or tm.group(3) or tm.group(4) 337 | self.markdown.references[id] = (link, t) 338 | else: 339 | new_text.append(line) 340 | 341 | return new_text #+ "\n" 342 | -------------------------------------------------------------------------------- /markdown/serializers.py: -------------------------------------------------------------------------------- 1 | # markdown/searializers.py 2 | # 3 | # Add x/html serialization to Elementree 4 | # Taken from ElementTree 1.3 preview with slight modifications 5 | # 6 | # Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved. 7 | # 8 | # fredrik@pythonware.com 9 | # http://www.pythonware.com 10 | # 11 | # -------------------------------------------------------------------- 12 | # The ElementTree toolkit is 13 | # 14 | # Copyright (c) 1999-2007 by Fredrik Lundh 15 | # 16 | # By obtaining, using, and/or copying this software and/or its 17 | # associated documentation, you agree that you have read, understood, 18 | # and will comply with the following terms and conditions: 19 | # 20 | # Permission to use, copy, modify, and distribute this software and 21 | # its associated documentation for any purpose and without fee is 22 | # hereby granted, provided that the above copyright notice appears in 23 | # all copies, and that both that copyright notice and this permission 24 | # notice appear in supporting documentation, and that the name of 25 | # Secret Labs AB or the author not be used in advertising or publicity 26 | # pertaining to distribution of the software without specific, written 27 | # prior permission. 28 | # 29 | # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 30 | # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 31 | # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 32 | # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 33 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 34 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 35 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 36 | # OF THIS SOFTWARE. 37 | # -------------------------------------------------------------------- 38 | 39 | 40 | from __future__ import absolute_import 41 | from __future__ import unicode_literals 42 | from . 
43 | ElementTree = util.etree.ElementTree
44 | QName = util.etree.QName
45 | if hasattr(util.etree, 'test_comment'): #pragma: no cover
46 |     Comment = util.etree.test_comment
47 | else: #pragma: no cover
48 |     Comment = util.etree.Comment
49 | PI = util.etree.PI
50 | ProcessingInstruction = util.etree.ProcessingInstruction
51 | 
52 | __all__ = ['to_html_string', 'to_xhtml_string']
53 | 
54 | HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
55 |               "img", "input", "isindex", "link", "meta", "param")
56 | 
57 | try:
58 |     HTML_EMPTY = set(HTML_EMPTY)
59 | except NameError: #pragma: no cover
60 |     pass
61 | 
62 | _namespace_map = {
63 |     # "well-known" namespace prefixes
64 |     "http://www.w3.org/XML/1998/namespace": "xml",
65 |     "http://www.w3.org/1999/xhtml": "html",
66 |     "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
67 |     "http://schemas.xmlsoap.org/wsdl/": "wsdl",
68 |     # xml schema
69 |     "http://www.w3.org/2001/XMLSchema": "xs",
70 |     "http://www.w3.org/2001/XMLSchema-instance": "xsi",
71 |     # dublin core
72 |     "http://purl.org/dc/elements/1.1/": "dc",
73 | }
74 | 
75 | 
76 | def _raise_serialization_error(text): #pragma: no cover
77 |     raise TypeError(
78 |         "cannot serialize %r (type %s)" % (text, type(text).__name__)
79 |     )
80 | 
81 | def _encode(text, encoding):
82 |     try:
83 |         return text.encode(encoding, "xmlcharrefreplace")
84 |     except (TypeError, AttributeError): #pragma: no cover
85 |         _raise_serialization_error(text)
86 | 
87 | def _escape_cdata(text):
88 |     # escape character data
89 |     try:
90 |         # it's worth avoiding do-nothing calls for strings that are
91 |         # shorter than 500 characters, or so. assume that's, by far,
92 |         # the most common case in most applications.
93 |         if "&" in text:
94 |             text = text.replace("&", "&amp;")
95 |         if "<" in text:
96 |             text = text.replace("<", "&lt;")
97 |         if ">" in text:
98 |             text = text.replace(">", "&gt;")
99 |         return text
100 |     except (TypeError, AttributeError): #pragma: no cover
101 |         _raise_serialization_error(text)
102 | 
103 | 
104 | def _escape_attrib(text):
105 |     # escape attribute value
106 |     try:
107 |         if "&" in text:
108 |             text = text.replace("&", "&amp;")
109 |         if "<" in text:
110 |             text = text.replace("<", "&lt;")
111 |         if ">" in text:
112 |             text = text.replace(">", "&gt;")
113 |         if "\"" in text:
114 |             text = text.replace("\"", "&quot;")
115 |         if "\n" in text:
116 |             text = text.replace("\n", "&#10;")
117 |         return text
118 |     except (TypeError, AttributeError): #pragma: no cover
119 |         _raise_serialization_error(text)
120 | 
121 | def _escape_attrib_html(text):
122 |     # escape attribute value
123 |     try:
124 |         if "&" in text:
125 |             text = text.replace("&", "&amp;")
126 |         if "<" in text:
127 |             text = text.replace("<", "&lt;")
128 |         if ">" in text:
129 |             text = text.replace(">", "&gt;")
130 |         if "\"" in text:
131 |             text = text.replace("\"", "&quot;")
132 |         return text
133 |     except (TypeError, AttributeError): #pragma: no cover
134 |         _raise_serialization_error(text)
135 | 
136 | 
137 | def _serialize_html(write, elem, qnames, namespaces, format):
138 |     tag = elem.tag
139 |     text = elem.text
140 |     if tag is Comment:
141 |         write("<!--%s-->" % _escape_cdata(text))
142 |     elif tag is ProcessingInstruction:
143 |         write("<?%s?>" % _escape_cdata(text))
144 |     else:
145 |         tag = qnames[tag]
146 |         if tag is None:
147 |             if text:
148 |                 write(_escape_cdata(text))
149 |             for e in elem:
150 |                 _serialize_html(write, e, qnames, None, format)
151 |         else:
152 |             write("<" + tag)
153 |             items = elem.items()
154 |             if items or namespaces:
155 |                 items = sorted(items) # lexical order
156 |                 for k, v in items:
157 |                     if isinstance(k, QName):
158 |                         k = k.text
159 |                     if isinstance(v, QName):
160 |                         v = qnames[v.text]
161 |                     else:
162 |                         v = _escape_attrib_html(v)
163 |                     if qnames[k] == v and format == 'html':
164 |                         # handle boolean attributes
165 |                         write(" %s" % v)
166 |                     else:
167 |                         write(" %s=\"%s\"" % (qnames[k], v))
168 |                 if namespaces:
169 |                     items = namespaces.items()
170 |                     items.sort(key=lambda x: x[1]) # sort on prefix
171 |                     for v, k in items:
172 |                         if k:
173 |                             k = ":" + k
174 |                         write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))
175 |             if format == "xhtml" and tag.lower() in HTML_EMPTY:
176 |                 write(" />")
177 |             else:
178 |                 write(">")
179 |                 if text:
180 |                     if tag.lower() in ["script", "style"]:
181 |                         write(text)
182 |                     else:
183 |                         write(_escape_cdata(text))
184 |                 for e in elem:
185 |                     _serialize_html(write, e, qnames, None, format)
186 |                 if tag.lower() not in HTML_EMPTY:
187 |                     write("</" + tag + ">")
188 |     if elem.tail:
189 |         write(_escape_cdata(elem.tail))
190 | 
191 | def _write_html(root,
192 |                 encoding=None,
193 |                 default_namespace=None,
194 |                 format="html"):
195 |     assert root is not None
196 |     data = []
197 |     write = data.append
198 |     qnames, namespaces = _namespaces(root, default_namespace)
199 |     _serialize_html(write, root, qnames, namespaces, format)
200 |     if encoding is None:
201 |         return "".join(data)
202 |     else:
203 |         return _encode("".join(data), encoding)
204 | 
205 | 
206 | # --------------------------------------------------------------------
207 | # serialization support
208 | 
209 | def _namespaces(elem, default_namespace=None):
210 |     # identify namespaces used in this tree
211 | 
212 |     # maps qnames to *encoded* prefix:local names
213 |     qnames = {None: None}
214 | 
215 |     # maps URIs to prefixes
216 |     namespaces = {}
217 |     if default_namespace:
218 |         namespaces[default_namespace] = ""
219 | 
220 |     def add_qname(qname):
221 |         # calculate serialized qname representation
222 |         try:
223 |             if qname[:1] == "{":
224 |                 uri, tag = qname[1:].split("}", 1)
225 |                 prefix = namespaces.get(uri)
226 |                 if prefix is None:
227 |                     prefix = _namespace_map.get(uri)
228 |                     if prefix is None:
229 |                         prefix = "ns%d" % len(namespaces)
230 |                     if prefix != "xml":
231 |                         namespaces[uri] = prefix
232 |                 if prefix:
233 |                     qnames[qname] = "%s:%s" % (prefix, tag)
234 |                 else:
235 |                     qnames[qname] = tag # default element
236 |             else:
237 |                 if default_namespace:
238 |                     raise ValueError(
239 |                         "cannot use non-qualified names with "
240 |                         "default_namespace option"
241 |                     )
242 |                 qnames[qname] = qname
243 |         except TypeError: #pragma: no cover
244 |             _raise_serialization_error(qname)
245 | 
246 |     # populate qname and namespaces table
247 |     try:
248 |         iterate = elem.iter
249 |     except AttributeError:
250 |         iterate = elem.getiterator # cET compatibility
251 |     for elem in iterate():
252 |         tag = elem.tag
253 |         if isinstance(tag, QName) and tag.text not in qnames:
254 |             add_qname(tag.text)
255 |         elif isinstance(tag, util.string_type):
256 |             if tag not in qnames:
257 |                 add_qname(tag)
258 |         elif tag is not None and tag is not Comment and tag is not PI:
259 |             _raise_serialization_error(tag)
260 |         for key, value in elem.items():
261 |             if isinstance(key, QName):
262 |                 key = key.text
263 |             if key not in qnames:
264 |                 add_qname(key)
265 |             if isinstance(value, QName) and value.text not in qnames:
266 |                 add_qname(value.text)
267 |         text = elem.text
268 |         if isinstance(text, QName) and text.text not in qnames:
269 |             add_qname(text.text)
270 |     return qnames, namespaces
271 | 
272 | def to_html_string(element):
273 |     return _write_html(ElementTree(element).getroot(), format="html")
274 | 
275 | def to_xhtml_string(element):
276 |     return _write_html(ElementTree(element).getroot(), format="xhtml")
277 | 
--------------------------------------------------------------------------------
/markdown/treeprocessors.py:
--------------------------------------------------------------------------------
1 | from __future__ import unicode_literals
2 | from __future__ import absolute_import
3 | from . import util
4 | from . import odict
5 | from . import inlinepatterns
6 | 
7 | 
8 | def build_treeprocessors(md_instance, **kwargs):
9 |     """ Build the default treeprocessors for Markdown. """
10 |     treeprocessors = odict.OrderedDict()
11 |     treeprocessors["inline"] = InlineProcessor(md_instance)
12 |     treeprocessors["prettify"] = PrettifyTreeprocessor(md_instance)
13 |     return treeprocessors
14 | 
15 | 
16 | def isString(s):
17 |     """ Check if it's a string """
18 |     if not isinstance(s, util.AtomicString):
19 |         return isinstance(s, util.string_type)
20 |     return False
21 | 
22 | 
23 | class Treeprocessor(util.Processor):
24 |     """
25 |     Treeprocessors are run on the ElementTree object before serialization.
26 | 
27 |     Each Treeprocessor implements a "run" method that takes a pointer to an
28 |     ElementTree, modifies it as necessary and returns an ElementTree
29 |     object.
30 | 
31 |     Treeprocessors must extend markdown.Treeprocessor.
32 | 
33 |     """
34 |     def run(self, root):
35 |         """
36 |         Subclasses of Treeprocessor should implement a `run` method, which
37 |         takes a root ElementTree. This method can return another ElementTree
38 |         object, and the existing root ElementTree will be replaced, or it can
39 |         modify the current tree and return None.
40 |         """
41 |         pass #pragma: no cover
42 | 
43 | 
44 | class InlineProcessor(Treeprocessor):
45 |     """
46 |     A Treeprocessor that traverses a tree, applying inline patterns.
47 |     """
48 | 
49 |     def __init__(self, md):
50 |         self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
51 |         self.__placeholder_suffix = util.ETX
52 |         self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
53 |                                       + len(self.__placeholder_suffix)
54 |         self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
55 |         self.markdown = md
56 |         self.inlinePatterns = md.inlinePatterns
57 | 
58 |     def __makePlaceholder(self, type):
59 |         """ Generate a placeholder """
60 |         id = "%04d" % len(self.stashed_nodes)
61 |         hash = util.INLINE_PLACEHOLDER % id
62 |         return hash, id
63 | 
64 |     def __findPlaceholder(self, data, index):
65 |         """
66 |         Extract id from data string, start from index
67 | 
68 |         Keyword arguments:
69 | 
70 |         * data: string
71 |         * index: index, from which we start search
72 | 
73 |         Returns: placeholder id and string index, after the found placeholder.
74 | 
75 |         """
76 |         m = self.__placeholder_re.search(data, index)
77 |         if m:
78 |             return m.group(1), m.end()
79 |         else:
80 |             return None, index + 1
81 | 
82 |     def __stashNode(self, node, type):
83 |         """ Add node to stash """
84 |         placeholder, id = self.__makePlaceholder(type)
85 |         self.stashed_nodes[id] = node
86 |         return placeholder
87 | 
88 |     def __handleInline(self, data, patternIndex=0):
89 |         """
90 |         Process string with inline patterns and replace it
91 |         with placeholders
92 | 
93 |         Keyword arguments:
94 | 
95 |         * data: A line of Markdown text
96 |         * patternIndex: The index of the inlinePattern to start with
97 | 
98 |         Returns: String with placeholders.
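
        For example (illustrative), '*emphasis*' comes back as a
        placeholder string like '\u0002klzzwxh:0000\u0003', with the
        corresponding <em> Element kept in self.stashed_nodes['0000'].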
99 | 
100 |         """
101 |         if not isinstance(data, util.AtomicString):
102 |             startIndex = 0
103 |             while patternIndex < len(self.inlinePatterns):
104 |                 data, matched, startIndex = self.__applyPattern(
105 |                     self.inlinePatterns.value_for_index(patternIndex),
106 |                     data, patternIndex, startIndex)
107 |                 if not matched:
108 |                     patternIndex += 1
109 |         return data
110 | 
111 |     def __processElementText(self, node, subnode, isText=True):
112 |         """
113 |         Process placeholders in Element.text or Element.tail
114 |         of Elements popped from self.stashed_nodes.
115 | 
116 |         Keyword arguments:
117 | 
118 |         * node: parent node
119 |         * subnode: processing node
120 |         * isText: bool variable, True - it's text, False - it's tail
121 | 
122 |         Returns: None
123 | 
124 |         """
125 |         if isText:
126 |             text = subnode.text
127 |             subnode.text = None
128 |         else:
129 |             text = subnode.tail
130 |             subnode.tail = None
131 | 
132 |         childResult = self.__processPlaceholders(text, subnode, isText)
133 | 
134 |         if not isText and node is not subnode:
135 |             pos = list(node).index(subnode)
136 |         else:
137 |             pos = 0
138 | 
139 |         childResult.reverse()
140 |         for newChild in childResult:
141 |             node.insert(pos, newChild)
142 | 
143 |     def __processPlaceholders(self, data, parent, isText=True):
144 |         """
145 |         Process string with placeholders and generate ElementTree tree.
146 | 
147 |         Keyword arguments:
148 | 
149 |         * data: string with placeholders instead of ElementTree elements.
150 |         * parent: Element, which contains processing inline data
151 | 
152 |         Returns: list with ElementTree elements with applied inline patterns.
153 | 
154 |         """
155 |         def linkText(text):
156 |             if text:
157 |                 if result:
158 |                     if result[-1].tail:
159 |                         result[-1].tail += text
160 |                     else:
161 |                         result[-1].tail = text
162 |                 elif not isText:
163 |                     if parent.tail:
164 |                         parent.tail += text
165 |                     else:
166 |                         parent.tail = text
167 |                 else:
168 |                     if parent.text:
169 |                         parent.text += text
170 |                     else:
171 |                         parent.text = text
172 |         result = []
173 |         startIndex = 0
174 |         while data:
175 |             index = data.find(self.__placeholder_prefix, startIndex)
176 |             if index != -1:
177 |                 id, phEndIndex = self.__findPlaceholder(data, index)
178 | 
179 |                 if id in self.stashed_nodes:
180 |                     node = self.stashed_nodes.get(id)
181 | 
182 |                     if index > 0:
183 |                         text = data[startIndex:index]
184 |                         linkText(text)
185 | 
186 |                     if not isString(node): # it's Element
187 |                         for child in [node] + list(node):
188 |                             if child.tail:
189 |                                 if child.tail.strip():
190 |                                     self.__processElementText(node, child, False)
191 |                             if child.text:
192 |                                 if child.text.strip():
193 |                                     self.__processElementText(child, child)
194 |                     else: # it's just a string
195 |                         linkText(node)
196 |                         startIndex = phEndIndex
197 |                         continue
198 | 
199 |                     startIndex = phEndIndex
200 |                     result.append(node)
201 | 
202 |                 else: # wrong placeholder
203 |                     end = index + len(self.__placeholder_prefix)
204 |                     linkText(data[startIndex:end])
205 |                     startIndex = end
206 |             else:
207 |                 text = data[startIndex:]
208 |                 if isinstance(data, util.AtomicString):
209 |                     # We don't want to lose the AtomicString
210 |                     text = util.AtomicString(text)
211 |                 linkText(text)
212 |                 data = ""
213 | 
214 |         return result
215 | 
216 |     def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
217 |         """
218 |         Check if the line fits the pattern, create the necessary
219 |         elements, add it to stashed_nodes.
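
        For example (illustrative), with the emphasis pattern and the
        data 'a *b* c', the matched '*b*' is stashed as an <em> Element
        and the returned string carries a placeholder in its place.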
220 | 
221 |         Keyword arguments:
222 | 
223 |         * pattern: the pattern to be checked
224 |         * data: the text to be processed
225 |         * patternIndex: index of current pattern
226 |         * startIndex: string index, from which we start searching
227 | 
228 |         Returns: String with placeholders instead of ElementTree elements.
229 | 
230 |         """
231 |         match = pattern.getCompiledRegExp().match(data[startIndex:])
232 |         leftData = data[:startIndex]
233 | 
234 |         if not match:
235 |             return data, False, 0
236 | 
237 |         node = pattern.handleMatch(match)
238 | 
239 |         if node is None:
240 |             return data, True, len(leftData)+match.span(len(match.groups()))[0]
241 | 
242 |         if not isString(node):
243 |             if not isinstance(node.text, util.AtomicString):
244 |                 # We need to process current node too
245 |                 for child in [node] + list(node):
246 |                     if not isString(node):
247 |                         if child.text:
248 |                             child.text = self.__handleInline(child.text,
249 |                                                              patternIndex + 1)
250 |                         if child.tail:
251 |                             child.tail = self.__handleInline(child.tail,
252 |                                                              patternIndex)
253 | 
254 |         placeholder = self.__stashNode(node, pattern.type())
255 | 
256 |         return "%s%s%s%s" % (leftData,
257 |                              match.group(1),
258 |                              placeholder, match.groups()[-1]), True, 0
259 | 
260 |     def run(self, tree):
261 |         """Apply inline patterns to a parsed Markdown tree.
262 | 
263 |         Iterate over ElementTree, find elements with inline tag, apply inline
264 |         patterns and append newly created Elements to tree. If you don't
265 |         want your data processed with inline patterns, use the AtomicString
266 |         subclass instead of a normal string:
267 | 
268 |             node.text = markdown.AtomicString("This will not be processed.")
269 | 
270 |         Arguments:
271 | 
272 |         * tree: ElementTree object, representing Markdown tree.
273 | 
274 |         Returns: ElementTree object with applied inline patterns.
275 | 
276 |         """
277 |         self.stashed_nodes = {}
278 | 
279 |         stack = [tree]
280 | 
281 |         while stack:
282 |             currElement = stack.pop()
283 |             insertQueue = []
284 |             for child in currElement:
285 |                 if child.text and not isinstance(child.text, util.AtomicString):
286 |                     text = child.text
287 |                     child.text = None
288 |                     lst = self.__processPlaceholders(self.__handleInline(
289 |                         text), child)
290 |                     stack += lst
291 |                     insertQueue.append((child, lst))
292 |                 if child.tail:
293 |                     tail = self.__handleInline(child.tail)
294 |                     dummy = util.etree.Element('d')
295 |                     child.tail = None
296 |                     tailResult = self.__processPlaceholders(tail, dummy, False)
297 |                     if dummy.tail:
298 |                         child.tail = dummy.tail
299 |                     pos = list(currElement).index(child) + 1
300 |                     tailResult.reverse()
301 |                     for newChild in tailResult:
302 |                         currElement.insert(pos, newChild)
303 |                 if len(child):
304 |                     stack.append(child)
305 | 
306 |             for element, lst in insertQueue:
307 |                 if self.markdown.enable_attributes:
308 |                     if element.text and isString(element.text):
309 |                         element.text = \
310 |                             inlinepatterns.handleAttributes(element.text,
311 |                                                             element)
312 |                 i = 0
313 |                 for newChild in lst:
314 |                     if self.markdown.enable_attributes:
315 |                         # Processing attributes
316 |                         if newChild.tail and isString(newChild.tail):
317 |                             newChild.tail = \
318 |                                 inlinepatterns.handleAttributes(newChild.tail,
319 |                                                                 element)
320 |                         if newChild.text and isString(newChild.text):
321 |                             newChild.text = \
322 |                                 inlinepatterns.handleAttributes(newChild.text,
323 |                                                                 newChild)
324 |                     element.insert(i, newChild)
325 |                     i += 1
326 |         return tree
327 | 
328 | 
329 | class PrettifyTreeprocessor(Treeprocessor):
330 |     """ Add linebreaks to the html document.
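
    For example (illustrative), '<div><p>x</p></div>' serializes as
    '<div>\n<p>x</p>\n</div>\n' once this treeprocessor has run.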
""" 331 | 332 | def _prettifyETree(self, elem): 333 | """ Recursively add linebreaks to ElementTree children. """ 334 | 335 | i = "\n" 336 | if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']: 337 | if (not elem.text or not elem.text.strip()) \ 338 | and len(elem) and util.isBlockLevel(elem[0].tag): 339 | elem.text = i 340 | for e in elem: 341 | if util.isBlockLevel(e.tag): 342 | self._prettifyETree(e) 343 | if not elem.tail or not elem.tail.strip(): 344 | elem.tail = i 345 | if not elem.tail or not elem.tail.strip(): 346 | elem.tail = i 347 | 348 | def run(self, root): 349 | """ Add linebreaks to ElementTree root object. """ 350 | 351 | self._prettifyETree(root) 352 | # Do <br />'s seperately as they are often in the middle of 353 | # inline content and missed by _prettifyETree. 354 | brs = root.getiterator('br') 355 | for br in brs: 356 | if not br.tail or not br.tail.strip(): 357 | br.tail = '\n' 358 | else: 359 | br.tail = '\n%s' % br.tail 360 | # Clean up extra empty lines at end of code blocks. 361 | pres = root.getiterator('pre') 362 | for pre in pres: 363 | if len(pre) and pre[0].tag == 'code': 364 | pre[0].text = util.AtomicString(pre[0].text.rstrip() + '\n') 365 | -------------------------------------------------------------------------------- /markdown/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | import re 4 | import sys 5 | 6 | 7 | """ 8 | Python 3 Stuff 9 | ============================================================================= 10 | """ 11 | PY3 = sys.version_info[0] == 3 12 | 13 | if PY3: #pragma: no cover 14 | string_type = str 15 | text_type = str 16 | int2str = chr 17 | else: #pragma: no cover 18 | string_type = basestring 19 | text_type = unicode 20 | int2str = unichr 21 | 22 | 23 | """ 24 | Constants you might want to modify 25 | ----------------------------------------------------------------------------- 26 | """ 27 | 28 | BLOCK_LEVEL_ELEMENTS = re.compile("^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul" 29 | "|script|noscript|form|fieldset|iframe|math" 30 | "|hr|hr/|style|li|dt|dd|thead|tbody" 31 | "|tr|th|td|section|footer|header|group|figure" 32 | "|figcaption|aside|article|canvas|output" 33 | "|progress|video|nav)$", re.IGNORECASE) 34 | # Placeholders 35 | STX = '\u0002' # Use STX ("Start of text") for start-of-placeholder 36 | ETX = '\u0003' # Use ETX ("End of text") for end-of-placeholder 37 | INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:" 38 | INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX 39 | INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)') 40 | AMP_SUBSTITUTE = STX+"amp"+ETX 41 | HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX 42 | HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)') 43 | TAG_PLACEHOLDER = STX + "hzzhzkh:%s" + ETX 44 | 45 | 46 | """ 47 | Constants you probably do not need to change 48 | ----------------------------------------------------------------------------- 49 | """ 50 | 51 | RTL_BIDI_RANGES = ( ('\u0590', '\u07FF'), 52 | # Hebrew (0590-05FF), Arabic (0600-06FF), 53 | # Syriac (0700-074F), Arabic supplement (0750-077F), 54 | # Thaana (0780-07BF), Nko (07C0-07FF). 55 | ('\u2D30', '\u2D7F'), # Tifinagh 56 | ) 57 | 58 | # Extensions should use "markdown.util.etree" instead of "etree" (or do `from 59 | # markdown.util import etree`). Do not import it by yourself. 60 | 61 | try: #pragma: no cover 62 | # Is the C implementation of ElementTree available? 
63 |     import xml.etree.cElementTree as etree
64 |     from xml.etree.ElementTree import Comment
65 |     # Serializers (including ours) test with non-c Comment
66 |     etree.test_comment = Comment
67 |     if etree.VERSION < "1.0.5":
68 |         raise RuntimeError("cElementTree version 1.0.5 or higher is required.")
69 | except (ImportError, RuntimeError): #pragma: no cover
70 |     # Use the Python implementation of ElementTree?
71 |     import xml.etree.ElementTree as etree
72 |     if etree.VERSION < "1.1":
73 |         raise RuntimeError("ElementTree version 1.1 or higher is required")
74 | 
75 | 
76 | """
77 | AUXILIARY GLOBAL FUNCTIONS
78 | =============================================================================
79 | """
80 | 
81 | 
82 | def isBlockLevel(tag):
83 |     """Check if the tag is a block level HTML tag."""
84 |     if isinstance(tag, string_type):
85 |         return BLOCK_LEVEL_ELEMENTS.match(tag)
86 |     # Some ElementTree tags are not strings, so return False.
87 |     return False
88 | 
89 | def parseBoolValue(value, fail_on_errors=True, preserve_none=False):
90 |     """Parses a string representing bool value. If parsing was successful,
91 |     returns True or False. If preserve_none=True, returns True, False,
92 |     or None. If parsing was not successful, raises ValueError, or, if
93 |     fail_on_errors=False, returns None."""
94 |     if not isinstance(value, string_type):
95 |         if preserve_none and value is None:
96 |             return value
97 |         return bool(value)
98 |     elif preserve_none and value.lower() == 'none':
99 |         return None
100 |     elif value.lower() in ('true', 'yes', 'y', 'on', '1'):
101 |         return True
102 |     elif value.lower() in ('false', 'no', 'n', 'off', '0', 'none'):
103 |         return False
104 |     elif fail_on_errors:
105 |         raise ValueError('Cannot parse bool value: %r' % value)
106 | 
107 | """
108 | MISC AUXILIARY CLASSES
109 | =============================================================================
110 | """
111 | 
112 | class AtomicString(text_type):
113 |     """A string which should not be further processed."""
114 |     pass
115 | 
116 | 
117 | class Processor(object):
118 |     def __init__(self, markdown_instance=None):
119 |         if markdown_instance:
120 |             self.markdown = markdown_instance
121 | 
122 | 
123 | class HtmlStash(object):
124 |     """
125 |     This class is used for stashing HTML objects that we extract
126 |     in the beginning and replace with place-holders.
127 |     """
128 | 
129 |     def __init__(self):
130 |         """ Create a HtmlStash. """
131 |         self.html_counter = 0 # for counting inline html segments
132 |         self.rawHtmlBlocks = []
133 |         self.tag_counter = 0
134 |         self.tag_data = [] # list of dictionaries in the order tags appear
135 | 
136 |     def store(self, html, safe=False):
137 |         """
138 |         Saves an HTML segment for later reinsertion. Returns a
139 |         placeholder string that needs to be inserted into the
140 |         document.
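
        For example (illustrative), the first call to
        store('<div>raw</div>') returns '\u0002wzxhzdk:0\u0003' and keeps
        the segment in self.rawHtmlBlocks for postprocessing.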
141 | 
142 |         Keyword arguments:
143 | 
144 |         * html: an html segment
145 |         * safe: label an html segment as safe for safemode
146 | 
147 |         Returns: a placeholder string
148 | 
149 |         """
150 |         self.rawHtmlBlocks.append((html, safe))
151 |         placeholder = self.get_placeholder(self.html_counter)
152 |         self.html_counter += 1
153 |         return placeholder
154 | 
155 |     def reset(self):
156 |         self.html_counter = 0
157 |         self.rawHtmlBlocks = []
158 | 
159 |     def get_placeholder(self, key):
160 |         return HTML_PLACEHOLDER % key
161 | 
162 |     def store_tag(self, tag, attrs, left_index, right_index):
163 |         """Store tag data and return a placeholder."""
164 |         self.tag_data.append({'tag': tag, 'attrs': attrs,
165 |                               'left_index': left_index,
166 |                               'right_index': right_index})
167 |         placeholder = TAG_PLACEHOLDER % str(self.tag_counter)
168 |         self.tag_counter += 1 # equal to the tag's index in self.tag_data
169 |         return placeholder
170 | 
--------------------------------------------------------------------------------
/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voscausa/appengine-gcs-blobstore-python/f3c41c8c2020d12ff674670caa3fb2b1978facaa/static/favicon.ico
--------------------------------------------------------------------------------
/templates/blob_links.html:
--------------------------------------------------------------------------------
1 | {%- extends 'blob_upload.html' -%}
2 | {%- block links -%}
3 | {% if failed %}
4 | <h3>!! Upload FAILED: {{ failed }} !!</h3>
5 | {% else %}
6 | <p><b>Serving url</b> (download) :</p>
7 | <ul class="no-markers">
8 | <li>Zip archive : <a href="{{ bzf_url }}">{{ bzf_name }}</a></li>
9 | <li>File upload : <a href="{{ bf_url }}">{{ bf_name }}</a></li>
10 | </ul>
11 | {% endif %}
12 | {%- endblock -%}
--------------------------------------------------------------------------------
/templates/blob_upload.html:
--------------------------------------------------------------------------------
1 | <!DOCTYPE HTML>
2 | <html lang="en">
3 | <head>
4 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
5 | <style type="text/css">.no-markers {
6 |     list-style: none;
7 | }</style>
8 | <title>Blob Upload</title>
9 | </head>
10 | 
11 | <body>
13 | Upload serving-url options : 14 |
    15 |
  • 17 |
  • 19 |
20 |
21 |

22 |
23 | {%- block links -%}
24 | {% if readme %}
25 | {{ readme|safe }}
26 | {% else %}
27 | More info in this <a href="https://github.com/voscausa/appengine-gcs-blobstore-python">README</a>
28 | {% endif %}
29 | {%- endblock -%}
30 | </body>
31 | </html>
--------------------------------------------------------------------------------