├── .gitignore ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.rst ├── docverter.py ├── setup.py ├── test_docverter.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: false 3 | env: 4 | - TOXENV=py26 5 | - TOXENV=py27 6 | - TOXENV=py33 7 | - TOXENV=py34 8 | - TOXENV=pypy 9 | - TOXENV=flake8 10 | install: 11 | - travis_retry pip install tox 12 | script: 13 | - tox 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Marc Abramowitz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the 
Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | pydocverter 2 | =========== 3 | 4 | Python client for Docverter_ service 5 | 6 | .. image:: https://pypip.in/version/pydocverter/badge.svg?style=flat 7 | :target: https://pypi.python.org/pypi/pydocverter/ 8 | :alt: Latest Version 9 | 10 | .. image:: https://travis-ci.org/msabramo/pydocverter.svg?branch=master 11 | :target: https://travis-ci.org/msabramo/pydocverter 12 | 13 | Docverter_ is a hosted service 14 | that can convert documents from one format to another (using pandoc_). 15 | For example, it can be used to convert Markdown_ documents to reStructuredText_. 
16 | This is very useful if you prefer to write your ``README`` in Markdown, 17 | but want to publish your package to PyPI, 18 | which only knows how to do nice rendering of descriptions 19 | written in reStructuredText. 20 | 21 | This module is a Python client to the Docverter service. 22 | 23 | It has a very similar API to that of pypandoc_, so that you can do stuff like: 24 | 25 | .. code-block:: python 26 | 27 | try: 28 | import pypandoc as converter 29 | except ImportError: 30 | import docverter as converter 31 | 32 | converter.convert('somefile.md', 'rst') 33 | 34 | 35 | Similar 36 | ================== 37 | 38 | https://github.com/lukedmor/ghmarkdown 39 | 40 | 41 | .. _Docverter: http://www.docverter.com/ 42 | .. _pandoc: http://johnmacfarlane.net/pandoc 43 | .. _Markdown: http://daringfireball.net/projects/markdown/ 44 | .. _reStructuredText: http://docutils.sourceforge.net/rst.html 45 | .. _pypandoc: https://github.com/bebraw/pypandoc 46 | -------------------------------------------------------------------------------- /docverter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import with_statement 3 | 4 | __author__ = 'Marc Abramowitz' 5 | __version__ = '0.0.0' 6 | __license__ = 'MIT' 7 | __all__ = ['convert', 'get_pandoc_formats'] 8 | 9 | from tempfile import NamedTemporaryFile 10 | import os 11 | 12 | import requests 13 | 14 | DOCVERTER_DEFAULT_URL = 'http://c.docverter.com/convert' 15 | 16 | 17 | def convert(source, to, format=None, extra_args=(), encoding='utf-8'): 18 | '''Converts given `source` from `format` `to` another. `source` may be 19 | either a file path or a string to be converted. It's possible to pass 20 | `extra_args` if needed. In case `format` is not provided, it will try to 21 | infer the format based on given `source`. 22 | 23 | Raises OSError if pandoc is not found! Make sure it has been installed and 24 | is available at path. 
25 | 26 | ''' 27 | return _convert(_read_file, _process_file, source, to, 28 | format, extra_args, encoding=encoding) 29 | 30 | 31 | def _convert(reader, processor, source, to, 32 | format=None, extra_args=(), encoding=None): 33 | source, format = reader(source, format, encoding=encoding) 34 | 35 | formats = { 36 | 'dbk': 'docbook', 37 | 'md': 'markdown', 38 | 'rest': 'rst', 39 | 'tex': 'latex', 40 | } 41 | 42 | format = formats.get(format, format) 43 | to = formats.get(to, to) 44 | 45 | if not format: 46 | raise RuntimeError('Missing format!') 47 | 48 | from_formats, to_formats = get_pandoc_formats() 49 | 50 | if format not in from_formats: 51 | raise RuntimeError( 52 | 'Invalid input format! Expected one of these: ' + 53 | ', '.join(from_formats)) 54 | 55 | if to not in to_formats: 56 | raise RuntimeError( 57 | 'Invalid to format! Expected one of these: ' + 58 | ', '.join(to_formats)) 59 | 60 | return processor(source, to, format, extra_args) 61 | 62 | 63 | def _read_file(source, format, encoding='utf-8'): 64 | try: 65 | path = os.path.exists(source) 66 | except UnicodeEncodeError: 67 | path = os.path.exists(source.encode('utf-8')) 68 | if path: 69 | import codecs 70 | with codecs.open(source, encoding=encoding) as f: 71 | format = format or os.path.splitext(source)[1].strip('.') 72 | source = f.read() 73 | 74 | return source, format 75 | 76 | 77 | def _process_file(source_text, to_format, from_format, extra_args): 78 | # @todo: allow passing custom url 79 | url = DOCVERTER_DEFAULT_URL 80 | 81 | with NamedTemporaryFile('w+t') as temp_file: 82 | temp_file.write(source_text) 83 | temp_file.seek(0) 84 | 85 | req = requests.Request( 86 | 'POST', url, 87 | data={'from': from_format, 'to': to_format}, 88 | files={'input_files[]': temp_file}, 89 | ) 90 | prepared = req.prepare() 91 | session = requests.Session() 92 | resp = session.send(prepared) 93 | # import pdb; pdb.set_trace() 94 | if resp.ok: 95 | return resp.text 96 | else: 97 | if resp.status_code == 500: 98 | 
req = prepared 99 | print('**** Got a 500 error from server *****') 100 | print('{0}\n{1}\n{2}\n\n{3}'.format( 101 | '-----------START-----------', 102 | req.method + ' ' + req.url, 103 | '\n'.join('{0}: {1}'.format(k, v) 104 | for k, v in req.headers.items()), 105 | req.body, 106 | )) 107 | print('temp_file = %r' % temp_file) 108 | print('temp_file.name = %r' % temp_file.name) 109 | raise RuntimeError( 110 | 'Call to docverter failed - resp = %r; resp.content = %r' 111 | % (resp, resp.content)) 112 | 113 | 114 | def get_pandoc_formats(): 115 | ''' 116 | Dynamic preprocessor for Pandoc formats. 117 | Return 2 lists. "from_formats" and "to_formats". 118 | ''' 119 | from_formats = ['markdown', 'texttile', 'rst', 'html', 'docbook', 'latex'] 120 | to_formats = ['markdown', 'rst', 'html', 'latex', 'context', 'mediawiki', 121 | 'textile', 'org', 'texinfo', 'docbook', 'docx', 'epub', 122 | 'mobi', 'asciidoc', 'rtf'] 123 | 124 | return from_formats, to_formats 125 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import os 4 | from setuptools import setup 5 | 6 | long_description = open( 7 | os.path.join( 8 | os.path.dirname(__file__), 9 | 'README.rst' 10 | ) 11 | ).read() 12 | 13 | setup( 14 | name='pydocverter', 15 | version='0.0.0', 16 | url='https://github.com/msabramo/pydocverter', 17 | license='MIT', 18 | description='Client for Docverter document conversion service (pandoc as a service)', 19 | long_description=long_description, 20 | author='Marc Abramowitz', 21 | author_email='marc@marc-abramowitz.com', 22 | py_modules=['docverter'], 23 | install_requires=['requests'], 24 | classifiers=[ 25 | 'Development Status :: 4 - Beta', 26 | 'Environment :: Console', 27 | 'Intended Audience :: Developers', 28 | 'License :: OSI Approved :: MIT License', 29 | 'Operating System :: POSIX', 30 | 
'Programming Language :: Python', 31 | 'Topic :: Text Processing', 32 | 'Topic :: Text Processing :: Filters', 33 | ], 34 | ) 35 | -------------------------------------------------------------------------------- /test_docverter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import tempfile 5 | import unittest 6 | 7 | import docverter 8 | 9 | 10 | def _test_converter(to, format=None, extra_args=()): 11 | 12 | def reader(*args, **kwargs): 13 | return source, format 14 | 15 | def processor(*args): 16 | return 'ok' 17 | 18 | source = 'foo' 19 | 20 | return docverter._convert( 21 | reader, processor, source, to, format, extra_args) 22 | 23 | 24 | class TestDocverter(unittest.TestCase): 25 | def test_converts_valid_format(self): 26 | self.assertEqual(_test_converter(format='md', to='rest'), 'ok') 27 | 28 | def test_does_not_convert_to_invalid_format(self): 29 | try: 30 | _test_converter(format='md', to='invalid') 31 | except RuntimeError: 32 | pass 33 | 34 | def test_does_not_convert_from_invalid_format(self): 35 | try: 36 | _test_converter(format='invalid', to='rest') 37 | except RuntimeError: 38 | pass 39 | 40 | def test_basic_conversion_from_file(self): 41 | # This will not work on windows: 42 | # http://docs.python.org/2/library/tempfile.html 43 | with tempfile.NamedTemporaryFile('w+t', suffix='.md', 44 | delete=False) as test_file: 45 | file_name = test_file.name 46 | print('test_file = %r' % test_file) 47 | print('file_name = %r' % file_name) 48 | test_file.write('#some title\n') 49 | test_file.flush() 50 | expected = 'some title{0}=========={0}{0}'.format(os.linesep) 51 | received = docverter.convert(file_name, 'rst') 52 | self.assertEqualExceptForNewlineEnd(expected, received) 53 | 54 | def test_basic_conversion_from_file_with_format(self): 55 | # This will not work on windows: 56 | # http://docs.python.org/2/library/tempfile.html 57 | with tempfile.NamedTemporaryFile('w+t', 
suffix='.rst', 58 | delete=False) as test_file: 59 | file_name = test_file.name 60 | print('test_file = %r' % test_file) 61 | print('file_name = %r' % file_name) 62 | test_file.write('#some title\n') 63 | test_file.flush() 64 | expected = 'some title{0}=========={0}{0}'.format(os.linesep) 65 | received = docverter.convert(file_name, 'rst', format='md') 66 | self.assertEqualExceptForNewlineEnd(expected, received) 67 | 68 | def test_basic_conversion_from_string(self): 69 | expected = 'some title{0}=========={0}{0}'.format(os.linesep) 70 | received = docverter.convert('#some title', 'rst', format='md') 71 | self.assertEqualExceptForNewlineEnd(expected, received) 72 | 73 | def assertEqualExceptForNewlineEnd(self, expected, received): 74 | self.assertEqual(expected.rstrip('\n'), received.rstrip('\n')) 75 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py26, py27, py32, py33, py34, pypy, pypy3, flake8 3 | 4 | [testenv] 5 | deps = 6 | pytest 7 | pytest-cov 8 | commands = 9 | py.test {posargs:-v --tb=short} 10 | 11 | [testenv:flake8] 12 | basepython = python2.6 13 | deps = 14 | flake8 15 | commands = 16 | flake8 docverter.py test_docverter.py 17 | --------------------------------------------------------------------------------