├── .gitignore
├── CREDITS
├── LICENCE
├── README.md
├── core
    ├── 3rd-party
    │   ├── README
    │   └── sscdocapi.html
    ├── __init__.py
    ├── document.py
    ├── document_type.py
    ├── docvert.py
    ├── docvert_exception.py
    ├── docvert_html.py
    ├── docvert_libreoffice.py
    ├── docvert_pipeline.py
    ├── docvert_storage.py
    ├── docvert_url.py
    ├── docvert_xml.py
    ├── linux
    │   └── debian
    │   │   ├── README
    │   │   └── docvert-webserver.sh
    ├── opendocument.py
    ├── pipeline_type
    │   ├── __init__.py
    │   ├── compare.py
    │   ├── convertimages.py
    │   ├── debug.py
    │   ├── docbooktoxhtml.py
    │   ├── generate.py
    │   ├── generatepostconversioneditorfiles.py
    │   ├── getpreface.py
    │   ├── loop.py
    │   ├── normalizeopendocument.py
    │   ├── pipeline_item.py
    │   ├── serialize.py
    │   ├── serializeopendocument.py
    │   ├── splitpages.py
    │   ├── test.py
    │   ├── transform.py
    │   ├── transformopendocumenttodocbook.py
    │   └── writemetadata.py
    ├── transform
    │   ├── docbook-to-html.xsl
    │   ├── each-page.xsl
    │   ├── extract-metadata.xsl
    │   ├── normalize-docbook.xsl
    │   ├── normalize-opendocument.xsl
    │   ├── opendocument-to-docbook.xsl
    │   └── turn-document-into-test.xsl
    └── web_service_themes
    │   ├── README
    │   └── default
    │       ├── docvertedges2-small.gif
    │       ├── favicon.ico
    │       ├── index.js
    │       ├── index.tpl
    │       ├── jquery.dropp.js
    │       ├── loading.gif
    │       ├── preview.css
    │       ├── screen.css
    │       ├── tests.js
    │       ├── tests.tpl
    │       ├── upload_computer.png
    │       ├── web-service.js
    │       └── web-service.tpl
├── doc
    ├── how-to-write-themes.txt
    └── sample
    │   ├── sample-document.doc
    │   └── sample-document.odt
├── docvert-cli.py
├── docvert-web.py
├── lib
    ├── README
    ├── __init__.py
    ├── bottle
    │   ├── __init__.py
    │   ├── bottle.py
    │   └── bottle_session_file_problem.html
    ├── bottlesession
    │   ├── __init__.py
    │   └── bottlesession.py
    ├── fonts
    │   ├── COPYRIGHT
    │   ├── README
    │   └── reenie-beanie.woff
    ├── jquery
    │   └── jquery-1.5.min.js
    └── workerpool
    │   ├── LICENSE
    │   ├── QueueWrapper.py
    │   ├── README
    │   ├── __init__.py
    │   ├── exceptions.py
    │   ├── jobs.py
    │   ├── pools.py
    │   └── workers.py
├── logo.gif
└── pipelines
    ├── auto_pipelines
        ├── Break up over Heading 1.default
        │   └── pipeline.xml
        └── Nothing (one long page)
        │   └── pipeline.xml
    ├── html_to_opendocument
        └── default
        │   └── pipeline.xml
    ├── pipelines
        ├── basic
        │   ├── onepage.xsl
        │   └── pipeline.xml
        ├── docbook
        │   └── pipeline.xml
        ├── open document
        │   └── pipeline.xml
        ├── pretty-lists
        │   ├── pipeline.xml
        │   └── pretty-lists.xsl
        ├── sanitised open document
        │   └── pipeline.xml
        ├── ssc
        │   ├── pipeline.xml
        │   └── pretty-lists.xsl
        └── web standards
        │   ├── mytheme2.xsl
        │   └── pipeline.xml
    └── tests
        ├── bold-italics
            ├── bold-italics.odt
            ├── bold-italics.txt
            ├── bold-italics.xsl
            └── pipeline.xml
        ├── footnotes
            ├── footnotes.doc
            ├── footnotes.txt
            ├── footnotes.xsl
            └── pipeline.xml
        ├── headings-and-paragraphs
            ├── opendocument.rng
            ├── pipeline.xml
            ├── sample-document-docbook-headings-and-paragraphs.txt
            ├── sample-document-html-headings-and-paragraphs.txt
            ├── sample-document.doc
            └── sample-document.txt
        ├── images
            ├── emf-copyright.txt
            ├── emf-sample.doc
            ├── emf-sample.odt
            ├── pipeline.xml
            ├── wmf-copyright.txt
            ├── wmf-sample.doc
            └── wmf-sample.odt
        ├── invalid-odf
            ├── invalid-page.odt
            ├── invalid-page.xsl
            └── pipeline.xml
        ├── links
            ├── links.odt
            ├── links.txt
            ├── links.xsl
            └── pipeline.xml
        ├── lists
            ├── continuation.odt
            ├── continuation.xsl
            ├── docvert-test-five-new.odt
            ├── docvert-test-five-new.txt
            ├── docvert-test-five.odt
            ├── docvert-test-five.txt
            ├── pipeline.xml
            ├── sample-document-docbook-lists.txt
            ├── sample-document-html-lists.txt
            ├── sample-document.doc
            ├── sample-document.txt
            ├── sample-document.xsl
            ├── table-list.odt
            └── table-list.txt
        └── tables
            ├── pipeline.xml
            ├── table-list.odt
            ├── table-list.txt
            ├── table-list.xsl
            └── table-span.doc


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | 
3 | 


--------------------------------------------------------------------------------
/CREDITS:
--------------------------------------------------------------------------------
1 | Matthew Holloway (ex-Matthew Cruickshank) <docvert@holloway.co.nz> <http://holloway.co.nz/>
2 | Mark Rickerby <coretxt@gmail.com> <http://maetl.coretxt.net.nz/>
3 | Cyrille Bonnet <cbonnet99@yahoo.fr>
4 | Francois Marier <fmarier@gmail.com>
5 | Sol Quimpo <sol@catalyst.net.nz>
6 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <p align="center">
 2 | <img src="logo.gif" alt="Docvert">
 3 | </p>
 4 | 
 5 | 
 6 | Converts Word Processor office files (e.g. .DOC files) to OpenDocument, DocBook, and structured HTML.
 7 | 
 8 | This is Docvert for Python 3. To find Docvert for Python 2.x see http://github.com/holloway/docvert/
 9 | 
10 | Web Service
11 | -----------
12 | 
13 |     python3 ./docvert-web.py [-p PORT] [-H host]
14 | 
15 | Command Line
16 | ------------
17 | 
18 |     python3 ./docvert-cli.py
19 | 
20 |     usage: docvert-cli.py [-h] [--version] --pipeline PIPELINE
21 |         [--response {auto,path,stdout}]
22 |         [--autopipeline {Break up over Heading 1.default,Nothing one long page}]
23 |         [--url URL]
24 |         [--list-pipelines]
25 |         [--pipelinetype {tests,auto_pipelines,pipelines}]
26 |         infile [infile ...]
27 | 
28 | Community
29 | ---------
30 | 
31 | http://lists.catalyst.net.nz/mailman/listinfo/docvert
32 | 
33 | Requirements
34 | ------------
35 | 
36 |     Python 3
37 |     libreoffice
38 |     python3-uno
39 |     python3-lxml
40 |     python3-imaging
41 |     pdf2svg
42 |     librsvg2-2
43 | 
44 | Quickstart Guide
45 | ----------------
46 | 
47 |     sudo apt-get install libreoffice python3-uno python3-lxml python3-imaging pdf2svg librsvg2-2 librsvg2-bin
48 | 
49 |     /usr/bin/soffice --headless --norestore --nologo --norestore --nofirststartwizard --accept="socket,port=2002;urp;"
50 | 
51 | then in another terminal
52 | 
53 |     cd ~
54 | 
55 |     git clone git://github.com/holloway/docvert-python3.git
56 | 
57 |     cd docvert-python3
58 | 
59 |     python3 ./docvert-web.py
60 | 
61 | and browse to http://localhost:8080
62 | 
63 | 
64 | LICENCE
65 | -------
66 | Released under the GPL3 see LICENCE
67 | 


--------------------------------------------------------------------------------
/core/3rd-party/README:
--------------------------------------------------------------------------------
1 | Mimics 3rd-party bindings to the web service api for manual testing purposes.
2 | 


--------------------------------------------------------------------------------
/core/3rd-party/sscdocapi.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <body>
 4 | 
 5 | <form method="post" action="/web-service.php" enctype="multipart/form-data">
 6 | 
 7 |     <input type="file" name="random file"><br>
 8 | 
 9 |     <select name="pipeline">
10 |         <option>autopipeline:ssc</option>
11 |     </select><br>
12 | 
13 |     <select name="autopipeline">
14 |         <option>Nothing (one long page)</option>
15 |     </select><br>
16 | 
17 |     <select name="afterconversion">
18 |         <option>downloadZip</option>
19 |         <option>preview</option>
20 |     </select><br>
21 | 
22 |     <select name="converter">
23 |         <option>pyodconverter</option>
24 |     </select>
25 | 
26 |     <hr>
27 | 
28 |     <input type="submit" value="Submit"/>
29 | 
30 | </form>
31 | 
32 | </body>
33 | </html>
34 | 


--------------------------------------------------------------------------------
/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/core/__init__.py


--------------------------------------------------------------------------------
/core/document.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import libreoffice
 4 | 
 5 | class document(object):
 6 |     def __init__(self, data):
 7 |         self.data = data
 8 | 
 9 |     def get_opendocument(self):
10 |         raise NotImplemented()
11 | 
12 | class opendocument(document):
13 |     def get_open_document(self):
14 |         return self.data
15 | 
16 |     def set_dimensions(self, width, height):
17 |         raise NotImplemented()
18 | 
19 | class binary_office_file(document)
20 |     def get_opendocument(self):
21 |         client = libreoffice.libreoffice_client()
22 |         return client.convert_by_stream(self.data)
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/core/document_type.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import zipfile
 3 | import io
 4 | 
 5 | class types(object):
 6 |     oasis_open_document = "oasis_open_document (any version)"
 7 |     pdf = "portable document format (any version)"
 8 |     xml = "xml"
 9 |     html = "html"
10 |     exception = "exception"
11 |     unknown_type = "unknown file type"
12 | 
13 | def detect_document_type(data):
14 |     if isinstance(data, Exception):
15 |         return types.exception
16 |     if isinstance(data, str):
17 |         data = io.BytesIO(data)
18 | 
19 |     # 1. Sniff for OpenDocument
20 |     try:
21 |         magic_bytes_open_document = 'PK'
22 |         data.seek(0)
23 |         first_bytes = data.read(len(magic_bytes_open_document)).decode('latin-1')
24 |         if first_bytes == magic_bytes_open_document: # Ok it's a ZIP but...
25 |             archive = zipfile.ZipFile(data)
26 |             if 'mimetype' in archive.namelist() and archive.read('mimetype').decode('utf-8') == 'application/vnd.oasis.opendocument.text': # ...if it doesn't have these files it's not an OpenDocument
27 |                 return types.oasis_open_document
28 |     except UnicodeDecodeError as e:
29 |         pass
30 |     except Exception as e:
31 |         print(e)
32 |     # 2. Sniff for PDF
33 |     try:
34 |         magic_bytes_pdf = '%PDF'
35 |         data.seek(0)
36 |         first_bytes = data.read(len(magic_bytes_pdf)).decode('latin-1')
37 |         if first_bytes == magic_bytes_pdf:
38 |             return types.pdf
39 |     except UnicodeDecodeError as e:
40 |         pass
41 |     except Exception as e:
42 |         print(e)
43 |     # 3. Sniff for HTML and XML
44 |     data.seek(0)
45 |     try:
46 |         first_bytes = data.read(200).decode('latin-1') #200 bytes in, because sometimes there's a really long doctype
47 |         #print first_bytes
48 |         data.seek(0)
49 |         if first_bytes.count("<html") > 0:
50 |             return types.html
51 |         if first_bytes.count("<?xml") > 0:
52 |             return types.xml
53 |     except UnicodeDecodeError as e:
54 |         pass
55 |     except Exception as e:
56 |         print(e)
57 |    
58 |     return types.unknown_type
59 | 


--------------------------------------------------------------------------------
/core/docvert.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import tempfile
  3 | import io
  4 | import os.path
  5 | from . import document_type
  6 | from . import docvert_exception
  7 | from . import docvert_pipeline
  8 | from . import docvert_storage
  9 | from . import docvert_libreoffice
 10 | from . import docvert_xml
 11 | from . import opendocument
 12 | import urllib.request, urllib.error, urllib.parse
 13 | 
 14 | docvert_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 15 | version = '6'
 16 | http_timeout = 10
 17 | 
 18 | class converter_type(object):
 19 |     python_streaming_to_libreoffice = "python streaming to libreoffice"
 20 | 
 21 | def process_conversion(files=None, urls=None, pipeline_id=None, pipeline_type="pipelines", auto_pipeline_id=None, storage_type_name=docvert_storage.storage_type.memory_based, converter=converter_type.python_streaming_to_libreoffice, suppress_errors=False):
 22 |     if files is None and urls is None:
 23 |         raise docvert_exception.needs_files_or_urls()
 24 |     if pipeline_id is None:
 25 |         raise docvert_exception.unrecognised_pipeline("Unknown pipeline '%s'" % pipeline_id)
 26 |     storage = docvert_storage.get_storage(storage_type_name)
 27 | 
 28 |     def _title(name, files, data):
 29 |         filename = os.path.basename(name).replace('\\','-').replace('/','-').replace(':','-')
 30 |         if len(filename) == 0:
 31 |             filename = "document.odt"
 32 |         if filename in files:
 33 |             if data and hasattr(files[filename], 'read') and files[filename].getvalue() == data:
 34 |                 return filename
 35 |             unique = 1
 36 |             potential_filename = filename
 37 |             while potential_filename in files:
 38 |                 unique += 1
 39 |                 if filename.count("."):
 40 |                     potential_filename = filename.replace(".", "%i." % unique, 1)
 41 |                 else:
 42 |                     potential_filename = filename + str(unique)
 43 |             filename = potential_filename
 44 |         return filename
 45 | 
 46 |     for filename, data in files.items():
 47 |         storage.set_friendly_name(filename, filename)
 48 | 
 49 |     for url in urls:
 50 |         try:
 51 |             data = urllib.request.urlopen(url, None, http_timeout).read()
 52 |             doc_type = document_type.detect_document_type(data)
 53 |             if doc_type == document_type.types.html:
 54 |                 data = html_to_opendocument(data, url)
 55 |             filename = _title(url, files, data)
 56 |             storage.set_friendly_name(filename, "%s (%s)" % (filename, url))
 57 |             files[filename] = io.StringIO(data)
 58 |         except IOError as e:
 59 |             filename = _title(url, files, None)
 60 |             storage.set_friendly_name(filename, "%s (%s)" % (filename, url))
 61 |             files[filename] = Exception("Download error from %s: %s" % (url, e))
 62 | 
 63 |     for filename, data in files.items():
 64 |         if storage.default_document is None:
 65 |             storage.default_document = filename
 66 |         doc_type = document_type.detect_document_type(data)
 67 |         if doc_type == document_type.types.exception:
 68 |             storage.add("%s/index.txt" % filename, str(data))
 69 |         elif doc_type != document_type.types.oasis_open_document:
 70 |             try:
 71 |                 data = generate_open_document(data, converter)
 72 |                 doc_type = document_type.types.oasis_open_document
 73 |             except Exception as e:
 74 |                 if not suppress_errors:
 75 |                     raise e
 76 |                 storage.add("%s/index.txt" % filename, str(e))
 77 |         if doc_type == document_type.types.oasis_open_document:
 78 |             if pipeline_id == "open document": #reserved term, for when people want the Open Document file back directly. Don't bother loading pipeline.
 79 |                 storage.add("%s/index.odt" % filename, data)
 80 |                 thumbnail = opendocument.extract_thumbnail(data)
 81 |                 if thumbnail:
 82 |                     storage.add("%s/thumbnail.png" % filename, thumbnail)
 83 |             else:
 84 |                 document_xml = opendocument.extract_useful_open_document_files(data, storage, filename)
 85 |                 storage.add("%s/opendocument.xml" % filename, document_xml)
 86 |                 process_pipeline(document_xml, pipeline_id, pipeline_type, auto_pipeline_id, storage, filename)
 87 |                 storage.remove("%s/opendocument.xml" % filename)
 88 |     return storage
 89 | 
 90 | def process_pipeline(initial_pipeline_value, pipeline_id, pipeline_type, auto_pipeline_id, storage, storage_prefix=None):
 91 |     pipeline_definition = docvert_pipeline.get_pipeline_definition(pipeline_type, pipeline_id, auto_pipeline_id)
 92 |     pipeline = docvert_pipeline.pipeline_processor(storage, pipeline_definition['stages'], pipeline_definition['pipeline_directory'], storage_prefix)
 93 |     return pipeline.start(initial_pipeline_value)
 94 | 
 95 | def generate_open_document(data, converter=converter_type.python_streaming_to_libreoffice):
 96 |     if converter == converter_type.python_streaming_to_libreoffice:
 97 |         return docvert_libreoffice.get_client().convert_by_stream(data, docvert_libreoffice.LIBREOFFICE_OPEN_DOCUMENT)
 98 |     raise docvert_exception.unrecognised_converter("Unknown converter '%s'" % converter)
 99 | 
100 | def html_to_opendocument(html, url):
101 |     from BeautifulSoup import BeautifulSoup
102 |     import html.entities
103 |     import re
104 | 
105 |     def to_ncr(match):
106 |         text = match.group(0)
107 |         entity_string = text[1:-1]
108 |         entity = html.entities.entitydefs.get(entity_string)
109 |         if entity:
110 |             if len(entity) > 1:
111 |                 return entity
112 |             try:
113 |                 return "&#%s;" % ord(entity)
114 |             except ValueError:
115 |                 pass
116 |             except TypeError as e:
117 |                 print("TypeError on '%s'?" % entity)
118 |                 raise
119 |         return text
120 | 
121 |     soup = BeautifulSoup(html, convertEntities=BeautifulSoup.XML_ENTITIES)
122 |     to_extract = soup.findAll('script')
123 |     for item in to_extract:
124 |         item.extract()
125 |     pretty_xml = soup.html.prettify()
126 |     pretty_xml = re.sub("&?\w+;", to_ncr, pretty_xml)
127 |     pretty_xml = re.sub('&(\w+);', '&amp;\\1', pretty_xml)
128 |     pretty_xml = pretty_xml.replace("& ", "&amp; ")
129 |     #display_lines(pretty_xml, 5, 15)
130 |     xml = docvert_xml.get_document(pretty_xml)
131 |     storage = docvert_storage.get_storage(docvert_storage.storage_type.memory_based)
132 |     result = process_pipeline(xml, 'default', 'html_to_opendocument', None, storage)
133 |     #print result
134 |     #print storage
135 |     return result
136 | 
137 | def display_lines(data, start_line, end_line):
138 |     data = data.split("\n")
139 |     segment = data[start_line:end_line]
140 |     for line in segment:
141 |         print("%s%s" % (start_line, line))
142 |         start_line += 1
143 |     
144 | 
145 | def get_all_pipelines(include_default_autopipeline = True):
146 |     def _title(name):
147 |         if name.endswith('.default'):
148 |             name = name[0:-len('.default')]
149 |         return name.replace('_',' ').replace('-',' ').title()
150 | 
151 |     pipeline_types_path = os.path.join(docvert_root, "pipelines")
152 |     pipeline_types = dict()
153 |     for pipeline_type in os.listdir(pipeline_types_path):
154 |         pipeline_types[pipeline_type] = list()
155 |         for pipeline_directory in os.listdir(os.path.join(pipeline_types_path, pipeline_type)):
156 |             if pipeline_directory == 'ssc': #don't show this pipeline publicly. it's not important.
157 |                 pass
158 |             elif include_default_autopipeline is False and pipeline_type == "auto_pipelines" and "nothing" in pipeline_directory.lower():
159 |                 pass #print "Skipping?"
160 |             else:
161 |                 pipeline_types[pipeline_type].append(dict(id=pipeline_directory, name=_title(pipeline_directory)))
162 |     return pipeline_types
163 |     
164 | 
165 | 


--------------------------------------------------------------------------------
/core/docvert_exception.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | class docvert_exception(Exception):
 4 |     pass
 5 | 
 6 | class needs_files_or_urls(docvert_exception):
 7 |     pass
 8 | 
 9 | class unrecognised_pipeline(docvert_exception):
10 |     pass
11 | 
12 | class unrecognised_auto_pipeline(docvert_exception):
13 |     pass
14 | 
15 | class unrecognised_converter(docvert_exception):
16 |     pass
17 | 
18 | class converter_unable_to_generate_open_document(docvert_exception):
19 |     pass
20 | 
21 | class converter_unable_to_generate_pdf(docvert_exception):
22 |     pass
23 | 
24 | class unknown_docvert_process(docvert_exception):
25 |     pass
26 | 
27 | class unable_to_serialize_opendocument(docvert_exception):
28 |     pass
29 | 
30 | class unrecognised_pipeline_item(docvert_exception):
31 |     pass
32 | 
33 | class unrecognised_storage_type(docvert_exception):
34 |     pass
35 | 
36 | class unknown_pipeline_node(docvert_exception):
37 |     pass
38 | 
39 | class unknown_docvert_process(docvert_exception):
40 |     pass
41 | 
42 | class tests_disabled(docvert_exception):
43 |     pass
44 | 
45 | class unable_to_generate_xml_document(docvert_exception):
46 |     pass
47 | 
48 | class invalid_test_root_node(docvert_exception):
49 |     pass
50 | 
51 | class invalid_test_child_node(docvert_exception):
52 |     pass
53 | 
54 | class debug_exception(docvert_exception):
55 |     def __init__(self, message, data, content_type):
56 |         self.data = data
57 |         self.content_type = content_type
58 |         super(docvert_exception, self).__init__(message)
59 | 
60 | class debug_xml_exception(debug_exception):
61 |     pass
62 | 


--------------------------------------------------------------------------------
/core/docvert_html.py:
--------------------------------------------------------------------------------
 1 | # Based on code from
 2 | # http://stackoverflow.com/questions/257409/download-image-file-from-the-html-page-source-using-python
 3 | # but multithreaded, etc.
 4 | from BeautifulSoup import BeautifulSoup as bs
 5 | import urllib.parse
 6 | from urllib.request import urlopen
 7 | from urllib.request import urlretrieve
 8 | import os
 9 | import sys
10 | 
11 | def get_urls(url, storage, storage_prefix):
12 |     """Downloads all the images at 'url' to /test/"""
13 |     soup = bs(urlopen(url))
14 |     parsed = list(urllib.parse.urlparse(url))
15 | 
16 |     for image in soup.findAll("img"):
17 |         print("Image: %(src)s" % image)
18 |         filename = image["src"].split("/")[-1]
19 |         parsed[2] = image["src"]
20 |         storage_path = os.path.join(storage_prefix, filename)
21 |         
22 |         url = urllib.parse.urlunparse(parsed)
23 |         if url.lower().startswith("http"):
24 |             url = image["src"]
25 |         data = urllib.request.urlopen(url)
26 | 


--------------------------------------------------------------------------------
/core/docvert_libreoffice.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from os.path import abspath
  3 | from os.path import isfile
  4 | from os.path import splitext
  5 | import sys
  6 | import io
  7 | from . import document_type
  8 | from . import docvert_exception
  9 | import socket
 10 | 
 11 | DEFAULT_LIBREOFFICE_PORT = 2002
 12 | LIBREOFFICE_OPEN_DOCUMENT = 'writer8'
 13 | LIBREOFFICE_PDF = 'writer_pdf_Export'
 14 | 
 15 | client = None
 16 | 
 17 | try:
 18 |     import uno
 19 | except ImportError:
 20 |     sys.path.append('/opt/libreoffice/program/')
 21 |     sys.path.append('/usr/lib/libreoffice/program/')
 22 |     sys.path.append('/usr/share/libreoffice/program/')
 23 |     sys.path.append('/usr/lib/openoffice.org/program/')
 24 |     sys.path.append('/usr/lib/openoffice.org2.0/program/')
 25 |     try:
 26 |         import uno
 27 |     except ImportError:
 28 |         python_version_info = sys.version_info
 29 |         python_version = "%s.%s.%s" % (python_version_info[0], python_version_info[1], python_version_info[2])
 30 |         sys.stderr.write("Error: Unable to find Python UNO libraries in %s.\nAre Python UNO libraries somewhere else?\nAlternatively, Docvert is currently running Python %s. Maybe the libraries are available under a different version of Python?)\nExiting...\n" % (sys.path, python_version))
 31 |         sys.exit(0)
 32 | 
 33 | import unohelper
 34 | from com.sun.star.beans import PropertyValue
 35 | from com.sun.star.task import ErrorCodeIOException
 36 | from com.sun.star.uno import Exception as UnoException
 37 | from com.sun.star.connection import NoConnectException
 38 | from com.sun.star.io import XOutputStream
 39 | 
 40 | class output_stream_wrapper(unohelper.Base, XOutputStream):
 41 |     def __init__(self):
 42 |         self.data = io.BytesIO()
 43 |         self.position = 0
 44 | 
 45 |     def writeBytes(self, bytes):
 46 |         self.data.write(bytes.value)
 47 |         self.position += len(bytes.value)
 48 | 
 49 |     def close(self):
 50 |         self.data.close()
 51 | 
 52 |     def flush(self):
 53 |         pass
 54 | 
 55 | 
 56 | class libreoffice_client(object):
 57 |     def __init__(self, port=DEFAULT_LIBREOFFICE_PORT):
 58 |         self._local_context = uno.getComponentContext()
 59 |         self._service_manager = self._local_context.ServiceManager
 60 |         resolver = self._service_manager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", self._local_context)
 61 |         try:
 62 |             context = resolver.resolve("uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
 63 |         except NoConnectException as exception:
 64 |             raise Exception("Failed to connect to LibreOffice on port %s. Python 3 UNO library said: \n\n\t%s\n\nIf you don't have a server then read README for 'OPTIONAL LIBRARIES' to see how to set one up." % (port, exception))
 65 |         self._desktop = context.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", context)
 66 | 
 67 |     def convert_by_stream(self, data, format=LIBREOFFICE_OPEN_DOCUMENT):
 68 |         input_stream = self._service_manager.createInstanceWithContext("com.sun.star.io.SequenceInputStream", self._local_context)
 69 | 
 70 |         
 71 |         data.seek(0)
 72 |         print(data, format)
 73 |         print(data.read(100))
 74 |         data.seek(0)
 75 |         handle = open('/tmp/what', 'wb')
 76 |         handle.write(data.read())
 77 | 
 78 | 
 79 |         data.seek(0)
 80 |         input_stream.initialize((uno.ByteSequence(data.read()),)) 
 81 |         #try:
 82 |         document = self._desktop.loadComponentFromURL('private:stream', "_blank", 8, self._to_properties(InputStream=input_stream))
 83 | 
 84 |         try:
 85 |             document.refresh()
 86 |         except AttributeError:
 87 |             pass
 88 | 
 89 |         #except Exception as e:
 90 |         #    print("Lost connection to LibreOffice. Trying to reconnect...")
 91 |         #    self.__init__(); #try to reconnect
 92 |         #    document = self._desktop.loadComponentFromURL('private:stream', "_blank", 0, self._to_properties(InputStream=input_stream))
 93 |         if not document:
 94 |             raise Exception("Error making document")
 95 | 
 96 |         input_stream.closeInput()
 97 |         output_stream = output_stream_wrapper()
 98 |         #try:
 99 |         document.storeToURL('private:stream', self._to_properties(OutputStream=output_stream, FilterName=format, Hidden=True))
100 |         #except Exception as e: #ignore any error, verify the output before complaining
101 |         #print(e)
102 |         #    pass
103 |         #finally:
104 |         #    document.close(True)
105 |         #    pass
106 |         if format == LIBREOFFICE_OPEN_DOCUMENT or format == LIBREOFFICE_PDF:
107 |             doc_type = document_type.detect_document_type(output_stream.data)
108 |             output_stream.data.seek(0)
109 |             if format == LIBREOFFICE_OPEN_DOCUMENT and doc_type != document_type.types.oasis_open_document:
110 |                 raise docvert_exception.converter_unable_to_generate_open_document("Unable to generate OpenDocument, was detected as %s.\n\nAre you sure you tried to convert an office document? If so then it\nmight be a bug, so please contact docvert@holloway.co.nz and we'll see\nif we can fix it. Thanks!" % doc_type)
111 |             elif format == LIBREOFFICE_PDF and doc_type != document_type.types.pdf:
112 |                 raise docvert_exception.converter_unable_to_generate_pdf("Unable to generate PDF, was detected as %s. First 4 bytes = %s" % (doc_type, output_stream.data.read(4)))
113 |         return output_stream.data
114 | 
115 |     def _to_properties(self, **args):
116 |         props = []
117 |         for key in args:
118 |             prop = PropertyValue()
119 |             prop.Name = key
120 |             prop.Value = args[key]
121 |             props.append(prop)
122 |         return tuple(props)
123 | 
124 | def checkLibreOfficeStatus():
125 |     try:
126 |         libreoffice_client()
127 |         return True
128 |     except Exception:
129 |         return False
130 | 
131 | def get_client():
132 |     global client
133 |     if client is None:
134 |         client = libreoffice_client()
135 |     return client
136 | 
137 | 


--------------------------------------------------------------------------------
/core/docvert_pipeline.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import os
 3 | import lxml.etree
 4 | from . import docvert_exception
 5 | from . import docvert_xml
 6 | 
 7 | docvert_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 8 | 
 9 | def get_pipeline_definition(pipeline_type, pipeline_id, auto_pipeline_id):
10 |     pipeline = get_pipeline_xml(pipeline_type, pipeline_id, auto_pipeline_id)
11 |     pipeline['stages'] = process_stage_level( pipeline['xml'].getroot() )
12 |     return pipeline
13 | 
14 | def process_stage_level(nodes):
15 |     stages = list()
16 |     for child_node in nodes:
17 |         if child_node.tag != "stage":
18 |             continue
19 |         child = dict()
20 |         child['attributes'] = child_node.attrib
21 |         child['children'] = None
22 |         if(len(child_node) > 0):
23 |             child['children'] = process_stage_level(child_node)
24 |         stages.append(child)
25 |     return stages
26 | 
27 | def get_pipeline_xml(pipeline_type, pipeline_id, auto_pipeline_id):
28 |     path = os.path.join(docvert_root, "pipelines", pipeline_type, pipeline_id, "pipeline.xml")
29 |     if not os.path.exists(path):
30 |         raise docvert_exception.unrecognised_pipeline("Unknown pipeline_id '%s' (checked %s)" % (pipeline_id, path))
31 |     autopipeline_path = None
32 |     xml = lxml.etree.parse(path)
33 |     if xml.getroot().tag == "autopipeline":
34 |         if auto_pipeline_id is None:
35 |             raise docvert_exception.unrecognised_auto_pipeline("Unknown auto pipeline '%s'" % auto_pipeline_id)
36 |         autopipeline_path = os.path.join(docvert_root, "pipelines", "auto_pipelines", auto_pipeline_id, "pipeline.xml")
37 |         if not os.path.exists(path):
38 |             raise docvert_exception.unrecognised_auto_pipeline("Unknown auto pipeline '%s'" % auto_pipeline_id)
39 |         custom_stages = lxml.etree.tostring(xml.getroot()).decode('utf-8')
40 |         autopipeline = ""
41 |         try:        
42 |             autopipeline_handle = open(autopipeline_path)
43 |         except IOError as e:
44 |             autopipeline_path_with_default = os.path.join(docvert_root, "pipelines", "auto_pipelines", "%s.default" % auto_pipeline_id, "pipeline.xml")
45 |             autopipeline_handle = open(autopipeline_path_with_default)
46 |         autopipeline = autopipeline_handle.read().replace('{{custom-stages}}', custom_stages)
47 |         autopipeline = docvert_xml.strip_encoding_declaration(autopipeline)
48 |         xml = lxml.etree.fromstring(autopipeline)
49 |         xml = xml.getroottree()
50 |     return dict(xml=xml, pipeline_directory=os.path.dirname(path), path=path, autopath=autopipeline_path)
51 | 
52 | class pipeline_processor(object):
53 |     """ Processes through a list() of pipeline_item(s) """
54 |     def __init__(self, storage, pipeline_items, pipeline_directory, pipeline_storage_prefix=None, depth=None):
55 |         self.storage = storage
56 |         self.pipeline_items = pipeline_items
57 |         self.pipeline_directory = pipeline_directory
58 |         self.pipeline_storage_prefix = pipeline_storage_prefix
59 |         self.depth = list() if depth is None else depth
60 | 
61 |     def start(self, pipeline_value):
62 |         for item in self.pipeline_items:
63 |             process = item['attributes']['process']
64 |             namespace = 'core.pipeline_type'
65 |             full_pipeline_type = "%s.%s" % (namespace, process.lower())
66 |             #try:
67 |             stage_module = __import__(full_pipeline_type, {}, {}, [full_pipeline_type.rsplit(".", 1)[-1]])
68 |             stage_class = getattr(stage_module, process)
69 |             stage_instance = stage_class(self.storage, self.pipeline_directory, item['attributes'], self.pipeline_storage_prefix, item['children'], self.depth)
70 |             pipeline_value = stage_instance.stage(pipeline_value)
71 |             #except ImportError, exception:
72 |             #    raise exception
73 |             #    raise docvert_exception.unknown_docvert_process('Unknown pipeline process of "%s" (at %s)' % (process, "%s.%s" % (namespace, process.lower()) ))
74 |         return pipeline_value
75 | 
76 | 


--------------------------------------------------------------------------------
/core/docvert_storage.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import zipfile
  3 | import io
  4 | import time
  5 | from . import docvert_exception
  6 | import core.docvert_xml
  7 | import core.docvert_exception
  8 | 
  9 | 
 10 | class storage_type(object):
 11 |     file_based = "file based storage"
 12 |     memory_based = "memory based storage"
 13 | 
 14 | def get_storage(name):
 15 |     if name == storage_type.file_based:
 16 |         return storage_file_based()
 17 |     elif name == storage_type.memory_based:
 18 |         return storage_memory_based()
 19 |     raise docvert_exception.unrecognised_storage_type("Unknown storage type '%s'" % name)
 20 | 
 21 | class storage(object):
 22 |     _docvert_xml_namespace = '{docvert:5}'
 23 |     
 24 |     def __init__(self, *args, **kargs):
 25 |         raise NotImplemented()
 26 | 
 27 |     def __setitem__(self, key, value):
 28 |         self.add(key, value)
 29 | 
 30 |     def __contains__(self, key):
 31 |         raise NotImplemented()
 32 | 
 33 |     def keys(self):
 34 |         return list(self.storage.keys())
 35 | 
 36 |     def has_key(self, key):
 37 |         return key in self.storage
 38 | 
 39 |     def __getitem__(self, key):
 40 |         return self.get(key)
 41 | 
 42 |     def set_friendly_name(self, filename, friendly_name):
 43 |         raise NotImplemented()
 44 | 
 45 |     def add_tests(self, tests):
 46 |         if not hasattr(self, 'tests'):
 47 |             self.tests = []
 48 |         if type(tests) == type([]): #assume correctly formatted list
 49 |             return self.tests.extend(tests)
 50 |         document = core.docvert_xml.get_document(tests)
 51 |         if hasattr(document, 'getroottree'):
 52 |             document = document.getroottree()
 53 |         root = document.getroot()
 54 |         if root.tag != "%sgroup" % self._docvert_xml_namespace:
 55 |             raise docvert_exception.invalid_test_root_node("Error parsing test results. Expected a root node of 'group' but got '%s'" % root.tag)
 56 |         for child in root:
 57 |             if child.tag == "%spass" % self._docvert_xml_namespace:
 58 |                 self.tests.append( {"status":"pass", "message":str(child.text)} )
 59 |             elif child.tag == "%sfail" % self._docvert_xml_namespace:
 60 |                 self.tests.append(dict(status="fail", message=str(child.text)))
 61 |             else:
 62 |                 raise invalid_test_child_node("Error parsing test results. Unexpected child element of '%s' %s" % (child.tag, child))
 63 | 
 64 |     def get_tests(self):
 65 |         if hasattr(self, 'tests'):
 66 |             return self.tests
 67 |         return list()
 68 | 
 69 |     def get_zip_name(self):
 70 |         raise NotImplemented("No implemented, yet...")
 71 | 
 72 | class storage_file_based(storage):
 73 |     def __init__(self):
 74 |         self.working_directory = tempfile.mkdtemp()
 75 |         self.created_at = time.time()
 76 |         self.default_document = None
 77 | 
 78 |     def add(self, path, data):
 79 |         handler = open(os.path.join(self.working_directory, path), 'w')
 80 |         handler.write(data)
 81 |         handler.close()
 82 | 
 83 |     def set_friendly_name(self, filename, friendly_name):
 84 |         raise NotImplemented()
 85 | 
 86 |     def get(self, path):
 87 |         handler = open(os.path.join(self.working_directory, path), 'r')
 88 |         return handler.read()
 89 | 
 90 |     def _dispose(self):
 91 |         os.removedirs(self.working_directory)
 92 | 
 93 |     def get_zip_name(self):
 94 |         raise NotImplemented("No implemented, yet...")
 95 | 
 96 |     def to_zip(self):
 97 |         raise NotImplemented("Not implemented, yet...")
 98 | 
 99 |     def __str__(self):
100 |         return '<file based storage at path "%s">' % self.working_directory
101 | 
102 | 
103 | class storage_memory_based(storage):
104 |     def __init__(self):
105 |         self.storage = dict()
106 |         self.created_at = time.time()
107 |         self.default_document = None
108 |         self.friendly_names = dict()
109 | 
110 |     def add(self, path, data):
111 |         self.storage[path] = data
112 | 
113 |     def set_friendly_name(self, filename, friendly_name):
114 |         self.friendly_names[filename] = friendly_name 
115 | 
116 |     def get_friendly_name_if_available(self, filename):
117 |         if filename in self.friendly_names:
118 |             return self.friendly_names[filename]
119 |         return filename
120 | 
121 |     def keys(self):
122 |         return list(self.storage.keys())
123 | 
124 |     def get(self, path):
125 |         return self.storage[path]
126 | 
127 |     def remove(self, path):
128 |         del self.storage[path]
129 | 
130 |     def __delitem__(self, path):
131 |         del self.storage[path]
132 | 
133 |     def __contains__(self, key):
134 |         return key in self.storage
135 | 
136 |     def to_zip(self):
137 |         zipdata = io.BytesIO()
138 |         archive = zipfile.ZipFile(zipdata, 'w')
139 |         for key, value in self.storage.items():
140 |             data = value
141 |             if hasattr(value, "read"):
142 |                 data = value.seek(0)
143 |                 data = value.read()
144 |             if not key.startswith("__"): #if it's not internal data
145 |                 archive.writestr(key.replace("\\", "/"), data)
146 |         archive.close()
147 |         return zipdata
148 | 
149 |     def get_zip_name(self):
150 |         friendly_names = ", ".join(list(self.friendly_names.keys()))
151 |         if friendly_names != "":
152 |             friendly_names = "-%s" % friendly_names
153 |         zip_name = "%s%s" % (time.strftime("docvert-%Y%m%d%H%M"), friendly_names)
154 |         return zip_name.replace("\"","").replace("\n","").replace("\r","").replace("\\","")
155 | 
156 |     def _dispose(self):
157 |         pass
158 | 
159 |     def __str__(self):
160 |         return '<memory based storage with these keys "%s">' % list(self.storage.keys())
161 | 


--------------------------------------------------------------------------------
/core/docvert_url.py:
--------------------------------------------------------------------------------
 1 | #based on https://github.com/shazow/workerpool/wiki/Mass-Downloader
 2 | import os
 3 | import urllib.request, urllib.error, urllib.parse
 4 | import lib.workerpool
 5 | 
 6 | class DownloadUrl(lib.workerpool.Job):
 7 |     def __init__(self, url, http_timeout=10):
 8 |         self.url = url
 9 |         self.http_timeout = http_timeout
10 | 
11 |     def run(self):
12 |         try:
13 |             self.response = urllib.request.urlopen(self.url, None, self.http_timeout).read()
14 |         except urllib.error.URLError as e:
15 |             self.response = e
16 | 
17 | def download(urls, workerpool_size=5):
18 |     pool = lib.workerpool.WorkerPool(size=workerpool_size)
19 |     for url in urls:
20 |         pool.put(DownloadUrl(url))
21 |     pool.shutdown()
22 |     pool.wait()
23 |     print(dir(pool))
24 | 
25 | def demo():
26 |     download([
27 |         'https://github.com/shazow/workerpool/wiki/Mass-Downloader',
28 |         'http://yahoo.com',
29 |         'http://twitter.com/',
30 |         'http://www.google.com/',
31 |         'http://www.stuff.co.nz/',
32 |         'http://trademe.co.nz/',
33 |         'http://av.com/',
34 |         'http://reddit.com/',
35 |         'http://slashdot.org/'
36 |     ])
37 | 


--------------------------------------------------------------------------------
/core/docvert_xml.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from . import docvert_exception
 3 | import lxml.etree
 4 | import xml.sax.saxutils
 5 | import re
 6 | 
 7 | def transform(data, xslt, params=None):
 8 |     if params is None:
 9 |         params = dict()
10 |     xslt_document = get_document(xslt)
11 |     xslt_processor = lxml.etree.XSLT(xslt_document)
12 |     xml_document = get_document(data)
13 |     params = convert_dict_to_params(params)
14 |     return xslt_processor(xml_document, **params)
15 | 
16 | def relaxng(data, relaxng_path):
17 |     relaxng_document = get_document(relaxng_path)
18 |     xml_document = get_document(data)
19 |     relaxng_processor = lxml.etree.RelaxNG(relaxng_document)
20 |     is_valid = relaxng_processor.validate(xml_document)
21 |     return dict(valid=is_valid, log=relaxng_processor.error_log)
22 | 
23 | def escape_text(text):
24 |     return xml.sax.saxutils.escape(text)
25 | 
26 | def get_document(data):
27 |     if isinstance(data, lxml.etree._Element):
28 |         return data
29 |     elif isinstance(data, lxml.etree._XSLTResultTree):
30 |         return data
31 |     elif hasattr(data, 'read'):
32 |         data.seek(0)
33 |         return lxml.etree.XML(data.read())
34 |     elif data[0:1] == "/" or data[0:1] == "\\": #path
35 |         return lxml.etree.XML(strip_encoding_declaration(open(data).read()))
36 |     elif data[0:1] == "<": #xml
37 |         return lxml.etree.XML(data)
38 |     else: #last ditch attempt...
39 |         return lxml.etree.XML(data)
40 |     raise docvert_exception.unable_to_generate_xml_document()
41 | 
42 | def convert_dict_to_params(params):
43 |     for key in list(params.keys()):
44 |         params[key] = "'%s'" % params[key]
45 |     return params
46 | 
47 | def file_as_string(path):
48 |     pass
49 | 
50 | def strip_encoding_declaration(xml_string):
51 |     return re.sub('<\?.*?\?>','<?xml version="1.0"?>', xml_string)


--------------------------------------------------------------------------------
/core/linux/debian/README:
--------------------------------------------------------------------------------
1 | docvert-webserver.sh -- A start/stop daemon for Docvert
2 | 


--------------------------------------------------------------------------------
/core/linux/debian/docvert-webserver.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/sh
  2 | ### BEGIN INIT INFO
  3 | # Provides:          docvert
  4 | # Default-Start:     2 3 4 5
  5 | # Default-Stop:      0 1 6
  6 | # Required-Start:    $local_fs $network_fs $network $syslog
  7 | # Required-Stop:     $local_fs $network_fs $network $syslog
  8 | # Short-Description: Docvert web server
  9 | # Description:       Start/stop Docvert in server mode
 10 | ### END INIT INFO
 11 | 
 12 | scriptDirectory=$( dirname "$0" )
 13 | scriptDirectory=$( cd $scriptDirectory; pwd )
 14 | 
 15 | docvertcommand="/opt/docvert/docvert-web.py"
 16 | kill="/bin/kill"
 17 | pidfile="/tmp/docvert-webserver.pid"
 18 | startStopDaemon="/sbin/start-stop-daemon"
 19 | rpmDaemon="/usr/bin/daemon"
 20 | whoami=$( whoami )
 21 | 
 22 | export HOME=/tmp/
 23 | 
 24 | if [ -e "$startStopDaemon" ]
 25 | then
 26 |         if [ -n "$username" ]
 27 |         then
 28 |                 username="-c $username "
 29 |         fi
 30 |         if [ -n "$groupname" ]
 31 |         then
 32 |                 groupname="-g $groupname "
 33 |         fi
 34 | elif [ -e "$rpmDaemon" ]
 35 | then
 36 |         if [ -n "$username" ]
 37 |         then
 38 |                 username="--user=$username"
 39 |                 if [ -n "$groupname" ]
 40 |                 then
 41 |                         username="$username.$groupname"
 42 |                 fi
 43 |         fi
 44 | fi
 45 | 
 46 | docvert_start()
 47 | {
 48 |         if [ -s "$pidfile" ]
 49 |         then
 50 |                 pid=$( cat "$pidfile" )
 51 |                 echo "Already running? pid file exists at $pidfile that contains process #$pid"
 52 |         else
 53 |                 rm -f "$pidfile"
 54 |                 if [ -e "$startStopDaemon" ]
 55 |                 then
 56 |                         $startStopDaemon -b -m --pidfile $pidfile --start --exec "$docvertcommand"
 57 |                 elif [ -e "$rpmDaemon" ]
 58 |                 then 
 59 |                         $rpmDaemon --pidfile=$pidfile $docvertcommand
 60 |                 else
 61 |                         echo "Warning: Unable to find $startStopDaemon or $rpmDaemon so instead I'll daemonize by forking as the current user, $whoami."
 62 |                         $docvertcommand &
 63 |                 fi
 64 |                 return 0
 65 |         fi
 66 | }
 67 | 
 68 | docvert_stop()
 69 | {
 70 |         if [ -s "$pidfile" ]
 71 |         then
 72 |                 pid=$( cat "$pidfile" )
 73 |                 $kill $pid
 74 |                 sleep 1
 75 |                 $kill -s 9 "$pid" > /dev/null 2>&1 &
 76 |                 remainingProcess=$( ps "$pid" | grep $pid )
 77 |                 if [ -n $remainingProcess ]
 78 |                 then
 79 |                         rm -f "$pidfile"
 80 |                         echo "Successfully killed process #$pid"
 81 |                         return 0
 82 |                 else
 83 |                         echo "Unable to kill process #$pid. Check permissions? (remaining processes '$remainingProcess')"
 84 |                         return 0
 85 |                 fi
 86 |         else
 87 |                 echo "Stopped. Warning: No pid file found at $pidfile so I'm assuming it was never running."
 88 |         fi
 89 | }
 90 | 
 91 | case "$1" in
 92 |         start)
 93 |                 docvert_start
 94 |         ;;
 95 |         stop)
 96 |                 docvert_stop
 97 |         ;;
 98 |         restart)
 99 |                 docvert_stop
100 |                 sleep 1
101 |                 docvert_start
102 |         ;;
103 |         *)
104 |                 echo "Usage: docvert-webserver.init.sh {start|stop|restart}"
105 |                 exit 1
106 |         ;;
107 | esac
108 | 
109 | 


--------------------------------------------------------------------------------
/core/opendocument.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import zipfile
  3 | import io
  4 | import lxml.etree
  5 | import os.path
  6 | 
  7 | def extract_useful_open_document_files(data, storage=None, prefix=None):
  8 |     archive = zipfile.ZipFile(data)
  9 |     archive_files = archive.namelist()
 10 |     xml_string = extract_xml(archive, archive_files)
 11 |     if storage is None: #we can't extract binaries
 12 |         return xml_string
 13 |     return extract_useful_binaries(archive, archive_files, storage, prefix, xml_string)
 14 | 
 15 | def extract_thumbnail(data):
 16 |     archive = zipfile.ZipFile(data)
 17 |     thumbnail_path = 'Thumbnails/thumbnail.png'
 18 |     archive_files = archive.namelist()
 19 |     if thumbnail_path in archive_files:
 20 |         return archive.open(thumbnail_path).read()
 21 |     return None
 22 | 
 23 | def extract_useful_binaries(archive, archive_files, storage, prefix, xml_string):
 24 |     xlink_namespace = "http://www.w3.org/1999/xlink"
 25 |     xpath_template = '//*[@{%s}href="%s"]' % (xlink_namespace, '%s')
 26 |     document = lxml.etree.fromstring(xml_string.getvalue())
 27 |     extensions = [".wmf", ".emf", ".svg", ".png", ".gif", ".bmp", ".jpg", ".jpe", ".jpeg"]
 28 |     index = 0
 29 |     for archive_path in archive_files:
 30 |         path_minus_extension, extension = os.path.splitext(archive_path)
 31 |         if extension in extensions:
 32 |             storage_path = "%s/file%i.%s" % (prefix, index, extension)
 33 |             try:
 34 |                 storage_path = "%s/%s" % (prefix, os.path.basename(archive_path))
 35 |             except UnicodeDecodeError as e:
 36 |                 pass
 37 |             #step 1. extract binaries
 38 |             storage[storage_path] = archive.open(archive_path).read() 
 39 |             #step 2. update XML references
 40 |             path_relative_to_xml = os.path.basename(archive_path)
 41 |             xpath = lxml.etree.ETXPath(xpath_template % archive_path)
 42 |             for match in xpath(document):
 43 |                 match.attrib['{%s}href' % xlink_namespace] = storage_path
 44 |             index += 1
 45 |     return io.StringIO(lxml.etree.tostring(document).decode('utf-8'))
 46 | 
 47 | def extract_xml(archive, archive_files):
 48 |     xml_files_to_extract = ["content.xml", "meta.xml", "settings.xml", "styles.xml"]
 49 |     xml_string = io.StringIO()
 50 |     xml_string.write('<docvert:root xmlns:docvert="docvert:5">')
 51 |     for xml_file_to_extract in xml_files_to_extract:
 52 |         if xml_file_to_extract in archive_files:
 53 |             xml_string.write('<docvert:external-file xmlns:docvert="docvert:5" docvert:name="%s">' % xml_file_to_extract)
 54 |             xml_document_string = archive.open(xml_file_to_extract).read().decode('utf-8').replace('<?xml version="1.0" encoding="UTF-8"?>', '<?xml version="1.0"?>')
 55 |             document = lxml.etree.fromstring(xml_document_string) #parsing as XML to remove any doctype
 56 |             xml_string.write(lxml.etree.tostring(document).decode('utf-8'))
 57 |             xml_string.write('</docvert:external-file>')
 58 |     xml_string.write('</docvert:root>')
 59 |     return xml_string
 60 |     
 61 | def generate_single_image_document(image_data, width, height):
 62 |     content_xml = """<?xml version="1.0" encoding="UTF-8"?>
 63 |         <office:document-content office:version="1.2" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:css3t="http://www.w3.org/TR/css3-text/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2" xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:officeooo="http://openoffice.org/2009/office" xmlns:ooo="http://openoffice.org/2004/office" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:rpt="http://openoffice.org/2005/report" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:tableooo="http://openoffice.org/2009/table" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
 64 |           <office:body>
 65 |             <office:text>
 66 |               %s
 67 |             </office:text>
 68 |           </office:body>
 69 |         </office:document-content>"""
 70 |     mimetype = 'application/vnd.oasis.opendocument.text'
 71 |     image_xml = """<text:p text:style-name="Standard">
 72 |         <draw:frame draw:name="graphics1" draw:style-name="fr1" svg:width="%s" svg:height="%s" text:anchor-type="char">
 73 |           <draw:image xlink:actuate="onLoad" xlink:href="%s" xlink:show="embed" xlink:type="simple"/>
 74 |         </draw:frame></text:p>"""
 75 |     image_path = "Pictures/image.png"
 76 |     manifest = """<?xml version="1.0" encoding="UTF-8"?>
 77 |         <manifest:manifest xmlns:manifest="urn:oasis:names:tc:opendocument:xmlns:manifest:1.0">
 78 |             <manifest:file-entry manifest:media-type="application/vnd.oasis.opendocument.text" manifest:version="1.2" manifest:full-path="/"/>
 79 |             <manifest:file-entry manifest:media-type="image/png" manifest:full-path="%s"/>
 80 |             <manifest:file-entry manifest:media-type="" manifest:full-path="Pictures/"/>
 81 |             <manifest:file-entry manifest:media-type="text/xml" manifest:full-path="content.xml"/>
 82 |             <manifest:file-entry manifest:media-type="text/xml" manifest:full-path="styles.xml"/>
 83 |         </manifest:manifest>"""
 84 |     styles_xml = """<?xml version="1.0" encoding="UTF-8"?>
 85 |         <office:document-styles grddl:transformation="http://docs.oasis-open.org/office/1.2/xslt/odf2rdf.xsl" office:version="1.2" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:grddl="http://www.w3.org/2003/g/data-view#" xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0">
 86 |         <office:automatic-styles>
 87 |             <style:page-layout style:name="Mpm1">
 88 |               <style:page-layout-properties fo:background-color="#ffffff" fo:margin-bottom="0cm" fo:margin-left="0cm" fo:margin-right="0cm" fo:margin-top="0cm" fo:page-width="%s" fo:page-height="%s" style:footnote-max-height="0cm" style:layout-grid-base-height="0.635cm" style:layout-grid-base-width="0.369cm" style:layout-grid-color="#c0c0c0" style:layout-grid-display="false" style:layout-grid-lines="36" style:layout-grid-mode="none" style:layout-grid-print="false" style:layout-grid-ruby-below="false" style:layout-grid-ruby-height="0cm" style:layout-grid-snap-to-characters="true" style:num-format="1" style:print-orientation="portrait" style:writing-mode="lr-tb">
 89 |               </style:page-layout-properties>
 90 |             </style:page-layout>
 91 |           </office:automatic-styles>
 92 |           <office:master-styles>
 93 |             <style:master-page style:name="Standard" style:page-layout-name="Mpm1"/>
 94 |             <style:master-page style:display-name="First Page" style:name="First_20_Page" style:next-style-name="Standard" style:page-layout-name="Mpm1"/>
 95 |           </office:master-styles>
 96 |         </office:document-styles>"""
 97 |     image_xml = image_xml % (width, height, image_path) #filename doesn't matter
 98 |     zipio = io.BytesIO()
 99 |     archive = zipfile.ZipFile(zipio, 'w')
100 |     archive.writestr('mimetype', mimetype)
101 |     archive.writestr('content.xml', content_xml % image_xml)
102 |     archive.writestr('styles.xml', styles_xml % (width, height))
103 |     archive.writestr('META-INF/manifest.xml', manifest % image_path)
104 |     archive.writestr(image_path, image_data)
105 |     archive.close()
106 |     zipio.seek(0)
107 |     #pointer = open('/tmp/doc.odt', 'wb')
108 |     #pointer.write(zipio.read())
109 |     #pointer.close()
110 |     return zipio
111 | 
112 | 


--------------------------------------------------------------------------------
/core/pipeline_type/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/core/pipeline_type/__init__.py


--------------------------------------------------------------------------------
/core/pipeline_type/compare.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import os
 3 | import os.path
 4 | import lxml.etree
 5 | from . import pipeline_item
 6 | import core.docvert
 7 | import core.docvert_exception
 8 | import core.docvert_xml
 9 | import core.document_type
10 | import core.opendocument
11 | 
12 | class Compare(pipeline_item.pipeline_stage):
13 |     def stage(self, pipeline_value):
14 |         if pipeline_value is None:
15 |             raise pipeline_value_not_empty("A process type of Compare needs pipeline_value to compare with.")
16 |         if 'withFile' not in self.attributes:
17 |             raise needs_with_file_attribute("A process type of Compare needs a withFile attribute containing a filename/path.")
18 |         compare_path = self.resolve_pipeline_resource(self.attributes['withFile'])
19 |         if not os.path.exists(compare_path):
20 |             raise generation_file_not_found("A process type of Compare couldn't find a file at %s" % compare_path)
21 |         compare_data = file(compare_path)
22 |         compare_xml = None
23 |         doc_type = core.document_type.detect_document_type(compare_data)
24 |         if doc_type == core.document_type.types.oasis_open_document:
25 |             compare_xml = core.opendocument.extract_useful_open_document_files(compare_data)
26 |         elif doc_type == core.document_type.types.xml:
27 |             compare_xml = compare_data
28 |         else:
29 |             raise cannot_compare_with_non_xml_or_non_opendocument("Cannot compare withFile=%s with detected type of %s" % (compare_path, doc_type))
30 |         turn_document_into_test_filename = "internal://turn-document-into-test.xsl"
31 |         xslt_path = self.resolve_pipeline_resource(turn_document_into_test_filename)
32 |         test_xslt = core.docvert_xml.transform(compare_data, xslt_path)
33 |         storage_filename = "comparision-to-%s.xhtml" % self.attributes['withFile']
34 |         storage_path = "%s/%s" % (self.pipeline_storage_prefix, storage_filename)
35 |         if self.pipeline_storage_prefix is None:
36 |             storage_path = storage_filename
37 |         print(storage_path)
38 |         storage[storage_path] = core.docvert_xml.transform(pipeline_value, test_as_xslt)
39 |         return pipeline_value
40 | 
41 | class pipeline_value_not_empty(core.docvert_exception.docvert_exception):
42 |     pass
43 | 
44 | class needs_with_file_attribute(core.docvert_exception.docvert_exception):
45 |     pass
46 | 
47 | class generation_file_not_found(core.docvert_exception.docvert_exception):
48 |     pass
49 | 
50 | class cannot_compare_with_non_xml_or_non_opendocument(core.docvert_exception.docvert_exception):
51 |     pass
52 | 
53 | 


--------------------------------------------------------------------------------
/core/pipeline_type/convertimages.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import os
  3 | import os.path
  4 | import tempfile
  5 | import io
  6 | import subprocess
  7 | from PIL import Image # Python PIL
  8 | from . import pipeline_item
  9 | import core.docvert_xml
 10 | import core.opendocument
 11 | import core.docvert_libreoffice
 12 | import lxml.etree
 13 | 
 14 | class ConvertImages(pipeline_item.pipeline_stage):
 15 |     synonym_formats = dict( #Not just synonyms, but types of files that are converted using the same code (eg, emf=wmf)
 16 |         emf='wmf',wmf='wmf',#horrible old vector
 17 |         pdf='pdf', ps='pdf', #moderately horrible vector
 18 |         svg='svg',#vector
 19 |         ani='png',apng='png',art='png',bef='png',bmf='png',bmp='png',cgm='png',cin='png',cpc='png',dpx='png',ecw='png',exr='png',fits='png',flic='png',fpx='png',gif='png',icer='png',ics='png',iff='png',iges='png',ilbm='png',jbig='png',jbig2='png',jng='png',jpe='png',jpg='png',jpeg='png',jp2='png',mng='png',miff='png',pbm='png',pcx='png',pgf='png',pgm='png',png='png',ppm='png',psp='png',raw='png',rad='png',rgbe='png',sgi='png',tga='png',tif='png',tiff='png',webp='png',xar='png',xbm='png',xcf='png',xpm='png' #bitmap
 20 |     )
 21 |     
 22 |     def stage(self, pipeline_value):
 23 |         self.intermediate_files = list()
 24 |         intermediate_file_extensions_to_retain = list()
 25 |         #TODO add format sniffing code
 26 |         conversions = dict()
 27 |         if '__convertimages' not in self.storage:
 28 |             self.storage['__convertimages'] = dict()
 29 |         # 1. Parse conversion requests
 30 |         formats = ("%s," % self.attributes["formats"]).split(",")
 31 |         for format in formats:
 32 |             conversion = format.strip(" ._-\n\r").lower()
 33 |             if len(conversion) == 0: continue
 34 |             from_format, to_format = conversion.split("2")
 35 |             if from_format in self.synonym_formats:
 36 |                 from_format = self.synonym_formats[from_format]
 37 |             if from_format not in conversions:
 38 |                 conversions[from_format] = list()
 39 |             intermediate_file_extensions_to_retain.append(str(to_format))
 40 |             conversions[str(from_format)].append(str(to_format))
 41 | 
 42 |         # 2. Convert images
 43 |         # <stage process="ConvertImages" formats="wmf2png, wmf2svg, bmp2png" deleteOriginals="true" autoCrop="false" autoCropThreshold="20"/>
 44 |         storage_paths = list(self.storage.keys())
 45 |         for storage_path in storage_paths:
 46 |             if self.pipeline_storage_prefix and not storage_path.startswith(self.pipeline_storage_prefix):
 47 |                 continue
 48 |             path, extension = os.path.splitext(storage_path)
 49 |             extension_minus_dot = str(extension[1:])
 50 |             for from_format, to_formats in conversions.items():
 51 |                 from_format_method = "convert_%s" % extension_minus_dot
 52 |                 if extension_minus_dot == from_format and hasattr(self, from_format_method):
 53 |                     for to_format in to_formats:
 54 |                         pipeline_value = getattr(self, from_format_method)(storage_path, to_format, pipeline_value)
 55 | 
 56 |         # 3. Delete original images
 57 |         if "deleteOriginals" in self.attributes and not self.attributes["deleteOriginals"].strip().lower() in ['false','f','n','0','']:
 58 |             for storage_path in storage_paths:
 59 |                 if not storage_path.startswith(self.pipeline_storage_prefix):
 60 |                     continue
 61 |                 extension = os.path.splitext(storage_path)[1][1:]
 62 |                 #if conversions.has_key(extension):
 63 |                 #    self.storage.remove(storage_path)
 64 | 
 65 |         for intermediate_file in self.intermediate_files:
 66 |             path, extension = os.path.splitext(intermediate_file)
 67 |             extension_minus_dot = str(extension[1:])
 68 |             if not extension_minus_dot in intermediate_file_extensions_to_retain:
 69 |                 try:
 70 |                     del self.storage[intermediate_file]
 71 |                 except KeyError as e:
 72 |                     pass
 73 | 
 74 |         return pipeline_value
 75 | 
 76 |     def convert_wmf(self, storage_path, to_format, pipeline_value, width=None, height=None):
 77 |         # We can't reliably parse wmf/emf here so use LibreOffice to generate PDF no matter the to_format
 78 |         path, extension = os.path.splitext(storage_path)
 79 |         pdf_path = "%s.pdf" % path
 80 |         if pdf_path not in self.storage:
 81 |             if width is None or height is None:
 82 |                 width, height, pipeline_value = self.get_dimensions_from_xml(storage_path, pipeline_value, to_format)
 83 |             #print "Generate document for %s because %s doesn't exist\n%s\n\n" % (storage_path, pdf_path, self.storage.keys())
 84 |             opendocument = core.opendocument.generate_single_image_document(self.storage[storage_path], width, height)
 85 |             self.storage[pdf_path] = core.docvert_libreoffice.get_client().convert_by_stream(opendocument, core.docvert_libreoffice.LIBREOFFICE_PDF)
 86 |         else:
 87 |             #print "Cache hit! No need to generate %s" % pdf_path
 88 |             pass
 89 |         if to_format == 'pdf':
 90 |             return pipeline_value
 91 |         self.intermediate_files.append(pdf_path)
 92 |         from_format = 'pdf'
 93 |         if from_format in self.synonym_formats:
 94 |             from_format = self.synonym_formats[from_format]
 95 |         from_format_method = "convert_%s" % from_format
 96 |         return getattr(self, from_format_method)(pdf_path, to_format, pipeline_value, width, height)
 97 | 
 98 |     def convert_pdf(self, storage_path, to_format, pipeline_value, width=None, height=None):
 99 |         path, extension = os.path.splitext(storage_path)
100 |         svg_path = "%s.svg" % path
101 |         if svg_path not in self.storage:
102 |             if width is None or height is None:
103 |                 width, height, pipeline_value = self.get_dimensions_from_xml(storage_path, pipeline_value)
104 |             from_format = str(extension[1:])
105 |             synonym_from_format = from_format
106 |             if synonym_from_format in self.synonym_formats:
107 |                 synonym_from_format = self.synonym_formats[synonym_from_format]
108 |             self.storage[svg_path] = self.run_conversion_command_with_temporary_files(storage_path, "pdf2svg %s %s")
109 |         else:
110 |             #print "Cache hit! No need to generate %s" % svg_path
111 |             pass
112 |         if to_format == 'svg':
113 |             return pipeline_value
114 |         self.intermediate_files.append(svg_path)
115 |         from_format = 'svg'
116 |         if from_format in self.synonym_formats:
117 |             from_format = self.synonym_formats[from_format]
118 |         from_format_method = "convert_%s" % from_format
119 |         return getattr(self, from_format_method)(svg_path, to_format, pipeline_value, width, height)
120 | 
121 |     def convert_svg(self, storage_path, to_format, pipeline_value, width=None, height=None):
122 |         path, extension = os.path.splitext(storage_path)
123 |         png_path = "%s.png" % path
124 |         if png_path not in self.storage:
125 |             if width is None or height is None:
126 |                 width, height, pipeline_value = self.get_dimensions_from_xml(storage_path, pipeline_value)
127 |             from_format = str(extension[1:])
128 |             synonym_from_format = from_format
129 |             if synonym_from_format in self.synonym_formats:
130 |                 synonym_from_format = self.synonym_formats[synonym_from_format]
131 |             self.storage[png_path] = self.run_conversion_command_with_temporary_files(storage_path, "rsvg %s %s")
132 |         else:
133 |             #print "Cache hit! No need to generate %s" % png_path
134 |             pass
135 |         if to_format == 'png':
136 |             return pipeline_value
137 |         self.intermediate_files.append(png_path)
138 |         from_format = 'svg'
139 |         if from_format in self.synonym_formats:
140 |             from_format = self.synonym_formats[from_format]
141 |         from_format_method = "convert_%s" % from_format
142 |         return getattr(self, from_format_method)(png_path, to_format, pipeline_value, width, height)
143 |         
144 |     def convert_png(self, storage_path, to_format, pipeline_value, width=None, height=None):
145 |         #im = Image.open('icon.gif')
146 |         #transparency = im.info['transparency'] 
147 |         #im .save('icon.png', transparency=transparency)
148 |         #print dir(Image)
149 |         return pipeline_value
150 | 
151 |     def get_dimensions_from_xml(self, storage_path, pipeline_value, change_image_path_extension_to=None):
152 |         def get_value(data):
153 |             if hasattr(data, 'read'):
154 |                 data.seek(0)
155 |                 return data.read()
156 |             return data
157 |         path, extension = os.path.splitext(storage_path)
158 |         if path in self.storage['__convertimages']: #intentionally extensionless because all formats of this single image are considered to have the same dimensions
159 |             return (self.storage['__convertimages'][path]['width'], self.storage['__convertimages'][path]['height'], pipeline_value)
160 | 
161 |         default_dimensions = ('10cm', '10cm') #we had to choose something
162 |         #if self.pipeline_storage_prefix:
163 |         #    storage_path = storage_path[len(self.pipeline_storage_prefix) + 1:]
164 |         xml = self.get_document(pipeline_value)
165 |         namespaces = {'xlink':'http://www.w3.org/1999/xlink'}
166 |         xpath = '//*[@xlink:href="%s"]/parent::*' % storage_path
167 |         image_nodes = xml.xpath(xpath, namespaces=namespaces)
168 |         if len(image_nodes) == 0: #can't do anything, might have been a thumbnail or unlinked image, but either way return 10cm square
169 |             #images = xml.xpath('//*[@xlink:href]', namespaces=namespaces)
170 |             #print "Could not find image node with %s. Document contains: \n%s\n%s. Prefix was %s" % (xpath, images[0], images[0].attrib, self.pipeline_storage_prefix)
171 |             return default_dimensions[0], default_dimensions[1], pipeline_value
172 |         #print "FOUND IMAGE!"
173 |         image_node = image_nodes[0] #first image will be do fine. It's possible to have multiple tags with different width/height pointing at the same image but for now we'll discount that possibility
174 |         oasis_opendocument_svg_namespace = 'urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0'
175 |         width_attribute = "{%s}%s" % (oasis_opendocument_svg_namespace, 'width')
176 |         height_attribute = "{%s}%s" % (oasis_opendocument_svg_namespace, 'height')
177 |         #print "about to read width/height"
178 |         try:
179 |             width = image_node.attrib[width_attribute]
180 |             height = image_node.attrib[height_attribute]
181 |             #print "success... and %s" % change_image_path_extension_to
182 |             if change_image_path_extension_to:
183 |                 path, extension = os.path.splitext(storage_path)
184 |                 xlink_href_attribute = "{%s}%s" % (namespaces['xlink'], 'href')
185 |                 change_image_path = '%s.%s' % (path, change_image_path_extension_to)
186 |                 #print "New image path is %s" % change_image_path
187 |                 image_nodes = image_node.xpath('*[@xlink:href="%s"]' % storage_path, namespaces=namespaces)
188 |                 for image_node in image_nodes:
189 |                     #print "Value was %s" % image_node.attrib[xlink_href_attribute]
190 |                     image_node.attrib[xlink_href_attribute] = change_image_path
191 |                     #print "Value is %s" % image_node.attrib[xlink_href_attribute]
192 |             self.storage['__convertimages'][path] = dict(width=width, height=height) #intentionally extensionless because all formats of this single image are considered to have the same dimensions
193 |             return (width, height, lxml.etree.tostring(xml))
194 |         except KeyError as e:
195 |             pass
196 |         return default_dimensions[0], default_dimensions[1], pipeline_value
197 | 
198 |     def get_document(self, pipeline_value):
199 |         xml = core.docvert_xml.get_document(pipeline_value)
200 |         if hasattr(xml, "getroottree"):
201 |             xml = xml.getroottree()
202 |         elif hasattr(xml, 'getroot'):
203 |             xml = xml.getroot()
204 |         return xml
205 | 
206 |     def run_conversion_command_with_temporary_files(self, from_storage_path, command_template):
207 |         def get_value(data):
208 |             if hasattr(data, 'read'):
209 |                 data.seek(0)
210 |                 return data.read()
211 |             return data
212 |         temporary_from_path = None
213 |         temporary_to_path = None
214 |         try:
215 |             os_handle, temporary_from_path = tempfile.mkstemp()
216 |             temporary_from_file = open(temporary_from_path, 'wb')
217 |             the_value = get_value(self.storage[from_storage_path])
218 |             if hasattr(the_value, 'encode'):
219 |                 the_value = the_value.encode('utf-8')
220 |             temporary_from_file.write(the_value);
221 |             temporary_from_file.flush()
222 |             temporary_from_file.close()
223 |             os_handle, temporary_to_path = tempfile.mkstemp()
224 |             command = command_template % (temporary_from_path, temporary_to_path)
225 |             std_response = subprocess.getstatusoutput(command)
226 |             if os.path.getsize(temporary_to_path) == 0:
227 |                 raise Exception('Error in convertimages.py: No output data created. Command was "%s" which returned "%s"' % (command_template, std_response))
228 |             temporary_to = open(temporary_to_path, 'rb')
229 |             to_data = temporary_to.read()
230 |             temporary_to.close()
231 |             return to_data
232 |         finally:
233 |             if temporary_from_path: os.remove(temporary_from_path)
234 |             if temporary_to_path: os.remove(temporary_to_path)
235 | 
236 | 
237 | """
238 | #NOTE: Poppler doesn't work on my [Matthew Holloway's] Ubuntu 10.10 machine. It seg faults so that's why I'm shelling out
239 | #import cairo
240 | #import poppler
241 | os_handle, temporary_file_path = tempfile.mkstemp()
242 | temporary_file = open(temporary_file_path, 'w')
243 | temporary_file.write(get_value(self.storage[storage_path]))
244 | temporary_file.flush()
245 | print temporary_file_path
246 | pdf = poppler.document_new_from_file(
247 |     "file://%s" % temporary_file_path,
248 |     password=None)
249 | first_page = pdf.get_page(0)
250 | surface = cairo.PDFSurface(surface_storage, width_float, height_float)
251 | cairo_context = cairo.Context(surface)
252 | 
253 | first_page.render(cairo_context)
254 | surface.write_to_png("/tmp/page0.png")
255 | print dir(first_page)
256 | temporary_file.close()
257 | """
258 | 
259 | 


--------------------------------------------------------------------------------
/core/pipeline_type/debug.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import lxml.etree
 3 | from . import pipeline_item
 4 | import core.docvert_exception
 5 | 
 6 | class Debug(pipeline_item.pipeline_stage):
 7 |     def stage(self, pipeline_value):
 8 |         def get_value(data):
 9 |             if hasattr(data, "read"):
10 |                 data.seek(0)
11 |                 return data.read()
12 |             return data
13 |         if isinstance(pipeline_value, lxml.etree._Element) or isinstance(pipeline_value, lxml.etree._XSLTResultTree):
14 |             pipeline_value = lxml.etree.tostring(pipeline_value)
15 |         elif hasattr(pipeline_value, 'read'):
16 |             pipeline_value.seek(0)
17 |             pipeline_value = pipeline_value.read()
18 |         if get_value(pipeline_value) is None:
19 |             raise core.docvert_exception.debug_exception("Current contents of pipeline", "Debug: pipeline_value is %s" % get_value(pipeline_value), "text/plain; charset=UTF-8")
20 |         try:
21 |             document = lxml.etree.fromstring(get_value(pipeline_value))
22 |         except lxml.etree.XMLSyntaxError as exception:
23 |             raise core.docvert_exception.debug_exception("Current contents of pipeline", "Error parsing as XML, here it is as plain text: %s\n%s" % (exception, pipeline_value), "text/plain; charset=UTF-8")
24 |         help_text = "In debug mode we want to display an XML tree but if the root node is <html> or there's an HTML namespace then popular browsers will\nrender it as HTML so these have been changed. See core/pipeline_type/debug.py for the details."
25 |         unit_tests = self.get_tests()
26 |         if unit_tests:
27 |             #help_text += "\n\nUnit tests so far in the pipeline:"
28 |             help_text += "\n\nFailed unit tests so far in the pipeline:"
29 |             for value in self.get_tests():
30 |                 #help_text += "\n\t%s:%s" % (value["status"], value["message"])
31 |                 if value["status"] == "fail":
32 |                     help_text += "\n\tFail: %s" % (value["message"])
33 | 
34 |         content_type = 'text/xml'
35 |         if "contentType" in self.attributes:
36 |             content_type = self.attributes['contentType']
37 |         if "zip" in self.attributes:
38 |             content_type = 'application/zip'
39 |             pipeline_value = self.storage.to_zip().getvalue()
40 |         if content_type == 'text/xml':
41 |             help_text += "\n\nConversion files:\n\t" + "\n\t".join(list(self.storage.keys()))
42 |             if hasattr(document, 'getroottree'):
43 |                 document = document.getroottree()
44 |             if document.getroot().tag == "{http://www.w3.org/1999/xhtml}html":
45 |                 pipeline_value = "<root><!-- %s -->%s</root>" % (help_text, lxml.etree.tostring(document.getroot())) 
46 |             else:
47 |                 pipeline_value = "<!-- %s -->%s" % (help_text, lxml.etree.tostring(document.getroot()).decode('utf-8') ) 
48 |             pipeline_value = pipeline_value.replace('"http://www.w3.org/1999/xhtml"', '"XHTML_NAMESPACE_REPLACED_BY_DOCVERT_DURING_DEBUG_MODE"')
49 |             xml_declaration = '<?xml version="1.0" ?>'
50 |             if pipeline_value[0:5] != xml_declaration[0:5]:
51 |                 pipeline_value = xml_declaration + "\n" + pipeline_value
52 |         raise core.docvert_exception.debug_xml_exception("Current contents of pipeline", pipeline_value, content_type)
53 | 


--------------------------------------------------------------------------------
/core/pipeline_type/docbooktoxhtml.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from . import pipeline_item
 3 | import core.docvert_xml
 4 | 
 5 | class DocBookToXHTML(pipeline_item.pipeline_stage):
 6 | 
 7 |     def stage(self, pipeline_value):
 8 |         docbook_to_html_path = self.resolve_pipeline_resource('internal://docbook-to-html.xsl')
 9 |         return core.docvert_xml.transform(pipeline_value, docbook_to_html_path)
10 | 
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/core/pipeline_type/generate.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import os
 3 | import os.path
 4 | import io
 5 | from . import pipeline_item
 6 | import core.docvert
 7 | import core.opendocument
 8 | import core.document_type
 9 | import core.docvert_exception
10 | 
11 | class Generate(pipeline_item.pipeline_stage):
12 |     def stage(self, pipeline_value):
13 |         if 'withFile' not in self.attributes:
14 |             raise needs_with_file_attribute("A process type of Generate needs a withFile attribute containing a filename/path.")
15 |         path = self.resolve_pipeline_resource(self.attributes['withFile'])
16 |         if not os.path.exists(path):
17 |             raise generation_file_not_found("A process type of Generate couldn't find a file at %s" % path)
18 |         #print(path)
19 |         data = open(path, mode='rb')
20 |         doc_type = core.document_type.detect_document_type(data)
21 |         #print(doc_type)
22 |         if doc_type != core.document_type.types.oasis_open_document:
23 |             data = core.docvert.generate_open_document(data)
24 |         document_xml = core.opendocument.extract_useful_open_document_files(data, self.storage, os.path.basename(path))
25 |         return document_xml
26 | 
27 | class needs_with_file_attribute(core.docvert_exception.docvert_exception):
28 |     pass
29 | 
30 | class generation_file_not_found(core.docvert_exception.docvert_exception):
31 |     pass
32 | 


--------------------------------------------------------------------------------
/core/pipeline_type/generatepostconversioneditorfiles.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import os
 3 | import lxml.etree
 4 | import io
 5 | from . import pipeline_item
 6 | import core.docvert_exception
 7 | 
 8 | 
 9 | class GeneratePostConversionEditorFiles(pipeline_item.pipeline_stage):
10 |     def stage(self, pipeline_value):
11 |         return pipeline_value
12 | 
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/core/pipeline_type/getpreface.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import lxml.etree
 3 | from . import pipeline_item
 4 | import core.docvert_exception
 5 | import core.docvert_xml
 6 | 
 7 | class GetPreface(pipeline_item.pipeline_stage):
 8 |     def stage(self, pipeline_value):
 9 |         params = dict(
10 |             loopDepth = 0,
11 |             process = self.attributes['process'],
12 |             customFilenameIndex = 'index.html',
13 |             customFilenameSection = 'section#.html'
14 |         )
15 |         xslt_path = self.resolve_pipeline_resource('internal://each-page.xsl')
16 |         return core.docvert_xml.transform(pipeline_value, xslt_path, params)
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/core/pipeline_type/loop.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import os
 3 | from . import pipeline_item
 4 | import copy
 5 | import core.docvert_exception
 6 | import core.docvert_pipeline
 7 | import core.docvert_xml
 8 | import lxml.etree
 9 | 
10 | class Loop(pipeline_item.pipeline_stage):
11 |     def stage(self, pipeline_value):
12 |         if 'numberOfTimes' not in self.attributes:
13 |             raise no_number_of_times_attribute("In process Loop there wasn't a numberOfTimes attribute.")
14 |         numberOfTimes = self.attributes['numberOfTimes']
15 |         if numberOfTimes.startswith('xpathCount:'):
16 |             xpath = numberOfTimes[len('xpathCount:'):]
17 |             xml = core.docvert_xml.get_document(pipeline_value)
18 |             namespaces = {
19 |                 'xlink':'http://www.w3.org/1999/xlink',
20 |                 'db':'http://docbook.org/ns/docbook',
21 |                 'text':'urn:oasis:names:tc:opendocument:xmlns:text:1.0',
22 |                 'office':'urn:oasis:names:tc:opendocument:xmlns:office:1.0',
23 |                 'html':'http://www.w3.org/1999/xhtml',
24 |                 'xhtml':'http://www.w3.org/1999/xhtml'}
25 |             nodes = xml.xpath(xpath, namespaces=namespaces)
26 |             index = 0
27 |             for node in nodes:
28 |                 index += 1
29 |                 child_depth = copy.copy(self.depth)
30 |                 child_depth.append(str(index))
31 |                 pipeline = core.docvert_pipeline.pipeline_processor(self.storage, self.child_stages, self.pipeline_directory, self.pipeline_storage_prefix, child_depth)
32 |                 child_pipeline_value = lxml.etree.tostring(pipeline_value).decode('utf-8')
33 |                 pipeline.start(child_pipeline_value) #discard return value
34 |         elif numberOfTimes.startswith('substring:'):
35 |             number = int(numberOfTimes[len('substring:'):])
36 |             for index in range(1, number):
37 |                 pass
38 |         elif numberOfTimes.startswith('number:'):
39 |             number = int(numberOfTimes[len('number:'):])
40 |             for index in range(1, number):
41 |                 pass
42 |         return pipeline_value
43 | 
44 | class no_number_of_times_attribute(core.docvert_exception.docvert_exception):
45 |     pass
46 | 
47 | 


--------------------------------------------------------------------------------
/core/pipeline_type/normalizeopendocument.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import lxml.etree
 3 | from . import pipeline_item
 4 | import core.docvert_xml
 5 | 
 6 | class NormalizeOpenDocument(pipeline_item.pipeline_stage):
 7 | 
 8 |     def stage(self, pipeline_value):
 9 |         normalize_opendocument_path = self.resolve_pipeline_resource('internal://normalize-opendocument.xsl')
10 |         return core.docvert_xml.transform(pipeline_value, normalize_opendocument_path)
11 | 
12 | 
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/core/pipeline_type/pipeline_item.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import os.path
 3 | 
 4 | docvert_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 5 | 
 6 | class pipeline_stage(object):
 7 |     def __init__(self, storage, pipeline_directory, attributes, pipeline_storage_prefix=None, child_stages=None, depth=None):
 8 |         self.storage = storage
 9 |         self.pipeline_directory = pipeline_directory
10 |         self.pipeline_id = os.path.basename(pipeline_directory)
11 |         self.pipeline_id_namespace = os.path.basename(os.path.dirname(pipeline_directory))
12 |         self.attributes = attributes
13 |         self.pipeline_storage_prefix = pipeline_storage_prefix
14 |         self.child_stages = child_stages
15 |         self.depth = list() if depth is None else depth
16 | 
17 |     def resolve_pipeline_resource(self, resource_path):
18 |         internal_prefix = 'internal://'
19 |         if resource_path.startswith(internal_prefix):
20 |             return os.path.join(docvert_root, 'core', 'transform', resource_path[len(internal_prefix):])
21 |         return os.path.join(docvert_root, "pipelines", self.pipeline_id_namespace, self.pipeline_id, resource_path)
22 | 
23 |     def log(self, message, log_type='error'):
24 |         log_filename = '%s.log' % log_type
25 |         storage_path = log_filename
26 |         if self.pipeline_storage_prefix is not None:
27 |             storage_path = "%s/%s" % (self.pipeline_storage_prefix, log_filename)
28 |         self.storage[storage_path] = message
29 | 
30 |     def add_tests(self, tests):
31 |         self.storage.add_tests(tests)
32 | 
33 |     def get_tests(self):
34 |         return self.storage.get_tests()
35 | 
36 | 


--------------------------------------------------------------------------------
/core/pipeline_type/serialize.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import os
 3 | from . import pipeline_item
 4 | import lxml.etree
 5 | 
 6 | class Serialize(pipeline_item.pipeline_stage):
 7 |     def stage(self, pipeline_value):
 8 |         storage_path = "%s/%s" % (self.pipeline_storage_prefix, self.attributes['toFile'])
 9 |         if self.pipeline_storage_prefix is None:
10 |             storage_path = self.attributes['toFile']
11 |         if '{customSection}' in storage_path:
12 |             depth_string = 'section'
13 |             depth_string += "-".join(self.depth)
14 |             depth_string += ".html"
15 |             storage_path = storage_path.replace('{customSection}', depth_string) 
16 |         if hasattr(pipeline_value, 'read'):
17 |             self.storage[storage_path] = str(pipeline_value)
18 |         elif isinstance(pipeline_value, lxml.etree._Element) or isinstance(pipeline_value, lxml.etree._XSLTResultTree):
19 |             self.storage[storage_path] = lxml.etree.tostring(pipeline_value)
20 |         else:
21 |             self.storage[storage_path] = str(pipeline_value)
22 |         return pipeline_value
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/core/pipeline_type/serializeopendocument.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import cgi
 3 | import os
 4 | import zipfile
 5 | import lxml.etree
 6 | import io
 7 | from . import pipeline_item
 8 | import core.docvert_exception
 9 | 
10 | class SerializeOpenDocument(pipeline_item.pipeline_stage):
11 |     def stage(self, pipeline_value):
12 |         storage_path = "%s/%s" % (self.pipeline_storage_prefix, self.attributes['toFile'])
13 |         if self.pipeline_storage_prefix is None:
14 |             storage_path = self.attributes['toFile']
15 |         if '{customSection}' in storage_path:
16 |             depth_string = 'section'
17 |             depth_string += "-".join(self.depth)
18 |             depth_string += ".odt"
19 |             storage_path = storage_path.replace('{customSection}', depth_string) 
20 |         if not isinstance(pipeline_value, lxml.etree._Element) and not isinstance(pipeline_value, lxml.etree._XSLTResultTree):
21 |             return pipeline_value
22 |         zipdata = io.StringIO()
23 |         archive = zipfile.ZipFile(zipdata, 'w')
24 |         archive.writestr('mimetype', 'application/vnd.oasis.opendocument.text')
25 |         manifest_xml = '<?xml version="1.0" encoding="UTF-8"?>\n<manifest:manifest xmlns:manifest="urn:oasis:names:tc:opendocument:xmlns:manifest:1.0">\n\t<manifest:file-entry manifest:media-type="application/vnd.oasis.opendocument.text" manifest:version="1.2" manifest:full-path="/"/>\n'
26 |         root = pipeline_value.getroot()
27 |         expected_lxml_root = '{docvert:5}root'
28 |         if str(root.tag) != expected_lxml_root:
29 |             raise core.docvert_exception.unable_to_serialize_opendocument("Can't serialize OpenDocument with a pipeline_value root node of '%s'." % root.tag)
30 |         expected_lxml_child = '{docvert:5}external-file'
31 |         for child in root.iterchildren():
32 |             if str(child.tag) != expected_lxml_child:
33 |                 raise core.docvert_exception.unable_to_serialize_opendocument("Can't serialize OpenDocument with a pipeline_value child node of '%s'." % child.tag)
34 |             filename = str(child.attrib['{docvert:5}name'])
35 |             xml = "".join(map(lxml.etree.tostring, child.getchildren()))
36 |             print("{%s]" % filename)
37 |             archive.writestr(filename, xml)
38 |             manifest_xml += '\t<manifest:file-entry manifest:media-type="%s" manifest:full-path="%s"/>\n' % (cgi.escape('text/xml'), cgi.escape(filename))
39 |         manifest_xml += '\t<manifest:file-entry media-type="" manifest:full-path="Pictures/"/>\n'
40 |         imagetypes = {".svg":"image/svg+xml", ".png":"image/png", ".gif":"image/gif", ".bmp":"image/x-ms-bmp", ".jpg":"image/jpeg", ".jpe":"image/jpeg", ".jpeg":"image/jpeg"}
41 |         for storage_key in list(self.storage.keys()):
42 |             if storage_key.startswith(self.pipeline_storage_prefix):
43 |                 extension = os.path.splitext(storage_key)[1]
44 |                 if extension in list(imagetypes.keys()):
45 |                     odt_path = "Pictures/%s" % os.path.basename(storage_key)
46 |                     manifest_xml += '\t<manifest:file-entry media-type="%s" manifest:full-path="%s"/>\n' % (cgi.escape(imagetypes[extension]), cgi.escape(odt_path) )
47 |                     archive.writestr(odt_path, self.storage[storage_key])
48 |         manifest_xml += '</manifest:manifest>'
49 |         archive.writestr('META-INF/manifest.xml', manifest_xml.encode("utf-8") )
50 |         archive.close()
51 |         zipdata.seek(0)
52 |         self.storage.add(storage_path, zipdata.read())
53 | 
54 | 
55 | 
56 | 


--------------------------------------------------------------------------------
/core/pipeline_type/splitpages.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import lxml.etree
 3 | from . import pipeline_item
 4 | import core.docvert_exception
 5 | import core.docvert_xml
 6 | 
 7 | class SplitPages(pipeline_item.pipeline_stage):
 8 |     def stage(self, pipeline_value):
 9 |         depth_string = '-'.join(self.depth)
10 |         params = dict(
11 |             loopDepth = depth_string,
12 |             process = self.attributes['process'],
13 |             customFilenameIndex = 'index.html',
14 |             customFilenameSection = 'section#.html'
15 |         )
16 |         xslt_path = self.resolve_pipeline_resource('internal://each-page.xsl')
17 |         
18 |         return core.docvert_xml.transform(pipeline_value, xslt_path, params)
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/core/pipeline_type/test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import os
 3 | import os.path
 4 | import lxml.etree
 5 | import io
 6 | from . import pipeline_item
 7 | import core.docvert_exception
 8 | import core.docvert
 9 | import core.docvert_xml
10 | 
11 | 
12 | class Test(pipeline_item.pipeline_stage):
13 |     def stage(self, pipeline_value):
14 |         def get_size(data):
15 |             if hasattr(data, 'read'):
16 |                 data.seek(0, os.SEEK_END)
17 |                 return data.tell()
18 |             return len(data)
19 | 
20 |         if not ("withFile" in self.attributes or "extensionExist" in self.attributes):
21 |             raise no_with_file_attribute("In process Test there wasn't a withFile or extensionExist attribute.")
22 |         if pipeline_value is None:
23 |             raise xml_empty("Cannot Test with %s because pipeline_value is None." % self.attributes['withFile'])
24 |         test_result = None
25 |         if "withFile" in self.attributes:
26 |             test_path = self.resolve_pipeline_resource(self.attributes['withFile'])
27 |             if not os.path.exists(test_path):
28 |                 raise file_not_found("Test file not found at %s" % test_path)
29 |             prefix = ""
30 |             if "prefix" in self.attributes:
31 |                 prefix = "%s: " % self.attributes["prefix"]
32 |             if test_path.endswith(".rng"): # RelaxNG test
33 |                 relaxng_response = core.docvert_xml.relaxng(pipeline_value, test_path)
34 |                 node_name = "pass"
35 |                 if not relaxng_response["valid"]:
36 |                     node_name = "fail"
37 |                 test_result = '<group xmlns="docvert:5"><%s>%s%s</%s></group>' % (node_name, prefix, core.docvert_xml.escape_text(str(relaxng_response["log"])), node_name)
38 |             elif test_path.endswith(".txt"): # Substring test (new substring on each line)
39 |                 document_string = str(pipeline_value)
40 |                 if hasattr(pipeline_value, "read"):
41 |                     document_string = pipeline_value.read()
42 |                     pipeline_value.seek(0)
43 |                 test_result = '<group xmlns="docvert:5">'
44 |                 for line in open(test_path, 'r').readlines():
45 |                     test_string = line[0:-1].strip()
46 |                     if len(test_string) == 0: continue
47 |                     node_name = "fail"
48 |                     description = "doesn't contain"
49 |                     occurences = document_string.count(test_string)
50 |                     if occurences == 1:
51 |                         node_name = "pass"
52 |                         description = "contains one of"
53 |                     elif occurences > 1:
54 |                         node_name = "fail"
55 |                         description = "contains %i of" % occurences
56 |                     test_result += '<%s>%s%s</%s>' % (node_name, prefix, core.docvert_xml.escape_text('Document %s the string "%s"' % (description, test_string)), node_name)
57 |                 test_result += '</group>'
58 |             else: #XSLT
59 |                 test_result = core.docvert_xml.transform(pipeline_value, test_path, dict(**self.attributes))
60 |         elif "extensionExist" in self.attributes:
61 |             extension = self.attributes["extensionExist"]
62 |             extension_exist_count = 1
63 |             if "extensionExistCount" in self.attributes:
64 |                 extension_exist_count = int(self.attributes["extensionExistCount"])
65 |             original_extension_exist_count = extension_exist_count
66 |             for key in list(self.storage.keys()):
67 |                 if key.endswith('thumbnail.png'): #ignore any inbuilt thumbnails
68 |                     continue
69 |                 if key.endswith(extension):
70 |                     if self.pipeline_storage_prefix is None or (self.pipeline_storage_prefix and key.startswith(self.pipeline_storage_prefix)):
71 |                         if get_size(self.storage[key]) > 0:
72 |                             extension_exist_count -= 1
73 |             test_result = "pass"
74 |             text = 'There were %i files with the extension "%s" as expected.' % (original_extension_exist_count, extension)
75 |             if extension_exist_count != 0:
76 |                 test_result = "fail"
77 |                 text = 'There were only %i (%i-%i) files instead of %i with the extension "%s". ' % (original_extension_exist_count - extension_exist_count, original_extension_exist_count, extension_exist_count, original_extension_exist_count, extension)
78 |             test_result = '<group xmlns="docvert:5"><%s>%s</%s></group>' % (test_result, core.docvert_xml.escape_text(text), test_result)
79 |         if "debug" in self.attributes:
80 |             raise core.docvert_exception.debug_xml_exception("Test Results", str(test_result), "text/xml; charset=UTF-8")
81 |         self.add_tests(test_result)
82 |         return pipeline_value        
83 | 
84 | 
85 | class no_with_file_attribute(core.docvert_exception.docvert_exception):
86 |     pass
87 | 
88 | class file_not_found(core.docvert_exception.docvert_exception):
89 |     pass
90 | 
91 | class xml_empty(core.docvert_exception.docvert_exception):
92 |     pass
93 | 


--------------------------------------------------------------------------------
/core/pipeline_type/transform.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import os
 3 | import io
 4 | from . import pipeline_item
 5 | import core.docvert_exception
 6 | import core.docvert_xml
 7 | 
 8 | class Transform(pipeline_item.pipeline_stage):
 9 |     def stage(self, pipeline_value):
10 |         if "withFile" not in self.attributes:
11 |             raise no_with_file_attribute("In process Transform there wasn't a withFile attribute.")
12 |         if pipeline_value is None:
13 |             raise xml_empty("Cannot Transform with %s because pipeline_value is None." % self.attributes['withFile'])
14 |         xslt_path = self.resolve_pipeline_resource(self.attributes['withFile'])
15 |         if not os.path.exists(xslt_path):
16 |             raise xslt_not_found("XSLT file not found at %s" % xslt_path)
17 |         return core.docvert_xml.transform(pipeline_value, xslt_path)
18 | 
19 | class no_with_file_attribute(core.docvert_exception.docvert_exception):
20 |     pass
21 | 
22 | class xslt_not_found(core.docvert_exception.docvert_exception):
23 |     pass
24 | 
25 | class xml_empty(core.docvert_exception.docvert_exception):
26 |     pass
27 | 


--------------------------------------------------------------------------------
/core/pipeline_type/transformopendocumenttodocbook.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import lxml.etree
 3 | from . import pipeline_item
 4 | import core.docvert_xml
 5 | import core.docvert_exception
 6 | 
 7 | class TransformOpenDocumentToDocBook(pipeline_item.pipeline_stage):
 8 | 
 9 |     def stage(self, pipeline_value):
10 |         normalize_opendocument_path = self.resolve_pipeline_resource('internal://normalize-opendocument.xsl')
11 |         pipeline_value = core.docvert_xml.transform(pipeline_value, normalize_opendocument_path)
12 |         if "debugAfterOpenDocumentNormalization" in self.attributes:
13 |             pipeline_value = lxml.etree.tostring(pipeline_value)
14 |             raise core.docvert_exception.debug_xml_exception("Current contents of pipeline", pipeline_value, 'text/xml')
15 |         opendocument_to_docbook_path = self.resolve_pipeline_resource('internal://opendocument-to-docbook.xsl')
16 |         pipeline_value = core.docvert_xml.transform(pipeline_value, opendocument_to_docbook_path)
17 |         normalize_docbook_path = self.resolve_pipeline_resource('internal://normalize-docbook.xsl')
18 |         pipeline_value = core.docvert_xml.transform(pipeline_value, normalize_docbook_path)
19 |         if "debugAfterDocBookNormalization" in self.attributes:
20 |             pipeline_value = lxml.etree.tostring(pipeline_value)
21 |             raise core.docvert_exception.debug_xml_exception("Current contents of pipeline", pipeline_value, 'text/xml')
22 |         return pipeline_value
23 | 
24 | 
25 | 
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/core/pipeline_type/writemetadata.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import os
 3 | from . import pipeline_item
 4 | import core.docvert_exception
 5 | import core.docvert_xml
 6 | import lxml.etree
 7 | 
 8 | class WriteMetaData(pipeline_item.pipeline_stage):
 9 |     def stage(self, pipeline_value):
10 |         opendocument_xml_path = "%s/%s" % (self.pipeline_storage_prefix, 'opendocument.xml')
11 |         xslt_path = self.resolve_pipeline_resource('internal://extract-metadata.xsl')
12 |         if not os.path.exists(xslt_path):
13 |             raise xslt_not_found("XSLT file not found at %s" % xslt_path)
14 |         metadata_xml_path = "%s/%s" % (self.pipeline_storage_prefix, 'docvert-meta.xml')
15 |         metadata_xml = core.docvert_xml.transform(self.storage.get(opendocument_xml_path), xslt_path)
16 |         if isinstance(metadata_xml, lxml.etree._Element) or isinstance(metadata_xml, lxml.etree._XSLTResultTree):
17 |             metadata_xml = lxml.etree.tostring(metadata_xml)
18 |         self.storage[metadata_xml_path] = metadata_xml
19 |         return pipeline_value
20 | 
21 | class xslt_not_found(core.docvert_exception.docvert_exception):
22 |     pass
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/core/transform/extract-metadata.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding="UTF-8"?>
 2 | <xsl:stylesheet	version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:docvert="docvert:5">
 3 | <xsl:output	method="xml" version="1.0" encoding="UTF-8" indent="yes" omit-xml-declaration="no"/>
 4 | 
 5 | <xsl:template match="/">
 6 |     <xsl:copy-of select="//docvert:external-file[@docvert:name='meta.xml']/node()"/>
 7 | </xsl:template>
 8 | 
 9 | </xsl:stylesheet>
10 | 
11 | 


--------------------------------------------------------------------------------
/core/transform/normalize-docbook.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding="UTF-8"?>
 2 | <xsl:stylesheet	version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:db="http://docbook.org/ns/docbook">
 3 | <xsl:output method="xml" omit-xml-declaration="no"/>
 4 | 
 5 | 
 6 | <xsl:template match="@*|node()">
 7 |    <xsl:copy>
 8 |       <xsl:apply-templates select="@*|node()"/>
 9 |    </xsl:copy>
10 | </xsl:template>
11 | 
12 | </xsl:stylesheet>
13 | 


--------------------------------------------------------------------------------
/core/transform/turn-document-into-test.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding="UTF-8"?>
 2 | <xsl:stylesheet	version="1.0"
 3 |     xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 4 |     xmlns:xslo="http://www.w3.org/1999/XSL/TransformAlias">
 5 |     <xsl:namespace-alias stylesheet-prefix="xslo" result-prefix="xsl"/>
 6 | 
 7 | 	<xsl:output method="xml" omit-xml-declaration="no"/>
 8 | 
 9 |     <xsl:template match="/">
10 |         <xslo:stylesheet version="1.0">
11 |             <xsl:copy>
12 |                 <xsl:apply-templates select="@*|node()"/>
13 |             </xsl:copy>
14 |         </xslo:stylesheet>
15 |     </xsl:template>
16 | 
17 |     <xsl:template match="node()|@*">
18 |         <xslo:template match="node()|@*">
19 |             
20 |         <xsl:text disable-output-escaping="True">&lt;xsl:template&gt;</xsl:text>
21 |         <xsl:for-each select="ancestor-or-self::*">
22 |         </xsl:for-each>
23 |         <xsl:copy>
24 |             <xsl:apply-templates select="@*|node()"/>
25 |         </xsl:copy>
26 |         <xsl:text disable-output-escaping="True">&lt;/xsl:template&gt;</xsl:text>
27 |     </xsl:template>
28 | 
29 | </xsl:stylesheet>
30 | 


--------------------------------------------------------------------------------
/core/web_service_themes/README:
--------------------------------------------------------------------------------
1 | Warning this directory is available via the web server. Do not store sensitive files here or in subdirectories.
2 | 


--------------------------------------------------------------------------------
/core/web_service_themes/default/docvertedges2-small.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/core/web_service_themes/default/docvertedges2-small.gif


--------------------------------------------------------------------------------
/core/web_service_themes/default/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/core/web_service_themes/default/favicon.ico


--------------------------------------------------------------------------------
/core/web_service_themes/default/index.js:
--------------------------------------------------------------------------------
  1 | if(history) history.navigationMode = 'compatible';
  2 | 
  3 | if(location && location.hash.toString().indexOf("back-reload") != -1){ //see http://stackoverflow.com/questions/158319/cross-browser-onload-event-and-the-back-button
  4 |     window.location.href = (window.location.href.toString().indexOf("#") != -1) ? window.location.href.toString().substring(0, window.location.href.toString().indexOf("#")) : window.location.href.toString()
  5 | }
  6 | 
  7 | var docvert = {
  8 |     slide_in: function(){
  9 |         if(location && location.hash.toString().indexOf("slide-in") != -1){
 10 |             location.hash = ""
 11 |             var form_element = $("form")
 12 |             form_element.css({"top":-form_element.height(), position:"relative"})
 13 |             form_element.animate({top:0}, "slow")
 14 |         }
 15 |     },
 16 | 
 17 |     upload_file_change: function(event){
 18 |         $(event.target).parent().append($(event.target).clone())
 19 |         var text = $(event.target).val()
 20 |         var full_text = ""
 21 |         if(text.length > 25) {
 22 |             full_text = text
 23 |             text = text.substring(0,15) + "\u2026" + text.substring(text.length-10)
 24 |         }
 25 |         var list_item = $("<li>").attr("title",full_text).append($(event.target).attr("id","").css("display","none")).text(text).append(' <a href="#delete-item" class="delete" title="Remove upload item">&times</a>').hide()
 26 |         $("#upload_list").append(list_item)
 27 |         list_item.slideDown()
 28 |         $("#upload_submit").removeClass("disabled").addClass("enabled")
 29 |         $("#submit_error").slideUp()
 30 |         $(".upload_list").slideDown()
 31 |     },
 32 | 
 33 |     upload_file_delete: function(event){
 34 |         var container = $(event.target).parent()
 35 |         if(container.parent().children().length === 1) {
 36 |             $("#upload_submit").addClass("disabled").removeClass("enabled")
 37 |             $(".upload_list").slideUp()
 38 |         }
 39 |         container.slideUp('slow',function(){
 40 |             container.remove()
 41 |         })
 42 |         return false
 43 |     },
 44 | 
 45 |     upload_file_mouseover: function(event) {
 46 |         $("#upload_from_file").addClass("upload_button_hover")
 47 |     },
 48 | 
 49 |     upload_file_mouseout: function(event) {
 50 |         $("#upload_from_file").removeClass("upload_button_hover")
 51 |     },
 52 | 
 53 |     reveal_upload_web_dialog: function(event){
 54 |         var sender = $(event.target)
 55 |         sender_offset = sender.offset()
 56 |         $("#upload_from_web_dialog").show().css({"position":"absolute","left":sender_offset.left+"px","top":(sender_offset.top+sender.height())+"px"})
 57 |         $("#upload_from_web_dialog input").val("http://\u2026").select()
 58 |     },
 59 | 
 60 |     is_url: function(value){
 61 |         //an intentionally rather liberal url detector
 62 |         var url_pattern = /^(ftp|http|https):\/\/.*?\//i
 63 |         return url_pattern.test(value)
 64 |     },
 65 | 
 66 |     hide_upload_web_dialog: function(event){
 67 |         $("#upload_from_web_dialog").hide()
 68 |         var url = $("#upload_from_web_dialog input").val()
 69 |         if(docvert.is_url(url)) {
 70 |             docvert.upload_file_change(event)
 71 |         }
 72 |     },
 73 | 
 74 |     replace_select: function(select, width){
 75 |     },
 76 | 
 77 |     check_submit: function(event){
 78 |         docvert.hide_upload_web_dialog()
 79 |         var should_submit = ($("#upload_list li").length > 0)
 80 |         if(!should_submit) {
 81 |             $("#submit_error").slideDown().find("span").animate({"marginLeft": "50px"}, function(){
 82 |                 $("#submit_error span").animate({"marginLeft": "-50px"}, function(){
 83 |                     $("#submit_error span").animate({"marginLeft": "50px"}, function(){
 84 |                         $("#submit_error").slideDown().find("span").animate({"marginLeft": "-50px"}, function(){
 85 |                             $("#submit_error").slideDown().find("span").animate({"marginLeft": "0px"})
 86 |                         })
 87 |                     })
 88 |                 })
 89 |             })
 90 |             return false
 91 |         }
 92 |         if($("#after_conversion_preview").is(":checked")){
 93 |             location.hash = "back-reload" //see http://stackoverflow.com/questions/158319/cross-browser-onload-event-and-the-back-button
 94 |             var form_element = $("form")
 95 |             form_element.css({"position":"relative"}).animate({"top": -(form_element.offset().top + form_element.height() + 50)},"slow", function(){
 96 |                 $("form").submit()
 97 |             })
 98 |             return false
 99 |         }
100 |     },
101 | 
102 |     click_advanced: function(){
103 |         var inner = $(this).parents("fieldset").find(".inner")
104 |         if(inner.hasClass("closed")) {
105 |             $("span", this).html("&#9660;")
106 |             inner.removeClass("closed").slideDown()
107 |         } else {
108 |             $("span", this).html("&#9654;")
109 |             inner.addClass("closed").slideUp()
110 |         }
111 |         return false
112 |     },
113 | 
114 |     keydown: function(event){
115 |         var escape_key = 27
116 |         if (event.keyCode == escape_key) {
117 |             $("#upload_from_web_dialog").hide()
118 |         }
119 |     },
120 | 
121 |     reset_check_libreoffice_status: function(){
122 |         docvert.number_of_libreoffice_checks_remaining = 10
123 |     },
124 | 
125 |     check_libreoffice_status: function(event) {
126 |         $.ajax({
127 |             url: '/libreoffice-status',
128 |             dataType: 'json',
129 |             success: function(data, textStatus, jqXHR){
130 |                 if(data['libreoffice-status']) {
131 |                     $("#libreOfficeStatus").removeClass("libreOfficeStatus_False").addClass("libreOfficeStatus_True")
132 |                 } else {
133 |                     $("#libreOfficeStatus").removeClass("libreOfficeStatus_True").addClass("libreOfficeStatus_False")
134 |                 }
135 |                 docvert.number_of_libreoffice_checks_remaining -= 1
136 |                 if(docvert.number_of_libreoffice_checks_remaining > 0){
137 |                     docvert.libreoffice_status_timer = setTimeout(docvert.check_libreoffice_status, 1000)
138 |                 }
139 |                 //$("#advanced").text(docvert.number_of_libreoffice_checks_remaining)
140 |             }
141 |         })
142 |     }
143 | }
144 | 
145 | $(document).ready(function(){
146 |     docvert.slide_in()
147 |     $("#upload_submit").addClass("disabled").removeClass("enabled")
148 |     $(".upload_list").hide()
149 |     $(".delete").live("click", docvert.upload_file_delete)
150 |     var upload_file = $("#upload_file")
151 |     upload_file.change(docvert.upload_file_change)
152 |                .mouseover(docvert.upload_file_mouseover)
153 |                .mouseout(docvert.upload_file_mouseout)
154 |     $("#upload_documents label").css({
155 |         "width":upload_file.width() + "px",
156 |         "height": upload_file.height() + "px",
157 |         "margin-right": - upload_file.width() + "px"})
158 |     $("#upload_from_web label").click(docvert.reveal_upload_web_dialog)
159 |     $("#upload_from_web_dialog").hide()
160 |     $("#upload_from_web_dialog input").blur(docvert.hide_upload_web_dialog)
161 |     $("fieldset,#button_tray").width((upload_file.width() * 2) + 30)
162 |     $("#page,form").width((upload_file.width() * 2) + 53)
163 |     $("#upload_submit").click(docvert.check_submit)
164 |     $("select").dropp()
165 |     $("#advanced .inner").addClass("closed").hide()
166 |     $("#advanced legend a").click(docvert.click_advanced)
167 |     docvert.reset_check_libreoffice_status()
168 |     docvert.libreoffice_status_timer = setTimeout(docvert.check_libreoffice_status, 1000)
169 |     $("*").live("focus click", docvert.reset_check_libreoffice_status)
170 |     $("#break_up_pages").change(function(){
171 |         if($(this).is(":checked")){
172 |             $("#autopipelines_options").slideDown().parent()
173 |         } else {
174 |             $("#autopipelines_options").slideUp()
175 |         }
176 |         $("#autopipeline").nextAll(".dropp_dropdown_list").width(upload_file.width() * 2 + 30).css("clear","both")
177 |     }).change()
178 | }).keydown(docvert.keydown)
179 | 
180 | 


--------------------------------------------------------------------------------
/core/web_service_themes/default/index.tpl:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 |     <head>
 4 |         <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 5 |         <title>Docvert - Web Service</title>
 6 |         <link rel="stylesheet" type="text/css" href="static/default/screen.css">
 7 |         <script type="text/javascript" src="lib/jquery/jquery-1.5.min.js"></script>
 8 |         <script type="text/javascript" src="static/default/jquery.dropp.js"></script>
 9 |         <script type="text/javascript" src="static/default/index.js"></script>
10 |     </head>
11 |     <body onunload="" class="index-page">
12 |         <ul id="menu">
13 |             <li><a href="tests">Tests</a></li>
14 |             <li class="current"><a href="#index">Web Service</a></li>
15 |         </ul>
16 |         <h1>Doc<span class="syllable">vert</span> <span class="version">6</span> <span class="slogan"><abbr title="Microsoft">MS</abbr>Word to Open Standards</span></h1>
17 |         <form method="post" action="web-service" enctype="multipart/form-data">
18 |             <div id="page">
19 |                 <fieldset id="upload_fieldset">
20 |                     <legend>Upload Documents</legend>
21 |                     <div id="upload_documents">
22 |                         <div id="upload_from_file" class="upload_button">
23 |                             <label for="upload_file"><span>From File</span></label>
24 |                             <input type="file" name="upload_file[]" id="upload_file" multiple="multiple">
25 |                         </div>
26 |                         <div id="upload_from_web" class="upload_button">
27 |                             <label for="upload_web"><span>From Web</span></label>
28 |                         </div>
29 |                     </div>
30 |                     <h2 class="upload_list">Documents to Convert</h2>
31 |                     <ul id="upload_list">
32 |                     </ul>
33 |                 </fieldset>
34 |                 <fieldset id="pipelines">
35 |                     <legend>Theme (<abbr title="Extensible Markup Language">XML</abbr> Pipeline)</legend>
36 |                     <select name="pipeline" id="pipeline">
37 | % for pipeline in pipelines:
38 |                         <option value="{{pipeline['id']}}">{{pipeline['name']}}</option>
39 | % end
40 |                     </select>
41 |                 </fieldset>
42 |                 <fieldset id="autopipelines">
43 |                     <legend>
44 |                         <input type="hidden" name="break_up_pages_ui_version" id="break_up_pages_ui_version" value="2">
45 |                         <label for="break_up_pages">Break over multiple pages? </label><input type="checkbox" name="break_up_pages" id="break_up_pages"/>
46 |                     </legend>
47 |                     <div id="autopipelines_options">
48 |                         <p class="break_pages_note"><span>Please note that some pipelines don't support multiple pages.</span></p>
49 |                         <select name="autopipeline" id="autopipeline">
50 | % for auto_pipeline in auto_pipelines:
51 |                             <option value="{{auto_pipeline['id']}}">{{auto_pipeline['name']}}</option>
52 | % end
53 |                         </select>
54 |                     </div>
55 |                 </fieldset>
56 |                 <div id="upload_from_web_dialog">
57 |                     <input type="text" name="upload_web[]" id="upload_web">
58 |                 </div>
59 |                 <fieldset id="advanced">
60 |                     <legend><a href="#advanced">Advanced <span class="showHide">&#9654;</span></a></legend>
61 |                     <div class="inner">
62 |                          <p id="afterconversion">
63 |                             <label for="after_conversion_preview"><input type="radio" id="after_conversion_preview" name="afterconversion" value="preview" checked="checked">Preview conversion</label> &nbsp;
64 |                             <label for="after_conversion_zip"><input type="radio" id="after_conversion_zip" name="afterconversion" value="zip">Download .ZIP</label>
65 |                         </p>
66 |                     </div>
67 |                 </fieldset>
68 |                 <div id="submit_error">
69 |                     <span>Please choose a file or web <abbr title="uniform resource locator">URL</abbr> to convert</span>
70 |                 </div>
71 |                 <div id="button_tray">
72 |                     <input type="submit" value="Submit" id="upload_submit">
73 |                 </div>
74 |             </div>
75 |             <div id="libreOfficeStatus" class="libreOfficeStatus_{{libreOfficeStatus}}">LibreOffice <span></span></div>
76 |         </form>
77 |         <div id="usageNote">
78 |             <h2>Dear programmers,</h2>
79 |             <p>this form sends files via HTTP POST so do the same in your software to build upon this web service.</p>
80 |         </div>
81 |     </body>
82 | </html>
83 | 


--------------------------------------------------------------------------------
/core/web_service_themes/default/jquery.dropp.js:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Dropp
  3 |  * http://github.com/matrushka/Dropp
  4 |  * @requires jQuery v1.3 or later
  5 |  * 
  6 |  * Dropp is a jQuery plugin which replaces regular droprown menus ( <select> elements ) with stylable alternatives.
  7 |  *
  8 |  * 2010 - Baris Gumustas
  9 |  */
 10 | (function ($) {
 11 | 	$.fn.dropp = function (user_settings) {
 12 | 		var settings = {
 13 | 			'phrase_on_multiple'          : false,
 14 | 			'class_dropdown_wrapper'      : 'dropdown_wrapper',
 15 | 			'class_dropdown_list'         : 'dropdown_list',
 16 | 			'class_visible_dropdown'      : 'dropdown',
 17 | 			'class_option_selected'       : 'selected',
 18 | 			'substract_list_border_width' : true
 19 | 		};
 20 | 		if (user_settings) {
 21 | 			$.extend(settings, user_settings);
 22 | 		}
 23 | 		return this.each(function () {
 24 | 			var select, dropdown, list, values, list_width, widest_element;
 25 | 			widest_element = null;
 26 | 			
 27 | 			select = $(this);
 28 | 			select.hide();
 29 | 			select.wrap('<div></div>').parent().attr('class', select.attr('class')).addClass(settings.class_dropdown_wrapper);
 30 | 			
 31 | 			dropdown = $('<a href="#"/>').addClass(settings.class_visible_dropdown).appendTo(select.parent());
 32 | 			list = $('<ul/>').addClass(settings.class_dropdown_list).addClass('dropp_dropdown_list').hide().appendTo(select.parent());
 33 | 			
 34 | 			// duplicate this line for dropdown opening
 35 | 			list_width = dropdown.get(0).offsetWidth;
 36 | 			
 37 | 			if (settings.substract_list_border_width) {
 38 | 				list_width -= (parseInt(list.css('borderLeftWidth'), 10) + parseInt(list.css('borderRightWidth'), 10));
 39 | 			}
 40 | 			
 41 | 			list.css('min-width', list_width);
 42 | 			
 43 | 			list.css('position', 'absolute').css('z-index', '9999');
 44 | 			
 45 | 			select.find('option').each(function () {
 46 | 				var item, list, list_item, link;
 47 | 				item = $(this);
 48 | 				list = item.closest('.' + settings.class_dropdown_wrapper).find('ul.dropp_dropdown_list');
 49 | 				list_item = $('<li/>').appendTo(list);
 50 | 				link = $('<a href="#"/>').text(item.text());
 51 | 				
 52 | 				link.data('option', item);
 53 | 				list_item.append(link);
 54 | 				item.data('replacement', link);
 55 | 				
 56 | 				if (typeof select.attr('multiple') !== undefined && (select.attr('multiple') === true || select.attr('multiple') === 'multiple')) {
 57 | 					if (typeof item.attr('selected') !== undefined && (item.attr('selected') === true || item.attr('selected') === 'selected')) {
 58 | 						link.addClass(settings.class_option_selected);
 59 | 					}
 60 | 				}
 61 | 				
 62 | 				// Select Event Listener
 63 | 				link.bind('select', function (event, trigger_drowndown) {
 64 | 					var link, wrapper, item, select, dropdown, values;
 65 | 					link = $(this);
 66 | 					wrapper = link.closest('.' + settings.class_dropdown_wrapper);
 67 | 					item = link.data('option');
 68 | 					select = wrapper.find('select');
 69 | 					dropdown = wrapper.find('.' + settings.class_visible_dropdown);
 70 | 					
 71 | 					if (typeof select.attr('multiple') === 'undefined' || select.attr('multiple') === false) {
 72 | 						select.find('option:selected').removeAttr('selected');
 73 |                         var selected_text = $('<div/>').text($(this).text()).html();
 74 |     					dropdown.html(selected_text + '<span class="ddl_icon">&#9660;</span>');
 75 | 						//dropdown.text($(this).text() + "-");
 76 | 						item.attr('selected', 'selected');
 77 | 						list.hide();
 78 | 					} else {
 79 | 						if (typeof item.attr('selected') === 'undefined' || item.attr('selected') === false) {
 80 | 							item.attr('selected', 'selected');
 81 | 							link.addClass(settings.class_option_selected);
 82 | 						} else {
 83 | 							item.removeAttr('selected');
 84 | 							link.removeClass(settings.class_option_selected);
 85 | 						}
 86 | 						
 87 | 						values = [];
 88 | 						select.find('option:selected').each(function () {
 89 | 							values.push($(this).text());
 90 | 						});
 91 | 						
 92 | 						if (values.length === 0) {
 93 | 							if (typeof select.attr('placeholder') !== 'undefined') {
 94 | 								dropdown.text(select.attr('placeholder'));
 95 | 							} else {
 96 | 								dropdown.html('&nbsp;');
 97 | 							}
 98 | 						} else {
 99 | 							if (values.length > 1 && settings.phrase_on_multiple) {
100 | 								dropdown.text(settings.phrase_on_multiple);
101 | 							} else {
102 | 								dropdown.text(values.join(', '));
103 | 							}
104 | 						}
105 | 					}
106 | 
107 | 					if (trigger_drowndown) {
108 | 						select.trigger('change');
109 | 					}
110 | 				});
111 | 				// Click Event
112 | 				link.click(function () {
113 | 					$(this).trigger('select', [true]);
114 | 					return false;
115 | 				});
116 | 			});
117 | 			
118 | 
119 | 			// Check for IE and apply a hack here for min-width problems
120 | 			if ($.browser.msie && $.browser.version === '6.0') {
121 | 				// Look for the widest option
122 | 				list.find('a').each(function(){
123 | 					if (widest_element === null || widest_element.width() < $(this).width()) {
124 | 						widest_element = $(this);
125 | 					}
126 | 				});
127 | 				if (widest_element.width() > list_width) {
128 | 					list.width(widest_element.width());
129 | 				} else {
130 | 					list.width(list_width);
131 | 				}
132 | 				
133 | 			}
134 | 			
135 | 			// Each loop ends here
136 | 			if (select.find('option:selected').length === 0) {
137 | 				if (typeof select.attr('placeholder') !== 'undefined') {
138 | 					dropdown.text(select.attr('placeholder'));
139 | 				} else {
140 | 					dropdown.html('&nbsp;');
141 | 				}
142 | 			} else {
143 | 				if (typeof select.attr('multiple') !== undefined && (select.attr('multiple') === true || select.attr('multiple') === 'multiple')) {
144 | 					values = [];
145 | 					select.find('option:selected').each(function () {
146 | 						values.push($(this).text());
147 | 					});
148 | 					dropdown.html(values.join(', '));
149 | 				} else {
150 |                     var selected_text = $('<div/>').text($(this).find('option:selected').text()).html();
151 | 					dropdown.html(selected_text + '<span class="ddl_icon">&#9660;</span>');
152 | 				}
153 | 			}
154 | 			
155 | 			dropdown.click(function () {
156 | 				if (list.is(':visible')) {
157 | 					list.hide();
158 | 					$('ul.dropp_dropdown_list').hide();
159 | 				} else {
160 | 					$('ul.dropp_dropdown_list').hide();
161 | 					list.show();
162 | 				}
163 | 				return false;
164 | 			});
165 | 			
166 | 			$(document).click(function () {
167 | 				list.hide();
168 | 			});
169 | 			
170 | 			$('.' + settings.class_dropdown_wrapper).click(function (event) {
171 | 				event.stopPropagation();
172 | 			});
173 | 		});
174 | 	};
175 | }(jQuery));
176 | 


--------------------------------------------------------------------------------
/core/web_service_themes/default/loading.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/core/web_service_themes/default/loading.gif


--------------------------------------------------------------------------------
/core/web_service_themes/default/preview.css:
--------------------------------------------------------------------------------
 1 | body {
 2 |     background:white;
 3 |     color:black;
 4 |     font-family:Helvetica, Tahoma;
 5 |     }
 6 | 
 7 | .page {
 8 |     float:right;
 9 |     width:70%;
10 |     }
11 | 
12 | #pagesMenu {
13 |     float:left;
14 |     width:30%;
15 | }
16 |     #pagesMenu h1,
17 |     #pagesMenu li {
18 |         font-size:small;
19 |     }
20 | 
21 |     #pagesMenu ul {
22 |         margin:0px;
23 |         padding:0px;
24 |     } 
25 | 
26 | #nextPreviousMenu {
27 |     border-top: solid 1px #cccccc;
28 |     float:right;
29 |     width:70%;
30 |     text-align:left;
31 | }
32 |     #nextPreviousMenu ul {
33 |         list-style:none;
34 |         margin:0px;
35 |         padding:0px;
36 |     }
37 | 
38 |     #nextPreviousMenu h1,
39 |     #nextPreviousMenu li {
40 |         font-size:small;
41 |     }
42 | 
43 | 
44 | h1,
45 | h2,
46 | h3,
47 | h4,
48 | h5,
49 | h6 {
50 |     margin:1em 0px 0px 0px;
51 |     }
52 | 
53 | .align-left {
54 |     text-align:left;
55 |     }
56 | 
57 | .align-center {
58 |     text-align:center;
59 |     }
60 | 
61 | 
62 | .align-right {
63 |     text-align:right;
64 |     }
65 | 


--------------------------------------------------------------------------------
/core/web_service_themes/default/tests.js:
--------------------------------------------------------------------------------
 1 | var docvert = {
 2 |     click_test: function(event) {
 3 |         var list = $(event.target).parent()
 4 |         var sublist = list.find("ul")
 5 |         if(sublist.length == 0) {
 6 |             sublist = $("<ul/>")
 7 |         } else {
 8 |             sublist.empty()
 9 |         }
10 |         list.append(sublist)
11 |         $(list).find(".testSummary").removeClass("pass fail").addClass("result").text("?")
12 |         sublist.html("<li>please wait...</li>").slideDown()
13 |         $.ajax({
14 |             url: $(event.target).attr("href") + '?suppress_error=true',
15 |             dataType: 'json',
16 |             error: docvert.error,
17 |             success: function(data, textStatus, jqXHR){
18 |                 if(sublist.length != 1) {
19 |                     return alert("Can't find a sublist.")
20 |                 }
21 |                 sublist.slideUp(function(){
22 |                     sublist.empty()
23 |                     var test_count = {"pass":0,"fail":0}
24 |                     var list_items_pass = "";
25 |                     var list_items_fail = "";
26 |                     $(data).each(function(key,value){
27 |                         var text;
28 |                         if(value.status == "fail") {
29 |                             test_count.fail++
30 |                             text = "&#x2718;";
31 |                             list_items_fail += $('<li><span class="' + value.status + '">' + text + '</span>' + $('<div/>').text(value.message).html() + '</li>').wrap("<div/>").parent().html()
32 |                         } else {
33 |                             test_count.pass++
34 |                             text = "&#x2714;";
35 |                             list_items_pass += $('<li><span class="' + value.status + '">' + text + '</span>' + $('<div/>').text(value.message).html() + '</li>').wrap("<div/>").parent().html()
36 |                         }
37 |                     })
38 |                     sublist.append(list_items_fail + list_items_pass)
39 |                     var maximum_rows = 6
40 |                     if(test_count.fail + test_count.pass > maximum_rows) {
41 |                         sublist.css("height","0px").show().animate({"display":"block","height":(maximum_rows * 20)+"px"})
42 |                     } else {
43 |                         sublist.slideDown()
44 |                     }
45 |                     var test_status = (test_count.fail > 0) ? "fail" : "pass"
46 |                     var text = (test_status == "pass") ? "&#x2714;" : "&#x2718;"
47 |                     sublist.parent().find(".testSummary").removeClass("result pass fail").addClass(test_status).html(text).attr("title", test_count.pass + " pass, " + test_count.fail + " fail")
48 |                 })
49 |             }
50 |         })  
51 |         return false
52 |     },
53 | 
54 |     error: function(){
55 |         alert("Unable to make AJAX request")
56 |     },
57 | 
58 |     run_all: function(event) {
59 |         $("ul.tests a").click()
60 |         return false
61 |     }
62 | 
63 | }
64 | 
65 | $(document).ready(function(){
66 |     $("ul.tests a").click(docvert.click_test)
67 |     $("#run-all a").click(docvert.run_all)
68 | })
69 | 
70 | 


--------------------------------------------------------------------------------
/core/web_service_themes/default/tests.tpl:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 |     <head>
 4 |         <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 5 |         <title>Docvert - Tests</title>
 6 |         <link rel="stylesheet" type="text/css" href="static/default/screen.css">
 7 |         <script type="text/javascript" src="lib/jquery/jquery-1.5.min.js"></script>
 8 |         <script type="text/javascript" src="static/default/jquery.dropp.js"></script>
 9 |         <script type="text/javascript" src="static/default/tests.js"></script>
10 |     </head>
11 |     <body onunload="" class="tests-page">
12 |         <ul id="menu">
13 |             <li class="current"><a href="#tests">Tests</a></li>
14 |             <li><a href="index">Web Service</a></li>
15 |         </ul>
16 |         <h1><a href="index">Doc<span class="syllable">vert</span> <span class="version">6</span> <span class="slogan"><abbr title="Microsoft">MS</abbr>Word to Open Standards</span></a></h1>
17 |         <h2>Tests <span id="run-all">(<a href="#run-all">run all</a>)</span></h2>
18 |         <ul class="tests">
19 | % for pipeline in tests:
20 |             <li id="test-{{pipeline['id']}}"><a href="/web-service/tests/{{pipeline['id']}}" title="Perform {{pipeline['name']}} tests"><span class="result testSummary">?</span>  {{pipeline['name']}} <span class="ratio"></span></a></li>
21 | % end
22 |         </ul>
23 |     </body>
24 | </html>
25 | 


--------------------------------------------------------------------------------
/core/web_service_themes/default/upload_computer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/core/web_service_themes/default/upload_computer.png


--------------------------------------------------------------------------------
/core/web_service_themes/default/web-service.js:
--------------------------------------------------------------------------------
 1 | var docvert = {
 2 |     buffer: {"width":20, "height":50},
 3 |     resize_prefix_box: function(event){
 4 |         var iframe_element = $("iframe")
 5 |         var iframe_offset = iframe_element.offset()
 6 |         
 7 |         iframe_element.height( $(window).height() - iframe_offset.top - docvert.buffer.height).width($(window).width() - iframe_offset.left - docvert.buffer.width)
 8 |         iframe_element.css({"top":-iframe_element.height(), position:"relative"})
 9 |         iframe_element.animate({top:0}, "slow")
10 |     },
11 |     click_back: function(event){
12 |         var iframe_element = $("iframe")
13 |         iframe_element.animate({top: -(iframe_element.height() + (docvert.buffer.height * 3))}, "slow", function(){
14 |             window.location.href = "index"
15 |         })
16 |         return false
17 |     }
18 | }
19 | 
20 | $(document).ready(function(){
21 |     $(window).resize(docvert.resize_prefix_box).resize()
22 |     $(".back-link a").click(docvert.click_back)
23 | })
24 | 


--------------------------------------------------------------------------------
/core/web_service_themes/default/web-service.tpl:
--------------------------------------------------------------------------------
 1 | % from urllib.parse import quote
 2 | <!DOCTYPE html>
 3 | <html>
 4 |     <head>
 5 |         <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 6 |         <title>Docvert - Web Service</title>
 7 |         <link rel="stylesheet" type="text/css" href="static/default/screen.css">
 8 |         <script type="text/javascript" src="lib/jquery/jquery-1.5.min.js"></script>
 9 |         <script type="text/javascript" src="static/default/jquery.dropp.js"></script>
10 |         <script type="text/javascript" src="static/default/web-service.js"></script>
11 |     </head>
12 |     <body class="web-service-page">
13 |         <ul id="menu">
14 |             <li><a href="tests">Tests</a></li>
15 |             <li><a href="#index">Web Service</a></li>
16 |         </ul>
17 |         <h1>Doc<span class="syllable">vert</span> <span class="version">6</span> <span class="slogan"><abbr title="Microsoft">MS</abbr>Word to Open Standards</span></h1>
18 |         <ul id="conversion-navigation-tabs">
19 |             <li class="back-link"><a href="/index#slide-in">&#x25C2; back</a></li>
20 |             <li class="zip-download"><a href="conversions-zip/{{conversion_id}}">Download ZIP</a></li>
21 |         </ul>
22 |         <ul id="conversion-tabs">
23 | % for filename, conversion in conversions.items():
24 |             <li><a href="conversions/{{conversion_id}}/{{filename}}/" title="{{filename}} via {{conversion['pipeline']}}/{{conversion['auto_pipeline']}}" target="preview">{{conversion['friendly_name']}}</a></li>
25 | % end
26 |         </ul>
27 |         <iframe id="preview" name="preview" src="{{ quote(first_document_url) }}">
28 |         </iframe>
29 |     </body>
30 | </html>
31 | 


--------------------------------------------------------------------------------
/doc/how-to-write-themes.txt:
--------------------------------------------------------------------------------
  1 | How to Write Themes
  2 | ===================
  3 | 
  4 | 
  5 | Concepts
  6 | --------
  7 | 
  8 | If you don't know XSLT, well, you'll need to know XSLT. It's a
  9 | popular W3C standard for messing around with XML, so find a
 10 | tutorial and learn it because it's integral to theming in Docvert
 11 | and this "How to Write Themes" document doesn't teach you it.
 12 | 
 13 | Now that you know XSLT please do continue reading,
 14 | 
 15 | Docvert is built around "XML Pipelines" which are simply a
 16 | series of conversion stages, where the results of one stage
 17 | are passed to the next.
 18 | 
 19 | For example,
 20 | 
 21 | 	1)	world-news.xml
 22 | 			|
 23 | 	2)	filter-news-by-New-Zealand.xslt
 24 | 			|
 25 | 	3)	reformat-into-HTML.xslt
 26 | 			|
 27 | 	4)	Save the result to file "index.html"
 28 | 
 29 | The file from (1) is passed to (2) which makes some conversion.
 30 | The New Zealand results from (2) are passed as input to (3)
 31 | (Step 3 does not have access to world-news.xml, only the
 32 | result of step 2). Finally, the results are saved in step (4).
 33 | 
 34 | XML Pipelines are also known as XML Chains.
 35 | 
 36 | This is useful because - like in other types of programming
 37 | - you can separate out conversion steps into components and
 38 | reuse them in other pipelines (Eg, have two pipelines that
 39 | share the "filter-news-by-New-Zealand.xslt").
 40 | 
 41 | So writing Docvert themes is about learning Docverts particular
 42 | XML Pipeline syntax, and about writing the XSLT for each stage
 43 | to generate the XML/HTML that you want.
 44 | 
 45 | 
 46 | Pipelines
 47 | ---------
 48 | 
 49 | So far as Docvert's concerned a pipeline is a theme. And a
 50 | theme is a pipeline.
 51 | 
 52 | Each pipeline gets its own directory under "pipelines".
 53 | In there Docvert will look for a pipeline.xml file which
 54 | defines the XML Pipeline (the conversion stages). Each pipeline
 55 | can only have one of these pipeline.xml files. If you want another
 56 | one you'll have to make another directory.
 57 | 
 58 | Lets look at a simple example,
 59 | 
 60 | 	<?xml version="1.0" encoding="UTF-8"?>
 61 | 	<pipeline>
 62 | 		<stage process="TransformToDocBook"/>
 63 | 		<stage process="Transform" withFile="webstyle.xsl"/>
 64 | 		<stage process="Serialize" toFile="index.html"/>
 65 | 	</pipeline>
 66 | 
 67 | It doesn't matter if you don't understand this yet, just
 68 | see that there's a series of stages, each with a process
 69 | attribute.
 70 | 
 71 | -	The first process converts the file to DocBook. This is a
 72 | 	built-in process and is a feature of Docvert.
 73 | 
 74 | -	The second process passes the results of the first stage
 75 | 	through webstyle.xsl.
 76 | 
 77 | -	The last process saves this to "index.html".
 78 | 
 79 | Get it?
 80 | 
 81 | Let's assume so.
 82 | 
 83 | And try something harder. Here's "standard-break-up-on-heading1".
 84 | Try opening up its pipeline.xml and you'll find something like...
 85 | 
 86 | 	<?xml version="1.0" encoding="UTF-8"?>
 87 | 	<pipeline>
 88 | 		<stage process="TransformToDocBook"/>
 89 | 		<stage process="Loop" numberOfTimes="xpathCount://sect1">
 90 | 			<stage process="SplitPages"/>
 91 | 			<stage process="Transform" withFile="webstyle.xsl"/>
 92 | 			<stage process="Serialize" toFile="section{LoopIndex}.html"/>
 93 | 		</stage>
 94 | 		<stage process="GetPreface" forSectionLevel="0" splitPagesDepth="1"/>
 95 | 		<stage process="Transform" withFile="webstyle.xsl"/>
 96 | 		<stage process="Serialize" toFile="index.html"/>
 97 | 	</pipeline>
 98 | 
 99 | To explain, all pipelines begins with an Oasis OpenDocument, which is
100 | then converted to DocBook. It then loops a number of times based on
101 | the number of "sect1" tags in the document in order to pull out
102 | every section and transform it with "webstyle.xsl" and then serialize (save)
103 | each section to sequentially numbered file.
104 | 
105 | Now have a look at the GetPreface stage... Loops are effectively branches
106 | and their transformations don't affect the main pipeline's XML.
107 | This means you can use them to extract chapters, style and serialize,
108 | without affecting the rest.
109 | So the GetPreface stage is NOT affected by the transformations occuring
110 | above, in the loop.
111 | 
112 | The GetPreface extracts the preface, which in the following two stages is
113 | styled by webstyle.xsl and then serialized (saved) to index.html.
114 | 
115 | When creating pipelines valid processes are "TransformToDocBook",
116 | "Transform", "Serialize", "Loop", "SplitPages", and "GetPreface".
117 | 
118 | If you wish to add a custom process, see under ~core/pipeline_type (it's
119 | recommended that anything you can do in XSLT, but writing your own
120 | Python plugins that manipulation the XML is allowed)
121 | 
122 | If you wish to pass any parameters to your XSLT, just include them as
123 | XML attributes in the pipeline and they will be made available. Eg,
124 | 
125 | 	<stage process="Transform" myParam="some value"/>
126 | 
127 | Will make a parameter named "myParam" with a value of "some value"
128 | for your XSLT.
129 | 
130 | 
131 | 


--------------------------------------------------------------------------------
/doc/sample/sample-document.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/doc/sample/sample-document.doc


--------------------------------------------------------------------------------
/doc/sample/sample-document.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/doc/sample/sample-document.odt


--------------------------------------------------------------------------------
/docvert-cli.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | import sys
 4 | import io
 5 | import uuid
 6 | import os.path
 7 | import copy
 8 | import argparse
 9 | import tempfile
10 | import core.docvert
11 | import core.docvert_storage
12 | import core.docvert_exception
13 | 
14 | version = core.docvert.version
15 | pipeline_types = core.docvert.get_all_pipelines()
16 | auto_pipelines = []
17 | default_auto_pipeline = None
18 | for auto_pipeline in pipeline_types['auto_pipelines']:
19 |     auto_pipelines.append(auto_pipeline["id"])
20 |     if auto_pipeline["id"].endswith(".default"):
21 |         default_auto_pipeline = auto_pipeline["id"]
22 | 
23 | class PrintPipelines(argparse.Action):
24 |     def __call__(self, parser, namespace, values, option_string=None):
25 |         print("List of all pipelines\n---------------------")
26 |         for pipeline_type, pipelines in pipeline_types.items():
27 |             print("type: %s" % pipeline_type)
28 |             for pipeline in pipelines:
29 |                 print("      - %s" % pipeline['id'])
30 |             print("")
31 |         exit()
32 | 
33 | parser = argparse.ArgumentParser(description='Converts Office files to OpenDocument, DocBook and HTML.', epilog='E.g.: ./docvert-cli.py doc/sample/sample-document.doc -p="web standards"')
34 | parser.add_argument('--version', '-v', action='version', version='Docvert %s' % version)
35 | parser.add_argument('infile', nargs='?', type=argparse.FileType('rb'), help='Path or Stdin of Office file to convert', default=sys.stdin.buffer    )
36 | parser.add_argument('--pipeline', '-p', help='Pipeline you wish to use.', required=True)
37 | parser.add_argument('--response', '-r', help='Format of ZIP conversion response.', default='auto', choices=['auto','path','stdout'])
38 | parser.add_argument('--autopipeline', '-a', help='AutoPipeline to use (when your pipeline requires it).', default=default_auto_pipeline, choices=auto_pipelines)
39 | parser.add_argument('--url', '-u', help='URL to download and convert. Must be an Office file.')
40 | parser.add_argument('--list-pipelines', '-l', action=PrintPipelines, help='List all pipeline types', nargs=0)
41 | parser.add_argument('--pipelinetype', '-t', help='Pipeline type you wish to use.', default='pipelines', choices=list(pipeline_types.keys()))
42 | 
43 | args = parser.parse_args() #stops here if there were no args or if they asked for --help
44 | 
45 | def process_commands(filesdata, pipeline_id, pipeline_type, auto_pipeline_id, after_conversion, url):
46 |     if hasattr(filesdata, 'read'):
47 |         filesdata = [io.BytesIO(filesdata.read())]
48 |     docvert_4_default = '.default'
49 |     if auto_pipeline_id and auto_pipeline_id.endswith(docvert_4_default):
50 |         auto_pipeline_id = auto_pipeline_id[0:-len(docvert_4_default)]
51 |     files = dict()
52 |     file_index = 1
53 |     for filedata in filesdata:
54 |         files['document-%i.doc' % file_index] = filedata
55 |         file_index += 1
56 |     urls = list()
57 |     if url != None:
58 |         urls.append(url)
59 |     try:
60 |         response = core.docvert.process_conversion(files, urls, pipeline_id, pipeline_type, auto_pipeline_id)
61 |     except core.docvert_exception.debug_exception as exception:
62 |         print("%s: %s" % (exception, exception.data), file=sys.stderr)
63 |     #TODO: when after_conversion="auto"
64 |     if after_conversion == "stdout":
65 |         print(response.to_zip().getvalue(), file=sys.stdout)
66 |         exit()
67 |     os_handle, zip_path = tempfile.mkstemp()
68 |     zip_handler = open(zip_path, 'wb')
69 |     zip_handler.write(response.to_zip().getvalue())
70 |     zip_handler.close()
71 |     os.rename(zip_path, "%s.zip" % zip_path)
72 |     print("Success! ZIP conversion at: %s.zip" % zip_path)
73 | 
74 | 
75 | 
76 | process_commands(args.infile, args.pipeline, args.pipelinetype, args.autopipeline, args.response, args.url)
77 | 


--------------------------------------------------------------------------------
/docvert-web.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | import sys
  4 | import io
  5 | import uuid
  6 | import os.path
  7 | import socket
  8 | import optparse
  9 | import cgi
 10 | docvert_root = os.path.dirname(os.path.abspath(__file__))
 11 | inbuilt_bottle_path = os.path.join(docvert_root, 'lib/bottle')
 12 | try:
 13 |     import bottle
 14 |     if not hasattr(bottle, 'static_file'):
 15 |         message = "Notice: Old version of Bottle at %s, instead using bundled version at %s%sbottle.py" % (bottle.__file__, inbuilt_bottle_path, os.sep)
 16 |         print(message)
 17 |         raise ImportError(message)
 18 | except ImportError as exception:
 19 |     try:
 20 |         sys.path.insert(0, inbuilt_bottle_path)
 21 |         try:
 22 |             reload(bottle)
 23 |         except NameError:
 24 |             import bottle
 25 |     except ImportError:
 26 |         sys.stderr.write("Error: Unable to find Bottle libraries in %s. Exiting...\n" % sys.path)
 27 |         sys.exit(0)
 28 | import lib.bottlesession.bottlesession
 29 | bottle.debug(True)
 30 | import core.docvert
 31 | import core.docvert_storage
 32 | import core.docvert_exception
 33 | import core.document_type
 34 | 
 35 | # START DEFAULT CONFIG
 36 | theme='default'
 37 | host='localhost'
 38 | port=8080
 39 | # END CONFIG
 40 | parser = optparse.OptionParser()
 41 | parser.add_option("-p", "--port", dest="port", help="Port to run on", type="int")
 42 | parser.add_option("-H", "--host", dest="host", help="Hostname or IP run on", type="str")
 43 | (options, args) = parser.parse_args()
 44 | if options.port:
 45 |     port = options.port
 46 | if options.host:
 47 |     host = options.host
 48 | theme_directory='%s/core/web_service_themes' % docvert_root
 49 | bottle.TEMPLATE_PATH.append('%s/%s' % (theme_directory, theme))
 50 | 
 51 | # URL mappings
 52 | 
 53 | @bottle.route('/index', method='GET')
 54 | @bottle.route('/', method='GET')
 55 | @bottle.view('index')
 56 | def index():
 57 |     return dict(list(core.docvert.get_all_pipelines(False).items()) + list({"libreOfficeStatus": core.docvert_libreoffice.checkLibreOfficeStatus()}.items()) )
 58 | 
 59 | @bottle.route('/static/:path#.*#', method='GET')
 60 | def static(path=''):
 61 |     return bottle.static_file(path, root=theme_directory)
 62 | 
 63 | @bottle.route('/lib/:path#.*#', method='GET')
 64 | def libstatic(path=None):
 65 |     return bottle.static_file(path, root='%s/lib' % docvert_root)
 66 | 
 67 | @bottle.route('/web-service.php', method='POST') #for legacy Docvert support
 68 | @bottle.route('/web-service', method='POST')
 69 | @bottle.view('web-service')
 70 | def webservice():
 71 |     files = dict()
 72 |     first_document_id = None
 73 |     there_was_at_least_one_thing_uploaded = False
 74 |     for key, item in bottle.request.files.items():
 75 |         there_was_at_least_one_thing_uploaded = True
 76 |         items = bottle.request.files.getall(key)
 77 |         for field_storage in items:
 78 |             filename = field_storage.filename
 79 |             unique = 1
 80 |             if filename in files and files[filename].getvalue() == field_storage.value: #remove same file uploaded multiple times
 81 |                 continue
 82 |             while filename in files:
 83 |                 filename = field_storage.filename + str(unique)
 84 |                 unique += 1
 85 |             files[filename] = io.BytesIO(field_storage.value)
 86 |     pipeline_id = bottle.request.POST.get('pipeline')
 87 |     if pipeline_id.startswith('autopipeline:'): #Docvert 4.x
 88 |         pipeline_id = pipeline_id[len('autopipeline:'):]
 89 |     auto_pipeline_id = None
 90 |     if bottle.request.POST.get('break_up_pages_ui_version'):
 91 |         if bottle.request.POST.get('break_up_pages'):
 92 |             auto_pipeline_id = bottle.request.POST.get('autopipeline')
 93 |         if auto_pipeline_id is None:
 94 |             pipelines = list(core.docvert.get_all_pipelines().items())
 95 |             for pipelinetype_key, pipelinetype_value in pipelines:
 96 |                 if pipelinetype_key == "auto_pipelines":
 97 |                     for pipeline in pipelinetype_value:
 98 |                         if "nothing" in pipeline["id"].lower():
 99 |                             auto_pipeline_id = pipeline["id"]
100 |     else:
101 |         auto_pipeline_id = bottle.request.POST.get('autopipeline')
102 |     docvert_4_default = '.default'
103 |     if auto_pipeline_id and auto_pipeline_id.endswith(docvert_4_default):
104 |         auto_pipeline_id = auto_pipeline_id[0:-len(docvert_4_default)]
105 |     after_conversion = bottle.request.POST.get('afterconversion')
106 |     urls = bottle.request.POST.getall('upload_web[]')
107 |     if len(urls) == 1 and urls[0] == '':
108 |         urls = list()
109 |     else:
110 |         urls = set(urls)
111 |     response = None
112 |     if there_was_at_least_one_thing_uploaded is False: #while we could have counted len(files) or len(urls) the logic around those is more complex, and I don't want to show this error unless there was genuinely no files uploaded
113 |         bottle.response.content_type = "text/html"
114 |         return '<!DOCTYPE html><html><body><h1>Error: No files were uploaded</h1><p>Known issues that can cause this:</p><ul><li>Permissions problem on the server or browser: Try ensuring that your upload file has all read permissions set.</li><li>Chrome/Chromium can sometimes cause file upload problems (some combination of Chrome/Bottle, it\'s not a Docvert-specific bug). Sorry, but Firefox seems to work.</li></ul><hr><a href="/">Try again?</a></body></html>'
115 |     try:
116 |         response = core.docvert.process_conversion(files, urls, pipeline_id, 'pipelines', auto_pipeline_id, suppress_errors=True)
117 |     except core.docvert_exception.debug_exception as exception:
118 |         bottle.response.content_type = exception.content_type
119 |         return exception.data
120 |     conversion_id = "%s" % uuid.uuid4()
121 |     if after_conversion == "downloadZip" or after_conversion == "zip":
122 |         bottle.response.content_type = 'application/zip'
123 |         bottle.response.headers['Content-Disposition'] = 'attachment; filename="%s.zip"' % response.get_zip_name()
124 |         return response.to_zip().getvalue()
125 |     pipeline_summary = "%s (%s)" % (pipeline_id, auto_pipeline_id)
126 |     session_manager = lib.bottlesession.bottlesession.PickleSession()
127 |     session = session_manager.get_session()
128 |     session[conversion_id] = response
129 |     conversions_tabs = dict()
130 |     first_document_url = "conversions/%s/%s/" % (conversion_id, response.default_document)
131 |     for filename in list(files.keys()):
132 |         thumbnail_path = "%s/thumbnail.png" % filename
133 |         if thumbnail_path in response:
134 |             thumbnail_path = None
135 |         conversions_tabs[filename] = dict(friendly_name=response.get_friendly_name_if_available(filename), pipeline=pipeline_id, auto_pipeline=auto_pipeline_id, thumbnail_path=thumbnail_path)
136 |     try:
137 |         session_manager.save(session)
138 |     except OSError as e:
139 |         import traceback
140 |         traceback.print_exc(file=sys.stdout)
141 |         conversions_tabs = {'Session file problem': dict(friendly_name='Session file problem', pipeline=None, auto_pipeline=None, thumbnail_path=None) }
142 |         first_document_url = "/bottle_session_file_problem"
143 |     return dict(conversions=conversions_tabs, conversion_id=conversion_id, first_document_url=first_document_url)
144 | 
145 | @bottle.route('/favicon.ico', method='GET')
146 | def favicon():
147 |     return bottle.static_file('favicon.ico', root='%s/%s' % (theme_directory, theme))
148 | 
149 | @bottle.route('/bottle_session_file_problem', method='GET')
150 | def bottle_session_file_problem():
151 |     print('%s/lib/bottle' % docvert_root)
152 |     return bottle.static_file('bottle_session_file_problem.html', root='%s/lib/bottle' % docvert_root)
153 | 
154 | @bottle.route('/conversions/:conversion_id/:path#.*#')
155 | def conversion_static_file(conversion_id, path):
156 |     session_manager = lib.bottlesession.bottlesession.PickleSession()
157 |     session = session_manager.get_session()
158 |     if conversion_id not in session: # They don't have authorisation
159 |         raise bottle.HTTPError(code=404)
160 |     filetypes = {".xml":"text/xml", ".html":"text/html", ".xhtml":"text/html", ".htm":"text/html", ".svg":"image/svg+xml", ".txt":"text/plain", ".png":"image/png", ".gif":"image/gif", ".bmp":"image/x-ms-bmp", ".jpg":"image/jpeg", ".jpe":"image/jpeg", ".jpeg":"image/jpeg", ".css":"text/css", ".js":"text/javascript", ".odt":"application/vnd.oasis.opendocument.text", ".odp":"application/vnd.oasis.opendocument.presentation", ".ods":"application/vnd.oasis.opendocument.spreadsheet", ".dbk":"application/docbook+xml"}
161 |     if path not in session[conversion_id]: # They have authorisation but that exact path doesn't exist, try fallbacks
162 |         fallbacks = ["index.html", "index.htm", "index.xml", "index.php", "default.htm", "default.html", "index.asp", "default.aspx", "index.aspx", "default.aspx", "index.txt", "index.odt", "default.odt", "index.dbk", "default.dbk"]
163 |         valid_fallback_path = None
164 |         separator = "/"
165 |         if path.endswith("/"):
166 |             separator = ""
167 |         for fallback in fallbacks:
168 |             fallback_path = path+separator+fallback
169 |             if fallback_path in session[conversion_id]:
170 |                 valid_fallback_path = fallback_path
171 |                 break
172 |         if valid_fallback_path is None:
173 |             raise bottle.HTTPError(code=404)
174 |         path = valid_fallback_path
175 |         extension = os.path.splitext(path)[1]
176 |         if extension == ".odt":
177 |             bottle.response.content_type = filetypes[".html"]
178 |             link_html = 'click here to download %s' % cgi.escape(os.path.basename(path))
179 |             thumbnail_path = "%s/thumbnail.png" % path[0:path.rfind("/")]
180 |             if thumbnail_path in session[conversion_id]:
181 |                 link_html = '<img src="thumbnail.png"><br>' + link_html
182 |             return '<!DOCTYPE html><html><head><title>%s</title><style type="text/css">body{font-family:sans-serif;font-size:small} a{text-decoration:none} p{text-align:center} img{clear:both;border: solid 1px #cccccc}</style></head><body><p><a href="%s">%s</a></p></body></html>' % (
183 |                 cgi.escape(path),
184 |                 cgi.escape(os.path.basename(path)),
185 |                 link_html
186 |             )
187 |     extension = os.path.splitext(path)[1]
188 |     if extension in filetypes:
189 |         bottle.response.content_type = filetypes[extension]
190 |     else:
191 |         bottle.response.content_type = "text/plain"
192 |     return session[conversion_id][path]
193 | 
194 | @bottle.route('/conversions-zip/:conversion_id')
195 | def conversion_zip(conversion_id):
196 |     session_manager = lib.bottlesession.bottlesession.PickleSession()
197 |     session = session_manager.get_session()
198 |     if conversion_id not in session: # They don't have authorisation
199 |         raise bottle.HTTPError(code=404)
200 |     bottle.response.content_type = 'application/zip'
201 |     bottle.response.headers['Content-Disposition'] = 'attachment; filename="%s.zip"' % session[conversion_id].get_zip_name()
202 |     return session[conversion_id].to_zip().getvalue()
203 | 
204 | @bottle.route('/libreoffice-status', method='GET')
205 | def libreoffice_status():
206 |     return bottle.json_dumps( {"libreoffice-status":core.docvert_libreoffice.checkLibreOfficeStatus()} )
207 | 
208 | @bottle.route('/tests', method='GET')
209 | @bottle.view('tests')
210 | def tests():
211 |     return core.docvert.get_all_pipelines()
212 | 
213 | @bottle.route('/web-service/tests/:test_id', method='GET')
214 | def web_service_tests(test_id):
215 |     suppress_error = bottle.request.GET.get('suppress_error') == "true"
216 |     storage = core.docvert_storage.storage_memory_based()
217 |     error_message = None
218 |     if suppress_error:
219 |         try:
220 |             core.docvert.process_pipeline(None, test_id, "tests", None, storage)
221 |         except Exception as exception:
222 |             bottle.response.content_type = "text/plain"
223 |             class_name = "%s" % type(exception).__name__
224 |             return bottle.json_dumps([{"status":"fail", "message": "Unable to run tests due to exception. <%s> %s" % (class_name, exception)}])
225 |     else:
226 |         try:
227 |             core.docvert.process_pipeline(None, test_id, "tests", None, storage)
228 |         except (core.docvert_exception.debug_exception, core.docvert_exception.debug_xml_exception) as exception:
229 |             bottle.response.content_type = exception.content_type
230 |             return exception.data
231 |     return bottle.json_dumps(storage.tests)
232 | 
233 | @bottle.route('/tests/', method='GET')
234 | def tests_wrongdir():
235 |     bottle.redirect('/tests')
236 | 
237 | @bottle.route('/3rdparty/sscdocapi')
238 | def third_party_sscdocapi():
239 |     return bottle.static_file('sscdocapi.html', root='%s/core/3rd-party/' % docvert_root)    
240 | 
241 | try:
242 |     bottle.run(host=host, port=port, quiet=False)
243 | except socket.error as e:
244 |     if 'address already in use' in str(e).lower():
245 |         print('ERROR: %s:%i already in use.\nTry another port? Use command line parameter -H HOST or -p PORT to change it.' % (host, port))
246 |     else:
247 |         raise
248 | 
249 | 


--------------------------------------------------------------------------------
/lib/README:
--------------------------------------------------------------------------------
1 | Warning this directory is available via the web server. Do not store sensitive files here or in subdirectories.
2 | 


--------------------------------------------------------------------------------
/lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/lib/__init__.py


--------------------------------------------------------------------------------
/lib/bottle/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/lib/bottle/__init__.py


--------------------------------------------------------------------------------
/lib/bottle/bottle_session_file_problem.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |     <title>Bottle session problem</title>
 5 |     <style type="text/css">
 6 |         body {font-family:sans-serif}
 7 |         h1 {font-size: medium}
 8 |     </style>
 9 | </head>
10 | <body>
11 | <h1>Session file problem</h1>
12 | <p>Please tell the administrator of this Docvert web service that there seems to be a file permissions problem with Bottle's session storage.</p>
13 | <p>Tell them that docvert-web.py should have printed a stacktrace of the error, and that on Linux the session files are usually in /tmp/docvert-session-* . Usually these files just need to be deleted.</p>
14 | <p>With this information they'll hopefully be able to fix it -- fingers crossed!</p>
15 | <hr>
16 | <p style="font-size:small">Error usually occurs in lib/bottlesession/bottlesession.py</p>
17 | </body>
18 | </html>
19 | 


--------------------------------------------------------------------------------
/lib/bottlesession/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/lib/bottlesession/__init__.py


--------------------------------------------------------------------------------
/lib/bottlesession/bottlesession.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #  From https://github.com/linsomniac/bottlesession/blob/master/bottlesession.py
  3 | #
  4 | #  Bottle session manager.  See README for full documentation.
  5 | #
  6 | #  Written by: Sean Reifschneider <jafo@tummy.com>
  7 | #  Changes by: Matthew Holloway <matthew@holloway.co.nz>
  8 | 
  9 | 
 10 | import os
 11 | import os.path
 12 | import pickle
 13 | import uuid
 14 | import hashlib
 15 | import time
 16 | 
 17 | try:
 18 |     import bottle
 19 | except ImportError:
 20 |     lib_directory = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 21 |     sys.path.append(os.path.join(lib_directory, 'bottle'))
 22 |     try:
 23 |         import bottle
 24 |     except ImportError:
 25 |         sys.stderr.write("Error: Unable to find Bottle libraries in %s. Exiting..." % sys.path)
 26 |         sys.exit(0)
 27 | 
 28 | class BaseSession(object):
 29 | 	'''Base class which implements some of the basic functionality required for
 30 | 	session managers.  Cannot be used directly.
 31 | 
 32 | 	:param cookie_expires: Expiration time of session ID cookie, either `None`
 33 | 			if the cookie is not to expire, a number of seconds in the future,
 34 | 			or a datetime object.  (default: 30 days)
 35 | 	'''
 36 | 	def __init__(self, cookie_expires = 86400*30):
 37 | 		self.cookie_expires = cookie_expires
 38 | 
 39 | 	def load(self, sessionid):
 40 | 		raise NotImplementedError
 41 | 
 42 | 	def save(self, sessionid, data):
 43 | 		raise NotImplementedError
 44 | 
 45 | 	def make_session_id(self):
 46 | 		return str(uuid.uuid4())
 47 | 
 48 | 	def allocate_new_session_id(self):
 49 | 		#  retry allocating a unique sessionid
 50 | 		for i in range(100):
 51 | 			sessionid = self.make_session_id()
 52 | 			if not self.load(sessionid): return sessionid
 53 | 		raise ValueError('Unable to allocate unique session')
 54 | 
 55 | 	def get_session(self):
 56 | 		#  get existing or create new session identifier
 57 | 		sessionid = bottle.request.COOKIES.get('sessionid')
 58 | 		if not sessionid:
 59 | 			sessionid = self.allocate_new_session_id()
 60 | 			bottle.response.set_cookie('sessionid', sessionid,
 61 | 					path = '/', expires = self.cookie_expires)
 62 | 		#  load existing or create new session
 63 | 		data = self.load(sessionid)
 64 | 		if not data:
 65 | 			data = { 'sessionid' : sessionid, 'valid' : False }
 66 | 			self.save(data)
 67 | 		return data
 68 | 
 69 | 
 70 | class PickleSession(BaseSession):
 71 | 	'''Class which stores session information in the file-system.
 72 | 
 73 | 	:param session_dir: Directory that session information is stored in.
 74 | 			(default: ``'/tmp'``).
 75 | 	'''
 76 | 	def __init__(self, session_dir = '/tmp', *args, **kwargs):
 77 | 		super(PickleSession, self).__init__(*args, **kwargs)
 78 | 		self.session_dir = session_dir
 79 | 
 80 | 	def load(self, sessionid):
 81 | 		filename = os.path.join(self.session_dir, 'docvert-session-%s' % sessionid)
 82 | 		if not os.path.exists(filename): return None
 83 | 		with open(filename, 'rb') as fp:
 84 | 			session = pickle.load(fp)
 85 | 		return session
 86 | 
 87 | 	def save(self, data):
 88 | 		sessionid = data['sessionid']
 89 | 		fileName = os.path.join(self.session_dir, 'docvert-session-%s' % sessionid)
 90 | 		tmpName = fileName + '.' + str(uuid.uuid4())
 91 | 		with open(tmpName, 'wb') as fp:
 92 | 			self.session = pickle.dump(data, fp, 2)
 93 | 		os.rename(tmpName, fileName)
 94 | 
 95 | 
 96 | class CookieSession(BaseSession):
 97 | 	'''Session manager class which stores session in a signed browser cookie.
 98 | 
 99 | 	:param cookie_name: Name of the cookie to store the session in.
100 | 			(default: ``session_data``)
101 | 	:param secret: Secret to be used for "secure cookie".  If ``None``,
102 | 			attempts will be made to generate a difficult to guess secret.
103 | 			However, this is probably only suitable for private web apps, and
104 | 			definitely only for a single web server.  You really should be
105 | 			using your own secret.  (default: ``None``)
106 | 	:param secret_file: File to read the secret from.  If ``secret`` is
107 | 			``None`` and ``secret_file`` is set, the first line of this file
108 | 			is read, and stripped, to produce the secret.
109 | 	'''
110 | 
111 | 	def __init__(self, secret = None, secret_file = None, cookie_name = 'docvert_session', *args, **kwargs):
112 | 		super(CookieSession, self).__init__(*args, **kwargs)
113 | 		self.cookie_name = cookie_name
114 | 		if not secret and secret_file is not None:
115 | 			with open(secret_file, 'r') as fp:
116 | 				secret = fp.readline().strip()
117 | 		if not secret: 	#  generate a difficult to guess secret
118 | 			secret = str(uuid.uuid1()).split('-', 1)[1]
119 | 			with open('/proc/uptime', 'r') as fp:
120 | 				uptime = int(time.time() - float(fp.readline().split()[0]))
121 | 				secret += '-' + str(uptime)
122 | 			secret = hashlib.sha1(secret).hexdigest()
123 | 		self.secret = secret
124 | 
125 | 	def load(self, sessionid):
126 | 		cookie = bottle.request.get_cookie(self.cookie_name, secret = self.secret)
127 | 		if cookie == None: return {}
128 | 		return pickle.loads(cookie)
129 | 
130 | 	def save(self, data):
131 | 		bottle.response.set_cookie(
132 |             self.cookie_name,
133 |             pickle.dumps(data),
134 | 		    secret = self.secret, path = '/', expires = self.cookie_expires)
135 | 
136 | 


--------------------------------------------------------------------------------
/lib/fonts/COPYRIGHT:
--------------------------------------------------------------------------------
1 | Reanie Beanie font by James Grieshaber
2 | 
3 | Released under the SIL Open Font License, 1.1
4 | 
5 | http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=OFL
6 | 


--------------------------------------------------------------------------------
/lib/fonts/README:
--------------------------------------------------------------------------------
1 | Reanie Beanie font by James Grieshaber
2 | 
3 | http://www.google.com/webfonts/specimen/Reenie+Beanie
4 | 
5 | Released under the SIL Open Font License, 1.1
6 | 
7 | http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=OFL
8 | 


--------------------------------------------------------------------------------
/lib/fonts/reenie-beanie.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/lib/fonts/reenie-beanie.woff


--------------------------------------------------------------------------------
/lib/workerpool/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (http://www.opensource.org/licenses/mit-license.php)
 2 | 
 3 | Copyright (c) 2008 Andrey Petrov
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/lib/workerpool/QueueWrapper.py:
--------------------------------------------------------------------------------
 1 | # NewQueue.py - Implements Python 2.5 Queue functionality for Python 2.4
 2 | # Copyright (c) 2008 Andrey Petrov
 3 | #
 4 | # This module is part of workerpool and is released under
 5 | # the MIT license: http://www.opensource.org/licenses/mit-license.php
 6 | 
 7 | # TODO: The extra methods provided here do nothing for now. Add real functionality to them someday.
 8 | 
 9 | from queue import Queue as OldQueue
10 | 
11 | __all__ = ['Queue']
12 | 
13 | class Queue(OldQueue):
14 |     def task_done(self):
15 |         "Does nothing in Python 2.4"
16 |         pass
17 | 
18 |     def join(self):
19 |         "Does nothing in Python 2.4"
20 |         pass
21 | 


--------------------------------------------------------------------------------
/lib/workerpool/README:
--------------------------------------------------------------------------------
 1 | workerpool
 2 | ==========
 3 | 
 4 | Copyright (c) 2008 Andrey Petrov (andrey.petrov@shazow.net)
 5 | 
 6 | Original copyright by Idee Inc. (http://ideeinc.com/)
 7 | 
 8 | http://code.google.com/p/workerpool/
 9 | 
10 | The workerpool module is a simple framework for easily distributing jobs 
11 | into multiple worker threads.
12 | 
13 | Examples of usage can be found in the unit tests (/test) and the samples 
14 | provided (/samples).
15 | 


--------------------------------------------------------------------------------
/lib/workerpool/__init__.py:
--------------------------------------------------------------------------------
 1 | # __init__.py
 2 | # Copyright (c) 2008 Andrey Petrov
 3 | #
 4 | # This module is part of workerpool and is released under
 5 | # the MIT license: http://www.opensource.org/licenses/mit-license.php
 6 | 
 7 | """
 8 | Workerpool module provides a threading framework for managing a constant pool
 9 | of worker threads that perform arbitrary jobs.
10 | 
11 | Tips:
12 | 
13 | * Workers can be terminated using a SuicideJob which raises a TerminationNotice
14 | exception.
15 | 
16 | * Performing a del on a pool object will cause the pool to terminate all of its
17 | workers.
18 | 
19 | * WorkerPool implements a simple map method which allows distributing work in a
20 | similar fashion as using a normal map operation.
21 | 
22 | * EquippedWorkers can be used to maintain an active resource which is required
23 | for performing a specialized type of job.
24 | 
25 | """
26 | 
27 | from .exceptions import *
28 | from .jobs import *
29 | from .pools import *
30 | from .workers import *
31 | 


--------------------------------------------------------------------------------
/lib/workerpool/exceptions.py:
--------------------------------------------------------------------------------
 1 | # exceptions.py - Exceptions used in the operation of a worker pool
 2 | # Copyright (c) 2008 Andrey Petrov
 3 | #
 4 | # This module is part of workerpool and is released under
 5 | # the MIT license: http://www.opensource.org/licenses/mit-license.php
 6 | 
 7 | class TerminationNotice(Exception):
 8 |     "This exception is raised inside a thread when it's time for it to die."
 9 |     pass
10 | 


--------------------------------------------------------------------------------
/lib/workerpool/jobs.py:
--------------------------------------------------------------------------------
 1 | # jobs.py - Generic jobs used with the worker pool
 2 | # Copyright (c) 2008 Andrey Petrov
 3 | #
 4 | # This module is part of workerpool and is released under
 5 | # the MIT license: http://www.opensource.org/licenses/mit-license.php
 6 | 
 7 | from .exceptions import TerminationNotice
 8 | 
 9 | __all__ = ['Job', 'SuicideJob', 'SimpleJob']
10 | 
11 | class Job(object):
12 |     "Interface for a Job object."
13 |     def __init__(self):
14 |         pass
15 | 
16 |     def run(self):
17 |         "The actual task for the job should be implemented here."
18 |         pass
19 | 
20 | class SuicideJob(Job):
21 |     "A worker receiving this job will commit suicide."
22 |     def run(self, **kw):
23 |         raise TerminationNotice()
24 | 
25 | class SimpleJob(Job):
26 |     """
27 |     Given a `result` queue, a `method` pointer, and an `args` dictionary or
28 |     list, the method will execute r = method(*args) or r = method(**args), 
29 |     depending on args' type, and perform result.put(r).
30 |     """
31 |     def __init__(self, result, method, args=[]):
32 |         self.result = result
33 |         self.method = method
34 |         self.args = args
35 | 
36 |     def run(self):
37 |         if isinstance(self.args, list) or isinstance(self.args, tuple):
38 |             r = self.method(*self.args)
39 |         elif isinstance(self.args, dict):
40 |             r = self.method(**self.args)
41 |         self._return(r)
42 | 
43 |     def _return(self, r):
44 |         "Handle return value by appending to the ``self.result`` queue."
45 |         self.result.put(r)
46 | 


--------------------------------------------------------------------------------
/lib/workerpool/pools.py:
--------------------------------------------------------------------------------
  1 | # workerpool.py - Module for distributing jobs to a pool of worker threads.
  2 | # Copyright (c) 2008 Andrey Petrov
  3 | #
  4 | # This module is part of workerpool and is released under
  5 | # the MIT license: http://www.opensource.org/licenses/mit-license.php
  6 | 
  7 | 
  8 | from queue import Queue
  9 | import collections
 10 | if not hasattr(Queue, 'task_done'):
 11 |     # Graft Python 2.5's Queue functionality onto Python 2.4's implementation
 12 |     # TODO: The extra methods do nothing for now. Make them do something.
 13 |     from .QueueWrapper import Queue
 14 | 
 15 | from .workers import Worker
 16 | from .jobs import SimpleJob, SuicideJob
 17 | 
 18 | 
 19 | __all__ = ['WorkerPool', 'default_worker_factory']
 20 | 
 21 | 
 22 | def default_worker_factory(job_queue):
 23 |     return Worker(job_queue)
 24 | 
 25 | 
 26 | class WorkerPool(Queue):
 27 |     """
 28 |     WorkerPool servers two functions: It is a Queue and a master of Worker
 29 |     threads. The Queue accepts Job objects and passes it on to Workers, who are
 30 |     initialized during the construction of the pool and by using grow().
 31 | 
 32 |     Jobs are inserted into the WorkerPool with the `put` method.
 33 |     Hint: Have the Job append its result into a shared queue that the caller
 34 |     holds and then the caller reads an expected number of results from it.
 35 | 
 36 |     The shutdown() method must be explicitly called to terminate the Worker
 37 |     threads when the pool is no longer needed.
 38 | 
 39 |     Construction parameters:
 40 | 
 41 |     size = 1
 42 |         Number of active worker threads the pool should contain.
 43 | 
 44 |     maxjobs = 0
 45 |         Maximum number of jobs to allow in the queue at a time. Will block on
 46 |         `put` if full.
 47 | 
 48 |     default_worker = default_worker_factory
 49 |         The default worker factory is called with one argument, which is the
 50 |         jobs Queue object that it will read from to acquire jobs. The factory
 51 |         will produce a Worker object which will be added to the pool.
 52 |     """
 53 |     def __init__(self, size=1, maxjobs=0, worker_factory=default_worker_factory):
 54 |         if not isinstance(worker_factory, collections.Callable):
 55 |             raise TypeError("worker_factory must be callable")
 56 | 
 57 |         self.worker_factory = worker_factory # Used to build new workers
 58 |         self._size = 0 # Number of active workers we have
 59 | 
 60 |         # Initialize the Queue
 61 |         Queue.__init__(self, maxjobs) # The queue contains job that are read by workers
 62 |         self._jobs = self # Pointer to the queue, for backwards compatibility with version 0.9.1 and earlier
 63 | 
 64 |         # Hire some workers!
 65 |         for i in range(size):
 66 |             self.grow()
 67 | 
 68 |     def grow(self):
 69 |         "Add another worker to the pool."
 70 |         t = self.worker_factory(self)
 71 |         t.start()
 72 |         self._size += 1
 73 | 
 74 |     def shrink(self):
 75 |         "Get rid of one worker from the pool. Raises IndexError if empty."
 76 |         if self._size <= 0:
 77 |             raise IndexError("pool is already empty")
 78 |         self._size -= 1
 79 |         self.put(SuicideJob())
 80 | 
 81 |     def shutdown(self):
 82 |         "Retire the workers."
 83 |         for i in range(self.size()):
 84 |             self.put(SuicideJob())
 85 | 
 86 |     def size(self):
 87 |         "Approximate number of active workers (could be more if a shrinking is in progress)."
 88 |         return self._size
 89 | 
 90 |     def map(self, fn, *seq):
 91 |         "Perform a map operation distributed among the workers. Will block until done."
 92 |         results = Queue()
 93 |         args = list(zip(*seq))
 94 |         for seq in args:
 95 |             j = SimpleJob(results, fn, seq)
 96 |             self.put(j)
 97 | 
 98 |         # Aggregate results
 99 |         r = []
100 |         for i in range(len(args)):
101 |             r.append(results.get())
102 | 
103 |         return r
104 | 
105 |     def wait(self):
106 |         "DEPRECATED: Use join() instead."
107 |         self.join()
108 | 


--------------------------------------------------------------------------------
/lib/workerpool/workers.py:
--------------------------------------------------------------------------------
 1 | # workers.py - Worker objects who become members of a worker pool
 2 | # Copyright (c) 2008 Andrey Petrov
 3 | #
 4 | # This module is part of workerpool and is released under
 5 | # the MIT license: http://www.opensource.org/licenses/mit-license.php
 6 | 
 7 | from threading import Thread
 8 | from .jobs import Job, SimpleJob
 9 | from .exceptions import TerminationNotice
10 | 
11 | __all__ = ['Worker', 'EquippedWorker']
12 | 
13 | class Worker(Thread):
14 |     """
15 |     A loyal worker who will pull jobs from the `jobs` queue and perform them.
16 | 
17 |     The run method will get jobs from the `jobs` queue passed into the
18 |     constructor, and execute them. After each job, task_done() must be executed
19 |     on the `jobs` queue in order for the pool to know when no more jobs are
20 |     being processed.
21 |     """
22 | 
23 |     def __init__(self, jobs):
24 |         self.jobs = jobs
25 |         Thread.__init__(self)
26 | 
27 |     def run(self):
28 |         "Get jobs from the queue and perform them as they arrive."
29 |         while 1:
30 |             # Sleep until there is a job to perform.
31 |             job = self.jobs.get()
32 | 
33 |             # Yawn. Time to get some work done.
34 |             try:
35 |                 job.run()
36 |                 self.jobs.task_done()
37 |             except TerminationNotice:
38 |                 self.jobs.task_done()
39 |                 break 
40 | 
41 | class EquippedWorker(Worker):
42 |     """
43 |     Each worker will create an instance of ``toolbox`` and hang on to it during
44 |     its lifetime. This can be used to pass in a resource such as a persistent 
45 |     connections to services that the worker will be using.
46 | 
47 |     The toolbox factory is called without arguments to produce an instance of
48 |     an object which contains resources necessary for this Worker to perform.
49 |     """
50 |     # TODO: Should a variation of this become the default Worker someday?
51 | 
52 |     def __init__(self, jobs, toolbox_factory):
53 |         self.toolbox = toolbox_factory()
54 |         Worker.__init__(self, jobs)
55 | 
56 |     def run(self):
57 |         "Get jobs from the queue and perform them as they arrive."
58 |         while 1:
59 |             job = self.jobs.get()
60 |             try:
61 |                 job.run(toolbox=self.toolbox)
62 |                 self.jobs.task_done()
63 |             except TerminationNotice:
64 |                 self.jobs.task_done()
65 |                 break
66 | 


--------------------------------------------------------------------------------
/logo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/logo.gif


--------------------------------------------------------------------------------
/pipelines/auto_pipelines/Break up over Heading 1.default/pipeline.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <pipeline>
 3 |         <stage process="ConvertImages" formats="wmf2png, wmf2svg, bmp2png" deleteOriginals="true" autoCrop="false" autoCropThreshold="20"/>
 4 | 
 5 |         <stage process="TransformOpenDocumentToDocBook"/>
 6 | 
 7 |         <stage process="Loop" numberOfTimes="xpathCount://db:chapter">
 8 | 
 9 |                 <stage process="SplitPages"/>
10 | 
11 |                 <stage process="DocBookToXHTML"/>
12 |                 {{custom-stages}}
13 |                 <stage process="Serialize" toFile="{customSection}"/>
14 |         </stage>
15 | 
16 |         <stage process="GetPreface"/>
17 | 
18 |         <stage process="DocBookToXHTML"/>
19 |         {{custom-stages}}
20 |         <stage process="Serialize" toFile="index.html"/>
21 | </pipeline>
22 | 


--------------------------------------------------------------------------------
/pipelines/auto_pipelines/Nothing (one long page)/pipeline.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <pipeline>
 3 |         <stage process="ConvertImages" formats="wmf2png, wmf2svg, bmp2png" deleteOriginals="true" autoCrop="false" autoCropThreshold="20"/>
 4 | 
 5 |         <stage process="TransformOpenDocumentToDocBook"  /><!-- debugAfterOpenDocumentNormalization="true"  -->
 6 | 
 7 |         <stage process="DocBookToXHTML"/>
 8 | 
 9 |         {{custom-stages}}
10 | 
11 |         <stage process="Serialize" toFile="index.html"/>
12 | 
13 | </pipeline>
14 | 


--------------------------------------------------------------------------------
/pipelines/html_to_opendocument/default/pipeline.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <pipeline>
3 |         <stage process="Debug"/>
4 |         <stage process="Serialize" toFile="index.html"/>
5 | </pipeline>
6 | 


--------------------------------------------------------------------------------
/pipelines/pipelines/basic/onepage.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding="UTF-8"?>
 2 | <xsl:stylesheet	version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:html="http://www.w3.org/1999/xhtml" xmlns="http://www.w3.org/1999/xhtml">
 3 | 
 4 | 	<xsl:output method="xml" omit-xml-declaration="no"/>
 5 | 
 6 |     <xsl:template match="html:head">
 7 |         <xsl:copy>
 8 |             <xsl:apply-templates select="@*|node()"/>
 9 |             <link rel="stylesheet" type="text/css" href="/static/default/preview.css"/>
10 |         </xsl:copy>
11 |     </xsl:template>
12 | 
13 |     <xsl:template match="node()|@*">
14 |         <xsl:copy>
15 |             <xsl:apply-templates select="@*|node()"/>
16 |         </xsl:copy>
17 |     </xsl:template>
18 | 
19 | </xsl:stylesheet>
20 | 


--------------------------------------------------------------------------------
/pipelines/pipelines/basic/pipeline.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <autopipeline>
3 |     <stage process="Transform" withFile="onepage.xsl"/>
4 | </autopipeline>
5 | 


--------------------------------------------------------------------------------
/pipelines/pipelines/docbook/pipeline.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <pipeline>
3 |     <stage process="ConvertImages" formats="wmf2png, wmf2svg, bmp2png" deleteOriginals="true" autoCrop="false" autoCropThreshold="20"/>
4 |     <stage process="TransformOpenDocumentToDocBook"/>
5 |     <stage process="Serialize" toFile="index.xml"/>
6 | </pipeline>
7 | 


--------------------------------------------------------------------------------
/pipelines/pipelines/open document/pipeline.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <!-- "open document" is reserved for when people want the OpenDocument file back directly without further Docvert interference. This pipeline won't be called. -->
3 | <pipeline/>
4 | 


--------------------------------------------------------------------------------
/pipelines/pipelines/pretty-lists/pipeline.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <autopipeline>
3 |     <stage process="Transform" withFile="pretty-lists.xsl"/>
4 | </autopipeline>
5 | 


--------------------------------------------------------------------------------
/pipelines/pipelines/pretty-lists/pretty-lists.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding="UTF-8"?>
 2 | <xsl:stylesheet
 3 | 	version="1.0"
 4 | 	xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 5 | 	xmlns="http://www.w3.org/1999/xhtml"
 6 | 	xmlns:html="http://www.w3.org/1999/xhtml"
 7 | 	exclude-result-prefixes="db html xlink"
 8 | 	>
 9 | 
10 | 	<xsl:output
11 | 		method="xml"
12 | 		version="1.0"
13 | 		encoding="UTF-8"
14 | 		indent="yes"
15 | 		doctype-public="-//W3C//DTD XHTML 1.0 Strict//EN"
16 | 		doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
17 | 		omit-xml-declaration="yes"/>
18 | 
19 |     <xsl:template match="html:ol">
20 |         <xsl:element name="ol">
21 |             <xsl:attribute name="class">
22 |                 <xsl:text>custom-order</xsl:text>
23 |                 <xsl:if test="@role='NestedOrderedList'">
24 |                     <xsl:text> NestedOrderedList</xsl:text>
25 |                 </xsl:if>
26 |             </xsl:attribute>
27 |             <xsl:choose>
28 |                 <xsl:when test="ancestor::html:ol">
29 |                     <xsl:attribute name="style">
30 |                         <xsl:variable name="depth" select="1 + count(ancestor::html:ol)"/>
31 |                         <xsl:text>list-style:none;padding-left:</xsl:text>
32 |                         <xsl:choose>
33 |                             <xsl:when test="$depth = 1">
34 |                                 <xsl:text>2</xsl:text>
35 |                             </xsl:when>
36 |                             <xsl:otherwise>
37 |                                 <xsl:value-of select="$depth * 1.3"/>
38 |                             </xsl:otherwise>
39 |                         </xsl:choose>
40 |                         <xsl:text>em;</xsl:text>
41 |                         <xsl:value-of select="@style"/>
42 |                     </xsl:attribute>
43 |                 </xsl:when>
44 |                 <xsl:when test="@style">
45 |                     <xsl:attribute name="style">
46 |                         <xsl:value-of select="@style"/>
47 |                     </xsl:attribute>
48 |                 </xsl:when>
49 |             </xsl:choose>
50 |             <xsl:if test="@start">
51 |               <xsl:attribute name="start"><xsl:value-of select="@start"/></xsl:attribute>
52 |             </xsl:if>
53 |             <xsl:apply-templates/>
54 |         </xsl:element>
55 |     </xsl:template>
56 | 
57 |     <xsl:template match="html:li[parent::html:ol]">
58 |         <xsl:variable name="depth" select="count(ancestor::html:li)+1"/>
59 |         <xsl:variable name="offset">
60 |             <xsl:choose>
61 |                 <xsl:when test="$depth = 1">
62 |                     <xsl:text>2</xsl:text>
63 |                 </xsl:when>
64 |                 <xsl:otherwise>
65 |                     <xsl:value-of select="$depth * 1.3"/>
66 |                 </xsl:otherwise>
67 |             </xsl:choose>
68 |         </xsl:variable>
69 |         <xsl:variable name="width" select="$offset"/>
70 |         <xsl:element name="li">
71 |             <xsl:attribute name="style">list-style:none</xsl:attribute>
72 |             <xsl:element name="span">
73 | 	            <xsl:attribute name="class">NestedOrderedListNumbering</xsl:attribute>
74 |                 <xsl:attribute name="style">float:left;width:<xsl:value-of select="$width"/>em;margin-left:-<xsl:value-of select="$offset"/>em;</xsl:attribute>
75 |                 <xsl:number level="multiple"/>
76 |             </xsl:element>
77 |             <xsl:text> </xsl:text>
78 |             <xsl:apply-templates/>
79 |         </xsl:element>
80 |     </xsl:template>
81 | 
82 |     <xsl:template match="node()|@*">
83 |         <xsl:copy>
84 |             <xsl:apply-templates select="@*|node()"/>
85 |         </xsl:copy>
86 |     </xsl:template>
87 | 
88 | </xsl:stylesheet>
89 | 


--------------------------------------------------------------------------------
/pipelines/pipelines/sanitised open document/pipeline.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <pipeline>
3 |         <stage process="Debug"/>
4 |         <stage process="NormalizeOpenDocument"/>
5 |         <stage process="SerializeOpenDocument" toFile="index.odt"/>
6 | </pipeline>
7 | 


--------------------------------------------------------------------------------
/pipelines/pipelines/ssc/pipeline.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <autopipeline>
3 |     <stage process="WriteMetaData"/>
4 |     <stage process="Transform" withFile="pretty-lists.xsl"/> 
5 | 
6 | </autopipeline>
7 | 


--------------------------------------------------------------------------------
/pipelines/pipelines/ssc/pretty-lists.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding="UTF-8"?>
 2 | <xsl:stylesheet
 3 | 	version="1.0"
 4 | 	xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 5 | 	xmlns="http://www.w3.org/1999/xhtml"
 6 | 	xmlns:html="http://www.w3.org/1999/xhtml"
 7 | 	exclude-result-prefixes="db html xlink"
 8 | 	>
 9 | 
10 | 	<xsl:output
11 | 		method="xml"
12 | 		version="1.0"
13 | 		encoding="UTF-8"
14 | 		indent="yes"
15 | 		doctype-public="-//W3C//DTD XHTML 1.0 Strict//EN"
16 | 		doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
17 | 		omit-xml-declaration="yes"/>
18 | 
19 |     <xsl:template match="html:ol">
20 |         <xsl:element name="ol">
21 |             <xsl:attribute name="class">
22 |                 <xsl:text>custom-order</xsl:text>
23 |                 <xsl:if test="@role='NestedOrderedList'">
24 |                     <xsl:text> NestedOrderedList</xsl:text>
25 |                 </xsl:if>
26 |             </xsl:attribute>
27 |             <xsl:choose>
28 |                 <xsl:when test="ancestor::html:ol">
29 |                     <xsl:attribute name="style">
30 |                         <xsl:variable name="depth" select="1 + count(ancestor::html:ol)"/>
31 |                         <xsl:text>list-style:none;padding-left:</xsl:text>
32 |                         <xsl:choose>
33 |                             <xsl:when test="$depth = 1">
34 |                                 <xsl:text>2</xsl:text>
35 |                             </xsl:when>
36 |                             <xsl:otherwise>
37 |                                 <xsl:value-of select="$depth * 1.3"/>
38 |                             </xsl:otherwise>
39 |                         </xsl:choose>
40 |                         <xsl:text>em;</xsl:text>
41 |                         <xsl:value-of select="@style"/>
42 |                     </xsl:attribute>
43 |                 </xsl:when>
44 |                 <xsl:when test="@style">
45 |                     <xsl:attribute name="style">
46 |                         <xsl:value-of select="@style"/>
47 |                     </xsl:attribute>
48 |                 </xsl:when>
49 |             </xsl:choose>
50 |             <xsl:if test="@start">
51 |               <xsl:attribute name="start"><xsl:value-of select="@start"/></xsl:attribute>
52 |             </xsl:if>
53 |             <xsl:apply-templates/>
54 |         </xsl:element>
55 |     </xsl:template>
56 | 
57 |     <xsl:template match="html:li[parent::html:ol]">
58 |         <xsl:variable name="depth" select="count(ancestor::html:li)+1"/>
59 |         <xsl:variable name="offset">
60 |             <xsl:choose>
61 |                 <xsl:when test="$depth = 1">
62 |                     <xsl:text>2</xsl:text>
63 |                 </xsl:when>
64 |                 <xsl:otherwise>
65 |                     <xsl:value-of select="$depth * 1.3"/>
66 |                 </xsl:otherwise>
67 |             </xsl:choose>
68 |         </xsl:variable>
69 |         <xsl:variable name="width" select="$offset"/>
70 |         <xsl:element name="li">
71 |             <xsl:attribute name="style">list-style:none</xsl:attribute>
72 |             <xsl:if test="*[not(self::html:ol)]">
73 |                 <xsl:element name="span">
74 | 	                <xsl:attribute name="class">NestedOrderedListNumbering</xsl:attribute>
75 |                     <xsl:attribute name="style">float:left;width:<xsl:value-of select="$width"/>em;margin-left:-<xsl:value-of select="$offset"/>em;</xsl:attribute>
76 |                     <xsl:for-each select="ancestor::html:ol">
77 |                         <xsl:choose>
78 |                             <xsl:when test="@start"><xsl:value-of select="@start"/></xsl:when>
79 |                             <xsl:otherwise><xsl:value-of select="count(preceding-sibling::html:ol)+1"/></xsl:otherwise>
80 |                         </xsl:choose>
81 |                         <xsl:if test="position() &lt; last()">
82 |                             <xsl:text>.</xsl:text>
83 |                         </xsl:if>
84 |                     </xsl:for-each>
85 |                 </xsl:element>
86 |                 <xsl:text> </xsl:text>
87 |             </xsl:if>
88 |             <xsl:apply-templates/>
89 |         </xsl:element>
90 |     </xsl:template>
91 | 
92 |     <xsl:template match="node()|@*">
93 |         <xsl:copy>
94 |             <xsl:apply-templates select="@*|node()"/>
95 |         </xsl:copy>
96 |     </xsl:template>
97 | 
98 | </xsl:stylesheet>
99 | 


--------------------------------------------------------------------------------
/pipelines/pipelines/web standards/mytheme2.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding="UTF-8"?>
 2 | <xsl:stylesheet	version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:html="http://www.w3.org/1999/xhtml" xmlns="http://www.w3.org/1999/xhtml">
 3 | 
 4 | 	<xsl:output method="xml" omit-xml-declaration="no"  />
 5 | 
 6 |     <xsl:template match="html:head">
 7 |         <xsl:copy>
 8 |             <xsl:apply-templates select="@*|node()"/>
 9 |             <link rel="stylesheet" type="text/css" href="/static/default/preview.css"/>
10 |         </xsl:copy>
11 |     </xsl:template>
12 | 
13 |     <xsl:template match="node()|@*">
14 |         <xsl:copy>
15 |             <xsl:apply-templates select="@*|node()"/>
16 |         </xsl:copy>
17 |     </xsl:template>
18 | 
19 | </xsl:stylesheet>
20 | 


--------------------------------------------------------------------------------
/pipelines/pipelines/web standards/pipeline.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <autopipeline>
3 |     <stage process="Transform" withFile="mytheme2.xsl"/>
4 | </autopipeline>
5 | 


--------------------------------------------------------------------------------
/pipelines/tests/bold-italics/bold-italics.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/pipelines/tests/bold-italics/bold-italics.odt


--------------------------------------------------------------------------------
/pipelines/tests/bold-italics/bold-italics.txt:
--------------------------------------------------------------------------------
1 | Paragraph one with
2 | inline italics
3 | Paragraph two with
4 | inline bold
5 | Paragraph three with
6 | inline-bold and italics
7 | 


--------------------------------------------------------------------------------
/pipelines/tests/bold-italics/bold-italics.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding="UTF-8"?>
 2 | <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:db="http://docbook.org/ns/docbook" xmlns="docvert:5" xmlns:html="http://www.w3.org/1999/xhtml" xmlns:xlink="http://www.w3.org/1999/xlink" exclude-result-prefixes="db html xlink">
 3 | <xsl:output method="xml" version="1.0" encoding="utf-8" omit-xml-declaration="yes"/>
 4 | 
 5 | <xsl:template match="/">
 6 |     <group>
 7 |         <xsl:if test="count(//html:em) != 2">
 8 |             <fail>There were not as many emphasis tags as expected. There were <xsl:value-of select="count(//html:em)"/> em tags.</fail>
 9 |         </xsl:if>
10 |         <xsl:if test="count(//html:strong) != 2">
11 |             <fail>There were not as many strong tags as expected. There were <xsl:value-of select="count(//html:strong)"/> strong tags.</fail>
12 |         </xsl:if>
13 | 
14 |         <xsl:apply-templates/>
15 |     </group>
16 | </xsl:template>
17 | 
18 | <xsl:template match="html:em[count(preceding::html:em) = 0]">
19 |     <!-- first em -->
20 |     <xsl:choose>
21 |         <xsl:when test="normalize-space(.) = 'inline italics' ">
22 |             <pass>Emphasis: First em contains 'inline italics' as expected. Contained "<xsl:value-of select="normalize-space(.)"/>"</pass>
23 |         </xsl:when>
24 |         <xsl:otherwise>
25 |             <fail>Emphasis: First em does not contain 'inline italics' as expected. Contained "<xsl:value-of select="normalize-space(.)"/>"</fail>
26 |         </xsl:otherwise>
27 |     </xsl:choose>
28 | </xsl:template>
29 | 
30 | <xsl:template match="html:em[count(preceding::html:em) = 1]">
31 |     <!-- second em -->
32 |     <xsl:choose>
33 |         <xsl:when test="normalize-space(.) = 'inline-bold and italics' ">
34 |             <pass>Emphasis: Second em contains 'inline-bold and italics' as expected. Contained "<xsl:value-of select="normalize-space(.)"/>"</pass>
35 |         </xsl:when>
36 |         <xsl:otherwise>
37 |             <fail>Emphasis: Second em does not contain 'inline-bold and italics' as expected. Contained "<xsl:value-of select="normalize-space(.)"/>"</fail>
38 |         </xsl:otherwise>
39 |     </xsl:choose>
40 | </xsl:template>
41 | 
42 | <xsl:template match="html:strong[count(preceding::html:strong) = 0]">
43 |     <!-- first strong -->
44 |     <xsl:choose>
45 |         <xsl:when test="normalize-space(.) = 'inline bold' ">
46 |             <pass>Strong: First strong contains 'inline bold' as expected. Contained "<xsl:value-of select="normalize-space(.)"/>"</pass>
47 |         </xsl:when>
48 |         <xsl:otherwise>
49 |             <fail>Strong: First strong does not contain 'inline bold' as expected. Contained "<xsl:value-of select="normalize-space(.)"/>"</fail>
50 |         </xsl:otherwise>
51 |     </xsl:choose>
52 | </xsl:template>
53 | 
54 | <xsl:template match="html:strong[count(preceding::html:strong) = 1]">
55 |     <!-- second strong -->
56 |     <xsl:choose>
57 |         <xsl:when test="normalize-space(.) = 'inline-bold and italics' ">
58 |             <pass>Strong: Second strong contains 'inline-bold and italics' as expected. Contained "<xsl:value-of select="normalize-space(.)"/>"</pass>
59 |         </xsl:when>
60 |         <xsl:otherwise>
61 |             <fail>Strong: Second strong does not contain 'inline-bold and italics' as expected. Contained "<xsl:value-of select="normalize-space(.)"/>"</fail>
62 |         </xsl:otherwise>
63 |     </xsl:choose>
64 | </xsl:template>
65 | 
66 | <xsl:template match="text()"/>
67 | 
68 | </xsl:stylesheet>
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/pipelines/tests/bold-italics/pipeline.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <pipeline>
 3 |     <stage process="Generate" withFile="bold-italics.odt"/>
 4 |     <stage process="Test" withFile="bold-italics.txt" prefix="Pre-normalized OpenDocument"/>
 5 |     <stage process="TransformOpenDocumentToDocBook"/>
 6 |     <stage process="Test" withFile="bold-italics.txt" prefix="DocBook"/>
 7 |     <stage process="DocBookToXHTML"/>
 8 |     <stage process="Test" withFile="bold-italics.txt" prefix="HTML"/>
 9 |     <stage process="Test" withFile="bold-italics.xsl" prefix="HTML"/>
10 | </pipeline>
11 | 


--------------------------------------------------------------------------------
/pipelines/tests/footnotes/footnotes.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/pipelines/tests/footnotes/footnotes.doc


--------------------------------------------------------------------------------
/pipelines/tests/footnotes/footnotes.txt:
--------------------------------------------------------------------------------
1 | By SCOTT MCNEALY
2 | Don't you just hate receiving Word
3 | documents in email messages?
4 | In principle at least
5 | Microsoft Word-processing documents
6 | March 3, 2006; Page A10
7 | 


--------------------------------------------------------------------------------
/pipelines/tests/footnotes/footnotes.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding="UTF-8"?>
 2 | <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:db="http://docbook.org/ns/docbook" xmlns="docvert:5" xmlns:html="http://www.w3.org/1999/xhtml" xmlns:xlink="http://www.w3.org/1999/xlink" exclude-result-prefixes="db html xlink">
 3 | <xsl:output method="xml" version="1.0" encoding="utf-8" omit-xml-declaration="yes"/>
 4 | 
 5 | <xsl:template match="/">
 6 |     <group>
 7 |         <xsl:if test="count(//db:footnote) != 2">
 8 |             <fail>There were not as many footnote tags as expected. There were <xsl:value-of select="count(//db:footnote)"/> footnote tags.</fail>
 9 |         </xsl:if>
10 |         <xsl:apply-templates/>
11 |     </group>
12 | </xsl:template>
13 | 
14 | <xsl:template match="db:footnote[count(preceding::db:footnote) = 0]">
15 |     <!-- first footnote -->
16 |     <xsl:choose>
17 |         <xsl:when test="@label = '1' and count(db:para) = 1 and normalize-space(db:para) = 'March 3, 2006; Page A10' ">
18 |             <pass>First footnote correctly formatted.</pass>
19 |         </xsl:when>
20 |         <xsl:otherwise>
21 |             <fail>First footnote incorrectly formatted.</fail>
22 |         </xsl:otherwise>
23 |     </xsl:choose>
24 | </xsl:template>
25 | 
26 | <xsl:template match="db:footnote[count(preceding::db:footnote) = 1]">
27 |     <!-- second footnote -->
28 |     <xsl:choose>
29 |         <xsl:when test="@label = '2' and count(db:para) = 1 and normalize-space(db:para) = 'Microsoft Word-processing documents'">
30 |             <pass>Second footnote correctly formatted.</pass>
31 |         </xsl:when>
32 |         <xsl:otherwise>
33 |             <fail>Second footnote incorrectly formatted.</fail>
34 |         </xsl:otherwise>
35 |     </xsl:choose>
36 | </xsl:template>
37 | 
38 | <xsl:template match="text()"/>
39 | 
40 | </xsl:stylesheet>
41 | 
42 | 
43 | 


--------------------------------------------------------------------------------
/pipelines/tests/footnotes/pipeline.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <pipeline>
 3 |     <stage process="Generate" withFile="footnotes.doc"/>
 4 |     <stage process="Test" withFile="footnotes.txt" prefix="Pre-normalized OpenDocument"/>
 5 |     <stage process="TransformOpenDocumentToDocBook"/>
 6 |     <stage process="Test" withFile="footnotes.txt" prefix="DocBook"/>
 7 |     <stage process="Test" withFile="footnotes.xsl" prefix="DocBook"/>
 8 |     <stage process="DocBookToXHTML"/>
 9 |     <stage process="Test" withFile="footnotes.txt" prefix="HTML"/>
10 | </pipeline>
11 | 


--------------------------------------------------------------------------------
/pipelines/tests/headings-and-paragraphs/opendocument.rng:
--------------------------------------------------------------------------------
1 | <element name="text:p" xmlns="http://relaxng.org/ns/structure/1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0">
2 |   <zeroOrMore>
3 |      <element name="b">
4 |        <text />
5 |      </element>
6 |   </zeroOrMore>
7 | </element>
8 | 


--------------------------------------------------------------------------------
/pipelines/tests/headings-and-paragraphs/pipeline.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <pipeline>
 3 |     <stage process="Generate" withFile="sample-document.doc"/>
 4 |     <stage process="Test" withFile="sample-document.txt" prefix="Pre-normalized OpenDocument"/>
 5 |     <stage process="TransformOpenDocumentToDocBook"/>
 6 |     <stage process="Test" withFile="sample-document.txt" prefix="DocBook"/>
 7 |     <stage process="Test" withFile="sample-document-docbook-headings-and-paragraphs.txt" prefix="DocBook"/>
 8 |     <stage process="DocBookToXHTML"/>
 9 |     <stage process="Test" withFile="sample-document-html-headings-and-paragraphs.txt" prefix="HTML"/>
10 |     <stage process="Test" withFile="sample-document.txt" prefix="HTML"/>
11 | </pipeline>
12 | 


--------------------------------------------------------------------------------
/pipelines/tests/headings-and-paragraphs/sample-document-docbook-headings-and-paragraphs.txt:
--------------------------------------------------------------------------------
1 | <db:title>We Can Put an End to Word Attachments</db:title>
2 | <db:title>Here some list formatting to check</db:title>
3 | 


--------------------------------------------------------------------------------
/pipelines/tests/headings-and-paragraphs/sample-document-html-headings-and-paragraphs.txt:
--------------------------------------------------------------------------------
1 | <h1>We Can Put an End to Word Attachments</h1>
2 | <h1>Here some list formatting to check</h1>
3 | <p>by Richard M. Stallman, Jan 2002</p>
4 | <h2>An example response</h2>
5 | <h2>Another example response1</h2>
6 | <h2>Another example response2</h2>
7 | <h2>Other example responses</h2>
8 | 


--------------------------------------------------------------------------------
/pipelines/tests/headings-and-paragraphs/sample-document.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/pipelines/tests/headings-and-paragraphs/sample-document.doc


--------------------------------------------------------------------------------
/pipelines/tests/headings-and-paragraphs/sample-document.txt:
--------------------------------------------------------------------------------
1 | A collection of ideas about Word Processing (also a test document for Docvert)
2 | Here's a nested bulleted list
3 | Here some list formatting to check
4 | An example response
5 | Another example response1
6 | Another example response2
7 | Other example responses
8 | 


--------------------------------------------------------------------------------
/pipelines/tests/images/emf-copyright.txt:
--------------------------------------------------------------------------------
1 | All EMF files except the last one produced by http://pyemf.sourceforge.net and licensed under the LGPL 3 (linked on their site)
2 | The last EMF file taken from FreeHep http://java.freehep.org/vectorgraphics/ and licensed under the LGPL 3 (linked on their site)
3 | 


--------------------------------------------------------------------------------
/pipelines/tests/images/emf-sample.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/pipelines/tests/images/emf-sample.doc


--------------------------------------------------------------------------------
/pipelines/tests/images/emf-sample.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/pipelines/tests/images/emf-sample.odt


--------------------------------------------------------------------------------
/pipelines/tests/images/pipeline.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <pipeline>
 3 |     <stage process="Generate" withFile="emf-sample.odt"/>
 4 |     <stage process="ConvertImages" formats="emf2png"/>
 5 |     <stage process="Test" extensionExist=".png" extensionExistCount="5"/><!-- total images is now 5 -->
 6 | </pipeline>
 7 | <!-- 
 8 | 
 9 |     <stage process="Generate" withFile="wmf-sample.doc"/>
10 |     <stage process="ConvertImages" formats="wmf2png"/>
11 |     <stage process="Test" extensionExist=".png" extensionExistCount="6"/> total images now six
12 | -->
13 | 


--------------------------------------------------------------------------------
/pipelines/tests/images/wmf-copyright.txt:
--------------------------------------------------------------------------------
1 | Copied from Ubuntu 10.10 Desktop in /usr/lib/openoffice/basis3.2/share/template/en-US/wizard/bitmap/germany.wmf
2 | Image file licensed under OpenOffice.org license of the GPLv3
3 | 


--------------------------------------------------------------------------------
/pipelines/tests/images/wmf-sample.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/pipelines/tests/images/wmf-sample.doc


--------------------------------------------------------------------------------
/pipelines/tests/images/wmf-sample.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/pipelines/tests/images/wmf-sample.odt


--------------------------------------------------------------------------------
/pipelines/tests/invalid-odf/invalid-page.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/pipelines/tests/invalid-odf/invalid-page.odt


--------------------------------------------------------------------------------
/pipelines/tests/invalid-odf/invalid-page.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding="UTF-8"?>
 2 | <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns="docvert:5" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0">
 3 | <xsl:output method="xml" version="1.0" encoding="utf-8" omit-xml-declaration="yes"/>
 4 | 
 5 | <xsl:template match="/">
 6 |     <group>
 7 |         <xsl:apply-templates/>
 8 |     </group>
 9 | </xsl:template>
10 | 
11 | <xsl:template match="draw:frame">
12 |      <xsl:choose>
13 |         <xsl:when test="@text:anchor-page-number='0'">
14 |             <fail>A &lt;draw:frame&gt; shouldn't have a page number of zero.</fail>
15 |         </xsl:when>
16 |         <xsl:otherwise>
17 |             <pass>A &lt;draw:frame&gt; had a valid anchoring.</pass>
18 |         </xsl:otherwise>
19 |     </xsl:choose>
20 | </xsl:template>
21 | 
22 | <xsl:template match="text()"/>
23 | 
24 | </xsl:stylesheet>
25 | 
26 | 
27 | 


--------------------------------------------------------------------------------
/pipelines/tests/invalid-odf/pipeline.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <pipeline>
3 |     <stage process="Generate" withFile="invalid-page.odt"/>
4 |     <stage process="Transform" withFile="internal://normalize-opendocument.xsl"/>
5 |     <stage process="Test" withFile="invalid-page.xsl"/>
6 |   
7 | </pipeline>
8 | 


--------------------------------------------------------------------------------
/pipelines/tests/links/links.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/pipelines/tests/links/links.odt


--------------------------------------------------------------------------------
/pipelines/tests/links/links.txt:
--------------------------------------------------------------------------------
 1 | Preface
 2 | later
 3 | Chapter 1
 4 | paragraphs of content1.1
 5 | paragraphs of content1.2
 6 | Chapter 2
 7 | paragraphs of content2.1
 8 | paragraphs of content2.2
 9 | Chapter 3
10 | paragraphs of content3.1
11 | paragraphs of content3.2
12 | Chapter 4
13 | paragraphs of content4.1
14 | paragraphs of content4.2
15 | paragraphs of content5.2
16 | Chapter 6
17 | paragraphs of content6.1
18 | paragraphs of content6.2
19 | Chapter 7
20 | paragraphs of content7.1
21 | paragraphs of content7.2
22 | heading-column1
23 | heading-column2
24 | Body column 1
25 | Body column 2
26 | Chapter 8
27 | paragraphs of content8.1
28 | paragraphs of content8.2
29 | paragraphs of content9.1
30 | paragraphs of content9.2
31 | 


--------------------------------------------------------------------------------
/pipelines/tests/links/links.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding="UTF-8"?>
 2 | <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:db="http://docbook.org/ns/docbook" xmlns="docvert:5" xmlns:html="http://www.w3.org/1999/xhtml" xmlns:xlink="http://www.w3.org/1999/xlink" exclude-result-prefixes="db html xlink">
 3 | <xsl:output method="xml" version="1.0" encoding="utf-8" omit-xml-declaration="yes"/>
 4 | 
 5 | <xsl:template match="/">
 6 |     <group>
 7 |         <xsl:apply-templates/>
 8 |     </group>
 9 | </xsl:template>
10 | 
11 | <xsl:template match="html:a[@href and starts-with(@href, '#')]">
12 |     <xsl:variable name="target" select="substring(@href, 2)"/>
13 |     <xsl:choose>
14 |         <xsl:when test="//*[@id=$target]">
15 |             <pass>Link has source:target of "<xsl:value-of select="$target"/>"</pass>
16 |         </xsl:when>
17 |         <xsl:otherwise>
18 |             <fail>Link target of "<xsl:value-of select="$target"/>" doesn't exist</fail>
19 |         </xsl:otherwise>
20 |     </xsl:choose>
21 | </xsl:template>
22 | 
23 | <xsl:template match="html:a[@id]">
24 |     <xsl:variable name="target" select="@id"/>
25 |     <xsl:choose>
26 |         <xsl:when test="//html:a[@href=concat('#', $target)]">
27 |             <pass>Link has source:target of "<xsl:value-of select="$target"/>"</pass>
28 |         </xsl:when>
29 |         <xsl:otherwise>
30 |             <fail>Link source of "<xsl:value-of select="$target"/>" doesn't exist</fail>
31 |         </xsl:otherwise>
32 |     </xsl:choose>
33 | </xsl:template>
34 | 
35 | <xsl:template match="text()"/>
36 | 
37 | </xsl:stylesheet>
38 | 
39 | 
40 | 


--------------------------------------------------------------------------------
/pipelines/tests/links/pipeline.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <pipeline>
 3 |     <stage process="Generate" withFile="links.odt"/>
 4 |     <stage process="Test" withFile="links.txt" prefix="Pre-normalized OpenDocument"/>
 5 |     <stage process="TransformOpenDocumentToDocBook"/>
 6 |     <stage process="Test" withFile="links.txt" prefix="DocBook"/>
 7 |     <stage process="DocBookToXHTML"/>
 8 |     <stage process="Test" withFile="links.txt" prefix="HTML"/>
 9 |     <stage process="Test" withFile="links.xsl" prefix="HTML"/>
10 | </pipeline>
11 | 


--------------------------------------------------------------------------------
/pipelines/tests/lists/continuation.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/pipelines/tests/lists/continuation.odt


--------------------------------------------------------------------------------
/pipelines/tests/lists/continuation.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding="UTF-8"?>
 2 | <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:db="http://docbook.org/ns/docbook" xmlns="docvert:5" xmlns:html="http://www.w3.org/1999/xhtml" xmlns:xlink="http://www.w3.org/1999/xlink" exclude-result-prefixes="db html xlink">
 3 | <xsl:output method="xml" version="1.0" encoding="utf-8" omit-xml-declaration="yes"/>
 4 | 
 5 | <xsl:template match="/">
 6 |     <group>
 7 |         <xsl:if test="count(//html:ol) != 6">
 8 |             <fail>There were not as many lists in continuation.odt as expected. There were <xsl:value-of select="count(//html:ol)"/> lists.</fail>
 9 |         </xsl:if>
10 |         <xsl:apply-templates/>
11 |     </group>
12 | </xsl:template>
13 | 
14 | <xsl:template match="html:ol[count(preceding::html:ol) = 1]">
15 |     <!-- first list -->
16 |     <xsl:choose>
17 |         <xsl:when test="@start = '5' ">
18 |             <pass>List continuation: First list starts counting at 5 as expected.</pass>
19 |         </xsl:when>
20 |         <xsl:otherwise>
21 |             <fail>List continuation: First list starts does not start at 5 as was expected.</fail>
22 |         </xsl:otherwise>
23 |     </xsl:choose>
24 | </xsl:template>
25 | 
26 | <xsl:template match="html:ol[count(preceding::html:ol) = 2]">
27 |     <!-- second list -->
28 |     <xsl:choose>
29 |         <xsl:when test="@start = '8' ">
30 |             <pass>List continuation: Second list starts counting at 8 as expected.</pass>
31 |         </xsl:when>
32 |         <xsl:otherwise>
33 |             <fail>List continuation: Second list starts does not start at 8 as was expected.</fail>
34 |         </xsl:otherwise>
35 |     </xsl:choose>
36 | </xsl:template>
37 | 
38 | <xsl:template match="html:ol[count(preceding::html:ol) = 3]">
39 |     <!-- third list -->
40 |     <xsl:choose>
41 |         <xsl:when test="@start = '11' ">
42 |             <pass>List continuation: Third list starts counting at 11 as expected.</pass>
43 |         </xsl:when>
44 |         <xsl:otherwise>
45 |             <fail>List continuation: Third list starts does not start at 11 as was expected.</fail>
46 |         </xsl:otherwise>
47 |     </xsl:choose>
48 | </xsl:template>
49 | 
50 | 
51 | <xsl:template match="text()"/>
52 | 
53 | </xsl:stylesheet>
54 | 
55 | 
56 | 


--------------------------------------------------------------------------------
/pipelines/tests/lists/docvert-test-five-new.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/pipelines/tests/lists/docvert-test-five-new.odt


--------------------------------------------------------------------------------
/pipelines/tests/lists/docvert-test-five-new.txt:
--------------------------------------------------------------------------------
1 | Return and check the text. Numbering continues, further text shows indent check also tests OK
2 | Return and indent used for next level of list. Numbering continues and indent checked OK
3 | Return gives correct number return
4 | Return and indent for the next line gives correct result
5 | line continued for indent check that shows incorrectly...
6 | The next line is another multi level list. Numbering continues OK
7 | 


--------------------------------------------------------------------------------
/pipelines/tests/lists/docvert-test-five.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/pipelines/tests/lists/docvert-test-five.odt


--------------------------------------------------------------------------------
/pipelines/tests/lists/docvert-test-five.txt:
--------------------------------------------------------------------------------
 1 | This is a bullet point
 2 | This is a 2nd level bullet point
 3 | This is the 3rd level bullet
 4 | This is a numbered list
 5 | Return and check the text
 6 | Next line is multi level numbered list.
 7 | Next level formatted from normal text
 8 | Return and indent used for next level of list.
 9 | Return gives correct number return
10 | Return and indent for the next line gives correct result
11 | 


--------------------------------------------------------------------------------
/pipelines/tests/lists/pipeline.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <pipeline>
 3 |     <stage process="Generate" withFile="sample-document.doc"/>
 4 |     <stage process="Test" withFile="sample-document.txt" prefix="1. Pre-normalized OpenDocument"/>
 5 | 
 6 |     <stage process="TransformOpenDocumentToDocBook"/>
 7 |     <stage process="Test" withFile="sample-document.xsl" prefix="1. Pre-normalized OpenDocument"/>
 8 | 
 9 |     <stage process="Test" withFile="sample-document.txt" prefix="1. DocBook"/>
10 | 
11 | 
12 |     <stage process="Test" withFile="sample-document-docbook-lists.txt" prefix="1. DocBook"/>
13 | 
14 |     <stage process="DocBookToXHTML"/>
15 |     <stage process="Test" withFile="sample-document-html-lists.txt" prefix="1. HTML"/>
16 |     <stage process="Test" withFile="sample-document.txt" prefix="1. HTML"/>
17 | 
18 |     <stage process="Generate" withFile="docvert-test-five.odt"/>
19 |     <stage process="Test" withFile="docvert-test-five.txt" prefix="2. Pre-normalized OpenDocument"/>
20 |     <stage process="TransformOpenDocumentToDocBook"/>
21 |     <stage process="Test" withFile="docvert-test-five.txt" prefix="2. DocBook"/>
22 |     <stage process="DocBookToXHTML"/>
23 |     <stage process="Test" withFile="docvert-test-five.txt" prefix="2. HTML"/>
24 | 
25 |     <stage process="Generate" withFile="table-list.odt"/>
26 |     <stage process="Test" withFile="table-list.txt" prefix="3. Pre-normalized OpenDocument"/>
27 |     <stage process="TransformOpenDocumentToDocBook"/>
28 |     <stage process="Test" withFile="table-list.txt" prefix="3. DocBook"/>
29 |     <stage process="DocBookToXHTML"/>
30 |     <stage process="Test" withFile="table-list.txt" prefix="3. HTML"/>
31 | 
32 |     <stage process="Generate" withFile="continuation.odt"/>
33 |     <stage process="TransformOpenDocumentToDocBook"/>
34 |     <stage process="DocBookToXHTML"/>
35 |     <stage process="Test" withFile="continuation.xsl" prefix="HTML list continuations"/>
36 | 
37 | 
38 |     <stage process="Generate" withFile="docvert-test-five-new.odt"/>
39 |     <stage process="Test" withFile="docvert-test-five-new.txt" prefix="4. Pre-normalized OpenDocument"/>
40 |     <stage process="TransformOpenDocumentToDocBook" />
41 |     <stage process="Test" withFile="docvert-test-five-new.txt" prefix="4. DocBook"/>
42 |     <stage process="DocBookToXHTML"/>
43 |     <stage process="Test" withFile="docvert-test-five-new.txt" prefix="4. HTML"/>
44 | <!-- <stage process="Debug"/> -->
45 | <!-- <stage process="Debug" contentType="text/html"/> -->
46 | 
47 | 
48 | </pipeline>
49 | 


--------------------------------------------------------------------------------
/pipelines/tests/lists/sample-document-docbook-lists.txt:
--------------------------------------------------------------------------------
 1 | <db:listitem><db:para>list1option1</db:para>
 2 | <db:listitem><db:para>list1option2</db:para>
 3 | <db:listitem><db:para>list1option3</db:para>
 4 | <db:listitem><db:para>list2option1</db:para>
 5 | <db:listitem><db:para>list2option2</db:para>
 6 | <db:listitem><db:para>list2option3</db:para>
 7 | <db:listitem><db:para>list3option1</db:para>
 8 | <db:listitem><db:para>list3option2</db:para>
 9 | <db:listitem><db:para>list3option3</db:para>
10 | <db:listitem><db:para>list3option4</db:para>
11 | <db:listitem><db:para>list3option5</db:para>
12 | <db:listitem><db:para>list3option6</db:para>
13 | <db:listitem><db:para>list3option7</db:para>
14 | <db:listitem><db:para>list3option8</db:para>
15 | <db:listitem><db:para>list4option1</db:para>
16 | <db:listitem><db:para>list4option2</db:para>
17 | <db:listitem><db:para>list4option3</db:para>
18 | <db:listitem><db:para>list4option4</db:para>
19 | <db:listitem><db:para>list5option1</db:para>
20 | <db:listitem><db:para>list5option2</db:para>
21 | <db:listitem><db:para>list5option3</db:para>
22 | <db:listitem><db:para>list5option4</db:para>
23 | <db:listitem><db:para>list6option1</db:para>
24 | <db:listitem><db:para>list6option2</db:para>
25 | <db:listitem><db:para>list6option3</db:para>
26 | <db:listitem><db:para>list6option4</db:para>
27 | <db:listitem><db:para>list6option5</db:para>
28 | <db:listitem><db:para>list6option6</db:para>
29 | 


--------------------------------------------------------------------------------
/pipelines/tests/lists/sample-document-html-lists.txt:
--------------------------------------------------------------------------------
 1 | <li><p>list1option1
 2 | <li><p>list1option2
 3 | <li><p>list1option3
 4 | <li><p>list2option1</p>
 5 | <li><p>list2option2
 6 | <li><p>list2option3
 7 | <li><p>list3option1
 8 | <li><p>list3option2
 9 | <li><p>list3option3
10 | <li><p>list3option4
11 | <li><p>list3option5
12 | <li><p>list3option6
13 | <li><p>list3option7
14 | <li><p>list3option8
15 | <li><p>list4option1
16 | <li><p>list4option2
17 | <li><p>list4option3
18 | <li><p>list4option4
19 | <li><p>list5option1</p>
20 | <li><p>list5option2
21 | <li><p>list5option3
22 | <li><p>list5option4
23 | <li><p>list6option1
24 | <li><p>list6option2
25 | <li><p>list6option3
26 | <li><p>list6option4
27 | <li><p>list6option5
28 | <li><p>list6option6
29 | 


--------------------------------------------------------------------------------
/pipelines/tests/lists/sample-document.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/pipelines/tests/lists/sample-document.doc


--------------------------------------------------------------------------------
/pipelines/tests/lists/sample-document.txt:
--------------------------------------------------------------------------------
 1 | list1option1
 2 | list1option2
 3 | list1option3
 4 | list2option1
 5 | list2option2
 6 | list2option3
 7 | list3option1
 8 | list3option2
 9 | list3option3
10 | list3option4
11 | list3option5
12 | list3option6
13 | list3option7
14 | list3option8
15 | list4option1
16 | list4option2
17 | list4option3
18 | list4option4
19 | list5option1
20 | list5option2
21 | list5option3
22 | list5option4
23 | list6option1
24 | list6option2
25 | list6option3
26 | list6option4
27 | list6option5
28 | list6option6
29 | 


--------------------------------------------------------------------------------
/pipelines/tests/lists/sample-document.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding="UTF-8"?>
 2 | <xsl:stylesheet	version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns="docvert:5">
 3 | 
 4 | 	<xsl:output method="xml" omit-xml-declaration="no"/>
 5 | 
 6 |     <xsl:template match="/">
 7 |         <xsl:element name="group">
 8 |             <xsl:apply-templates/>
 9 |         </xsl:element>
10 |     </xsl:template>
11 | 
12 | </xsl:stylesheet>
13 | 


--------------------------------------------------------------------------------
/pipelines/tests/lists/table-list.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/pipelines/tests/lists/table-list.odt


--------------------------------------------------------------------------------
/pipelines/tests/lists/table-list.txt:
--------------------------------------------------------------------------------
 1 | table1list1
 2 | table1list2
 3 | table1list3
 4 | table1list4
 5 | table1list5
 6 | table1list6
 7 | table2list7
 8 | table2list8
 9 | table2list9
10 | table2list10
11 | table2list11
12 | table2list12
13 | table3list13
14 | table3list14
15 | table3list15
16 | table3list16
17 | table3list17
18 | table3list18
19 | table4list19
20 | table4list20
21 | table4list21
22 | table4list22
23 | table4list23
24 | table4list24
25 | 


--------------------------------------------------------------------------------
/pipelines/tests/tables/pipeline.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <pipeline>
3 |     <stage process="Generate" withFile="table-list.odt"/>
4 |     <stage process="TransformOpenDocumentToDocBook"/>
5 |     <stage process="DocBookToXHTML"/>
6 |     <stage process="Test" withFile="table-list.xsl" prefix="HTML"/>
7 | </pipeline>
8 | 


--------------------------------------------------------------------------------
/pipelines/tests/tables/table-list.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/pipelines/tests/tables/table-list.odt


--------------------------------------------------------------------------------
/pipelines/tests/tables/table-list.txt:
--------------------------------------------------------------------------------
 1 | table1list1
 2 | table1list2
 3 | table1list3
 4 | table1list4
 5 | table1list5
 6 | table1list6
 7 | table2list7
 8 | table2list8
 9 | table2list9
10 | table2list10
11 | table2list11
12 | table2list12
13 | table3list13
14 | table3list14
15 | table3list15
16 | table3list16
17 | table3list17
18 | table3list18
19 | table4list19
20 | table4list20
21 | table4list21
22 | table4list22
23 | table4list23
24 | table4list24
25 | 


--------------------------------------------------------------------------------
/pipelines/tests/tables/table-list.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding="UTF-8"?>
 2 | <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:db="http://docbook.org/ns/docbook" xmlns="docvert:5" xmlns:html="http://www.w3.org/1999/xhtml" xmlns:xlink="http://www.w3.org/1999/xlink" exclude-result-prefixes="db html xlink">
 3 | <xsl:output method="xml" version="1.0" encoding="utf-8" omit-xml-declaration="yes"/>
 4 | 
 5 | <xsl:variable name="alignment" select=" normalize-space('
 6 | Table1Heading1:center,
 7 | Table1Heading2:center,
 8 | Table1Heading3:center,
 9 | table1cell1:center,
10 | table1cell2:center,
11 | table1cell3:center,
12 | table1cell4:center,
13 | table1cell5:center,
14 | table1cell6:center,
15 | table1cell7:center,
16 | table1cell8:center,
17 | table1cell9:center,
18 | Table2Heading1:left,
19 | Table2Heading2:left,
20 | Table2Heading3:left,
21 | Table3Heading1:right,
22 | Table3Heading2:right,
23 | Table3Heading3:right,
24 | Table4Heading1:center,
25 | Table4Heading2:center,
26 | Table4Heading3:center,
27 | ')"/>
28 | 
29 | <xsl:template match="/">
30 |     <group>
31 |         <xsl:apply-templates/>
32 |     </group>
33 | </xsl:template>
34 | 
35 | <xsl:template match="html:td[@class] | html:th[@class]">
36 |     <xsl:variable name="inner-text" select="normalize-space(.)"/>
37 |     <xsl:if test="contains($alignment, concat($inner-text,':'))">
38 |         <xsl:variable name="expected-alignment" select="substring-before(substring-after($alignment, concat($inner-text,':')), ',')"/>
39 |         <xsl:choose>
40 |             <xsl:when test="@class = concat('align-',$expected-alignment)"><pass>Cell with <xsl:value-of select="$inner-text"/> does contain correct alignment of <xsl:value-of select="$expected-alignment"/></pass></xsl:when>
41 |             <xsl:otherwise><fail>Cell with <xsl:value-of select="$inner-text"/> doesn't contain correct alignment of <xsl:value-of select="$expected-alignment"/></fail></xsl:otherwise>
42 |         </xsl:choose>
43 |     </xsl:if>
44 | </xsl:template>
45 | 
46 | <xsl:template match="text()"/>
47 | 
48 | 
49 | </xsl:stylesheet>
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/pipelines/tests/tables/table-span.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holloway/docvert-python3/a16d9ff0921186355e80c6f4905a00f20c12cf5f/pipelines/tests/tables/table-span.doc


--------------------------------------------------------------------------------