├── README.md
└── lambda_functions
    └── pdf_text_extract
        ├── _markerlib
            ├── __init__.py
            ├── __init__.pyc
            ├── markers.py
            └── markers.pyc
        ├── distribute-0.7.3-py2.7.egg-info
            ├── PKG-INFO
            ├── SOURCES.txt
            ├── dependency_links.txt
            ├── installed-files.txt
            ├── requires.txt
            ├── top_level.txt
            └── zip-safe
        ├── easy_install.py
        ├── easy_install.pyc
        ├── elasticsearch-2.2.0.dist-info
            ├── DESCRIPTION.rst
            ├── METADATA
            ├── RECORD
            ├── WHEEL
            ├── metadata.json
            ├── pbr.json
            └── top_level.txt
        ├── elasticsearch
            ├── __init__.py
            ├── __init__.pyc
            ├── client
            │   ├── __init__.py
            │   ├── __init__.pyc
            │   ├── cat.py
            │   ├── cat.pyc
            │   ├── cluster.py
            │   ├── cluster.pyc
            │   ├── indices.py
            │   ├── indices.pyc
            │   ├── nodes.py
            │   ├── nodes.pyc
            │   ├── snapshot.py
            │   ├── snapshot.pyc
            │   ├── utils.py
            │   └── utils.pyc
            ├── compat.py
            ├── compat.pyc
            ├── connection
            │   ├── __init__.py
            │   ├── __init__.pyc
            │   ├── base.py
            │   ├── base.pyc
            │   ├── esthrift
            │   │   ├── Rest.py
            │   │   ├── Rest.pyc
            │   │   ├── __init__.py
            │   │   ├── __init__.pyc
            │   │   ├── constants.py
            │   │   ├── constants.pyc
            │   │   ├── ttypes.py
            │   │   └── ttypes.pyc
            │   ├── http_requests.py
            │   ├── http_requests.pyc
            │   ├── http_urllib3.py
            │   ├── http_urllib3.pyc
            │   ├── memcached.py
            │   ├── memcached.pyc
            │   ├── pooling.py
            │   ├── pooling.pyc
            │   ├── thrift.py
            │   └── thrift.pyc
            ├── connection_pool.py
            ├── connection_pool.pyc
            ├── exceptions.py
            ├── exceptions.pyc
            ├── helpers
            │   ├── __init__.py
            │   ├── __init__.pyc
            │   ├── test.py
            │   └── test.pyc
            ├── serializer.py
            ├── serializer.pyc
            ├── transport.py
            └── transport.pyc
        ├── lambda_function.py
        ├── lambda_function.zip
        ├── pdfminer-20110515-py2.7.egg-info
            ├── PKG-INFO
            ├── SOURCES.txt
            ├── dependency_links.txt
            ├── installed-files.txt
            └── top_level.txt
        ├── pdfminer
            ├── __init__.py
            ├── __init__.pyc
            ├── arcfour.py
            ├── arcfour.pyc
            ├── ascii85.py
            ├── ascii85.pyc
            ├── cmap
            │   ├── __init__.py
            │   └── __init__.pyc
            ├── cmapdb.py
            ├── cmapdb.pyc
            ├── converter.py
            ├── converter.pyc
            ├── encodingdb.py
            ├── encodingdb.pyc
            ├── fontmetrics.py
            ├── fontmetrics.pyc
            ├── glyphlist.py
            ├── glyphlist.pyc
            ├── latin_enc.py
            ├── latin_enc.pyc
            ├── layout.py
            ├── layout.pyc
            ├── lzw.py
            ├── lzw.pyc
            ├── pdfcolor.py
            ├── pdfcolor.pyc
            ├── pdfdevice.py
            ├── pdfdevice.pyc
            ├── pdffont.py
            ├── pdffont.pyc
            ├── pdfinterp.py
            ├── pdfinterp.pyc
            ├── pdfparser.py
            ├── pdfparser.pyc
            ├── pdftypes.py
            ├── pdftypes.pyc
            ├── psparser.py
            ├── psparser.pyc
            ├── rijndael.py
            ├── rijndael.pyc
            ├── runlength.py
            ├── runlength.pyc
            ├── utils.py
            └── utils.pyc
        ├── pkg_resources
            ├── __init__.py
            ├── __init__.pyc
            └── _vendor
            │   ├── __init__.py
            │   ├── __init__.pyc
            │   └── packaging
            │       ├── __about__.py
            │       ├── __about__.pyc
            │       ├── __init__.py
            │       ├── __init__.pyc
            │       ├── _compat.py
            │       ├── _compat.pyc
            │       ├── _structures.py
            │       ├── _structures.pyc
            │       ├── specifiers.py
            │       ├── specifiers.pyc
            │       ├── version.py
            │       └── version.pyc
        ├── setuptools-19.2.dist-info
            ├── DESCRIPTION.rst
            ├── METADATA
            ├── RECORD
            ├── WHEEL
            ├── dependency_links.txt
            ├── entry_points.txt
            ├── metadata.json
            ├── top_level.txt
            └── zip-safe
        ├── setuptools
            ├── __init__.py
            ├── __init__.pyc
            ├── archive_util.py
            ├── archive_util.pyc
            ├── cli-32.exe
            ├── cli-64.exe
            ├── cli-arm-32.exe
            ├── cli.exe
            ├── command
            │   ├── __init__.py
            │   ├── __init__.pyc
            │   ├── alias.py
            │   ├── alias.pyc
            │   ├── bdist_egg.py
            │   ├── bdist_egg.pyc
            │   ├── bdist_rpm.py
            │   ├── bdist_rpm.pyc
            │   ├── bdist_wininst.py
            │   ├── bdist_wininst.pyc
            │   ├── build_ext.py
            │   ├── build_ext.pyc
            │   ├── build_py.py
            │   ├── build_py.pyc
            │   ├── develop.py
            │   ├── develop.pyc
            │   ├── easy_install.py
            │   ├── easy_install.pyc
            │   ├── egg_info.py
            │   ├── egg_info.pyc
            │   ├── install.py
            │   ├── install.pyc
            │   ├── install_egg_info.py
            │   ├── install_egg_info.pyc
            │   ├── install_lib.py
            │   ├── install_lib.pyc
            │   ├── install_scripts.py
            │   ├── install_scripts.pyc
            │   ├── launcher manifest.xml
            │   ├── register.py
            │   ├── register.pyc
            │   ├── rotate.py
            │   ├── rotate.pyc
            │   ├── saveopts.py
            │   ├── saveopts.pyc
            │   ├── sdist.py
            │   ├── sdist.pyc
            │   ├── setopt.py
            │   ├── setopt.pyc
            │   ├── test.py
            │   ├── test.pyc
            │   ├── upload_docs.py
            │   └── upload_docs.pyc
            ├── compat.py
            ├── compat.pyc
            ├── depends.py
            ├── depends.pyc
            ├── dist.py
            ├── dist.pyc
            ├── extension.py
            ├── extension.pyc
            ├── gui-32.exe
            ├── gui-64.exe
            ├── gui-arm-32.exe
            ├── gui.exe
            ├── lib2to3_ex.py
            ├── lib2to3_ex.pyc
            ├── msvc9_support.py
            ├── msvc9_support.pyc
            ├── package_index.py
            ├── package_index.pyc
            ├── py26compat.py
            ├── py26compat.pyc
            ├── py27compat.py
            ├── py27compat.pyc
            ├── py31compat.py
            ├── py31compat.pyc
            ├── sandbox.py
            ├── sandbox.pyc
            ├── script (dev).tmpl
            ├── script.tmpl
            ├── site-patch.py
            ├── site-patch.pyc
            ├── ssl_support.py
            ├── ssl_support.pyc
            ├── unicode_utils.py
            ├── unicode_utils.pyc
            ├── utils.py
            ├── utils.pyc
            ├── version.py
            ├── version.pyc
            ├── windows_support.py
            └── windows_support.pyc
        ├── slate-0.3-py2.7.egg-info
            ├── PKG-INFO
            ├── SOURCES.txt
            ├── dependency_links.txt
            ├── installed-files.txt
            ├── requires.txt
            └── top_level.txt
        ├── slate
            ├── __init__.py
            ├── __init__.pyc
            ├── conftest.py
            ├── conftest.pyc
            ├── slate.py
            ├── slate.pyc
            ├── test_slate.py
            ├── test_slate.pyc
            ├── utils.py
            └── utils.pyc
        ├── urllib3-1.14.dist-info
            ├── DESCRIPTION.rst
            ├── METADATA
            ├── RECORD
            ├── WHEEL
            ├── metadata.json
            ├── pbr.json
            └── top_level.txt
        └── urllib3
            ├── __init__.py
            ├── __init__.pyc
            ├── _collections.py
            ├── _collections.pyc
            ├── connection.py
            ├── connection.pyc
            ├── connectionpool.py
            ├── connectionpool.pyc
            ├── contrib
                ├── __init__.py
                ├── __init__.pyc
                ├── appengine.py
                ├── appengine.pyc
                ├── ntlmpool.py
                ├── ntlmpool.pyc
                ├── pyopenssl.py
                ├── pyopenssl.pyc
                ├── socks.py
                └── socks.pyc
            ├── exceptions.py
            ├── exceptions.pyc
            ├── fields.py
            ├── fields.pyc
            ├── filepost.py
            ├── filepost.pyc
            ├── packages
                ├── __init__.py
                ├── __init__.pyc
                ├── ordered_dict.py
                ├── ordered_dict.pyc
                ├── six.py
                ├── six.pyc
                └── ssl_match_hostname
                │   ├── __init__.py
                │   ├── __init__.pyc
                │   ├── _implementation.py
                │   └── _implementation.pyc
            ├── poolmanager.py
            ├── poolmanager.pyc
            ├── request.py
            ├── request.pyc
            ├── response.py
            ├── response.pyc
            └── util
                ├── __init__.py
                ├── __init__.pyc
                ├── connection.py
                ├── connection.pyc
                ├── request.py
                ├── request.pyc
                ├── response.py
                ├── response.pyc
                ├── retry.py
                ├── retry.pyc
                ├── ssl_.py
                ├── ssl_.pyc
                ├── timeout.py
                ├── timeout.pyc
                ├── url.py
                └── url.pyc


/README.md:
--------------------------------------------------------------------------------
 1 | # Overview
 2 | 
 3 | This code, written to be executed as an AWS Lambda function, uses the Slate module to extract the text from a PDF file, and then indexes that text to an Elasticsearch cluster. It is designed to be invoked when a PDF document is put to an S3 bucket.
 4 | 
 5 | 
 6 | A few implementation notes:
 7 | * Because this is just a simple PoC, only the text on the first page of the PDF is indexed to Elasticsearch
 8 | * Play around with the Lambda timeout time to set something that works for document sizes you're placing in the S3 bucket
 9 | * For smaller PDF docs, I've observed memory utilization (in CWL) in the low tens of megabytes
10 | * This assumes some familiarity with AWS Lambda basics (configuring event sources, invocation policies, etc.)
11 | * Specify a suffix of 'pdf' on the S3 event source so that the function only executes for PDF files
12 | 
13 | To be implemented:
14 | * Signing of POSTs to Elasticsearch endpoints using SigV4, instead of using python modules 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/_markerlib/__init__.py:
--------------------------------------------------------------------------------
 1 | try:
 2 |     import ast
 3 |     from _markerlib.markers import default_environment, compile, interpret
 4 | except ImportError:
 5 |     if 'ast' in globals():
 6 |         raise
 7 |     def default_environment():
 8 |         return {}
 9 |     def compile(marker):
10 |         def marker_fn(environment=None, override=None):
11 |             # 'empty markers are True' heuristic won't install extra deps.
12 |             return not marker.strip()
13 |         marker_fn.__doc__ = marker
14 |         return marker_fn
15 |     def interpret(marker, environment=None, override=None):
16 |         return compile(marker)()
17 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/_markerlib/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/_markerlib/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/_markerlib/markers.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """Interpret PEP 345 environment markers.
  3 | 
  4 | EXPR [in|==|!=|not in] EXPR [or|and] ...
  5 | 
  6 | where EXPR belongs to any of those:
  7 | 
  8 |     python_version = '%s.%s' % (sys.version_info[0], sys.version_info[1])
  9 |     python_full_version = sys.version.split()[0]
 10 |     os.name = os.name
 11 |     sys.platform = sys.platform
 12 |     platform.version = platform.version()
 13 |     platform.machine = platform.machine()
 14 |     platform.python_implementation = platform.python_implementation()
 15 |     a free string, like '2.6', or 'win32'
 16 | """
 17 | 
 18 | __all__ = ['default_environment', 'compile', 'interpret']
 19 | 
 20 | import ast
 21 | import os
 22 | import platform
 23 | import sys
 24 | import weakref
 25 | 
 26 | _builtin_compile = compile
 27 | 
 28 | try:
 29 |     from platform import python_implementation
 30 | except ImportError:
 31 |     if os.name == "java":
 32 |         # Jython 2.5 has ast module, but not platform.python_implementation() function.
 33 |         def python_implementation():
 34 |             return "Jython"
 35 |     else:
 36 |         raise
 37 | 
 38 | 
 39 | # restricted set of variables
 40 | _VARS = {'sys.platform': sys.platform,
 41 |          'python_version': '%s.%s' % sys.version_info[:2],
 42 |          # FIXME parsing sys.platform is not reliable, but there is no other
 43 |          # way to get e.g. 2.7.2+, and the PEP is defined with sys.version
 44 |          'python_full_version': sys.version.split(' ', 1)[0],
 45 |          'os.name': os.name,
 46 |          'platform.version': platform.version(),
 47 |          'platform.machine': platform.machine(),
 48 |          'platform.python_implementation': python_implementation(),
 49 |          'extra': None # wheel extension
 50 |         }
 51 | 
 52 | for var in list(_VARS.keys()):
 53 |     if '.' in var:
 54 |         _VARS[var.replace('.', '_')] = _VARS[var]
 55 | 
 56 | def default_environment():
 57 |     """Return copy of default PEP 385 globals dictionary."""
 58 |     return dict(_VARS)
 59 | 
 60 | class ASTWhitelist(ast.NodeTransformer):
 61 |     def __init__(self, statement):
 62 |         self.statement = statement # for error messages
 63 | 
 64 |     ALLOWED = (ast.Compare, ast.BoolOp, ast.Attribute, ast.Name, ast.Load, ast.Str)
 65 |     # Bool operations
 66 |     ALLOWED += (ast.And, ast.Or)
 67 |     # Comparison operations
 68 |     ALLOWED += (ast.Eq, ast.Gt, ast.GtE, ast.In, ast.Is, ast.IsNot, ast.Lt, ast.LtE, ast.NotEq, ast.NotIn)
 69 | 
 70 |     def visit(self, node):
 71 |         """Ensure statement only contains allowed nodes."""
 72 |         if not isinstance(node, self.ALLOWED):
 73 |             raise SyntaxError('Not allowed in environment markers.\n%s\n%s' %
 74 |                                (self.statement,
 75 |                                (' ' * node.col_offset) + '^'))
 76 |         return ast.NodeTransformer.visit(self, node)
 77 | 
 78 |     def visit_Attribute(self, node):
 79 |         """Flatten one level of attribute access."""
 80 |         new_node = ast.Name("%s.%s" % (node.value.id, node.attr), node.ctx)
 81 |         return ast.copy_location(new_node, node)
 82 | 
 83 | def parse_marker(marker):
 84 |     tree = ast.parse(marker, mode='eval')
 85 |     new_tree = ASTWhitelist(marker).generic_visit(tree)
 86 |     return new_tree
 87 | 
 88 | def compile_marker(parsed_marker):
 89 |     return _builtin_compile(parsed_marker, '<environment marker>', 'eval',
 90 |                    dont_inherit=True)
 91 | 
 92 | _cache = weakref.WeakValueDictionary()
 93 | 
 94 | def compile(marker):
 95 |     """Return compiled marker as a function accepting an environment dict."""
 96 |     try:
 97 |         return _cache[marker]
 98 |     except KeyError:
 99 |         pass
100 |     if not marker.strip():
101 |         def marker_fn(environment=None, override=None):
102 |             """"""
103 |             return True
104 |     else:
105 |         compiled_marker = compile_marker(parse_marker(marker))
106 |         def marker_fn(environment=None, override=None):
107 |             """override updates environment"""
108 |             if override is None:
109 |                 override = {}
110 |             if environment is None:
111 |                 environment = default_environment()
112 |             environment.update(override)
113 |             return eval(compiled_marker, environment)
114 |     marker_fn.__doc__ = marker
115 |     _cache[marker] = marker_fn
116 |     return _cache[marker]
117 | 
def interpret(marker, environment=None, override=None):
    """Compile *marker* and evaluate it in one step.

    :param marker: environment marker text, e.g. ``"os.name == 'posix'"``.
    :param environment: variables to evaluate against; the default
        environment is used when None.
    :param override: optional values layered on top of the environment.
        Accepted for parity with the compiled marker function and with the
        fallback ``interpret`` in ``_markerlib/__init__.py``.
    :returns: the result of evaluating the marker.
    :raises SyntaxError: if the marker is not a valid environment marker.
    """
    return compile(marker)(environment, override)
120 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/_markerlib/markers.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/_markerlib/markers.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/distribute-0.7.3-py2.7.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
 1 | Metadata-Version: 1.1
 2 | Name: distribute
 3 | Version: 0.7.3
 4 | Summary: distribute legacy wrapper
 5 | Home-page: http://packages.python.org/distribute
 6 | Author: The fellowship of the packaging
 7 | Author-email: distutils-sig@python.org
 8 | License: PSF or ZPL
 9 | Description: Distribute - legacy package
10 |         
11 |         This package is a simple compatibility layer that installs Setuptools 0.7+.
12 |         
13 | Keywords: CPAN PyPI distutils eggs package management
14 | Platform: UNKNOWN
15 | Classifier: Development Status :: 5 - Production/Stable
16 | Classifier: Intended Audience :: Developers
17 | Classifier: License :: OSI Approved :: Python Software Foundation License
18 | Classifier: License :: OSI Approved :: Zope Public License
19 | Classifier: Operating System :: OS Independent
20 | Classifier: Programming Language :: Python :: 2.4
21 | Classifier: Programming Language :: Python :: 2.5
22 | Classifier: Programming Language :: Python :: 2.6
23 | Classifier: Programming Language :: Python :: 2.7
24 | Classifier: Programming Language :: Python :: 3
25 | Classifier: Programming Language :: Python :: 3.1
26 | Classifier: Programming Language :: Python :: 3.2
27 | Classifier: Programming Language :: Python :: 3.3
28 | Classifier: Topic :: Software Development :: Libraries :: Python Modules
29 | Classifier: Topic :: System :: Archiving :: Packaging
30 | Classifier: Topic :: System :: Systems Administration
31 | Classifier: Topic :: Utilities
32 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/distribute-0.7.3-py2.7.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
 1 | MANIFEST.in
 2 | pkg_resources.py
 3 | setup.cfg
 4 | setup.py
 5 | distribute.egg-info/PKG-INFO
 6 | distribute.egg-info/SOURCES.txt
 7 | distribute.egg-info/dependency_links.txt
 8 | distribute.egg-info/requires.txt
 9 | distribute.egg-info/top_level.txt
10 | distribute.egg-info/zip-safe
11 | setuptools/__init__.py
12 | setuptools/archive_util.py
13 | setuptools/compat.py
14 | setuptools/depends.py
15 | setuptools/dist.py
16 | setuptools/extension.py
17 | setuptools/package_index.py
18 | setuptools/py24compat.py
19 | setuptools/py27compat.py
20 | setuptools/sandbox.py
21 | setuptools/script template (dev).py
22 | setuptools/script template.py
23 | setuptools/site-patch.py
24 | setuptools/ssl_support.py
25 | setuptools.egg-info/PKG-INFO
26 | setuptools.egg-info/SOURCES.txt
27 | setuptools.egg-info/dependency_links.txt
28 | setuptools.egg-info/entry_points.txt
29 | setuptools.egg-info/entry_points.txt.orig
30 | setuptools.egg-info/requires.txt
31 | setuptools.egg-info/requires.txt.orig
32 | setuptools.egg-info/top_level.txt
33 | setuptools.egg-info/zip-safe
34 | setuptools/command/__init__.py
35 | setuptools/command/alias.py
36 | setuptools/command/bdist_egg.py
37 | setuptools/command/bdist_rpm.py
38 | setuptools/command/bdist_wininst.py
39 | setuptools/command/build_ext.py
40 | setuptools/command/build_py.py
41 | setuptools/command/develop.py
42 | setuptools/command/easy_install.py
43 | setuptools/command/egg_info.py
44 | setuptools/command/install.py
45 | setuptools/command/install_egg_info.py
46 | setuptools/command/install_lib.py
47 | setuptools/command/install_scripts.py
48 | setuptools/command/launcher manifest.xml
49 | setuptools/command/register.py
50 | setuptools/command/rotate.py
51 | setuptools/command/saveopts.py
52 | setuptools/command/sdist.py
53 | setuptools/command/setopt.py
54 | setuptools/command/test.py
55 | setuptools/command/upload.py
56 | setuptools/command/upload_docs.py


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/distribute-0.7.3-py2.7.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/distribute-0.7.3-py2.7.egg-info/installed-files.txt:
--------------------------------------------------------------------------------
1 | ./
2 | dependency_links.txt
3 | PKG-INFO
4 | requires.txt
5 | SOURCES.txt
6 | top_level.txt
7 | zip-safe
8 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/distribute-0.7.3-py2.7.egg-info/requires.txt:
--------------------------------------------------------------------------------
1 | setuptools>=0.7


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/distribute-0.7.3-py2.7.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/distribute-0.7.3-py2.7.egg-info/zip-safe:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/easy_install.py:
--------------------------------------------------------------------------------
1 | """Run the EasyInstall command"""
2 | 
if __name__ == '__main__':
    # Hand off to the setuptools implementation when run as a script.
    from setuptools.command import easy_install as _easy_install_command
    _easy_install_command.main()
6 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/easy_install.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/easy_install.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch-2.2.0.dist-info/DESCRIPTION.rst:
--------------------------------------------------------------------------------
  1 | Python Elasticsearch Client
  2 | ===========================
  3 | 
  4 | Official low-level client for Elasticsearch. Its goal is to provide common
  5 | ground for all Elasticsearch-related code in Python; because of this it tries
  6 | to be opinion-free and very extendable.
  7 | 
  8 | For a more high level client library with more limited scope, have a look at
  9 | `elasticsearch-dsl`_ - a more pythonic library sitting on top of
 10 | ``elasticsearch-py``.
 11 | 
 12 | It provides a more convenient and idiomatic way to write and manipulate
 13 | `queries`_. It stays close to the Elasticsearch JSON DSL, mirroring its
 14 | terminology and structure while exposing the whole range of the DSL from Python
 15 | either directly using defined classes or a queryset-like expressions.
 16 | 
 17 | It also provides an optional `persistence layer`_ for working with documents as
 18 | Python objects in an ORM-like fashion: defining mappings, retrieving and saving
 19 | documents, wrapping the document data in user-defined classes.
 20 | 
 21 | .. _elasticsearch-dsl: http://elasticsearch-dsl.rtfd.org/
 22 | .. _queries: http://elasticsearch-dsl.readthedocs.org/en/latest/search_dsl.html
 23 | .. _persistence layer: http://elasticsearch-dsl.readthedocs.org/en/latest/persistence.html#doctype
 24 | 
 25 | Compatibility
 26 | -------------
 27 | 
 28 | The library is compatible with all Elasticsearch versions since ``0.90.x`` but you
 29 | **have to use a matching major version**:
 30 | 
 31 | For **Elasticsearch 2.0** and later, use the major version 2 (``2.x.y``) of the
 32 | library.
 33 | 
 34 | For **Elasticsearch 1.0** and later, use the major version 1 (``1.x.y``) of the
 35 | library.
 36 | 
 37 | For **Elasticsearch 0.90.x**, use a version from ``0.4.x`` releases of the
 38 | library.
 39 | 
 40 | The recommended way to set your requirements in your `setup.py` or
 41 | `requirements.txt` is::
 42 | 
 43 |     # Elasticsearch 2.x
 44 |     elasticsearch>=2.0.0,<3.0.0
 45 | 
 46 |     # Elasticsearch 1.x
 47 |     elasticsearch>=1.0.0,<2.0.0
 48 | 
 49 |     # Elasticsearch 0.90.x
 50 |     elasticsearch<1.0.0
 51 | 
 52 | The development is happening on ``master`` and ``1.x`` branches, respectively.
 53 | 
 54 | Installation
 55 | ------------
 56 | 
 57 | Install the ``elasticsearch`` package with `pip
 58 | <https://pypi.python.org/pypi/elasticsearch>`_::
 59 | 
 60 |     pip install elasticsearch
 61 | 
 62 | 
 63 | Example use
 64 | -----------
 65 | 
 66 | Simple use-case::
 67 | 
 68 |     >>> from datetime import datetime
 69 |     >>> from elasticsearch import Elasticsearch
 70 | 
 71 |     # by default we connect to localhost:9200
 72 |     >>> es = Elasticsearch()
 73 | 
 74 |     # create an index in elasticsearch, ignore status code 400 (index already exists)
 75 |     >>> es.indices.create(index='my-index', ignore=400)
 76 |     {u'acknowledged': True}
 77 | 
 78 |     # datetimes will be serialized
 79 |     >>> es.index(index="my-index", doc_type="test-type", id=42, body={"any": "data", "timestamp": datetime.now()})
 80 |     {u'_id': u'42', u'_index': u'my-index', u'_type': u'test-type', u'_version': 1, u'ok': True}
 81 | 
 82 |     # but not deserialized
 83 |     >>> es.get(index="my-index", doc_type="test-type", id=42)['_source']
 84 |     {u'any': u'data', u'timestamp': u'2013-05-12T19:45:31.804229'}
 85 | 
 86 | `Full documentation`_.
 87 | 
 88 | .. _Full documentation: http://elasticsearch-py.rtfd.org/
 89 | 
 90 | 
 91 | Features
 92 | --------
 93 | 
 94 | The client's features include:
 95 | 
 96 |  * translating basic Python data types to and from json (datetimes are not
 97 |    decoded for performance reasons)
 98 |  * configurable automatic discovery of cluster nodes
 99 |  * persistent connections
100 |  * load balancing (with pluggable selection strategy) across all available nodes
101 |  * failed connection penalization (time based - failed connections won't be
102 |    retried until a timeout is reached)
103 |  * support for ssl and http authentication
104 |  * thread safety
105 |  * pluggable architecture
106 | 
107 | 
108 | License
109 | -------
110 | 
111 | Copyright 2015 Elasticsearch
112 | 
113 | Licensed under the Apache License, Version 2.0 (the "License");
114 | you may not use this file except in compliance with the License.
115 | You may obtain a copy of the License at
116 | 
117 |     http://www.apache.org/licenses/LICENSE-2.0
118 | 
119 | Unless required by applicable law or agreed to in writing, software
120 | distributed under the License is distributed on an "AS IS" BASIS,
121 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
122 | See the License for the specific language governing permissions and
123 | limitations under the License.
124 | 
125 | Build status
126 | ------------
127 | 
128 | .. image:: https://secure.travis-ci.org/elastic/elasticsearch-py.png
129 |    :target: http://travis-ci.org/#!/elastic/elasticsearch-py
130 | 
131 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch-2.2.0.dist-info/RECORD:
--------------------------------------------------------------------------------
 1 | elasticsearch/compat.py,sha256=MLDabdJN3w5KkeVykvh0rpUO1SFFds0mBnDWZJ30SdI,312
 2 | elasticsearch/transport.py,sha256=El6Li6eHmkFZxeLMfWiZb-nf-zBnsXBwQR8KRamiAiU,15277
 3 | elasticsearch/exceptions.py,sha256=RGOIdnNlpKYB9-Hy-gyMIALNq3CxOdzPu4bySZN97q8,3127
 4 | elasticsearch/connection_pool.py,sha256=re2lYiUtyO3CSzkJLZc8L2ooVZ76bH7rKm1oQ_5SG9Q,9607
 5 | elasticsearch/serializer.py,sha256=qqyY9_JaaPuTw1UAbL99CSKllbvXOHF0VfCXcupM4sk,2287
 6 | elasticsearch/__init__.py,sha256=QLTdsCcV3Hy1XLmKE6qVe8BBGGMajdpRU4FWhQTmXzc,837
 7 | elasticsearch/client/cluster.py,sha256=ez_hB0Ywkn8s1QSh-Sdolr9EEaKXPiVQ1T8CU1v0udQ,7619
 8 | elasticsearch/client/cat.py,sha256=2egI0ak1z9u8ruR_rp9BqQxLOpDltt4dgL1o99QnN_Q,15962
 9 | elasticsearch/client/snapshot.py,sha256=w-cteBMUMYV0DOU1oJXFqRnhfvGjb0dJreSbj2Uy6Vs,7609
10 | elasticsearch/client/indices.py,sha256=juLLjjOtNo0BL6DzMcyXS2Ch_PrwL24M89Ut7XewezM,52688
11 | elasticsearch/client/utils.py,sha256=O9ELpuzQIShFON8UBhTo87T4vzQTQwHga8oVcskiVOk,2645
12 | elasticsearch/client/__init__.py,sha256=q4_DfE7ImodKSbvdndnYM8xNCP2lCCSeOX-zb6vhu5U,63969
13 | elasticsearch/client/nodes.py,sha256=P8YUL-BppKfn4tdw3gL5hEbne5nw5hzu_oivmQVXd0I,4805
14 | elasticsearch/connection/memcached.py,sha256=R8fqOEuTypsph4R0dD0GDsGz4s6vcQXEQTERZ3lRvEg,2878
15 | elasticsearch/connection/thrift.py,sha256=wOwxDADn03OTK06Dc9Uhgj8BGhwXcy-eAvYWRp7fGps,3872
16 | elasticsearch/connection/pooling.py,sha256=19x-kzyqndWJnnMGZ1ucoZvpMIliH3q3oJLe34OEROA,798
17 | elasticsearch/connection/http_urllib3.py,sha256=62rcfrXs2ygZxEC2j7j1xbvtwBmYKvamFl9sBfYFXzo,5029
18 | elasticsearch/connection/base.py,sha256=g4CNB4wa5ztJpGTEcx-k--KOU0I6h1q0ln4n6qZX4ik,4265
19 | elasticsearch/connection/http_requests.py,sha256=oBGf1xLPE4MInMaT5H2dJY14_aTh9y18eNmXqNsCkYY,3659
20 | elasticsearch/connection/__init__.py,sha256=V50xojadYNqjTkV1KdY4GQnxfG-hrxr-FkmQknvYjJo,127
21 | elasticsearch/connection/esthrift/Rest.py,sha256=OPIqP9KqsokuAcUnwvBXZaVUGr1qQTs4AicYoOIMGFI,6561
22 | elasticsearch/connection/esthrift/constants.py,sha256=hz320xjF2ljmXqRIVWUkjJMXnT9BegVGF2K7hnqsdHs,276
23 | elasticsearch/connection/esthrift/ttypes.py,sha256=XDpDtdxrrvBVMMXsiu3jLONRdYz8rcHJ9GSuJz0MPzc,12040
24 | elasticsearch/connection/esthrift/__init__.py,sha256=oX5iaOEMn-aGOlg5vEc9N8Fid1iNh0zWE5WvNgi7ysI,42
25 | elasticsearch/helpers/test.py,sha256=iyaFosPjWYupZOcpk6kYdIzpw6-qNGkfWuK-G-iFtYQ,1839
26 | elasticsearch/helpers/__init__.py,sha256=pQhagNrZI-N9pqdgPt5eFFFILKMThvyhq__1eTQ0brQ,14052
27 | elasticsearch-2.2.0.dist-info/pbr.json,sha256=7LDsmS2o3dnopaClq6T03zl5eegwepeyDWzJEMMl-Jg,47
28 | elasticsearch-2.2.0.dist-info/top_level.txt,sha256=Jp2bLWq49skvCN4YCZsg1Hfn_NDLgleC-x-Bn01_HgM,14
29 | elasticsearch-2.2.0.dist-info/WHEEL,sha256=AvR0WeTpDaxT645bl5FQxUK6NPsTls2ttpcGJg3j1Xg,110
30 | elasticsearch-2.2.0.dist-info/METADATA,sha256=lDAm41FzDBFrm537DYEpGSUlma2UUcijQkuTvmyCADQ,5261
31 | elasticsearch-2.2.0.dist-info/DESCRIPTION.rst,sha256=oVlNdYwfM0G2v0gLd_xVaUcGpbX5n4cmTM96JZb9DWo,4221
32 | elasticsearch-2.2.0.dist-info/metadata.json,sha256=hWhX3zVXpCY1lIXYrdvVzTrrpZ5W11LD5W_dJYLdKoc,1272
33 | elasticsearch-2.2.0.dist-info/RECORD,,
34 | elasticsearch/transport.pyc,,
35 | elasticsearch/client/cluster.pyc,,
36 | elasticsearch/connection/pooling.pyc,,
37 | elasticsearch/compat.pyc,,
38 | elasticsearch/client/utils.pyc,,
39 | elasticsearch/client/snapshot.pyc,,
40 | elasticsearch/connection/esthrift/ttypes.pyc,,
41 | elasticsearch/client/indices.pyc,,
42 | elasticsearch/connection/__init__.pyc,,
43 | elasticsearch/connection/esthrift/__init__.pyc,,
44 | elasticsearch/__init__.pyc,,
45 | elasticsearch/connection/thrift.pyc,,
46 | elasticsearch/client/nodes.pyc,,
47 | elasticsearch/connection/esthrift/Rest.pyc,,
48 | elasticsearch/client/__init__.pyc,,
49 | elasticsearch/client/cat.pyc,,
50 | elasticsearch/connection/http_urllib3.pyc,,
51 | elasticsearch/helpers/__init__.pyc,,
52 | elasticsearch/connection/memcached.pyc,,
53 | elasticsearch/serializer.pyc,,
54 | elasticsearch/connection_pool.pyc,,
55 | elasticsearch/connection/esthrift/constants.pyc,,
56 | elasticsearch/connection/http_requests.pyc,,
57 | elasticsearch/exceptions.pyc,,
58 | elasticsearch/connection/base.pyc,,
59 | elasticsearch/helpers/test.pyc,,
60 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch-2.2.0.dist-info/WHEEL:
--------------------------------------------------------------------------------
1 | Wheel-Version: 1.0
2 | Generator: bdist_wheel (0.24.0)
3 | Root-Is-Purelib: true
4 | Tag: py2-none-any
5 | Tag: py3-none-any
6 | 
7 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch-2.2.0.dist-info/metadata.json:
--------------------------------------------------------------------------------
1 | {"test_requires": [{"requires": ["requests (>=1.0.0,<3.0.0)", "nose", "coverage", "mock", "pyaml", "nosexcover"]}], "generator": "bdist_wheel (0.24.0)", "extensions": {"python.details": {"contacts": [{"email": "honza.kral@gmail.com", "role": "author", "name": "Honza Kr\u00e1l"}], "project_urls": {"Home": "https://github.com/elastic/elasticsearch-py"}, "document_names": {"description": "DESCRIPTION.rst"}}}, "name": "elasticsearch", "version": "2.2.0", "classifiers": ["Development Status :: 5 - Production/Stable", "License :: OSI Approved :: Apache Software License", "Intended Audience :: Developers", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy"], "run_requires": [{"requires": ["urllib3 (>=1.8,<2.0)"]}], "extras": [], "license": "Apache License, Version 2.0", "summary": "Python client for Elasticsearch", "metadata_version": "2.0"}


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch-2.2.0.dist-info/pbr.json:
--------------------------------------------------------------------------------
1 | {"git_version": "14f5b35", "is_release": false}


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch-2.2.0.dist-info/top_level.txt:
--------------------------------------------------------------------------------
1 | elasticsearch
2 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/__init__.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | 
 3 | VERSION = (2, 2, 0)
 4 | __version__ = VERSION
 5 | __versionstr__ = '.'.join(map(str, VERSION))
 6 | 
 7 | import sys
 8 | 
 9 | if (2, 7) <= sys.version_info < (3, 2):
10 |     # On Python 2.7 and Python3 < 3.2, install no-op handler to silence
11 |     # `No handlers could be found for logger "elasticsearch"` message per
12 |     # <https://docs.python.org/2/howto/logging.html#configuring-logging-for-a-library>
13 |     import logging
14 |     logger = logging.getLogger('elasticsearch')
15 |     logger.addHandler(logging.NullHandler())
16 | 
17 | from .client import Elasticsearch
18 | from .transport import Transport
19 | from .connection_pool import ConnectionPool, ConnectionSelector, \
20 |     RoundRobinSelector
21 | from .serializer import JSONSerializer
22 | from .connection import Connection, RequestsHttpConnection, \
23 |     Urllib3HttpConnection
24 | from .exceptions import *
25 | 
26 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/client/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/client/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/client/cat.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/client/cat.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/client/cluster.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/client/cluster.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/client/indices.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/client/indices.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/client/nodes.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/client/nodes.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/client/snapshot.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/client/snapshot.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/client/utils.py:
--------------------------------------------------------------------------------
 1 | from __future__ import unicode_literals
 2 | 
 3 | import weakref
 4 | from datetime import date, datetime
 5 | from functools import wraps
 6 | from ..compat import string_types, quote_plus
 7 | 
 8 | # parts of URL to be omitted
 9 | SKIP_IN_PATH = (None, '', b'', [], ())
10 | 
11 | def _escape(value):
12 |     """
13 |     Escape a single value of a URL string or a query parameter. If it is a list
14 |     or tuple, turn it into a comma-separated string first.
15 |     """
16 | 
17 |     # make sequences into comma-separated stings
18 |     if isinstance(value, (list, tuple)):
19 |         value = ','.join(value)
20 | 
21 |     # dates and datetimes into isoformat
22 |     elif isinstance(value, (date, datetime)):
23 |         value = value.isoformat()
24 | 
25 |     # make bools into true/false strings
26 |     elif isinstance(value, bool):
27 |         value = str(value).lower()
28 | 
29 |     # encode strings to utf-8
30 |     if isinstance(value, string_types):
31 |         try:
32 |             return value.encode('utf-8')
33 |         except UnicodeDecodeError:
34 |             # Python 2 and str, no need to re-encode
35 |             pass
36 |     
37 |     return str(value)
38 | 
def _make_path(*parts):
    """
    Build a URL path string out of the given parts.

    Parts found in ``SKIP_IN_PATH`` (``None``, empty strings and empty
    sequences) are left out. Lists and tuples are converted to comma
    separated values by ``_escape`` before quoting.
    """
    #TODO: maybe only allow some parts to be lists/tuples ?
    segments = []
    for part in parts:
        if part in SKIP_IN_PATH:
            continue
        # preserve ',' and '*' in url for nicer URLs in logs
        segments.append(quote_plus(_escape(part), b',*'))
    return '/' + '/'.join(segments)
48 | 
# parameters that apply to all methods
GLOBAL_PARAMS = ('pretty', 'format', 'filter_path')

def query_params(*es_query_params):
    """
    Decorator that pops all accepted parameters from method's kwargs and puts
    them in the params argument.

    :arg es_query_params: names of the query parameters accepted by the
        decorated method in addition to ``GLOBAL_PARAMS``

    The decorated function is called with a ``params`` keyword argument
    holding a new dict: it starts as a copy of the ``params`` dict supplied
    by the caller (if any) and is extended with the escaped values of the
    accepted parameters. ``ignore`` and ``request_timeout`` are moved into
    ``params`` without escaping.
    """
    def _wrapper(func):
        @wraps(func)
        def _wrapped(*args, **kwargs):
            # copy so that a dict handed in by the caller is never mutated;
            # an explicit ``params=None`` is treated like a missing argument
            params = dict(kwargs.pop('params', None) or {})
            for p in es_query_params + GLOBAL_PARAMS:
                if p in kwargs:
                    params[p] = _escape(kwargs.pop(p))

            # don't treat ignore and request_timeout as other params to avoid escaping
            for p in ('ignore', 'request_timeout'):
                if p in kwargs:
                    params[p] = kwargs.pop(p)
            return func(*args, params=params, **kwargs)
        return _wrapped
    return _wrapper
72 | 
73 | 
class NamespacedClient(object):
    """
    Holds a reference to a parent client and exposes that client's
    transport as its own ``transport`` attribute.
    """

    def __init__(self, client):
        # parent client this namespace delegates to
        self.client = client

    @property
    def transport(self):
        """Transport of the parent client, looked up on every access."""
        parent = self.client
        return parent.transport
81 | 
class AddonClient(NamespacedClient):
    """
    Namespaced client that can attach itself to an existing client under
    the attribute name given by the class attribute ``namespace``.
    """

    @classmethod
    def infect_client(cls, client):
        """
        Create an instance bound to a weak proxy of ``client``, store it on
        ``client`` as the attribute ``cls.namespace`` and return ``client``.
        """
        # the addon only holds a weakref proxy back to the client
        setattr(client, cls.namespace, cls(weakref.proxy(client)))
        return client
88 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/client/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/client/utils.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/compat.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | PY2 = sys.version_info[0] == 2
 4 | 
 5 | if PY2:
 6 |     string_types = basestring,
 7 |     from urllib import quote_plus, urlencode
 8 |     from urlparse import  urlparse
 9 |     from itertools import imap as map
10 | else:
11 |     string_types = str, bytes
12 |     from urllib.parse import quote_plus, urlencode, urlparse
13 |     map = map
14 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/compat.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/compat.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import Connection
2 | from .http_requests import RequestsHttpConnection
3 | from .http_urllib3 import Urllib3HttpConnection
4 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/connection/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/base.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | try:
  3 |     import simplejson as json
  4 | except ImportError:
  5 |     import json
  6 | 
  7 | from ..exceptions import TransportError, HTTP_EXCEPTIONS
  8 | 
  9 | logger = logging.getLogger('elasticsearch')
 10 | 
 11 | # create the elasticsearch.trace logger, but only set propagate to False if the
 12 | # logger hasn't already been configured
 13 | _tracer_already_configured = 'elasticsearch.trace' in logging.Logger.manager.loggerDict
 14 | tracer = logging.getLogger('elasticsearch.trace')
 15 | if not _tracer_already_configured:
 16 |     tracer.propagate = False
 17 | 
 18 | 
 19 | class Connection(object):
 20 |     """
 21 |     Class responsible for maintaining a connection to an Elasticsearch node. It
 22 |     holds persistent connection pool to it and it's main interface
 23 |     (`perform_request`) is thread-safe.
 24 | 
 25 |     Also responsible for logging.
 26 |     """
 27 |     transport_schema = 'http'
 28 | 
 29 |     def __init__(self, host='localhost', port=9200, url_prefix='', timeout=10, **kwargs):
 30 |         """
 31 |         :arg host: hostname of the node (default: localhost)
 32 |         :arg port: port to use (integer, default: 9200)
 33 |         :arg url_prefix: optional url prefix for elasticsearch
 34 |         :arg timeout: default timeout in seconds (float, default: 10)
 35 |         """
 36 |         self.host = '%s://%s:%s' % (self.transport_schema, host, port)
 37 |         if url_prefix:
 38 |             url_prefix = '/' + url_prefix.strip('/')
 39 |         self.url_prefix = url_prefix
 40 |         self.timeout = timeout
 41 | 
 42 |     def __repr__(self):
 43 |         return '<%s: %s>' % (self.__class__.__name__, self.host)
 44 | 
 45 |     def log_request_success(self, method, full_url, path, body, status_code, response, duration):
 46 |         """ Log a successful API call.  """
 47 |         #  TODO: optionally pass in params instead of full_url and do urlencode only when needed
 48 |         def _pretty_json(data):
 49 |             # pretty JSON in tracer curl logs
 50 |             try:
 51 |                 return json.dumps(json.loads(data), sort_keys=True, indent=2, separators=(',', ': ')).replace("'", r'\u0027')
 52 |             except (ValueError, TypeError):
 53 |                 # non-json data or a bulk request
 54 |                 return data
 55 | 
 56 |         # body has already been serialized to utf-8, deserialize it for logging
 57 |         # TODO: find a better way to avoid (de)encoding the body back and forth
 58 |         if body:
 59 |             body = body.decode('utf-8')
 60 | 
 61 |         logger.info(
 62 |             '%s %s [status:%s request:%.3fs]', method, full_url,
 63 |             status_code, duration
 64 |         )
 65 |         logger.debug('> %s', body)
 66 |         logger.debug('< %s', response)
 67 | 
 68 |         if tracer.isEnabledFor(logging.INFO):
 69 |             # include pretty in trace curls
 70 |             path = path.replace('?', '?pretty&', 1) if '?' in path else path + '?pretty'
 71 |             if self.url_prefix:
 72 |                 path = path.replace(self.url_prefix, '', 1)
 73 |             tracer.info("curl -X%s 'http://localhost:9200%s' -d '%s'", method, path, _pretty_json(body) if body else '')
 74 | 
 75 |         if tracer.isEnabledFor(logging.DEBUG):
 76 |             tracer.debug('#[%s] (%.3fs)\n#%s', status_code, duration, _pretty_json(response).replace('\n', '\n#') if response else '')
 77 | 
 78 |     def log_request_fail(self, method, full_url, body, duration, status_code=None, exception=None):
 79 |         """ Log an unsuccessful API call.  """
 80 |         logger.warning(
 81 |             '%s %s [status:%s request:%.3fs]', method, full_url,
 82 |             status_code or 'N/A', duration, exc_info=exception is not None
 83 |         )
 84 | 
 85 |         # body has already been serialized to utf-8, deserialize it for logging
 86 |         # TODO: find a better way to avoid (de)encoding the body back and forth
 87 |         if body:
 88 |             body = body.decode('utf-8')
 89 | 
 90 |         logger.debug('> %s', body)
 91 | 
 92 |     def _raise_error(self, status_code, raw_data):
 93 |         """ Locate appropriate exception and raise it. """
 94 |         error_message = raw_data
 95 |         additional_info = None
 96 |         try:
 97 |             additional_info = json.loads(raw_data)
 98 |             error_message = additional_info.get('error', error_message)
 99 |             if isinstance(error_message, dict) and 'type' in error_message:
100 |                 error_message = error_message['type']
101 |         except:
102 |             # we don't care what went wrong
103 |             pass
104 | 
105 |         raise HTTP_EXCEPTIONS.get(status_code, TransportError)(status_code, error_message, additional_info)
106 | 
107 | 
108 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/base.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/connection/base.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/esthrift/Rest.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/connection/esthrift/Rest.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/esthrift/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ['ttypes', 'constants', 'Rest']
2 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/esthrift/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/connection/esthrift/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/esthrift/constants.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Autogenerated by Thrift Compiler (0.9.0)
 3 | #
 4 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
 5 | #
 6 | #  options string: py:new_style=true,utf8strings=true
 7 | #
 8 | 
 9 | from thrift.Thrift import TType, TMessageType, TException, TApplicationException
10 | from ttypes import *
11 | 
12 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/esthrift/constants.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/connection/esthrift/constants.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/esthrift/ttypes.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/connection/esthrift/ttypes.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/http_requests.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import warnings
 3 | try:
 4 |     import requests
 5 |     REQUESTS_AVAILABLE = True
 6 | except ImportError:
 7 |     REQUESTS_AVAILABLE = False
 8 | 
 9 | from .base import Connection
10 | from ..exceptions import ConnectionError, ImproperlyConfigured, ConnectionTimeout, SSLError
11 | from ..compat import urlencode, string_types
12 | 
class RequestsHttpConnection(Connection):
    """
    Connection implemented on top of a `requests` session.

    :arg http_auth: optional http auth information as either ':' separated
        string or a tuple. Any value will be passed into requests as `auth`.
    :arg use_ssl: use ssl for the connection if `True`
    :arg verify_certs: whether to verify SSL certificates
    :arg ca_certs: optional path to CA bundle. By default standard requests'
        bundle will be used.
    :arg client_cert: path to the file containing the private key and the
        certificate
    """
    def __init__(self, host='localhost', port=9200, http_auth=None,
        use_ssl=False, verify_certs=False, ca_certs=None, client_cert=None,
        **kwargs):
        if not REQUESTS_AVAILABLE:
            raise ImproperlyConfigured("Please install requests to use RequestsHttpConnection.")

        super(RequestsHttpConnection, self).__init__(host=host, port=port, **kwargs)
        session = requests.session()
        self.session = session

        if http_auth is not None:
            # sequences become tuples, 'user:password' strings are split once
            if isinstance(http_auth, (tuple, list)):
                http_auth = tuple(http_auth)
            elif isinstance(http_auth, string_types):
                http_auth = tuple(http_auth.split(':', 1))
            session.auth = http_auth

        scheme_suffix = 's' if use_ssl else ''
        self.base_url = 'http%s://%s:%d%s' % (
            scheme_suffix, host, port, self.url_prefix)

        session.verify = verify_certs
        session.cert = client_cert
        if ca_certs:
            if not verify_certs:
                raise ImproperlyConfigured("You cannot pass CA certificates when verify SSL is off.")
            # a CA bundle path replaces the boolean verify flag
            session.verify = ca_certs

        if use_ssl and not verify_certs:
            warnings.warn(
                'Connecting to %s using SSL with verify_certs=False is insecure.' % self.base_url)

    def perform_request(self, method, url, params=None, body=None, timeout=None, ignore=()):
        """
        Send the request through the session and return a
        ``(status_code, headers, raw_data)`` tuple.

        SSL, timeout and connection errors raised by `requests` are logged
        and re-raised as ``SSLError``, ``ConnectionTimeout`` and
        ``ConnectionError`` respectively. A status code outside of 2xx that
        is not listed in ``ignore`` is logged and raised via
        ``_raise_error``.
        """
        url = self.base_url + url
        if params:
            url = '%s?%s' % (url, urlencode(params))

        effective_timeout = timeout or self.timeout
        started = time.time()
        try:
            response = self.session.request(method, url, data=body, timeout=effective_timeout)
            duration = time.time() - started
            raw_data = response.text
        except requests.exceptions.SSLError as e:
            self.log_request_fail(method, url, body, time.time() - started, exception=e)
            raise SSLError('N/A', str(e), e)
        except requests.Timeout as e:
            self.log_request_fail(method, url, body, time.time() - started, exception=e)
            raise ConnectionTimeout('TIMEOUT', str(e), e)
        except requests.ConnectionError as e:
            self.log_request_fail(method, url, body, time.time() - started, exception=e)
            raise ConnectionError('N/A', str(e), e)

        # raise errors based on http status codes, let the client handle those if needed
        status = response.status_code
        is_success = 200 <= status < 300
        if not is_success and status not in ignore:
            self.log_request_fail(method, url, body, duration, status)
            self._raise_error(status, raw_data)

        self.log_request_success(method, url, response.request.path_url, body, status, raw_data, duration)

        return status, response.headers, raw_data
83 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/http_requests.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/connection/http_requests.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/http_urllib3.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/connection/http_urllib3.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/memcached.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | try:
 3 |     import simplejson as json
 4 | except ImportError:
 5 |     import json
 6 | 
 7 | from ..exceptions import TransportError, ConnectionError, ImproperlyConfigured
 8 | from ..compat import urlencode
 9 | from .pooling import PoolingConnection
10 | 
class MemcachedConnection(PoolingConnection):
    """
    Client using the `pylibmc` python library to communicate with elasticsearch
    using the memcached protocol. Requires plugin in the cluster.

    See https://github.com/elasticsearch/elasticsearch-transport-memcached for more details.
    """
    transport_schema = 'memcached'

    # HTTP method -> memcached command; unknown methods fall back to 'get'
    method_map = {
        'PUT': 'set',
        'POST': 'set',
        'DELETE': 'delete',
        'HEAD': 'get',
        'GET': 'get',
    }

    def __init__(self, host='localhost', port=11211, **kwargs):
        """
        :arg host: hostname of the node (default: localhost)
        :arg port: port to use (default: 11211)

        Raises ``ImproperlyConfigured`` when `pylibmc` cannot be imported.
        """
        try:
            import pylibmc
        except ImportError:
            raise ImproperlyConfigured("You need to install pylibmc to use the MemcachedConnection class.")
        super(MemcachedConnection, self).__init__(host=host, port=port, **kwargs)
        # factory used by the pooling base class to create new clients
        self._make_connection = lambda: pylibmc.Client(['%s:%s' % (host, port)], behaviors={"tcp_nodelay": True})

    def perform_request(self, method, url, params=None, body=None, timeout=None, ignore=()):
        """
        Perform the request through a pooled memcached client and return a
        ``(status, headers, response)`` tuple; ``headers`` is always an empty
        dict. The ``timeout`` argument is accepted but not used.

        Any exception raised while talking to memcached is logged and
        re-raised as ``ConnectionError``. A status outside of 2xx that is not
        listed in ``ignore`` is logged and raised via ``_raise_error``.
        """
        mc = self._get_connection()
        url = self.url_prefix + url
        if params:
            url = '%s?%s' % (url, urlencode(params or {}))
        full_url = self.host + url

        mc_method = self.method_map.get(method, 'get')

        start = time.time()
        try:
            status = 200
            if mc_method == 'set':
                # no response from set commands
                response = ''
                # check the result of ``set`` directly: wrapping it in
                # json.dumps yields the non-empty strings 'true'/'false',
                # which are both truthy and hid every failed set
                if not mc.set(url, body):
                    status = 500
            else:
                # NOTE(review): 'delete' from method_map also lands here and
                # is issued as a get - confirm whether that is intended
                response = mc.get(url)

            duration = time.time() - start
            if response:
                response = response.decode('utf-8')
        except Exception as e:
            self.log_request_fail(method, full_url, body, time.time() - start, exception=e)
            raise ConnectionError('N/A', str(e), e)
        finally:
            # always hand the client back to the pool
            self._release_connection(mc)

        # try not to load the json every time
        if response and response[0] == '{' and ('"status"' in response or '"error"' in response):
            data = json.loads(response)
            if 'status' in data and isinstance(data['status'], int):
                status = data['status']
            elif 'error' in data:
                raise TransportError('N/A', data['error'])

        if not (200 <= status < 300) and status not in ignore:
            # log the full url, consistent with the other logging calls
            self.log_request_fail(method, full_url, body, duration, status)
            self._raise_error(status, response)

        self.log_request_success(method, full_url, url, body, status,
            response, duration)

        return status, {}, response
81 | 
82 | 
83 | 
84 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/memcached.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/connection/memcached.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/pooling.py:
--------------------------------------------------------------------------------
 1 | try:
 2 |     import queue
 3 | except ImportError:
 4 |     import Queue as queue
 5 | from .base import Connection
 6 | 
 7 | 
 8 | class PoolingConnection(Connection):
 9 |     """
10 |     Base connection class for connections that use libraries without thread
11 |     safety and no capacity for connection pooling. To use this just implement a
12 |     ``_make_connection`` method that constructs a new connection and returns
13 |     it.
14 |     """
15 |     def __init__(self, *args, **kwargs):
16 |         self._free_connections = queue.Queue()
17 |         super(PoolingConnection, self).__init__(*args, **kwargs)
18 | 
19 |     def _get_connection(self):
20 |         try:
21 |             return self._free_connections.get_nowait()
22 |         except queue.Empty:
23 |             return self._make_connection()
24 | 
25 |     def _release_connection(self, con):
26 |         self._free_connections.put(con)
27 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/pooling.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/connection/pooling.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/thrift.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from socket import timeout as SocketTimeout
  3 | from socket import error as SocketError
  4 | import time
  5 | import logging
  6 | 
  7 | try:
  8 |     from .esthrift import Rest
  9 |     from .esthrift.ttypes import Method, RestRequest
 10 | 
 11 |     from thrift.transport import TTransport, TSocket, TSSLSocket
 12 |     from thrift.protocol import TBinaryProtocol
 13 |     from thrift.Thrift import TException
 14 |     THRIFT_AVAILABLE = True
 15 | except ImportError:
 16 |     THRIFT_AVAILABLE = False
 17 | 
 18 | from ..exceptions import ConnectionError, ImproperlyConfigured, ConnectionTimeout
 19 | from .pooling import PoolingConnection
 20 | 
 21 | logger = logging.getLogger('elasticsearch')
 22 | 
 23 | class ThriftConnection(PoolingConnection):
 24 |     """
 25 |     This connection class is deprecated and may be removed in future versions.
 26 | 
 27 |     Connection using the `thrift` protocol to communicate with elasticsearch.
 28 | 
 29 |     See https://github.com/elasticsearch/elasticsearch-transport-thrift for additional info.
 30 |     """
 31 |     transport_schema = 'thrift'
 32 | 
 33 |     def __init__(self, host='localhost', port=9500, framed_transport=False, use_ssl=False, **kwargs):
 34 |         """
 35 |         :arg framed_transport: use `TTransport.TFramedTransport` instead of
 36 |             `TTransport.TBufferedTransport`
 37 |         """
 38 |         if not THRIFT_AVAILABLE:
 39 |             raise ImproperlyConfigured("Thrift is not available.")
 40 | 
 41 |         super(ThriftConnection, self).__init__(host=host, port=port, **kwargs)
 42 |         self._framed_transport = framed_transport
 43 |         self._tsocket_class = TSocket.TSocket
 44 |         if use_ssl:
 45 |             self._tsocket_class = TSSLSocket.TSSLSocket 
 46 |         self._tsocket_args = (host, port)
 47 | 
 48 |     def _make_connection(self):
 49 |         socket = self._tsocket_class(*self._tsocket_args)
 50 |         socket.setTimeout(self.timeout * 1000.0)
 51 |         if self._framed_transport:
 52 |             transport = TTransport.TFramedTransport(socket)
 53 |         else:
 54 |             transport = TTransport.TBufferedTransport(socket)
 55 | 
 56 |         protocol = TBinaryProtocol.TBinaryProtocolAccelerated(transport)
 57 |         client = Rest.Client(protocol)
 58 |         client.transport = transport
 59 |         transport.open()
 60 |         return client
 61 | 
 62 |     def perform_request(self, method, url, params=None, body=None, timeout=None, ignore=()):
 63 |         request = RestRequest(method=Method._NAMES_TO_VALUES[method.upper()], uri=url,
 64 |                     parameters=params, body=body)
 65 | 
 66 |         start = time.time()
 67 |         tclient = None
 68 |         try:
 69 |             tclient = self._get_connection()
 70 |             response = tclient.execute(request)
 71 |             duration = time.time() - start
 72 |         except SocketTimeout as e:
 73 |             self.log_request_fail(method, url, body, time.time() - start, exception=e)
 74 |             raise ConnectionTimeout('TIMEOUT', str(e), e)
 75 |         except (TException, SocketError) as e:
 76 |             self.log_request_fail(method, url, body, time.time() - start, exception=e)
 77 |             if tclient:
 78 |                 try:
 79 |                     # try closing transport socket
 80 |                     tclient.transport.close()
 81 |                 except Exception as e:
 82 |                     logger.warning(
 83 |                         'Exception %s occured when closing a failed thrift connection.',
 84 |                         e, exc_info=True
 85 |                     )
 86 |             raise ConnectionError('N/A', str(e), e)
 87 | 
 88 |         self._release_connection(tclient)
 89 | 
 90 |         if not (200 <= response.status < 300) and response.status not in ignore:
 91 |             self.log_request_fail(method, url, body, duration, response.status)
 92 |             self._raise_error(response.status, response.body)
 93 | 
 94 |         self.log_request_success(method, url, url, body, response.status,
 95 |             response.body, duration)
 96 | 
 97 |         headers = {}
 98 |         if response.headers:
 99 |             headers = dict((k.lower(), v) for k, v in response.headers.items())
100 |         return response.status, headers, response.body or ''
101 | 
102 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection/thrift.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/connection/thrift.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/connection_pool.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/connection_pool.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/exceptions.py:
--------------------------------------------------------------------------------
  1 | __all__ = [
  2 |     'ImproperlyConfigured', 'ElasticsearchException', 'SerializationError',
  3 |     'TransportError', 'NotFoundError', 'ConflictError', 'RequestError', 'ConnectionError',
  4 |     'SSLError', 'ConnectionTimeout'
  5 | ]
  6 | 
  7 | class ImproperlyConfigured(Exception):
  8 |     """
  9 |     Exception raised when the config passed to the client is inconsistent or invalid.
 10 |     """
 11 | 
 12 | 
 13 | class ElasticsearchException(Exception):
 14 |     """
 15 |     Base class for all exceptions raised by this package's operations (doesn't
 16 |     apply to :class:`~elasticsearch.ImproperlyConfigured`).
 17 |     """
 18 | 
 19 | 
 20 | class SerializationError(ElasticsearchException):
 21 |     """
 22 |     Data passed in failed to serialize properly in the ``Serializer`` being
 23 |     used.
 24 |     """
 25 | 
 26 | 
 27 | class TransportError(ElasticsearchException):
 28 |     """
 29 |     Exception raised when ES returns a non-OK (>=400) HTTP status code. Or when
 30 |     an actual connection error happens; in that case the ``status_code`` will
 31 |     be set to ``'N/A'``.
 32 |     """
 33 |     @property
 34 |     def status_code(self):
 35 |         """
 36 |         The HTTP status code of the response that precipitated the error or
 37 |         ``'N/A'`` if not applicable.
 38 |         """
 39 |         return self.args[0]
 40 | 
 41 |     @property
 42 |     def error(self):
 43 |         """ A string error message. """
 44 |         return self.args[1]
 45 | 
 46 |     @property
 47 |     def info(self):
 48 |         """ Dict of returned error info from ES, where available. """
 49 |         return self.args[2]
 50 | 
 51 |     def __str__(self):
 52 |         cause = ''
 53 |         try:
 54 |             if self.info:
 55 |                 cause = ', %r' % self.info['error']['root_cause'][0]['reason']
 56 |         except LookupError:
 57 |             pass
 58 |         return 'TransportError(%s, %r%s)' % (self.status_code, self.error, cause)
 59 | 
 60 | 
 61 | class ConnectionError(TransportError):
 62 |     """
 63 |     Error raised when there was an exception while talking to ES. Original
 64 |     exception from the underlying :class:`~elasticsearch.Connection`
 65 |     implementation is available as ``.info.``
 66 |     """
 67 |     def __str__(self):
 68 |         return 'ConnectionError(%s) caused by: %s(%s)' % (
 69 |             self.error, self.info.__class__.__name__, self.info)
 70 | 
 71 | 
 72 | class SSLError(ConnectionError):
 73 |     """ Error raised when encountering SSL errors. """
 74 | 
 75 | 
 76 | class ConnectionTimeout(ConnectionError):
 77 |     """ A network timeout. Doesn't cause a node retry by default. """
 78 |     def __str__(self):
 79 |         return 'ConnectionTimeout caused by - %s(%s)' % (
 80 |             self.info.__class__.__name__, self.info)
 81 | 
 82 | 
class NotFoundError(TransportError):
    """ Exception representing a 404 status code. """


class ConflictError(TransportError):
    """ Exception representing a 409 status code. """


class RequestError(TransportError):
    """ Exception representing a 400 status code. """


class AuthenticationException(TransportError):
    """ Exception representing a 401 status code. """


class AuthorizationException(TransportError):
    """ Exception representing a 403 status code. """

# more generic mappings from status_code to python exceptions
# Keys are integer HTTP status codes; values are the TransportError subclass
# defined above for that code. Status codes without an entry have no
# dedicated subclass in this mapping.
HTTP_EXCEPTIONS = {
    400: RequestError,
    401: AuthenticationException,
    403: AuthorizationException,
    404: NotFoundError,
    409: ConflictError,
}
110 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/exceptions.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/exceptions.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/helpers/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/helpers/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/helpers/test.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import os
 3 | try:
 4 |     # python 2.6
 5 |     from unittest2 import TestCase, SkipTest
 6 | except ImportError:
 7 |     from unittest import TestCase, SkipTest
 8 | 
 9 | from elasticsearch import Elasticsearch
10 | from elasticsearch.exceptions import ConnectionError
11 | 
def get_test_client(nowait=False, **kwargs):
    """
    Build an ``Elasticsearch`` client for tests and wait until the cluster
    reports at least ``yellow`` health.

    Settings are read from the environment: ``TEST_ES_CONNECTION`` names a
    class in ``elasticsearch.connection`` to use as ``connection_class`` and
    ``TEST_ES_SERVER`` supplies the host (an empty dict when unset). Explicit
    ``kwargs`` override the defaults.

    :arg nowait: when true, try the health check once instead of 100 times
    :raises SkipTest: when every health check ended in a ``ConnectionError``
    """
    options = {'timeout': 30}
    connection_name = os.environ.get('TEST_ES_CONNECTION')
    if connection_name is not None:
        from elasticsearch import connection
        options['connection_class'] = getattr(connection, connection_name)
    options.update(kwargs)

    server = os.environ.get('TEST_ES_SERVER', {})
    client = Elasticsearch([server], **options)

    # poll for yellow status, pausing briefly after each failed attempt
    remaining = 1 if nowait else 100
    while remaining > 0:
        remaining -= 1
        try:
            client.cluster.health(wait_for_status='yellow')
        except ConnectionError:
            time.sleep(.1)
        else:
            return client

    # every attempt failed
    raise SkipTest("Elasticsearch failed to start.")
32 | 
33 | def _get_version(version_string):
34 |     if '.' not in version_string:
35 |         return ()
36 |     version = version_string.strip().split('.')
37 |     return tuple(int(v) if v.isdigit() else 999 for v in version)
38 | 
class ElasticsearchTestCase(TestCase):
    """
    Test case base class that shares one client per class (``cls.client``)
    and removes all indices and index templates after every test.
    """
    @staticmethod
    def _get_client():
        """ Build the client used by the test class. """
        return get_test_client()

    @classmethod
    def setUpClass(cls):
        super(ElasticsearchTestCase, cls).setUpClass()
        cls.client = cls._get_client()

    def tearDown(self):
        super(ElasticsearchTestCase, self).tearDown()
        indices = self.client.indices
        indices.delete(index='*')
        # a missing template (404) is not treated as an error
        indices.delete_template(name='*', ignore=404)

    @property
    def es_version(self):
        """
        Version tuple of the server, taken from ``client.info()`` on first
        access and cached on the instance as ``_es_version``.
        """
        try:
            cached = self._es_version
        except AttributeError:
            number = self.client.info()['version']['number']
            cached = self._es_version = _get_version(number)
        return cached
60 | 
61 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/helpers/test.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/helpers/test.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/serializer.py:
--------------------------------------------------------------------------------
 1 | try:
 2 |     import simplejson as json
 3 | except ImportError:
 4 |     import json
 5 | import uuid
 6 | from datetime import date, datetime
 7 | from decimal import Decimal
 8 | 
 9 | from .exceptions import SerializationError, ImproperlyConfigured
10 | from .compat import string_types
11 | 
class TextSerializer(object):
    """ Pass-through serializer registered for the ``text/plain`` mimetype. """
    mimetype = 'text/plain'

    def loads(self, s):
        """ Return ``s`` unchanged. """
        return s

    def dumps(self, data):
        """
        Return ``data`` unchanged when it is a string.

        :raises SerializationError: for any non-string ``data``
        """
        if isinstance(data, string_types):
            return data

        # Wrap in a one-tuple: with a bare ``% data`` a tuple argument would
        # be unpacked as several format arguments and fail with TypeError
        # instead of producing the intended SerializationError.
        raise SerializationError('Cannot serialize %r into text.' % (data,))
23 | 
class JSONSerializer(object):
    """ Serializer registered for the ``application/json`` mimetype. """
    mimetype = 'application/json'

    def default(self, data):
        """
        Fallback conversion handed to ``json.dumps`` for objects it cannot
        encode itself: dates and datetimes become ISO strings, ``Decimal``
        becomes ``float`` and ``UUID`` becomes its string form.

        :raises TypeError: for any other type
        """
        if isinstance(data, (date, datetime)):
            return data.isoformat()
        if isinstance(data, Decimal):
            return float(data)
        if isinstance(data, uuid.UUID):
            return str(data)
        raise TypeError("Unable to serialize %r (type: %s)" % (data, type(data)))

    def loads(self, s):
        """
        Parse the JSON document ``s``.

        :raises SerializationError: when parsing fails
        """
        try:
            parsed = json.loads(s)
        except (ValueError, TypeError) as e:
            raise SerializationError(s, e)
        return parsed

    def dumps(self, data):
        """
        Encode ``data`` as JSON; strings are returned untouched.

        :raises SerializationError: when encoding fails
        """
        if not isinstance(data, string_types):
            try:
                return json.dumps(data, default=self.default, ensure_ascii=False)
            except (ValueError, TypeError) as e:
                raise SerializationError(data, e)

        # don't serialize strings
        return data
51 | 
# Default mimetype -> serializer instance mapping, keyed by each serializer
# class's own ``mimetype`` attribute.
DEFAULT_SERIALIZERS = {
    JSONSerializer.mimetype: JSONSerializer(),
    TextSerializer.mimetype: TextSerializer(),
}
56 | 
class Deserializer(object):
    """
    Picks a serializer by mimetype and uses it to decode a payload.
    """
    def __init__(self, serializers, default_mimetype='application/json'):
        """
        :arg serializers: mapping of mimetype to serializer object
        :arg default_mimetype: mimetype whose serializer is used when
            ``loads`` is called without one
        :raises ImproperlyConfigured: when ``default_mimetype`` is not a key
            of ``serializers``
        """
        try:
            fallback = serializers[default_mimetype]
        except KeyError:
            raise ImproperlyConfigured('Cannot find default serializer (%s)' % default_mimetype)
        self.default = fallback
        self.serializers = serializers

    def loads(self, s, mimetype=None):
        """
        Decode ``s`` with the serializer registered for ``mimetype``; anything
        after the first ``;`` (such as a charset) is ignored. A missing or
        empty ``mimetype`` selects the default serializer.

        :raises SerializationError: when no serializer is registered for the
            mimetype
        """
        if mimetype:
            # split out charset
            mimetype = mimetype.split(';', 1)[0]
            try:
                chosen = self.serializers[mimetype]
            except KeyError:
                raise SerializationError('Unknown mimetype, unable to deserialize: %s' % mimetype)
        else:
            chosen = self.default

        return chosen.loads(s)
77 | 
78 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/serializer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/serializer.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/elasticsearch/transport.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/elasticsearch/transport.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/lambda_function.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | 
 3 | import json
 4 | import urllib
 5 | import boto3
 6 | import slate # using a specific version of PDFminer due to incompatibilities of certain versions
 7 | import elasticsearch
 8 | import datetime
 9 | 
# Target for extracted text: Elasticsearch host name, index and document type.
es_endpoint = 'search-mattsona-pdf-repo-2vzllafnl4d5oeu647oyu6yy6i.us-west-2.es.amazonaws.com'
es_index = 'pdf_text_extracts'
es_type = 'document'

print('Loading function')

# S3 client created once at import and shared by every call to the handler.
s3 = boto3.client('s3')

# prepare a dict to hold our document data
# NOTE: this dict and its 'insert_time' value are created once, when the
# module is imported, not once per handler call.
doc_data = {}
doc_data['insert_time'] = str(datetime.datetime.isoformat(datetime.datetime.now()))
21 | 
22 | 
def lambda_handler(event, context):
    """
    Extract the first page of text from the PDF named in an S3 event and
    index it in Elasticsearch.

    :arg event: S3 notification; only the first entry of ``event['Records']``
        is processed
    :arg context: Lambda context object (unused)
    :returns: the string ``"Done"`` on success
    :raises Exception: any error from the download, the text extraction or
        the Elasticsearch post is logged and re-raised
    """
    #print("Received event: " + json.dumps(event, indent=2))

    # Get the object from the event and show its content type
    s3_record = event['Records'][0]['s3']
    bucket = s3_record['bucket']['name']
    object_key = urllib.unquote_plus(s3_record['object']['key']).decode('utf8')

    # Work on a per-invocation copy so that values from one call never leak
    # into the next one through the module-level dict, and stamp the document
    # with the time of this invocation rather than the module import time.
    document = dict(doc_data)
    document['insert_time'] = datetime.datetime.now().isoformat()

    try:
        # get the file data from s3
        response = s3.get_object(Bucket=bucket, Key=object_key)
        print("CONTENT TYPE: " + response['ContentType'])

        # write the object data to a local file; will be passed to slate.
        # PDF content is binary, and ``with`` closes the file on any error.
        with open('/tmp/tempfile.pdf', 'wb') as temp_pdf_file:
            temp_pdf_file.write(response['Body'].read())

        # pull the text from the temporary PDF file using slate
        print("Extracting data from: " + object_key)
        with open('/tmp/tempfile.pdf', 'rb') as temp_pdf_file:
            doc = slate.PDF(temp_pdf_file)

        # store document data to dict
        document['source_pdf_name'] = object_key
        document['document_text'] = doc[0] # we're only worried about page 1 at this point

    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(object_key, bucket))
        # bare raise keeps the original traceback
        raise

    # put the data in ES
    try:
        es = elasticsearch.Elasticsearch([{'host': es_endpoint, 'port': 443, 'use_ssl': True}]) # hold off on validating certs
        es_response = es.index(index=es_index, doc_type=es_type, body=document)
        print('Data posted to ES: ' + str(es_response))

    except Exception as e:
        print('Data post to ES failed: ' + str(e))
        raise

    return "Done"
64 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/lambda_function.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/lambda_function.zip


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer-20110515-py2.7.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
 1 | Metadata-Version: 1.1
 2 | Name: pdfminer
 3 | Version: 20110515
 4 | Summary: PDF parser and analyzer
 5 | Home-page: http://www.unixuser.org/~euske/python/pdfminer/index.html
 6 | Author: Yusuke Shinyama
 7 | Author-email: yusuke at cs dot nyu dot edu
 8 | License: MIT/X
 9 | Description: PDFMiner is a tool for extracting information from PDF documents.
10 |         Unlike other PDF-related tools, it focuses entirely on getting 
11 |         and analyzing text data. PDFMiner allows to obtain
12 |         the exact location of texts in a page, as well as 
13 |         other information such as fonts or lines.
14 |         It includes a PDF converter that can transform PDF files
15 |         into other text formats (such as HTML). It has an extensible
16 |         PDF parser that can be used for other purposes instead of text analysis.
17 | Keywords: pdf parser,pdf converter,layout analysis,text mining
18 | Platform: UNKNOWN
19 | Classifier: Development Status :: 4 - Beta
20 | Classifier: Environment :: Console
21 | Classifier: Intended Audience :: Developers
22 | Classifier: Intended Audience :: Science/Research
23 | Classifier: License :: OSI Approved :: MIT License
24 | Classifier: Topic :: Text Processing
25 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer-20110515-py2.7.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
 1 | README.txt
 2 | pdfminer/__init__.py
 3 | pdfminer/arcfour.py
 4 | pdfminer/ascii85.py
 5 | pdfminer/cmapdb.py
 6 | pdfminer/converter.py
 7 | pdfminer/encodingdb.py
 8 | pdfminer/fontmetrics.py
 9 | pdfminer/glyphlist.py
10 | pdfminer/latin_enc.py
11 | pdfminer/layout.py
12 | pdfminer/lzw.py
13 | pdfminer/pdfcolor.py
14 | pdfminer/pdfdevice.py
15 | pdfminer/pdffont.py
16 | pdfminer/pdfinterp.py
17 | pdfminer/pdfparser.py
18 | pdfminer/pdftypes.py
19 | pdfminer/psparser.py
20 | pdfminer/rijndael.py
21 | pdfminer/runlength.py
22 | pdfminer/utils.py
23 | pdfminer.egg-info/PKG-INFO
24 | pdfminer.egg-info/SOURCES.txt
25 | pdfminer.egg-info/dependency_links.txt
26 | pdfminer.egg-info/top_level.txt
27 | pdfminer/cmap/__init__.py
28 | tools/dumppdf.py
29 | tools/latin2ascii.py
30 | tools/pdf2txt.py


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer-20110515-py2.7.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer-20110515-py2.7.egg-info/installed-files.txt:
--------------------------------------------------------------------------------
 1 | ../pdfminer/__init__.py
 2 | ../pdfminer/arcfour.py
 3 | ../pdfminer/ascii85.py
 4 | ../pdfminer/cmapdb.py
 5 | ../pdfminer/converter.py
 6 | ../pdfminer/encodingdb.py
 7 | ../pdfminer/fontmetrics.py
 8 | ../pdfminer/glyphlist.py
 9 | ../pdfminer/latin_enc.py
10 | ../pdfminer/layout.py
11 | ../pdfminer/lzw.py
12 | ../pdfminer/pdfcolor.py
13 | ../pdfminer/pdfdevice.py
14 | ../pdfminer/pdffont.py
15 | ../pdfminer/pdfinterp.py
16 | ../pdfminer/pdfparser.py
17 | ../pdfminer/pdftypes.py
18 | ../pdfminer/psparser.py
19 | ../pdfminer/rijndael.py
20 | ../pdfminer/runlength.py
21 | ../pdfminer/utils.py
22 | ../pdfminer/cmap/__init__.py
23 | ../pdfminer/__init__.pyc
24 | ../pdfminer/arcfour.pyc
25 | ../pdfminer/ascii85.pyc
26 | ../pdfminer/cmapdb.pyc
27 | ../pdfminer/converter.pyc
28 | ../pdfminer/encodingdb.pyc
29 | ../pdfminer/fontmetrics.pyc
30 | ../pdfminer/glyphlist.pyc
31 | ../pdfminer/latin_enc.pyc
32 | ../pdfminer/layout.pyc
33 | ../pdfminer/lzw.pyc
34 | ../pdfminer/pdfcolor.pyc
35 | ../pdfminer/pdfdevice.pyc
36 | ../pdfminer/pdffont.pyc
37 | ../pdfminer/pdfinterp.pyc
38 | ../pdfminer/pdfparser.pyc
39 | ../pdfminer/pdftypes.pyc
40 | ../pdfminer/psparser.pyc
41 | ../pdfminer/rijndael.pyc
42 | ../pdfminer/runlength.pyc
43 | ../pdfminer/utils.pyc
44 | ../pdfminer/cmap/__init__.pyc
45 | ./
46 | dependency_links.txt
47 | PKG-INFO
48 | SOURCES.txt
49 | top_level.txt
50 | ../../../bin/dumppdf.py
51 | ../../../bin/latin2ascii.py
52 | ../../../bin/pdf2txt.py
53 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer-20110515-py2.7.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | pdfminer
2 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python2
2 | __version__ = '20110515'
3 | 
4 | if __name__ == '__main__': print __version__
5 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/arcfour.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2
 2 | 
 3 | """ Python implementation of Arcfour encryption algorithm.
 4 | 
 5 | This code is in the public domain.
 6 | 
 7 | """
 8 | 
 9 | ##  Arcfour
10 | ##
class Arcfour(object):

    """
    Arcfour stream cipher over character strings.

    The cipher state is kept on the instance, so consecutive ``process``
    calls continue the same key stream.

    >>> tohex = lambda s: ''.join('%02x' % ord(c) for c in s)
    >>> tohex(Arcfour('Key').process('Plaintext'))
    'bbf316e8d940af0ad3'
    >>> tohex(Arcfour('Wiki').process('pedia'))
    '1021bf0420'
    >>> tohex(Arcfour('Secret').process('Attack at dawn'))
    '45a01f645fc35b383552544b9bf5'
    """

    def __init__(self, key):
        """Run the key schedule for ``key`` (a non-empty character string)."""
        # A real list is required because the permutation is shuffled in place.
        s = list(range(256))
        j = 0
        klen = len(key)
        for i in range(256):
            j = (j + s[i] + ord(key[i % klen])) % 256
            (s[i], s[j]) = (s[j], s[i])
        self.s = s
        (self.i, self.j) = (0, 0)
        return

    def process(self, data):
        """
        XOR ``data`` with the next ``len(data)`` key stream values and return
        the result as a string of the same length.
        """
        (i, j) = (self.i, self.j)
        s = self.s
        # Collect the output characters and join once instead of growing a
        # string one character at a time.
        out = []
        for c in data:
            i = (i+1) % 256
            j = (j+s[i]) % 256
            (s[i], s[j]) = (s[j], s[i])
            k = s[(s[i]+s[j]) % 256]
            out.append(chr(ord(c) ^ k))
        (self.i, self.j) = (i, j)
        return ''.join(out)
45 | 
46 | # test
47 | if __name__ == '__main__':
48 |     import doctest
49 |     doctest.testmod()
50 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/arcfour.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/arcfour.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/ascii85.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2
 2 | 
 3 | """ Python implementation of ASCII85/ASCIIHex decoder (Adobe version).
 4 | 
 5 | This code is in the public domain.
 6 | 
 7 | """
 8 | 
 9 | import re
10 | import struct
11 | 
12 | # ascii85decode(data)
def ascii85decode(data):
    """
    In ASCII85 encoding, every four bytes are encoded with five ASCII
    letters, using 85 different types of characters (as 256**4 < 85**5).
    When the length of the original bytes is not a multiple of 4, a special
    rule is used for round up.

    The Adobe's ASCII85 implementation is slightly different from
    its original in handling the last characters.

    ``data`` may be a byte string or a text string; the result is always a
    byte string. Characters outside ``!``..``u``, ``z`` and ``~`` are skipped,
    and decoding stops at the first ``~``.

    The sample string is taken from:
      http://en.wikipedia.org/w/index.php?title=Ascii85

    >>> ascii85decode('9jqo^BlbD-BleB1DJ+*+F(f,q') == b'Man is distinguished'
    True
    >>> ascii85decode('E,9)oF*2M7/c~>') == b'pleasure.'
    True
    """
    if not isinstance(data, bytes):
        # Text input: every significant character is ASCII, so characters
        # that do not fit in one byte can be dropped (they would be skipped
        # by the loop below anyway).
        data = data.encode('latin-1', 'ignore')
    n = b = 0
    # Decoded groups are collected and joined once at the end.
    chunks = []
    for c in bytearray(data):
        if 33 <= c <= 117:              # '!' .. 'u': one base-85 digit
            n += 1
            b = b*85+(c-33)
            if n == 5:
                chunks.append(struct.pack('>L', b))
                n = b = 0
        elif c == 122:                  # 'z': shorthand for four zero bytes
            assert n == 0
            chunks.append(b'\0\0\0\0')
        elif c == 126:                  # '~': end of data
            if n:
                # Pad the partial group with the highest digit and keep only
                # the n-1 bytes that carry data.
                for _ in range(5-n):
                    b = b*85+84
                chunks.append(struct.pack('>L', b)[:n-1])
            break
    return b''.join(chunks)
50 | 
51 | # asciihexdecode(data)
# Two adjacent hex digits form one byte.
hex_re = re.compile(r'([a-f\d]{2})', re.IGNORECASE)
# A single hex digit left over after complete pairs, followed only by
# whitespace and/or '>' up to the end of the data.
trail_re = re.compile(r'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE)
def asciihexdecode(data):
    """
    ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1
    For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
    ASCIIHexDecode filter produces one byte of binary data. All white-space
    characters are ignored. A right angle bracket character (>) indicates
    EOD. Any other characters will cause an error. If the filter encounters
    the EOD marker after reading an odd number of hexadecimal digits, it
    will behave as if a 0 followed the last digit.

    ``data`` may be a byte string or a text string; the result is always a
    byte string. NOTE: this implementation pairs hex digits only when they
    are adjacent, and characters that are not part of a pair are skipped
    rather than reported as errors.

    >>> asciihexdecode('61 62 2e6364   65') == b'ab.cde'
    True
    >>> asciihexdecode('61 62 2e6364   657>') == b'ab.cdep'
    True
    >>> asciihexdecode('7>') == b'p'
    True
    """
    if isinstance(data, bytes) and not isinstance(data, str):
        # Python 3 byte input: the patterns above are text patterns.
        data = data.decode('latin-1')
    values = [int(hx, 16) for hx in hex_re.findall(data)]
    m = trail_re.search(data)
    if m:
        # odd trailing digit: behave as if a 0 followed it
        values.append(int(m.group(1) + '0', 16))
    return bytes(bytearray(values))
77 | 
78 | 
79 | if __name__ == '__main__':
80 |     import doctest
81 |     doctest.testmod()
82 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/ascii85.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/ascii85.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/cmap/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/cmap/__init__.py


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/cmap/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/cmap/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/cmapdb.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/cmapdb.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/converter.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/converter.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/encodingdb.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2
 2 | 
 3 | import re
 4 | from psparser import PSLiteral
 5 | from glyphlist import glyphname2unicode
 6 | from latin_enc import ENCODING
 7 | 
 8 | 
 9 | ##  name2unicode
10 | ##
# Matches the first run of decimal digits inside a glyph name.
STRIP_NAME = re.compile(r'[0-9]+')
def name2unicode(name):
    """Converts Adobe glyph names to Unicode numbers.

    Names present in glyphname2unicode are answered from that table.
    Otherwise the first run of decimal digits found in the name is read as
    a decimal code point and converted with unichr().

    Raises KeyError(name) when the name is not in the table and either
    contains no digits or its number is not a valid code point. The latter
    used to escape as ValueError/OverflowError from unichr(), which callers
    that only guard against KeyError (see EncodingDB.get_encoding) did not
    expect.
    """
    if name in glyphname2unicode:
        return glyphname2unicode[name]
    m = STRIP_NAME.search(name)
    if not m: raise KeyError(name)
    try:
        return unichr(int(m.group(0)))
    except (ValueError, OverflowError):
        # Number outside the range unichr() accepts: treat as unknown glyph.
        raise KeyError(name)
19 | 
20 | 
21 | ##  EncodingDB
22 | ##
class EncodingDB(object):
    """Character-code to Unicode tables for the predefined encodings.

    The four tables are filled once, while the class body executes, from
    the rows of ENCODING. Each row is unpacked as a glyph name followed by
    its code in the standard, Mac, Windows and PDFDoc columns; a falsy code
    means the glyph gets no entry in that table.
    """

    std2unicode = {}
    mac2unicode = {}
    win2unicode = {}
    pdf2unicode = {}
    for (name, std, mac, win, pdf) in ENCODING:
        c = name2unicode(name)
        if std:
            std2unicode[std] = c
        if mac:
            mac2unicode[mac] = c
        if win:
            win2unicode[win] = c
        if pdf:
            pdf2unicode[pdf] = c

    # Lookup of the tables above by encoding name.
    encodings = {
        'StandardEncoding': std2unicode,
        'MacRomanEncoding': mac2unicode,
        'WinAnsiEncoding': win2unicode,
        'PDFDocEncoding': pdf2unicode,
    }

    @classmethod
    def get_encoding(cls, name, diff=None):
        """Return the code-to-Unicode table for the encoding called name.

        Unknown names fall back to the standard table. Without diff the
        shared class-level table itself is returned. With diff, a copy is
        returned with the differences applied: an int item sets the current
        code, and each PSLiteral item stores the Unicode value of its name
        at the current code and then advances the code by one. Literals
        whose name cannot be converted (KeyError) are skipped, but still
        advance the code. Items of any other type are ignored.
        """
        base = cls.encodings.get(name, cls.std2unicode)
        if not diff:
            return base
        table = base.copy()
        code = 0
        for item in diff:
            if isinstance(item, int):
                code = item
                continue
            if not isinstance(item, PSLiteral):
                continue
            try:
                table[code] = name2unicode(item.name)
            except KeyError:
                pass
            code += 1
        return table
59 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/encodingdb.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/encodingdb.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/fontmetrics.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/fontmetrics.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/glyphlist.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/glyphlist.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/latin_enc.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/latin_enc.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/layout.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/layout.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/lzw.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | import sys
  3 | try:
  4 |     from cStringIO import StringIO
  5 | except ImportError:
  6 |     from StringIO import StringIO
  7 | 
  8 | 
  9 | ##  LZWDecoder
 10 | ##
 11 | class LZWDecoder(object):
 12 | 
 13 |     debug = 0
 14 | 
 15 |     def __init__(self, fp):
 16 |         self.fp = fp
 17 |         self.buff = 0
 18 |         self.bpos = 8
 19 |         self.nbits = 9
 20 |         self.table = None
 21 |         self.prevbuf = None
 22 |         return
 23 | 
 24 |     def readbits(self, bits):
 25 |         v = 0
 26 |         while 1:
 27 |             # the number of remaining bits we can get from the current buffer.
 28 |             r = 8-self.bpos
 29 |             if bits <= r:
 30 |                 # |-----8-bits-----|
 31 |                 # |-bpos-|-bits-|  |
 32 |                 # |      |----r----|
 33 |                 v = (v<<bits) | ((self.buff>>(r-bits)) & ((1<<bits)-1))
 34 |                 self.bpos += bits
 35 |                 break
 36 |             else:
 37 |                 # |-----8-bits-----|
 38 |                 # |-bpos-|---bits----...
 39 |                 # |      |----r----|
 40 |                 v = (v<<r) | (self.buff & ((1<<r)-1))
 41 |                 bits -= r
 42 |                 x = self.fp.read(1)
 43 |                 if not x: raise EOFError
 44 |                 self.buff = ord(x)
 45 |                 self.bpos = 0
 46 |         return v
 47 | 
 48 |     def feed(self, code):
 49 |         x = ''
 50 |         if code == 256:
 51 |             self.table = [ chr(c) for c in xrange(256) ] # 0-255
 52 |             self.table.append(None) # 256
 53 |             self.table.append(None) # 257
 54 |             self.prevbuf = ''
 55 |             self.nbits = 9
 56 |         elif code == 257:
 57 |             pass
 58 |         elif not self.prevbuf:
 59 |             x = self.prevbuf = self.table[code]
 60 |         else:
 61 |             if code < len(self.table):
 62 |                 x = self.table[code]
 63 |                 self.table.append(self.prevbuf+x[0])
 64 |             else:
 65 |                 self.table.append(self.prevbuf+self.prevbuf[0])
 66 |                 x = self.table[code]
 67 |             l = len(self.table)
 68 |             if l == 511:
 69 |                 self.nbits = 10
 70 |             elif l == 1023:
 71 |                 self.nbits = 11
 72 |             elif l == 2047:
 73 |                 self.nbits = 12
 74 |             self.prevbuf = x
 75 |         return x
 76 | 
 77 |     def run(self):
 78 |         while 1:
 79 |             try:
 80 |                 code = self.readbits(self.nbits)
 81 |             except EOFError:
 82 |                 break
 83 |             x = self.feed(code)
 84 |             yield x
 85 |             if self.debug:
 86 |                 print >>sys.stderr, ('nbits=%d, code=%d, output=%r, table=%r' %
 87 |                                      (self.nbits, code, x, self.table[258:]))
 88 |         return
 89 | 
 90 | # lzwdecode
 91 | def lzwdecode(data):
 92 |     """
 93 |     >>> lzwdecode('\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01')
 94 |     '\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42'
 95 |     """
 96 |     fp = StringIO(data)
 97 |     return ''.join(LZWDecoder(fp).run())
 98 | 
# When this module is executed as a script, run the doctests embedded in
# its docstrings.
if __name__ == '__main__':
    import doctest
    doctest.testmod()
102 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/lzw.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/lzw.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/pdfcolor.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2
 2 | from psparser import LIT
 3 | 
 4 | 
 5 | ##  PDFColorSpace
 6 | ##
# Literal objects for the three device color space names, obtained from
# psparser.LIT.
# NOTE(review): LIT presumably interns PostScript literal names so they can
# be compared by identity -- confirm in psparser.
LITERAL_DEVICE_GRAY = LIT('DeviceGray')
LITERAL_DEVICE_RGB = LIT('DeviceRGB')
LITERAL_DEVICE_CMYK = LIT('DeviceCMYK')
10 | 
class PDFColorSpace(object):
    """A color space name paired with its number of color components."""

    def __init__(self, name, ncomponents):
        # Both values are stored exactly as given; nothing is validated.
        self.ncomponents = ncomponents
        self.name = name

    def __repr__(self):
        fields = (self.name, self.ncomponents)
        return '<PDFColorSpace: %s, ncomponents=%d>' % fields
20 | 
21 | 
# Color spaces that can be looked up by name. Each name maps to a
# PDFColorSpace built from that name and the component count listed next
# to it.
# The table is written as a tuple of pairs instead of going through
# dict.iteritems(), which exists only on Python 2; the resulting dict is
# the same.
PREDEFINED_COLORSPACE = dict(
  (name, PDFColorSpace(name,n)) for (name,n) in (
  ('CalRGB', 3),
  ('CalGray', 1),
  ('Lab', 3),
  ('DeviceRGB', 3),
  ('DeviceCMYK', 4),
  ('DeviceGray', 1),
  ('Separation', 1),
  ('Indexed', 1),
  ('Pattern', 1),
  ))
34 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/pdfcolor.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/pdfcolor.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/pdfdevice.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/pdfdevice.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/pdffont.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/pdffont.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/pdfinterp.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/pdfinterp.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/pdfparser.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/pdfparser.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/pdftypes.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/pdftypes.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/psparser.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/psparser.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/rijndael.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/rijndael.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/runlength.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2
 2 | #
 3 | # RunLength decoder (Adobe version) implementation based on PDF Reference
 4 | # version 1.4 section 3.3.4.
 5 | #
 6 | #  * public domain *
 7 | #
 8 | 
 9 | import sys
10 | 
def rldecode(data):
    """
    RunLength decoder (Adobe version) implementation based on PDF Reference
    version 1.4 section 3.3.4:
        The RunLengthDecode filter decodes data that has been encoded in a
        simple byte-oriented format based on run length. The encoded data
        is a sequence of runs, where each run consists of a length byte
        followed by 1 to 128 bytes of data. If the length byte is in the
        range 0 to 127, the following length + 1 (1 to 128) bytes are
        copied literally during decompression. If length is in the range
        129 to 255, the following single byte is to be copied 257 - length
        (2 to 128) times during decompression. A length value of 128
        denotes EOD.

    data is the encoded string; the decoded string is returned. Anything
    after the EOD marker is ignored. Truncated input does not raise: a
    literal run that is cut short contributes the bytes that are present,
    and a repeat run whose data byte is missing ends the decoding.

    >>> s = "\x05123456\xfa7\x04abcde\x80junk"
    >>> rldecode(s)
    '1234567777777abcde'
    """
    decoded = []
    size = len(data)
    i = 0
    while i < size:
        length = ord(data[i])
        if length == 128:
            # EOD marker
            break
        if length < 128:
            # Literal run: copy the next length+1 bytes. Slicing simply
            # stops at the end of data if the run is truncated.
            decoded.append(data[i+1:i+2+length])
            i += length + 2
        else:
            if i + 1 >= size:
                # The byte to repeat is missing (truncated input); this
                # used to raise IndexError.
                break
            decoded.append(data[i+1] * (257-length))
            i += 2
    return ''.join(decoded)
46 | 
47 | 
# When this module is executed as a script, run the doctests embedded in
# its docstrings.
if __name__ == '__main__':
    import doctest
    doctest.testmod()
51 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/runlength.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/runlength.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pdfminer/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pdfminer/utils.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pkg_resources/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pkg_resources/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pkg_resources/_vendor/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pkg_resources/_vendor/__init__.py


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pkg_resources/_vendor/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pkg_resources/_vendor/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pkg_resources/_vendor/packaging/__about__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2014 Donald Stufft
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from __future__ import absolute_import, division, print_function
15 | 
# Names exported by a star-import of this module.
__all__ = [
    "__title__",
    "__summary__",
    "__uri__",
    "__version__",
    "__author__",
    "__email__",
    "__license__",
    "__copyright__",
]

# What the distribution is and where it lives.
__title__ = "packaging"
__summary__ = "Core utilities for Python packages"
__uri__ = "https://github.com/pypa/packaging"

__version__ = "15.3"

# Who wrote it.
__author__ = "Donald Stufft"
__email__ = "donald@stufft.io"

# Licensing; the copyright line is derived from the author name above.
__license__ = "Apache License, Version 2.0"
__copyright__ = "Copyright 2014 %s" % (__author__,)
32 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pkg_resources/_vendor/packaging/__about__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pkg_resources/_vendor/packaging/__about__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pkg_resources/_vendor/packaging/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2014 Donald Stufft
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from __future__ import absolute_import, division, print_function
15 | 
16 | from .__about__ import (
17 |     __author__, __copyright__, __email__, __license__, __summary__, __title__,
18 |     __uri__, __version__
19 | )
20 | 
# Names exported by a star-import of this package: the metadata values
# imported from .__about__.
__all__ = [
    "__title__", "__summary__", "__uri__", "__version__", "__author__",
    "__email__", "__license__", "__copyright__",
]
25 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pkg_resources/_vendor/packaging/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pkg_resources/_vendor/packaging/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pkg_resources/_vendor/packaging/_compat.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2014 Donald Stufft
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from __future__ import absolute_import, division, print_function
15 | 
16 | import sys
17 | 
18 | 
# Flags for the major version of the running interpreter.
PY3 = sys.version_info[0] == 3
PY2 = sys.version_info[0] == 2

# flake8: noqa

# One-element tuple holding the base text type of this interpreter, usable
# as the second argument of isinstance(). Only the selected branch is
# evaluated, so basestring is never looked up on Python 3.
string_types = (str,) if PY3 else (basestring,)
28 | 
29 | 
def with_metaclass(meta, *bases):
    """
    Create a base class with a metaclass.

    meta is the metaclass to apply and bases are the intended base classes.
    The return value is a placeholder class named 'temporary_class' that is
    meant to be listed as the only base in a class statement; the class
    produced by that statement is then built by meta with bases as its
    bases.
    """
    # This requires a bit of explanation: the basic idea is to make a dummy
    # metaclass for one level of class instantiation that replaces itself with
    # the actual metaclass.
    class metaclass(meta):
        # Runs when a class derives from the placeholder returned below. The
        # bases seen by that class statement (this_bases) are dropped and the
        # real class is created by meta with the bases captured above.
        def __new__(cls, name, this_bases, d):
            return meta(name, bases, d)
    # type.__new__ is called directly so that the __new__ override above is
    # not triggered while the placeholder itself is being created.
    return type.__new__(metaclass, 'temporary_class', (), {})
41 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pkg_resources/_vendor/packaging/_compat.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pkg_resources/_vendor/packaging/_compat.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pkg_resources/_vendor/packaging/_structures.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2014 Donald Stufft
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from __future__ import absolute_import, division, print_function
15 | 
16 | 
class Infinity(object):
    """Sentinel that orders after whatever it is compared with.

    The "greater" comparisons always answer True and the "less" comparisons
    always answer False, whatever the other operand is. Equality holds only
    between instances of this class, and negation gives NegativeInfinity.
    """

    def __repr__(self):
        return "Infinity"

    def __hash__(self):
        # Hash the textual form.
        text = repr(self)
        return hash(text)

    def __eq__(self, other):
        return isinstance(other, self.__class__)

    def __ne__(self, other):
        same_kind = isinstance(other, self.__class__)
        return not same_kind

    def __gt__(self, other):
        return True

    def __ge__(self, other):
        return True

    def __lt__(self, other):
        return False

    def __le__(self, other):
        return False

    def __neg__(self):
        return NegativeInfinity

# From here on the name refers to an instance of the class, not the class.
Infinity = Infinity()
47 | 
48 | 
class NegativeInfinity(object):
    """Sentinel that orders before whatever it is compared with.

    The "less" comparisons always answer True and the "greater" comparisons
    always answer False, whatever the other operand is. Equality holds only
    between instances of this class, and negation gives Infinity.
    """

    def __repr__(self):
        return "-Infinity"

    def __hash__(self):
        # Hash the textual form.
        text = repr(self)
        return hash(text)

    def __eq__(self, other):
        return isinstance(other, self.__class__)

    def __ne__(self, other):
        same_kind = isinstance(other, self.__class__)
        return not same_kind

    def __lt__(self, other):
        return True

    def __le__(self, other):
        return True

    def __gt__(self, other):
        return False

    def __ge__(self, other):
        return False

    def __neg__(self):
        return Infinity

# From here on the name refers to an instance of the class, not the class.
NegativeInfinity = NegativeInfinity()
79 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pkg_resources/_vendor/packaging/_structures.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pkg_resources/_vendor/packaging/_structures.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pkg_resources/_vendor/packaging/specifiers.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pkg_resources/_vendor/packaging/specifiers.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/pkg_resources/_vendor/packaging/version.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/pkg_resources/_vendor/packaging/version.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools-19.2.dist-info/WHEEL:
--------------------------------------------------------------------------------
1 | Wheel-Version: 1.0
2 | Generator: bdist_wheel (0.26.0)
3 | Root-Is-Purelib: true
4 | Tag: py2-none-any
5 | Tag: py3-none-any
6 | 
7 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools-19.2.dist-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | https://pypi.python.org/packages/source/c/certifi/certifi-2015.11.20.tar.gz#md5=25134646672c695c1ff1593c2dd75d08
2 | https://pypi.python.org/packages/source/w/wincertstore/wincertstore-0.2.zip#md5=ae728f2f007185648d0c7a8679b361e2
3 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools-19.2.dist-info/entry_points.txt:
--------------------------------------------------------------------------------
 1 | [console_scripts]
 2 | easy_install = setuptools.command.easy_install:main
 3 | easy_install-3.5 = setuptools.command.easy_install:main
 4 | 
 5 | [distutils.commands]
 6 | alias = setuptools.command.alias:alias
 7 | bdist_egg = setuptools.command.bdist_egg:bdist_egg
 8 | bdist_rpm = setuptools.command.bdist_rpm:bdist_rpm
 9 | bdist_wininst = setuptools.command.bdist_wininst:bdist_wininst
10 | build_ext = setuptools.command.build_ext:build_ext
11 | build_py = setuptools.command.build_py:build_py
12 | develop = setuptools.command.develop:develop
13 | easy_install = setuptools.command.easy_install:easy_install
14 | egg_info = setuptools.command.egg_info:egg_info
15 | install = setuptools.command.install:install
16 | install_egg_info = setuptools.command.install_egg_info:install_egg_info
17 | install_lib = setuptools.command.install_lib:install_lib
18 | install_scripts = setuptools.command.install_scripts:install_scripts
19 | register = setuptools.command.register:register
20 | rotate = setuptools.command.rotate:rotate
21 | saveopts = setuptools.command.saveopts:saveopts
22 | sdist = setuptools.command.sdist:sdist
23 | setopt = setuptools.command.setopt:setopt
24 | test = setuptools.command.test:test
25 | upload_docs = setuptools.command.upload_docs:upload_docs
26 | 
27 | [distutils.setup_keywords]
28 | convert_2to3_doctests = setuptools.dist:assert_string_list
29 | dependency_links = setuptools.dist:assert_string_list
30 | eager_resources = setuptools.dist:assert_string_list
31 | entry_points = setuptools.dist:check_entry_points
32 | exclude_package_data = setuptools.dist:check_package_data
33 | extras_require = setuptools.dist:check_extras
34 | include_package_data = setuptools.dist:assert_bool
35 | install_requires = setuptools.dist:check_requirements
36 | namespace_packages = setuptools.dist:check_nsp
37 | package_data = setuptools.dist:check_package_data
38 | packages = setuptools.dist:check_packages
39 | setup_requires = setuptools.dist:check_requirements
40 | test_loader = setuptools.dist:check_importable
41 | test_runner = setuptools.dist:check_importable
42 | test_suite = setuptools.dist:check_test_suite
43 | tests_require = setuptools.dist:check_requirements
44 | use_2to3 = setuptools.dist:assert_bool
45 | use_2to3_exclude_fixers = setuptools.dist:assert_string_list
46 | use_2to3_fixers = setuptools.dist:assert_string_list
47 | zip_safe = setuptools.dist:assert_bool
48 | 
49 | [egg_info.writers]
50 | PKG-INFO = setuptools.command.egg_info:write_pkg_info
51 | dependency_links.txt = setuptools.command.egg_info:overwrite_arg
52 | depends.txt = setuptools.command.egg_info:warn_depends_obsolete
53 | eager_resources.txt = setuptools.command.egg_info:overwrite_arg
54 | entry_points.txt = setuptools.command.egg_info:write_entries
55 | namespace_packages.txt = setuptools.command.egg_info:overwrite_arg
56 | requires.txt = setuptools.command.egg_info:write_requirements
57 | top_level.txt = setuptools.command.egg_info:write_toplevel_names
58 | 
59 | [setuptools.installation]
60 | eggsecutable = setuptools.command.easy_install:bootstrap
61 | 
62 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools-19.2.dist-info/metadata.json:
--------------------------------------------------------------------------------
1 | {"generator": "bdist_wheel (0.26.0)", "summary": "Easily download, build, install, upgrade, and uninstall Python packages", "classifiers": ["Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: Python Software Foundation License", "License :: OSI Approved :: Zope Public License", "Operating System :: OS Independent", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: System :: Archiving :: Packaging", "Topic :: System :: Systems Administration", "Topic :: Utilities"], "extensions": {"python.details": {"project_urls": {"Home": "https://bitbucket.org/pypa/setuptools"}, "contacts": [{"email": "distutils-sig@python.org", "name": "Python Packaging Authority", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst"}}, "python.exports": {"console_scripts": {"easy_install": "setuptools.command.easy_install:main", "easy_install-3.5": "setuptools.command.easy_install:main"}, "distutils.commands": {"alias": "setuptools.command.alias:alias", "bdist_egg": "setuptools.command.bdist_egg:bdist_egg", "bdist_rpm": "setuptools.command.bdist_rpm:bdist_rpm", "bdist_wininst": "setuptools.command.bdist_wininst:bdist_wininst", "build_ext": "setuptools.command.build_ext:build_ext", "build_py": "setuptools.command.build_py:build_py", "develop": "setuptools.command.develop:develop", "easy_install": "setuptools.command.easy_install:easy_install", "egg_info": "setuptools.command.egg_info:egg_info", "install": "setuptools.command.install:install", "install_egg_info": "setuptools.command.install_egg_info:install_egg_info", "install_lib": "setuptools.command.install_lib:install_lib", "install_scripts": "setuptools.command.install_scripts:install_scripts", 
"register": "setuptools.command.register:register", "rotate": "setuptools.command.rotate:rotate", "saveopts": "setuptools.command.saveopts:saveopts", "sdist": "setuptools.command.sdist:sdist", "setopt": "setuptools.command.setopt:setopt", "test": "setuptools.command.test:test", "upload_docs": "setuptools.command.upload_docs:upload_docs"}, "distutils.setup_keywords": {"convert_2to3_doctests": "setuptools.dist:assert_string_list", "dependency_links": "setuptools.dist:assert_string_list", "eager_resources": "setuptools.dist:assert_string_list", "entry_points": "setuptools.dist:check_entry_points", "exclude_package_data": "setuptools.dist:check_package_data", "extras_require": "setuptools.dist:check_extras", "include_package_data": "setuptools.dist:assert_bool", "install_requires": "setuptools.dist:check_requirements", "namespace_packages": "setuptools.dist:check_nsp", "package_data": "setuptools.dist:check_package_data", "packages": "setuptools.dist:check_packages", "setup_requires": "setuptools.dist:check_requirements", "test_loader": "setuptools.dist:check_importable", "test_runner": "setuptools.dist:check_importable", "test_suite": "setuptools.dist:check_test_suite", "tests_require": "setuptools.dist:check_requirements", "use_2to3": "setuptools.dist:assert_bool", "use_2to3_exclude_fixers": "setuptools.dist:assert_string_list", "use_2to3_fixers": "setuptools.dist:assert_string_list", "zip_safe": "setuptools.dist:assert_bool"}, "egg_info.writers": {"PKG-INFO": "setuptools.command.egg_info:write_pkg_info", "dependency_links.txt": "setuptools.command.egg_info:overwrite_arg", "depends.txt": "setuptools.command.egg_info:warn_depends_obsolete", "eager_resources.txt": "setuptools.command.egg_info:overwrite_arg", "entry_points.txt": "setuptools.command.egg_info:write_entries", "namespace_packages.txt": "setuptools.command.egg_info:overwrite_arg", "requires.txt": "setuptools.command.egg_info:write_requirements", "top_level.txt": 
"setuptools.command.egg_info:write_toplevel_names"}, "setuptools.installation": {"eggsecutable": "setuptools.command.easy_install:bootstrap"}}, "python.commands": {"wrap_console": {"easy_install": "setuptools.command.easy_install:main", "easy_install-3.5": "setuptools.command.easy_install:main"}}}, "keywords": ["CPAN", "PyPI", "distutils", "eggs", "package", "management"], "license": "PSF or ZPL", "metadata_version": "2.0", "name": "setuptools", "extras": ["certs", "ssl"], "run_requires": [{"requires": ["certifi (==2015.11.20)"], "extra": "certs"}, {"requires": ["wincertstore (==0.2)"], "extra": "ssl", "environment": "sys_platform=='win32'"}], "version": "19.2", "test_requires": [{"requires": ["pytest (>=2.8)", "setuptools[ssl]"]}]}


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools-19.2.dist-info/top_level.txt:
--------------------------------------------------------------------------------
1 | _markerlib
2 | easy_install
3 | pkg_resources
4 | setuptools
5 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools-19.2.dist-info/zip-safe:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/archive_util.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/archive_util.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/cli-32.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/cli-32.exe


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/cli-64.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/cli-64.exe


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/cli-arm-32.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/cli-arm-32.exe


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/cli.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/cli.exe


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/__init__.py:
--------------------------------------------------------------------------------
 1 | __all__ = [
 2 |     'alias', 'bdist_egg', 'bdist_rpm', 'build_ext', 'build_py', 'develop',
 3 |     'easy_install', 'egg_info', 'install', 'install_lib', 'rotate', 'saveopts',
 4 |     'sdist', 'setopt', 'test', 'install_egg_info', 'install_scripts',
 5 |     'register', 'bdist_wininst', 'upload_docs',
 6 | ]
 7 | 
 8 | from distutils.command.bdist import bdist
 9 | import sys
10 | 
11 | from setuptools.command import install_scripts
12 | 
13 | 
# Register 'egg' as a format of the distutils bdist command, unless it is
# already listed there.
if 'egg' not in bdist.format_commands:
    bdist.format_command['egg'] = ('bdist_egg', "Python .egg file")
    bdist.format_commands.append('egg')

# Remove the helper names again so they do not stay bound in this module.
del bdist, sys
19 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/alias.py:
--------------------------------------------------------------------------------
 1 | from distutils.errors import DistutilsOptionError
 2 | 
 3 | from setuptools.command.setopt import edit_config, option_base, config_file
 4 | 
 5 | 
 6 | def shquote(arg):
 7 |     """Quote an argument for later parsing by shlex.split()"""
 8 |     for c in '"', "'", "\\", "#":
 9 |         if c in arg:
10 |             return repr(arg)
11 |     if arg.split() != [arg]:
12 |         return repr(arg)
13 |     return arg
14 | 
15 | 
class alias(option_base):
    """Define a shortcut that invokes one or more commands"""

    description = "define a shortcut to invoke one or more commands"
    command_consumes_arguments = True

    user_options = [
        ('remove', 'r', 'remove (unset) the alias'),
    ] + option_base.user_options

    boolean_options = option_base.boolean_options + ['remove']

    def initialize_options(self):
        """Reset the base options, the argument list and the remove flag."""
        option_base.initialize_options(self)
        self.args = None
        self.remove = None

    def finalize_options(self):
        """Finalize the base options and validate the --remove usage.

        Raises DistutilsOptionError when --remove is combined with anything
        other than exactly one argument (the alias name).
        """
        option_base.finalize_options(self)
        if self.remove and len(self.args) != 1:
            raise DistutilsOptionError(
                "Must specify exactly one argument (the alias name) when "
                "using --remove"
            )

    def run(self):
        """List, show, remove or define aliases, depending on the arguments.

        * no arguments: print every entry of the 'aliases' option dict
        * one argument: print that alias, or unset it when --remove is given
        * several arguments: define the first argument as an alias for the
          command line built from the remaining arguments
        """
        aliases = self.distribution.get_option_dict('aliases')

        if not self.args:
            print("Command Aliases")
            print("---------------")
            for alias in aliases:
                # Build a single string: with two arguments the Python 2
                # print statement would output a tuple instead of the text.
                print("setup.py alias " + format_alias(alias, aliases))
            return

        elif len(self.args) == 1:
            alias, = self.args
            if self.remove:
                # A command of None is what gets written for a removal.
                command = None
            elif alias in aliases:
                # Single string for the same reason as in the listing above.
                print("setup.py alias " + format_alias(alias, aliases))
                return
            else:
                print("No alias definition found for %r" % alias)
                return
        else:
            alias = self.args[0]
            command = ' '.join(map(shquote, self.args[1:]))

        edit_config(self.filename, {'aliases': {alias: command}}, self.dry_run)
66 | 
67 | 
def format_alias(name, aliases):
    """Render the alias `name` as the arguments that would define it.

    `aliases` maps an alias name to a ``(source, command)`` pair. The
    source is compared with config_file('global'), config_file('user') and
    config_file('local'), in that order, to choose the option prefix; any
    other source is rendered as an explicit ``--filename=`` option.
    """
    source, command = aliases[name]
    known_sources = (
        ('global', '--global-config '),
        ('user', '--user-config '),
        ('local', ''),
    )
    for kind, prefix in known_sources:
        if source == config_file(kind):
            break
    else:
        # None of the standard config files matched.
        prefix = '--filename=%r' % source
    return prefix + name + ' ' + command
79 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/alias.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/alias.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/bdist_egg.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/bdist_egg.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/bdist_rpm.py:
--------------------------------------------------------------------------------
 1 | import distutils.command.bdist_rpm as orig
 2 | 
 3 | 
 4 | class bdist_rpm(orig.bdist_rpm):
 5 |     """
 6 |     Override the default bdist_rpm behavior to do the following:
 7 | 
 8 |     1. Run egg_info to ensure the name and version are properly calculated.
 9 |     2. Always run 'install' using --single-version-externally-managed to
10 |        disable eggs in RPM distributions.
11 |     3. Replace dash with underscore in the version numbers for better RPM
12 |        compatibility.
13 |     """
14 | 
15 |     def run(self):
16 |         # ensure distro name is up-to-date
17 |         self.run_command('egg_info')
18 | 
19 |         orig.bdist_rpm.run(self)
20 | 
21 |     def _make_spec_file(self):
22 |         version = self.distribution.get_version()
23 |         rpmversion = version.replace('-', '_')
24 |         spec = orig.bdist_rpm._make_spec_file(self)
25 |         line23 = '%define version ' + version
26 |         line24 = '%define version ' + rpmversion
27 |         spec = [
28 |             line.replace(
29 |                 "Source0: %{name}-%{version}.tar",
30 |                 "Source0: %{name}-%{unmangled_version}.tar"
31 |             ).replace(
32 |                 "setup.py install ",
33 |                 "setup.py install --single-version-externally-managed "
34 |             ).replace(
35 |                 "%setup",
36 |                 "%setup -n %{name}-%{unmangled_version}"
37 |             ).replace(line23, line24)
38 |             for line in spec
39 |         ]
40 |         insert_loc = spec.index(line24) + 1
41 |         unmangled_version = "%define unmangled_version " + version
42 |         spec.insert(insert_loc, unmangled_version)
43 |         return spec
44 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/bdist_rpm.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/bdist_rpm.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/bdist_wininst.py:
--------------------------------------------------------------------------------
 1 | import distutils.command.bdist_wininst as orig
 2 | 
 3 | 
class bdist_wininst(orig.bdist_wininst):
    # Subclass of the distutils bdist_wininst command; the two overrides
    # below are the only differences from the base class.
    def reinitialize_command(self, command, reinit_subcommands=0):
        """
        Supplement reinitialize_command to work around
        http://bugs.python.org/issue20819
        """
        cmd = self.distribution.reinitialize_command(
            command, reinit_subcommands)
        # For the two install commands, clear install_lib on the returned
        # command object before handing it back to the caller.
        if command in ('install', 'install_lib'):
            cmd.install_lib = None
        return cmd

    def run(self):
        """Run the base command with ``_is_running`` set for its duration."""
        self._is_running = True
        try:
            orig.bdist_wininst.run(self)
        finally:
            # Reset the flag even when the base run() raises.
            self._is_running = False
22 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/bdist_wininst.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/bdist_wininst.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/build_ext.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/build_ext.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/build_py.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/build_py.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/develop.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/develop.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/easy_install.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/easy_install.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/egg_info.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/egg_info.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/install.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/install.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/install_egg_info.py:
--------------------------------------------------------------------------------
  1 | from distutils import log, dir_util
  2 | import os
  3 | 
  4 | from setuptools import Command
  5 | from setuptools.archive_util import unpack_archive
  6 | import pkg_resources
  7 | 
  8 | 
  9 | class install_egg_info(Command):
 10 |     """Install an .egg-info directory for the package"""
 11 | 
 12 |     description = "Install an .egg-info directory for the package"
 13 | 
 14 |     user_options = [
 15 |         ('install-dir=', 'd', "directory to install to"),
 16 |     ]
 17 | 
 18 |     def initialize_options(self):
 19 |         self.install_dir = None
 20 | 
 21 |     def finalize_options(self):
 22 |         self.set_undefined_options('install_lib',
 23 |                                    ('install_dir', 'install_dir'))
 24 |         ei_cmd = self.get_finalized_command("egg_info")
 25 |         basename = pkg_resources.Distribution(
 26 |             None, None, ei_cmd.egg_name, ei_cmd.egg_version
 27 |         ).egg_name() + '.egg-info'
 28 |         self.source = ei_cmd.egg_info
 29 |         self.target = os.path.join(self.install_dir, basename)
 30 |         self.outputs = [self.target]
 31 | 
 32 |     def run(self):
 33 |         self.run_command('egg_info')
 34 |         if os.path.isdir(self.target) and not os.path.islink(self.target):
 35 |             dir_util.remove_tree(self.target, dry_run=self.dry_run)
 36 |         elif os.path.exists(self.target):
 37 |             self.execute(os.unlink, (self.target,), "Removing " + self.target)
 38 |         if not self.dry_run:
 39 |             pkg_resources.ensure_directory(self.target)
 40 |         self.execute(
 41 |             self.copytree, (), "Copying %s to %s" % (self.source, self.target)
 42 |         )
 43 |         self.install_namespaces()
 44 | 
 45 |     def get_outputs(self):
 46 |         return self.outputs
 47 | 
 48 |     def copytree(self):
 49 |         # Copy the .egg-info tree to site-packages
 50 |         def skimmer(src, dst):
 51 |             # filter out source-control directories; note that 'src' is always
 52 |             # a '/'-separated path, regardless of platform.  'dst' is a
 53 |             # platform-specific path.
 54 |             for skip in '.svn/', 'CVS/':
 55 |                 if src.startswith(skip) or '/' + skip in src:
 56 |                     return None
 57 |             self.outputs.append(dst)
 58 |             log.debug("Copying %s to %s", src, dst)
 59 |             return dst
 60 | 
 61 |         unpack_archive(self.source, self.target, skimmer)
 62 | 
 63 |     def install_namespaces(self):
 64 |         nsp = self._get_all_ns_packages()
 65 |         if not nsp:
 66 |             return
 67 |         filename, ext = os.path.splitext(self.target)
 68 |         filename += '-nspkg.pth'
 69 |         self.outputs.append(filename)
 70 |         log.info("Installing %s", filename)
 71 |         lines = map(self._gen_nspkg_line, nsp)
 72 | 
 73 |         if self.dry_run:
 74 |             # always generate the lines, even in dry run
 75 |             list(lines)
 76 |             return
 77 | 
 78 |         with open(filename, 'wt') as f:
 79 |             f.writelines(lines)
 80 | 
 81 |     _nspkg_tmpl = (
 82 |         "import sys, types, os",
 83 |         "p = os.path.join(sys._getframe(1).f_locals['sitedir'], *%(pth)r)",
 84 |         "ie = os.path.exists(os.path.join(p,'__init__.py'))",
 85 |         "m = not ie and "
 86 |             "sys.modules.setdefault(%(pkg)r, types.ModuleType(%(pkg)r))",
 87 |         "mp = (m or []) and m.__dict__.setdefault('__path__',[])",
 88 |         "(p not in mp) and mp.append(p)",
 89 |     )
 90 |     "lines for the namespace installer"
 91 | 
 92 |     _nspkg_tmpl_multi = (
 93 |         'm and setattr(sys.modules[%(parent)r], %(child)r, m)',
 94 |     )
 95 |     "additional line(s) when a parent package is indicated"
 96 | 
 97 |     @classmethod
 98 |     def _gen_nspkg_line(cls, pkg):
 99 |         # ensure pkg is not a unicode string under Python 2.7
100 |         pkg = str(pkg)
101 |         pth = tuple(pkg.split('.'))
102 |         tmpl_lines = cls._nspkg_tmpl
103 |         parent, sep, child = pkg.rpartition('.')
104 |         if parent:
105 |             tmpl_lines += cls._nspkg_tmpl_multi
106 |         return ';'.join(tmpl_lines) % locals() + '\n'
107 | 
108 |     def _get_all_ns_packages(self):
109 |         """Return sorted list of all package namespaces"""
110 |         nsp = set()
111 |         for pkg in self.distribution.namespace_packages or []:
112 |             pkg = pkg.split('.')
113 |             while pkg:
114 |                 nsp.add('.'.join(pkg))
115 |                 pkg.pop()
116 |         return sorted(nsp)
117 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/install_egg_info.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/install_egg_info.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/install_lib.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import imp
  3 | from itertools import product, starmap
  4 | import distutils.command.install_lib as orig
  5 | 
  6 | class install_lib(orig.install_lib):
  7 |     """Don't add compiled flags to filenames of non-Python files"""
  8 | 
  9 |     def run(self):
 10 |         self.build()
 11 |         outfiles = self.install()
 12 |         if outfiles is not None:
 13 |             # always compile, in case we have any extension stubs to deal with
 14 |             self.byte_compile(outfiles)
 15 | 
 16 |     def get_exclusions(self):
 17 |         """
 18 |         Return a collections.Sized collections.Container of paths to be
 19 |         excluded for single_version_externally_managed installations.
 20 |         """
 21 |         all_packages = (
 22 |             pkg
 23 |             for ns_pkg in self._get_SVEM_NSPs()
 24 |             for pkg in self._all_packages(ns_pkg)
 25 |         )
 26 | 
 27 |         excl_specs = product(all_packages, self._gen_exclusion_paths())
 28 |         return set(starmap(self._exclude_pkg_path, excl_specs))
 29 | 
 30 |     def _exclude_pkg_path(self, pkg, exclusion_path):
 31 |         """
 32 |         Given a package name and exclusion path within that package,
 33 |         compute the full exclusion path.
 34 |         """
 35 |         parts = pkg.split('.') + [exclusion_path]
 36 |         return os.path.join(self.install_dir, *parts)
 37 | 
 38 |     @staticmethod
 39 |     def _all_packages(pkg_name):
 40 |         """
 41 |         >>> list(install_lib._all_packages('foo.bar.baz'))
 42 |         ['foo.bar.baz', 'foo.bar', 'foo']
 43 |         """
 44 |         while pkg_name:
 45 |             yield pkg_name
 46 |             pkg_name, sep, child = pkg_name.rpartition('.')
 47 | 
 48 |     def _get_SVEM_NSPs(self):
 49 |         """
 50 |         Get namespace packages (list) but only for
 51 |         single_version_externally_managed installations and empty otherwise.
 52 |         """
 53 |         # TODO: is it necessary to short-circuit here? i.e. what's the cost
 54 |         # if get_finalized_command is called even when namespace_packages is
 55 |         # False?
 56 |         if not self.distribution.namespace_packages:
 57 |             return []
 58 | 
 59 |         install_cmd = self.get_finalized_command('install')
 60 |         svem = install_cmd.single_version_externally_managed
 61 | 
 62 |         return self.distribution.namespace_packages if svem else []
 63 | 
 64 |     @staticmethod
 65 |     def _gen_exclusion_paths():
 66 |         """
 67 |         Generate file paths to be excluded for namespace packages (bytecode
 68 |         cache files).
 69 |         """
 70 |         # always exclude the package module itself
 71 |         yield '__init__.py'
 72 | 
 73 |         yield '__init__.pyc'
 74 |         yield '__init__.pyo'
 75 | 
 76 |         if not hasattr(imp, 'get_tag'):
 77 |             return
 78 | 
 79 |         base = os.path.join('__pycache__', '__init__.' + imp.get_tag())
 80 |         yield base + '.pyc'
 81 |         yield base + '.pyo'
 82 |         yield base + '.opt-1.pyc'
 83 |         yield base + '.opt-2.pyc'
 84 | 
 85 |     def copy_tree(
 86 |             self, infile, outfile,
 87 |             preserve_mode=1, preserve_times=1, preserve_symlinks=0, level=1
 88 |     ):
 89 |         assert preserve_mode and preserve_times and not preserve_symlinks
 90 |         exclude = self.get_exclusions()
 91 | 
 92 |         if not exclude:
 93 |             return orig.install_lib.copy_tree(self, infile, outfile)
 94 | 
 95 |         # Exclude namespace package __init__.py* files from the output
 96 | 
 97 |         from setuptools.archive_util import unpack_directory
 98 |         from distutils import log
 99 | 
100 |         outfiles = []
101 | 
102 |         def pf(src, dst):
103 |             if dst in exclude:
104 |                 log.warn("Skipping installation of %s (namespace package)",
105 |                          dst)
106 |                 return False
107 | 
108 |             log.info("copying %s -> %s", src, os.path.dirname(dst))
109 |             outfiles.append(dst)
110 |             return dst
111 | 
112 |         unpack_directory(infile, outfile, pf)
113 |         return outfiles
114 | 
115 |     def get_outputs(self):
116 |         outputs = orig.install_lib.get_outputs(self)
117 |         exclude = self.get_exclusions()
118 |         if exclude:
119 |             return [f for f in outputs if f not in exclude]
120 |         return outputs
121 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/install_lib.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/install_lib.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/install_scripts.py:
--------------------------------------------------------------------------------
 1 | from distutils import log
 2 | import distutils.command.install_scripts as orig
 3 | import os
 4 | 
 5 | from pkg_resources import Distribution, PathMetadata, ensure_directory
 6 | 
 7 | 
 8 | class install_scripts(orig.install_scripts):
 9 |     """Do normal script install, plus any egg_info wrapper scripts"""
10 | 
11 |     def initialize_options(self):
12 |         orig.install_scripts.initialize_options(self)
13 |         self.no_ep = False
14 | 
15 |     def run(self):
16 |         import setuptools.command.easy_install as ei
17 | 
18 |         self.run_command("egg_info")
19 |         if self.distribution.scripts:
20 |             orig.install_scripts.run(self)  # run first to set up self.outfiles
21 |         else:
22 |             self.outfiles = []
23 |         if self.no_ep:
24 |             # don't install entry point scripts into .egg file!
25 |             return
26 | 
27 |         ei_cmd = self.get_finalized_command("egg_info")
28 |         dist = Distribution(
29 |             ei_cmd.egg_base, PathMetadata(ei_cmd.egg_base, ei_cmd.egg_info),
30 |             ei_cmd.egg_name, ei_cmd.egg_version,
31 |         )
32 |         bs_cmd = self.get_finalized_command('build_scripts')
33 |         exec_param = getattr(bs_cmd, 'executable', None)
34 |         bw_cmd = self.get_finalized_command("bdist_wininst")
35 |         is_wininst = getattr(bw_cmd, '_is_running', False)
36 |         writer = ei.ScriptWriter
37 |         if is_wininst:
38 |             exec_param = "python.exe"
39 |             writer = ei.WindowsScriptWriter
40 |         # resolve the writer to the environment
41 |         writer = writer.best()
42 |         cmd = writer.command_spec_class.best().from_param(exec_param)
43 |         for args in writer.get_args(dist, cmd.as_header()):
44 |             self.write_script(*args)
45 | 
46 |     def write_script(self, script_name, contents, mode="t", *ignored):
47 |         """Write an executable file to the scripts directory"""
48 |         from setuptools.command.easy_install import chmod, current_umask
49 | 
50 |         log.info("Installing %s script to %s", script_name, self.install_dir)
51 |         target = os.path.join(self.install_dir, script_name)
52 |         self.outfiles.append(target)
53 | 
54 |         mask = current_umask()
55 |         if not self.dry_run:
56 |             ensure_directory(target)
57 |             f = open(target, "w" + mode)
58 |             f.write(contents)
59 |             f.close()
60 |             chmod(target, 0o777 - mask)
61 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/install_scripts.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/install_scripts.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/launcher manifest.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 2 | <assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
 3 |     <assemblyIdentity version="1.0.0.0"
 4 |                       processorArchitecture="X86"
 5 |                       name="%(name)s"
 6 |                       type="win32"/>
 7 |     <!-- Identify the application security requirements. -->
 8 |     <trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
 9 |         <security>
10 |             <requestedPrivileges>
11 |                 <requestedExecutionLevel level="asInvoker" uiAccess="false"/>
12 |             </requestedPrivileges>
13 |         </security>
14 |     </trustInfo>
15 | </assembly>
16 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/register.py:
--------------------------------------------------------------------------------
 1 | import distutils.command.register as orig
 2 | 
 3 | 
class register(orig.register):
    # Reuse the documentation string of the distutils command being extended.
    __doc__ = orig.register.__doc__

    def run(self):
        """Run egg_info, then the base distutils register command."""
        # Make sure that we are using valid current name/version info
        self.run_command('egg_info')
        orig.register.run(self)
11 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/register.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/register.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/rotate.py:
--------------------------------------------------------------------------------
 1 | from distutils.util import convert_path
 2 | from distutils import log
 3 | from distutils.errors import DistutilsOptionError
 4 | import os
 5 | 
 6 | from setuptools import Command
 7 | from setuptools.compat import basestring
 8 | 
 9 | 
class rotate(Command):
    """Delete older distributions

    For every configured match pattern, the files in the distribution
    directory whose names start with the project name and end with the
    pattern are ordered newest-first by modification time; everything after
    the first ``keep`` entries is removed.
    """

    description = "delete older distributions, keeping N newest files"
    user_options = [
        ('match=', 'm', "patterns to match (required)"),
        ('dist-dir=', 'd', "directory where the distributions are"),
        ('keep=', 'k', "number of matching distributions to keep"),
    ]

    boolean_options = []

    def initialize_options(self):
        """Reset every option to ``None`` (meaning "not supplied")."""
        self.match = self.dist_dir = self.keep = None

    def finalize_options(self):
        """Validate and normalise the options.

        Raises DistutilsOptionError when ``match`` or ``keep`` is missing or
        when ``keep`` cannot be converted to an integer.
        """
        if self.match is None:
            raise DistutilsOptionError(
                "Must specify one or more (comma-separated) match patterns "
                "(e.g. '.zip' or '.egg')"
            )
        if self.keep is None:
            raise DistutilsOptionError("Must specify number of files to keep")
        try:
            self.keep = int(self.keep)
        except ValueError:
            raise DistutilsOptionError("--keep must be an integer")
        if isinstance(self.match, basestring):
            # A comma-separated string becomes a list of converted paths.
            raw_patterns = self.match.split(',')
            self.match = [
                convert_path(entry.strip()) for entry in raw_patterns
            ]
        # Inherit dist_dir from the 'bdist' command when it was not given.
        self.set_undefined_options('bdist', ('dist_dir', 'dist_dir'))

    def run(self):
        """Delete all but the ``keep`` newest files for each match pattern."""
        self.run_command("egg_info")
        from glob import glob

        for suffix in self.match:
            pattern = self.distribution.get_name() + '*' + suffix
            matches = glob(os.path.join(self.dist_dir, pattern))
            # Newest first: order the (mtime, path) pairs in descending order.
            by_age = sorted(
                ((os.path.getmtime(path), path) for path in matches),
                reverse=True,
            )

            log.info("%d file(s) matching %s", len(by_age), pattern)
            for _mtime, stale in by_age[self.keep:]:
                log.info("Deleting %s", stale)
                if not self.dry_run:
                    os.unlink(stale)
62 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/rotate.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/rotate.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/saveopts.py:
--------------------------------------------------------------------------------
 1 | from setuptools.command.setopt import edit_config, option_base
 2 | 
 3 | 
 4 | class saveopts(option_base):
 5 |     """Save command-line options to a file"""
 6 | 
 7 |     description = "save supplied options to setup.cfg or other config file"
 8 | 
 9 |     def run(self):
10 |         dist = self.distribution
11 |         settings = {}
12 | 
13 |         for cmd in dist.command_options:
14 | 
15 |             if cmd == 'saveopts':
16 |                 continue  # don't save our own options!
17 | 
18 |             for opt, (src, val) in dist.get_option_dict(cmd).items():
19 |                 if src == "command line":
20 |                     settings.setdefault(cmd, {})[opt] = val
21 | 
22 |         edit_config(self.filename, settings, self.dry_run)
23 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/saveopts.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/saveopts.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/sdist.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/sdist.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/setopt.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/setopt.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/test.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/test.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/command/upload_docs.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/command/upload_docs.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/compat.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import itertools
 3 | 
 4 | PY3 = sys.version_info >= (3,)
 5 | PY2 = not PY3
 6 | 
 7 | if PY2:
 8 |     basestring = basestring
 9 |     import __builtin__ as builtins
10 |     import ConfigParser as configparser
11 |     from StringIO import StringIO
12 |     BytesIO = StringIO
13 |     func_code = lambda o: o.func_code
14 |     func_globals = lambda o: o.func_globals
15 |     im_func = lambda o: o.im_func
16 |     from htmlentitydefs import name2codepoint
17 |     import httplib
18 |     from BaseHTTPServer import HTTPServer
19 |     from SimpleHTTPServer import SimpleHTTPRequestHandler
20 |     from BaseHTTPServer import BaseHTTPRequestHandler
21 |     iteritems = lambda o: o.iteritems()
22 |     long_type = long
23 |     maxsize = sys.maxint
24 |     unichr = unichr
25 |     unicode = unicode
26 |     bytes = str
27 |     from urllib import url2pathname, splittag, pathname2url
28 |     import urllib2
29 |     from urllib2 import urlopen, HTTPError, URLError, unquote, splituser
30 |     from urlparse import urlparse, urlunparse, urljoin, urlsplit, urlunsplit
31 |     filterfalse = itertools.ifilterfalse
32 |     filter = itertools.ifilter
33 |     map = itertools.imap
34 | 
35 |     exec("""def reraise(tp, value, tb=None):
36 |     raise tp, value, tb""")
37 | 
38 | if PY3:
39 |     basestring = str
40 |     import builtins
41 |     import configparser
42 |     from io import StringIO, BytesIO
43 |     func_code = lambda o: o.__code__
44 |     func_globals = lambda o: o.__globals__
45 |     im_func = lambda o: o.__func__
46 |     from html.entities import name2codepoint
47 |     import http.client as httplib
48 |     from http.server import HTTPServer, SimpleHTTPRequestHandler
49 |     from http.server import BaseHTTPRequestHandler
50 |     iteritems = lambda o: o.items()
51 |     long_type = int
52 |     maxsize = sys.maxsize
53 |     unichr = chr
54 |     unicode = str
55 |     bytes = bytes
56 |     from urllib.error import HTTPError, URLError
57 |     import urllib.request as urllib2
58 |     from urllib.request import urlopen, url2pathname, pathname2url
59 |     from urllib.parse import (
60 |         urlparse, urlunparse, unquote, splituser, urljoin, urlsplit,
61 |         urlunsplit, splittag,
62 |     )
63 |     filterfalse = itertools.filterfalse
64 |     filter = filter
65 |     map = map
66 | 
67 |     def reraise(tp, value, tb=None):
68 |         if value.__traceback__ is not tb:
69 |             raise value.with_traceback(tb)
70 |         raise value
71 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/compat.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/compat.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/depends.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/depends.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/dist.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/dist.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/extension.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import re
 3 | import functools
 4 | import distutils.core
 5 | import distutils.errors
 6 | import distutils.extension
 7 | 
 8 | from .dist import _get_unpatched
 9 | from . import msvc9_support
10 | 
# Base class for the Extension subclass defined in this module, obtained by
# passing distutils' Extension through _get_unpatched (presumably this
# returns the original distutils class even if it has already been replaced
# by this module -- verify against setuptools.dist).
_Extension = _get_unpatched(distutils.core.Extension)

# Import-time side effect: apply the msvc9compiler patches.
msvc9_support.patch_for_specialized_compiler()
14 | 
15 | def _have_cython():
16 |     """
17 |     Return True if Cython can be imported.
18 |     """
19 |     cython_impl = 'Cython.Distutils.build_ext',
20 |     try:
21 |         # from (cython_impl) import build_ext
22 |         __import__(cython_impl, fromlist=['build_ext']).build_ext
23 |         return True
24 |     except Exception:
25 |         pass
26 |     return False
27 | 
28 | # for compatibility
29 | have_pyrex = _have_cython
30 | 
31 | 
class Extension(_Extension):
    """Extension that uses '.c' files in place of '.pyx' files"""

    def _convert_pyx_sources_to_lang(self):
        """
        Replace sources with .pyx extensions to sources with the target
        language extension. This mechanism allows language authors to supply
        pre-converted sources but to prefer the .pyx sources.

        The target extension is '.cpp' when ``self.language`` is 'c++'
        (case-insensitive) and '.c' otherwise. Nothing is changed when
        Cython is available.
        """
        if _have_cython():
            # the build has Cython, so allow it to compile the .pyx files
            return
        lang = self.language or ''
        target_ext = '.cpp' if lang.lower() == 'c++' else '.c'
        # Escape the dot so that only a literal '.pyx' suffix is rewritten;
        # an unescaped '.' would also match names such as 'foopyx'.
        sub = functools.partial(re.sub, r'\.pyx$', target_ext)
        self.sources = list(map(sub, self.sources))
48 | 
class Library(Extension):
    """Just like a regular Extension, but built as a library instead"""

# Import-time monkeypatch: make the distutils modules hand out the Extension
# class defined above instead of their own.
distutils.core.Extension = Extension
distutils.extension.Extension = Extension
# build_ext is only patched when it has already been imported.
if 'distutils.command.build_ext' in sys.modules:
    sys.modules['distutils.command.build_ext'].Extension = Extension
56 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/extension.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/extension.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/gui-32.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/gui-32.exe


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/gui-64.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/gui-64.exe


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/gui-arm-32.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/gui-arm-32.exe


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/gui.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/gui.exe


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/lib2to3_ex.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Customized Mixin2to3 support:
 3 | 
 4 |  - adds support for converting doctests
 5 | 
 6 | 
 7 | This module raises an ImportError on Python 2.
 8 | """
 9 | 
10 | from distutils.util import Mixin2to3 as _Mixin2to3
11 | from distutils import log
12 | from lib2to3.refactor import RefactoringTool, get_fixers_from_package
13 | import setuptools
14 | 
class DistutilsRefactoringTool(RefactoringTool):
    """RefactoringTool whose logging hooks forward to ``distutils.log``."""

    def log_error(self, msg, *args, **kw):
        # Keyword arguments are accepted but not forwarded to the logger.
        log.error(msg, *args)

    def log_message(self, msg, *args):
        # Ordinary messages are reported at info level.
        log.info(msg, *args)

    def log_debug(self, msg, *args):
        log.debug(msg, *args)
24 | 
class Mixin2to3(_Mixin2to3):
    """Mixin2to3 variant that honours the distribution's 2to3 options.

    Adds doctest conversion and fixer inclusion/exclusion driven by the
    ``use_2to3*`` attributes of the distribution and by module-level
    settings on ``setuptools``.
    """

    def run_2to3(self, files, doctests = False):
        """Run 2to3 over ``files``.

        Does nothing unless ``self.distribution.use_2to3`` is exactly True
        and ``files`` is non-empty. With ``doctests`` true, only doctests are
        refactored, and only when ``setuptools.run_2to3_on_doctests`` is set;
        otherwise the base class conversion is used.
        """
        # See if the distribution option has been set, otherwise check the
        # setuptools default.
        if self.distribution.use_2to3 is not True:
            return
        if not files:
            return
        log.info("Fixing "+" ".join(files))
        self.__build_fixer_names()
        self.__exclude_fixers()
        if doctests:
            if setuptools.run_2to3_on_doctests:
                r = DistutilsRefactoringTool(self.fixer_names)
                r.refactor(files, write=True, doctests_only=True)
        else:
            _Mixin2to3.run_2to3(self, files)

    def __build_fixer_names(self):
        """Populate ``self.fixer_names`` once from the configured packages."""
        if self.fixer_names: return
        self.fixer_names = []
        for p in setuptools.lib2to3_fixer_packages:
            self.fixer_names.extend(get_fixers_from_package(p))
        if self.distribution.use_2to3_fixers is not None:
            for p in self.distribution.use_2to3_fixers:
                self.fixer_names.extend(get_fixers_from_package(p))

    def __exclude_fixers(self):
        """Remove excluded fixers from ``self.fixer_names``."""
        # Work on a copy: extending the list stored on the instance (or on
        # the class) in place would make it grow on every call.
        excluded_fixers = list(getattr(self, 'exclude_fixers', []))
        if self.distribution.use_2to3_exclude_fixers is not None:
            excluded_fixers.extend(self.distribution.use_2to3_exclude_fixers)
        for fixer_name in excluded_fixers:
            if fixer_name in self.fixer_names:
                self.fixer_names.remove(fixer_name)
59 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/lib2to3_ex.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/lib2to3_ex.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/msvc9_support.py:
--------------------------------------------------------------------------------
# Optional import: when it fails, the name ``distutils`` is never bound in
# this module, which patch_for_specialized_compiler() uses as its signal
# that there is nothing to patch.
try:
    import distutils.msvc9compiler
except ImportError:
    pass

# Snapshot of distutils.msvc9compiler's namespace taken before patching;
# an empty dict means "not patched yet".
unpatched = dict()
 7 | 
 8 | def patch_for_specialized_compiler():
 9 |     """
10 |     Patch functions in distutils.msvc9compiler to use the standalone compiler
11 |     build for Python (Windows only). Fall back to original behavior when the
12 |     standalone compiler is not available.
13 |     """
14 |     if 'distutils' not in globals():
15 |         # The module isn't available to be patched
16 |         return
17 | 
18 |     if unpatched:
19 |         # Already patched
20 |         return
21 | 
22 |     unpatched.update(vars(distutils.msvc9compiler))
23 | 
24 |     distutils.msvc9compiler.find_vcvarsall = find_vcvarsall
25 |     distutils.msvc9compiler.query_vcvarsall = query_vcvarsall
26 | 
def find_vcvarsall(version):
    """Locate ``vcvarsall.bat`` for the given compiler version.

    The registry is consulted for a VCForPython install directory; when a
    ``vcvarsall.bat`` file exists there its path is returned, otherwise the
    original distutils ``find_vcvarsall`` is called.
    """
    Reg = distutils.msvc9compiler.Reg
    VC_BASE = r'Software\%sMicrosoft\DevDiv\VCForPython\%0.1f'

    productdir = None
    # Per-user installs register the compiler path under the first key;
    # all-user installs on a 64-bit system register under the second.
    for prefix in ('', 'Wow6432Node\\'):
        key = VC_BASE % (prefix, version)
        try:
            productdir = Reg.get_value(key, "installdir")
        except KeyError:
            continue
        break

    if productdir:
        import os
        candidate = os.path.join(productdir, "vcvarsall.bat")
        if os.path.isfile(candidate):
            return candidate

    return unpatched['find_vcvarsall'](version)
49 | 
def query_vcvarsall(version, *args, **kwargs):
    """Call the original ``query_vcvarsall`` with a friendlier error.

    A DistutilsPlatformError whose first argument mentions "vcvarsall.bat"
    is replaced by one naming the required Visual C++ version (plus a
    download hint for version 9); any other error propagates unchanged.
    """
    try:
        return unpatched['query_vcvarsall'](version, *args, **kwargs)
    except distutils.errors.DistutilsPlatformError as exc:
        if not (exc and "vcvarsall.bat" in exc.args[0]):
            raise
        message = 'Microsoft Visual C++ %0.1f is required (%s).' % (version, exc.args[0])
        if int(version) == 9:
            # This redirection link is maintained by Microsoft.
            # Contact vspython@microsoft.com if it needs updating.
            message = message + ' Get it from http://aka.ms/vcpython27'
        raise distutils.errors.DistutilsPlatformError(message)
64 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/msvc9_support.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/msvc9_support.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/package_index.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/package_index.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/py26compat.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Compatibility Support for Python 2.6 and earlier
 3 | """
 4 | 
 5 | import sys
 6 | 
 7 | from setuptools.compat import splittag
 8 | 
 9 | def strip_fragment(url):
10 | 	"""
11 | 	In `Python 8280 <http://bugs.python.org/issue8280>`_, Python 2.7 and
12 | 	later was patched to disregard the fragment when making URL requests.
13 | 	Do the same for Python 2.6 and earlier.
14 | 	"""
15 | 	url, fragment = splittag(url)
16 | 	return url
17 | 
# On Python 2.7 and later, replace strip_fragment with an identity function
# that returns the URL untouched.
if sys.version_info >= (2,7):
	strip_fragment = lambda x: x
20 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/py26compat.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/py26compat.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/py27compat.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Compatibility Support for Python 2.7 and earlier
 3 | """
 4 | 
 5 | import sys
 6 | 
 7 | def get_all_headers(message, key):
 8 | 	"""
 9 | 	Given an HTTPMessage, return all headers matching a given key.
10 | 	"""
11 | 	return message.get_all(key)
12 | 
13 | if sys.version_info < (3,):
14 | 	def get_all_headers(message, key):
15 | 		return message.getheaders(key)
16 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/py27compat.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/py27compat.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/py31compat.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import unittest
 3 | 
 4 | __all__ = ['get_config_vars', 'get_path']
 5 | 
 6 | try:
 7 |     # Python 2.7 or >=3.2
 8 |     from sysconfig import get_config_vars, get_path
 9 | except ImportError:
10 |     from distutils.sysconfig import get_config_vars, get_python_lib
11 |     def get_path(name):
12 |         if name not in ('platlib', 'purelib'):
13 |             raise ValueError("Name must be purelib or platlib")
14 |         return get_python_lib(name=='platlib')
15 | 
try:
    # Python >=3.2
    from tempfile import TemporaryDirectory
except ImportError:
    import shutil
    import tempfile

    class TemporaryDirectory(object):
        """
        Minimal context manager around ``tempfile.mkdtemp``.

        Entering yields the directory path; leaving attempts to remove the
        directory tree, ignoring OS and similar errors on deletion.
        """

        def __init__(self):
            # Bind the attribute first so it exists even if mkdtemp raises.
            self.name = None
            self.name = tempfile.mkdtemp()

        def __enter__(self):
            return self.name

        def __exit__(self, exctype, excvalue, exctrace):
            try:
                shutil.rmtree(self.name, ignore_errors=True)
            except OSError:
                # removal errors are not the only possible
                pass
            self.name = None
41 | 
42 | 
_PY31 = (3, 1) <= sys.version_info[:2] < (3, 2)

if _PY31:
    # on Python 3.1, translate testRunner==None to TextTestRunner
    # for compatibility with Python 2.6, 2.7, and 3.2+
    def unittest_main(*args, **kwargs):
        """Call ``unittest.main``, replacing an explicit ``testRunner=None``."""
        # The default of True keeps a missing key from being rewritten.
        if kwargs.get('testRunner', True) is None:
            kwargs['testRunner'] = unittest.TextTestRunner
        return unittest.main(*args, **kwargs)
else:
    # Everywhere else the stock entry point is used directly.
    unittest_main = unittest.main
53 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/py31compat.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/py31compat.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/sandbox.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/sandbox.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/script (dev).tmpl:
--------------------------------------------------------------------------------
1 | # EASY-INSTALL-DEV-SCRIPT: %(spec)r,%(script_name)r
2 | __requires__ = %(spec)r
3 | __import__('pkg_resources').require(%(spec)r)
4 | __file__ = %(dev_path)r
5 | exec(compile(open(__file__).read(), __file__, 'exec'))
6 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/script.tmpl:
--------------------------------------------------------------------------------
1 | # EASY-INSTALL-SCRIPT: %(spec)r,%(script_name)r
2 | __requires__ = %(spec)r
3 | __import__('pkg_resources').run_script(%(spec)r, %(script_name)r)
4 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/site-patch.py:
--------------------------------------------------------------------------------
def __boot():
    """Load the real ``site`` module, then re-order ``sys.path``.

    Steps, in order:
      1. Split the PYTHONPATH environment variable into a list of entries.
      2. Search the sys.path entries after the first len(PYTHONPATH) ones
         for a 'site' module and load it; raise ImportError if none of them
         provides one.
      3. Call addsitedir() on every PYTHONPATH entry.
      4. Rebuild sys.path so that entries that were not on sys.path before
         step 3 are placed at the position of the first 'system' entry.

    NOTE(review): makepath and addsitedir are not defined in this file;
    presumably they become available as globals once the real 'site' module
    has been loaded under this module's name -- confirm.
    """
    import sys
    import os
    PYTHONPATH = os.environ.get('PYTHONPATH')
    # An unset variable (or an empty one on win32) means no entries.
    if PYTHONPATH is None or (sys.platform=='win32' and not PYTHONPATH):
        PYTHONPATH = []
    else:
        PYTHONPATH = PYTHONPATH.split(os.pathsep)

    pic = getattr(sys,'path_importer_cache',{})
    # Everything past the first len(PYTHONPATH) entries of sys.path.
    stdpath = sys.path[len(PYTHONPATH):]
    mydir = os.path.dirname(__file__)
    #print "searching",stdpath,sys.path

    for item in stdpath:
        if item==mydir or not item:
            continue    # skip if current dir. on Windows, or my own directory
        importer = pic.get(item)
        if importer is not None:
            # A cached importer exists for this entry: ask it for 'site'.
            loader = importer.find_module('site')
            if loader is not None:
                # This should actually reload the current module
                loader.load_module('site')
                break
        else:
            # No cached importer: fall back to the imp module.
            try:
                import imp # Avoid import loop in Python >= 3.3
                stream, path, descr = imp.find_module('site',[item])
            except ImportError:
                continue
            if stream is None:
                continue
            try:
                # This should actually reload the current module
                imp.load_module('site',stream,path,descr)
            finally:
                stream.close()
            break
    else:
        # The loop finished without a break: no entry provided 'site'.
        raise ImportError("Couldn't find the real 'site' module")

    #print "loaded", __file__

    # Second element of makepath() for every entry currently on sys.path,
    # recorded before the PYTHONPATH directories are processed below.
    known_paths = dict([(makepath(item)[1],1) for item in sys.path]) # 2.2 comp

    oldpos = getattr(sys,'__egginsert',0)   # save old insertion position
    sys.__egginsert = 0                     # and reset the current one

    for item in PYTHONPATH:
        addsitedir(item)

    sys.__egginsert += oldpos           # restore effective old position

    # Second makepath() element of the first non-PYTHONPATH entry; used
    # below to detect where the 'system' part of sys.path starts.
    d, nd = makepath(stdpath[0])
    insert_at = None
    new_path = []

    for item in sys.path:
        p, np = makepath(item)

        if np==nd and insert_at is None:
            # We've hit the first 'system' path entry, so added entries go here
            insert_at = len(new_path)

        if np in known_paths or insert_at is None:
            new_path.append(item)
        else:
            # new path after the insert point, back-insert it
            new_path.insert(insert_at, item)
            insert_at += 1

    # Replace the contents in place so the sys.path list object is kept.
    sys.path[:] = new_path
73 | 
# Only bootstrap when this file is being run under the module name 'site';
# the helper is deleted afterwards so it does not stay in the namespace.
if __name__=='site':
    __boot()
    del __boot
77 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/site-patch.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/site-patch.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/ssl_support.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/ssl_support.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/unicode_utils.py:
--------------------------------------------------------------------------------
 1 | import unicodedata
 2 | import sys
 3 | from setuptools.compat import unicode as decoded_string
 4 | 
 5 | 
 6 | # HFS Plus uses decomposed UTF-8
 7 | def decompose(path):
 8 |     if isinstance(path, decoded_string):
 9 |         return unicodedata.normalize('NFD', path)
10 |     try:
11 |         path = path.decode('utf-8')
12 |         path = unicodedata.normalize('NFD', path)
13 |         path = path.encode('utf-8')
14 |     except UnicodeError:
15 |         pass  # Not UTF-8
16 |     return path
17 | 
18 | 
def filesys_decode(path):
    """
    Ensure that the given path is decoded.

    Already-decoded strings are returned unchanged. Otherwise the path is
    decoded with the filesystem encoding and then with UTF-8, returning the
    first result that works, or None when neither encoding works.
    """
    if isinstance(path, decoded_string):
        return path

    # sys.getfilesystemencoding() can return None; decoding with None would
    # raise TypeError, so fall back to UTF-8 in that case.
    fs_enc = sys.getfilesystemencoding() or 'utf-8'

    for enc in (fs_enc, 'utf-8'):
        try:
            return path.decode(enc)
        except UnicodeDecodeError:
            continue

    return None
34 | 
35 | 
def try_encode(string, enc):
    "Encode ``string`` with ``enc``; return None instead of raising UnicodeEncodeError"
    try:
        encoded = string.encode(enc)
    except UnicodeEncodeError:
        encoded = None
    return encoded
42 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/unicode_utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/unicode_utils.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import os.path
 3 | 
 4 | 
 5 | def cs_path_exists(fspath):
 6 |     if not os.path.exists(fspath): 
 7 |         return False
 8 |     # make absolute so we always have a directory
 9 |     abspath = os.path.abspath(fspath)
10 |     directory, filename = os.path.split(abspath)
11 |     return filename in os.listdir(directory)


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/utils.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/version.py:
--------------------------------------------------------------------------------
# Version string of this bundled setuptools package.
__version__ = '19.2'
2 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/version.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/version.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/windows_support.py:
--------------------------------------------------------------------------------
 1 | import platform
 2 | import ctypes
 3 | 
 4 | 
 5 | def windows_only(func):
 6 |     if platform.system() != 'Windows':
 7 |         return lambda *args, **kwargs: None
 8 |     return func
 9 | 
10 | 
@windows_only
def hide_file(path):
    """
    Set the hidden attribute on a file or directory.

    Calls the Win32 ``SetFileAttributesW`` function through ctypes and
    raises ``ctypes.WinError()`` when the call reports failure.

    From http://stackoverflow.com/questions/19622133/

    `path` must be text.
    """
    # Importing the submodule makes ``ctypes.wintypes`` reachable as an
    # attribute of the already-imported ``ctypes`` package.
    __import__('ctypes.wintypes')
    set_attrs = ctypes.windll.kernel32.SetFileAttributesW
    wintypes = ctypes.wintypes
    set_attrs.argtypes = wintypes.LPWSTR, wintypes.DWORD
    set_attrs.restype = wintypes.BOOL

    hidden_flag = 0x02  # FILE_ATTRIBUTE_HIDDEN

    succeeded = set_attrs(path, hidden_flag)
    if not succeeded:
        raise ctypes.WinError()
30 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/setuptools/windows_support.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/setuptools/windows_support.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/slate-0.3-py2.7.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
 1 | Metadata-Version: 1.1
 2 | Name: slate
 3 | Version: 0.3
 4 | Summary: Extract text from PDF documents easily.
 5 | Home-page: http://github.com/timClicks/slate
 6 | Author: Tim McNamara
 7 | Author-email: paperless@timmcnamara.co.nz
 8 | License: GPL v3 or later
 9 | Description: ======================================================
10 |         slate: the easiest way to get text from PDFs in Python
11 |         ======================================================
12 |         
13 |         
14 |         Slate is a Python package that simplifies the process of extracting
15 |         text from PDF files. It depends on the PDFMiner package.
16 |         
17 |         Slate provides one class, PDF. PDF takes a file-like object and
        will extract all text from the document, presenting each page
19 |         as a string of text:
20 |         
21 |           >>> with open('example.pdf') as f:
22 |           ...    doc = slate.PDF(f)
23 |           ...
24 |           >>> doc 
25 |           [..., ..., ...]
26 |           >>> doc[1]
27 |           'Text from page 2...'
28 |         
29 |         If your pdf is password protected, pass the password as the
30 |         second argument:
31 |         
32 |           >>> with open('secrets.pdf') as f:
33 |           ...     doc = slate.PDF(f, 'password')
34 |           ...
35 |           >>> doc[0]
36 |           "My mother doesn't know this, but..."
37 |         
38 |         More complex operations
39 |         -----------------------
40 |         
41 |         If you would like access to the images, font files and other
42 |         information, then take some time to learn the PDFMiner API.
43 |         
44 |         
45 |         What is wrong with PDFMiner?
46 |         ----------------------------
47 |         
48 |           1. Getting simple things done, like extracting the text
49 |              is quite complex. The program is not designed to return
50 |              Python objects, which makes interfacing things irritating.
51 |           2. It's an extremely complete set of tools, with multiple 
52 |              and moderately  steep learning curves.
53 |           3. It's not written with hackability in mind.
54 |         
55 |         
56 |         GPL?
57 |         ----
58 |         
59 |         If you would like to use this software in a non-free manner,
60 |         please contact the copyright owner.
61 |         
62 | Keywords: pdf,text,text-extraction
63 | Platform: UNKNOWN
64 | Classifier: Development Status :: 4 - Beta
65 | Classifier: Intended Audience :: Developers
66 | Classifier: License :: OSI Approved :: GNU General Public License (GPL)
67 | Classifier: Programming Language :: Python
68 | Classifier: Programming Language :: Python :: 2.6
69 | Classifier: Topic :: Office/Business
70 | Classifier: Topic :: Software Development :: Libraries :: Python Modules
71 | Classifier: Topic :: Text Processing
72 | Classifier: Topic :: Utilities
73 | Requires: pdfminer
74 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/slate-0.3-py2.7.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
 1 | .gitignore
 2 | LICENSE
 3 | README
 4 | setup.cfg
 5 | setup.py
 6 | src/slate/__init__.py
 7 | src/slate/conftest.py
 8 | src/slate/slate.py
 9 | src/slate/test_slate.py
10 | src/slate/utils.py
11 | src/slate.egg-info/PKG-INFO
12 | src/slate.egg-info/SOURCES.txt
13 | src/slate.egg-info/dependency_links.txt
14 | src/slate.egg-info/requires.txt
15 | src/slate.egg-info/top_level.txt


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/slate-0.3-py2.7.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/slate-0.3-py2.7.egg-info/installed-files.txt:
--------------------------------------------------------------------------------
 1 | ../slate/__init__.py
 2 | ../slate/conftest.py
 3 | ../slate/slate.py
 4 | ../slate/test_slate.py
 5 | ../slate/utils.py
 6 | ../slate/__init__.pyc
 7 | ../slate/conftest.pyc
 8 | ../slate/slate.pyc
 9 | ../slate/test_slate.pyc
10 | ../slate/utils.pyc
11 | ./
12 | dependency_links.txt
13 | PKG-INFO
14 | requires.txt
15 | SOURCES.txt
16 | top_level.txt
17 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/slate-0.3-py2.7.egg-info/requires.txt:
--------------------------------------------------------------------------------
1 | distribute
2 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/slate-0.3-py2.7.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | slate
2 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/slate/__init__.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
 2 | 
 3 | """
 4 | slate provides a convenient interface to PDFMiner[1].
 5 | 
Initializing a slate.PDF object will provide you with
 7 | the text from the source file as a list of pages. So,
 8 | a five page PDF file will have a range of 0-4.
 9 | 
10 |     >>> with open('example.pdf', 'rb') as f:
11 |     ...    PDF(f)
12 |     ...
13 |     [..., ..., ..., ...]
14 | 
15 | Beware of page numbers. slate.PDF objects start at 0.
16 | 
17 |     >>> with open('example.pdf', 'rb') as f:
18 |     ...    doc = PDF(f)
19 |     ...
20 |     >>> doc[2]
21 |     "Hello, I'm page three."
22 | 
23 | Passwords are supported. Use them as the second argument
of your initialization. Currently, UTF-8 encoding is
25 | hard-coded. If you would like to access more advanced 
26 | features, you should take a look at the PDFMiner API[2].
27 | 
28 | 
29 |   [1] http://www.unixuser.org/~euske/python/pdfminer/index.html
30 |   [2] http://www.unixuser.org/~euske/python/pdfminer/programming.html
31 | """
32 | 
33 | #This file is part of slate.
34 | 
35 | #slate is free software: you can redistribute it and/or modify
36 | #it under the terms of the GNU General Public License as published by
37 | #the Free Software Foundation, either version 3 of the License, or
38 | #(at your option) any later version.
39 | 
40 | #slate is distributed in the hope that it will be useful,
41 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
42 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
43 | #GNU General Public License for more details.
44 | 
45 | #You should have received a copy of the GNU General Public License
46 | #along with slate.  If not, see <http://www.gnu.org/licenses/>.
47 | 
48 | from slate import PDF
49 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/slate/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/slate/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/slate/conftest.py:
--------------------------------------------------------------------------------
# NOTE(review): presumably a legacy py.test conftest option that turns on
# doctest collection for modules — confirm against the py.test version in use.
option_doctestmodules = True
2 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/slate/conftest.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/slate/conftest.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/slate/slate.py:
--------------------------------------------------------------------------------
 1 | from StringIO import StringIO
 2 | 
 3 | from pdfminer.pdfparser import PDFParser, PDFDocument
 4 | from pdfminer.pdfinterp import PDFResourceManager
 5 | from pdfminer.pdfinterp import PDFPageInterpreter as PI
 6 | from pdfminer.pdfdevice import PDFDevice
 7 | from pdfminer.converter import TextConverter
 8 | 
 9 | import utils
10 | 
11 | __all__ = ['PDF']
12 | 
class PDFPageInterpreter(PI):
    """
    Page interpreter whose `process_page` returns the text that the
    attached device wrote for that page.
    """

    def process_page(self, page):
        """
        Render `page` through the device and return the device's output.

        The device's output buffer is rewound and emptied first, so the
        returned value only covers this page.
        """
        if 1 <= self.debug:
            # The original referenced a bare `stderr` name that this module
            # never imports; use sys.stderr so debug output cannot raise
            # NameError.
            import sys
            sys.stderr.write('Processing page: %r\n' % (page,))
        (x0, y0, x1, y1) = page.mediabox
        # Choose the matrix passed to the device according to the page
        # rotation (0 / 90 / 180 / 270 degrees).
        if page.rotate == 90:
            ctm = (0, -1, 1, 0, -y0, x1)
        elif page.rotate == 180:
            ctm = (-1, 0, 0, -1, x1, y1)
        elif page.rotate == 270:
            ctm = (0, 1, -1, 0, y1, -x0)
        else:
            ctm = (1, 0, 0, 1, -x0, -y0)
        # Reset the shared output buffer so text from earlier pages is not
        # included in this page's result.
        self.device.outfp.seek(0)
        self.device.outfp.buf = ''
        self.device.begin_page(page, ctm)
        self.render_contents(page.resources, page.contents, ctm=ctm)
        self.device.end_page(page)
        return self.device.outfp.getvalue()
32 | 
class PDF(list):
    """
    A list whose items are the text of each page of a PDF document.

    When the document is extractable, `metadata` holds the document's
    `info` value; for non-extractable documents the list stays empty and
    `metadata` is not set.
    """

    def __init__(self, file, password='', just_text=1):
        """
        Parse `file` and append the text of every page to this list.

        :file:
          File-like object handed to PDFParser.
        :password:
          Passed to the document's `initialize` call.
        :just_text:
          When true, the parsing objects are released once the pages
          have been read (see `_cleanup`).
        """
        self.parser = PDFParser(file)
        self.doc = PDFDocument()
        self.parser.set_document(self.doc)
        self.doc.set_parser(self.parser)
        self.doc.initialize(password)
        if self.doc.is_extractable:
            self.resmgr = PDFResourceManager()
            self.device = TextConverter(self.resmgr, outfp=StringIO())
            self.interpreter = PDFPageInterpreter(
               self.resmgr, self.device)
            for page in self.doc.get_pages():
                self.append(self.interpreter.process_page(page))
            self.metadata = self.doc.info
        if just_text:
            self._cleanup()

    def _cleanup(self):
        """
        Frees lots of non-textual information, such as the fonts
        and images and the objects that were needed to parse the
        PDF.
        """
        # device, resmgr and interpreter are only created for extractable
        # documents, so each attribute is removed only if it exists; an
        # unconditional `del` raised AttributeError for other documents.
        for name in ('device', 'doc', 'parser', 'resmgr', 'interpreter'):
            if hasattr(self, name):
                delattr(self, name)

    def text(self, clean=True):
        """
        Returns the text of the PDF as a single string.
        Options:

          :clean:
            Removes misc cruft, like lots of whitespace.
        """
        if clean:
            return ''.join(utils.trim_whitespace(page) for page in self)
        else:
            return ''.join(self)
75 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/slate/slate.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/slate/slate.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/slate/test_slate.py:
--------------------------------------------------------------------------------
 1 | """ 
 2 |   Tests for slate 
 3 |   http://pypi.python.org/slate
 4 | 
 5 |   Expected to be used with py.test:
 6 |   http://codespeak.net/py/dist/test/index.html
 7 | """
 8 | 
 9 | from slate import PDF
10 | 
def pytest_funcarg__doc(request):
    """Provide the `doc` argument: a PDF built from 'basic.pdf'."""
    with open('basic.pdf', 'rb') as pdf_file:
        document = PDF(pdf_file)
    return document
14 | 
def pytest_funcarg__passwd(request):
    """Provide the `passwd` argument: 'passwd-a.pdf' opened with password 'a'."""
    # PDF data is binary; open in 'rb' like the `doc` fixture so the bytes
    # are not altered by text-mode newline translation.
    with open('passwd-a.pdf', 'rb') as f:
        return PDF(f, 'a')
18 | 
def test_basic(doc):
    """The first page is the sample sentence followed by a form feed."""
    first_page = doc[0]
    assert first_page == 'This is a test.\x0c'

def test_metadata_extraction(doc):
    """The sample document exposes non-empty metadata."""
    metadata = doc.metadata
    assert metadata

def test_text_method(doc):
    """text() with default cleaning returns the bare sentence."""
    cleaned = doc.text()
    assert cleaned == "This is a test."

def test_text_method_unclean(doc):
    """text(clean=0) keeps the form feed character."""
    raw = doc.text(clean=0)
    assert '\x0c' in raw

def test_password(passwd):
    """The password-protected sample yields its first page text."""
    first_page = passwd[0]
    assert first_page == "Chamber of secrets.\x0c"
33 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/slate/test_slate.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/slate/test_slate.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/slate/utils.py:
--------------------------------------------------------------------------------
 1 | def trim_whitespace(s):
 2 |     """ 
 3 |     Returns a string that has at most one whitespace
 4 |     character between non-whitespace characters.
 5 | 
 6 |     >>> trim_whitespace(' hi   there')
 7 |     'hi there'
 8 |     """
 9 |     buffer  = ''
10 |     for i, letter in enumerate(s):
11 |          if letter.isspace():
12 |              try:
13 |                  if s[i+1].isspace(): continue
14 |              except IndexError: 
15 |                  pass
16 |          buffer = buffer + letter
17 |     
18 |     return buffer.strip()
19 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/slate/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/slate/utils.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3-1.14.dist-info/RECORD:
--------------------------------------------------------------------------------
 1 | urllib3/__init__.py,sha256=JaHe7rKmNZ1mIiTsBOnVTCVqUVNEQqpegwlQNX9UbmA,2645
 2 | urllib3/_collections.py,sha256=8G9PhO4XdkNDQATNL1uy86tSlH3EvIJHXebiOJnfFok,10542
 3 | urllib3/connection.py,sha256=XREoqqZh54Lgag5CLdVlC27bwCpOq0aYrMmNEMtSJWk,10286
 4 | urllib3/connectionpool.py,sha256=2J7aN994G8Jeppnrl8eOnEpha3QhBFk-5CE5ldsjwkk,31137
 5 | urllib3/exceptions.py,sha256=zGjhZCR1wefEnCN5b7WouQ3UhXesJ2bRKYIeWusaFJs,5599
 6 | urllib3/fields.py,sha256=WVUvPfSzNBniw9zKVDoLl9y5ko2qKBjbzkH-bTQMSgQ,5872
 7 | urllib3/filepost.py,sha256=NvLlFsdt8ih_Q4S2ekQF3CJG0nOXs32YI-G04_AdT2g,2320
 8 | urllib3/poolmanager.py,sha256=W09uewCGoKSzezei0DwaTXT7kuvsF2elO2wUXWfiAco,9614
 9 | urllib3/request.py,sha256=jET7OvA3FSjxABBRGhCyMdPvM9XuJA6df9gRhkJiJiY,5988
10 | urllib3/response.py,sha256=6Bs5LNzhW1YEEd6stBFJtruDVFMlWNxo0MFPmtJhvDU,18103
11 | urllib3/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12 | urllib3/contrib/appengine.py,sha256=VP10uoVbNpH0kYVbOFd7dN5dtDcVfEytMoriKsDBBuI,7938
13 | urllib3/contrib/ntlmpool.py,sha256=r-vMDMXAGbix9a7-IhbKVTATmAst-5g4hKYOLf8Kd5M,4531
14 | urllib3/contrib/pyopenssl.py,sha256=JBL3GO8YucHXkdpU7uxUGd9UgShsIhAU8oCMJDOo47s,10094
15 | urllib3/contrib/socks.py,sha256=hE8u1190XTNSE_HAtTxwpISa-jnDbpIA1ozlZoIq_Fg,5705
16 | urllib3/packages/__init__.py,sha256=nlChrGzkjCkmhCX9HrF_qHPUgosfsPQkVIJxiiLhk9g,109
17 | urllib3/packages/ordered_dict.py,sha256=VQaPONfhVMsb8B63Xg7ZOydJqIE_jzeMhVN3Pec6ogw,8935
18 | urllib3/packages/six.py,sha256=U-rO-WBrFS8PxHeamSl6okKCjqPF18NhiZb0qPZ67XM,11628
19 | urllib3/packages/ssl_match_hostname/__init__.py,sha256=cOWMIn1orgJoA35p6pSzO_-Dc6iOX9Dhl6D2sL9b_2o,460
20 | urllib3/packages/ssl_match_hostname/_implementation.py,sha256=fK28k37hL7-D79v9iM2fHgNK9Q1Pw0M7qVRL4rkfFjQ,3778
21 | urllib3/util/__init__.py,sha256=7LnyUDyddbD9VVmsbPP0ckT2paVTmgLPs5E_BUoHVu8,854
22 | urllib3/util/connection.py,sha256=6PvDBlK_6QDLHzEDT-uEMhqKcDoSuRO43Vtb4IXfkzQ,3380
23 | urllib3/util/request.py,sha256=ZMDewRK-mjlK72szGIIjzYnLIn-zPP0WgJUMjKeZ6Tg,2128
24 | urllib3/util/response.py,sha256=1UFd5TIp9MyBp4xgnZoyQZscZVPPr0tWRaXNR5w_vds,2165
25 | urllib3/util/retry.py,sha256=EC10NTVcyHOWzBlyKynLvr5ZgghcfwA-rjH4P2_RNE0,9975
26 | urllib3/util/ssl_.py,sha256=bm46-ql6Wq6ulhJw604iBTG16QHDzHB03cbLyvlIXq4,11464
27 | urllib3/util/timeout.py,sha256=ioAIYptFyBG7eU_r8_ZmO45hpj1dJE6WCvrGR9dNFjs,9596
28 | urllib3/util/url.py,sha256=EcX4ZfmgKWcqM4sY9FlC-yN4y_snuURPV0TpUPHNjnc,5879
29 | urllib3-1.14.dist-info/DESCRIPTION.rst,sha256=hud3mTd3qo3nF7giyoJMlmRXK-W1bu4uXySgqd0wkv8,24151
30 | urllib3-1.14.dist-info/METADATA,sha256=1BeC8yJkDQYsjh332O00jkPtPdMTbf53-Rvxy78cavE,25190
31 | urllib3-1.14.dist-info/metadata.json,sha256=zHDlvIbjCPUvbbOBrqhbF6xI8TcEKZjUI2quKMm1Etg,1178
32 | urllib3-1.14.dist-info/pbr.json,sha256=Bp61WOs2E_Dy1arJqQSN7uuuWVh-ZwEhoBsNUlxup_k,47
33 | urllib3-1.14.dist-info/RECORD,,
34 | urllib3-1.14.dist-info/top_level.txt,sha256=EMiXL2sKrTcmrMxIHTqdc3ET54pQI2Y072LexFEemvo,8
35 | urllib3-1.14.dist-info/WHEEL,sha256=AvR0WeTpDaxT645bl5FQxUK6NPsTls2ttpcGJg3j1Xg,110
36 | urllib3/filepost.pyc,,
37 | urllib3/contrib/__init__.pyc,,
38 | urllib3/util/response.pyc,,
39 | urllib3/packages/ordered_dict.pyc,,
40 | urllib3/packages/__init__.pyc,,
41 | urllib3/connection.pyc,,
42 | urllib3/connectionpool.pyc,,
43 | urllib3/util/timeout.pyc,,
44 | urllib3/packages/ssl_match_hostname/__init__.pyc,,
45 | urllib3/fields.pyc,,
46 | urllib3/util/__init__.pyc,,
47 | urllib3/response.pyc,,
48 | urllib3/packages/six.pyc,,
49 | urllib3/__init__.pyc,,
50 | urllib3/contrib/ntlmpool.pyc,,
51 | urllib3/poolmanager.pyc,,
52 | urllib3/contrib/pyopenssl.pyc,,
53 | urllib3/util/ssl_.pyc,,
54 | urllib3/util/request.pyc,,
55 | urllib3/packages/ssl_match_hostname/_implementation.pyc,,
56 | urllib3/request.pyc,,
57 | urllib3/util/connection.pyc,,
58 | urllib3/contrib/appengine.pyc,,
59 | urllib3/contrib/socks.pyc,,
60 | urllib3/exceptions.pyc,,
61 | urllib3/util/retry.pyc,,
62 | urllib3/_collections.pyc,,
63 | urllib3/util/url.pyc,,
64 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3-1.14.dist-info/WHEEL:
--------------------------------------------------------------------------------
1 | Wheel-Version: 1.0
2 | Generator: bdist_wheel (0.24.0)
3 | Root-Is-Purelib: true
4 | Tag: py2-none-any
5 | Tag: py3-none-any
6 | 
7 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3-1.14.dist-info/metadata.json:
--------------------------------------------------------------------------------
1 | {"license": "MIT", "name": "urllib3", "metadata_version": "2.0", "generator": "bdist_wheel (0.24.0)", "test_requires": [{"requires": ["nose", "mock", "tornado"]}], "summary": "HTTP library with thread-safe connection pooling, file post, and more.", "run_requires": [{"requires": ["certifi"], "extra": "secure"}, {"environment": "python_version<=\"2.7\"", "requires": ["pyOpenSSL>=0.13", "ndg-httpsclient", "pyasn1"], "extra": "secure"}], "version": "1.14", "extensions": {"python.details": {"project_urls": {"Home": "http://urllib3.readthedocs.org/"}, "document_names": {"description": "DESCRIPTION.rst"}, "contacts": [{"role": "author", "email": "andrey.petrov@shazow.net", "name": "Andrey Petrov"}]}}, "keywords": ["urllib", "httplib", "threadsafe", "filepost", "http", "https", "ssl", "pooling"], "classifiers": ["Environment :: Web Environment", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 3", "Topic :: Internet :: WWW/HTTP", "Topic :: Software Development :: Libraries"], "extras": ["secure"]}


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3-1.14.dist-info/pbr.json:
--------------------------------------------------------------------------------
1 | {"is_release": false, "git_version": "27df29b"}


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3-1.14.dist-info/top_level.txt:
--------------------------------------------------------------------------------
1 | urllib3
2 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | urllib3 - Thread-safe connection pooling and re-using.
 3 | """
 4 | 
 5 | from __future__ import absolute_import
 6 | import warnings
 7 | 
 8 | from .connectionpool import (
 9 |     HTTPConnectionPool,
10 |     HTTPSConnectionPool,
11 |     connection_from_url
12 | )
13 | 
14 | from . import exceptions
15 | from .filepost import encode_multipart_formdata
16 | from .poolmanager import PoolManager, ProxyManager, proxy_from_url
17 | from .response import HTTPResponse
18 | from .util.request import make_headers
19 | from .util.url import get_host
20 | from .util.timeout import Timeout
21 | from .util.retry import Retry
22 | 
23 | 
24 | # Set default logging handler to avoid "No handler found" warnings.
25 | import logging
try:  # Python 2.7+
    from logging import NullHandler
except ImportError:
    # Fallback when the logging module does not provide NullHandler:
    # a handler whose emit() discards every record.
    class NullHandler(logging.Handler):
        def emit(self, record):
            pass
32 | 
# Package metadata.
__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
__license__ = 'MIT'
__version__ = '1.14'

# Names exported by `from urllib3 import *`.
__all__ = (
    'HTTPConnectionPool',
    'HTTPSConnectionPool',
    'PoolManager',
    'ProxyManager',
    'HTTPResponse',
    'Retry',
    'Timeout',
    'add_stderr_logger',
    'connection_from_url',
    'disable_warnings',
    'encode_multipart_formdata',
    'get_host',
    'make_headers',
    'proxy_from_url',
)

# Attach a do-nothing handler to this package's logger.
logging.getLogger(__name__).addHandler(NullHandler())
55 | 
56 | 
def add_stderr_logger(level=logging.DEBUG):
    """
    Attach a StreamHandler to this package's logger and set the logger
    to `level`. Handy for quick debugging.

    Returns the handler that was added.
    """
    # This method needs to be in this __init__.py to get the __name__ correct
    # even if urllib3 is vendored within another package.
    stderr_handler = logging.StreamHandler()
    stderr_handler.setFormatter(
        logging.Formatter('%(asctime)s %(levelname)s %(message)s'))

    package_logger = logging.getLogger(__name__)
    package_logger.addHandler(stderr_handler)
    package_logger.setLevel(level)
    package_logger.debug(
        'Added a stderr logging handler to logger: %s', __name__)
    return stderr_handler
73 | 
# Clean up: NullHandler has already been attached to the package logger,
# so remove the name from the module namespace.
del NullHandler


# SecurityWarnings always go off by default ('always' action, appended to
# the end of the filter list).
warnings.simplefilter('always', exceptions.SecurityWarning, append=True)
# SubjectAltNameWarnings should go off once per host.
warnings.simplefilter('default', exceptions.SubjectAltNameWarning)
# InsecurePlatformWarnings don't vary between requests, so we keep the
# 'default' action for them.
warnings.simplefilter('default', exceptions.InsecurePlatformWarning,
                      append=True)
# SNIMissingWarnings should go off only once.
warnings.simplefilter('default', exceptions.SNIMissingWarning)
87 | 
88 | 
def disable_warnings(category=exceptions.HTTPWarning):
    """
    Install an 'ignore' warnings filter for `category`.

    With the default category this silences every warning derived from
    urllib3's HTTPWarning.
    """
    warnings.simplefilter(action='ignore', category=category)
94 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/_collections.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/_collections.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/connection.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/connection.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/connectionpool.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/connectionpool.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/contrib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/contrib/__init__.py


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/contrib/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/contrib/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/contrib/appengine.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/contrib/appengine.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/contrib/ntlmpool.py:
--------------------------------------------------------------------------------
  1 | """
  2 | NTLM authenticating pool, contributed by erikcederstran
  3 | 
  4 | Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10
  5 | """
  6 | from __future__ import absolute_import
  7 | 
  8 | try:
  9 |     from http.client import HTTPSConnection
 10 | except ImportError:
 11 |     from httplib import HTTPSConnection
 12 | from logging import getLogger
 13 | from ntlm import ntlm
 14 | 
 15 | from urllib3 import HTTPSConnectionPool
 16 | 
 17 | 
 18 | log = getLogger(__name__)
 19 | 
 20 | 
 21 | class NTLMConnectionPool(HTTPSConnectionPool):
 22 |     """
 23 |     Implements an NTLM authentication version of an urllib3 connection pool
 24 |     """
 25 | 
 26 |     scheme = 'https'
 27 | 
 28 |     def __init__(self, user, pw, authurl, *args, **kwargs):
 29 |         """
 30 |         authurl is a random URL on the server that is protected by NTLM.
 31 |         user is the Windows user, probably in the DOMAIN\\username format.
 32 |         pw is the password for the user.
 33 |         """
 34 |         super(NTLMConnectionPool, self).__init__(*args, **kwargs)
 35 |         self.authurl = authurl
 36 |         self.rawuser = user
 37 |         user_parts = user.split('\\', 1)
 38 |         self.domain = user_parts[0].upper()
 39 |         self.user = user_parts[1]
 40 |         self.pw = pw
 41 | 
 42 |     def _new_conn(self):
 43 |         # Performs the NTLM handshake that secures the connection. The socket
 44 |         # must be kept open while requests are performed.
 45 |         self.num_connections += 1
 46 |         log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s',
 47 |                   self.num_connections, self.host, self.authurl)
 48 | 
 49 |         headers = {}
 50 |         headers['Connection'] = 'Keep-Alive'
 51 |         req_header = 'Authorization'
 52 |         resp_header = 'www-authenticate'
 53 | 
 54 |         conn = HTTPSConnection(host=self.host, port=self.port)
 55 | 
 56 |         # Send negotiation message
 57 |         headers[req_header] = (
 58 |             'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser))
 59 |         log.debug('Request headers: %s', headers)
 60 |         conn.request('GET', self.authurl, None, headers)
 61 |         res = conn.getresponse()
 62 |         reshdr = dict(res.getheaders())
 63 |         log.debug('Response status: %s %s', res.status, res.reason)
 64 |         log.debug('Response headers: %s', reshdr)
 65 |         log.debug('Response data: %s [...]', res.read(100))
 66 | 
 67 |         # Remove the reference to the socket, so that it can not be closed by
 68 |         # the response object (we want to keep the socket open)
 69 |         res.fp = None
 70 | 
 71 |         # Server should respond with a challenge message
 72 |         auth_header_values = reshdr[resp_header].split(', ')
 73 |         auth_header_value = None
 74 |         for s in auth_header_values:
 75 |             if s[:5] == 'NTLM ':
 76 |                 auth_header_value = s[5:]
 77 |         if auth_header_value is None:
 78 |             raise Exception('Unexpected %s response header: %s' %
 79 |                             (resp_header, reshdr[resp_header]))
 80 | 
 81 |         # Send authentication message
 82 |         ServerChallenge, NegotiateFlags = \
 83 |             ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value)
 84 |         auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge,
 85 |                                                          self.user,
 86 |                                                          self.domain,
 87 |                                                          self.pw,
 88 |                                                          NegotiateFlags)
 89 |         headers[req_header] = 'NTLM %s' % auth_msg
 90 |         log.debug('Request headers: %s', headers)
 91 |         conn.request('GET', self.authurl, None, headers)
 92 |         res = conn.getresponse()
 93 |         log.debug('Response status: %s %s', res.status, res.reason)
 94 |         log.debug('Response headers: %s', dict(res.getheaders()))
 95 |         log.debug('Response data: %s [...]', res.read()[:100])
 96 |         if res.status != 200:
 97 |             if res.status == 401:
 98 |                 raise Exception('Server rejected request: wrong '
 99 |                                 'username or password')
100 |             raise Exception('Wrong server response: %s %s' %
101 |                             (res.status, res.reason))
102 | 
103 |         res.fp = None
104 |         log.debug('Connection established')
105 |         return conn
106 | 
107 |     def urlopen(self, method, url, body=None, headers=None, retries=3,
108 |                 redirect=True, assert_same_host=True):
109 |         if headers is None:
110 |             headers = {}
111 |         headers['Connection'] = 'Keep-Alive'
112 |         return super(NTLMConnectionPool, self).urlopen(method, url, body,
113 |                                                        headers, retries,
114 |                                                        redirect,
115 |                                                        assert_same_host)
116 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/contrib/ntlmpool.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/contrib/ntlmpool.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/contrib/pyopenssl.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/contrib/pyopenssl.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/contrib/socks.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/contrib/socks.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/exceptions.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/exceptions.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/fields.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/fields.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/filepost.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | import codecs
 3 | 
 4 | from uuid import uuid4
 5 | from io import BytesIO
 6 | 
 7 | from .packages import six
 8 | from .packages.six import b
 9 | from .fields import RequestField
10 | 
# Factory producing a UTF-8 stream writer around a binary file-like object.
writer = codecs.getwriter('utf-8')
12 | 
13 | 
def choose_boundary():
    """
    Our embarrassingly-simple replacement for mimetools.choose_boundary.

    Returns the hexadecimal form of a freshly generated random UUID.
    """
    token = uuid4()
    return token.hex
19 | 
20 | 
def iter_field_objects(fields):
    """
    Yield a :class:`~urllib3.fields.RequestField` for every entry of *fields*.

    Supports list of (k, v) tuples and dicts, and lists of
    :class:`~urllib3.fields.RequestField`. Entries that are not already
    ``RequestField`` instances are converted with
    ``RequestField.from_tuples``.
    """
    entries = six.iteritems(fields) if isinstance(fields, dict) else iter(fields)

    for entry in entries:
        if not isinstance(entry, RequestField):
            entry = RequestField.from_tuples(*entry)
        yield entry
39 | 
40 | 
def iter_fields(fields):
    """
    .. deprecated:: 1.6

    Iterate over fields as ``(key, value)`` pairs.

    The addition of :class:`~urllib3.fields.RequestField` makes this function
    obsolete. Instead, use :func:`iter_field_objects`, which returns
    :class:`~urllib3.fields.RequestField` objects.

    Supports list of (k, v) tuples and dicts.
    """
    pairs = six.iteritems(fields) if isinstance(fields, dict) else fields
    return ((key, value) for key, value in pairs)
57 | 
58 | 
def encode_multipart_formdata(fields, boundary=None):
    """
    Encode a dictionary of ``fields`` using the multipart/form-data MIME format.

    :param fields:
        Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`).

    :param boundary:
        If not specified, then a random boundary will be generated using
        :func:`choose_boundary`.

    :returns:
        A ``(body, content_type)`` tuple: the encoded body bytes and the
        matching ``multipart/form-data`` content-type header value.
    """
    if boundary is None:
        boundary = choose_boundary()

    payload = BytesIO()
    # Text goes through the UTF-8 stream writer, bytes are written directly.
    text_out = writer(payload)

    for part in iter_field_objects(fields):
        payload.write(b('--%s\r\n' % (boundary)))
        text_out.write(part.render_headers())

        content = part.data
        if isinstance(content, int):
            content = str(content)  # Backwards compatibility

        if isinstance(content, six.text_type):
            text_out.write(content)
        else:
            payload.write(content)

        payload.write(b'\r\n')

    # Closing delimiter.
    payload.write(b('--%s--\r\n' % (boundary)))

    content_type = str('multipart/form-data; boundary=%s' % boundary)
    return payload.getvalue(), content_type
95 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/filepost.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/filepost.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/packages/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | 
3 | from . import ssl_match_hostname
4 | 
5 | __all__ = ('ssl_match_hostname', )
6 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/packages/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/packages/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/packages/ordered_dict.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/packages/ordered_dict.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/packages/six.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/packages/six.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/packages/ssl_match_hostname/__init__.py:
--------------------------------------------------------------------------------
 1 | try:
 2 |     # Python 3.2+
 3 |     from ssl import CertificateError, match_hostname
 4 | except ImportError:
 5 |     try:
 6 |         # Backport of the function from a pypi module
 7 |         from backports.ssl_match_hostname import CertificateError, match_hostname
 8 |     except ImportError:
 9 |         # Our vendored copy
10 |         from ._implementation import CertificateError, match_hostname
11 | 
12 | # Not needed, but documenting what we provide.
13 | __all__ = ('CertificateError', 'match_hostname')
14 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/packages/ssl_match_hostname/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/packages/ssl_match_hostname/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/packages/ssl_match_hostname/_implementation.py:
--------------------------------------------------------------------------------
  1 | """The match_hostname() function from Python 3.3.3, essential when using SSL."""
  2 | 
  3 | # Note: This file is under the PSF license as the code comes from the python
  4 | # stdlib.   http://docs.python.org/3/license.html
  5 | 
  6 | import re
  7 | 
  8 | __version__ = '3.4.0.2'
  9 | 
 10 | class CertificateError(ValueError):
 11 |     pass
 12 | 
 13 | 
 14 | def _dnsname_match(dn, hostname, max_wildcards=1):
 15 |     """Matching according to RFC 6125, section 6.4.3
 16 | 
 17 |     http://tools.ietf.org/html/rfc6125#section-6.4.3
 18 |     """
 19 |     pats = []
 20 |     if not dn:
 21 |         return False
 22 | 
 23 |     # Ported from python3-syntax:
 24 |     # leftmost, *remainder = dn.split(r'.')
 25 |     parts = dn.split(r'.')
 26 |     leftmost = parts[0]
 27 |     remainder = parts[1:]
 28 | 
 29 |     wildcards = leftmost.count('*')
 30 |     if wildcards > max_wildcards:
 31 |         # Issue #17980: avoid denials of service by refusing more
 32 |         # than one wildcard per fragment.  A survey of established
 33 |         # policy among SSL implementations showed it to be a
 34 |         # reasonable choice.
 35 |         raise CertificateError(
 36 |             "too many wildcards in certificate DNS name: " + repr(dn))
 37 | 
 38 |     # speed up common case w/o wildcards
 39 |     if not wildcards:
 40 |         return dn.lower() == hostname.lower()
 41 | 
 42 |     # RFC 6125, section 6.4.3, subitem 1.
 43 |     # The client SHOULD NOT attempt to match a presented identifier in which
 44 |     # the wildcard character comprises a label other than the left-most label.
 45 |     if leftmost == '*':
 46 |         # When '*' is a fragment by itself, it matches a non-empty dotless
 47 |         # fragment.
 48 |         pats.append('[^.]+')
 49 |     elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
 50 |         # RFC 6125, section 6.4.3, subitem 3.
 51 |         # The client SHOULD NOT attempt to match a presented identifier
 52 |         # where the wildcard character is embedded within an A-label or
 53 |         # U-label of an internationalized domain name.
 54 |         pats.append(re.escape(leftmost))
 55 |     else:
 56 |         # Otherwise, '*' matches any dotless string, e.g. www*
 57 |         pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
 58 | 
 59 |     # add the remaining fragments, ignore any wildcards
 60 |     for frag in remainder:
 61 |         pats.append(re.escape(frag))
 62 | 
 63 |     pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
 64 |     return pat.match(hostname)
 65 | 
 66 | 
 67 | def match_hostname(cert, hostname):
 68 |     """Verify that *cert* (in decoded format as returned by
 69 |     SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 and RFC 6125
 70 |     rules are followed, but IP addresses are not accepted for *hostname*.
 71 | 
 72 |     CertificateError is raised on failure. On success, the function
 73 |     returns nothing.
 74 |     """
 75 |     if not cert:
 76 |         raise ValueError("empty or no certificate")
 77 |     dnsnames = []
 78 |     san = cert.get('subjectAltName', ())
 79 |     for key, value in san:
 80 |         if key == 'DNS':
 81 |             if _dnsname_match(value, hostname):
 82 |                 return
 83 |             dnsnames.append(value)
 84 |     if not dnsnames:
 85 |         # The subject is only checked when there is no dNSName entry
 86 |         # in subjectAltName
 87 |         for sub in cert.get('subject', ()):
 88 |             for key, value in sub:
 89 |                 # XXX according to RFC 2818, the most specific Common Name
 90 |                 # must be used.
 91 |                 if key == 'commonName':
 92 |                     if _dnsname_match(value, hostname):
 93 |                         return
 94 |                     dnsnames.append(value)
 95 |     if len(dnsnames) > 1:
 96 |         raise CertificateError("hostname %r "
 97 |             "doesn't match either of %s"
 98 |             % (hostname, ', '.join(map(repr, dnsnames))))
 99 |     elif len(dnsnames) == 1:
100 |         raise CertificateError("hostname %r "
101 |             "doesn't match %r"
102 |             % (hostname, dnsnames[0]))
103 |     else:
104 |         raise CertificateError("no appropriate commonName or "
105 |             "subjectAltName fields were found")
106 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/packages/ssl_match_hostname/_implementation.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/packages/ssl_match_hostname/_implementation.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/poolmanager.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/poolmanager.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/request.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/request.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/response.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/response.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/util/__init__.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | # For backwards compatibility, provide imports that used to be here.
 3 | from .connection import is_connection_dropped
 4 | from .request import make_headers
 5 | from .response import is_fp_closed
 6 | from .ssl_ import (
 7 |     SSLContext,
 8 |     HAS_SNI,
 9 |     assert_fingerprint,
10 |     resolve_cert_reqs,
11 |     resolve_ssl_version,
12 |     ssl_wrap_socket,
13 | )
14 | from .timeout import (
15 |     current_time,
16 |     Timeout,
17 | )
18 | 
19 | from .retry import Retry
20 | from .url import (
21 |     get_host,
22 |     parse_url,
23 |     split_first,
24 |     Url,
25 | )
26 | 
27 | __all__ = (
28 |     'HAS_SNI',
29 |     'SSLContext',
30 |     'Retry',
31 |     'Timeout',
32 |     'Url',
33 |     'assert_fingerprint',
34 |     'current_time',
35 |     'is_connection_dropped',
36 |     'is_fp_closed',
37 |     'get_host',
38 |     'parse_url',
39 |     'make_headers',
40 |     'resolve_cert_reqs',
41 |     'resolve_ssl_version',
42 |     'split_first',
43 |     'ssl_wrap_socket',
44 | )
45 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/util/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/util/__init__.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/util/connection.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | import socket
  3 | try:
  4 |     from select import poll, POLLIN
  5 | except ImportError:  # `poll` doesn't exist on OSX and other platforms
  6 |     poll = False
  7 |     try:
  8 |         from select import select
  9 |     except ImportError:  # `select` doesn't exist on AppEngine.
 10 |         select = False
 11 | 
 12 | 
 13 | def is_connection_dropped(conn):  # Platform-specific
 14 |     """
 15 |     Returns True if the connection is dropped and should be closed.
 16 | 
 17 |     :param conn:
 18 |         :class:`httplib.HTTPConnection` object.
 19 | 
 20 |     Note: For platforms like AppEngine, this will always return ``False`` to
 21 |     let the platform handle connection recycling transparently for us.
 22 |     """
 23 |     sock = getattr(conn, 'sock', False)
 24 |     if sock is False:  # Platform-specific: AppEngine
 25 |         return False
 26 |     if sock is None:  # Connection already closed (such as by httplib).
 27 |         return True
 28 | 
 29 |     if not poll:
 30 |         if not select:  # Platform-specific: AppEngine
 31 |             return False
 32 | 
 33 |         try:
 34 |             return select([sock], [], [], 0.0)[0]
 35 |         except socket.error:
 36 |             return True
 37 | 
 38 |     # This version is better on platforms that support it.
 39 |     p = poll()
 40 |     p.register(sock, POLLIN)
 41 |     for (fno, ev) in p.poll(0.0):
 42 |         if fno == sock.fileno():
 43 |             # Either data is buffered (bad), or the connection is dropped.
 44 |             return True
 45 | 
 46 | 
 47 | # This function is copied from socket.py in the Python 2.7 standard
 48 | # library test suite. Added to its signature is only `socket_options`.
 49 | def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
 50 |                       source_address=None, socket_options=None):
 51 |     """Connect to *address* and return the socket object.
 52 | 
 53 |     Convenience function.  Connect to *address* (a 2-tuple ``(host,
 54 |     port)``) and return the socket object.  Passing the optional
 55 |     *timeout* parameter will set the timeout on the socket instance
 56 |     before attempting to connect.  If no *timeout* is supplied, the
 57 |     global default timeout setting returned by :func:`getdefaulttimeout`
 58 |     is used.  If *source_address* is set it must be a tuple of (host, port)
 59 |     for the socket to bind as a source address before making the connection.
 60 |     An host of '' or port 0 tells the OS to use the default.
 61 |     """
 62 | 
 63 |     host, port = address
 64 |     if host.startswith('['):
 65 |         host = host.strip('[]')
 66 |     err = None
 67 |     for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
 68 |         af, socktype, proto, canonname, sa = res
 69 |         sock = None
 70 |         try:
 71 |             sock = socket.socket(af, socktype, proto)
 72 | 
 73 |             # If provided, set socket level options before connecting.
 74 |             # This is the only addition urllib3 makes to this function.
 75 |             _set_socket_options(sock, socket_options)
 76 | 
 77 |             if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
 78 |                 sock.settimeout(timeout)
 79 |             if source_address:
 80 |                 sock.bind(source_address)
 81 |             sock.connect(sa)
 82 |             return sock
 83 | 
 84 |         except socket.error as e:
 85 |             err = e
 86 |             if sock is not None:
 87 |                 sock.close()
 88 |                 sock = None
 89 | 
 90 |     if err is not None:
 91 |         raise err
 92 | 
 93 |     raise socket.error("getaddrinfo returns an empty list")
 94 | 
 95 | 
 96 | def _set_socket_options(sock, options):
 97 |     if options is None:
 98 |         return
 99 | 
100 |     for opt in options:
101 |         sock.setsockopt(*opt)
102 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/util/connection.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/util/connection.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/util/request.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | from base64 import b64encode
 3 | 
 4 | from ..packages.six import b
 5 | 
 6 | ACCEPT_ENCODING = 'gzip,deflate'
 7 | 
 8 | 
 9 | def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
10 |                  basic_auth=None, proxy_basic_auth=None, disable_cache=None):
11 |     """
12 |     Shortcuts for generating request headers.
13 | 
14 |     :param keep_alive:
15 |         If ``True``, adds 'connection: keep-alive' header.
16 | 
17 |     :param accept_encoding:
18 |         Can be a boolean, list, or string.
19 |         ``True`` translates to 'gzip,deflate'.
20 |         List will get joined by comma.
21 |         String will be used as provided.
22 | 
23 |     :param user_agent:
24 |         String representing the user-agent you want, such as
25 |         "python-urllib3/0.6"
26 | 
27 |     :param basic_auth:
28 |         Colon-separated username:password string for 'authorization: basic ...'
29 |         auth header.
30 | 
31 |     :param proxy_basic_auth:
32 |         Colon-separated username:password string for 'proxy-authorization: basic ...'
33 |         auth header.
34 | 
35 |     :param disable_cache:
36 |         If ``True``, adds 'cache-control: no-cache' header.
37 | 
38 |     Example::
39 | 
40 |         >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
41 |         {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
42 |         >>> make_headers(accept_encoding=True)
43 |         {'accept-encoding': 'gzip,deflate'}
44 |     """
45 |     headers = {}
46 |     if accept_encoding:
47 |         if isinstance(accept_encoding, str):
48 |             pass
49 |         elif isinstance(accept_encoding, list):
50 |             accept_encoding = ','.join(accept_encoding)
51 |         else:
52 |             accept_encoding = ACCEPT_ENCODING
53 |         headers['accept-encoding'] = accept_encoding
54 | 
55 |     if user_agent:
56 |         headers['user-agent'] = user_agent
57 | 
58 |     if keep_alive:
59 |         headers['connection'] = 'keep-alive'
60 | 
61 |     if basic_auth:
62 |         headers['authorization'] = 'Basic ' + \
63 |             b64encode(b(basic_auth)).decode('utf-8')
64 | 
65 |     if proxy_basic_auth:
66 |         headers['proxy-authorization'] = 'Basic ' + \
67 |             b64encode(b(proxy_basic_auth)).decode('utf-8')
68 | 
69 |     if disable_cache:
70 |         headers['cache-control'] = 'no-cache'
71 | 
72 |     return headers
73 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/util/request.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/util/request.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/util/response.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | from ..packages.six.moves import http_client as httplib
 3 | 
 4 | from ..exceptions import HeaderParsingError
 5 | 
 6 | 
 7 | def is_fp_closed(obj):
 8 |     """
 9 |     Checks whether a given file-like object is closed.
10 | 
11 |     :param obj:
12 |         The file-like object to check.
13 |     """
14 | 
15 |     try:
16 |         # Check via the official file-like-object way.
17 |         return obj.closed
18 |     except AttributeError:
19 |         pass
20 | 
21 |     try:
22 |         # Check if the object is a container for another file-like object that
23 |         # gets released on exhaustion (e.g. HTTPResponse).
24 |         return obj.fp is None
25 |     except AttributeError:
26 |         pass
27 | 
28 |     raise ValueError("Unable to determine whether fp is closed.")
29 | 
30 | 
def assert_header_parsing(headers):
    """
    Raise if the parsed *headers* carry defects or unparsed data.

    Reads the ``defects`` attribute and, when present, the result of
    ``get_payload()`` from the header object.

    Only works on Python 3.

    :param headers: Headers to verify.
    :type headers: `httplib.HTTPMessage`.

    :raises TypeError:
        If *headers* is not an ``httplib.HTTPMessage``.

    :raises urllib3.exceptions.HeaderParsingError:
        If parsing errors are found.
    """
    # This will fail silently if we pass in the wrong kind of parameter.
    # To make debugging easier add an explicit check.
    if not isinstance(headers, httplib.HTTPMessage):
        raise TypeError('expected httplib.Message, got {0}.'.format(
            type(headers)))

    defects = getattr(headers, 'defects', None)

    payload_getter = getattr(headers, 'get_payload', None)
    # Platform-specific: Python 3.
    unparsed_data = payload_getter() if payload_getter else None

    if not (defects or unparsed_data):
        return

    raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data)
60 | 
61 | 
def is_response_to_head(response):
    """
    Checks whether the request of a response has been a HEAD-request.
    Handles the quirks of AppEngine.

    :param response: response whose private ``_method`` attribute is read.
    :type response: :class:`httplib.HTTPResponse`
    """
    # FIXME: Can we do this somehow without accessing private httplib _method?
    verb = response._method
    if not isinstance(verb, int):
        return verb.upper() == 'HEAD'
    # Platform-specific: Appengine -- an integer method of 3 is treated as HEAD.
    return verb == 3
75 | 


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/util/response.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/util/response.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/util/retry.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/util/retry.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/util/ssl_.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/util/ssl_.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/util/timeout.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/util/timeout.pyc


--------------------------------------------------------------------------------
/lambda_functions/pdf_text_extract/urllib3/util/url.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/theemadnes/PDF_text_extract/cc077ebec7b021cd0c4bc36b6866ca30a37a776b/lambda_functions/pdf_text_extract/urllib3/util/url.pyc


--------------------------------------------------------------------------------