├── .github └── workflows │ └── python-app.yml ├── .gitignore ├── README.rst ├── merge2pdfs ├── __init__.py ├── __main__.py └── merge2pdf.py ├── merged_pdfs.pdf ├── requirements-dev.txt ├── requirements.txt └── tests ├── __init__.py ├── imgs_jpg_png ├── Necto_Vertical_MONO_azul_necto_w120.jpg ├── Necto_Vertical_MONO_azul_necto_w120_jpeg2000_trans.jp2 ├── Necto_Vertical_MONO_azul_necto_w240.jpg └── Necto_Vertical_MONO_azul_necto_w240.png ├── merge2pdf_sample_code.py ├── pdf_samples ├── issue_repo_pypdf4.pdf ├── issue_repo_pypdf4_test.pdf ├── jpeg_w_350.jpg ├── pdf_sample_A Sample PDF_loremIpsum_pages_01.pdf ├── pdf_sample_b_pages_01.pdf ├── pdf_sample_dummy_w3c_pages_01.pdf ├── pdf_sample_googledocs_image_pages_02.pdf ├── pdf_sample_googlesheet_pages_02.pdf ├── pdf_sample_libreoffice_exported_ISO19005_pages_02.pdf ├── pdf_sample_libreoffice_exported_format_FDF_pages_02.pdf ├── pdf_sample_libreoffice_exported_hibrid_format_pages_02.pdf ├── pdf_sample_libreoffice_exported_not_hybrid_ISO19005_pages_02.pdf ├── pdf_sample_pages_01.pdf ├── pdf_sample_readthedocs_pdf_networkdays_pages_019.pdf ├── pdf_sample_text_edit_macos_pages_01.pdf ├── pdf_sample_wikimedia_org_pages_01.pdf └── sample_pdf_commandline_xhtml2pdf_generated_pages_01.pdf └── test_merge2pdf.py /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python application 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python 3.8 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: 3.8 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | pip install flake8 pytest 27 | pip install -r requirements.txt 28 | if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi 29 | - name: Lint with flake8 30 | run: | 31 | # stop the build if there are Python syntax errors or undefined names 32 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 33 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 34 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 35 | - name: Test with unittest 36 | run: | 37 | coverage run -m unittest 38 | - name: Posting Coverage 39 | env: 40 | CODECOV_TOKEN: "b0f88bf5-fb35-4ca6-b635-f9c4a313cc33" 41 | run: | 42 | codecov -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | 3 | test_merged_pdf.pdf 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | 138 | # pytype static type analyzer 139 | .pytype/ 140 | 141 | # Cython debug symbols 142 | cython_debug/ 143 | 144 | 145 | **/.DS_Store 146 | .vscode/ 147 | merge_pdfs.code-workspace 148 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://codecov.io/gh/cadu-leite/networkdays/branch/master/graph/badge.svg 2 | :target: https://codecov.io/gh/cadu-leite/networkdays 3 | :alt: code coverage 4 | 5 | .. image:: https://github.com/cadu-leite/merge2pdf/workflows/Python%20application/badge.svg 6 | :alt: workflow passed 7 | 8 | 9 | ********* 10 | merge2pdf 11 | ********* 12 | 13 | 14 | Description 15 | ----------- 16 | 17 | Merges ".pdf" and image files into one ".pdf" file. 18 | 19 | 20 | Why? 21 | ---- 22 | 23 | Imagine you have to put together a bunch of files that is part of a processes, and those files are images, PDFs, and other docs. This little piece of code will help you to do that with a better looking not just converting images to PDF Format. 24 | 25 | The images are merged using `reportlab canvas`. 26 | 27 | *if you have a better solution, or you can manage it to have less dependencies drop a comments or issue* ;) 28 | 29 | Dependencies 30 | ------------ 31 | 32 | - io (BytesIO) 33 | - PyPDF4 34 | - reportlab 35 | 36 | 37 | How to use 38 | ---------- 39 | 40 | .. code-block:: python 41 | 42 | from merge2pdf import MergeToPdf 43 | 44 | if __name__ == '__main__': 45 | # make a list fo PDF files and images 46 | image_and_pdf_files = [ 47 | 'tests/pdf_samples/pdf_sample_A Sample PDF_loremIpsum_pages_01.pdf', 48 | 'tests/pdf_samples/sample_pdf_commandline_xhtml2pdf_generated_pages_01.pdf', 49 | # ... 50 | # ... 51 | # ... 52 | 'tests/pdf_samples/issue_repo_pypdf4.pdf', 53 | 'tests/pdf_samples/issue_repo_pypdf4_test.pdf', 54 | 'tests/pdf_samples/jpeg_w_350.jpg' 55 | ] 56 | # instantiate a class 57 | m = MergeToPdf(paths_list=image_and_pdf_files, output_file_path='pdf_gerado.pdf') 58 | # merge. 59 | m.merge_pdfs() 60 | 61 | 62 | ------------------------------------------------------------ 63 | 64 | Descrição (pt_BR): 65 | * Unifica arquivos ".pdf" e arquivos de imagens em um único PDF* 66 | 67 | ------------------------------------------- 68 | 69 | 70 | -------------------------------------------------------------------------------- /merge2pdfs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/merge2pdfs/__init__.py -------------------------------------------------------------------------------- /merge2pdfs/__main__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | from merge2pdfs.merge2pdf import MergeToPdf 4 | 5 | DESCRIPTION = ''' 6 | Merge files into one pdf file. 7 | ''' 8 | 9 | 10 | def command_line_parser(sys_args): 11 | print(f'sys args:{sys_args}') 12 | parser = argparse.ArgumentParser(description=DESCRIPTION) 13 | parser.add_argument( 14 | '-o', '--output', 15 | # dest='output_file_path', 16 | default='merged_pdfs.pdf', 17 | help='Path and name to the outputfile') 18 | parser.add_argument( 19 | '-f', '--filespath', 20 | type=str, 21 | nargs='+', 22 | help='list of files to merge (images and PDF)') 23 | 24 | args = parser.parse_args(sys_args) 25 | return args 26 | 27 | 28 | def main(args): 29 | m = MergeToPdf(args.filespath, args.output) 30 | m.merge_pdfs() 31 | 32 | 33 | if __name__ == '__main__': 34 | args = command_line_parser(sys.argv[1:]) 35 | print(f'args: {args}') 36 | main(args) 37 | -------------------------------------------------------------------------------- /merge2pdfs/merge2pdf.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | from PyPDF4 import PdfFileReader, PdfFileWriter 4 | from PyPDF4 import PdfFileMerger 5 | 6 | from reportlab.lib.pagesizes import A4 7 | from reportlab.pdfgen import canvas 8 | 9 | 10 | class CommandError(Exception): 11 | """ 12 | """ 13 | 14 | def __init__(self, *args, returncode=1, **kwargs): 15 | self.returncode = returncode 16 | super().__init__(*args, **kwargs) 17 | 18 | 19 | class MergeToPdf: 20 | 21 | def __init__(self, paths_list: list, output_file_path: str = 'outputfile.pdf'): 22 | ''' 23 | A class to merge PDFs files. Pass a lista of File Paths to be merged 24 | 25 | Args: 26 | paths_list (list): a list of paths(string) ou a lists of tuples 27 | like (, 28 | output_file_path (str, optional): a string defining the outputpath. 29 | ''' 30 | if not isinstance(paths_list, list): 31 | raise CommandError('Its not a list') 32 | 33 | self.paths_list = paths_list 34 | self.output_file_path = output_file_path 35 | 36 | def _image_to_page_(self, image_path): 37 | ''' 38 | "draw" a image inside a PDF Page 39 | 40 | the reason to djust not use PIL was aesthetic, 41 | to have an image inside a real PDF page not just an image converted to PDF 42 | 43 | todo: an option to rotate the image if H > w. 44 | todo: an option to PDF Page Size 45 | 46 | Args: 47 | image_path (str): a path to a bitmap image file, jpeg, gif ... 48 | 49 | Returns: 50 | PdfFileReader object: its a PDF file to be merged into the result PDF. 51 | ''' 52 | 53 | imgTemp = BytesIO() 54 | imgDoc = canvas.Canvas(imgTemp, pagesize=A4) # todo: param page size 55 | imgDoc.drawImage(image_path, 20, 420) # todo: param margin 56 | imgDoc.save() 57 | return PdfFileReader(BytesIO(imgTemp.getvalue())) 58 | 59 | def _path_decople_(self, path_list_item) -> tuple: 60 | ''' 61 | Decomple the file path list into the FILE PATH and Page range .. 62 | 63 | the list could be 64 | 65 | [ 66 | ('path_to_file', (pag_start, pag_ends)), 67 | ... 68 | ] 69 | 70 | to indicade the range of pages of a pdf to be merged 71 | 72 | Args: 73 | path_list_item ([type]): a list os files path 74 | 75 | Returns: 76 | tuple: (, ()) 77 | ''' 78 | if not isinstance(path_list_item, str): 79 | return tuple((path_list_item[0], path_list_item[1])) 80 | else: 81 | return tuple((path_list_item, None)) 82 | 83 | def merge_pdfs(self): 84 | ''' 85 | merge files into a PDF 86 | 87 | # todo: check file type 88 | ''' 89 | merged_pdf = PdfFileMerger() 90 | 91 | for file_path in self.paths_list: 92 | 93 | file_name, page_range = self._path_decople_(file_path) 94 | if file_name.lower().endswith(('.png', '.jpg', '.jpeg')): 95 | merged_pdf.append(fileobj=self._image_to_page_(file_name)) 96 | else: 97 | if page_range: 98 | merged_pdf.append(fileobj=file_name, pages=page_range) 99 | else: 100 | merged_pdf.append(fileobj=file_name) 101 | 102 | # write to outputfile 103 | output = open(self.output_file_path, 'wb') # output file 104 | merged_pdf.write(output) # write merge content to file 105 | output.close() 106 | merged_pdf.close() 107 | -------------------------------------------------------------------------------- /merged_pdfs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/merged_pdfs.pdf -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | certifi==2020.4.5.2 3 | chardet==3.0.4 4 | codecov==2.1.7 5 | coverage==5.1 6 | flake8==3.8.3 7 | idna==2.9 8 | mccabe==0.6.1 9 | pycodestyle==2.6.0 10 | pyflakes==2.2.0 11 | requests==2.24.0 12 | urllib3==1.25.9 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | PyPDF4==1.27.0 2 | xhtml2pdf==0.2.4 3 | reportlab==3.5.46 4 | Pillow==7.2.0 -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/__init__.py -------------------------------------------------------------------------------- /tests/imgs_jpg_png/Necto_Vertical_MONO_azul_necto_w120.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/imgs_jpg_png/Necto_Vertical_MONO_azul_necto_w120.jpg -------------------------------------------------------------------------------- /tests/imgs_jpg_png/Necto_Vertical_MONO_azul_necto_w120_jpeg2000_trans.jp2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/imgs_jpg_png/Necto_Vertical_MONO_azul_necto_w120_jpeg2000_trans.jp2 -------------------------------------------------------------------------------- /tests/imgs_jpg_png/Necto_Vertical_MONO_azul_necto_w240.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/imgs_jpg_png/Necto_Vertical_MONO_azul_necto_w240.jpg -------------------------------------------------------------------------------- /tests/imgs_jpg_png/Necto_Vertical_MONO_azul_necto_w240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/imgs_jpg_png/Necto_Vertical_MONO_azul_necto_w240.png -------------------------------------------------------------------------------- /tests/merge2pdf_sample_code.py: -------------------------------------------------------------------------------- 1 | from merge2pdf import MergeToPdf 2 | 3 | if __name__ == '__main__': 4 | 5 | image_and_pdf_files = [ 6 | 'tests/pdf_samples/pdf_sample_A Sample PDF_loremIpsum_pages_01.pdf', 7 | 'tests/pdf_samples/pdf_sample_b_pages_01.pdf', 8 | 'tests/pdf_samples/pdf_sample_dummy_w3c_pages_01.pdf', 9 | 'tests/pdf_samples/pdf_sample_googledocs_image_pages_02.pdf', 10 | ## the next PDF fail to read - invalid literal for int() with base 10: b'F-1.4' !!! 11 | # 'tests/pdf_samples/pdf_sample_googlesheet_pages_02.pdf', 12 | 'tests/pdf_samples/pdf_sample_libreoffice_exported_ISO19005_pages_02.pdf', 13 | 'tests/pdf_samples/pdf_sample_libreoffice_exported_format_FDF_pages_02.pdf', 14 | 'tests/pdf_samples/pdf_sample_libreoffice_exported_hibrid_format_pages_02.pdf', 15 | 'tests/pdf_samples/pdf_sample_libreoffice_exported_not_hybrid_ISO19005_pages_02.pdf', 16 | 'tests/pdf_samples/pdf_sample_pages_01.pdf', 17 | ('tests/pdf_samples/pdf_sample_readthedocs_pdf_networkdays_pages_019.pdf', (0, 2)), 18 | 'tests/pdf_samples/pdf_sample_text_edit_macos_pages_01.pdf', 19 | 'tests/pdf_samples/pdf_sample_wikimedia_org_pages_01.pdf', 20 | 'tests/pdf_samples/sample_pdf_commandline_xhtml2pdf_generated_pages_01.pdf', 21 | 'tests/pdf_samples/issue_repo_pypdf4.pdf', 22 | 'tests/pdf_samples/issue_repo_pypdf4_test.pdf', 23 | 'tests/pdf_samples/jpeg_w_350.jpg' 24 | ] 25 | 26 | m = MergeToPdf(paths_list=image_and_pdf_files, output_file_path='pdf_gerado.pdf') 27 | m.merge_pdfs() 28 | -------------------------------------------------------------------------------- /tests/pdf_samples/issue_repo_pypdf4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/pdf_samples/issue_repo_pypdf4.pdf -------------------------------------------------------------------------------- /tests/pdf_samples/issue_repo_pypdf4_test.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/pdf_samples/issue_repo_pypdf4_test.pdf -------------------------------------------------------------------------------- /tests/pdf_samples/jpeg_w_350.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/pdf_samples/jpeg_w_350.jpg -------------------------------------------------------------------------------- /tests/pdf_samples/pdf_sample_A Sample PDF_loremIpsum_pages_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/pdf_samples/pdf_sample_A Sample PDF_loremIpsum_pages_01.pdf -------------------------------------------------------------------------------- /tests/pdf_samples/pdf_sample_b_pages_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/pdf_samples/pdf_sample_b_pages_01.pdf -------------------------------------------------------------------------------- /tests/pdf_samples/pdf_sample_dummy_w3c_pages_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/pdf_samples/pdf_sample_dummy_w3c_pages_01.pdf -------------------------------------------------------------------------------- /tests/pdf_samples/pdf_sample_googledocs_image_pages_02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/pdf_samples/pdf_sample_googledocs_image_pages_02.pdf -------------------------------------------------------------------------------- /tests/pdf_samples/pdf_sample_googlesheet_pages_02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/pdf_samples/pdf_sample_googlesheet_pages_02.pdf -------------------------------------------------------------------------------- /tests/pdf_samples/pdf_sample_libreoffice_exported_ISO19005_pages_02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/pdf_samples/pdf_sample_libreoffice_exported_ISO19005_pages_02.pdf -------------------------------------------------------------------------------- /tests/pdf_samples/pdf_sample_libreoffice_exported_format_FDF_pages_02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/pdf_samples/pdf_sample_libreoffice_exported_format_FDF_pages_02.pdf -------------------------------------------------------------------------------- /tests/pdf_samples/pdf_sample_libreoffice_exported_hibrid_format_pages_02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/pdf_samples/pdf_sample_libreoffice_exported_hibrid_format_pages_02.pdf -------------------------------------------------------------------------------- /tests/pdf_samples/pdf_sample_libreoffice_exported_not_hybrid_ISO19005_pages_02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/pdf_samples/pdf_sample_libreoffice_exported_not_hybrid_ISO19005_pages_02.pdf -------------------------------------------------------------------------------- /tests/pdf_samples/pdf_sample_pages_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/pdf_samples/pdf_sample_pages_01.pdf -------------------------------------------------------------------------------- /tests/pdf_samples/pdf_sample_readthedocs_pdf_networkdays_pages_019.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/pdf_samples/pdf_sample_readthedocs_pdf_networkdays_pages_019.pdf -------------------------------------------------------------------------------- /tests/pdf_samples/pdf_sample_text_edit_macos_pages_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/pdf_samples/pdf_sample_text_edit_macos_pages_01.pdf -------------------------------------------------------------------------------- /tests/pdf_samples/pdf_sample_wikimedia_org_pages_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/pdf_samples/pdf_sample_wikimedia_org_pages_01.pdf -------------------------------------------------------------------------------- /tests/pdf_samples/sample_pdf_commandline_xhtml2pdf_generated_pages_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadu-leite/merge2pdf/31580f13476f23055eff7d5292c194183e9eacde/tests/pdf_samples/sample_pdf_commandline_xhtml2pdf_generated_pages_01.pdf -------------------------------------------------------------------------------- /tests/test_merge2pdf.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from PyPDF4 import PdfFileReader 4 | 5 | from merge2pdfs.merge2pdf import MergeToPdf 6 | from merge2pdfs.merge2pdf import CommandError 7 | from merge2pdfs import __main__ 8 | 9 | class TestClassMerge2PdfShellArgs(unittest.TestCase): 10 | 11 | def test_list_files(self): 12 | args = __main__.command_line_parser(['-f', 'requirements.txt', 'requirements-dev.txt']) 13 | self.assertEqual(args.filespath, ['requirements.txt', 'requirements-dev.txt']) 14 | 15 | def test_arg_outputfile(self): 16 | args = __main__.command_line_parser(['-f', 'requirements.txt', 'requirements-dev.txt']) 17 | self.assertEqual(args.output, 'merged_pdfs.pdf') 18 | 19 | def test_arg_other_outputfile(self): 20 | args = __main__.command_line_parser(['-o', 'outrooutput.pdf']) 21 | self.assertEqual(args.output, 'outrooutput.pdf') 22 | 23 | 24 | class TestClassMerge2PdfParams(unittest.TestCase): 25 | 26 | def test_recieve_a_list_to_iter_on(self): 27 | ''' 28 | assert if raise an custom error when path list not a list 29 | there is no sense to call if you dont have 2 files to merge 30 | after all 31 | ''' 32 | with self.assertRaises(CommandError): 33 | MergeToPdf(paths_list=None) 34 | 35 | class TestClassMerge2Pdf(unittest.TestCase): 36 | 37 | def test_init(self): 38 | ''' 39 | ''' 40 | image_paths = [ 41 | 42 | ] 43 | m = MergeToPdf(paths_list=image_paths) 44 | 45 | self.assertEqual(len(image_paths), len(m.paths_list)) 46 | 47 | def test_decomple_has_page_range(self): 48 | ''' 49 | ''' 50 | image_paths = [ 51 | ('filepat.pdf', (0, 1)), 52 | ] 53 | m = MergeToPdf(paths_list=image_paths) 54 | r = m._path_decople_(image_paths[0]) 55 | 56 | self.assertEqual(r, tuple(('filepat.pdf', (0, 1))), msg='page_range fail') 57 | 58 | def test_decomple_no_page_range(self): 59 | ''' 60 | ''' 61 | image_paths = [ 62 | 'filepath.pdf', 63 | ] 64 | m = MergeToPdf(paths_list=image_paths) 65 | r = m._path_decople_(image_paths[0]) 66 | 67 | self.assertEqual(r, tuple(('filepath.pdf', None))) 68 | 69 | def test_merge_pdf_output(self): 70 | 71 | image_paths = [ 72 | 'tests/pdf_samples/jpeg_w_350.jpg', 73 | 'tests/pdf_samples/pdf_sample_A Sample PDF_loremIpsum_pages_01.pdf', 74 | 'tests/pdf_samples/pdf_sample_b_pages_01.pdf', 75 | 'tests/pdf_samples/pdf_sample_dummy_w3c_pages_01.pdf', 76 | 'tests/pdf_samples/pdf_sample_googledocs_image_pages_02.pdf', 77 | ## the next PDF fail to read - invalid literal for int() with base 10: b'F-1.4' !!! 78 | # 'tests/pdf_samples/pdf_sample_googlesheet_pages_02.pdf', 79 | 'tests/pdf_samples/pdf_sample_libreoffice_exported_ISO19005_pages_02.pdf', 80 | 'tests/pdf_samples/pdf_sample_libreoffice_exported_format_FDF_pages_02.pdf', 81 | 'tests/pdf_samples/pdf_sample_libreoffice_exported_hibrid_format_pages_02.pdf', 82 | 'tests/pdf_samples/pdf_sample_libreoffice_exported_not_hybrid_ISO19005_pages_02.pdf', 83 | 'tests/pdf_samples/pdf_sample_pages_01.pdf', 84 | ('tests/pdf_samples/pdf_sample_readthedocs_pdf_networkdays_pages_019.pdf', (0, 2)), 85 | 'tests/pdf_samples/pdf_sample_text_edit_macos_pages_01.pdf', 86 | 'tests/pdf_samples/pdf_sample_wikimedia_org_pages_01.pdf', 87 | 'tests/pdf_samples/sample_pdf_commandline_xhtml2pdf_generated_pages_01.pdf', 88 | 'tests/pdf_samples/issue_repo_pypdf4.pdf', 89 | 'tests/pdf_samples/issue_repo_pypdf4_test.pdf', 90 | ] 91 | m = MergeToPdf(paths_list=image_paths, output_file_path='test_merged_pdf.pdf') 92 | m.merge_pdfs() 93 | with open('test_merged_pdf.pdf', "rb") as outputfile: 94 | generated_pdf = PdfFileReader(outputfile) 95 | pages = generated_pdf.getNumPages() 96 | 97 | self.assertEqual(pages, 23) 98 | 99 | --------------------------------------------------------------------------------