├── .github
    └── workflows
    │   ├── linting.yml
    │   ├── release.yml
    │   └── test.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── img
    ├── logo.png
    ├── pdfitdown_ui_demo.mp4
    └── thumbnail.png
├── pyproject.toml
├── src
    └── pdfitdown
    │   ├── __init__.py
    │   ├── __pycache__
    │       ├── pdfconversion.cpython-313.pyc
    │       └── reader.cpython-313.pyc
    │   ├── pdfconversion.py
    │   ├── pdfitdown_cli.py
    │   └── pdfitdown_ui.py
├── tests
    ├── data
    │   ├── test.txt
    │   ├── test0.png
    │   ├── test1.pptx
    │   ├── test2.md
    │   ├── test3.json
    │   ├── test4.docx
    │   └── test5.zip
    ├── llamaparse
    │   ├── test1.pptx
    │   └── test4.docx
    ├── test_llamaparse.py
    ├── test_markitdown.py
    └── test_ui.py
└── uv.lock


/.github/workflows/linting.yml:
--------------------------------------------------------------------------------
 1 | name: Linting
 2 | 
 3 | on:
 4 |   pull_request:
 5 | 
 6 | jobs:
 7 |   lint:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - uses: actions/checkout@v4
11 | 
12 |       - name: Install uv
13 |         uses: astral-sh/setup-uv@v6
14 | 
15 |       - name: Set up Python
16 |         run: uv python install 3.12
17 | 
18 |       - name: Install pre-commit
19 |         shell: bash
20 |         run: uv venv && source .venv/bin/activate && uv pip install pre-commit
21 | 
22 |       - name: Run linter
23 |         shell: bash
24 |         run: uv run -- pre-commit run -a
25 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: PyPI Release
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - "v[0-9]+.[0-9]+.[0-9]+*"  # GitHub filter pattern, not a regex: "+" repeats the preceding class, so multi-digit majors (v10.0.0) also trigger a release
 7 | 
 8 | jobs:
 9 |   release:
10 |     runs-on: ubuntu-latest
11 |     permissions:
12 |       contents: write
13 | 
14 |     steps:
15 |       - name: Checkout
16 |         uses: actions/checkout@v4
17 | 
18 |       - name: Install Hatch
19 |         run: pip install hatch
20 | 
21 |       - name: Publish on PyPi
22 |         env:
23 |           HATCH_INDEX_USER: __token__
24 |           HATCH_INDEX_AUTH: ${{ secrets.PYPI_API_TOKEN }}
25 |         run: |
26 |           hatch build
27 |           hatch publish -y
28 | 
29 |       - name: Create GitHub Release
30 |         uses: ncipollo/release-action@v1
31 |         with:
32 |           artifacts: "dist/*"
33 |           generateReleaseNotes: true
34 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | # This workflow comes from https://github.com/ofek/hatch-mypyc
 2 | # https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml
 3 | name: test
 4 | 
 5 | on:
 6 |   # push:
 7 |   #   branches:
 8 |   #     - main
 9 |   pull_request:
10 | 
11 | concurrency:
12 |   group: test-${{ github.head_ref }}
13 |   cancel-in-progress: true
14 | 
15 | env:
16 |   PYTHONUNBUFFERED: "1"
17 |   FORCE_COLOR: "1"
18 | 
19 | jobs:
20 |   test-linux:
21 |     name: Python ${{ matrix.python-version }} on Linux
22 |     runs-on: ubuntu-latest
23 |     strategy:
24 |       fail-fast: false
25 |       matrix:
26 |         python-version: ["3.10", "3.11", "3.12"]
27 | 
28 |     steps:
29 |       - uses: actions/checkout@v4
30 | 
31 |       - name: Set up Python ${{ matrix.python-version }}
32 |         uses: actions/setup-python@v5
33 |         with:
34 |           python-version: ${{ matrix.python-version }}
35 | 
36 |       - name: Install Hatch
37 |         run: pip install --upgrade hatch
38 | 
39 |       - name: Run tests
40 |         env:
41 |           llamacloud_api_key: ${{ secrets.LLAMACLOUD_API_KEY }}
42 |         run: hatch run test
43 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | dist/
2 | src/pdfitdown.egg-info/
3 | .pytest_cache/
4 | build/
5 | .ruff_cache/
6 | # Bytecode caches anywhere in the tree (also covers tests/__pycache__/ and src/pdfitdown/__pycache__/)
7 | __pycache__/
8 | tests/data/*.pdf
9 | .env


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | default_language_version:
 3 |   python: python3
 4 | 
 5 | repos:
 6 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 7 |     rev: v4.5.0
 8 |     hooks:
 9 |       - id: check-byte-order-marker
10 |       - id: check-merge-conflict
11 |       - id: check-symlinks
12 |       - id: check-toml
13 |         exclude: llama-index-core/llama_index/core/_static
14 |       - id: check-yaml
15 |         exclude: llama-index-core/llama_index/core/_static
16 |       - id: detect-private-key
17 |       - id: end-of-file-fixer
18 |         exclude: llama-index-core/llama_index/core/_static
19 |       - id: mixed-line-ending
20 |         exclude: llama-index-core/llama_index/core/_static
21 |       - id: trailing-whitespace
22 |         exclude: llama-index-core/llama_index/core/_static
23 | 
24 |   - repo: https://github.com/charliermarsh/ruff-pre-commit
25 |     rev: v0.11.8
26 |     hooks:
27 |       - id: ruff
28 |         args: [--exit-non-zero-on-fix, --fix]
29 |         exclude: ".*poetry.lock|.*_static"
30 | 
31 |   # - repo: https://github.com/psf/black-pre-commit-mirror
32 |   #   rev: 23.10.1
33 |   #   hooks:
34 |   #     - id: black-jupyter
35 |   #       name: black-src
36 |   #       alias: black
37 |   #       exclude: "^docs|.*poetry.lock|.*_static"
38 | 
39 |   - repo: https://github.com/pre-commit/mirrors-mypy
40 |     rev: v1.0.1
41 |     hooks:
42 |       - id: mypy
43 |         additional_dependencies:
44 |           [
45 |             "types-requests",
46 |             "types-Deprecated",
47 |             "types-redis",
48 |             "types-setuptools",
49 |             "types-PyYAML",
50 |             "types-protobuf==4.24.0.4",
51 |           ]
52 |         args:
53 |           [
54 |             --namespace-packages,
55 |             --explicit-package-bases,
56 |             --disallow-untyped-defs,
57 |             --ignore-missing-imports,
58 |             --python-version=3.9,
59 |           ]
60 |         entry: bash -c "export MYPYPATH=ingest_anything"
61 | 
62 |   - repo: https://github.com/psf/black-pre-commit-mirror
63 |     rev: 23.10.1
64 |     hooks:
65 |       - id: black-jupyter
66 |         name: black-docs-py
67 |         alias: black
68 |         files: ^(docs/|examples/)
69 |         # Using PEP 8's line length in docs prevents excess left/right scrolling
70 |         args: [--line-length=79]
71 | 
72 |   - repo: https://github.com/pre-commit/mirrors-prettier
73 |     rev: v3.0.3
74 |     hooks:
75 |       - id: prettier
76 |         exclude: llama-index-core/llama_index/core/_static|poetry.lock|llama-index-legacy/llama_index/legacy/_static|docs/docs
77 | 
78 |   - repo: https://github.com/pappasam/toml-sort
79 |     rev: v0.23.1
80 |     hooks:
81 |       - id: toml-sort-fix
82 |         exclude: ".*poetry.lock|.*_static"
83 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to `PdfItDown`
 2 | 
 3 | Do you want to contribute to this project? Make sure to read these guidelines first :)
 4 | 
 5 | ## Issue
 6 | 
 7 | **When to do it**:
 8 | 
 9 | - You found bugs but you don't know how to solve them or don't have the time/will to solve them
10 | - You want new features but you don't know how to implement them or don't have time/will to do the implementation
11 | 
12 | > ⚠️ _Always check open and closed issues before you submit yours to avoid duplicates_
13 | 
14 | **How to do it**:
15 | 
16 | - Open an issue
17 | - Give the issue a meaningful title (short but effective problem description)
18 | - Describe the problem
19 | 
20 | ## Traditional contribution
21 | 
22 | **When to do it**:
23 | 
24 | - You found bugs and corrected them
25 | - You optimized/improved the code
26 | - You added new features that you think could be useful to others
27 | 
28 | **How to do it**:
29 | 
30 | 1. Fork this repository
31 | 2. Test your changes locally
32 | 
33 | ```
34 | uv pip install hatch
35 | cd PdfItDown/
36 | hatch run test
37 | ```
38 | 
39 | 3. If all the tests pass, you can commit your changes.
40 | 4. Submit a pull request (make sure to provide a thorough description of the changes)
41 | 
42 | > **Note:** _If you add a new feature, you might need to add new tests!_
43 | 
44 | ### Thanks for contributing!
45 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Clelia (Astra) Bertelli
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <div align="center">
  2 | <h1>PdfItDown</h1>
  3 | <h2>Convert Everything to PDF</h2>
  4 | </div>
  5 | <br>
  6 | <div align="center">
  7 |     <a href="https://discord.gg/AXcVf269"><img src="https://img.shields.io/badge/Discord-%235865F2.svg?style=for-the-badge&logo=discord&logoColor=white" alt="Join Discord Server" width=200 height=60></a>
  8 | </div>
  9 | <br>
 10 | <div align="center">
 11 |     <img src="https://raw.githubusercontent.com/AstraBert/PdfItDown/main/img/logo.png" alt="PdfItDown Logo">
 12 | </div>
 13 | 
 14 | **PdfItDown** is a Python package that relies on [`markitdown` by Microsoft](https://github.com/microsoft/markitdown/), [`markdown_pdf`](https://github.com/vb64/markdown-pdf), [img2pdf](https://pypi.org/project/img2pdf/) and [`LlamaIndex`](https://www.llamaindex.ai/). Visit us on our [documentation website](https://pdfitdown.eu)!
 15 | 
 16 | ### Applicability
 17 | 
 18 | **PdfItDown** is applicable to the following file formats:
 19 | 
 20 | - Markdown
 21 | - PowerPoint
 22 | - Word
 23 | - Excel
 24 | - HTML
 25 | - Text-based formats (CSV, XML, JSON)
 26 | - ZIP files (iterates over contents)
 27 | - Image files (PNG, JPG)
 28 | 
 29 | The format-specific support needs to be evaluated for the specific reader you are using.
 30 | 
 31 | ### How does it work?
 32 | 
 33 | **PdfItDown** works in a very simple way:
 34 | 
 35 | - From **markdown** to PDF
 36 | 
 37 | ```mermaid
 38 | graph LR
 39 | 2(Input File) --> 3[Markdown content]
 40 | 3[Markdown content] --> 4[markdown-pdf]
 41 | 4[markdown-pdf] --> 5(PDF file)
 42 | ```
 43 | 
 44 | - From **image** to PDF
 45 | 
 46 | ```mermaid
 47 | graph LR
 48 | 2(Input File) --> 3[Bytes]
 49 | 3[Bytes] --> 4[img2pdf]
 50 | 4[img2pdf] --> 5(PDF file)
 51 | ```
 52 | 
 53 | - From other **text-based** file formats to PDF
 54 | 
 55 | ```mermaid
 56 | graph LR
 57 | 2(Input File) -->  3[LlamaIndex-compatible Reader - defaults to MarkItDown]
 58 | 3[LlamaIndex-compatible Reader - defaults to MarkItDown] -->  4[Markdown content]
 59 | 4[Markdown content] --> 5[markdown-pdf]
 60 | 5[markdown-pdf] --> 6(PDF file)
 61 | ```
 62 | 
 63 | ### Installation and Usage
 64 | 
 65 | To install **PdfItDown**, just run:
 66 | 
 67 | ```bash
 68 | python3 -m pip install pdfitdown
 69 | ```
 70 | 
 71 | You can now use the **command line tool**:
 72 | 
 73 | ```
 74 | usage: pdfitdown [-h] [-i INPUTFILE] [-o OUTPUTFILE] [-t TITLE] [-d DIRECTORY]
 75 | 
 76 | options:
 77 |   -h, --help            show this help message and exit
 78 |   -i, --inputfile INPUTFILE
 79 |                         Path to the input file(s) that need to be converted to PDF. The path should be comma
 80 |                         separated: input1.csv,input2.md,...,inputN.xml.
 81 |   -o, --outputfile OUTPUTFILE
 82 |                         Path to the output PDF file(s). If more than one input file is provided, you should provide an
 83 |                         equally long list of output files. The path should be comma separated:
 84 |                         output1.pdf,output2.pdf,...,outputN.pdf. Defaults to 'None'
 85 |   -t, --title TITLE     Title to include in the PDF metadata. Default: 'File Converted with PdfItDown'. If more than
 86 |                         one file is provided, it will be ignored.
 87 |   -d, --directory DIRECTORY
 88 |                         Directory whose files you want to bulk-convert to PDF. If the --inputfile argument is also
 89 |                         provided, it will be ignored. Defaults to None.
 90 | ```
 91 | 
 92 | An example usage can be:
 93 | 
 94 | ```bash
 95 | pdfitdown -i README.md -o README.pdf -t "README"
 96 | ```
 97 | 
 98 | Or you can use it **inside your python scripts**:
 99 | 
100 | ```python
101 | from pdfitdown.pdfconversion import Converter
102 | 
103 | converter = Converter()
104 | converter.convert(file_path = "business_growth.md", output_path = "business_growth.pdf", title="Business Growth for Q3 in 2024")
105 | converter.convert(file_path = "logo.png", output_path = "logo.pdf")
106 | converter.convert(file_path = "users.xlsx", output_path = "users.pdf")
107 | ```
108 | 
109 | You can also convert **multiple files at once**:
110 | 
111 | - In the CLI:
112 | 
113 | ```bash
114 | # with custom output paths
115 | pdfitdown -i "test0.png,test1.csv" -o "testoutput0.pdf,testoutput1.pdf"
116 | # with inferred output paths
117 | pdfitdown -i "test0.png,test1.csv"
118 | ```
119 | 
120 | - In the Python API:
121 | 
122 | ```python
123 | from pdfitdown.pdfconversion import Converter
124 | from llama_parse import LlamaParse
125 | from dotenv import load_dotenv
126 | import os
127 | 
128 | load_dotenv()
129 | 
130 | reader = LlamaParse(api_key=os.getenv("llamacloud_api_key"), result_type="markdown")
131 | converter = Converter(reader=reader)
132 | # with custom output paths
133 | converter.multiple_convert(file_paths = ["business_growth.md", "logo.png"], output_paths = ["business_growth.pdf", "logo.pdf"])
134 | # with inferred output paths
135 | converter.multiple_convert(file_paths = ["business_growth.md", "logo.png"])
136 | ```
137 | 
138 | You can bulk-convert **all the files in a directory**:
139 | 
140 | - In the CLI:
141 | 
142 | ```bash
143 | pdfitdown -d tests/data/testdir
144 | ```
145 | 
146 | - In the Python API:
147 | 
148 | ```python
149 | from pdfitdown.pdfconversion import Converter
150 | 
151 | converter = Converter()
152 | output_paths = converter.convert_directory(directory_path = "tests/data/testdir")
153 | print(output_paths)
154 | ```
155 | 
156 | Or you can just launch a [Gradio](https://gradio.app)-based user interface:
157 | 
158 | ```bash
159 | pdfitdown_ui
160 | ```
161 | 
162 | You will be able to see the application running on `http://localhost:7860` within seconds!
163 | 
164 | Watch the demo here:
165 | 
166 | [![Watch the video demo!](https://raw.githubusercontent.com/AstraBert/PdfItDown/main/img/thumbnail.png)](https://raw.githubusercontent.com/AstraBert/PdfItDown/main/img/pdfitdown_ui_demo.mp4)
167 | 
168 | ### Contributing
169 | 
170 | Contributions are always welcome!
171 | 
172 | Find contribution guidelines at [CONTRIBUTING.md](https://github.com/AstraBert/PdfItDown/tree/main/CONTRIBUTING.md)
173 | 
174 | ### License and Funding
175 | 
176 | This project is open-source and is provided under an [MIT License](https://github.com/AstraBert/PdfItDown/tree/main/LICENSE).
177 | 
178 | If you found it useful, please consider [funding it](https://github.com/sponsors/AstraBert).
179 | 


--------------------------------------------------------------------------------
/img/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/PdfItDown/593434bd104b2c6f89464c5943ebe9573c60d475/img/logo.png


--------------------------------------------------------------------------------
/img/pdfitdown_ui_demo.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/PdfItDown/593434bd104b2c6f89464c5943ebe9573c60d475/img/pdfitdown_ui_demo.mp4


--------------------------------------------------------------------------------
/img/thumbnail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/PdfItDown/593434bd104b2c6f89464c5943ebe9573c60d475/img/thumbnail.png


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["hatchling>=1.0.0"]
 3 | build-backend = "hatchling.build"
 4 | 
 5 | [options.package_data]
 6 | pdfitdown = ["*"]
 7 | 
 8 | [project]
 9 | name = "pdfitdown"
10 | version = "1.5.1"
11 | authors = [
12 |   {name = "Clelia (Astra) Bertelli", email = "astraberte9@gmail.com"}
13 | ]
14 | description = "PdfItDown - Convert Everything to PDF"
15 | readme = "README.md"
16 | requires-python = ">=3.10,<3.13"
17 | classifiers = [
18 |   "Programming Language :: Python :: 3",
19 |   "License :: OSI Approved :: MIT License",
20 |   "Operating System :: OS Independent"
21 | ]
22 | dependencies = [
23 |   'llama-index-readers-markitdown',
24 |   'markdown-pdf',
25 |   'img2pdf',
26 |   'pillow',
27 |   'gradio',
28 |   'termcolor'
29 | ]
30 | 
31 | [project.scripts]
32 | pdfitdown = "pdfitdown.pdfitdown_cli:main"
33 | pdfitdown_ui = "pdfitdown.pdfitdown_ui:main"
34 | 
35 | [project.urls]
36 | Homepage = "https://github.com/AstraBert/PdfItDown"
37 | Issues = "https://github.com/AstraBert/PdfItDown/issues"
38 | 
39 | [tool.hatch.build.targets.wheel]
40 | only-include = ["src/pdfitdown"]
41 | 
42 | [tool.hatch.build.targets.wheel.sources]
43 | "src" = ""
44 | 
45 | [tool.hatch.envs.default]
46 | dependencies = [
47 |   "pytest",
48 |   "llama_parse"
49 | ]
50 | 
51 | [tool.hatch.envs.default.py-version]
52 | 40 = "3.10"
53 | 41 = "3.11"
54 | 42 = "3.12"
55 | 
56 | [tool.hatch.envs.default.scripts]
57 | test = "cp src/pdfitdown/pdfconversion.py tests/ && cp src/pdfitdown/pdfitdown_ui.py tests/ && pytest tests/*.py -p no:warnings && rm tests/pdfconversion.py && rm tests/pdfitdown_ui.py"
58 | 
59 | [tool.setuptools.packages.find]
60 | where = ["src"]
61 | include = ["pdfitdown*"]
62 | 


--------------------------------------------------------------------------------
/src/pdfitdown/__init__.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | warnings.filterwarnings("ignore")  # NOTE(review): runs on package import and adds a catch-all "ignore" filter, so it presumably also hides the FileExistsWarning issued in pdfconversion.py; confirm this is intended
3 | 


--------------------------------------------------------------------------------
/src/pdfitdown/__pycache__/pdfconversion.cpython-313.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/PdfItDown/593434bd104b2c6f89464c5943ebe9573c60d475/src/pdfitdown/__pycache__/pdfconversion.cpython-313.pyc


--------------------------------------------------------------------------------
/src/pdfitdown/__pycache__/reader.cpython-313.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/PdfItDown/593434bd104b2c6f89464c5943ebe9573c60d475/src/pdfitdown/__pycache__/reader.cpython-313.pyc


--------------------------------------------------------------------------------
/src/pdfitdown/pdfconversion.py:
--------------------------------------------------------------------------------
  1 | import img2pdf
  2 | import warnings
  3 | import os
  4 | from PIL import Image
  5 | from markdown_pdf import MarkdownPdf, Section
  6 | from pydantic import BaseModel, field_validator, model_validator
  7 | from pathlib import Path
  8 | from typing import List, Optional
  9 | from typing_extensions import Self
 10 | from llama_index.core.readers.base import BaseReader
 11 | from llama_index.readers.markitdown import MarkItDownReader
 12 | 
 13 | class FilePath(BaseModel):
 14 |     file: str
 15 |     @field_validator("file")
 16 |     def is_valid_file(cls, file: str):
 17 |         p = Path(file)
 18 |         if not p.is_file():
 19 |             raise ValueError(f"{file} is not a file")
 20 |         return file
 21 | 
 22 | class FileExistsWarning(Warning):
 23 |     """Warning category issued when the requested output PDF path already exists and is about to be overwritten."""
 24 | 
 25 | class DirPath(BaseModel):
 26 |     path: str
 27 |     @model_validator(mode="after")
 28 |     def validate_dir_path(self) -> Self:
 29 |         if Path(self.path).is_dir():
 30 |             if len(os.listdir(self.path)) == 0:
 31 |                 raise ValueError("You should provide a non-empty directory")
 32 |             else:
 33 |                 return self
 34 |         else:
 35 |             raise ValueError("You should provide the path for an existing directory")
 36 | 
 37 | class OutputPath(BaseModel):
 38 |     file: str
 39 |     @field_validator("file")
 40 |     def file_exists_warning(cls, file: str):
 41 |         if os.path.splitext(file)[1] != ".pdf":
 42 |             raise ValueError("Output file must be a PDF")
 43 |         p = Path(file)
 44 |         if p.is_file():
 45 |             warnings.warn(f"The file {file} already exists, you are about to overwrite it", FileExistsWarning)
 46 |         return file
 47 | 
 48 | class MultipleFileConversion(BaseModel):
 49 |     input_files: List[FilePath]
 50 |     output_files: List[str] | List[OutputPath] | None
 51 |     @model_validator(mode="after")
 52 |     def validate_multiple_file_conversion(self) -> Self:
 53 |         if self.output_files is not None and len(self.input_files) != len(self.output_files):
 54 |             raise ValueError("Input and output files must be lists of the same length")
 55 |         else:
 56 |             if self.output_files is None:
 57 |                 self.output_files = [OutputPath(file=(fl.file.replace(os.path.splitext(fl.file)[1],".pdf"))) for fl in self.input_files]
 58 |             else:
 59 |                 if isinstance(self.output_files[0], str):
 60 |                     self.output_files = [OutputPath(file=fl) for fl in self.output_files]
 61 |         return self
 62 | 
 63 | class Converter:
 64 |     """A class for converting .docx, .html, .xml, .json, .csv, .md, .pptx, .xlsx, .png, .jpg, .png, .zip files into PDF"""
 65 |     def __init__(self, reader: Optional[BaseReader] = None) -> None:
 66 |         """
 67 |         Initialize the Converter class.
 68 | 
 69 |         Args:
 70 |             reader (Optional[BaseReader]): the reader to extract the file text (needs to be LlamaIndex-compatible). Defaults to MarkItDown reader.
 71 |         Returns:
 72 |             None
 73 |         """
 74 |         if reader is not None:
 75 |             self._reader = reader
 76 |         else:
 77 |             self._reader = MarkItDownReader()
 78 |         return
 79 |     def convert(self,  file_path: str, output_path: str, title: str = "File Converted with PdfItDown"):
 80 |         """
 81 |         Convert various document types into PDF format (supports .docx, .html, .xml, .json, .csv, .md, .pptx, .xlsx, .png, .jpg, .png, .zip).
 82 | 
 83 |         Args:
 84 |             file_path (str): The path to the input file
 85 |             output_path (str): The path to the output file
 86 |             title (str): The title for the PDF document (defaults to: 'File Converted with PdfItDown')
 87 |         Returns:
 88 |             output_path (str): Path to the output file
 89 |         Raises:
 90 |             ValidationError: if the format of the input file is not support or if the format of the output file is not PDF
 91 |             FileExistsWarning: if the output PDF path is an existing file, it warns you that the file will be overwritten
 92 |         """
 93 |         self.file_input = FilePath(file=file_path)
 94 |         self.file_output = OutputPath(file=output_path)
 95 |         if os.path.splitext(self.file_input.file)[1] == ".md":
 96 |             f = open(self.file_input.file, "r")
 97 |             finstr = f.read()
 98 |             f.close()
 99 |             pdf = MarkdownPdf(toc_level=0)
100 |             pdf.add_section(Section(finstr))
101 |             pdf.meta["title"] = title
102 |             pdf.save(self.file_output.file)
103 |             return self.file_output.file
104 |         elif os.path.splitext(self.file_input.file)[1] in [".jpg", ".png"]:
105 |             image = Image.open(self.file_input.file)
106 |             pdf_bytes = img2pdf.convert(image.filename)
107 |             with open(self.file_output.file, "wb") as file:
108 |                 file.write(pdf_bytes)
109 |             file.close()
110 |             image.close()
111 |             return self.file_output.file
112 |         else:
113 |             try:
114 |                 result = self._reader.load_data([self.file_input.file])
115 |                 finstr = result[0].text
116 |                 pdf = MarkdownPdf(toc_level=0)
117 |                 pdf.add_section(Section(finstr))
118 |                 pdf.meta["title"] = title
119 |                 pdf.save(self.file_output.file)
120 |                 return self.file_output.file
121 |             except Exception:
122 |                 return None
123 |     def multiple_convert(self,  file_paths: List[str], output_paths: Optional[List[str]] = None):
124 |         """
125 |         Convert various document types into PDF format (supports .docx, .html, .xml, .json, .csv, .md, .pptx, .xlsx, .png, .jpg, .png, .zip). Converts multiple files at once.
126 |         Args:
127 |             file_paths (str): The paths to the input files
128 |             output_paths (Optional[str]): The path to the output files
129 |         Returns:
130 |             output_paths (List[str]): Paths to the output files
131 |         Raises:
132 |             ValidationError: if the format of the input file is not support or if the format of the output file is not PDF
133 |             FileExistsWarning: if the output PDF path is an existing file, it warns you that the file will be overwritten
134 |         """
135 |         input_files = [FilePath(file=fl) for fl in file_paths]
136 |         to_convert_list = MultipleFileConversion(input_files=input_files, output_files=output_paths)
137 |         output_fls: List[OutputPath] = []
138 |         for i in range(len(to_convert_list.input_files)):
139 |             result = self.convert(file_path=to_convert_list.input_files[i].file, output_path=to_convert_list.output_files[i].file)
140 |             if result is not None:
141 |                 output_fls.append(to_convert_list.output_files[i])
142 |         return [el.file for el in output_fls]
143 |     def convert_directory(self, directory_path: str):
144 |         """
145 |         Convert various document types into PDF format (supports .docx, .html, .xml, .json, .csv, .md, .pptx, .xlsx, .png, .jpg, .png, .zip). Converts all the files in a directory at once.
146 |         Args:
147 |             directory_path (str): The paths to the input files
148 |         Returns:
149 |             output_paths (List[str]): Paths to the output files
150 |         Raises:
151 |             ValidationError: if the format of the input file is not support or if the format of the output file is not PDF
152 |             FileExistsWarning: if the output PDF path is an existing file, it warns you that the file will be overwritten
153 |         """
154 |         dirpath = DirPath(path=directory_path)
155 |         fls = []
156 |         p = os.walk(dirpath.path)
157 |         for root, parent, file in p:
158 |             for f in file:
159 |                 fls.append(root+"/"+f)
160 |         output_paths = self.multiple_convert(file_paths=fls)
161 |         return output_paths
162 | 


--------------------------------------------------------------------------------
/src/pdfitdown/pdfitdown_cli.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from argparse import ArgumentParser
 3 | from .pdfconversion import Converter
 4 | from termcolor import cprint
 5 | from pydantic import ValidationError
 6 | 
 7 | def main():
 8 |     parser = ArgumentParser()
 9 |     parser.add_argument("-i", "--inputfile",
10 |                        help="Path to the input file(s) that need to be converted to PDF. The path should be comma separated: input1.csv,input2.md,...,inputN.xml.",
11 |                        required=False, type=str, default=None)
12 |     parser.add_argument("-o", "--outputfile",
13 |                        help="Path to the output PDF file(s). If more than one input file is provided, you should provide an equally long list of output files. The path should be comma separated: output1.pdf,output2.pdf,...,outputN.pdf. Defaults to 'None'",
14 |                        required=False, type=str, default=None)
15 |     parser.add_argument("-t", "--title",
16 |                        help="Title to include in the PDF metadata. Default: 'File Converted with PdfItDown'. If more than one file is provided, it will be ignored.",
17 |                        required=False, default="File Converted with PdfItDown", type=str)
18 |     parser.add_argument("-d", "--directory",
19 |                        help="Directory whose files you want to bulk-convert to PDF. If the --inputfile argument is also provided, it will be ignored. Defaults to None.",
20 |                        required=False, default=None, type=str)
21 |     args = parser.parse_args()
22 |     inf = args.inputfile
23 |     outf = args.outputfile
24 |     diri = args.directory
25 |     titl = args.title
26 |     conv = Converter()
27 |     try:
28 |         if inf is not None:
29 |             if outf is not None and len(inf.split(",")) > 1:
30 |                 outf = conv.multiple_convert(inf.split(","), outf.split(","))
31 |                 cprint("Conversion successful!🎉", color="green", attrs=["bold"], file=sys.stdout)
32 |                 sys.exit(0)
33 |             elif outf is None and len(inf.split(",")) > 1:
34 |                 outf = conv.multiple_convert(inf.split(","), outf)
35 |                 cprint("Conversion successful!🎉", color="green", attrs=["bold"], file=sys.stdout)
36 |                 sys.exit(0)
37 |             elif outf is not None and len(inf.split(",")) == 1:
38 |                 outf = conv.convert(inf.split(",")[0], outf.split(",")[0], title=titl)
39 |                 cprint("Conversion successful!🎉", color="green", attrs=["bold"], file=sys.stdout)
40 |                 sys.exit(0)
41 |             else:
42 |                 cprint("ERROR! Invalid input provided, check your input and output files",color="red", file=sys.stderr)
43 |                 sys.exit(1)
44 |         elif inf is None and diri is None:
45 |             cprint("ERROR! You should provide at least one of --inputfile or --directory",color="red", file=sys.stderr)
46 |             sys.exit(1)
47 |         else:
48 |             outf = conv.convert_directory(diri)
49 |             cprint("Conversion successful!🎉", color="green", attrs=["bold"], file=sys.stdout)
50 |             sys.exit(0)
51 |     except ValidationError as e:
52 |         cprint(f"ERROR! Error:\n\n{e}\n\nwas raised during conversion",color="red", file=sys.stderr)
53 |         sys.exit(1)
54 | 
55 | if __name__ == "__main__":  # run main() only when executed as a script, not on import
56 |     main()
57 | 


--------------------------------------------------------------------------------
/src/pdfitdown/pdfitdown_ui.py:
--------------------------------------------------------------------------------
 1 | import warnings
 2 | import os
 3 | try:
 4 |     from .pdfconversion import Converter
 5 | except ImportError:
 6 |     from pdfconversion import Converter
 7 | from typing import List
 8 | import gradio as gr
 9 | 
10 | class FileNotConvertedWarning(Warning):
11 |     """Warning category used by to_pdf when converting a file raised an error, so the file was skipped."""
12 | 
13 | def to_pdf(files: List[str]) -> List[str]:
14 |     pdfs = []
15 |     converter = Converter()
16 |     for fl in files:
17 |         try:
18 |             outf = converter.convert(fl, fl.replace(os.path.splitext(fl)[1], ".pdf"))
19 |         except Exception as e:
20 |             warnings.warn(f"File {fl} not converted because of an error during the conversion: {e}", FileNotConvertedWarning)
21 |         else:
22 |             pdfs.append(outf)
23 |     return pdfs
24 | 
25 | def convert_files(files: List[str]) -> List[str]:
26 |     pdfs = to_pdf(files)
27 |     return pdfs
28 | 
29 | def main():
30 |     iface = gr.Interface(
31 |         fn=convert_files,
32 |         inputs=gr.File(label="Upload your file", file_count="multiple"),
33 |         outputs=gr.File(label="Converted PDF", file_count="multiple"),
34 |         title="File to PDF Converter",
35 |         description="Upload a file in .docx, .xlsx, .html, .pptx, .json, .csv, .xml, .md, .jpg/.jpeg, .png, .zip format, and get it converted to PDF."
36 |     )
37 |     iface.launch()
38 | 
39 | if __name__ == "__main__":  # launch the UI only when executed as a script, not on import
40 |     main()
41 | 


--------------------------------------------------------------------------------
/tests/data/test.txt:
--------------------------------------------------------------------------------
1 | This is example text
2 | 


--------------------------------------------------------------------------------
/tests/data/test0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/PdfItDown/593434bd104b2c6f89464c5943ebe9573c60d475/tests/data/test0.png


--------------------------------------------------------------------------------
/tests/data/test1.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/PdfItDown/593434bd104b2c6f89464c5943ebe9573c60d475/tests/data/test1.pptx


--------------------------------------------------------------------------------
/tests/data/test2.md:
--------------------------------------------------------------------------------
1 | ## This is a test markdown
2 | 
3 | This is a test markdown
4 | 


--------------------------------------------------------------------------------
/tests/data/test3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "apiVersion": "v1",
 3 |   "kind": "ExampleData",
 4 |   "metadata": {
 5 |     "name": "sample-data",
 6 |     "creationTimestamp": "2023-10-27T10:00:00Z",
 7 |     "labels": {
 8 |       "environment": "production",
 9 |       "application": "data-processor"
10 |     },
11 |     "annotations": {
12 |       "description": "This is a sample data set for testing purposes."
13 |     },
14 |     "uid": "a1b2c3d4-e5f6-7890-1234-567890abcdef"
15 |   },
16 |   "spec": {
17 |     "dataType": "string",
18 |     "source": "Database",
19 |     "retentionPolicy": {
20 |       "duration": "30d",
21 |       "action": "archive"
22 |     },
23 |     "validationRules": [
24 |       {
25 |         "field": "name",
26 |         "type": "regex",
27 |         "pattern": "^[a-zA-Z0-9\\s]+$"
28 |       },
29 |       {
30 |         "field": "age",
31 |         "type": "range",
32 |         "min": 0,
33 |         "max": 120
34 |       }
35 |     ]
36 |   },
37 |   "status": {
38 |     "state": "Active",
39 |     "lastProcessed": "2023-10-27T09:55:00Z",
40 |     "processedRecords": 12345,
41 |     "errors": 0,
42 |     "message": "Data processing is running smoothly."
43 |   },
44 |   "data": [
45 |     {
46 |       "id": 1,
47 |       "name": "John Doe",
48 |       "age": 30,
49 |       "city": "New York"
50 |     },
51 |     {
52 |       "id": 2,
53 |       "name": "Jane Smith",
54 |       "age": 25,
55 |       "city": "Los Angeles"
56 |     },
57 |     {
58 |       "id": 3,
59 |       "name": "Peter Jones",
60 |       "age": 40,
61 |       "city": "Chicago"
62 |     }
63 |   ]
64 | }
65 | 


--------------------------------------------------------------------------------
/tests/data/test4.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/PdfItDown/593434bd104b2c6f89464c5943ebe9573c60d475/tests/data/test4.docx


--------------------------------------------------------------------------------
/tests/data/test5.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/PdfItDown/593434bd104b2c6f89464c5943ebe9573c60d475/tests/data/test5.zip


--------------------------------------------------------------------------------
/tests/llamaparse/test1.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/PdfItDown/593434bd104b2c6f89464c5943ebe9573c60d475/tests/llamaparse/test1.pptx


--------------------------------------------------------------------------------
/tests/llamaparse/test4.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/PdfItDown/593434bd104b2c6f89464c5943ebe9573c60d475/tests/llamaparse/test4.docx


--------------------------------------------------------------------------------
/tests/test_llamaparse.py:
--------------------------------------------------------------------------------
  1 | from pdfconversion import Converter
  2 | from llama_parse import LlamaParse
  3 | import pathlib
  4 | import os
  5 | from dotenv import load_dotenv
  6 | 
  7 | load_dotenv()  # presumably loads variables from a local .env file so the API key below can be found
  8 | 
  9 | reader = LlamaParse(api_key=os.getenv("llamacloud_api_key"), result_type="markdown")  # key comes from the llamacloud_api_key environment variable (None if unset)
 10 | converter = Converter(reader=reader)  # module-level converter used by every test below
 11 | 
 12 | def test_single_file():
 13 |     test_cases = [
 14 |         {
 15 |             "test_name": "Successful HTML conversion",
 16 |             "file_input": "tests/data/test1.pptx",
 17 |             "file_output": "tests/data/test1.pdf",
 18 |             "expected": True
 19 |         },
 20 |         {
 21 |             "test_name": "Successful md file conversion",
 22 |             "file_input": "tests/data/test2.md",
 23 |             "file_output": "tests/data/test2.pdf",
 24 |             "expected": True
 25 |         },
 26 |         {
 27 |             "test_name": "Successful image file conversion",
 28 |             "file_input": "tests/data/test0.png",
 29 |             "file_output": "tests/data/test0.pdf",
 30 |             "expected": True
 31 |         },
 32 |         {
 33 |             "test_name": "Unsuccessful file conversion",
 34 |             "file_input": "tests/data/tes.md",
 35 |             "file_output": "tests/data/tes.pdf",
 36 |             "expected": False
 37 |         },
 38 |     ]
 39 |     for c in test_cases:
 40 |         print(c["test_name"])
 41 |         try:
 42 |             result = converter.convert(file_path=c["file_input"], output_path=c["file_output"])
 43 |             assert pathlib.Path(result).is_file() == c["expected"]
 44 |             if pathlib.Path(result).is_file():
 45 |                 os.remove(result)
 46 |         except Exception:
 47 |             result = c["file_output"]
 48 |             assert pathlib.Path(result).is_file() == c["expected"]
 49 | 
 50 | def test_multiple_files():
 51 |     test_cases = [
 52 |         {
 53 |             "test_name": "Specified output files",
 54 |             "file_input": ["tests/data/test1.pptx","tests/data/test4.docx","tests/data/test2.md"],
 55 |             "file_output": ["tests/data/test0_1.pdf","tests/data/test_1.pdf","tests/data/test2_1.pdf"],
 56 |             "expected": [True, True, True]
 57 |         },
 58 |         {
 59 |             "test_name": "Unspecified output files",
 60 |             "file_input": ["tests/data/test1.pptx","tests/data/test4.docx","tests/data/test2.md"],
 61 |             "file_output": None,
 62 |             "expected": [True, True, True]
 63 |         },
 64 |         {
 65 |             "test_name": "Unspecified output files",
 66 |             "file_input": ["tests/data/test1.pptx","tests/data/test4.docx","tests/data/test2.md"],
 67 |             "file_output": ["tests/data/test0_2.pdf"],
 68 |             "expected": False
 69 |         },
 70 |     ]
 71 |     for c in test_cases:
 72 |         print(c["test_name"])
 73 |         try:
 74 |             result = converter.multiple_convert(file_paths=c["file_input"], output_paths=c["file_output"])
 75 |             assert [pathlib.Path(r).is_file() for r in result] == c["expected"]
 76 |             for f in result:
 77 |                 if pathlib.Path(f).is_file():
 78 |                     os.remove(f)
 79 |         except Exception:
 80 |             assert pathlib.Path(c["file_output"][0]).is_file() == c["expected"]
 81 | 
 82 | 
 83 | def test_dir():
 84 |     test_cases = [
 85 |         {
 86 |             "test_name": "Correct dir path",
 87 |             "file_input": "tests/llamaparse",
 88 |             "file_output": ["tests/llamaparse/test1.pdf", "tests/llamaparse/test4.pdf"],
 89 |             "expected": [True, True]
 90 |         },
 91 |         {
 92 |             "test_name": "Wrong dir path",
 93 |             "file_input": "tests/data/llamapars",
 94 |             "file_output": ["tests/llamaparse/test1.pdf", "tests/llamaparse/test4.pdf"],
 95 |             "expected": [False, False]
 96 |         },
 97 |     ]
 98 |     for c in test_cases:
 99 |         print(c["test_name"])
100 |         try:
101 |             converter.convert_directory(directory_path=c["file_input"])
102 |             assert [pathlib.Path(r).is_file() for r in c["file_output"]] == c["expected"]
103 |             for f in c["file_output"]:
104 |                 if pathlib.Path(f).is_file():
105 |                     os.remove(f)
106 |         except Exception:
107 |             assert [pathlib.Path(r).is_file() for r in c["file_output"]] == c["expected"]
108 | 


--------------------------------------------------------------------------------
/tests/test_markitdown.py:
--------------------------------------------------------------------------------
  1 | from pdfconversion import Converter
  2 | import pathlib
  3 | import os
  4 | 
  5 | converter = Converter()  # module-level converter built with no arguments; used by every test below
  6 | 
  7 | def test_single_file():
  8 |     test_cases = [
  9 |         {
 10 |             "test_name": "Successful image conversion",
 11 |             "file_input": "tests/data/test0.png",
 12 |             "file_output": "tests/data/test0.pdf",
 13 |             "expected": True
 14 |         },
 15 |         {
 16 |             "test_name": "Successful text file conversion",
 17 |             "file_input": "tests/data/test.txt",
 18 |             "file_output": "tests/data/test.pdf",
 19 |             "expected": True
 20 |         },
 21 |         {
 22 |             "test_name": "Successful md file conversion",
 23 |             "file_input": "tests/data/test2.md",
 24 |             "file_output": "tests/data/test2.pdf",
 25 |             "expected": True
 26 |         },
 27 |         {
 28 |             "test_name": "Unsuccessful file conversion",
 29 |             "file_input": "tests/data/test1.pptx",
 30 |             "file_output": "tests/data/test1.pdf",
 31 |             "expected": True
 32 |         },
 33 |         {
 34 |             "test_name": "Unsuccessful file conversion",
 35 |             "file_input": "tests/data/tes.md",
 36 |             "file_output": "tests/data/tes.pdf",
 37 |             "expected": False
 38 |         },
 39 |     ]
 40 |     for c in test_cases:
 41 |         print(c["test_name"])
 42 |         try:
 43 |             result = converter.convert(file_path=c["file_input"], output_path=c["file_output"])
 44 |             assert pathlib.Path(result).is_file() == c["expected"]
 45 |             if pathlib.Path(result).is_file():
 46 |                 os.remove(result)
 47 |         except Exception:
 48 |             result = c["file_output"]
 49 |             assert pathlib.Path(result).is_file() == c["expected"]
 50 | 
 51 | def test_multiple_files():
 52 |     test_cases = [
 53 |         {
 54 |             "test_name": "Specified output files",
 55 |             "file_input": ["tests/data/test0.png","tests/data/test.txt","tests/data/test2.md"],
 56 |             "file_output": ["tests/data/test0_1.pdf","tests/data/test_1.pdf","tests/data/test2_1.pdf"],
 57 |             "expected": [True, True, True]
 58 |         },
 59 |         {
 60 |             "test_name": "Unspecified output files",
 61 |             "file_input": ["tests/data/test0.png","tests/data/test.txt","tests/data/test2.md"],
 62 |             "file_output": None,
 63 |             "expected": [True, True, True]
 64 |         },
 65 |         {
 66 |             "test_name": "Unspecified output files",
 67 |             "file_input": ["tests/data/test0.png","tests/data/test.txt","tests/data/test2.md"],
 68 |             "file_output": ["tests/data/test0_2.pdf"],
 69 |             "expected": False
 70 |         },
 71 |     ]
 72 |     for c in test_cases:
 73 |         print(c["test_name"])
 74 |         try:
 75 |             result = converter.multiple_convert(file_paths=c["file_input"], output_paths=c["file_output"])
 76 |             assert [pathlib.Path(r).is_file() for r in result] == c["expected"]
 77 |             for f in result:
 78 |                 if pathlib.Path(f).is_file():
 79 |                     os.remove(f)
 80 |         except Exception:
 81 |             assert pathlib.Path(c["file_output"][0]).is_file() == c["expected"]
 82 | 
 83 | 
 84 | def test_dir():
 85 |     test_cases = [
 86 |         {
 87 |             "test_name": "Correct dir path",
 88 |             "file_input": "tests/data",
 89 |             "file_output": ["tests/data/test0.pdf","tests/data/test1.pdf", "tests/data/test.pdf","tests/data/test2.pdf", "tests/data/test3.pdf", "tests/data/test4.pdf", "tests/data/test5.pdf"],
 90 |             "expected": [True, True, True, True, True, True, True]
 91 |         },
 92 |         {
 93 |             "test_name": "Wrong dir path",
 94 |             "file_input": "tests/dat",
 95 |             "file_output": ["tests/data/test0.pdf","tests/data/test1.pdf", "tests/data/test.pdf","tests/data/test2.pdf", "tests/data/test3.pdf", "tests/data/test4.pdf", "tests/data/test5.pdf"],
 96 |             "expected": [False, False, False, False, False, False, False]
 97 |         },
 98 |     ]
 99 |     for c in test_cases:
100 |         print(c["test_name"])
101 |         try:
102 |             converter.convert_directory(directory_path=c["file_input"])
103 |             assert [pathlib.Path(r).is_file() for r in c["file_output"]] == c["expected"]
104 |             for f in c["file_output"]:
105 |                 if pathlib.Path(f).is_file():
106 |                     os.remove(f)
107 |         except Exception:
108 |             assert [pathlib.Path(r).is_file() for r in c["file_output"]] == c["expected"]
109 | 


--------------------------------------------------------------------------------
/tests/test_ui.py:
--------------------------------------------------------------------------------
 1 | from pdfitdown_ui import to_pdf
 2 | import os
 3 | from pathlib import Path
 4 | 
 5 | def test_to_pdf():
 6 |     test_files = ["tests/data/test0.png","tests/data/test.txt","tests/data/test2.md"]
 7 |     expected_outputs = ["tests/data/test0.pdf","tests/data/test.pdf","tests/data/test2.pdf"]
 8 |     assert to_pdf(test_files) == expected_outputs
 9 |     for p in expected_outputs:
10 |         if Path(p).is_file():
11 |             os.remove(p)
12 | 


--------------------------------------------------------------------------------