├── .gitignore ├── README.md ├── pil_pdf ├── __init__.py ├── main.py └── routers │ ├── __init__.py │ └── pdf_operations.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiladores e intérpretes 2 | __pycache__/ 3 | *.pyc 4 | *.pyo 5 | *.pyd 6 | 7 | # Entornos virtuales de Python 8 | venv/ 9 | venv3/ 10 | env/ 11 | env3/ 12 | *.egg-info/ 13 | *.egg 14 | *.eggs/ 15 | 16 | # Archivos generados 17 | *.log 18 | *.sqlite 19 | *.db 20 | *.csv 21 | 22 | # Archivos de cache 23 | .cache/ 24 | .pytest_cache/ 25 | .mypy_cache/ 26 | .tox/ 27 | .ipynb_checkpoints/ 28 | coverage/ 29 | 30 | # Archivos de IDEs y editores 31 | *.vscode/ 32 | .idea/ 33 | *.sublime-workspace 34 | 35 | # Archivos de sistemas de control de versiones 36 | .DS_Store 37 | Thumbs.db 38 | 39 | # Paquetes 40 | *.egg/ 41 | *.whl 42 | 43 | # Node.js / JavaScript 44 | node_modules/ 45 | 46 | # FastAPI / Uvicorn 47 | *.uvicorn.run 48 | *.uvicorn.log 49 | *.dat 50 | 51 | # Configuración de entornos Docker 52 | .dockerignore 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # PIL_PDF 3 | 4 | This is a [FastAPI](https://fastapi.tiangolo.com/)-based project that provides services for working with PDF files. Currently, it offers a function to protect PDF files with a password. As the project evolves, more PDF-related functions may be added. 5 | 6 | [![Build Status](https://travis-ci.org/joemccann/dillinger.svg?branch=master)](https://travis-ci.org/joemccann/dillinger) 7 | 8 | ## Project Structure 9 | 10 | - **/Proyecto_PDF/**: Project root directory. 11 | - **/pil_pdf/**: Directory containing code related to PDF operations. 12 | - **/__init__.py**: File that marks the directory as a Python package. 13 | - **/main.py**: File containing initialization and configuration of the FastAPI application. 
14 | - **/routers/**: Directory containing FastAPI routers for defining API routes. 15 | - **/__init__.py**: File that marks the directory as a Python package. 16 | - **/pdf_operations.py**: File containing routes related to PDF operations. 17 | - **requirements.txt**: File containing project dependencies. 18 | - **README.md**: This file, providing information about the project. 19 | - **.gitignore**: Configuration file for ignoring unwanted files in version control. 20 | 21 | ## Dependencies 22 | 23 | Project dependencies are defined in the `requirements.txt` file. You can install them using the following command: 24 | 25 | ```bash 26 | pip install -r requirements.txt 27 | ``` 28 | ## Running the Project 29 | 30 | To run the project, ensure you have a configured Python environment. Then, run the main.py file in the project root directory using the following command: 31 | 32 | ```bash 33 | uvicorn pil_pdf.main:app --reload 34 | ``` 35 | This will start a development server that will automatically reload when code changes are detected. 36 | 37 | ## Usage 38 | ### Protecting a PDF with a Password 39 | The project currently offers one endpoint to protect PDF files with a password: 40 | 41 | **POST /protect-pdf/** 42 | You can send a POST request to this endpoint with a PDF file and a password in the body of the request. If the file and password are valid, the server will return the protected PDF file. 43 | 44 | Example request: 45 | 46 | - Using the FastAPI interactive API documentation at http://localhost:8000/docs, navigate to the /protect-pdf/ endpoint. 47 | - Upload a PDF file and specify the password you want to use to protect the file. 48 | - Submit the request and you will receive the protected PDF file in response. 
49 | 50 | Alternatively, you can use curl: 51 | 52 | ```bash 53 | curl -X POST "http://localhost:8000/protect-pdf/" \ 54 | -F "file=@/path/to/file.pdf" \ 55 | -F "pwd=secret_password" 56 | ``` 57 | This will send a PDF file to the server and receive the protected PDF file as a response. 58 | 59 | ## Contributions 60 | This project is open to contributions. If you have ideas for new features or improvements, feel free to open an issue or submit a pull request. 61 | -------------------------------------------------------------------------------- /pil_pdf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pildorasdeprogramacion/Proyecto_PDF/1f9f9a310465dda1e283b5adfae00f2ad501b589/pil_pdf/__init__.py -------------------------------------------------------------------------------- /pil_pdf/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from .routers import pdf_operations 3 | 4 | app = FastAPI() 5 | 6 | app.include_router(pdf_operations.router) 7 | 8 | -------------------------------------------------------------------------------- /pil_pdf/routers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pildorasdeprogramacion/Proyecto_PDF/1f9f9a310465dda1e283b5adfae00f2ad501b589/pil_pdf/routers/__init__.py -------------------------------------------------------------------------------- /pil_pdf/routers/pdf_operations.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, UploadFile, File, HTTPException 2 | from pypdf import PdfReader, PdfWriter 3 | from starlette.responses import Response 4 | from pdf2docx import Converter 5 | import io 6 | 7 | router = APIRouter() 8 | 9 | MAX_FILE_SIZE = 7 * 1024 * 1024 10 | MAX_FILE_SIZE_MERGE = 20 * 1024 * 1024 11 | 12 | 
# Runtime strings shared by several handlers.  They are assembled from
# adjacent literals rather than with a backslash inside one literal: a
# backslash-newline inside a string keeps the next line's leading indentation
# in the resulting value, which corrupted the MIME type and padded messages.
_SIZE_LIMIT_DETAIL = "File size exceeds the maximum allowed limit"
_INVALID_PDF_DETAIL = "The uploaded file is not a valid PDF"
_DOCX_MEDIA_TYPE = (
    "application/vnd.openxmlformats-officedocument"
    ".wordprocessingml.document"
)


def _upload_size(upload: UploadFile) -> int:
    """Return the size of *upload* in bytes.

    ``upload.size`` is used when it is set.  When it is ``None`` the
    underlying file object is measured with seek/tell and its read position
    is restored afterwards, so the size checks never compare ``None`` with
    an integer.
    """
    if upload.size is not None:
        return upload.size
    stream = upload.file
    position = stream.tell()
    stream.seek(0, io.SEEK_END)
    size = stream.tell()
    stream.seek(position)
    return size


def _attachment_response(payload: bytes, filename: str, media_type: str) -> Response:
    """Wrap *payload* in a download response named *filename*."""
    headers = {"Content-Disposition": f"attachment; filename={filename}"}
    return Response(content=payload, media_type=media_type, headers=headers)


def _write_pdf(writer_pdf: PdfWriter, filename: str) -> Response:
    """Serialize *writer_pdf* in memory and return it as a PDF download."""
    buffer = io.BytesIO()
    writer_pdf.write(buffer)
    return _attachment_response(buffer.getvalue(), filename, "application/pdf")


@router.post("/protect-pdf/")
async def protect_pdf(file: UploadFile = File(...), pwd: str = ""):
    """Encrypt the uploaded PDF with ``pwd`` as its user password.

    Args:
        file: The PDF to protect; at most ``MAX_FILE_SIZE`` bytes.
        pwd: Password to set.  NOTE(review): declared as a plain ``str``, so
            FastAPI reads it from the query string, not from a multipart
            form field -- confirm that clients send it that way.

    Returns:
        The encrypted document as an ``application/pdf`` attachment named
        ``protected.pdf``.

    Raises:
        HTTPException: 400 when ``pwd`` is empty or the upload cannot be
            read as a PDF; 413 when the upload is too large.
    """
    if not pwd:
        raise HTTPException(status_code=400, detail="pwd is required")
    if _upload_size(file) > MAX_FILE_SIZE:
        raise HTTPException(status_code=413, detail=_SIZE_LIMIT_DETAIL)

    writer_pdf = PdfWriter()
    try:
        # Page access can fail as well (for example on an already encrypted
        # input), so it is covered by the same guard as the parse itself.
        for page in PdfReader(file.file).pages:
            writer_pdf.add_page(page)
    except Exception as exc:
        raise HTTPException(status_code=400, detail=_INVALID_PDF_DETAIL) from exc

    writer_pdf.encrypt(user_password=pwd, use_128bit=True)
    return _write_pdf(writer_pdf, "protected.pdf")


@router.post("/merge-pdfs/")
async def merge_pdfs(files: list[UploadFile] = File(...)):
    """Concatenate the uploaded PDFs, in upload order, into one document.

    Args:
        files: Two or more PDFs whose combined size is at most
            ``MAX_FILE_SIZE_MERGE`` bytes.

    Returns:
        The merged document as an ``application/pdf`` attachment named
        ``merged.pdf``.

    Raises:
        HTTPException: 400 when fewer than two files are sent or one of them
            cannot be read as a PDF; 413 when the combined size is too large.
    """
    if len(files) < 2:
        raise HTTPException(
            status_code=400,
            detail="At least two PDF files are required to merge",
        )
    # The merge endpoint has its own, larger budget; the single-file limit
    # was applied here by mistake.
    if sum(_upload_size(upload) for upload in files) > MAX_FILE_SIZE_MERGE:
        raise HTTPException(
            status_code=413,
            detail="Total file size exceeds the maximum allowed limit",
        )

    writer_pdf = PdfWriter()
    for upload in files:
        try:
            for page in PdfReader(upload.file).pages:
                writer_pdf.add_page(page)
        except Exception as exc:
            raise HTTPException(
                status_code=400, detail=_INVALID_PDF_DETAIL
            ) from exc

    return _write_pdf(writer_pdf, "merged.pdf")


@router.post("/remove-password/")
async def remove_password(file: UploadFile = File(...),
                          pwd: str = ""):
    """Return an unencrypted copy of the uploaded PDF.

    Args:
        file: The PDF to unlock; at most ``MAX_FILE_SIZE`` bytes.
        pwd: Password used to decrypt the document.  NOTE(review): read from
            the query string, like ``protect_pdf`` -- confirm with clients.

    Returns:
        The document as an ``application/pdf`` attachment named
        ``unlocked.pdf``.  A PDF that is not encrypted is passed through.

    Raises:
        HTTPException: 400 when ``pwd`` is empty, the upload cannot be read
            as a PDF, or the password does not open it; 413 when the upload
            is too large.
    """
    if not pwd:
        raise HTTPException(status_code=400, detail="pwd is required")
    if _upload_size(file) > MAX_FILE_SIZE:
        raise HTTPException(status_code=413, detail=_SIZE_LIMIT_DETAIL)

    rejected_detail = (
        "The uploaded file is not a valid PDF or password is incorrect"
    )
    try:
        reader_pdf = PdfReader(file.file)
        # decrypt() signals a wrong password through a falsy return value,
        # not an exception, so the result has to be inspected explicitly.
        unlocked = (not reader_pdf.is_encrypted
                    or bool(reader_pdf.decrypt(pwd)))
    except Exception as exc:
        raise HTTPException(status_code=400, detail=rejected_detail) from exc
    if not unlocked:
        raise HTTPException(status_code=400, detail=rejected_detail)

    return _write_pdf(PdfWriter(clone_from=reader_pdf), "unlocked.pdf")


@router.post("/pdf-to-word/")
async def pdf_to_word(file: UploadFile = File(...)):
    """Convert the uploaded PDF to a Word (.docx) document.

    Args:
        file: The PDF to convert; at most ``MAX_FILE_SIZE`` bytes.

    Returns:
        The converted document as an attachment named ``converted.docx``.

    Raises:
        HTTPException: 413 when the upload is too large; 400 when the upload
            cannot be read; 500 when the conversion itself fails.
    """
    if _upload_size(file) > MAX_FILE_SIZE:
        raise HTTPException(status_code=413, detail=_SIZE_LIMIT_DETAIL)
    try:
        pdf_content = await file.read()
    except Exception as exc:
        raise HTTPException(status_code=400, detail=_INVALID_PDF_DETAIL) from exc

    docx_buffer = io.BytesIO()
    try:
        converter = Converter(stream=pdf_content)
        try:
            converter.convert(docx_buffer)
        finally:
            # Release the converter even when convert() raises.
            converter.close()
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error converting PDF to Word: {e}",
        ) from e

    return _attachment_response(
        docx_buffer.getvalue(), "converted.docx", _DOCX_MEDIA_TYPE
    )


# Dependency pins from the accompanying requirements.txt:
#   fastapi==0.110.1
#   pypdf==4.2.0
#   python-multipart==0.0.9
#   uvicorn==0.29.0
#   cryptography==42.0.5
#   pdf2docx==0.5.8