├── .gitignore ├── public ├── uploads │ └── .gitignore ├── upload.php └── index.php ├── src ├── Interface │ └── IConvert.php ├── FactoryMethod.php └── Convert │ ├── DocxToPdfConvert.php │ └── PdfToDocxConvert.php ├── composer.json ├── Dockerfile ├── README.md └── composer.lock /.gitignore: -------------------------------------------------------------------------------- 1 | /vendor 2 | .~lock.prueba.docx# -------------------------------------------------------------------------------- /public/uploads/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /src/Interface/IConvert.php: -------------------------------------------------------------------------------- 1 | formatToConvert($format, $filename)); 18 | 19 | //if($factory) return header("Location: index.php"); 20 | } else { 21 | header("Location: index.php"); 22 | } -------------------------------------------------------------------------------- /src/FactoryMethod.php: -------------------------------------------------------------------------------- 1 | DocxToPdfConvert::convert( $filename), 16 | "docx" => PdfToDocxConvert::convert($filename), 17 | default => UnhandledMatchError::class 18 | }; 19 | } catch (\UnhandledMatchError $e) { 20 | echo "Formato no válido: $format"; 21 | echo $e->getMessage(); 22 | } 23 | } 24 | } -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM php:8.3-apache 2 | RUN docker-php-ext-install pdo pdo_mysql 3 | 4 | #Config apache 5 | RUN a2enmod rewrite 6 | ENV APACHE_DOCUMENT_ROOT /var/www/html/public 7 | 8 | 9 | RUN sed -ri -e 's!/var/www/html!${APACHE_DOCUMENT_ROOT}!g' /etc/apache2/sites-available/*.conf 10 | RUN sed -ri -e 's!/var/www/!${APACHE_DOCUMENT_ROOT}!g' /etc/apache2/apache2.conf 
/etc/apache2/conf-available/*.conf 11 | 12 | # git install 13 | RUN apt-get -y update 14 | RUN apt-get -y install git libzip-dev 15 | RUN docker-php-ext-install zip 16 | 17 | 18 | WORKDIR /var/www/html 19 | 20 | 21 | #Instalación de composer 22 | COPY --from=composer /usr/bin/composer /usr/bin/composer 23 | RUN chmod +x /usr/bin/composer 24 | 25 | 26 | 27 | # Cambiar el propietario y permisos del directorio /var/www/html 28 | RUN chown -R www-data:www-data /var/www/html 29 | RUN chmod 777 /var/www/html 30 | 31 | 32 | COPY . /var/www/html 33 | 34 | EXPOSE 80 -------------------------------------------------------------------------------- /src/Convert/DocxToPdfConvert.php: -------------------------------------------------------------------------------- 1 | save(self::$pathFilesSaved . $filename . ".pdf"); 27 | 28 | if(!$fileWrite) return "No se Convitió"; 29 | return $fileWrite; 30 | 31 | } catch (Exception $e) { 32 | echo "Error: " . $e->getMessage() . "\n"; 33 | echo "Linea Del Error: " . $e->getLine(); 34 | } 35 | } 36 | } -------------------------------------------------------------------------------- /src/Convert/PdfToDocxConvert.php: -------------------------------------------------------------------------------- 1 | parser->parseFile(self::$pathFilesSaved . $filename); 28 | 29 | // Extrear texto 30 | $texts = ""; 31 | foreach ($document->getPages() as $index => $page) { 32 | $texts .= $page->getText(); 33 | } 34 | 35 | $section = $documents->documentWord->addSection(); 36 | $section->addText($texts); 37 | 38 | $write = IOFactory::createWriter($documents->documentWord, "Word2007"); 39 | $write->save(self::$pathFilesSaved . $filename . ".docx"); 40 | 41 | } catch (Exception $e) { 42 | echo "Error: " . $e->getMessage() . "\n"; 43 | echo "Linea Del Error: " . 
$e->getLine(); 44 | } 45 | } 46 | } -------------------------------------------------------------------------------- /public/index.php: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Convert File 7 | 8 | 9 | 10 | 11 | 12 |
13 |

Subir Archivo

14 |
15 | 16 |
17 | 18 | 19 |
20 | 21 |
22 | 23 | 27 |
28 | 29 | 30 |
31 |
32 | 33 | 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Convertidor de Archivos 2 | 3 |

4 | Este es un desarrollo que consiste en la conversión de archivos de **PDF** a **Word** 5 | y de **Word** a **PDF** 6 |

7 | 8 |

9 | Para este proyecto usé el patrón `Factory`. Además, PHP provee una manera más elegante de evaluar valores que un if o un switch: la expresión *match*, disponible desde PHP 8.0. 10 |

11 | 12 | 13 | ## Cosas a Tener en cuenta 14 | 15 |

16 | Para desarrollar o hacer uso de este proyecto, debes tener instaladas estas extensiones en PHP: 17 | 18 |

19 | 27 | 28 |

29 | Para comprobar si tenemos estas extensiones, usemos el método *phpinfo()* de php 30 |

31 | 32 | ## Obtener el proyecto 33 | 34 |

35 | Para poder hacer uso de este proyecto, es necesario clonar este repositorio. Una vez clonado, lo siguiente es instalar las dependencias con: 36 |

37 | 38 | ``` 39 | composer update 40 | ``` 41 | 42 | ## Guardando archivos en el servidor 43 | 44 | Este código se encuentra en el archivo `upload.php` 45 | 46 | ``` 47 | $filename = $_FILES["file"]["name"]; 48 | $pathFiles = __DIR__ . '/uploads/' ; 49 | $tmp_file = $_FILES["file"]["tmp_name"]; 50 | move_uploaded_file($tmp_file, $pathFiles . basename($filename)); 51 | ``` 52 | 53 | 54 | ## Clase Factory 55 | 56 |

57 | Esta clase se encarga de escoger qué clase usar, dependiendo del formato elegido. 58 |

59 | 60 | ``` 61 | namespace App; 62 | 63 | use App\Convert\PdfToDocxConvert; 64 | use App\Convert\DocxToPdfConvert; 65 | use UnhandledMatchError; 66 | 67 | class FactoryMethod 68 | { 69 | static function formatToConvert($format, $filename) { 70 | try { 71 | 72 | return match ($format) { 73 | "pdf" => DocxToPdfConvert::convert( $filename), 74 | "docx" => PdfToDocxConvert::convert($filename), 75 | default => UnhandledMatchError::class 76 | }; 77 | } catch (\UnhandledMatchError $e) { 78 | echo "Formato no válido: $format"; 79 | echo $e->getMessage(); 80 | } 81 | } 82 | } 83 | ``` 84 | 85 | ## Trabajando con archivos .docx 86 | 87 |

88 | Lograr la conversión correcta de los archivos con extensión **.docx** es algo complicado, debido a que cada uno es un Zip comprimido cuyo contenido está en formato XML. Por eso, para poder leer el contenido de un archivo Word y escribirlo como PDF, es necesario usar las librerías `phpoffice/phpword` y `tecnickcom/tcpdf`. 89 |

90 | 91 | 92 | ``` 93 | namespace App\Convert; 94 | 95 | use PhpOffice\PhpWord\IOFactory; 96 | use PhpOffice\PhpWord\Settings; 97 | 98 | use App\Interface\IConvert; 99 | use Exception; 100 | 101 | class DocxToPdfConvert implements IConvert 102 | { 103 | 104 | private static $pathFilesSaved = __DIR__ . '/../../public/uploads/'; 105 | 106 | static function convert($filename) 107 | { 108 | try { 109 | Settings::setPdfRendererPath(__DIR__ . '/../../vendor/tecnickcom/tcpdf'); 110 | Settings::setPdfRendererName(Settings::PDF_RENDERER_TCPDF); 111 | 112 | $readFile = self::$pathFilesSaved . $filename; 113 | $phpWord = IOFactory::load($readFile); 114 | 115 | $fileWrite = IOFactory::createWriter($phpWord, "PDF"); 116 | $fileWrite->save(self::$pathFilesSaved . $filename . ".pdf"); 117 | 118 | if(!$fileWrite) return "No se Convitió"; 119 | return $fileWrite; 120 | 121 | } catch (Exception $e) { 122 | echo "Error: " . $e->getMessage() . "\n"; 123 | echo "Linea Del Error: " . $e->getLine(); 124 | } 125 | } 126 | } 127 | ``` 128 | 129 | ## Trabajando con archivos .pdf 130 | 131 |

132 | Ahora, para leer archivos PDF, usamos la librería `smalot/pdfparser`, previamente instalada: 133 |

134 | 135 | ``` 136 | namespace App\Convert; 137 | 138 | use Exception; 139 | 140 | use App\Interface\IConvert; 141 | use PhpOffice\PhpWord\IOFactory; 142 | use Smalot\PdfParser\Parser; 143 | use PhpOffice\PhpWord\PhpWord; 144 | 145 | 146 | class PdfToDocxConvert implements IConvert 147 | { 148 | private static $pathFilesSaved = __DIR__ . '/../../public/uploads/'; 149 | 150 | function __construct( 151 | private Parser $parser = new Parser, 152 | private PhpWord $documentWord = new PhpWord 153 | ) 154 | {} 155 | 156 | static function convert($filename) 157 | { 158 | try { 159 | $documents = new self(new Parser(), new PhpWord()); 160 | $document = $documents->parser->parseFile(self::$pathFilesSaved . $filename); 161 | 162 | // Extrear texto 163 | $texts = ""; 164 | foreach ($document->getPages() as $index => $page) { 165 | $texts .= $page->getText(); 166 | } 167 | 168 | $section = $documents->documentWord->addSection(); 169 | $section->addText($texts); 170 | 171 | $write = IOFactory::createWriter($documents->documentWord, "Word2007"); 172 | $write->save(self::$pathFilesSaved . $filename . ".docx"); 173 | 174 | } catch (Exception $e) { 175 | echo "Error: " . $e->getMessage() . "\n"; 176 | echo "Linea Del Error: " . 
$e->getLine(); 177 | } 178 | } 179 | } 180 | ``` -------------------------------------------------------------------------------- /composer.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_readme": [ 3 | "This file locks the dependencies of your project to a known state", 4 | "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", 5 | "This file is @generated automatically" 6 | ], 7 | "content-hash": "f85266da9f289a8f585f88dda9cb965b", 8 | "packages": [ 9 | { 10 | "name": "phpoffice/math", 11 | "version": "0.1.0", 12 | "source": { 13 | "type": "git", 14 | "url": "https://github.com/PHPOffice/Math.git", 15 | "reference": "f0f8cad98624459c540cdd61d2a174d834471773" 16 | }, 17 | "dist": { 18 | "type": "zip", 19 | "url": "https://api.github.com/repos/PHPOffice/Math/zipball/f0f8cad98624459c540cdd61d2a174d834471773", 20 | "reference": "f0f8cad98624459c540cdd61d2a174d834471773", 21 | "shasum": "" 22 | }, 23 | "require": { 24 | "ext-dom": "*", 25 | "ext-xml": "*", 26 | "php": "^7.1|^8.0" 27 | }, 28 | "require-dev": { 29 | "phpstan/phpstan": "^0.12.88 || ^1.0.0", 30 | "phpunit/phpunit": "^7.0 || ^9.0" 31 | }, 32 | "type": "library", 33 | "autoload": { 34 | "psr-4": { 35 | "PhpOffice\\Math\\": "src/Math/", 36 | "Tests\\PhpOffice\\Math\\": "tests/Math/" 37 | } 38 | }, 39 | "notification-url": "https://packagist.org/downloads/", 40 | "license": [ 41 | "MIT" 42 | ], 43 | "authors": [ 44 | { 45 | "name": "Progi1984", 46 | "homepage": "https://lefevre.dev" 47 | } 48 | ], 49 | "description": "Math - Manipulate Math Formula", 50 | "homepage": "https://phpoffice.github.io/Math/", 51 | "keywords": [ 52 | "MathML", 53 | "officemathml", 54 | "php" 55 | ], 56 | "support": { 57 | "issues": "https://github.com/PHPOffice/Math/issues", 58 | "source": "https://github.com/PHPOffice/Math/tree/0.1.0" 59 | }, 60 | "time": "2023-09-25T12:08:20+00:00" 61 | }, 62 | { 63 | "name": "phpoffice/phpword", 64 | 
"version": "1.2.0", 65 | "source": { 66 | "type": "git", 67 | "url": "https://github.com/PHPOffice/PHPWord.git", 68 | "reference": "e76b701ef538cb749641514fcbc31a68078550fa" 69 | }, 70 | "dist": { 71 | "type": "zip", 72 | "url": "https://api.github.com/repos/PHPOffice/PHPWord/zipball/e76b701ef538cb749641514fcbc31a68078550fa", 73 | "reference": "e76b701ef538cb749641514fcbc31a68078550fa", 74 | "shasum": "" 75 | }, 76 | "require": { 77 | "ext-dom": "*", 78 | "ext-json": "*", 79 | "ext-xml": "*", 80 | "php": "^7.1|^8.0", 81 | "phpoffice/math": "^0.1" 82 | }, 83 | "require-dev": { 84 | "dompdf/dompdf": "^2.0", 85 | "ext-gd": "*", 86 | "ext-libxml": "*", 87 | "ext-zip": "*", 88 | "friendsofphp/php-cs-fixer": "^3.3", 89 | "mpdf/mpdf": "^8.1", 90 | "phpmd/phpmd": "^2.13", 91 | "phpstan/phpstan-phpunit": "@stable", 92 | "phpunit/phpunit": ">=7.0", 93 | "symfony/process": "^4.4 || ^5.0", 94 | "tecnickcom/tcpdf": "^6.5" 95 | }, 96 | "suggest": { 97 | "dompdf/dompdf": "Allows writing PDF", 98 | "ext-gd2": "Allows adding images", 99 | "ext-xmlwriter": "Allows writing OOXML and ODF", 100 | "ext-xsl": "Allows applying XSL style sheet to headers, to main document part, and to footers of an OOXML template", 101 | "ext-zip": "Allows writing OOXML and ODF" 102 | }, 103 | "type": "library", 104 | "autoload": { 105 | "psr-4": { 106 | "PhpOffice\\PhpWord\\": "src/PhpWord" 107 | } 108 | }, 109 | "notification-url": "https://packagist.org/downloads/", 110 | "license": [ 111 | "LGPL-3.0" 112 | ], 113 | "authors": [ 114 | { 115 | "name": "Mark Baker" 116 | }, 117 | { 118 | "name": "Gabriel Bull", 119 | "email": "me@gabrielbull.com", 120 | "homepage": "http://gabrielbull.com/" 121 | }, 122 | { 123 | "name": "Franck Lefevre", 124 | "homepage": "https://rootslabs.net/blog/" 125 | }, 126 | { 127 | "name": "Ivan Lanin", 128 | "homepage": "http://ivan.lanin.org" 129 | }, 130 | { 131 | "name": "Roman Syroeshko", 132 | "homepage": "http://ru.linkedin.com/pub/roman-syroeshko/34/a53/994/" 133 | }, 
134 | { 135 | "name": "Antoine de Troostembergh" 136 | } 137 | ], 138 | "description": "PHPWord - A pure PHP library for reading and writing word processing documents (OOXML, ODF, RTF, HTML, PDF)", 139 | "homepage": "https://phpoffice.github.io/PHPWord/", 140 | "keywords": [ 141 | "ISO IEC 29500", 142 | "OOXML", 143 | "Office Open XML", 144 | "OpenDocument", 145 | "OpenXML", 146 | "PhpOffice", 147 | "PhpWord", 148 | "Rich Text Format", 149 | "WordprocessingML", 150 | "doc", 151 | "docx", 152 | "html", 153 | "odf", 154 | "odt", 155 | "office", 156 | "pdf", 157 | "php", 158 | "reader", 159 | "rtf", 160 | "template", 161 | "template processor", 162 | "word", 163 | "writer" 164 | ], 165 | "support": { 166 | "issues": "https://github.com/PHPOffice/PHPWord/issues", 167 | "source": "https://github.com/PHPOffice/PHPWord/tree/1.2.0" 168 | }, 169 | "time": "2023-11-30T11:22:23+00:00" 170 | }, 171 | { 172 | "name": "smalot/pdfparser", 173 | "version": "v2.9.0", 174 | "source": { 175 | "type": "git", 176 | "url": "https://github.com/smalot/pdfparser.git", 177 | "reference": "6b53144fcb24af77093d4150dd7d0dd571f25761" 178 | }, 179 | "dist": { 180 | "type": "zip", 181 | "url": "https://api.github.com/repos/smalot/pdfparser/zipball/6b53144fcb24af77093d4150dd7d0dd571f25761", 182 | "reference": "6b53144fcb24af77093d4150dd7d0dd571f25761", 183 | "shasum": "" 184 | }, 185 | "require": { 186 | "ext-iconv": "*", 187 | "ext-zlib": "*", 188 | "php": ">=7.1", 189 | "symfony/polyfill-mbstring": "^1.18" 190 | }, 191 | "type": "library", 192 | "autoload": { 193 | "psr-0": { 194 | "Smalot\\PdfParser\\": "src/" 195 | } 196 | }, 197 | "notification-url": "https://packagist.org/downloads/", 198 | "license": [ 199 | "LGPL-3.0" 200 | ], 201 | "authors": [ 202 | { 203 | "name": "Sebastien MALOT", 204 | "email": "sebastien@malot.fr" 205 | } 206 | ], 207 | "description": "Pdf parser library. 
Can read and extract information from pdf file.", 208 | "homepage": "https://www.pdfparser.org", 209 | "keywords": [ 210 | "extract", 211 | "parse", 212 | "parser", 213 | "pdf", 214 | "text" 215 | ], 216 | "support": { 217 | "issues": "https://github.com/smalot/pdfparser/issues", 218 | "source": "https://github.com/smalot/pdfparser/tree/v2.9.0" 219 | }, 220 | "time": "2024-03-01T09:51:10+00:00" 221 | }, 222 | { 223 | "name": "symfony/polyfill-mbstring", 224 | "version": "v1.29.0", 225 | "source": { 226 | "type": "git", 227 | "url": "https://github.com/symfony/polyfill-mbstring.git", 228 | "reference": "9773676c8a1bb1f8d4340a62efe641cf76eda7ec" 229 | }, 230 | "dist": { 231 | "type": "zip", 232 | "url": "https://api.github.com/repos/symfony/polyfill-mbstring/zipball/9773676c8a1bb1f8d4340a62efe641cf76eda7ec", 233 | "reference": "9773676c8a1bb1f8d4340a62efe641cf76eda7ec", 234 | "shasum": "" 235 | }, 236 | "require": { 237 | "php": ">=7.1" 238 | }, 239 | "provide": { 240 | "ext-mbstring": "*" 241 | }, 242 | "suggest": { 243 | "ext-mbstring": "For best performance" 244 | }, 245 | "type": "library", 246 | "extra": { 247 | "thanks": { 248 | "name": "symfony/polyfill", 249 | "url": "https://github.com/symfony/polyfill" 250 | } 251 | }, 252 | "autoload": { 253 | "files": [ 254 | "bootstrap.php" 255 | ], 256 | "psr-4": { 257 | "Symfony\\Polyfill\\Mbstring\\": "" 258 | } 259 | }, 260 | "notification-url": "https://packagist.org/downloads/", 261 | "license": [ 262 | "MIT" 263 | ], 264 | "authors": [ 265 | { 266 | "name": "Nicolas Grekas", 267 | "email": "p@tchwork.com" 268 | }, 269 | { 270 | "name": "Symfony Community", 271 | "homepage": "https://symfony.com/contributors" 272 | } 273 | ], 274 | "description": "Symfony polyfill for the Mbstring extension", 275 | "homepage": "https://symfony.com", 276 | "keywords": [ 277 | "compatibility", 278 | "mbstring", 279 | "polyfill", 280 | "portable", 281 | "shim" 282 | ], 283 | "support": { 284 | "source": 
"https://github.com/symfony/polyfill-mbstring/tree/v1.29.0" 285 | }, 286 | "funding": [ 287 | { 288 | "url": "https://symfony.com/sponsor", 289 | "type": "custom" 290 | }, 291 | { 292 | "url": "https://github.com/fabpot", 293 | "type": "github" 294 | }, 295 | { 296 | "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", 297 | "type": "tidelift" 298 | } 299 | ], 300 | "time": "2024-01-29T20:11:03+00:00" 301 | }, 302 | { 303 | "name": "tecnickcom/tcpdf", 304 | "version": "6.7.4", 305 | "source": { 306 | "type": "git", 307 | "url": "https://github.com/tecnickcom/TCPDF.git", 308 | "reference": "d4adef47ca21c90e6483d59dcb9e5b1023696937" 309 | }, 310 | "dist": { 311 | "type": "zip", 312 | "url": "https://api.github.com/repos/tecnickcom/TCPDF/zipball/d4adef47ca21c90e6483d59dcb9e5b1023696937", 313 | "reference": "d4adef47ca21c90e6483d59dcb9e5b1023696937", 314 | "shasum": "" 315 | }, 316 | "require": { 317 | "php": ">=5.5.0" 318 | }, 319 | "type": "library", 320 | "autoload": { 321 | "classmap": [ 322 | "config", 323 | "include", 324 | "tcpdf.php", 325 | "tcpdf_parser.php", 326 | "tcpdf_import.php", 327 | "tcpdf_barcodes_1d.php", 328 | "tcpdf_barcodes_2d.php", 329 | "include/tcpdf_colors.php", 330 | "include/tcpdf_filters.php", 331 | "include/tcpdf_font_data.php", 332 | "include/tcpdf_fonts.php", 333 | "include/tcpdf_images.php", 334 | "include/tcpdf_static.php", 335 | "include/barcodes/datamatrix.php", 336 | "include/barcodes/pdf417.php", 337 | "include/barcodes/qrcode.php" 338 | ] 339 | }, 340 | "notification-url": "https://packagist.org/downloads/", 341 | "license": [ 342 | "LGPL-3.0-or-later" 343 | ], 344 | "authors": [ 345 | { 346 | "name": "Nicola Asuni", 347 | "email": "info@tecnick.com", 348 | "role": "lead" 349 | } 350 | ], 351 | "description": "TCPDF is a PHP class for generating PDF documents and barcodes.", 352 | "homepage": "http://www.tcpdf.org/", 353 | "keywords": [ 354 | "PDFD32000-2008", 355 | "TCPDF", 356 | "barcodes", 357 | "datamatrix", 
358 | "pdf", 359 | "pdf417", 360 | "qrcode" 361 | ], 362 | "support": { 363 | "issues": "https://github.com/tecnickcom/TCPDF/issues", 364 | "source": "https://github.com/tecnickcom/TCPDF/tree/6.7.4" 365 | }, 366 | "funding": [ 367 | { 368 | "url": "https://www.paypal.com/cgi-bin/webscr?cmd=_donations¤cy_code=GBP&business=paypal@tecnick.com&item_name=donation%20for%20tcpdf%20project", 369 | "type": "custom" 370 | } 371 | ], 372 | "time": "2024-03-25T23:56:24+00:00" 373 | } 374 | ], 375 | "packages-dev": [], 376 | "aliases": [], 377 | "minimum-stability": "stable", 378 | "stability-flags": [], 379 | "prefer-stable": false, 380 | "prefer-lowest": false, 381 | "platform": [], 382 | "platform-dev": [], 383 | "plugin-api-version": "2.6.0" 384 | } 385 | --------------------------------------------------------------------------------