├── .github └── workflows │ └── php.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── composer.json ├── composer.lock ├── coveralls.yml ├── docker-compose.yml ├── docker └── xdebug.ini ├── examples ├── run.php ├── templates │ ├── t_0.txt │ ├── t_1.txt │ ├── t_2.txt │ ├── t_3.txt │ ├── t_4.txt │ ├── t_5.txt │ ├── t_6.txt │ ├── t_7.txt │ └── t_8.txt └── test_txt_files │ ├── m_0.txt │ ├── m_1.txt │ ├── m_2.txt │ ├── m_3.txt │ ├── m_4.txt │ ├── m_5.txt │ ├── m_6.txt │ ├── m_7.txt │ └── m_8.txt ├── phpunit.xml ├── src ├── Exception │ ├── InvalidParseFileException.php │ ├── InvalidParsedDataKeyException.php │ ├── InvalidTemplatesDirectoryException.php │ └── UnstructuredTextParserException.php ├── Helper │ └── TemplatesHelper.php ├── ParseResult.php └── TextParser.php └── tests ├── Helper ├── TemplatesHelperTest.php ├── expected_templates │ ├── temp1.txt │ ├── temp2.txt │ ├── temp3.txt │ └── temp4.txt └── helper_templates │ ├── temp1.txt │ ├── temp2.txt │ ├── temp3.txt │ └── temp4.txt ├── ParseResultTest.php ├── TextParserTest.php ├── templates ├── t1webFeedback.txt ├── t2webFeedback.txt ├── t_0.txt └── t_1.txt └── test_txt_files ├── noMatch.txt ├── t0TemplateMatch.txt └── webFeedback.html /.github/workflows/php.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | php-versions: ['7.4', '8.0', '8.1', '8.2'] 19 | steps: 20 | - uses: actions/checkout@v3 21 | 22 | - name: Validate composer.json and composer.lock 23 | run: composer validate --strict 24 | 25 | - name: Cache Composer packages 26 | id: composer-cache 27 | uses: actions/cache@v3 28 | with: 29 | path: vendor 30 | key: ${{ runner.os }}-php-${{ hashFiles('**/composer.lock') }} 31 | restore-keys: | 32 | ${{ runner.os }}-php- 33 | - 
name: Install dependencies 34 | run: composer install --prefer-dist --no-progress 35 | 36 | - name: Setup PHP 37 | uses: shivammathur/setup-php@v2 38 | with: 39 | php-version: ${{ matrix.php-versions }} 40 | - name: Run Tests 41 | run: composer run-script test -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDEA Ignores # 2 | ################ 3 | *.iml 4 | *.ipr 5 | *.iws 6 | .idea/ 7 | out/ 8 | local.properties 9 | 10 | # Packages # 11 | ############ 12 | *.7z 13 | *.dmg 14 | *.gz 15 | *.iso 16 | *.rar 17 | *.tar 18 | *.zip 19 | vendor/ 20 | 21 | # Logs and databases # 22 | ###################### 23 | log/ 24 | *.log 25 | *.sql 26 | *.sqlite 27 | 28 | # OS files # 29 | ###################### 30 | .DS_Store 31 | .DS_Store? 32 | ehthumbs.db 33 | Icon? 34 | Thumbs.db 35 | 36 | # Project Specific # 37 | ###################### 38 | examples/Logs/* 39 | 40 | # Git Directories # 41 | ###################### 42 | !empty 43 | !.gitkeep 44 | 45 | 46 | # PHP Unit # 47 | ###################### 48 | .phpunit.result.cache 49 | /cache.properties 50 | tests/_reports/* 51 | build/logs/* -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PHP_VERSION 2 | ARG ALPINE_VERSION=3.18 3 | 4 | FROM php:${PHP_VERSION}-cli-alpine${ALPINE_VERSION} 5 | 6 | ARG DOCKER_USER_ID=1001 7 | ARG DOCKER_GROUP_ID=1001 8 | ARG PHP_XDEBUG_VERSION 9 | 10 | # https://blog.codito.dev/2022/11/composer-binary-only-docker-images/ 11 | COPY --from=composer/composer:2-bin /composer /usr/local/bin/composer 12 | 13 | RUN if ! getent group "${DOCKER_GROUP_ID}" > /dev/null; \ 14 | then addgroup -S -g "${DOCKER_GROUP_ID}" devs; \ 15 | fi \ 16 | && if ! 
getent passwd "${DOCKER_USER_ID}" > /dev/null; \ 17 | then adduser -S -u "${DOCKER_USER_ID}" -G "$(getent group "${DOCKER_GROUP_ID}" | awk -F: '{printf $1}')" dev; \ 18 | fi \ 19 | # php extensions 20 | && curl --location --output /usr/local/bin/install-php-extensions https://github.com/mlocati/docker-php-extension-installer/releases/latest/download/install-php-extensions \ 21 | && chmod +x /usr/local/bin/install-php-extensions \ 22 | && sync \ 23 | && install-php-extensions \ 24 | pcntl \ 25 | xdebug-${PHP_XDEBUG_VERSION} \ 26 | # xdebug command 27 | && curl --location --output /usr/local/bin/xdebug https://github.com/julienfalque/xdebug/releases/download/v2.0.0/xdebug \ 28 | && chmod +x /usr/local/bin/xdebug 29 | 30 | COPY docker/xdebug.ini /usr/local/etc/php/conf.d/docker-php-ext-xdebug.ini -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Ayman Reda Bedair 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Unstructured Text Parser [PHP] 2 | =========================================== 3 | [![Tests](https://github.com/aymanrb/php-unstructured-text-parser/actions/workflows/php.yml/badge.svg)](https://github.com/aymanrb/php-unstructured-text-parser/actions/workflows/php.yml) 4 | [![Coverage Status](https://coveralls.io/repos/github/aymanrb/php-unstructured-text-parser/badge.svg?branch=master)](https://coveralls.io/github/aymanrb/php-unstructured-text-parser?branch=master) 5 | [![Latest Stable Version](https://poser.pugx.org/aymanrb/php-unstructured-text-parser/v/stable.svg)](https://packagist.org/packages/aymanrb/php-unstructured-text-parser) 6 | [![Total Downloads](https://poser.pugx.org/aymanrb/php-unstructured-text-parser/downloads)](https://packagist.org/packages/aymanrb/php-unstructured-text-parser) 7 | [![License](https://poser.pugx.org/aymanrb/php-unstructured-text-parser/license.svg)](https://packagist.org/packages/aymanrb/php-unstructured-text-parser) 8 | 9 | About Unstructured Text Parser 10 | ---------------------------------- 11 | This is a small PHP library to help extract text out of documents that are not structured in a processing-friendly format. 12 | When you want to parse text out of form-generated emails, for example, you can create a template matching the expected incoming mail format 13 | while specifying the variable text elements, and leave it to the class to extract your pre-formatted variables out of the incoming mails' body text. 
14 | 15 | Useful when you want to parse data out of: 16 | * Emails generated from web forms 17 | * Documents with definable templates / expressions 18 | 19 | Installation 20 | ---------- 21 | PHP Unstructured Text Parser is available on [Packagist](https://packagist.org/packages/aymanrb/php-unstructured-text-parser) (using semantic versioning), and installation via [Composer](https://getcomposer.org) is recommended. 22 | Add the following line to your `composer.json` file: 23 | 24 | ```json 25 | "aymanrb/php-unstructured-text-parser": "~2.0" 26 | ``` 27 | 28 | or run 29 | 30 | ```sh 31 | composer require aymanrb/php-unstructured-text-parser 32 | ``` 33 | 34 | 35 | [Usage example](https://github.com/aymanrb/php-unstructured-text-parser/blob/master/examples/run.php) 36 | ---------- 37 | ```php 38 | parseText($textToParse); 47 | print_r($parseResults->getParsedRawData()); 48 | 49 | //slower, performs a similarity check on available templates to select the best-matching template before parsing 50 | print_r( 51 | $parser 52 | ->parseText($textToParse, true) 53 | ->getParsedRawData() 54 | ); 55 | ``` 56 | 57 | Parsing Procedure 58 | ---------- 59 | 1- Grab a single copy of the text you want to parse. 60 | 61 | 2- Replace every varying piece of text within it with a named variable in the form of ``{%VariableName%}`` if you want to match 62 | everything in this part of the text or ``{%VariableName:Pattern%}`` if you want to match a specific set of characters or use a more 63 | precise pattern. 64 | 65 | 3- Add the template file to the templates directory (defined in the parsing code) with a txt extension, e.g. ``fileName.txt`` 66 | 67 | 4- Pass the text you wish to parse to the ``parseText`` method of the class and let it do the magic for you. 
68 | 69 | Template Example 70 | ------------------------ 71 | If the text document you want to parse looks like this: 72 | 73 | ``` 74 | Hello, 75 | If you wish to parse message coming from a website that states info like: 76 | ID & Source: 12234432 Website Form 77 | Name: Pet Cat 78 | E-Mail: email@example.com 79 | Comment: Some text goes here 80 | 81 | Thank You, 82 | Best Regards 83 | Admin 84 | ``` 85 | 86 | Your Template file (``example_template.txt``) could be something like: 87 | 88 | ``` 89 | Hello, 90 | If you wish to parse message coming from a website that states info like: 91 | ID & Source: {%id:[0-9]+%} {%source%} 92 | Name: {%senderName%} 93 | E-Mail: {%senderEmail%} 94 | Comment: {%comment%} 95 | 96 | Thank You, 97 | Best Regards 98 | Admin 99 | ``` 100 | 101 | The output of a successful parsing job would be: 102 | 103 | ``` 104 | Array( 105 | 'id' => '12234432', 106 | 'source' => 'Website Form', 107 | 'senderName' => 'Pet Cat', 108 | 'senderEmail' => 'email@example.com', 109 | 'comment' => 'Some text goes here' 110 | ) 111 | ``` 112 | 113 | Upgrading from v1.x to v2.x 114 | ------------------------ 115 | Version 2.0 is more or less a refactored copy of version 1.x of the library and provides the exact same functionality. 116 | There is just one slight difference in the results returned: it is now a parsed data object instead of an array. 117 | To get the results as an array, like it used to be in v1.x, simply call "*getParsedRawData()*" on the returned object. 
118 | 119 | ```php 120 | parseText($textToParse); 123 | 124 | //In 2.x you need to do the following if you want an array 125 | $extractedArray = $parser->parseText($textToParse)->getParsedRawData(); 126 | ``` -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "aymanrb/php-unstructured-text-parser", 3 | "description": "A PHP library to help extract text out of text documents", 4 | "keywords": [ 5 | "text parser", 6 | "extract data", 7 | "php parser", 8 | "templates parsing", 9 | "regex parsing", 10 | "form parsing", 11 | "text parse" 12 | ], 13 | "config": { 14 | "platform": { 15 | "php": "7.4.0" 16 | } 17 | }, 18 | "type": "library", 19 | "license": "MIT", 20 | "authors": [ 21 | { 22 | "name": "Ayman R. Bedair", 23 | "email": "aymanrb@gmail.com", 24 | "homepage": "http://www.aymanrb.com", 25 | "role": "Developer" 26 | }, 27 | { 28 | "name": "Pavel", 29 | "homepage": "http://www.aisamiery.ru", 30 | "role": "Developer" 31 | } 32 | ], 33 | "homepage": "https://github.com/aymanrb/php-unstructured-text-parser", 34 | "support": { 35 | "issues": "https://github.com/aymanrb/php-unstructured-text-parser/issues", 36 | "source": "https://github.com/aymanrb/php-unstructured-text-parser" 37 | }, 38 | "autoload": { 39 | "psr-4": { 40 | "aymanrb\\UnstructuredTextParser\\": "src/" 41 | } 42 | }, 43 | "autoload-dev": { 44 | "psr-4": { 45 | "aymanrb\\UnstructuredTextParser\\Tests\\": "tests/" 46 | } 47 | }, 48 | "require": { 49 | "ext-json": "*", 50 | "php": ">=7.4.0", 51 | "psr/log": "^1.0.1 || ^2.0 || ^3.0" 52 | }, 53 | "require-dev": { 54 | "phpunit/phpunit": "^8.4", 55 | "php-coveralls/php-coveralls": "^2.1" 56 | }, 57 | "scripts": { 58 | "test": "phpunit tests" 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /coveralls.yml: 
-------------------------------------------------------------------------------- 1 | coverage_clover: build/logs/clover.xml 2 | json_path: build/logs/coveralls-upload.json 3 | service_name: travis-ci -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | php-7.4: &php 5 | build: 6 | args: 7 | ALPINE_VERSION: "3.16" 8 | PHP_VERSION: "7.4" 9 | PHP_XDEBUG_VERSION: "3.1.2" 10 | working_dir: /app 11 | volumes: 12 | - .:/app 13 | environment: 14 | PHP_IDE_CONFIG: serverName=php-text-parser 15 | php-8.0: 16 | <<: *php 17 | build: 18 | args: 19 | PHP_VERSION: "8.0" 20 | php-8.1: 21 | <<: *php 22 | build: 23 | args: 24 | PHP_VERSION: "8.1" 25 | php-8.2: 26 | <<: *php 27 | build: 28 | args: 29 | PHP_VERSION: "8.2" 30 | PHP_XDEBUG_VERSION: "3.2.1" -------------------------------------------------------------------------------- /docker/xdebug.ini: -------------------------------------------------------------------------------- 1 | ; XDebug 3 → https://xdebug.org/docs/upgrade_guide 2 | ; You can dynamically enable XDebug by setting XDEBUG_MODE env variable. 3 | ; Some options can be dynamically overridden with XDEBUG_CONFIG env variable. 4 | xdebug.mode = off 5 | xdebug.start_with_request = yes 6 | xdebug.discover_client_host = true 7 | xdebug.client_host = host.docker.internal 8 | 9 | ; Required so XDebug DOES NOT print warning "Could not connect to debugging client" 10 | xdebug.log = /app/docker/php/xdebug.log 11 | xdebug.log_level = 1 -------------------------------------------------------------------------------- /examples/run.php: -------------------------------------------------------------------------------- 1 | getExtension() === 'txt') { 13 | echo $txtFileObj->getFilename() . 
PHP_EOL; 14 | 15 | $parseResults = $parser->parseFileContent($txtFileObj->getPathname(), true); 16 | 17 | print_r($parseResults->getParsedRawData()); 18 | 19 | if ($parseResults->getAppliedTemplateFile()) { 20 | echo 'Matched Template: ' . $parseResults->getAppliedTemplateFile() . PHP_EOL; 21 | } 22 | } 23 | } 24 | } catch (Exception $e) { 25 | echo $e->getMessage(); 26 | } 27 | -------------------------------------------------------------------------------- /examples/templates/t_0.txt: -------------------------------------------------------------------------------- 1 | Sent at {%created%} 2 | 3 |
4 |
5 | Hi {%name1%},
6 |
7 |
{%action%}


8 |
9 | 10 | 11 | Click here to view the booked tour: {%tour%}


E-mail address :{%mail1%}

Customer Name :{%myname%}

Country :{%country%}

Arrival Date : {%arrival_date%}

Departure Date : {%departure_date%}

Number of Adults : {%adults%}

Number of Children :{%children%}

Additional Requests :{%more_data%}

12 | -------------------------------------------------------------------------------- /examples/templates/t_1.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 |
Name{%Name%}
UserEmail{%Mail%}
Nationality{%Nationality%}
UserPhone{%Phone%}
Arrival{%ArrivalDate%}
Departure{%Departure%}
Adults{%adults%}
Children{%Children%}
Children Age{%child%}
Comment{%comment%}
Url Goal{%goal%}
Ver Code{%ver_code%}
Url{%url%} 42 |
Ip Address{%ip%}
Date & Time{%created%}
Source{%source%}
Http Referer{%referer%}
56 | -------------------------------------------------------------------------------- /examples/templates/t_2.txt: -------------------------------------------------------------------------------- 1 | Hi Sir, 2 | 3 | New Booking ... information follows: 4 | 5 | Username: {%user%} 6 | Full Name: {%name%} 7 | E-Mail Address: {%email%} 8 | URL: {%url%} 9 | Nationality: {%nationality%} 10 | Phone: {%phone%} 11 | Comments: {%text%} 12 | -------------------------------------------------------------------------------- /examples/templates/t_3.txt: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | 8 | 115 | 116 | 117 | 118 | 119 | 120 |
 
9 | 10 | 11 | 12 | 14 | 18 | 20 | 21 | 22 |
 JotForm.com 
23 | 24 | 25 | 26 | 28 | 101 | 103 | 104 | 105 | 107 | 109 | 111 | 112 | 113 |
  29 | 30 | 31 | 32 | 34 | 36 | 37 | 38 | 40 | 41 | 42 | 43 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 58 | 59 | 60 | 61 | 63 | 64 | 65 | 66 | 68 | 69 | 70 | 71 | 73 | 74 | 75 | 76 | 78 | 79 | 80 | 81 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 |
QuestionAnswer
Preferred Date 39 | of Arrival{%arrival_date%}
Date of 44 | Departure{%departure_date%}
Adults{%adults%}
Children{%children%}
Children's 57 | Age{%cages%}
Preferred 62 | Hotel Class{%class%}
Describe your 67 | tour. The more details the better{%more_info%}
Full 72 | Name{%name%}
Nationality{%nationality%}
Phone 82 | Number{%phone%}
E-mail{%email%}
Confirm 91 | E-mail{%emaila%}
Comments{%comments%}
100 |
 
   
114 |
 
121 |



122 | -------------------------------------------------------------------------------- /examples/templates/t_4.txt: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 103 | 104 | 105 | 106 | 107 |
 
10 | 11 | 13 | 19 | 21 | 22 |
15 | jotform.com 18 |
23 | 25 | 26 | 28 | 91 | 93 | 94 | 95 | 97 | 99 | 101 | 102 |
29 | 31 | 32 | 34 | 36 | 37 | 39 | 41 | 42 | 44 | 46 | 47 | 49 | 51 | 52 | 54 | 56 | 57 | 59 | 60 | 61 | 63 | 64 | 65 | 67 | 69 | 70 | 72 | 74 | 75 | 77 | 79 | 80 | 82 | 83 | 84 | 86 | 88 | 89 |
QuestionAnswer
Date of Arrival{%a%}
Adults{%as%}
Children{%cc%}
Children's age{%ca%}
Full Name{%sad%}
Phone Number{%phone%}
Nationality{%nb%}
E-mail{%mail%}
Confirm E-mail{%mjs%}
Your Comments{%saas%}
url{%url%}
90 |
 
108 | 109 | -------------------------------------------------------------------------------- /examples/templates/t_5.txt: -------------------------------------------------------------------------------- 1 | 104 | 105 |
2 | 3 | 4 | 7 | 8 | 9 | 10 | 13 | 14 | 15 | 18 | 19 | 20 | 21 | 24 | 25 | 26 | 29 | 30 | 31 | 32 | 35 | 36 | 37 | 40 | 41 | 42 | 43 | 46 | 47 | 48 | 51 | 52 | 53 | 54 | 57 | 58 | 59 | 62 | 63 | 64 | 65 | 68 | 69 | 70 | 73 | 74 | 75 | 76 | 79 | 80 | 81 | 84 | 85 | 86 | 87 | 90 | 91 | 92 | 95 | 96 | 97 | 98 | 101 | 102 |
5 | Date of arrival 6 |
  11 | {%vt%} 12 |
16 | Name 17 |
  22 | {%vb%} 23 |
27 | Nationality 28 |
  33 | {%vfds%} 34 |
38 | Phone 39 |
  44 | {%vsac%} 45 |
49 | Email 50 |
  55 | {%v523%} 56 |
60 | Adults 61 |
  66 | {%adsa%} 67 |
71 | Children 72 |
  77 | {%c1%} 78 |
82 | Tour Package 83 |
  88 | {%url%} 89 |
93 | IP Address 94 |
  99 | {%ip%} 100 |
103 |
106 | -------------------------------------------------------------------------------- /examples/templates/t_6.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 30 | 31 | 32 | 33 | 34 |

Chat Transcript

35 | 36 | 37 | 56 | 57 |
38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 |
Visitor:{%name%}
Operator:{%operator_name%}
Company:{%source%}
Started:{%start%}
Finished:{%end%}
55 |
58 | {%data%} 59 | 60 | 61 | -------------------------------------------------------------------------------- /examples/templates/t_7.txt: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | 8 | 115 | 116 | 117 | 118 | 119 | 120 |
 
9 | 10 | 11 | 12 | 14 | 18 | 20 | 21 | 22 |
 JotForm.com 
23 | 24 | 25 | 26 | 28 | 101 | 103 | 104 | 105 | 107 | 109 | 111 | 112 | 113 |
  29 | 30 | 31 | 32 | 34 | 36 | 37 | 38 | 40 | 41 | 42 | 43 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 58 | 59 | 60 | 61 | 63 | 64 | 65 | 66 | 68 | 69 | 70 | 71 | 73 | 74 | 75 | 76 | 78 | 79 | 80 | 81 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 |
QuestionAnswer
Preferred Date 39 | of Arrival{%arrival_date%}
Date of 44 | Departure{%departure_date%}
Adults{%adults%}
Children{%children%}
Children's 57 | Age{%cages%}
Preferred 62 | Hotel Class{%class%}
Describe your 67 | tour. The more details the better{%more_info%}
Full 72 | Name{%name%}
Nationality{%nationality%}
Phone 82 | Number{%phone%}
E-mail{%email%}
Confirm 91 | E-mail{%emaila%}
Comments{%comments%}
100 |
 
   
114 |
 
121 |



122 | -------------------------------------------------------------------------------- /examples/templates/t_8.txt: -------------------------------------------------------------------------------- 1 | ~ New message received ~ 2 | 3 | Sender: {%id:[0-9]+%} {%name%} 4 | Sender-id: {%senderId%} 5 | Sender-full-name: {%senderName%} 6 | Sender-email: {%senderEmail%} 7 | Sender-website: {%senderSite%} 8 | Sender-number: {%senderPhone:\+[0-9]{10,15}%} 9 | Sender-nationality: {%senderCountry%} 10 | Message: 11 | {%senderMessage%} -------------------------------------------------------------------------------- /examples/test_txt_files/m_0.txt: -------------------------------------------------------------------------------- 1 | Sent at Thursday 9th of October 2014 03:20:44 PM 2 | 3 |
4 |
5 | Hi Ayman ,
6 |
7 |
New Booking


8 |
9 | 10 | 11 | Click here to view the booked tour: Tour Name


E-mail address : mymail@test.com

Customer Name : Hello World

Country : Egypt

Arrival Date : 11 - 10 - 2014

Departure Date : 11 - 10 - 2014

Number of Adults : 4

Number of Children : 2

Additional Requests : dsadsa

12 | -------------------------------------------------------------------------------- /examples/test_txt_files/m_1.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 |
NameClient Name
UserEmailtest@example.com
NationalityUnited States of America
UserPhone8885544555850
Arrival13/08/2013
Departure08/08/2013
Adults2
ChildrenNo. Child
Children AgeChild Age
Commentssssss Test
Url Goalcruise
Ver Code789456
Urlhttp://www.example.com/something.php 42 |
Ip Address99.999.9999.99
Date & Time2013-08-06 13:02:19
SourceDirect Booking
Http Refererhttp://www.example.com
56 | -------------------------------------------------------------------------------- /examples/test_txt_files/m_2.txt: -------------------------------------------------------------------------------- 1 | Hi Sir, 2 | 3 | New Booking ... information follows: 4 | 5 | Username: test 6 | Full Name: Eve Arron 7 | E-Mail Address: example@test.com 8 | URL: www.example.com/something 9 | Nationality: German 10 | Phone: 6655885524455 11 | Comments: Some Text Goes Here - Some Text Goes Here - Some Text Goes Here - Some Text Goes Here - Some Text Goes Here - Some Text Goes Here - Some Text Goes Here - Some Text Goes Here - Some Text Goes Here - Some Text Goes Here 12 | Some text goes here but will be ignored by the template since it's on a new line -------------------------------------------------------------------------------- /examples/test_txt_files/m_3.txt: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | 8 | 115 | 116 | 117 | 118 | 119 | 120 |
 
9 | 10 | 11 | 12 | 14 | 18 | 20 | 21 | 22 |
 JotForm.com 
23 | 24 | 25 | 26 | 28 | 101 | 103 | 104 | 105 | 107 | 109 | 111 | 112 | 113 |
  29 | 30 | 31 | 32 | 34 | 36 | 37 | 38 | 40 | 41 | 42 | 43 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 58 | 59 | 60 | 61 | 63 | 64 | 65 | 66 | 68 | 69 | 70 | 71 | 73 | 74 | 75 | 76 | 78 | 79 | 80 | 81 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 |
QuestionAnswer
Preferred Date 39 | of Arrival4-22-2013
Date of 44 | Departure4-29-2013
Adults2
Children3
Children's 57 | Age3,3,4
Preferred 62 | Hotel Class5★ Standard
Describe your 67 | tour. The more details the betterWe would like to book
Full 72 | NameMy Name Test
NationalityUnited States
Phone 82 | Number98855448888554
E-mail321tinker@gmail.com
Confirm 91 | E-mailayss@gmail.com
CommentsTest some more information
100 |
 
   
114 |
 
121 |



122 | -------------------------------------------------------------------------------- /examples/test_txt_files/m_4.txt: -------------------------------------------------------------------------------- 1 | 3 | 5 | 6 | 7 | 8 | 9 | 106 | 107 | 108 | 109 | 110 |
 
11 | 12 | 14 | 20 | 22 | 23 |
16 | jotform.com 19 |
24 | 26 | 27 | 29 | 94 | 96 | 97 | 98 | 100 | 102 | 104 | 105 |
30 | 32 | 33 | 35 | 37 | 38 | 40 | 42 | 43 | 45 | 47 | 48 | 50 | 52 | 53 | 55 | 57 | 58 | 60 | 62 | 63 | 65 | 67 | 68 | 70 | 72 | 73 | 75 | 77 | 78 | 80 | 82 | 83 | 85 | 86 | 87 | 89 | 91 | 92 |
QuestionAnswer
Date of Arrival18-03-2014
Adults2
Children0
Children's age
Full NameJohn 61 | Williams
Phone Number() 66 |
NationalityUnited States
E-mailsomemail@yahoo.com
Confirm E-mailsomemail@yahoo.com
Your Commentswe would like to ....
urlhttp://www.domain.com/test/yus
93 |
 
111 | 112 | -------------------------------------------------------------------------------- /examples/test_txt_files/m_5.txt: -------------------------------------------------------------------------------- 1 | 104 | 105 |
2 | 3 | 4 | 7 | 8 | 9 | 10 | 13 | 14 | 15 | 18 | 19 | 20 | 21 | 24 | 25 | 26 | 29 | 30 | 31 | 32 | 35 | 36 | 37 | 40 | 41 | 42 | 43 | 46 | 47 | 48 | 51 | 52 | 53 | 54 | 57 | 58 | 59 | 62 | 63 | 64 | 65 | 68 | 69 | 70 | 73 | 74 | 75 | 76 | 79 | 80 | 81 | 84 | 85 | 86 | 87 | 90 | 91 | 92 | 95 | 96 | 97 | 98 | 101 | 102 |
5 | Date of arrival 6 |
  11 | 06/02/2015 12 |
16 | Name 17 |
  22 | Full Name of New Client 23 |
27 | Nationality 28 |
  33 | United Kingdom 34 |
38 | Phone 39 |
  44 | 07780918266 45 |
49 | Email 50 |
  55 | test@test.com 56 |
60 | Adults 61 |
  66 | 2 67 |
71 | Children 72 |
  77 | 0 78 |
82 | Tour Package 83 |
  88 | http://www.test.com/test/ 89 |
93 | IP Address 94 |
  99 | 99.999.999.999 100 |
103 |
106 | -------------------------------------------------------------------------------- /examples/test_txt_files/m_6.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 30 | 31 | 32 | 33 | 34 |

Chat Transcript

35 | 36 | 37 | 56 | 57 |
38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 |
Visitor:Jue
Operator:Chat Person
Company:chat.com
Started:12-Oct-2014 11:43:45 PM
Finished:13-Oct-2014 12:12:51 AM
55 |
58 |
11:55:49 PM: Client:
    sounds good!
59 |
11:56:10 PM: Chat Person:
    May I have your phone number ?
60 |
12:02:48 AM:
Chat Person is now off-line and may not reply. Currently in room: Client.
61 |
12:03:49 AM:
Client is now off-line and may not reply. Currently in room: room is empty.
62 |
63 |
64 |
65 | Visitor Details 66 | 67 | 68 | 99 | 100 |
69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 |
Name:Client
E-Mail:client@gmail.com
Department:Planning
Called From:http://www.myurl.com/page.html
IP Address:66.666.66.123
Host Name:98.234.53.159
Referrer:http://www.google.com
Browser/OS:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36
Location:California, United States
98 |
101 |
102 |
103 |
104 |
105 |
This transcript email message was automatically generated by ServiceProvider
106 | 107 | 108 | -------------------------------------------------------------------------------- /examples/test_txt_files/m_7.txt: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | 8 | 115 | 116 | 117 | 118 | 119 | 120 |
 
9 | 10 | 11 | 12 | 14 | 18 | 20 | 21 | 22 |
 JotForm.com 
23 | 24 | 25 | 26 | 28 | 101 | 103 | 104 | 105 | 107 | 109 | 111 | 112 | 113 |
  29 | 30 | 31 | 32 | 34 | 36 | 37 | 38 | 40 | 41 | 42 | 43 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 58 | 59 | 60 | 61 | 63 | 64 | 65 | 66 | 68 | 69 | 70 | 71 | 73 | 74 | 75 | 76 | 78 | 79 | 80 | 81 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 |
QuestionAnswer
Preferred Date 39 | of Arrival4-22-2013
Date of 44 | Departure4-29-2013
Adults2
Children3
Children's 57 | Age3,3,4
Preferred 62 | Hotel Class5★ Standard
Describe your 67 | tour. The more details the betterWe would like to book
Full 72 | NameMy Name Test
NationalityUnited States
Phone 82 | Number98855448888554
E-mail321tinker@gmail.com
Confirm 91 | E-mailayss@gmail.com
CommentsTest some more information
100 |
 
   
114 |
 
121 |



122 | -------------------------------------------------------------------------------- /examples/test_txt_files/m_8.txt: -------------------------------------------------------------------------------- 1 | ~ New message received ~ 2 | 3 | Sender: 12345678 John Anthony Doe 4 | Sender-id: 12345678 5 | Sender-full-name: John Anthony Doe 6 | Sender-email: example@test.com 7 | Sender-website: www.example.com/something 8 | Sender-number: +4917914999410 9 | Sender-nationality: N/A 10 | Message: 11 | Some Text Goes Here - Some Text Goes Here -------------------------------------------------------------------------------- /phpunit.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 8 | 9 | 10 | ./tests/ 11 | 12 | 13 | 14 | 15 | ./src 16 | 17 | ./src/Exception 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/Exception/InvalidParseFileException.php: -------------------------------------------------------------------------------- 1 | Pattern) 14 | private const REGEX_ORPHAN_BACKSLASH = '/(?.*)'; //(?.*) 18 | private const REPLACE_VARIABLE_WITH_PATTERN = '(?<$1>$2)'; //(?Pattern) 19 | 20 | private \FilesystemIterator $directoryIterator; 21 | 22 | public function __construct(string $templatesDir) 23 | { 24 | $this->directoryIterator = $this->createTemplatesDirIterator($templatesDir); 25 | } 26 | 27 | public function getTemplates(string $text, bool $findMatchingTemplate = false): array 28 | { 29 | if ($findMatchingTemplate) { 30 | return $this->findTemplate($text); 31 | } 32 | 33 | return $this->getAllValidTemplates(); 34 | } 35 | 36 | private function createTemplatesDirIterator(string $iterableDirectoryPath): \FilesystemIterator 37 | { 38 | if (empty($iterableDirectoryPath) || !is_dir($iterableDirectoryPath)) { 39 | throw new InvalidTemplatesDirectoryException( 40 | 'Invalid templates directory provided' 41 | ); 42 | } 43 | 44 | return new 
\FilesystemIterator(rtrim($iterableDirectoryPath, '/')); 45 | } 46 | 47 | private function findTemplate(string $text): array 48 | { 49 | $matchedTemplate = []; 50 | $maxMatch = -1; 51 | 52 | foreach ($this->directoryIterator as $fileInfo) { 53 | $templateContent = file_get_contents($fileInfo->getPathname()); 54 | 55 | // compare template against text to decide on similarity percentage 56 | similar_text($text, $templateContent, $matchPercentage); 57 | 58 | if ($matchPercentage > $maxMatch) { 59 | $maxMatch = $matchPercentage; 60 | $matchedTemplate = [$fileInfo->getPathname() => $this->prepareTemplate($templateContent)]; 61 | } 62 | } 63 | 64 | return $matchedTemplate; 65 | } 66 | 67 | private function getAllValidTemplates(): array 68 | { 69 | $templates = []; 70 | foreach ($this->directoryIterator as $fileInfo) { 71 | if (!is_file($fileInfo->getPathname())) { 72 | continue; 73 | } 74 | 75 | $templateContent = file_get_contents($fileInfo->getPathname()); 76 | $templates[$fileInfo->getPathname()] = $this->prepareTemplate($templateContent); 77 | } 78 | 79 | krsort($templates); 80 | 81 | return $templates; 82 | } 83 | 84 | private function prepareTemplate(string $templateText): string 85 | { 86 | $templateText = preg_quote($templateText, '/'); 87 | 88 | $templateText = preg_replace( 89 | self::REGEX_VARIABLE_WITH_PATTERN, 90 | self::REPLACE_VARIABLE_WITH_PATTERN, 91 | $templateText 92 | ); 93 | 94 | $templateText = preg_replace_callback( 95 | self::REGEX_PREPARED_VARIABLE_WITH_PATTERN, 96 | function ($matches) { 97 | $variableWithPattern = preg_replace(self::REGEX_ORPHAN_BACKSLASH, '', $matches[0]); 98 | 99 | return str_replace(self::STR_SEARCH_TRIPLE_BACKSLASHES, '\\', $variableWithPattern); 100 | }, 101 | $templateText 102 | ); 103 | 104 | return preg_replace( 105 | self::REGEX_GENERIC_VARIABLE, 106 | self::REPLACE_GENERIC_VARIABLE, 107 | $templateText 108 | ); 109 | } 110 | } -------------------------------------------------------------------------------- 
/src/ParseResult.php: -------------------------------------------------------------------------------- 1 | parsedRawData; 18 | } 19 | 20 | public function setParsedRawData(array $parsedRawData): void 21 | { 22 | $this->parsedRawData = $parsedRawData; 23 | $this->cleanData(); 24 | } 25 | 26 | public function getAppliedTemplateFile(): ?string 27 | { 28 | return $this->appliedTemplateFile; 29 | } 30 | 31 | public function setAppliedTemplateFile(string $appliedTemplateFile): void 32 | { 33 | $this->appliedTemplateFile = $appliedTemplateFile; 34 | } 35 | 36 | public function countResults(): int 37 | { 38 | return count($this->parsedRawData); 39 | } 40 | 41 | public function keyExists(string $key): bool 42 | { 43 | return array_key_exists($key, $this->parsedRawData); 44 | } 45 | 46 | public function get(string $resultDataKey, bool $failOnUndefinedKey = false): ?string 47 | { 48 | if (!$this->keyExists($resultDataKey)) { 49 | if (!$failOnUndefinedKey) { 50 | return null; 51 | } 52 | 53 | throw new InvalidParsedDataKeyException('Undefined results key: ' . 
$resultDataKey); 54 | } 55 | 56 | return $this->parsedRawData[$resultDataKey]; 57 | } 58 | 59 | private function cleanData(): void 60 | { 61 | foreach ($this->parsedRawData as $key => $value) { 62 | $this->parsedRawData[$key] = $this->cleanElement($value); 63 | } 64 | } 65 | 66 | private function cleanElement(string $value): string 67 | { 68 | return trim(strip_tags($value)); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/TextParser.php: -------------------------------------------------------------------------------- 1 | setLogger($logger); 28 | $this->templatesHelper = new TemplatesHelper($templatesDir); 29 | $this->resetParseResults(); 30 | } 31 | 32 | public function parseFileContent(string $filePath, bool $findMatchingTemplate = false): ParseResult 33 | { 34 | if (!is_file($filePath)) { 35 | throw new InvalidParseFileException($filePath); 36 | } 37 | 38 | return $this->parseText(file_get_contents($filePath), $findMatchingTemplate); 39 | } 40 | 41 | public function parseText(string $text, bool $findMatchingTemplate = false): ParseResult 42 | { 43 | $this->resetParseResults(); 44 | 45 | $parsableTemplates = $this->templatesHelper->getTemplates($text, $findMatchingTemplate); 46 | 47 | foreach ($parsableTemplates as $templatePath => $templatePattern) { 48 | $this->logger->debug(sprintf('Parsing against template: %s', $templatePath)); 49 | 50 | if ($this->extractData($text, $templatePattern)) { 51 | $this->parseResults->setAppliedTemplateFile($templatePath); 52 | } 53 | } 54 | 55 | $this->logger->info(sprintf('Data extracted: %s', json_encode($this->parseResults->getParsedRawData()))); 56 | 57 | return $this->parseResults; 58 | } 59 | 60 | public function getParseResults(): ParseResult 61 | { 62 | return $this->parseResults; 63 | } 64 | 65 | private function extractData(string $text, string $template): bool 66 | { 67 | //Extract the text based on the provided template using REGEX 68 | preg_match('/' . 
$template . '/s', $text, $matches); 69 | 70 | //Extract only the named parameters from the matched regex array 71 | $keys = array_filter(array_keys($matches), 'is_string'); 72 | $matches = array_intersect_key($matches, array_flip($keys)); 73 | 74 | if (empty($matches)) { 75 | return false; 76 | } 77 | 78 | $this->parseResults->setParsedRawData($matches); 79 | 80 | return true; 81 | } 82 | 83 | private function resetParseResults(): void 84 | { 85 | $this->parseResults = new ParseResult(); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /tests/Helper/TemplatesHelperTest.php: -------------------------------------------------------------------------------- 1 | expectException(InvalidTemplatesDirectoryException::class); 17 | new TemplatesHelper(__DIR__ . '/DirectoryThatDoesNotExist'); 18 | } 19 | 20 | private function getTemplatesHelperInstance() 21 | { 22 | return new TemplatesHelper(__DIR__ . self::DIR_HELPER_TEMPLATES); 23 | } 24 | 25 | public function testGetAllTemplates() 26 | { 27 | $templatesHelper = $this->getTemplatesHelperInstance(); 28 | $expectedTemplatesCount = count(glob(__DIR__ . self::DIR_HELPER_TEMPLATES . 
"/*")); 29 | 30 | $returnedTemplates = $templatesHelper->getTemplates('regardless of what comes here'); 31 | $this->assertCount($expectedTemplatesCount, $returnedTemplates); 32 | } 33 | 34 | public function testGetAllTemplatesRegexIsPrepared() 35 | { 36 | $templatesHelper = $this->getTemplatesHelperInstance(); 37 | 38 | $returnedTemplates = $templatesHelper->getTemplates('regardless of what comes here'); 39 | $this->assertTrue($this->checkPreparedTemplates($returnedTemplates)); 40 | } 41 | 42 | public function testGetMostMatchingTemplateToText() 43 | { 44 | $templatesHelper = $this->getTemplatesHelperInstance(); 45 | 46 | $returnedTemplates = $templatesHelper->getTemplates('Sent to customer service from Someone', true); 47 | $this->assertCount(1, $returnedTemplates); 48 | } 49 | 50 | public function testGetMostMatchingTemplateToTextRegexIsPrepared() 51 | { 52 | $templatesHelper = $this->getTemplatesHelperInstance(); 53 | 54 | $returnedTemplates = $templatesHelper->getTemplates('Sent to customer service from Someone', true); 55 | $this->assertTrue($this->checkPreparedTemplates($returnedTemplates)); 56 | } 57 | 58 | private function checkPreparedTemplates(array $templatesArray): bool 59 | { 60 | foreach ($templatesArray as $templatePath => $template) { 61 | $expectedTemplate = $this->getExpectedTemplate($templatePath); 62 | 63 | $this->assertEquals($expectedTemplate, $template); 64 | $this->assertTrue($this->isValidRegex($template)); 65 | } 66 | 67 | return true; 68 | } 69 | 70 | private function isValidRegex(string $pattern): bool 71 | { 72 | try { 73 | preg_match('/' . $pattern . 
'/s', ''); 74 | } catch (\Throwable $exception) { 75 | return false; 76 | } 77 | 78 | return true; 79 | } 80 | 81 | private function getExpectedTemplate(string $templatePath) 82 | { 83 | return file_get_contents( 84 | str_replace( 85 | self::DIR_HELPER_TEMPLATES, 86 | self::DIR_EXPECTED_TEMPLATES, 87 | $templatePath 88 | ) 89 | ); 90 | } 91 | 92 | } 93 | -------------------------------------------------------------------------------- /tests/Helper/expected_templates/temp1.txt: -------------------------------------------------------------------------------- 1 | \Simple Template 01 (?.*)\<\/htmlTag\> 2 | 3 | Multi Line -------------------------------------------------------------------------------- /tests/Helper/expected_templates/temp2.txt: -------------------------------------------------------------------------------- 1 | Simple Template 01 (?.*) -------------------------------------------------------------------------------- /tests/Helper/expected_templates/temp3.txt: -------------------------------------------------------------------------------- 1 | Template with specified pattern (?[0-9]+) -------------------------------------------------------------------------------- /tests/Helper/expected_templates/temp4.txt: -------------------------------------------------------------------------------- 1 | Template with specified pattern containing special escaped characters (?\+[0-9]{10,16}) -------------------------------------------------------------------------------- /tests/Helper/helper_templates/temp1.txt: -------------------------------------------------------------------------------- 1 | Simple Template 01 {%variable%} 2 | 3 | Multi Line -------------------------------------------------------------------------------- /tests/Helper/helper_templates/temp2.txt: -------------------------------------------------------------------------------- 1 | Simple Template 01 {%variable%} -------------------------------------------------------------------------------- 
/tests/Helper/helper_templates/temp3.txt: -------------------------------------------------------------------------------- 1 | Template with specified pattern {%variable:[0-9]+%} -------------------------------------------------------------------------------- /tests/Helper/helper_templates/temp4.txt: -------------------------------------------------------------------------------- 1 | Template with specified pattern containing special escaped characters {%variable:\+[0-9]{10,16}%} -------------------------------------------------------------------------------- /tests/ParseResultTest.php: -------------------------------------------------------------------------------- 1 | setParsedRawData( 19 | [ 20 | 'resultsKey' => 'resultsValue ', 21 | 'foundKey' => 'parsedContent SometextInBold', 22 | 'date' => '2019-01-01', 23 | 'time' => ' 20:11', 24 | ] 25 | ); 26 | } 27 | 28 | return $parseResult; 29 | } 30 | 31 | public function testGetParsedRawData() 32 | { 33 | $parseResult = $this->getNewParseResultObject(); 34 | 35 | $this->assertIsArray($parseResult->getParsedRawData()); 36 | $this->assertEmpty($parseResult->getParsedRawData()); 37 | } 38 | 39 | public function testSetParsedRawData() 40 | { 41 | $parseResult = $this->getNewParseResultObject(true); 42 | 43 | $this->assertIsArray($parseResult->getParsedRawData()); 44 | $this->assertNotEmpty($parseResult->getParsedRawData()); 45 | $this->assertArrayHasKey('resultsKey', $parseResult->getParsedRawData()); 46 | $this->assertArrayHasKey('foundKey', $parseResult->getParsedRawData()); 47 | } 48 | 49 | public function testSetParsedRawDataCleansContent() 50 | { 51 | $parseResult = $this->getNewParseResultObject(true); 52 | 53 | $resultsArray = $parseResult->getParsedRawData(); 54 | 55 | $this->assertEquals('resultsValue', $resultsArray['resultsKey']); 56 | $this->assertEquals('parsedContent SometextInBold', $resultsArray['foundKey']); 57 | $this->assertEquals('20:11', $resultsArray['time']); 58 | } 59 | 60 | public function 
testAppliedTemplateFileSetterAndGetter() 61 | { 62 | $parseResult = $this->getNewParseResultObject(); 63 | 64 | $this->assertEmpty($parseResult->getAppliedTemplateFile()); 65 | 66 | $matchedTemplatePath = 'path/to/matched/Template.txt'; 67 | $parseResult->setAppliedTemplateFile($matchedTemplatePath); 68 | 69 | $this->assertNotEmpty($parseResult->getAppliedTemplateFile()); 70 | $this->assertEquals($matchedTemplatePath, $parseResult->getAppliedTemplateFile()); 71 | } 72 | 73 | public function testCountResults() 74 | { 75 | $parseResult = $this->getNewParseResultObject(true); 76 | 77 | $this->assertEquals(4, $parseResult->countResults()); 78 | } 79 | 80 | public function testKeyExists() 81 | { 82 | $parseResult = $this->getNewParseResultObject(true); 83 | 84 | $this->assertTrue($parseResult->keyExists('foundKey')); 85 | $this->assertTrue($parseResult->keyExists('resultsKey')); 86 | $this->assertFalse($parseResult->keyExists('AKeyWeNeverFound')); 87 | } 88 | 89 | public function testGetResultKey() 90 | { 91 | $parseResult = $this->getNewParseResultObject(true); 92 | $this->assertEquals('resultsValue', $parseResult->get('resultsKey')); 93 | $this->assertEquals('parsedContent SometextInBold', $parseResult->get('foundKey')); 94 | $this->assertEquals('2019-01-01', $parseResult->get('date')); 95 | $this->assertEquals('20:11', $parseResult->get('time')); 96 | $this->assertNull($parseResult->get('AKeyWeNeverFound')); 97 | } 98 | 99 | public function testStrictGetResultKeyThrowsInvalidKeyException() 100 | { 101 | $parseResult = $this->getNewParseResultObject(true); 102 | $this->assertEquals('resultsValue', $parseResult->get('resultsKey', true)); 103 | 104 | 105 | $this->expectException(InvalidParsedDataKeyException::class); 106 | $parseResult->get('AKeyWeNeverFound', true); 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /tests/TextParserTest.php: 
-------------------------------------------------------------------------------- 1 | expectException(InvalidTemplatesDirectoryException::class); 17 | new TextParser(__DIR__ . '/DirectoryThatDoesNotExist'); 18 | } 19 | 20 | public function testTextParsingFailure() 21 | { 22 | $parser = $this->getTemplatesParser(); 23 | $parser->parseText('Some Text that can not be matched against a template'); 24 | 25 | $this->assertEmpty($parser->getParseResults()->getParsedRawData()); 26 | } 27 | 28 | public function testTextParsingResetsPreviousMatch() 29 | { 30 | $parser = $this->getTemplatesParser(); 31 | $parser->parseFileContent(__DIR__ . '/test_txt_files/t0TemplateMatch.txt'); 32 | $this->assertEquals(13, $parser->getParseResults()->countResults()); 33 | 34 | $parser->parseFileContent(__DIR__ . '/test_txt_files/noMatch.txt'); 35 | $this->assertEmpty($parser->getParseResults()->getParsedRawData()); 36 | } 37 | 38 | public function testTextParsingSuccess() 39 | { 40 | $parser = $this->getTemplatesParser(); 41 | $parser->parseFileContent(__DIR__ . '/test_txt_files/t0TemplateMatch.txt'); 42 | $this->assertEquals(13, $parser->getParseResults()->countResults()); 43 | } 44 | 45 | public function testSimilarityCheckFalseSelectsFirstMatchTemplateRatherBestFit() 46 | { 47 | $parser = $this->getTemplatesParser(); 48 | $parseResults = $parser->parseFileContent(__DIR__ . '/test_txt_files/webFeedback.html'); 49 | $this->assertEquals(1, $parseResults->countResults()); 50 | $this->assertTrue($parseResults->keyExists('theWholeMessageMatch')); 51 | } 52 | 53 | public function testSimilarityCheckTrueSelectsBestFitTemplateRatherThanFirstMatch() 54 | { 55 | $parser = $this->getTemplatesParser(); 56 | $parseResults = $parser->parseFileContent( 57 | __DIR__ . 
'/test_txt_files/webFeedback.html', 58 | true 59 | ); 60 | $this->assertEquals(10, $parseResults->countResults()); 61 | $this->assertFalse($parseResults->keyExists('theWholeMessageMatch')); 62 | $this->assertEquals('Mozilla', $parseResults->get('browserCode')); 63 | } 64 | 65 | public function testTextParsingReturns() 66 | { 67 | $parser = $this->getTemplatesParser(); 68 | $parseResults = $parser->parseFileContent(__DIR__ . '/test_txt_files/t0TemplateMatch.txt'); 69 | 70 | //Make sure no html scripts are returned 71 | $this->assertEquals( 72 | $parseResults->get('country'), 73 | htmlspecialchars($parseResults->get('country')) 74 | ); 75 | //Make sure data is trimmed on return 76 | $this->assertEquals('2', $parseResults->get('children')); 77 | 78 | //Make sure data format and whitespaces are preserved 79 | $this->assertEquals( 80 | '11 - 10 - 2014', 81 | $parseResults->get('arrival_date') 82 | ); 83 | } 84 | 85 | public function testParseInvalidFileContentException() 86 | { 87 | $parser = $this->getTemplatesParser(); 88 | $this->expectException(InvalidParseFileException::class); 89 | $parser->parseFileContent(__DIR__ . '/test_txt_files/unknown.txt'); 90 | } 91 | 92 | private function getTemplatesParser(): TextParser 93 | { 94 | return new TextParser(__DIR__ . '/templates'); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /tests/templates/t1webFeedback.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | {%theWholeMessageMatch%} 5 | 6 | -------------------------------------------------------------------------------- /tests/templates/t2webFeedback.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |

New Web App Bug / Feedback Received

5 |

Dear admin,

6 |

Here below are the details we gathered from the user regarding this feedback:

7 |

Browser Details:

8 | appCodeName: {%browserCode%}
9 | appName: {%browserName%}
10 | appVersion: {%browserVersion%}
11 | cookieEnabled: {%cookiesUsed%}
12 | platform: {%OS%}
13 | userAgent: {%agent%} 14 | {%agentLine2%}
15 | plugins: {%plugins%}
16 |
17 |

Report / Feedback:

18 | URL: {%feedbackUrl%}/
19 | User Note:: {%userFeedbackMessage%}
20 | 21 | 22 |

The Web App Team

23 |

Note: This is an automatic notification. Don't reply to this message

24 | 25 | -------------------------------------------------------------------------------- /tests/templates/t_0.txt: -------------------------------------------------------------------------------- 1 | Sent at {%created%} 2 | 3 |
4 |
5 | Hi {%name1%},
6 |
7 |
{%action%}


8 |
9 | 10 | 11 | Click here to view the booked tour: {%tour%}


E-mail address :{%mail1%}

Customer Name :{%myname%}

Country :{%country%}

Arrival Date : {%arrival_date%}

Departure Date : {%departure_date%}

Number of Adults : {%adults%}

Number of Children :{%children%}

Additional Requests :{%more_data%}

12 | -------------------------------------------------------------------------------- /tests/templates/t_1.txt: -------------------------------------------------------------------------------- 1 | Sent to customer service from {%source%} 2 | ========================================== 3 | Ref No. : {%reference%} 4 | E-mail address :{%mail1%} 5 | Phone No. :{%phone%} 6 | Name :{%customer_name%} 7 | Country :{%country%} 8 | Comment :{%more_data%} 9 | -------------------------------------------------------------------------------- /tests/test_txt_files/noMatch.txt: -------------------------------------------------------------------------------- 1 | A file that has no matching templates in there that should not return any data :S although it may contain a structure like 2 | 3 | Name: Test Name Goes Here 4 | Nationality: Test Nationality 5 | ..... 6 | .... 7 | ... 8 | ... 9 | .. 10 | . 11 | Comments: some text - some text - some text - some text - some text - some text 12 | -------------------------------------------------------------------------------- /tests/test_txt_files/t0TemplateMatch.txt: -------------------------------------------------------------------------------- 1 | Sent at Thursday 9th of October 2014 03:20:44 PM 2 | 3 |
4 |
5 | Hi Carlo ,
6 |
7 |
New Booking


8 |
9 | 10 | 11 | Click here to view the booked tour: Tour Name


E-mail address : mymail@test.com

Customer Name : Hello World

Country : Egypt

Arrival Date : 11 - 10 - 2014

Departure Date : 11 - 10 - 2014

Number of Adults : 4

Number of Children : 2

Additional Requests : Some more information goes here

12 | -------------------------------------------------------------------------------- /tests/test_txt_files/webFeedback.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |

New Web App Bug / Feedback Received

5 |

Dear admin,

6 |

Here below are the details we gathered from the user regarding this feedback:

7 |

Browser Details:

8 | appCodeName: Mozilla
9 | appName: Netscape
10 | appVersion: 5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36
11 | cookieEnabled: 1
12 | platform: Linux x86_64
13 | userAgent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 14 | Safari/537.36
15 | plugins: a:5:{i:0;s:15:"Shockwave Flash";i:1;s:28:"Chrome Remote Desktop Viewer";i:2;s:34:"Widevine Content 16 | Decryption Module";i:3;s:13:"Native Client";i:4;s:17:"Chrome PDF Viewer";}
17 |
18 |

Report / Feedback:

19 | URL: http://example.org/user/dashboard/
20 | User Note:: The logging out message alerts is not working
21 | 22 | 23 |

The Web App Team

24 |

Note: This is an automatic notification. Don't reply to this message

25 | 26 | --------------------------------------------------------------------------------