├── .github
└── workflows
│ └── php.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── composer.json
├── composer.lock
├── coveralls.yml
├── docker-compose.yml
├── docker
└── xdebug.ini
├── examples
├── run.php
├── templates
│ ├── t_0.txt
│ ├── t_1.txt
│ ├── t_2.txt
│ ├── t_3.txt
│ ├── t_4.txt
│ ├── t_5.txt
│ ├── t_6.txt
│ ├── t_7.txt
│ └── t_8.txt
└── test_txt_files
│ ├── m_0.txt
│ ├── m_1.txt
│ ├── m_2.txt
│ ├── m_3.txt
│ ├── m_4.txt
│ ├── m_5.txt
│ ├── m_6.txt
│ ├── m_7.txt
│ └── m_8.txt
├── phpunit.xml
├── src
├── Exception
│ ├── InvalidParseFileException.php
│ ├── InvalidParsedDataKeyException.php
│ ├── InvalidTemplatesDirectoryException.php
│ └── UnstructuredTextParserException.php
├── Helper
│ └── TemplatesHelper.php
├── ParseResult.php
└── TextParser.php
└── tests
├── Helper
├── TemplatesHelperTest.php
├── expected_templates
│ ├── temp1.txt
│ ├── temp2.txt
│ ├── temp3.txt
│ └── temp4.txt
└── helper_templates
│ ├── temp1.txt
│ ├── temp2.txt
│ ├── temp3.txt
│ └── temp4.txt
├── ParseResultTest.php
├── TextParserTest.php
├── templates
├── t1webFeedback.txt
├── t2webFeedback.txt
├── t_0.txt
└── t_1.txt
└── test_txt_files
├── noMatch.txt
├── t0TemplateMatch.txt
└── webFeedback.html
/.github/workflows/php.yml:
--------------------------------------------------------------------------------
# Runs the test suite on every push / pull request targeting master,
# once per PHP version listed in the matrix.
name: Tests

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

permissions:
  contents: read

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        php-versions: ['7.4', '8.0', '8.1', '8.2']
    steps:
    - uses: actions/checkout@v3

    # Select the matrix PHP version first so that every Composer command below
    # (validate, install, test) runs on the interpreter under test rather than
    # on whatever PHP the runner image ships by default.
    - name: Setup PHP
      uses: shivammathur/setup-php@v2
      with:
        php-version: ${{ matrix.php-versions }}

    - name: Validate composer.json and composer.lock
      run: composer validate --strict

    - name: Cache Composer packages
      id: composer-cache
      uses: actions/cache@v3
      with:
        path: vendor
        key: ${{ runner.os }}-php-${{ hashFiles('**/composer.lock') }}
        restore-keys: |
          ${{ runner.os }}-php-

    - name: Install dependencies
      run: composer install --prefer-dist --no-progress

    - name: Run Tests
      run: composer run-script test
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # IDEA Ignores #
2 | ################
3 | *.iml
4 | *.ipr
5 | *.iws
6 | .idea/
7 | out/
8 | local.properties
9 |
10 | # Packages #
11 | ############
12 | *.7z
13 | *.dmg
14 | *.gz
15 | *.iso
16 | *.rar
17 | *.tar
18 | *.zip
19 | vendor/
20 |
21 | # Logs and databases #
22 | ######################
23 | log/
24 | *.log
25 | *.sql
26 | *.sqlite
27 |
28 | # OS files #
29 | ######################
30 | .DS_Store
31 | .DS_Store?
32 | ehthumbs.db
33 | Icon?
34 | Thumbs.db
35 |
36 | # Project Specific #
37 | ######################
38 | examples/Logs/*
39 |
40 | # Git Directories #
41 | ######################
42 | !empty
43 | !.gitkeep
44 |
45 |
46 | # PHP Unit #
47 | ######################
48 | .phpunit.result.cache
49 | /cache.properties
50 | tests/_reports/*
51 | build/logs/*
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# Development image: PHP CLI on Alpine with Composer, pcntl and Xdebug.
# PHP_VERSION has no default and must be supplied as a build argument.
ARG PHP_VERSION
ARG ALPINE_VERSION=3.18

FROM php:${PHP_VERSION}-cli-alpine${ALPINE_VERSION}

# UID/GID of the unprivileged user created below.
ARG DOCKER_USER_ID=1001
ARG DOCKER_GROUP_ID=1001
# Optional Xdebug version; when empty or unset the version suffix is omitted (see below).
ARG PHP_XDEBUG_VERSION

# https://blog.codito.dev/2022/11/composer-binary-only-docker-images/
COPY --from=composer/composer:2-bin /composer /usr/local/bin/composer

# Create the group/user only when the requested IDs are not already taken,
# then install the PHP extensions and the xdebug helper command.
RUN if ! getent group "${DOCKER_GROUP_ID}" > /dev/null; \
      then addgroup -S -g "${DOCKER_GROUP_ID}" devs; \
    fi \
    && if ! getent passwd "${DOCKER_USER_ID}" > /dev/null; \
      then adduser -S -u "${DOCKER_USER_ID}" -G "$(getent group "${DOCKER_GROUP_ID}" | awk -F: '{printf $1}')" dev; \
    fi \
    # php extensions
    # --fail: abort the build on an HTTP error instead of saving the error page as an executable
    && curl --fail --location --output /usr/local/bin/install-php-extensions https://github.com/mlocati/docker-php-extension-installer/releases/latest/download/install-php-extensions \
    && chmod +x /usr/local/bin/install-php-extensions \
    && sync \
    # the version suffix is appended only when PHP_XDEBUG_VERSION is non-empty, avoiding a dangling xdebug- spec
    && install-php-extensions \
      pcntl \
      "xdebug${PHP_XDEBUG_VERSION:+-${PHP_XDEBUG_VERSION}}" \
    # xdebug command
    && curl --fail --location --output /usr/local/bin/xdebug https://github.com/julienfalque/xdebug/releases/download/v2.0.0/xdebug \
    && chmod +x /usr/local/bin/xdebug

COPY docker/xdebug.ini /usr/local/etc/php/conf.d/docker-php-ext-xdebug.ini
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2014 Ayman Reda Bedair
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Unstructured Text Parser [PHP]
2 | ===========================================
3 | [Tests](https://github.com/aymanrb/php-unstructured-text-parser/actions/workflows/php.yml)
4 | [Coverage Status](https://coveralls.io/github/aymanrb/php-unstructured-text-parser?branch=master)
5 | [](https://packagist.org/packages/aymanrb/php-unstructured-text-parser)
6 | [](https://packagist.org/packages/aymanrb/php-unstructured-text-parser)
7 | [](https://packagist.org/packages/aymanrb/php-unstructured-text-parser)
8 |
9 | About Unstructured Text Parser
10 | ----------------------------------
11 | This is a small PHP library that helps extract text out of documents that are not structured in a processing-friendly format.
12 | When you want to parse text out of form-generated emails, for example, you can create a template matching the expected incoming mail format
13 | while specifying the variable text elements, and leave it to the class to extract your pre-formatted variables out of the incoming mails' body text.
14 |
15 | Useful when you want to parse data out of:
16 | * Emails generated from web forms
17 | * Documents with definable templates / expressions
18 |
19 | Installation
20 | ----------
21 | PHP Unstructured Text Parser is available on [Packagist](https://packagist.org/packages/aymanrb/php-unstructured-text-parser) (using semantic versioning), and installation via [Composer](https://getcomposer.org) is recommended.
22 | Add the following line to your `composer.json` file:
23 |
24 | ```json
25 | "aymanrb/php-unstructured-text-parser": "~2.0"
26 | ```
27 |
28 | or run
29 |
30 | ```sh
31 | composer require aymanrb/php-unstructured-text-parser
32 | ```
33 |
34 |
35 | [Usage example](https://github.com/aymanrb/php-unstructured-text-parser/blob/master/examples/run.php)
36 | ----------
37 | ```php
38 | parseText($textToParse);
47 | print_r($parseResults->getParsedRawData());
48 |
49 | //slower, performs a similarity check on available templates to select the most matching template before parsing
50 | print_r(
51 | $parser
52 | ->parseText($textToParse, true)
53 | ->getParsedRawData()
54 | );
55 | ```
56 |
57 | Parsing Procedure
58 | ----------
59 | 1- Grab a single copy of the text you want to parse.
60 |
61 | 2- Replace every piece of varying text within it with a named variable in the form of ``{%VariableName%}`` if you want to match
62 | everything in this part of the text, or ``{%VariableName:Pattern%}`` if you want to match a specific set of characters or use a more
63 | precise pattern.
64 |
65 | 3- Add the template file to the templates directory (defined in the parsing code) with a txt extension, e.g. ``fileName.txt``
66 |
67 | 4- Pass the text you wish to parse to the parse method of the class and let it do the magic for you.
68 |
69 | Template Example
70 | ------------------------
71 | If the text document you want to parse looks like this:
72 |
73 | ```
74 | Hello,
75 | If you wish to parse message coming from a website that states info like:
76 | ID & Source: 12234432 Website Form
77 | Name: Pet Cat
78 | E-Mail: email@example.com
79 | Comment: Some text goes here
80 |
81 | Thank You,
82 | Best Regards
83 | Admin
84 | ```
85 |
86 | Your Template file (``example_template.txt``) could be something like:
87 |
88 | ```
89 | Hello,
90 | If you wish to parse message coming from a website that states info like:
91 | ID & Source: {%id:[0-9]+%} {%source%}
92 | Name: {%senderName%}
93 | E-Mail: {%senderEmail%}
94 | Comment: {%comment%}
95 |
96 | Thank You,
97 | Best Regards
98 | Admin
99 | ```
100 |
101 | The output of a successful parsing job would be:
102 |
103 | ```
104 | Array(
105 | 'id' => '12234432',
106 | 'source' => 'Website Form',
107 | 'senderName' => 'Pet Cat',
108 | 'senderEmail' => 'email@example.com',
109 | 'comment' => 'Some text goes here'
110 | )
111 | ```
112 |
113 | Upgrading from v1.x to v2.x
114 | ------------------------
115 | Version 2.0 is more or less a refactored copy of version 1.x of the library and provides the exact same functionality.
116 | There is just one slight difference in the results returned. It's now a parsed data object instead of an array.
117 | To get the results as an array like it used to be in v1.x simply call "*getParsedRawData()*" on the returned object.
118 |
119 | ```php
120 | parseText($textToParse);
123 |
124 | //In 2.x you need to do the following if you want an array
125 | $extractedArray = $parser->parseText($textToParse)->getParsedRawData();
126 | ```
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "aymanrb/php-unstructured-text-parser",
3 | "description": "A PHP library to help extract text out of text documents",
4 | "keywords": [
5 | "text parser",
6 | "extract data",
7 | "php parser",
8 | "templates parsing",
9 | "regex parsing",
10 | "form parsing",
11 | "text parse"
12 | ],
13 | "config": {
14 | "platform": {
15 | "php": "7.4.0"
16 | }
17 | },
18 | "type": "library",
19 | "license": "MIT",
20 | "authors": [
21 | {
22 | "name": "Ayman R. Bedair",
23 | "email": "aymanrb@gmail.com",
24 | "homepage": "http://www.aymanrb.com",
25 | "role": "Developer"
26 | },
27 | {
28 | "name": "Pavel",
29 | "homepage": "http://www.aisamiery.ru",
30 | "role": "Developer"
31 | }
32 | ],
33 | "homepage": "https://github.com/aymanrb/php-unstructured-text-parser",
34 | "support": {
35 | "issues": "https://github.com/aymanrb/php-unstructured-text-parser/issues",
36 | "source": "https://github.com/aymanrb/php-unstructured-text-parser"
37 | },
38 | "autoload": {
39 | "psr-4": {
40 | "aymanrb\\UnstructuredTextParser\\": "src/"
41 | }
42 | },
43 | "autoload-dev": {
44 | "psr-4": {
45 | "aymanrb\\UnstructuredTextParser\\Tests\\": "tests/"
46 | }
47 | },
48 | "require": {
49 | "ext-json": "*",
50 | "php": ">=7.4.0",
51 | "psr/log": "^1.0.1 || ^2.0 || ^3.0"
52 | },
53 | "require-dev": {
54 | "phpunit/phpunit": "^8.4",
55 | "php-coveralls/php-coveralls": "^2.1"
56 | },
57 | "scripts": {
58 | "test": "phpunit tests"
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/coveralls.yml:
--------------------------------------------------------------------------------
1 | coverage_clover: build/logs/clover.xml
2 | json_path: build/logs/coveralls-upload.json
3 | service_name: travis-ci
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
version: '3.8'

# One service per supported PHP version; all of them mount the project at /app.
services:
  php-7.4: &php
    build:
      args:
        ALPINE_VERSION: "3.16"
        PHP_VERSION: "7.4"
        PHP_XDEBUG_VERSION: "3.1.2"
    working_dir: /app
    volumes:
      - .:/app
    environment:
      PHP_IDE_CONFIG: serverName=php-text-parser
  # NOTE: the YAML merge key (<<) is shallow. A service that declares its own
  # `build` key replaces the inherited `build` mapping wholesale, so every build
  # arg the image needs must be repeated in the override. ALPINE_VERSION is left
  # out on purpose below and falls back to the Dockerfile default.
  php-8.0:
    <<: *php
    build:
      args:
        PHP_VERSION: "8.0"
        PHP_XDEBUG_VERSION: "3.1.2"
  php-8.1:
    <<: *php
    build:
      args:
        PHP_VERSION: "8.1"
        PHP_XDEBUG_VERSION: "3.1.2"
  php-8.2:
    <<: *php
    build:
      args:
        PHP_VERSION: "8.2"
        PHP_XDEBUG_VERSION: "3.2.1"
--------------------------------------------------------------------------------
/docker/xdebug.ini:
--------------------------------------------------------------------------------
1 | ; XDebug 3 → https://xdebug.org/docs/upgrade_guide
2 | ; You can dynamically enable XDebug by setting XDEBUG_MODE env variable.
3 | ; Some options can be dynamically overridden with XDEBUG_CONFIG env variable.
4 | xdebug.mode = off
5 | xdebug.start_with_request = yes
6 | xdebug.discover_client_host = true
7 | xdebug.client_host = host.docker.internal
8 |
9 | ; Required so XDebug DOES NOT print warning "Could not connect to debugging client"
10 | xdebug.log = /app/docker/php/xdebug.log
11 | xdebug.log_level = 1
--------------------------------------------------------------------------------
/examples/run.php:
--------------------------------------------------------------------------------
1 | getExtension() === 'txt') {
13 | echo $txtFileObj->getFilename() . PHP_EOL;
14 |
15 | $parseResults = $parser->parseFileContent($txtFileObj->getPathname(), true);
16 |
17 | print_r($parseResults->getParsedRawData());
18 |
19 | if ($parseResults->getAppliedTemplateFile()) {
20 | echo 'Matched Template: ' . $parseResults->getAppliedTemplateFile() . PHP_EOL;
21 | }
22 | }
23 | }
24 | } catch (Exception $e) {
25 | echo $e->getMessage();
26 | }
27 |
--------------------------------------------------------------------------------
/examples/templates/t_0.txt:
--------------------------------------------------------------------------------
1 | Sent at {%created%}
2 |
3 |
4 |
5 | Hi {%name1%},
6 |
7 |
8 |
9 |
10 |
11 | Click here to view the booked tour: {%tour%}
E-mail address :{%mail1%}
Customer Name :{%myname%}
Country :{%country%}
Arrival Date : {%arrival_date%}
Departure Date : {%departure_date%}
Number of Adults : {%adults%}
Number of Children :{%children%}
Additional Requests :{%more_data%}
12 |
--------------------------------------------------------------------------------
/examples/templates/t_1.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Name |
4 | {%Name%} |
5 |
6 | UserEmail |
7 | {%Mail%} |
8 |
9 | Nationality |
10 | {%Nationality%} |
11 |
12 | UserPhone |
13 | {%Phone%} |
14 |
15 | Arrival |
16 | {%ArrivalDate%} |
17 |
18 | Departure |
19 | {%Departure%} |
20 |
21 | Adults |
22 | {%adults%} |
23 |
24 | Children |
25 | {%Children%} |
26 |
27 | Children Age |
28 | {%child%} |
29 |
30 | Comment |
31 | {%comment%} |
32 |
33 | Url Goal |
34 | {%goal%} |
35 |
36 | Ver Code |
37 | {%ver_code%} |
38 |
39 | Url |
40 | {%url%}
42 | |
43 |
44 | Ip Address |
45 | {%ip%} |
46 |
47 | Date & Time |
48 | {%created%} |
49 |
50 | Source |
51 | {%source%} |
52 |
53 | Http Referer |
54 | {%referer%} |
55 |
56 |
--------------------------------------------------------------------------------
/examples/templates/t_2.txt:
--------------------------------------------------------------------------------
1 | Hi Sir,
2 |
3 | New Booking ... information follows:
4 |
5 | Username: {%user%}
6 | Full Name: {%name%}
7 | E-Mail Address: {%email%}
8 | URL: {%url%}
9 | Nationality: {%nationality%}
10 | Phone: {%phone%}
11 | Comments: {%text%}
12 |
--------------------------------------------------------------------------------
/examples/templates/t_3.txt:
--------------------------------------------------------------------------------
1 |
3 |
4 |
5 | |
6 |
7 |
8 |
9 |
10 |
11 |
12 | |
14 |  |
18 | |
20 |
21 |
22 |
23 |
24 |
25 |
26 | |
28 |
29 |
30 |
31 |
32 | Question |
34 | Answer |
36 |
37 |
38 | Preferred Date
39 | of Arrival |
40 | {%arrival_date%} |
41 |
42 |
43 | Date of
44 | Departure |
45 | {%departure_date%} |
46 |
47 |
48 | Adults |
49 | {%adults%} |
50 |
51 |
52 | Children |
53 | {%children%} |
54 |
55 |
56 | Children's
57 | Age |
58 | {%cages%} |
59 |
60 |
61 | Preferred
62 | Hotel Class |
63 | {%class%} |
64 |
65 |
66 | Describe your
67 | tour. The more details the better |
68 | {%more_info%} |
69 |
70 |
71 | Full
72 | Name |
73 | {%name%} |
74 |
75 |
76 | Nationality |
78 | {%nationality%} |
79 |
80 |
81 | Phone
82 | Number |
83 | {%phone%} |
84 |
85 |
86 | E-mail |
87 | {%email%} |
88 |
89 |
90 | Confirm
91 | E-mail |
92 | {%emaila%} |
93 |
94 |
95 | Comments |
96 | {%comments%} |
97 |
98 |
99 |
100 | |
101 | |
103 |
104 |
105 | |
107 | |
109 | |
111 |
112 |
113 |
114 | |
115 |
116 |
117 | |
118 |
119 |
120 |
121 |
122 |
--------------------------------------------------------------------------------
/examples/templates/t_4.txt:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 | |
6 |
7 |
8 |
10 |
11 | |
13 |
15 |
18 | |
19 | |
21 |
22 |
23 |
25 |
26 | |
28 |
29 |
31 |
32 | Question |
34 | Answer |
36 |
37 | Date of Arrival |
39 | {%a%} |
41 |
42 | Adults |
44 | {%as%} |
46 |
47 | Children |
49 | {%cc%} |
51 |
52 | Children's age |
54 | {%ca%} |
56 |
57 | Full Name |
59 | {%sad%} |
60 |
61 | Phone Number |
63 | {%phone%} |
64 |
65 | Nationality |
67 | {%nb%} |
69 |
70 | E-mail |
72 | {%mail%} |
74 |
75 | Confirm E-mail |
77 | {%mjs%} |
79 |
80 | Your Comments |
82 | {%saas%} |
83 |
84 | url |
86 | {%url%} |
88 |
89 |
90 | |
91 | |
93 |
94 |
95 | |
97 | |
99 | |
101 |
102 | |
103 |
104 |
105 | |
106 |
107 |
108 |
109 |
--------------------------------------------------------------------------------
/examples/templates/t_5.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Date of arrival
6 | |
7 |
8 |
9 | |
10 |
11 | {%vt%}
12 | |
13 |
14 |
15 |
16 | Name
17 | |
18 |
19 |
20 | |
21 |
22 | {%vb%}
23 | |
24 |
25 |
26 |
27 | Nationality
28 | |
29 |
30 |
31 | |
32 |
33 | {%vfds%}
34 | |
35 |
36 |
37 |
38 | Phone
39 | |
40 |
41 |
42 | |
43 |
44 | {%vsac%}
45 | |
46 |
47 |
48 |
49 | Email
50 | |
51 |
52 |
53 | |
54 |
55 | {%v523%}
56 | |
57 |
58 |
59 |
60 | Adults
61 | |
62 |
63 |
64 | |
65 |
66 | {%adsa%}
67 | |
68 |
69 |
70 |
71 | Children
72 | |
73 |
74 |
75 | |
76 |
77 | {%c1%}
78 | |
79 |
80 |
81 |
82 | Tour Package
83 | |
84 |
85 |
86 | |
87 |
88 | {%url%}
89 | |
90 |
91 |
92 |
93 | IP Address
94 | |
95 |
96 |
97 | |
98 |
99 | {%ip%}
100 | |
101 |
102 |
103 | |
104 |
105 |
106 |
--------------------------------------------------------------------------------
/examples/templates/t_6.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
30 |
31 |
32 |
33 |
34 | Chat Transcript
35 |
36 |
37 | |
38 |
39 |
40 | Visitor: | {%name%} |
41 |
42 |
43 | Operator: | {%operator_name%} |
44 |
45 |
46 | Company: | {%source%} |
47 |
48 |
49 | Started: | {%start%} |
50 |
51 |
52 | Finished: | {%end%} |
53 |
54 |
55 | |
56 |
57 |
58 | {%data%}
59 |
60 |
61 |
--------------------------------------------------------------------------------
/examples/templates/t_7.txt:
--------------------------------------------------------------------------------
1 |
3 |
4 |
5 | |
6 |
7 |
8 |
9 |
10 |
11 |
12 | |
14 |  |
18 | |
20 |
21 |
22 |
23 |
24 |
25 |
26 | |
28 |
29 |
30 |
31 |
32 | Question |
34 | Answer |
36 |
37 |
38 | Preferred Date
39 | of Arrival |
40 | {%arrival_date%} |
41 |
42 |
43 | Date of
44 | Departure |
45 | {%departure_date%} |
46 |
47 |
48 | Adults |
49 | {%adults%} |
50 |
51 |
52 | Children |
53 | {%children%} |
54 |
55 |
56 | Children's
57 | Age |
58 | {%cages%} |
59 |
60 |
61 | Preferred
62 | Hotel Class |
63 | {%class%} |
64 |
65 |
66 | Describe your
67 | tour. The more details the better |
68 | {%more_info%} |
69 |
70 |
71 | Full
72 | Name |
73 | {%name%} |
74 |
75 |
76 | Nationality |
78 | {%nationality%} |
79 |
80 |
81 | Phone
82 | Number |
83 | {%phone%} |
84 |
85 |
86 | E-mail |
87 | {%email%} |
88 |
89 |
90 | Confirm
91 | E-mail |
92 | {%emaila%} |
93 |
94 |
95 | Comments |
96 | {%comments%} |
97 |
98 |
99 |
100 | |
101 | |
103 |
104 |
105 | |
107 | |
109 | |
111 |
112 |
113 |
114 | |
115 |
116 |
117 | |
118 |
119 |
120 |
121 |
122 |
--------------------------------------------------------------------------------
/examples/templates/t_8.txt:
--------------------------------------------------------------------------------
1 | ~ New message received ~
2 |
3 | Sender: {%id:[0-9]+%} {%name%}
4 | Sender-id: {%senderId%}
5 | Sender-full-name: {%senderName%}
6 | Sender-email: {%senderEmail%}
7 | Sender-website: {%senderSite%}
8 | Sender-number: {%senderPhone:\+[0-9]{10,15}%}
9 | Sender-nationality: {%senderCountry%}
10 | Message:
11 | {%senderMessage%}
--------------------------------------------------------------------------------
/examples/test_txt_files/m_0.txt:
--------------------------------------------------------------------------------
1 | Sent at Thursday 9th of October 2014 03:20:44 PM
2 |
3 |
4 |
5 | Hi Ayman ,
6 |
7 |
8 |
9 |
10 |
11 | Click here to view the booked tour: Tour Name
E-mail address : mymail@test.com
Customer Name : Hello World
Country : Egypt
Arrival Date : 11 - 10 - 2014
Departure Date : 11 - 10 - 2014
Number of Adults : 4
Number of Children : 2
Additional Requests : dsadsa
12 |
--------------------------------------------------------------------------------
/examples/test_txt_files/m_1.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Name |
4 | Client Name |
5 |
6 | UserEmail |
7 | test@example.com |
8 |
9 | Nationality |
10 | United States of America |
11 |
12 | UserPhone |
13 | 8885544555850 |
14 |
15 | Arrival |
16 | 13/08/2013 |
17 |
18 | Departure |
19 | 08/08/2013 |
20 |
21 | Adults |
22 | 2 |
23 |
24 | Children |
25 | No. Child |
26 |
27 | Children Age |
28 | Child Age |
29 |
30 | Comment |
31 | ssssss Test |
32 |
33 | Url Goal |
34 | cruise |
35 |
36 | Ver Code |
37 | 789456 |
38 |
39 | Url |
40 | http://www.example.com/something.php
42 | |
43 |
44 | Ip Address |
45 | 99.999.9999.99 |
46 |
47 | Date & Time |
48 | 2013-08-06 13:02:19 |
49 |
50 | Source |
51 | Direct Booking |
52 |
53 | Http Referer |
54 | http://www.example.com |
55 |
56 |
--------------------------------------------------------------------------------
/examples/test_txt_files/m_2.txt:
--------------------------------------------------------------------------------
1 | Hi Sir,
2 |
3 | New Booking ... information follows:
4 |
5 | Username: test
6 | Full Name: Eve Arron
7 | E-Mail Address: example@test.com
8 | URL: www.example.com/something
9 | Nationality: German
10 | Phone: 6655885524455
11 | Comments: Some Text Goes Here - Some Text Goes Here - Some Text Goes Here - Some Text Goes Here - Some Text Goes Here - Some Text Goes Here - Some Text Goes Here - Some Text Goes Here - Some Text Goes Here - Some Text Goes Here
12 | Some text goes here but will be ignored by the template since it's on a new line
--------------------------------------------------------------------------------
/examples/test_txt_files/m_3.txt:
--------------------------------------------------------------------------------
1 |
3 |
4 |
5 | |
6 |
7 |
8 |
9 |
10 |
11 |
12 | |
14 |  |
18 | |
20 |
21 |
22 |
23 |
24 |
25 |
26 | |
28 |
29 |
30 |
31 |
32 | Question |
34 | Answer |
36 |
37 |
38 | Preferred Date
39 | of Arrival |
40 | 4-22-2013 |
41 |
42 |
43 | Date of
44 | Departure |
45 | 4-29-2013 |
46 |
47 |
48 | Adults |
49 | 2 |
50 |
51 |
52 | Children |
53 | 3 |
54 |
55 |
56 | Children's
57 | Age |
58 | 3,3,4 |
59 |
60 |
61 | Preferred
62 | Hotel Class |
63 | 5★ Standard |
64 |
65 |
66 | Describe your
67 | tour. The more details the better |
68 | We would like to book |
69 |
70 |
71 | Full
72 | Name |
73 | My Name Test |
74 |
75 |
76 | Nationality |
78 | United States |
79 |
80 |
81 | Phone
82 | Number |
83 | 98855448888554 |
84 |
85 |
86 | E-mail |
87 | 321tinker@gmail.com |
88 |
89 |
90 | Confirm
91 | E-mail |
92 | ayss@gmail.com |
93 |
94 |
95 | Comments |
96 | Test some more information |
97 |
98 |
99 |
100 | |
101 | |
103 |
104 |
105 | |
107 | |
109 | |
111 |
112 |
113 |
114 | |
115 |
116 |
117 | |
118 |
119 |
120 |
121 |
122 |
--------------------------------------------------------------------------------
/examples/test_txt_files/m_4.txt:
--------------------------------------------------------------------------------
1 |
3 |
5 |
6 | |
7 |
8 |
9 |
11 |
12 | |
14 |
16 |
19 | |
20 | |
22 |
23 |
24 |
26 |
27 | |
29 |
30 |
32 |
33 | Question |
35 | Answer |
37 |
38 | Date of Arrival |
40 | 18-03-2014 |
42 |
43 | Adults |
45 | 2 |
47 |
48 | Children |
50 | 0 |
52 |
53 | Children's age |
55 | |
57 |
58 | Full Name |
60 | John
61 | Williams |
62 |
63 | Phone Number |
65 | ()
66 | |
67 |
68 | Nationality |
70 | United States |
72 |
73 | E-mail |
75 | somemail@yahoo.com |
77 |
78 | Confirm E-mail |
80 | somemail@yahoo.com |
82 |
83 | Your Comments |
85 | we would like to .... |
86 |
87 | url |
89 | http://www.domain.com/test/yus |
91 |
92 |
93 | |
94 | |
96 |
97 |
98 | |
100 | |
102 | |
104 |
105 | |
106 |
107 |
108 | |
109 |
110 |
111 |
112 |
--------------------------------------------------------------------------------
/examples/test_txt_files/m_5.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Date of arrival
6 | |
7 |
8 |
9 | |
10 |
11 | 06/02/2015
12 | |
13 |
14 |
15 |
16 | Name
17 | |
18 |
19 |
20 | |
21 |
22 | Full Name of New Client
23 | |
24 |
25 |
26 |
27 | Nationality
28 | |
29 |
30 |
31 | |
32 |
33 | United Kingdom
34 | |
35 |
36 |
37 |
38 | Phone
39 | |
40 |
41 |
42 | |
43 |
44 | 07780918266
45 | |
46 |
47 |
48 |
49 | Email
50 | |
51 |
52 |
53 | |
54 |
55 | test@test.com
56 | |
57 |
58 |
59 |
60 | Adults
61 | |
62 |
63 |
64 | |
65 |
66 | 2
67 | |
68 |
69 |
70 |
71 | Children
72 | |
73 |
74 |
75 | |
76 |
77 | 0
78 | |
79 |
80 |
81 |
82 | Tour Package
83 | |
84 |
85 |
86 | |
87 |
88 | http://www.test.com/test/
89 | |
90 |
91 |
92 |
93 | IP Address
94 | |
95 |
96 |
97 | |
98 |
99 | 99.999.999.999
100 | |
101 |
102 |
103 | |
104 |
105 |
106 |
--------------------------------------------------------------------------------
/examples/test_txt_files/m_6.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
30 |
31 |
32 |
33 |
34 | Chat Transcript
35 |
36 |
37 | |
38 |
39 |
40 | Visitor: | Jue |
41 |
42 |
43 | Operator: | Chat Person |
44 |
45 |
46 | Company: | chat.com |
47 |
48 |
49 | Started: | 12-Oct-2014 11:43:45 PM |
50 |
51 |
52 | Finished: | 13-Oct-2014 12:12:51 AM |
53 |
54 |
55 | |
56 |
57 |
58 |
11:55:49 PM: Client:
sounds good!
59 |
11:56:10 PM: Chat Person:
May I have your phone number ?
60 |
12:02:48 AM: 
 | | Chat Person is now off-line and may not reply. Currently in room: Client. |

61 |
12:03:49 AM: 
 | | Client is now off-line and may not reply. Currently in room: room is empty. |

62 |
63 |
64 |
65 | Visitor Details
66 |
67 |
68 |
69 |
70 |
71 | Name: | Client |
72 |
73 |
74 | E-Mail: | client@gmail.com |
75 |
76 |
77 | Department: | Planning |
78 |
79 |
80 | Called From: | http://www.myurl.com/page.html |
81 |
82 |
83 | IP Address: | 66.666.66.123 |
84 |
85 |
86 | Host Name: | 98.234.53.159 |
87 |
88 |
89 | Referrer: | http://www.google.com |
90 |
91 |
92 | Browser/OS: | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36 |
93 |
94 |
95 | Location: | California, United States |
96 |
97 |
98 | | |
99 |
100 |
101 |
102 |
103 |
104 |
105 | This transcript email message was automatically generated by ServiceProvider
106 |
107 |
108 |
--------------------------------------------------------------------------------
/examples/test_txt_files/m_7.txt:
--------------------------------------------------------------------------------
1 |
3 |
4 |
5 | |
6 |
7 |
8 |
9 |
10 |
11 |
12 | |
14 |  |
18 | |
20 |
21 |
22 |
23 |
24 |
25 |
26 | |
28 |
29 |
30 |
31 |
32 | Question |
34 | Answer |
36 |
37 |
38 | Preferred Date
39 | of Arrival |
40 | 4-22-2013 |
41 |
42 |
43 | Date of
44 | Departure |
45 | 4-29-2013 |
46 |
47 |
48 | Adults |
49 | 2 |
50 |
51 |
52 | Children |
53 | 3 |
54 |
55 |
56 | Children's
57 | Age |
58 | 3,3,4 |
59 |
60 |
61 | Preferred
62 | Hotel Class |
63 | 5★ Standard |
64 |
65 |
66 | Describe your
67 | tour. The more details the better |
68 | We would like to book |
69 |
70 |
71 | Full
72 | Name |
73 | My Name Test |
74 |
75 |
76 | Nationality |
78 | United States |
79 |
80 |
81 | Phone
82 | Number |
83 | 98855448888554 |
84 |
85 |
86 | E-mail |
87 | 321tinker@gmail.com |
88 |
89 |
90 | Confirm
91 | E-mail |
92 | ayss@gmail.com |
93 |
94 |
95 | Comments |
96 | Test some more information |
97 |
98 |
99 |
100 | |
101 | |
103 |
104 |
105 | |
107 | |
109 | |
111 |
112 |
113 |
114 | |
115 |
116 |
117 | |
118 |
119 |
120 |
121 |
122 |
--------------------------------------------------------------------------------
/examples/test_txt_files/m_8.txt:
--------------------------------------------------------------------------------
1 | ~ New message received ~
2 |
3 | Sender: 12345678 John Anthony Doe
4 | Sender-id: 12345678
5 | Sender-full-name: John Anthony Doe
6 | Sender-email: example@test.com
7 | Sender-website: www.example.com/something
8 | Sender-number: +4917914999410
9 | Sender-nationality: N/A
10 | Message:
11 | Some Text Goes Here - Some Text Goes Here
--------------------------------------------------------------------------------
/phpunit.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
8 |
9 |
10 | ./tests/
11 |
12 |
13 |
14 |
15 | ./src
16 |
17 | ./src/Exception
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/src/Exception/InvalidParseFileException.php:
--------------------------------------------------------------------------------
1 | Pattern)
14 | private const REGEX_ORPHAN_BACKSLASH = '/(?.*)'; //(?.*)
18 | private const REPLACE_VARIABLE_WITH_PATTERN = '(?<$1>$2)'; //(?Pattern)
19 |
20 | private \FilesystemIterator $directoryIterator;
21 |
/**
 * Binds the helper to a templates directory.
 *
 * @param string $templatesDir Path of the directory that holds the template files.
 *
 * @throws InvalidTemplatesDirectoryException When the path is empty or is not a directory.
 */
public function __construct(string $templatesDir)
{
    $templatesIterator = $this->createTemplatesDirIterator($templatesDir);

    $this->directoryIterator = $templatesIterator;
}
26 |
/**
 * Returns the prepared templates to try against the given text.
 *
 * @param string $text                 Text that is about to be parsed.
 * @param bool   $findMatchingTemplate When true, only the template most similar to $text is
 *                                     returned; otherwise every template file is returned.
 *
 * @return array Prepared template strings keyed by template file pathname.
 */
public function getTemplates(string $text, bool $findMatchingTemplate = false): array
{
    return $findMatchingTemplate
        ? $this->findTemplate($text)
        : $this->getAllValidTemplates();
}
35 |
/**
 * Validates the templates directory path and wraps it in a filesystem iterator.
 *
 * @param string $iterableDirectoryPath Directory expected to contain the template files.
 *
 * @return \FilesystemIterator Iterator over the directory (trailing slashes trimmed from the path).
 *
 * @throws InvalidTemplatesDirectoryException When the path is empty or is not a directory.
 */
private function createTemplatesDirIterator(string $iterableDirectoryPath): \FilesystemIterator
{
    // is_dir() is only consulted for a non-empty path, exactly like the short-circuiting original.
    $isUsableDirectory = !empty($iterableDirectoryPath) && is_dir($iterableDirectoryPath);

    if (!$isUsableDirectory) {
        throw new InvalidTemplatesDirectoryException('Invalid templates directory provided');
    }

    $pathWithoutTrailingSlash = rtrim($iterableDirectoryPath, '/');

    return new \FilesystemIterator($pathWithoutTrailingSlash);
}
46 |
/**
 * Picks the single template whose raw content is most similar to the given text.
 *
 * Similarity is the percentage reported by similar_text(). On a tie the template
 * encountered first wins, because only a strictly greater percentage replaces the
 * current best.
 *
 * @param string $text Text that is about to be parsed.
 *
 * @return array Either an empty array (no readable template file found) or a single
 *               entry mapping the template pathname to its prepared template string.
 */
private function findTemplate(string $text): array
{
    $matchedTemplate = [];
    $maxMatch = -1;

    foreach ($this->directoryIterator as $fileInfo) {
        $templatePath = $fileInfo->getPathname();

        // Same guard as getAllValidTemplates(): sub-directories and other
        // non-file entries are not templates and cannot be read as such.
        if (!is_file($templatePath)) {
            continue;
        }

        $templateContent = file_get_contents($templatePath);

        // An unreadable file can be neither compared nor prepared; skip it
        // instead of treating the failure as an empty template.
        if ($templateContent === false) {
            continue;
        }

        // compare template against text to decide on similarity percentage
        similar_text($text, $templateContent, $matchPercentage);

        if ($matchPercentage > $maxMatch) {
            $maxMatch = $matchPercentage;
            $matchedTemplate = [$templatePath => $this->prepareTemplate($templateContent)];
        }
    }

    return $matchedTemplate;
}
66 |
/**
 * Prepares every regular file found in the templates directory.
 *
 * @return array Map of template path => prepared regex body, sorted by path
 *               in reverse order.
 */
private function getAllValidTemplates(): array
{
    $preparedByPath = [];

    foreach ($this->directoryIterator as $entry) {
        $path = $entry->getPathname();

        // Only regular files are treated as templates.
        if (is_file($path)) {
            $preparedByPath[$path] = $this->prepareTemplate(file_get_contents($path));
        }
    }

    // Reverse key order decides the order in which callers iterate the templates.
    krsort($preparedByPath);

    return $preparedByPath;
}
83 |
/**
 * Converts raw template text into the body of a regular expression.
 *
 * The text is first quoted so that it matches literally. Variables that
 * carry their own pattern are then rewritten with
 * REPLACE_VARIABLE_WITH_PATTERN, the rewritten groups are post-processed to
 * drop the backslashes added by the quoting step (as the constant names
 * suggest), and finally the remaining plain variables are rewritten with
 * REPLACE_GENERIC_VARIABLE.
 *
 * @param string $templateText Raw content of a template file.
 *
 * @return string Regex body, without delimiters or modifiers.
 */
private function prepareTemplate(string $templateText): string
{
    // Escape regex meta characters and the "/" delimiter across the whole template.
    $quoted = preg_quote($templateText, '/');

    $withPatternGroups = preg_replace(
        self::REGEX_VARIABLE_WITH_PATTERN,
        self::REPLACE_VARIABLE_WITH_PATTERN,
        $quoted
    );

    // Runs on each already rewritten group to clean up its escaped custom pattern.
    $restoreCustomPattern = function ($matches) {
        $variableWithPattern = preg_replace(self::REGEX_ORPHAN_BACKSLASH, '', $matches[0]);

        return str_replace(self::STR_SEARCH_TRIPLE_BACKSLASHES, '\\', $variableWithPattern);
    };

    $withCleanGroups = preg_replace_callback(
        self::REGEX_PREPARED_VARIABLE_WITH_PATTERN,
        $restoreCustomPattern,
        $withPatternGroups
    );

    return preg_replace(
        self::REGEX_GENERIC_VARIABLE,
        self::REPLACE_GENERIC_VARIABLE,
        $withCleanGroups
    );
}
110 | }
--------------------------------------------------------------------------------
/src/ParseResult.php:
--------------------------------------------------------------------------------
1 | parsedRawData;
18 | }
19 |
/**
 * Stores the extracted data and immediately cleans every value.
 *
 * @param array $parsedRawData Map of result key => extracted text.
 */
public function setParsedRawData(array $parsedRawData): void
{
    $this->parsedRawData = $parsedRawData;

    // Cleaning rewrites the property in place, so it has to follow the assignment.
    $this->cleanData();
}
25 |
/**
 * Returns the template file recorded for this result.
 *
 * @return string|null The stored path; the return type allows null.
 */
public function getAppliedTemplateFile(): ?string
{
    return $this->appliedTemplateFile;
}
30 |
/**
 * Records which template file produced this result.
 *
 * @param string $appliedTemplateFile Path of the template file.
 */
public function setAppliedTemplateFile(string $appliedTemplateFile): void
{
    $this->appliedTemplateFile = $appliedTemplateFile;
}
35 |
/**
 * Counts the entries currently held in the parsed data.
 *
 * @return int Number of stored result keys.
 */
public function countResults(): int
{
    $entries = $this->parsedRawData;

    return count($entries);
}
40 |
/**
 * Tells whether the parsed data contains the given key.
 *
 * @param string $key Result key to look for.
 *
 * @return bool True when the key is present, whatever its value.
 */
public function keyExists(string $key): bool
{
    $entries = $this->parsedRawData;

    return array_key_exists($key, $entries);
}
45 |
/**
 * Fetches one value from the parsed data.
 *
 * @param string $resultDataKey      Key to read.
 * @param bool   $failOnUndefinedKey When true an unknown key raises an exception
 *                                   instead of yielding null.
 *
 * @return string|null The stored value, or null for an unknown key in non-strict mode.
 *
 * @throws InvalidParsedDataKeyException When the key is unknown and $failOnUndefinedKey is true.
 */
public function get(string $resultDataKey, bool $failOnUndefinedKey = false): ?string
{
    if ($this->keyExists($resultDataKey)) {
        return $this->parsedRawData[$resultDataKey];
    }

    if ($failOnUndefinedKey) {
        throw new InvalidParsedDataKeyException('Undefined results key: ' . $resultDataKey);
    }

    return null;
}
58 |
/**
 * Replaces every stored value with its cleaned counterpart, keeping the keys.
 */
private function cleanData(): void
{
    foreach (array_keys($this->parsedRawData) as $name) {
        $this->parsedRawData[$name] = $this->cleanElement($this->parsedRawData[$name]);
    }
}
65 |
/**
 * Strips markup tags from a value and trims the surrounding whitespace.
 *
 * @param string $value Raw extracted text.
 *
 * @return string Tag-free, trimmed text.
 */
private function cleanElement(string $value): string
{
    $withoutTags = strip_tags($value);

    return trim($withoutTags);
}
70 | }
71 |
--------------------------------------------------------------------------------
/src/TextParser.php:
--------------------------------------------------------------------------------
1 | setLogger($logger);
28 | $this->templatesHelper = new TemplatesHelper($templatesDir);
29 | $this->resetParseResults();
30 | }
31 |
/**
 * Reads a file and parses its content against the templates.
 *
 * @param string $filePath             Path of the file to parse.
 * @param bool   $findMatchingTemplate Forwarded to parseText().
 *
 * @return ParseResult Result produced by parseText().
 *
 * @throws InvalidParseFileException When the path is not a regular file or
 *                                   its content cannot be read.
 */
public function parseFileContent(string $filePath, bool $findMatchingTemplate = false): ParseResult
{
    if (!is_file($filePath)) {
        throw new InvalidParseFileException($filePath);
    }

    $fileContent = file_get_contents($filePath);
    if ($fileContent === false) {
        // An existing but unreadable file must not reach parseText() as a boolean.
        throw new InvalidParseFileException($filePath);
    }

    return $this->parseText($fileContent, $findMatchingTemplate);
}
40 |
/**
 * Parses a text against the templates supplied by the templates helper.
 *
 * Every returned template is tried and the loop never exits early, so when
 * several templates match, the data and template path of the last matching
 * one are what the result ends up holding.
 *
 * @param string $text                 Text to extract data from.
 * @param bool   $findMatchingTemplate Forwarded to the templates helper.
 *
 * @return ParseResult Fresh result object for this call.
 */
public function parseText(string $text, bool $findMatchingTemplate = false): ParseResult
{
    // Start from an empty result so nothing from a previous call survives.
    $this->resetParseResults();

    $candidates = $this->templatesHelper->getTemplates($text, $findMatchingTemplate);

    foreach ($candidates as $path => $pattern) {
        $this->logger->debug(sprintf('Parsing against template: %s', $path));

        if (!$this->extractData($text, $pattern)) {
            continue;
        }

        $this->parseResults->setAppliedTemplateFile($path);
    }

    $encodedData = json_encode($this->parseResults->getParsedRawData());
    $this->logger->info(sprintf('Data extracted: %s', $encodedData));

    return $this->parseResults;
}
59 |
/**
 * Returns the result object currently held by the parser.
 *
 * @return ParseResult The same instance that parseText() returns.
 */
public function getParseResults(): ParseResult
{
    return $this->parseResults;
}
64 |
/**
 * Matches the text against one prepared template and stores the named captures.
 *
 * @param string $text     Text to extract data from.
 * @param string $template Regex body; it gets wrapped in "/" delimiters with the "s" modifier.
 *
 * @return bool True when at least one named capture was found and stored.
 */
private function extractData(string $text, string $template): bool
{
    // Run the template as a regex over the text.
    preg_match('/' . $template . '/s', $text, $matches);

    // Named groups are the entries with string keys; numeric ones are positional duplicates.
    $namedMatches = array_filter($matches, 'is_string', ARRAY_FILTER_USE_KEY);

    if (empty($namedMatches)) {
        return false;
    }

    $this->parseResults->setParsedRawData($namedMatches);

    return true;
}
82 |
/**
 * Swaps the held result for a brand-new, empty ParseResult.
 */
private function resetParseResults(): void
{
    $emptyResult = new ParseResult();

    $this->parseResults = $emptyResult;
}
87 | }
88 |
--------------------------------------------------------------------------------
/tests/Helper/TemplatesHelperTest.php:
--------------------------------------------------------------------------------
1 | expectException(InvalidTemplatesDirectoryException::class);
17 | new TemplatesHelper(__DIR__ . '/DirectoryThatDoesNotExist');
18 | }
19 |
/**
 * Creates a helper pointed at the fixture templates directory.
 */
private function getTemplatesHelperInstance()
{
    $fixturesDir = __DIR__ . self::DIR_HELPER_TEMPLATES;

    return new TemplatesHelper($fixturesDir);
}
24 |
/**
 * Without similarity search, one template is returned per fixture file.
 */
public function testGetAllTemplates()
{
    $helper = $this->getTemplatesHelperInstance();
    $fixtureFiles = glob(__DIR__ . self::DIR_HELPER_TEMPLATES . "/*");

    $templates = $helper->getTemplates('regardless of what comes here');

    $this->assertCount(count($fixtureFiles), $templates);
}
33 |
/**
 * Every returned template must equal its expected prepared form and be a usable regex.
 */
public function testGetAllTemplatesRegexIsPrepared()
{
    $templates = $this->getTemplatesHelperInstance()
        ->getTemplates('regardless of what comes here');

    $this->assertTrue($this->checkPreparedTemplates($templates));
}
41 |
/**
 * With similarity search enabled exactly one template is returned.
 */
public function testGetMostMatchingTemplateToText()
{
    $templates = $this->getTemplatesHelperInstance()
        ->getTemplates('Sent to customer service from Someone', true);

    $this->assertCount(1, $templates);
}
49 |
/**
 * The template chosen by similarity search must also be fully prepared.
 */
public function testGetMostMatchingTemplateToTextRegexIsPrepared()
{
    $templates = $this->getTemplatesHelperInstance()
        ->getTemplates('Sent to customer service from Someone', true);

    $this->assertTrue($this->checkPreparedTemplates($templates));
}
57 |
/**
 * Asserts that each prepared template equals its expected fixture and is a valid regex.
 *
 * @param array $templatesArray Map of template path => prepared regex body.
 *
 * @return bool Always true; failures surface through the assertions.
 */
private function checkPreparedTemplates(array $templatesArray): bool
{
    foreach ($templatesArray as $path => $preparedPattern) {
        $this->assertEquals($this->getExpectedTemplate($path), $preparedPattern);
        $this->assertTrue($this->isValidRegex($preparedPattern));
    }

    return true;
}
69 |
/**
 * Checks that a prepared template compiles as a regular expression.
 *
 * @param string $pattern Regex body; it is wrapped the same way the parser does
 *                        ("/" delimiters, "s" modifier).
 *
 * @return bool True when the pattern compiles, false otherwise.
 */
private function isValidRegex(string $pattern): bool
{
    try {
        // preg_match() reports a pattern that fails to compile by returning false
        // (plus a warning), not by throwing, so the return value must be checked.
        return @preg_match('/' . $pattern . '/s', '') !== false;
    } catch (\Throwable $exception) {
        // Kept for environments that turn warnings into exceptions.
        return false;
    }
}
80 |
/**
 * Loads the expected prepared form of a helper template.
 *
 * The expected file is found by swapping the helper templates directory
 * segment of the path for the expected templates one.
 *
 * @param string $templatePath Path of the helper template file.
 */
private function getExpectedTemplate(string $templatePath)
{
    $expectedPath = str_replace(
        self::DIR_HELPER_TEMPLATES,
        self::DIR_EXPECTED_TEMPLATES,
        $templatePath
    );

    return file_get_contents($expectedPath);
}
91 |
92 | }
93 |
--------------------------------------------------------------------------------
/tests/Helper/expected_templates/temp1.txt:
--------------------------------------------------------------------------------
1 | \<htmlTag\>Simple Template 01 (?<variable>.*)\<\/htmlTag\>
2 |
3 | Multi Line
--------------------------------------------------------------------------------
/tests/Helper/expected_templates/temp2.txt:
--------------------------------------------------------------------------------
1 | Simple Template 01 (?<variable>.*)
--------------------------------------------------------------------------------
/tests/Helper/expected_templates/temp3.txt:
--------------------------------------------------------------------------------
1 | Template with specified pattern (?<variable>[0-9]+)
--------------------------------------------------------------------------------
/tests/Helper/expected_templates/temp4.txt:
--------------------------------------------------------------------------------
1 | Template with specified pattern containing special escaped characters (?<variable>\+[0-9]{10,16})
--------------------------------------------------------------------------------
/tests/Helper/helper_templates/temp1.txt:
--------------------------------------------------------------------------------
1 | <htmlTag>Simple Template 01 {%variable%}</htmlTag>
2 |
3 | Multi Line
--------------------------------------------------------------------------------
/tests/Helper/helper_templates/temp2.txt:
--------------------------------------------------------------------------------
1 | Simple Template 01 {%variable%}
--------------------------------------------------------------------------------
/tests/Helper/helper_templates/temp3.txt:
--------------------------------------------------------------------------------
1 | Template with specified pattern {%variable:[0-9]+%}
--------------------------------------------------------------------------------
/tests/Helper/helper_templates/temp4.txt:
--------------------------------------------------------------------------------
1 | Template with specified pattern containing special escaped characters {%variable:\+[0-9]{10,16}%}
--------------------------------------------------------------------------------
/tests/ParseResultTest.php:
--------------------------------------------------------------------------------
1 | setParsedRawData(
19 | [
20 | 'resultsKey' => 'resultsValue ',
21 | 'foundKey' => 'parsedContent SometextInBold',
22 | 'date' => '2019-01-01',
23 | 'time' => ' 20:11',
24 | ]
25 | );
26 | }
27 |
28 | return $parseResult;
29 | }
30 |
/**
 * A fresh result exposes its data as an empty array.
 */
public function testGetParsedRawData()
{
    $result = $this->getNewParseResultObject();

    $this->assertIsArray($result->getParsedRawData());
    $this->assertEmpty($result->getParsedRawData());
}
38 |
/**
 * Data handed to the setter is available afterwards under the same keys.
 */
public function testSetParsedRawData()
{
    $result = $this->getNewParseResultObject(true);

    $this->assertIsArray($result->getParsedRawData());
    $this->assertNotEmpty($result->getParsedRawData());

    foreach (['resultsKey', 'foundKey'] as $expectedKey) {
        $this->assertArrayHasKey($expectedKey, $result->getParsedRawData());
    }
}
48 |
/**
 * Values come back cleaned (surrounding whitespace removed) after being set.
 */
public function testSetParsedRawDataCleansContent()
{
    $cleaned = $this->getNewParseResultObject(true)->getParsedRawData();

    $this->assertEquals('resultsValue', $cleaned['resultsKey']);
    $this->assertEquals('parsedContent SometextInBold', $cleaned['foundKey']);
    $this->assertEquals('20:11', $cleaned['time']);
}
59 |
/**
 * The applied template path is empty at first and readable once it has been set.
 */
public function testAppliedTemplateFileSetterAndGetter()
{
    $result = $this->getNewParseResultObject();
    $this->assertEmpty($result->getAppliedTemplateFile());

    $templatePath = 'path/to/matched/Template.txt';
    $result->setAppliedTemplateFile($templatePath);

    $this->assertNotEmpty($result->getAppliedTemplateFile());
    $this->assertEquals($templatePath, $result->getAppliedTemplateFile());
}
72 |
/**
 * The populated fixture result reports its four entries.
 */
public function testCountResults()
{
    $result = $this->getNewParseResultObject(true);

    $this->assertEquals(4, $result->countResults());
}
79 |
/**
 * keyExists() is true for stored keys and false for unknown ones.
 */
public function testKeyExists()
{
    $result = $this->getNewParseResultObject(true);

    foreach (['foundKey', 'resultsKey'] as $storedKey) {
        $this->assertTrue($result->keyExists($storedKey));
    }

    $this->assertFalse($result->keyExists('AKeyWeNeverFound'));
}
88 |
/**
 * get() returns the cleaned value of stored keys and null for unknown ones.
 */
public function testGetResultKey()
{
    $result = $this->getNewParseResultObject(true);

    $expectedByKey = [
        'resultsKey' => 'resultsValue',
        'foundKey' => 'parsedContent SometextInBold',
        'date' => '2019-01-01',
        'time' => '20:11',
    ];

    foreach ($expectedByKey as $key => $expectedValue) {
        $this->assertEquals($expectedValue, $result->get($key));
    }

    $this->assertNull($result->get('AKeyWeNeverFound'));
}
98 |
/**
 * In strict mode known keys still resolve, while unknown keys raise an exception.
 */
public function testStrictGetResultKeyThrowsInvalidKeyException()
{
    $result = $this->getNewParseResultObject(true);

    $knownValue = $result->get('resultsKey', true);
    $this->assertEquals('resultsValue', $knownValue);

    // The expectation is declared only now, so it applies to the lookup below.
    $this->expectException(InvalidParsedDataKeyException::class);
    $result->get('AKeyWeNeverFound', true);
}
108 | }
109 |
--------------------------------------------------------------------------------
/tests/TextParserTest.php:
--------------------------------------------------------------------------------
1 | expectException(InvalidTemplatesDirectoryException::class);
17 | new TextParser(__DIR__ . '/DirectoryThatDoesNotExist');
18 | }
19 |
/**
 * Text matching none of the templates leaves the parsed data empty.
 */
public function testTextParsingFailure()
{
    $textParser = $this->getTemplatesParser();

    $textParser->parseText('Some Text that can not be matched against a template');

    $parsedData = $textParser->getParseResults()->getParsedRawData();
    $this->assertEmpty($parsedData);
}
27 |
/**
 * A second parse that matches nothing must not keep data from the first parse.
 */
public function testTextParsingResetsPreviousMatch()
{
    $textParser = $this->getTemplatesParser();
    $fixturesDir = __DIR__ . '/test_txt_files';

    $textParser->parseFileContent($fixturesDir . '/t0TemplateMatch.txt');
    $this->assertEquals(13, $textParser->getParseResults()->countResults());

    $textParser->parseFileContent($fixturesDir . '/noMatch.txt');
    $this->assertEmpty($textParser->getParseResults()->getParsedRawData());
}
37 |
/**
 * The matching fixture file yields thirteen extracted values.
 */
public function testTextParsingSuccess()
{
    $textParser = $this->getTemplatesParser();

    $textParser->parseFileContent(__DIR__ . '/test_txt_files/t0TemplateMatch.txt');

    $resultCount = $textParser->getParseResults()->countResults();
    $this->assertEquals(13, $resultCount);
}
44 |
/**
 * Without similarity search the web feedback fixture ends up with the single
 * catch-all key rather than the detailed fields.
 */
public function testSimilarityCheckFalseSelectsFirstMatchTemplateRatherBestFit()
{
    $feedbackFile = __DIR__ . '/test_txt_files/webFeedback.html';

    $result = $this->getTemplatesParser()->parseFileContent($feedbackFile);

    $this->assertEquals(1, $result->countResults());
    $this->assertTrue($result->keyExists('theWholeMessageMatch'));
}
52 |
/**
 * With similarity search the detailed web feedback template is applied,
 * producing ten fields and no catch-all key.
 */
public function testSimilarityCheckTrueSelectsBestFitTemplateRatherThanFirstMatch()
{
    $feedbackFile = __DIR__ . '/test_txt_files/webFeedback.html';

    $result = $this->getTemplatesParser()->parseFileContent($feedbackFile, true);

    $this->assertEquals(10, $result->countResults());
    $this->assertFalse($result->keyExists('theWholeMessageMatch'));
    $this->assertEquals('Mozilla', $result->get('browserCode'));
}
64 |
/**
 * Checks the shape of returned values: free of markup, trimmed, and with
 * inner whitespace left untouched.
 */
public function testTextParsingReturns()
{
    $result = $this->getTemplatesParser()
        ->parseFileContent(__DIR__ . '/test_txt_files/t0TemplateMatch.txt');

    // Make sure no html scripts are returned
    $country = $result->get('country');
    $this->assertEquals($country, htmlspecialchars($result->get('country')));

    // Make sure data is trimmed on return
    $this->assertEquals('2', $result->get('children'));

    // Make sure data format and whitespaces are preserved
    $this->assertEquals('11 - 10 - 2014', $result->get('arrival_date'));
}
84 |
/**
 * Parsing a path that is not a file raises InvalidParseFileException.
 */
public function testParseInvalidFileContentException()
{
    $textParser = $this->getTemplatesParser();
    $missingFile = __DIR__ . '/test_txt_files/unknown.txt';

    $this->expectException(InvalidParseFileException::class);
    $textParser->parseFileContent($missingFile);
}
91 |
/**
 * Creates a parser that reads its templates from the tests' templates directory.
 */
private function getTemplatesParser(): TextParser
{
    $templatesDir = __DIR__ . '/templates';

    return new TextParser($templatesDir);
}
96 | }
97 |
--------------------------------------------------------------------------------
/tests/templates/t1webFeedback.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | {%theWholeMessageMatch%}
5 |
6 |
--------------------------------------------------------------------------------
/tests/templates/t2webFeedback.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | New Web App Bug / Feedback Received
5 | Dear admin,
6 | Here below are the details we gathered from the user regarding this feedback:
7 | Browser Details:
8 | appCodeName: {%browserCode%}
9 | appName: {%browserName%}
10 | appVersion: {%browserVersion%}
11 | cookieEnabled: {%cookiesUsed%}
12 | platform: {%OS%}
13 | userAgent: {%agent%}
14 | {%agentLine2%}
15 | plugins: {%plugins%}
16 |
17 | Report / Feedback:
18 | URL: {%feedbackUrl%}/
19 | User Note:: {%userFeedbackMessage%}
20 |
21 |
22 | The Web App Team
23 | Note: This is an automatic notification. Don't reply to this message
24 |
25 |
--------------------------------------------------------------------------------
/tests/templates/t_0.txt:
--------------------------------------------------------------------------------
1 | Sent at {%created%}
2 |
3 |
4 |
5 | Hi {%name1%},
6 |
7 |
8 |
9 |
10 |
11 | Click here to view the booked tour: {%tour%}
E-mail address :{%mail1%}
Customer Name :{%myname%}
Country :{%country%}
Arrival Date : {%arrival_date%}
Departure Date : {%departure_date%}
Number of Adults : {%adults%}
Number of Children :{%children%}
Additional Requests :{%more_data%}
12 |
--------------------------------------------------------------------------------
/tests/templates/t_1.txt:
--------------------------------------------------------------------------------
1 | Sent to customer service from {%source%}
2 | ==========================================
3 | Ref No. : {%reference%}
4 | E-mail address :{%mail1%}
5 | Phone No. :{%phone%}
6 | Name :{%customer_name%}
7 | Country :{%country%}
8 | Comment :{%more_data%}
9 |
--------------------------------------------------------------------------------
/tests/test_txt_files/noMatch.txt:
--------------------------------------------------------------------------------
1 | A file that has no matching templates in there that should not return any data :S although it may contain a structure like
2 |
3 | Name: Test Name Goes Here
4 | Nationality: Test Nationality
5 | .....
6 | ....
7 | ...
8 | ...
9 | ..
10 | .
11 | Comments: some text - some text - some text - some text - some text - some text
12 |
--------------------------------------------------------------------------------
/tests/test_txt_files/t0TemplateMatch.txt:
--------------------------------------------------------------------------------
1 | Sent at Thursday 9th of October 2014 03:20:44 PM
2 |
3 |
4 |
5 | Hi Carlo ,
6 |
7 |
8 |
9 |
10 |
11 | Click here to view the booked tour: Tour Name
E-mail address : mymail@test.com
Customer Name : Hello World
Country : Egypt
Arrival Date : 11 - 10 - 2014
Departure Date : 11 - 10 - 2014
Number of Adults : 4
Number of Children : 2
Additional Requests : Some more information goes here
12 |
--------------------------------------------------------------------------------
/tests/test_txt_files/webFeedback.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | New Web App Bug / Feedback Received
5 | Dear admin,
6 | Here below are the details we gathered from the user regarding this feedback:
7 | Browser Details:
8 | appCodeName: Mozilla
9 | appName: Netscape
10 | appVersion: 5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36
11 | cookieEnabled: 1
12 | platform: Linux x86_64
13 | userAgent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95
14 | Safari/537.36
15 | plugins: a:5:{i:0;s:15:"Shockwave Flash";i:1;s:28:"Chrome Remote Desktop Viewer";i:2;s:34:"Widevine Content
16 | Decryption Module";i:3;s:13:"Native Client";i:4;s:17:"Chrome PDF Viewer";}
17 |
18 | Report / Feedback:
19 | URL: http://example.org/user/dashboard/
20 | User Note:: The logging out message alerts is not working
21 |
22 |
23 | The Web App Team
24 | Note: This is an automatic notification. Don't reply to this message
25 |
26 |
--------------------------------------------------------------------------------