├── src ├── enums │ ├── FileProcessingModeEnum.php │ ├── SortEnum.php │ ├── AbstractEnum.php │ └── DatatypeEnum.php ├── extensions │ └── DatatypeTrait.php └── Csv.php ├── parsecsv.lib.php ├── License.txt ├── composer.json ├── .github └── workflows │ └── phpunit.yml └── README.md /src/enums/FileProcessingModeEnum.php: -------------------------------------------------------------------------------- 1 | SORT_REGULAR, 18 | self::SORT_TYPE_STRING => SORT_STRING, 19 | self::SORT_TYPE_NUMERIC => SORT_NUMERIC, 20 | ); 21 | 22 | public static function getSorting($type) { 23 | if (array_key_exists($type, self::$sorting)) { 24 | return self::$sorting[$type]; 25 | } 26 | 27 | return self::$sorting[self::__DEFAULT]; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /parsecsv.lib.php: -------------------------------------------------------------------------------- 1 | isValid($value)) { 16 | throw new \UnexpectedValueException("Value '$value' is not part of the enum " . get_called_class()); 17 | } 18 | $this->value = $value; 19 | } 20 | 21 | public static function getConstants() { 22 | $class = get_called_class(); 23 | $reflection = new \ReflectionClass($class); 24 | 25 | return $reflection->getConstants(); 26 | } 27 | 28 | /** 29 | * Check if enum value is valid 30 | * 31 | * @param $value 32 | * 33 | * @return bool 34 | */ 35 | public static function isValid($value) { 36 | return in_array($value, static::getConstants(), true); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /License.txt: -------------------------------------------------------------------------------- 1 | (The MIT license) 2 | 3 | Copyright (c) 2014 Jim Myhrberg. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "parsecsv/php-parsecsv", 3 | "description": "CSV data parser for PHP", 4 | "license": "MIT", 5 | "authors": [ 6 | { 7 | "name": "Jim Myhrberg", 8 | "email": "contact@jimeh.me" 9 | }, 10 | { 11 | "name": "William Knauss", 12 | "email": "will.knauss@gmail.com" 13 | }, 14 | { 15 | "name": "Susann Sgorzaly", 16 | "homepage": "https://github.com/susgo" 17 | }, 18 | { 19 | "name": "Christian Bläul", 20 | "homepage": "https://github.com/Fonata" 21 | } 22 | ], 23 | "autoload": { 24 | "psr-4": { 25 | "ParseCsv\\": "src" 26 | } 27 | }, 28 | "autoload-dev": { 29 | "psr-4": { 30 | "ParseCsv\\tests\\": "tests" 31 | } 32 | }, 33 | "require": { 34 | "php": ">=5.5" 35 | }, 36 | "require-dev": { 37 | "phpunit/phpunit": "^6", 38 | "squizlabs/php_codesniffer": "^3.5" 39 | }, 40 | "suggest": { 41 | "illuminate/support": "Fluent array interface for map functions" 42 | }, 43 | "extra": { 44 | "branch-alias": { 45 | "dev-master": "1.0.x-dev" 46 | } 47 | }, 48 | "scripts": { 49 | "test": [ 50 | "vendor/bin/phpunit -c tests tests --disallow-test-output --coverage-clover coverage_clover.xml --whitelist src" 51 | ] 52 | }, 53 | "support": { 54 | "issues": "https://github.com/parsecsv/parsecsv-for-php/issues", 55 | "source": "https://github.com/parsecsv/parsecsv-for-php" 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /.github/workflows/phpunit.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: PHPUnit 3 | on: 4 | push: 5 | 6 | jobs: 7 | test_php_82_and_newer: 8 | runs-on: ubuntu-latest 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | php_version: 13 | - "8.3" 14 | - "8.2" 15 | steps: 16 | - uses: actions/checkout@v3 17 | - uses: shivammathur/setup-php@v2 18 | with: 19 | php-version: ${{ 
matrix.php_version }} 20 | env: 21 | COMPOSER_TOKEN: ${{ secrets.GITHUB_TOKEN }} 22 | - name: Get composer cache directory 23 | id: composer-cache 24 | run: echo "dir=$(composer config cache-files-dir)" >> "$GITHUB_OUTPUT" 25 | - name: Cache composer dependencies 26 | uses: actions/cache@v4 27 | with: 28 | path: ${{ steps.composer-cache.outputs.dir }} 29 | key: ${{ runner.os }}-composer-${{ hashFiles('**/composer.lock') }} 30 | restore-keys: ${{ runner.os }}-composer- 31 | - name: Update PHPUnit 32 | run: composer require phpunit/phpunit --dev -W 33 | - name: Install dependencies 34 | run: composer update 35 | - name: Validate dependencies 36 | run: composer validate 37 | - name: install Rector 38 | run: composer require rector/rector --dev -W 39 | - name: Migrate PHPUnit configuration 40 | run: cd tests && ../vendor/bin/phpunit --migrate-configuration 41 | shell: bash 42 | - name: run Rector 43 | run: cd tests && ../vendor/bin/rector process . 44 | shell: bash 45 | - name: Run tests 46 | run: vendor/bin/phpunit --configuration tests/phpunit.xml 47 | test_php_81_and_lower: 48 | runs-on: ubuntu-latest 49 | strategy: 50 | fail-fast: false 51 | matrix: 52 | php_version: 53 | - "8.1" 54 | - "8.0" 55 | - "7.4" 56 | - "7.3" 57 | - "7.2" 58 | - "7.1" 59 | steps: 60 | - uses: actions/checkout@v3 61 | - uses: shivammathur/setup-php@v2 62 | with: 63 | php-version: ${{ matrix.php_version }} 64 | env: 65 | COMPOSER_TOKEN: ${{ secrets.GITHUB_TOKEN }} 66 | - name: Get composer cache directory 67 | id: composer-cache 68 | run: echo "dir=$(composer config cache-files-dir)" >> "$GITHUB_OUTPUT" 69 | - name: Cache composer dependencies 70 | uses: actions/cache@v4 71 | with: 72 | path: ${{ steps.composer-cache.outputs.dir }} 73 | key: ${{ runner.os }}-composer-${{ hashFiles('**/composer.lock') }} 74 | restore-keys: ${{ runner.os }}-composer- 75 | - name: Update PHPUnit 76 | run: composer require phpunit/phpunit --dev -W 77 | - name: Install dependencies 78 | run: composer update 79 | - name: Validate 
dependencies 80 | run: composer validate 81 | - name: Run tests 82 | run: vendor/bin/phpunit --configuration tests/phpunit.xml 83 | -------------------------------------------------------------------------------- /src/enums/DatatypeEnum.php: -------------------------------------------------------------------------------- 1 | null, 44 | self::TYPE_INT => 'isValidInteger', 45 | self::TYPE_BOOL => 'isValidBoolean', 46 | self::TYPE_FLOAT => 'isValidFloat', 47 | self::TYPE_DATE => 'isValidDate', 48 | ); 49 | 50 | /** 51 | * Checks data type for given string. 52 | * 53 | * @param string $value Sample to inspect; it is cast to string and trimmed first. 54 | * 55 | * @return bool|string False if the trimmed sample is the empty string, otherwise the first type whose validator accepts it (or the default type if none does). 56 | */ 57 | public static function getValidTypeFromSample($value) { 58 | $value = trim((string) $value); 59 | // Compare against '' instead of using empty(): empty('0') is true, which would discard the valid sample "0". 60 | if ($value === '') { 61 | return false; 62 | } 63 | 64 | foreach (self::$validators as $type => $validator) { 65 | if ($validator === null) { 66 | continue; 67 | } 68 | 69 | if (method_exists(__CLASS__, $validator) && self::$validator($value)) { 70 | return $type; 71 | } 72 | } 73 | 74 | return self::__DEFAULT; 75 | } 76 | 77 | /** 78 | * Check if string is float value. 79 | * 80 | * @param string $value 81 | * 82 | * @return bool 83 | */ 84 | private static function isValidFloat($value) { 85 | return (bool) preg_match(self::REGEX_FLOAT, $value); 86 | } 87 | 88 | /** 89 | * Check if string is integer value. 90 | * 91 | * @param string $value 92 | * 93 | * @return bool 94 | */ 95 | private static function isValidInteger($value) { 96 | return (bool) preg_match(self::REGEX_INT, $value); 97 | } 98 | 99 | /** 100 | * Check if string is boolean. 101 | * 102 | * @param string $value 103 | * 104 | * @return bool 105 | */ 106 | private static function isValidBoolean($value) { 107 | return (bool) preg_match(self::REGEX_BOOL, $value); 108 | } 109 | 110 | /** 111 | * Check if string is date. 
112 | * 113 | * @param string $value 114 | * 115 | * @return bool True if strtotime() can parse the value 116 | */ 117 | private static function isValidDate($value) { 118 | return strtotime($value) !== false; // strict check: timestamp 0 (the Unix epoch) is a valid date, only false means "not parseable" 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/extensions/DatatypeTrait.php: -------------------------------------------------------------------------------- 1 | = 5.5 46 | * 47 | * @uses DatatypeEnum::getValidTypeFromSample 48 | * 49 | * @return array|bool 50 | */ 51 | public function getDatatypes() { 52 | if (empty($this->data)) { 53 | $this->data = $this->_parse_string(); 54 | } 55 | if (!is_array($this->data)) { 56 | throw new \UnexpectedValueException('No data set yet.'); 57 | } 58 | 59 | $result = []; 60 | foreach ($this->titles as $cName) { 61 | $column = array_column($this->data, $cName); 62 | $cDatatypes = array_map(DatatypeEnum::class . '::getValidTypeFromSample', $column); 63 | 64 | $result[$cName] = $this->getMostFrequentDatatypeForColumn($cDatatypes); 65 | } 66 | 67 | $this->data_types = $result; 68 | 69 | return !empty($this->data_types) ? $this->data_types : []; 70 | } 71 | 72 | /** 73 | * Check data type of titles / first row for auto detecting if this could be 74 | * a heading line. 75 | * 76 | * Requires PHP >= 5.5 77 | * 78 | * @uses DatatypeEnum::getValidTypeFromSample 79 | * 80 | * @return bool 81 | */ 82 | public function autoDetectFileHasHeading() { 83 | if (empty($this->data)) { 84 | throw new \UnexpectedValueException('No data set yet.'); 85 | } 86 | 87 | if ($this->heading) { 88 | $firstRow = $this->titles; 89 | } else { 90 | $firstRow = $this->data[0]; 91 | } 92 | 93 | $firstRow = array_filter($firstRow); 94 | if (empty($firstRow)) { 95 | return false; 96 | } 97 | 98 | $firstRowDatatype = array_map(DatatypeEnum::class . 
'::getValidTypeFromSample', $firstRow); 99 | 100 | return $this->getMostFrequentDatatypeForColumn($firstRowDatatype) === DatatypeEnum::TYPE_STRING; 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ParseCsv 2 | ![PHPUnit](https://github.com/parsecsv/parsecsv-for-php/actions/workflows/phpunit.yml/badge.svg) 3 | 4 | ParseCsv is an easy-to-use PHP class that reads and writes CSV data properly. It 5 | fully conforms to the specifications outlined on the 6 | [Wikipedia article][CSV] (and thus RFC 4180). It has many advanced features which help make your 7 | life easier when dealing with CSV data. 8 | 9 | You may not need a library at all: before using ParseCsv, please make sure if PHP's own `str_getcsv()`, ``fgetcsv()`` or `fputcsv()` meets your needs. 10 | 11 | This library was originally created in early 2007 by [jimeh](https://github.com/jimeh) due to the lack of built-in 12 | and third-party support for handling CSV data in PHP. 13 | 14 | [csv]: http://en.wikipedia.org/wiki/Comma-separated_values 15 | 16 | ## Features 17 | 18 | * ParseCsv is a complete and fully featured CSV solution for PHP 19 | * Supports enclosed values, enclosed commas, double quotes and new lines. 20 | * Automatic delimiter character detection. 21 | * Sort data by specific fields/columns. 22 | * Easy data manipulation. 23 | * Basic SQL-like _conditions_, _offset_ and _limit_ options for filtering 24 | data. 25 | * Error detection for incorrectly formatted input. It attempts to be 26 | intelligent, but can not be trusted 100% due to the structure of CSV, and 27 | how different programs like Excel for example outputs CSV data. 28 | * Support for character encoding conversion using PHP's 29 | `iconv()` and `mb_convert_encoding()` functions. 30 | * Supports PHP 5.5 and higher. 31 | It certainly works with PHP 8.3 and all versions in between. 
32 | 33 | ## Installation 34 | 35 | Installation is easy using Composer. Just run the following on the 36 | command line: 37 | ``` 38 | composer require parsecsv/php-parsecsv 39 | ``` 40 | 41 | If you don't use a framework such as Drupal, Laravel, Symfony, Yii etc., 42 | you may have to manually include Composer's autoloader file in your PHP 43 | script: 44 | ```php 45 | require_once __DIR__ . '/vendor/autoload.php'; 46 | ``` 47 | 48 | #### Without composer 49 | Not recommended, but technically possible: you can also clone the 50 | repository or extract the 51 | [ZIP](https://github.com/parsecsv/parsecsv-for-php/archive/master.zip). 52 | To use ParseCSV, you then have to add a `require 'parsecsv.lib.php';` line. 53 | 54 | ## Example Usage 55 | 56 | **Parse a tab-delimited CSV file with encoding conversion** 57 | 58 | ```php 59 | $csv = new \ParseCsv\Csv(); 60 | $csv->encoding('UTF-16', 'UTF-8'); 61 | $csv->delimiter = "\t"; 62 | $csv->parseFile('data.tsv'); 63 | print_r($csv->data); 64 | ``` 65 | 66 | **Auto-detect field delimiter character** 67 | 68 | ```php 69 | $csv = new \ParseCsv\Csv(); 70 | $csv->auto('data.csv'); 71 | print_r($csv->data); 72 | ``` 73 | 74 | **Parse data with offset** 75 | * ignoring the first X (e.g. 
two) rows 76 | ```php 77 | $csv = new \ParseCsv\Csv(); 78 | $csv->offset = 2; 79 | $csv->parseFile('data.csv'); 80 | print_r($csv->data); 81 | ``` 82 | 83 | **Limit the number of returned data rows** 84 | ```php 85 | $csv = new \ParseCsv\Csv(); 86 | $csv->limit = 5; 87 | $csv->parseFile('data.csv'); 88 | print_r($csv->data); 89 | ``` 90 | 91 | **Get total number of data rows without parsing whole data** 92 | * Excluding heading line if present (see $csv->heading property) 93 | ```php 94 | $csv = new \ParseCsv\Csv(); 95 | $csv->loadFile('data.csv'); 96 | $count = $csv->getTotalDataRowCount(); 97 | print_r($count); 98 | ``` 99 | 100 | **Get most common data type for each column** 101 | 102 | ```php 103 | $csv = new \ParseCsv\Csv('data.csv'); 104 | $csv->getDatatypes(); 105 | print_r($csv->data_types); 106 | ``` 107 | 108 | **Modify data in a CSV file** 109 | 110 | Change data values: 111 | ```php 112 | $csv = new \ParseCsv\Csv(); 113 | $csv->sort_by = 'id'; 114 | $csv->parseFile('data.csv'); 115 | # "4" is the value of the "id" column of the CSV row 116 | $csv->data[4] = array('firstname' => 'John', 'lastname' => 'Doe', 'email' => 'john@doe.com'); 117 | $csv->save(); 118 | ``` 119 | 120 | Enclose each data value by quotes: 121 | ```php 122 | $csv = new \ParseCsv\Csv(); 123 | $csv->parseFile('data.csv'); 124 | $csv->enclose_all = true; 125 | $csv->save(); 126 | ``` 127 | 128 | **Replace field names or set ones if missing** 129 | 130 | ```php 131 | $csv = new \ParseCsv\Csv(); 132 | $csv->fields = ['id', 'name', 'category']; 133 | $csv->parseFile('data.csv'); 134 | ``` 135 | 136 | **Add row/entry to end of CSV file** 137 | 138 | _Only recommended when you know the exact structure of the file._ 139 | 140 | ```php 141 | $csv = new \ParseCsv\Csv(); 142 | $csv->save('data.csv', array(array('1986', 'Home', 'Nowhere', '')), /* append */ true); 143 | ``` 144 | 145 | **Convert 2D array to CSV data and send headers to browser to treat output as 146 | a file and download it** 147 
| 148 | Your web app users would call this an export. 149 | 150 | ```php 151 | $csv = new \ParseCsv\Csv(); 152 | $csv->linefeed = "\n"; 153 | $header = array('field 1', 'field 2'); 154 | $csv->output('movies.csv', $data_array, $header, ','); 155 | ``` 156 | 157 | For more complex examples, see the ``tests`` and `examples` directories. 158 | 159 | ## Test coverage 160 | 161 | All tests are located in the `tests` directory. To execute tests, run the following commands: 162 | 163 | ````bash 164 | composer install 165 | composer run test 166 | ```` 167 | 168 | Note that PHP 8.2 and newer allow PHPUnit versions that deprecate `@annotations`. The GitHub actions use Rector to 169 | convert them to `#[attributes]`. 170 | When pushing code to GitHub, tests will be executed using GitHub Actions. The relevant configuration is in the 171 | file `.github/workflows/phpunit.yml`. To run the `test` action locally, you can execute the following command: 172 | 173 | ````bash 174 | make local-ci 175 | ```` 176 | 177 | ## Security 178 | 179 | If you discover any security related issues, please email ParseCsv@blaeul.de instead of using GitHub issues. 180 | 181 | ## Credits 182 | 183 | * ParseCsv is based on the concept of [Ming Hong Ng][ming]'s [CsvFileParser][] 184 | class. 185 | 186 | [ming]: http://minghong.blogspot.com/ 187 | [CsvFileParser]: http://minghong.blogspot.com/2006/07/csv-parser-for-php.html 188 | 189 | 190 | ## Contributors 191 | 192 | ### Code Contributors 193 | 194 | This project exists thanks to all the people who contribute. 195 | 196 | Please find a complete list on the project's [contributors][] page. 197 | 198 | [contributors]: https://github.com/parsecsv/parsecsv-for-php/graphs/contributors 199 | 200 | ## License 201 | 202 | (The MIT license) 203 | 204 | Copyright (c) 2014 Jim Myhrberg. 
205 | 206 | Permission is hereby granted, free of charge, to any person obtaining a copy 207 | of this software and associated documentation files (the "Software"), to deal 208 | in the Software without restriction, including without limitation the rights 209 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 210 | copies of the Software, and to permit persons to whom the Software is 211 | furnished to do so, subject to the following conditions: 212 | 213 | The above copyright notice and this permission notice shall be included in all 214 | copies or substantial portions of the Software. 215 | 216 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 217 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 218 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 219 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 220 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 221 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 222 | SOFTWARE. 
223 | 224 | [![Build Status](https://travis-ci.org/parsecsv/parsecsv-for-php.svg?branch=master)](https://travis-ci.org/parsecsv/parsecsv-for-php) 225 | -------------------------------------------------------------------------------- /src/Csv.php: -------------------------------------------------------------------------------- 1 | var_name = 'value'; 49 | */ 50 | 51 | /** 52 | * Header row: 53 | * Use first line/entry as field names 54 | * 55 | * @var bool 56 | */ 57 | public $heading = true; 58 | 59 | /** 60 | * Override field names 61 | * 62 | * @var array 63 | */ 64 | public $fields = array(); 65 | 66 | /** 67 | * Sort CSV by this field 68 | * 69 | * @var string|null 70 | */ 71 | public $sort_by = null; 72 | 73 | /** 74 | * Reverse the sort direction 75 | * 76 | * @var bool 77 | */ 78 | public $sort_reverse = false; 79 | 80 | /** 81 | * Sort behavior passed to sort methods 82 | * 83 | * regular = SORT_REGULAR 84 | * numeric = SORT_NUMERIC 85 | * string = SORT_STRING 86 | * 87 | * @var string|null 88 | */ 89 | public $sort_type = SortEnum::SORT_TYPE_REGULAR; 90 | 91 | /** 92 | * Field delimiter character 93 | * 94 | * @var string 95 | */ 96 | public $delimiter = ','; 97 | 98 | /** 99 | * Enclosure character 100 | * 101 | * This is useful for cell values that are either multi-line 102 | * or contain the field delimiter character. 103 | * 104 | * @var string 105 | */ 106 | public $enclosure = '"'; 107 | 108 | /** 109 | * Force enclosing all columns. 110 | * 111 | * If false, only cells that are either multi-line or 112 | * contain the field delimiter character are enclosed 113 | * in the $enclosure char. 114 | * 115 | * @var bool 116 | */ 117 | public $enclose_all = false; 118 | 119 | /** 120 | * Basic SQL-Like conditions for row matching 121 | * 122 | * @var string|null 123 | */ 124 | public $conditions = null; 125 | 126 | /** 127 | * Number of rows to ignore from beginning of data. If present, the heading 128 | * row is also counted (if $this->heading == true). 
In other words, 129 | * $offset == 1 and $offset == 0 have the same meaning in that situation. 130 | * 131 | * @var int|null 132 | */ 133 | public $offset = null; 134 | 135 | /** 136 | * Limits the number of returned rows to the specified amount 137 | * 138 | * @var int|null 139 | */ 140 | public $limit = null; 141 | 142 | /** 143 | * Number of rows to analyze when attempting to auto-detect delimiter 144 | * 145 | * @var int 146 | */ 147 | public $auto_depth = 15; 148 | 149 | /** 150 | * Characters that should be ignored when attempting to auto-detect delimiter 151 | * 152 | * @var string 153 | */ 154 | public $auto_non_chars = "a-zA-Z0-9\n\r"; 155 | 156 | /** 157 | * preferred delimiter characters, only used when all filtering method 158 | * returns multiple possible delimiters (happens very rarely) 159 | * 160 | * @var string 161 | */ 162 | public $auto_preferred = ",;\t.:|"; 163 | 164 | /** 165 | * Should we convert the CSV character encoding? 166 | * Used for both parse and unparse operations. 167 | * 168 | * @var bool 169 | */ 170 | public $convert_encoding = false; 171 | 172 | /** 173 | * Set the input encoding 174 | * 175 | * @var string 176 | */ 177 | public $input_encoding = 'ISO-8859-1'; 178 | 179 | /** 180 | * Set the output encoding 181 | * 182 | * @var string 183 | */ 184 | public $output_encoding = 'ISO-8859-1'; 185 | 186 | /** 187 | * Whether to use mb_convert_encoding() instead of iconv(). 188 | * 189 | * The former is platform-independent whereas the latter is the traditional 190 | * default go-to solution. 191 | * 192 | * @var bool (if false, iconv() is used) 193 | */ 194 | public $use_mb_convert_encoding = false; 195 | 196 | /** 197 | * Line feed characters used by unparse, save, and output methods 198 | * Popular choices are "\r\n" and "\n". 
199 | * 200 | * @var string 201 | */ 202 | public $linefeed = "\r"; 203 | 204 | /** 205 | * Sets the output delimiter used by the output method 206 | * 207 | * @var string 208 | */ 209 | public $output_delimiter = ','; 210 | 211 | /** 212 | * Sets the output filename 213 | * 214 | * @var string 215 | */ 216 | public $output_filename = 'data.csv'; 217 | 218 | /** 219 | * keep raw file data in memory after successful parsing (useful for debugging) 220 | * 221 | * @var bool 222 | */ 223 | public $keep_file_data = false; 224 | 225 | /** 226 | * Internal variables 227 | */ 228 | 229 | /** 230 | * File 231 | * Current Filename 232 | * 233 | * @var string 234 | */ 235 | public $file; 236 | 237 | /** 238 | * File Data 239 | * Current file data 240 | * 241 | * @var string 242 | */ 243 | public $file_data; 244 | 245 | /** 246 | * Error 247 | * Contains the error code if one occurred 248 | * 249 | * 0 = No errors found. Everything should be fine :) 250 | * 1 = Hopefully correctable syntax error was found. 251 | * 2 = Enclosure character (double quote by default) 252 | * was found in non-enclosed field. This means 253 | * the file is either corrupt, or does not 254 | * follow standard CSV formatting. Please validate 255 | * the parsed data yourself. 256 | * 257 | * @var int 258 | */ 259 | public $error = 0; 260 | 261 | /** 262 | * Detailed error information 263 | * 264 | * @var array 265 | */ 266 | public $error_info = array(); 267 | 268 | /** 269 | * $titles has 4 distinct tasks: 270 | * 1. After reading in CSV data, $titles will contain the column headers 271 | * present in the data. 272 | * 273 | * 2. It defines which fields from the $data array to write e.g. when 274 | * calling unparse(), and in which order. This lets you skip columns you 275 | * don't want in your output, but are present in $data. 276 | * See examples/save_to_file_without_header_row.php. 277 | * 278 | * 3. It lets you rename columns. See StreamTest::testWriteStream for an 279 | * example. 280 | * 281 | * 4. 
When writing data and $header is true, then $titles is also used for 282 | * the first row. 283 | * 284 | * @var array 285 | */ 286 | public $titles = array(); 287 | 288 | /** 289 | * Two-dimensional array of CSV data. 290 | * The first dimension are the line numbers. Each line is represented as an array with field names as keys. 291 | * 292 | * @var array 293 | */ 294 | public $data = array(); 295 | 296 | use DatatypeTrait; 297 | 298 | /** 299 | * Class constructor 300 | * 301 | * @param string|null $data The CSV string or a direct file path. 302 | * 303 | * WARNING: Supplying file paths here is 304 | * deprecated. Use parseFile() instead. 305 | * 306 | * @param int|null $offset Number of rows to ignore from the 307 | * beginning of the data 308 | * @param int|null $limit Limits the number of returned rows 309 | * to specified amount 310 | * @param string|null $conditions Basic SQL-like conditions for row 311 | * matching 312 | * @param null|true $keep_file_data Keep raw file data in memory after 313 | * successful parsing 314 | * (useful for debugging) 315 | */ 316 | public function __construct($data = null, $offset = null, $limit = null, $conditions = null, $keep_file_data = null) { 317 | $this->init($offset, $limit, $conditions, $keep_file_data); 318 | 319 | if (!empty($data)) { 320 | $this->parse($data); 321 | } 322 | } 323 | 324 | /** 325 | * @param int|null $offset Number of rows to ignore from the 326 | * beginning of the data 327 | * @param int|null $limit Limits the number of returned rows 328 | * to specified amount 329 | * @param string|null $conditions Basic SQL-like conditions for row 330 | * matching 331 | * @param null|true $keep_file_data Keep raw file data in memory after 332 | * successful parsing 333 | * (useful for debugging) 334 | */ 335 | public function init($offset = null, $limit = null, $conditions = null, $keep_file_data = null) { 336 | if (!is_null($offset)) { 337 | $this->offset = $offset; 338 | } 339 | 340 | if (!is_null($limit)) { 341 
| $this->limit = $limit; 342 | } 343 | 344 | if (!is_null($conditions)) { 345 | $this->conditions = $conditions; 346 | } 347 | 348 | if (!is_null($keep_file_data)) { 349 | $this->keep_file_data = $keep_file_data; 350 | } 351 | } 352 | 353 | // ============================================== 354 | // ----- [ Main Functions ] --------------------- 355 | // ============================================== 356 | 357 | /** 358 | * Parse a CSV file or string 359 | * 360 | * @param string|null $dataString The CSV string or a direct file path 361 | * WARNING: Supplying file paths here is 362 | * deprecated and will trigger an 363 | * E_USER_DEPRECATED error. 364 | * @param int|null $offset Number of rows to ignore from the 365 | * beginning of the data 366 | * @param int|null $limit Limits the number of returned rows to 367 | * specified amount 368 | * @param string|null $conditions Basic SQL-like conditions for row 369 | * matching 370 | * 371 | * @return bool True on success 372 | */ 373 | public function parse($dataString = null, $offset = null, $limit = null, $conditions = null) { 374 | if (is_null($dataString)) { 375 | $this->data = $this->parseFile(); 376 | return $this->data !== false; 377 | } 378 | 379 | if (empty($dataString)) { 380 | return false; 381 | } 382 | 383 | $this->init($offset, $limit, $conditions); 384 | 385 | if (strlen($dataString) <= PHP_MAXPATHLEN && is_readable($dataString)) { 386 | $this->file = $dataString; 387 | $this->data = $this->parseFile(); 388 | trigger_error( 389 | 'Supplying file paths to parse() will no longer ' . 390 | 'be supported in a future version of ParseCsv. ' . 391 | 'Use ->parseFile() instead.', 392 | E_USER_DEPRECATED 393 | ); 394 | } else { 395 | $this->file = null; 396 | $this->file_data = &$dataString; 397 | $this->data = $this->_parse_string(); 398 | } 399 | 400 | return $this->data !== false; 401 | } 402 | 403 | /** 404 | * Save changes, or write a new file and/or data. 
405 | * 406 | * @param string $file File location to save to 407 | * @param array $data 2D array of data 408 | * @param bool $append Append current data to end of target CSV, if file 409 | * exists 410 | * @param array $fields Field names. Sets the header. If it is not set 411 | * $this->titles would be used instead. 412 | * 413 | * @return bool 414 | * True on success 415 | */ 416 | public function save($file = '', $data = array(), $append = FileProcessingModeEnum::MODE_FILE_OVERWRITE, $fields = array()) { 417 | if (empty($file)) { 418 | $file = &$this->file; 419 | } 420 | 421 | $mode = FileProcessingModeEnum::getAppendMode($append); 422 | $is_php = (bool) preg_match('/\.php$/i', $file); 423 | 424 | return $this->_wfile($file, $this->unparse($data, $fields, $append, $is_php), $mode); 425 | } 426 | 427 | /** 428 | * Generate a CSV-based string for output. 429 | * 430 | * Useful for exports in web applications. 431 | * 432 | * @param string|null $filename If a filename is specified here or in the 433 | * object, headers and data will be output 434 | * directly to browser as a downloadable 435 | * file. This file doesn't have to exist on 436 | * the server; the parameter only affects 437 | * how the download is called to the 438 | * browser. 439 | * @param array[] $data 2D array with data 440 | * @param array $fields Field names 441 | * @param string|null $delimiter character used to separate data 442 | * 443 | * @return string The resulting CSV string 444 | */ 445 | public function output($filename = null, $data = array(), $fields = array(), $delimiter = null) { 446 | if (empty($filename)) { 447 | $filename = $this->output_filename; 448 | } 449 | 450 | if ($delimiter === null) { 451 | $delimiter = $this->output_delimiter; 452 | } 453 | 454 | $flat_string = $this->unparse($data, $fields, null, null, $delimiter); 455 | 456 | if (!is_null($filename)) { 457 | $mime = $delimiter === "\t" ? 
458 | 'text/tab-separated-values' : 459 | 'application/csv'; 460 | header('Content-type: ' . $mime); 461 | header('Content-Length: ' . strlen($flat_string)); 462 | header('Cache-Control: no-cache, must-revalidate'); 463 | header('Pragma: no-cache'); 464 | header('Expires: 0'); 465 | header('Content-Disposition: attachment; filename="' . $filename . '"; modification-date="' . date('r') . '";'); 466 | 467 | echo $flat_string; 468 | } 469 | 470 | return $flat_string; 471 | } 472 | 473 | /** 474 | * Convert character encoding 475 | * 476 | * Specify the encoding to use for the next parsing or unparsing. 477 | * Calling this function enables encoding conversion ($convert_encoding) but will not change the data held in the object immediately. 478 | * 479 | * @param string|null $input Input character encoding 480 | * If the value null is passed, the existing input encoding remains set (default: ISO-8859-1). 481 | * @param string|null $output Output character encoding, uses default if left blank 482 | * If the value null is passed, the existing output encoding remains set (default: ISO-8859-1). 483 | * 484 | * @return void 485 | */ 486 | public function encoding($input = null, $output = null) { 487 | $this->convert_encoding = true; 488 | if (!is_null($input)) { 489 | $this->input_encoding = $input; 490 | } 491 | 492 | if (!is_null($output)) { 493 | $this->output_encoding = $output; 494 | } 495 | } 496 | 497 | /** 498 | * Auto-detect delimiter: Find delimiter by analyzing a specific number of 499 | * rows to determine most probable delimiter character 500 | * 501 | * @param string|null $file Local CSV file 502 | * Supplying CSV data (file content) here is deprecated. 503 | * For CSV data, please use autoDetectionForDataString(). 504 | * Support for CSV data will be removed in v2.0.0. 
* @param bool        $parse        True/false parse file directly
     * @param int|null    $search_depth Number of rows to analyze
     * @param string|null $preferred    Preferred delimiter characters
     * @param string|null $enclosure    Enclosure character, default is double quote (").
     *
     * @return string|false The detected field delimiter
     */
    public function auto($file = null, $parse = true, $search_depth = null, $preferred = null, $enclosure = null) {
        if (is_null($file)) {
            $file = $this->file;
        }

        if (empty($search_depth)) {
            $search_depth = $this->auto_depth;
        }

        if (is_null($enclosure)) {
            $enclosure = $this->enclosure;
        } else {
            // An explicitly passed enclosure also becomes the object's setting.
            $this->enclosure = $enclosure;
        }

        if (is_null($preferred)) {
            $preferred = $this->auto_preferred;
        }

        // _check_data() is a no-op returning true when file_data is already
        // populated; otherwise it tries to load $file.
        if (!$this->_check_data($file)) {
            return false;
        }

        $this->autoDetectionForDataString($this->file_data, $parse, $search_depth, $preferred, $enclosure);

        return $this->delimiter;
    }

    /**
     * Auto-detect the delimiter of a CSV data string and optionally parse it.
     *
     * If the data starts with a "sep=" row, that row is removed and its
     * separator is used; otherwise the delimiter is guessed from the data.
     * The (possibly shortened) data string is stored in $this->file_data.
     *
     * @param string      $data         CSV data
     * @param bool        $parse        Whether to parse the data into $this->data
     * @param int|null    $search_depth Number of rows to analyze; falls back to
     *                                  $this->auto_depth when empty
     * @param string|null $preferred    Preferred delimiter characters; falls
     *                                  back to $this->auto_preferred when null
     * @param string|null $enclosure    Enclosure character; falls back to
     *                                  $this->enclosure when null
     *
     * @return string|false The value of $this->delimiter after detection
     */
    public function autoDetectionForDataString($data, $parse = true, $search_depth = null, $preferred = null, $enclosure = null) {
        // Resolve omitted settings the same way auto() does, so that calling
        // this method directly does not hand null values to the guesser.
        if (empty($search_depth)) {
            $search_depth = $this->auto_depth;
        }

        if (is_null($preferred)) {
            $preferred = $this->auto_preferred;
        }

        if (is_null($enclosure)) {
            $enclosure = $this->enclosure;
        }

        // Bound by reference: removing a "sep=" row below also updates file_data.
        $this->file_data = &$data;
        if (!$this->_detect_and_remove_sep_row_from_data($data)) {
            $this->_guess_delimiter($search_depth, $preferred, $enclosure, $data);
        }

        // parse data
        if ($parse) {
            $this->data = $this->_parse_string();
        }

        return $this->delimiter;
    }

    /**
     * Get total number of data rows (exclusive heading line if present) in CSV
     * without parsing the whole data string.
*
     * Line breaks inside enclosed fields are not counted. Note that a
     * leading "sep=" row, if present, updates $this->delimiter as a side
     * effect.
     *
     * @return bool|int Row count, or false when no file data is loaded
     */
    public function getTotalDataRowCount() {
        if (empty($this->file_data)) {
            return false;
        }

        // Work on a copy; file_data itself must stay untouched.
        $data = $this->file_data;

        $this->_detect_and_remove_sep_row_from_data($data);

        // Strip line breaks that occur inside enclosed fields so they are not
        // mistaken for row ends. The enclosure is quoted because it may be a
        // character with a special meaning in a regular expression.
        if ((string) $this->enclosure !== '') {
            $enclosure_quoted = preg_quote($this->enclosure, '/');
            $pattern = sprintf('/%1$s[^%1$s]*%1$s/i', $enclosure_quoted);
            $matches = array();

            if (preg_match_all($pattern, $data, $matches) && !empty($matches[0])) {
                /** @var array[] $matches */
                foreach ($matches[0] as $match) {
                    if (empty($match) || (strpos($match, $this->enclosure) === false)) {
                        continue;
                    }

                    $replace = str_replace(["\r", "\n"], '', $match);
                    if ($replace !== $match) {
                        // Only rewrite the data when something actually changes.
                        $data = str_replace($match, $replace, $data);
                    }
                }
            }
        }

        $headingRow = $this->heading ? 1 : 0;

        // Every "\r\n" pair was counted twice (once per character).
        return substr_count($data, "\r")
            + substr_count($data, "\n")
            - substr_count($data, "\r\n")
            - $headingRow;
    }

    // ==============================================
    // ----- [ Core Functions ] ---------------------
    // ==============================================

    /**
     * Read file to string and call _parse_string()
     *
     * @param string|null $file Path to a CSV file.
     *                          If configured in files such as php.ini,
     *                          the path may also contain a protocol:
     *                          https://example.org/some/file.csv
     *
     * @return array|false
     */
    public function parseFile($file = null) {
        if (is_null($file)) {
            $file = $this->file;
        }

        /**
         * @see self::keep_file_data
         * Data that is still present in file_data is parsed as-is and $file
         * is not read again. Usually _parse_string() clears file_data after
         * parsing, so no stale data is left behind for the next call.
         */
        if (empty($this->file_data) && !$this->loadFile($file)) {
            return false;
        }

        if (empty($this->file_data)) {
            return false;
        }

        return $this->data = $this->_parse_string();
    }

    /**
     * Internal function to parse CSV strings to arrays.
*
     * If you need BOM detection or character encoding conversion, please call
     * $csv->load_data($your_data_string) first, followed by a call to
     * $csv->parse($csv->file_data).
     *
     * To detect field separators, please use auto() instead.
     *
     * Syntax problems are recorded in $this->error / $this->error_info; the
     * detected heading row is stored in $this->titles.
     *
     * @param string|null $data CSV data; when empty, $this->file_data is used
     *
     * @return array|false
     * 2D array with CSV data, or false on failure
     */
    protected function _parse_string($data = null) {
        if (empty($data)) {
            if ($this->_check_data()) {
                $data = &$this->file_data;
            } else {
                return false;
            }
        }

        // Whitespace that may surround an enclosed field; the delimiter is
        // removed from the set so that e.g. tab-separated data still works.
        $white_spaces = str_replace($this->delimiter, '', " \t\x0B\0");

        $rows = array();
        $row = array();
        $row_count = 0;
        $current = '';
        $head = !empty($this->fields) ? $this->fields : array();
        $col = 0;
        $enclosed = false;
        $was_enclosed = false;
        $strlen = strlen($data);

        // force the parser to process end of data as a character (false) when
        // data does not end with a line feed or carriage return character.
        $lch = $data[$strlen - 1];
        if ($lch != "\n" && $lch != "\r") {
            $data .= "\n";
            $strlen++;
        }

        // walk through each character
        for ($i = 0; $i < $strlen; $i++) {
            $ch = isset($data[$i]) ? $data[$i] : false;
            $nch = isset($data[$i + 1]) ? $data[$i + 1] : false;

            // open/close quotes, and inline quotes
            if ($ch == $this->enclosure) {
                if (!$enclosed) {
                    if (ltrim($current, $white_spaces) == '') {
                        $enclosed = true;
                        $was_enclosed = true;
                    } else {
                        $this->error = 2;
                        $error_row = count($rows) + 1;
                        $error_col = $col + 1;
                        $index = $error_row . '-' . $error_col;
                        if (!isset($this->error_info[$index])) {
                            $this->error_info[$index] = array(
                                'type' => 2,
                                'info' => 'Syntax error found on row ' . $error_row . '. Non-enclosed fields can not contain double-quotes.',
                                'row' => $error_row,
                                'field' => $error_col,
                                'field_name' => !empty($head[$col]) ? $head[$col] : null,
                            );
                        }

                        $current .= $ch;
                    }
                } elseif ($nch == $this->enclosure) {
                    // doubled enclosure inside an enclosed field: literal quote
                    $current .= $ch;
                    $i++;
                } elseif ($nch != $this->delimiter && $nch != "\r" && $nch != "\n") {
                    // closing quote followed by something else: tolerate
                    // trailing whitespace before the next delimiter
                    $x = $i + 1;
                    while (isset($data[$x]) && ltrim($data[$x], $white_spaces) == '') {
                        $x++;
                    }
                    if ($data[$x] == $this->delimiter) {
                        $enclosed = false;
                        $i = $x;
                    } else {
                        if ($this->error < 1) {
                            $this->error = 1;
                        }

                        $error_row = count($rows) + 1;
                        $error_col = $col + 1;
                        $index = $error_row . '-' . $error_col;
                        if (!isset($this->error_info[$index])) {
                            $this->error_info[$index] = array(
                                'type' => 1,
                                'info' =>
                                    'Syntax error found on row ' . (count($rows) + 1) . '. ' .
                                    'A single double-quote was found within an enclosed string. ' .
                                    'Enclosed double-quotes must be escaped with a second double-quote.',
                                'row' => count($rows) + 1,
                                'field' => $col + 1,
                                'field_name' => !empty($head[$col]) ? $head[$col] : null,
                            );
                        }

                        $current .= $ch;
                        $enclosed = false;
                    }
                } else {
                    $enclosed = false;
                }
                // end of field/row/csv
            } elseif ((in_array($ch, [$this->delimiter, "\n", "\r", false], true)) && !$enclosed) {
                $key = !empty($head[$col]) ? $head[$col] : $col;
                $row[$key] = $was_enclosed ? $current : trim($current);
                $current = '';
                $was_enclosed = false;
                $col++;

                // end of row
                if (in_array($ch, ["\n", "\r", false], true)) {
                    if ($this->_validate_offset($row_count) && $this->_validate_row_conditions($row, $this->conditions)) {
                        if ($this->heading && empty($head)) {
                            $head = $row;
                        } elseif (empty($this->fields) || (!empty($this->fields) && (($this->heading && $row_count > 0) || !$this->heading))) {
                            if (!empty($this->sort_by) && !empty($row[$this->sort_by])) {
                                $sort_field = $row[$this->sort_by];

                                // Rows sharing a sort value are keyed
                                // "<value>_0", "<value>_1", ... The first
                                // duplicate moves the original row to "_0".
                                if (isset($rows[$sort_field])) {
                                    $rows[$sort_field . '_0'] = $rows[$sort_field];
                                    unset($rows[$sort_field]);
                                }

                                // Checking for "_0" (instead of the plain key)
                                // keeps later duplicates from landing on the
                                // plain key again and overwriting "_0".
                                if (isset($rows[$sort_field . '_0'])) {
                                    $sn = 1;
                                    while (isset($rows[$sort_field . '_' . $sn])) {
                                        $sn++;
                                    }
                                    $rows[$sort_field . '_' . $sn] = $row;
                                } else {
                                    $rows[$sort_field] = $row;
                                }
                            } else {
                                $rows[] = $row;
                            }
                        }
                    }

                    $row = array();
                    $col = 0;
                    $row_count++;

                    // without sorting, stop as soon as the limit is reached
                    if ($this->sort_by === null && $this->limit !== null && count($rows) == $this->limit) {
                        $i = $strlen;
                    }

                    // treat "\r\n" as a single line break
                    if ($ch == "\r" && $nch == "\n") {
                        $i++;
                    }
                }

                // append character to current field
            } else {
                $current .= $ch;
            }
        }

        $this->titles = $head;
        if (!empty($this->sort_by)) {
            $sort_type = SortEnum::getSorting($this->sort_type);
            $this->sort_reverse ? krsort($rows, $sort_type) : ksort($rows, $sort_type);

            // with sorting, offset and limit are applied after ordering
            if ($this->offset !== null || $this->limit !== null) {
                $rows = array_slice($rows, ($this->offset === null ? 0 : $this->offset), $this->limit, true);
            }
        }

        if (!$this->keep_file_data) {
            $this->file_data = null;
        }

        return $rows;
    }

    /**
     * Create CSV data string from array
     *
     * @param array[]     $data      2D array with data; when empty,
     *                               $this->data is used, otherwise $this->data
     *                               is replaced by it
     * @param array       $fields    field names; when empty, $this->titles is
     *                               used
     * @param bool        $append    if true, field names will not be output
     * @param bool        $is_php    if a php die() call should be put on the
     *                               first line of the file, this is later
     *                               ignored when read.
     * @param string|null $delimiter field delimiter to use; falls back to
     *                               $this->delimiter
     *
     * @return string CSV data
     */
    public function unparse($data = array(), $fields = array(), $append = FileProcessingModeEnum::MODE_FILE_OVERWRITE, $is_php = false, $delimiter = null) {
        if (!is_array($data) || empty($data)) {
            $data = &$this->data;
        } else {
            /** @noinspection ReferenceMismatchInspection */
            $this->data = $data;
        }

        if (!is_array($fields) || empty($fields)) {
            $fields = &$this->titles;
        }

        if ($delimiter === null) {
            $delimiter = $this->delimiter;
        }

        // Guard line for CSV data stored in a .php file: a direct request
        // dies before any data is emitted; _rfile() strips the guard on read.
        $string = $is_php ? "<?php header('Status: 403'); die(' '); ?>" . $this->linefeed : '';
        $entry = array();

        // create heading
        /** @noinspection ReferenceMismatchInspection */
        $fieldOrder = $this->_validate_fields_for_unparse($fields);
        if (!$fieldOrder && !empty($data)) {
            // No titles at all: derive the columns from the first row. The
            // first row is not necessarily at index 0 (sorted data is keyed
            // by the sort value).
            $first_row = reset($data);
            $columns = is_array($first_row) ? array_keys($first_row) : array();
            $fieldOrder = empty($columns) ? array() : array_combine($columns, $columns);
        }

        if ($this->heading && !$append && !empty($fields)) {
            foreach ($fieldOrder as $column_name) {
                $entry[] = $this->_enclose_value($column_name, $delimiter);
            }

            $string .= implode($delimiter, $entry) . $this->linefeed;
            $entry = array();
        }

        // create data
        $column_keys = array_keys($fieldOrder);
        foreach ($data as $row) {
            foreach ($column_keys as $index) {
                // rows that lack a column produce an empty cell
                $cell_value = isset($row[$index]) ? $row[$index] : null;
                $entry[] = $this->_enclose_value($cell_value, $delimiter);
            }

            $string .= implode($delimiter, $entry) . $this->linefeed;
            $entry = array();
        }

        if ($this->convert_encoding) {
            /** @noinspection PhpComposerExtensionStubsInspection
             *
             * If you receive an error at the following 3 lines, you must enable
             * the following PHP extension:
             *
             * - if $use_mb_convert_encoding is true: mbstring
             * - if $use_mb_convert_encoding is false: iconv
             */
            $string = $this->use_mb_convert_encoding ?
                mb_convert_encoding($string, $this->output_encoding, $this->input_encoding) :
                iconv($this->input_encoding, $this->output_encoding, $string);
        }

        return $string;
    }

    /**
     * Work out which columns unparse() writes, and under which titles.
     *
     * The result maps each row key to read (array key) to the title to print
     * for that column (array value), in output order.
     *
     * @param array $fields Field names; when empty, $this->titles is used
     *
     * @return array|false Empty array when no field names are known
     *
     * @throws \UnexpectedValueException If the fields match none of the parsed
     *                                   titles and differ in count
     */
    private function _validate_fields_for_unparse(array $fields) {
        if (empty($fields)) {
            $fields = $this->titles;
        }

        if (empty($fields)) {
            return array();
        }

        // this is needed because sometime titles property is overwritten instead of using fields parameter!
        $titlesOnParse = !empty($this->data) ? array_keys(reset($this->data)) : array();

        // both are identical, also in ordering OR we have no data (only titles)
        if (empty($titlesOnParse) || array_values($fields) === array_values($titlesOnParse)) {
            return array_combine($fields, $fields);
        }

        // if renaming given by: $oldName => $newName (maybe with reorder and / or subset):
        // todo: this will only work if titles are unique
        $fieldOrder = array_intersect(array_flip($fields), $titlesOnParse);
        if (!empty($fieldOrder)) {
            return array_flip($fieldOrder);
        }

        // plain list of existing titles (maybe reordered and / or a subset)
        $fieldOrder = array_intersect($fields, $titlesOnParse);
        if (!empty($fieldOrder)) {
            return array_combine($fieldOrder, $fieldOrder);
        }

        // original titles are not given in fields. that is okay if count is okay.
        if (count($fields) != count($titlesOnParse)) {
            throw new \UnexpectedValueException(
                "The specified fields do not match any titles and do not match column count.\n" .
                "\$fields was " . print_r($fields, true) .
                "\$titlesOnParse was " . print_r($titlesOnParse, true));
        }

        // same column count: rename the columns position by position
        return array_combine($titlesOnParse, $fields);
    }

    /**
     * Load local file or string.
     *
     * Only use this function if auto() and parse() don't handle your data well.
     *
     * This function load_data() is able to handle BOMs and encodings. The data
     * is stored within the $this->file_data class field.
     *
     * @param string|null $input CSV file path or CSV data as a string
     *
     * Supplying CSV data (file content) here is deprecated.
     * For CSV data, please use loadDataString().
     * Support for CSV data will be removed in v2.0.0.
     *
     * @return bool True on success
     * @deprecated Use loadDataString() or loadFile() instead.
*/
    public function load_data($input = null) {
        // Kept only as a backwards-compatible alias.
        return $this->loadFile($input);
    }

    /**
     * Read a CSV file into the object without parsing it.
     *
     * Reach for this only when auto() and parseFile() cannot cope with your
     * data. BOM and encoding handling is delegated to loadDataString(); the
     * result ends up in $this->file_data.
     *
     * @param string|null $file CSV file path; null means $this->file
     *
     * @return bool True on success
     */
    public function loadFile($file = null) {
        if (is_null($file)) {
            return $this->loadDataString($this->_rfile($this->file));
        }

        $is_existing_path = \strlen($file) <= PHP_MAXPATHLEN && file_exists($file);
        if (!$is_existing_path) {
            // Not a path to an existing file, so the argument itself is
            // treated as CSV data.
            //
            // WARNING:
            // Supplying CSV data to load_data() will no longer
            // be supported in a future version of ParseCsv.
            // This function will return false for invalid paths from v2.0.0 onwards.
            //
            // Use ->loadDataString() instead.
            return $this->loadDataString($file);
        }

        $contents = $this->_rfile($file);

        // Remember the file that was loaded most recently.
        if ($this->file != $file) {
            $this->file = $file;
        }

        return $this->loadDataString($contents);
    }

    /**
     * Load a data string, but don't parse it.
     *
     * Only use this function if autoDetectionForDataString() and parse() don't handle your data well.
     *
     * This function is able to handle BOMs and encodings. The data
     * is stored within the $this->file_data class field.
*
     * @param string|null $data CSV data as a string
     *
     * @return bool True on success, false when $data is empty
     */
    public function loadDataString($data) {
        if (empty($data)) {
            return false;
        }

        if (strpos($data, "\xef\xbb\xbf") === 0) {
            // strip off BOM (UTF-8)
            $data = substr($data, 3);
            $this->encoding('UTF-8');
        } elseif (strpos($data, "\xff\xfe") === 0) {
            // strip off BOM (UTF-16 little endian)
            $data = substr($data, 2);
            $this->encoding("UCS-2LE");
        } elseif (strpos($data, "\xfe\xff") === 0) {
            // strip off BOM (UTF-16 big endian)
            // NOTE(review): with the BOM removed, "UTF-16" leaves the byte
            // order to the converter's default - confirm that both iconv and
            // mbstring pick big endian here, or switch to "UTF-16BE".
            $data = substr($data, 2);
            $this->encoding("UTF-16");
        }

        if ($this->convert_encoding && $this->input_encoding !== $this->output_encoding) {
            /** @noinspection PhpComposerExtensionStubsInspection
             *
             * If you receive an error at the following 3 lines, you must enable
             * the following PHP extension:
             *
             * - if $use_mb_convert_encoding is true: mbstring
             * - if $use_mb_convert_encoding is false: iconv
             */
            $data = $this->use_mb_convert_encoding ?
                mb_convert_encoding($data, $this->output_encoding, $this->input_encoding) :
                iconv($this->input_encoding, $this->output_encoding, $data);
        }

        // make sure the data ends with a line feed
        if (substr($data, -1) != "\n") {
            $data .= "\n";
        }

        $this->file_data = &$data;

        return true;
    }

    // ==============================================
    // ----- [ Internal Functions ] -----------------
    // ==============================================

    /**
     * Validate a row against specified conditions
     *
     * Conditions are split on ' OR ' first and each part on ' AND ': the row
     * matches when at least one OR-part has all of its AND-parts satisfied.
     *
     * @param array       $row        array with values from a row
     * @param string|null $conditions specified conditions that the row must match
     *
     * @return bool False for an empty row, true when no conditions are given
     */
    protected function _validate_row_conditions($row = array(), $conditions = null) {
        if (empty($row)) {
            return false;
        }

        if (empty($conditions)) {
            return true;
        }

        // Each check yields '0' or '1'; the results are collected as strings.
        $or = '';
        foreach (explode(' OR ', $conditions) as $or_part) {
            if (strpos($or_part, ' AND ') !== false) {
                $and = '';
                foreach (explode(' AND ', $or_part) as $and_part) {
                    $and .= $this->_validate_row_condition($row, $and_part);
                }

                // one failed AND-part fails the whole group
                $or .= (strpos($and, '0') !== false) ? '0' : '1';
            } else {
                $or .= $this->_validate_row_condition($row, $or_part);
            }
        }

        return strpos($or, '1') !== false;
    }

    /**
     * Validate a row against a single condition
     *
     * A condition that cannot be parsed, or that names a field the row does
     * not have, counts as satisfied.
     *
     * @param array  $row       array with values from a row
     * @param string $condition specified condition that the row must match
     *
     * @return string single 0 or 1
     */
    protected function _validate_row_condition($row, $condition) {
        $operators = array(
            '=',
            'equals',
            'is',
            '!=',
            'is not',
            '<',
            'is less than',
            '>',
            'is greater than',
            '<=',
            'is less than or equals',
            '>=',
            'is greater than or equals',
            'contains',
            'does not contain',
            'is number',
            'is not number',
        );

        $operators_regex = array();

        foreach ($operators as $value) {
            $operators_regex[] = preg_quote($value, '/');
        }

        $operators_regex = implode('|', $operators_regex);

        if (preg_match('/^(.+) (' . $operators_regex . ') (.+)$/i', trim($condition), $capture)) {
            $field = $capture[1];
            $op = strtolower($capture[2]);
            $value = $capture[3];

            // The greedy field group can split a multi-word operator in the
            // wrong place; the three checks below put it back together.
            if ($op == 'equals' && preg_match('/^(.+) is (less|greater) than or$/i', $field, $m)) {
                $field = $m[1];
                $op = strtolower($m[2]) == 'less' ? '<=' : '>=';
            }
            if ($op == 'is' && preg_match('/^(less|greater) than (.+)$/i', $value, $m)) {
                $value = $m[2];
                $op = strtolower($m[1]) == 'less' ? '<' : '>';
            }
            if ($op == 'is' && preg_match('/^not (.+)$/i', $value, $m)) {
                $value = $m[1];
                $op = '!=';
            }

            // unwrap a quoted value and resolve its escape sequences
            if (preg_match('/^([\'"])(.*)([\'"])$/', $value, $capture) && $capture[1] == $capture[3]) {
                $value = strtr($capture[2], array(
                    "\\n" => "\n",
                    "\\r" => "\r",
                    "\\t" => "\t",
                ));

                $value = stripslashes($value);
            }

            if (array_key_exists($field, $row)) {
                $op_equals = in_array($op, ['=', 'equals', 'is'], true);
                if ($op_equals && $row[$field] == $value) {
                    return '1';
                } elseif ($op_equals && $value == 'number' && is_numeric($row[$field])) {
                    return '1';
                } elseif (($op == '!=' || $op == 'is not') && $value == 'number' && !is_numeric($row[$field])) {
                    return '1';
                } elseif (($op == '!=' || $op == 'is not') && $row[$field] != $value) {
                    return '1';
                } elseif (($op == '<' || $op == 'is less than') && $row[$field] < $value) {
                    return '1';
                } elseif (($op == '>' || $op == 'is greater than') && $row[$field] > $value) {
                    return '1';
                } elseif (($op == '<=' || $op == 'is less than or equals') && $row[$field] <= $value) {
                    return '1';
                } elseif (($op == '>=' || $op == 'is greater than or equals') && $row[$field] >= $value) {
                    return '1';
                } elseif ($op == 'contains' && preg_match('/' . preg_quote($value, '/') . '/i', $row[$field])) {
                    return '1';
                } elseif ($op == 'does not contain' && !preg_match('/' . preg_quote($value, '/') . '/i', $row[$field])) {
                    return '1';
                } else {
                    return '0';
                }
            }
        }

        return '1';
    }

    /**
     * Validates if the row is within the offset or not if sorting is disabled
     *
     * With sorting enabled every row passes here. The heading row (row 0)
     * always passes as well.
     *
     * @param int $current_row the current row number being processed
     *
     * @return bool
     */
    protected function _validate_offset($current_row) {
        return
            $this->sort_by !== null ||
            $this->offset === null ||
            $current_row >= $this->offset ||
            ($this->heading && $current_row == 0);
    }

    /**
     * Enclose values if needed
     *  - only used by unparse()
     *
     * A value is enclosed when $this->enclose_all is set, when it contains
     * the delimiter, the enclosure or a line break, or when it starts or ends
     * with a space. Enclosure characters inside the value are doubled.
     *
     * @param string|null $value     Cell value to process
     * @param string      $delimiter Character to put between cells on the same row
     *
     * @return string Processed value
     */
    protected function _enclose_value($value, $delimiter) {
        if ($value !== null && $value != '') {
            $delimiter_quoted = $delimiter ?
                preg_quote($delimiter, '/') . "|"
                : '';
            $enclosure_quoted = preg_quote($this->enclosure, '/');
            $pattern = "/" . $delimiter_quoted . $enclosure_quoted . "|\n|\r/i";
            if ($this->enclose_all || preg_match($pattern, $value) || strpos($value, ' ') === 0 || substr($value, -1) == ' ') {
                $value = str_replace($this->enclosure, $this->enclosure . $this->enclosure, $value);
                $value = $this->enclosure . $value . $this->enclosure;
            }
        }

        return $value;
    }

    /**
     * Check file data
     *
     * Makes sure $this->file_data is populated, loading the file if needed.
     *
     * @param string|null $file local filename; null means $this->file
     *
     * @return bool
     */
    protected function _check_data($file = null) {
        if (empty($this->file_data)) {
            if (is_null($file)) {
                $file = $this->file;
            }

            return $this->loadFile($file);
        }

        return true;
    }

    /**
     * Check if passed info might be delimiter.
     * Only used by _guess_delimiter()
     *
     * A character qualifies when it occurs in every analyzed row and its
     * count per row equals that of the first row (or exceeds it by one).
     *
     * @param string $char      Potential field separating character
     * @param array  $array     Frequency
     * @param int    $depth     Number of analyzed rows
     * @param string $preferred Preferred delimiter characters
     *
     * @return string|false special string used for delimiter selection, or false
     */
    protected function _check_count($char, $array, $depth, $preferred) {
        if ($depth !== count($array)) {
            // the character is missing from at least one analyzed row
            return false;
        }

        $first = null;
        $equal = null;
        $almost = false;
        foreach ($array as $value) {
            if ($first == null) {
                $first = $value;
            } elseif ($value == $first && $equal !== false) {
                $equal = true;
            } elseif ($value == $first + 1 && $equal !== false) {
                $equal = true;
                $almost = true;
            } else {
                $equal = false;
            }
        }

        if (!$equal && $depth !== 1) {
            return false;
        }

        $match = $almost ? 2 : 1;

        // $char arrives as an array key, so digit characters come in as
        // integers, and $preferred may be null. Both are cast so that
        // strpos() always searches for the character itself.
        $pref = strpos((string) $preferred, (string) $char);
        $pref = ($pref !== false) ? str_pad($pref, 3, '0', STR_PAD_LEFT) : '999';

        // Sort key: preferred position, then match quality, then frequency.
        return $pref . $match . '.' . (99999 - str_pad($first, 5, '0', STR_PAD_LEFT));
    }

    /**
     * Read local file.
*
     * @param string $filePath local filename
     *
     * @return string|false Data from file, or false on failure
     */
    protected function _rfile($filePath) {
        if (!is_readable($filePath)) {
            return false;
        }

        $contents = file_get_contents($filePath);
        if ($contents === false) {
            return false;
        }

        $is_php_file = preg_match('/\.php$/i', $filePath);
        if ($is_php_file && preg_match('/<\?.*?\?>(.*)/ms', $contents, $behind_tags)) {
            // Keep only what follows the first PHP block.
            // This parsing is deprecated and will be removed in v2.0.0.
            $contents = ltrim($behind_tags[1]);
        }

        // Trailing line breaks are dropped.
        return rtrim($contents, "\r\n");
    }

    /**
     * Write a string to a local file.
     *
     * @param string $file    local filename
     * @param string $content data to write to file
     * @param string $mode    fopen() mode
     * @param int    $lock    flock() mode
     *
     * @return bool
     * True when both writing and closing the file succeeded
     *
     */
    protected function _wfile($file, $content = '', $mode = 'wb', $lock = LOCK_EX) {
        $handle = fopen($file, $mode);
        if (!$handle) {
            return false;
        }

        // The lock result is not checked; writing proceeds either way.
        flock($handle, $lock);
        $written = fwrite($handle, $content);
        $closed = fclose($handle);

        return $written !== false && $closed !== false;
    }

    /**
     * Detect separator using a nonstandard hack: such file starts with the
     * first line containing only "sep=;", where the last character is the
     * separator. Microsoft Excel is able to open such files.
*
     * Only the first 32 bytes of the data are inspected, and the "sep=" row
     * must be terminated by a line feed (optionally preceded by a carriage
     * return).
     *
     * @param string $data file data
     *
     * @return string|false detected delimiter, or false if none found
     */
    protected function _get_delimiter_from_sep_row($data) {
        $sep = false;
        // 32 bytes should be quite enough data for our sniffing, chosen arbitrarily
        $sepPrefix = substr($data, 0, 32);
        if (preg_match('/^sep=(.)\\r?\\n/i', $sepPrefix, $sepMatch)) {
            // we get separator.
            $sep = $sepMatch[1];
        }
        return $sep;
    }

    /**
     * Support for Excel-compatible sep=? row.
     *
     * When such a row is found, $this->delimiter is set to its separator and
     * the row (including its line end) is cut off the passed data string.
     *
     * @param string $data_string file data to be updated (passed by reference)
     *
     * @return bool TRUE if sep= line was found at the very beginning of the file
     */
    protected function _detect_and_remove_sep_row_from_data(&$data_string) {
        $sep = $this->_get_delimiter_from_sep_row($data_string);
        if ($sep === false) {
            return false;
        }

        $this->delimiter = $sep;

        // likely to be 5, but let's not assume we're always single-byte.
        // (4 is the length of the "sep=" prefix)
        $pos = 4 + strlen($sep);
        // the next characters should be a line-end
        if (substr($data_string, $pos, 1) === "\r") {
            $pos++;
        }
        if (substr($data_string, $pos, 1) === "\n") {
            $pos++;
        }

        // remove delimiter and its line-end (the data param is by-ref!)
        $data_string = substr($data_string, $pos);
        return true;
    }

    /**
     * Guess the delimiter by counting candidate characters per row.
     *
     * Characters outside enclosed fields are counted for up to $search_depth
     * rows; characters matching $this->auto_non_chars are ignored. Every
     * candidate is rated by _check_count() and the best rated one is stored
     * in $this->delimiter. If no candidate qualifies, $this->delimiter
     * becomes false (reset() on an empty array).
     *
     * @param int    $search_depth Number of rows to analyze
     * @param string $preferred    Preferred delimiter characters
     * @param string $enclosure    Enclosure character, default is double quote
     * @param string $data         The file content
     */
    protected function _guess_delimiter($search_depth, $preferred, $enclosure, $data) {
        // $chars[<character>][<row number>] = occurrences outside of quotes
        $chars = [];
        $strlen = strlen($data);
        $enclosed = false;
        $current_row = 1;
        // stays true when the data ends before $search_depth rows were read
        $to_end = true;

        // The dash is the only character we don't want quoted, as it would
        // prevent character ranges within $auto_non_chars:
        $quoted_auto_non_chars = preg_quote($this->auto_non_chars, '/');
        $quoted_auto_non_chars = str_replace('\-', '-', $quoted_auto_non_chars);
        $pattern = '/[' . $quoted_auto_non_chars . ']/i';

        // walk specific depth finding possible delimiter characters
        for ($i = 0; $i < $strlen; $i++) {
            $ch = $data[$i];
            $nch = isset($data[$i + 1]) ? $data[$i + 1] : false;
            $pch = isset($data[$i - 1]) ? $data[$i - 1] : false;

            // open and closing quotes
            // (the "\n" of a "\r\n" pair does not count as a second line break)
            $is_newline = ($ch == "\n" && $pch != "\r") || $ch == "\r";
            if ($ch == $enclosure) {
                if (!$enclosed || $nch != $enclosure) {
                    $enclosed = !$enclosed;
                } elseif ($enclosed) {
                    // doubled enclosure inside an enclosed field: skip the second one
                    $i++;
                }

                // end of row
            } elseif ($is_newline && !$enclosed) {
                if ($current_row >= $search_depth) {
                    // enough rows analyzed: zeroing the length ends the loop
                    $strlen = 0;
                    $to_end = false;
                } else {
                    $current_row++;
                }

                // count character
            } elseif (!$enclosed) {
                if (!preg_match($pattern, $ch)) {
                    if (!isset($chars[$ch][$current_row])) {
                        $chars[$ch][$current_row] = 1;
                    } else {
                        $chars[$ch][$current_row]++;
                    }
                }
            }
        }

        // filtering
        // When the loop ran to the end of the data, $current_row was already
        // advanced past the last line break that was seen.
        $depth = $to_end ? $current_row - 1 : $current_row;
        $filtered = [];
        foreach ($chars as $char => $value) {
            if ($match = $this->_check_count($char, $value, $depth, $preferred)) {
                $filtered[$match] = $char;
            }
        }

        // capture most probable delimiter
        // (the rating strings sort so that the best candidate comes first)
        ksort($filtered);
        $this->delimiter = reset($filtered);
    }

    /**
     * getCollection
     * Returns a Illuminate/Collection object
     * This may prove to be helpful to people who want to
     * create macros, and or use map functions
     *
     * The collection is built from $this->data.
     *
     * @access public
     * @link   https://laravel.com/docs/5.6/collections
     *
     * @throws \ErrorException - If the Illuminate\Support\Collection class is not found
     *
     * @return Collection
     */
    public function getCollection() {
        //does the Illuminate\Support\Collection class exists?
        //this uses the autoloader to try to determine
        //@see http://php.net/manual/en/function.class-exists.php
        if (class_exists('Illuminate\Support\Collection', true) == false) {
            throw new \ErrorException('It would appear you have not installed the illuminate/support package!');
        }

        //return the collection
        return new Collection($this->data);
    }
}
--------------------------------------------------------------------------------