├── .classpath ├── .gitignore ├── .project ├── .settings ├── org.eclipse.core.resources.prefs ├── org.eclipse.jdt.core.prefs └── org.eclipse.m2e.core.prefs ├── .travis.yml ├── LICENSE-2.0.html ├── README.md ├── pom.xml └── src └── main ├── java └── com │ └── univocity │ └── articles │ └── csvcomparison │ ├── CorrectnessComparison.java │ ├── HugeFileGenerator.java │ ├── PerformanceComparison.java │ ├── parser │ ├── AbstractParser.java │ ├── BeanIoParser.java │ ├── CSVeedParser.java │ ├── CommonsCsvParser.java │ ├── DataPipelineCsvParser.java │ ├── EsperioCsvParser.java │ ├── FlatpackParser.java │ ├── GenJavaParser.java │ ├── JCsvParser.java │ ├── JacksonParser.java │ ├── JavaCsvParser.java │ ├── OpenCsvParser.java │ ├── OsterMillerParser.java │ ├── Parsers.java │ ├── ParsersRegistry.java │ ├── ProductCollectionsParser.java │ ├── SimpleCsvParser.java │ ├── SimpleFlatMapperParser.java │ ├── SuperCsvParser.java │ ├── UnivocityParser.java │ └── WayIoParser.java │ └── parser8 │ ├── DecsParser.java │ └── Parsers.java ├── patches ├── esperio-csv-6.x.patch └── flatpack-4.x.patch └── resources ├── .gitignore ├── correctness.csv └── logback.xml /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | .idea/ 3 | *.iml 4 | *.ipr 5 | *.iws 6 | **/worldcitiespop.txt 7 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | csv-parsers-comparison 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 
19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding//src/main/java=UTF-8 3 | encoding//src/main/resources=UTF-8 4 | encoding/=UTF-8 5 | -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 3 | org.eclipse.jdt.core.compiler.compliance=1.6 4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 5 | org.eclipse.jdt.core.compiler.source=1.6 6 | -------------------------------------------------------------------------------- /.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: java 3 | addons: 4 | apt: 5 | packages: 6 | - openjdk-6-jdk 7 | jdk: 8 | - openjdk6 9 | - openjdk7 10 | - openjdk8 11 | - oraclejdk8 12 | -------------------------------------------------------------------------------- /LICENSE-2.0.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Apache License, Version 2.0 - The Apache Software Foundation 8 | 9 | 10 |

11 | Apache License
12 | Version 2.0, January 2004
13 | http://www.apache.org/licenses/ 14 |

15 |

16 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 17 |

18 |

1. Definitions.

19 |

20 | "License" shall mean the terms and conditions for use, reproduction, 21 | and distribution as defined by Sections 1 through 9 of this document. 22 |

23 |

24 | "Licensor" shall mean the copyright owner or entity authorized by 25 | the copyright owner that is granting the License. 26 |

27 |

28 | "Legal Entity" shall mean the union of the acting entity and all 29 | other entities that control, are controlled by, or are under common 30 | control with that entity. For the purposes of this definition, 31 | "control" means (i) the power, direct or indirect, to cause the 32 | direction or management of such entity, whether by contract or 33 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 34 | outstanding shares, or (iii) beneficial ownership of such entity. 35 |

36 |

37 | "You" (or "Your") shall mean an individual or Legal Entity 38 | exercising permissions granted by this License. 39 |

40 |

41 | "Source" form shall mean the preferred form for making modifications, 42 | including but not limited to software source code, documentation 43 | source, and configuration files. 44 |

45 |

46 | "Object" form shall mean any form resulting from mechanical 47 | transformation or translation of a Source form, including but 48 | not limited to compiled object code, generated documentation, 49 | and conversions to other media types. 50 |

51 |

52 | "Work" shall mean the work of authorship, whether in Source or 53 | Object form, made available under the License, as indicated by a 54 | copyright notice that is included in or attached to the work 55 | (an example is provided in the Appendix below). 56 |

57 |

58 | "Derivative Works" shall mean any work, whether in Source or Object 59 | form, that is based on (or derived from) the Work and for which the 60 | editorial revisions, annotations, elaborations, or other modifications 61 | represent, as a whole, an original work of authorship. For the purposes 62 | of this License, Derivative Works shall not include works that remain 63 | separable from, or merely link (or bind by name) to the interfaces of, 64 | the Work and Derivative Works thereof. 65 |

66 |

67 | "Contribution" shall mean any work of authorship, including 68 | the original version of the Work and any modifications or additions 69 | to that Work or Derivative Works thereof, that is intentionally 70 | submitted to Licensor for inclusion in the Work by the copyright owner 71 | or by an individual or Legal Entity authorized to submit on behalf of 72 | the copyright owner. For the purposes of this definition, "submitted" 73 | means any form of electronic, verbal, or written communication sent 74 | to the Licensor or its representatives, including but not limited to 75 | communication on electronic mailing lists, source code control systems, 76 | and issue tracking systems that are managed by, or on behalf of, the 77 | Licensor for the purpose of discussing and improving the Work, but 78 | excluding communication that is conspicuously marked or otherwise 79 | designated in writing by the copyright owner as "Not a Contribution." 80 |

81 |

82 | "Contributor" shall mean Licensor and any individual or Legal Entity 83 | on behalf of whom a Contribution has been received by Licensor and 84 | subsequently incorporated within the Work. 85 |

86 |

2. Grant of Copyright License. 87 | Subject to the terms and conditions of 88 | this License, each Contributor hereby grants to You a perpetual, 89 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 90 | copyright license to reproduce, prepare Derivative Works of, 91 | publicly display, publicly perform, sublicense, and distribute the 92 | Work and such Derivative Works in Source or Object form. 93 |

94 |

3. Grant of Patent License. 95 | Subject to the terms and conditions of 96 | this License, each Contributor hereby grants to You a perpetual, 97 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 98 | (except as stated in this section) patent license to make, have made, 99 | use, offer to sell, sell, import, and otherwise transfer the Work, 100 | where such license applies only to those patent claims licensable 101 | by such Contributor that are necessarily infringed by their 102 | Contribution(s) alone or by combination of their Contribution(s) 103 | with the Work to which such Contribution(s) was submitted. If You 104 | institute patent litigation against any entity (including a 105 | cross-claim or counterclaim in a lawsuit) alleging that the Work 106 | or a Contribution incorporated within the Work constitutes direct 107 | or contributory patent infringement, then any patent licenses 108 | granted to You under this License for that Work shall terminate 109 | as of the date such litigation is filed. 110 |

111 |

4. Redistribution. 112 | You may reproduce and distribute copies of the 113 | Work or Derivative Works thereof in any medium, with or without 114 | modifications, and in Source or Object form, provided that You 115 | meet the following conditions: 116 |

    117 |
  1. You must give any other recipients of the Work or 118 | Derivative Works a copy of this License; and 119 |

  2. 120 | 121 |
  3. You must cause any modified files to carry prominent notices 122 | stating that You changed the files; and 123 |

  4. 124 | 125 |
  5. You must retain, in the Source form of any Derivative Works 126 | that You distribute, all copyright, patent, trademark, and 127 | attribution notices from the Source form of the Work, 128 | excluding those notices that do not pertain to any part of 129 | the Derivative Works; and 130 |

  6. 131 | 132 |
  7. If the Work includes a "NOTICE" text file as part of its 133 | distribution, then any Derivative Works that You distribute must 134 | include a readable copy of the attribution notices contained 135 | within such NOTICE file, excluding those notices that do not 136 | pertain to any part of the Derivative Works, in at least one 137 | of the following places: within a NOTICE text file distributed 138 | as part of the Derivative Works; within the Source form or 139 | documentation, if provided along with the Derivative Works; or, 140 | within a display generated by the Derivative Works, if and 141 | wherever such third-party notices normally appear. The contents 142 | of the NOTICE file are for informational purposes only and 143 | do not modify the License. You may add Your own attribution 144 | notices within Derivative Works that You distribute, alongside 145 | or as an addendum to the NOTICE text from the Work, provided 146 | that such additional attribution notices cannot be construed 147 | as modifying the License.
  8. 148 |
149 | You may add Your own copyright statement to Your modifications and 150 | may provide additional or different license terms and conditions 151 | for use, reproduction, or distribution of Your modifications, or 152 | for any such Derivative Works as a whole, provided Your use, 153 | reproduction, and distribution of the Work otherwise complies with 154 | the conditions stated in this License. 155 | 156 |

5. Submission of Contributions. 157 | Unless You explicitly state otherwise, 158 | any Contribution intentionally submitted for inclusion in the Work 159 | by You to the Licensor shall be under the terms and conditions of 160 | this License, without any additional terms or conditions. 161 | Notwithstanding the above, nothing herein shall supersede or modify 162 | the terms of any separate license agreement you may have executed 163 | with Licensor regarding such Contributions. 164 |

165 |

6. Trademarks. 166 | This License does not grant permission to use the trade 167 | names, trademarks, service marks, or product names of the Licensor, 168 | except as required for reasonable and customary use in describing the 169 | origin of the Work and reproducing the content of the NOTICE file. 170 |

171 |

7. Disclaimer of Warranty. 172 | Unless required by applicable law or 173 | agreed to in writing, Licensor provides the Work (and each 174 | Contributor provides its Contributions) on an "AS IS" BASIS, 175 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 176 | implied, including, without limitation, any warranties or conditions 177 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 178 | PARTICULAR PURPOSE. You are solely responsible for determining the 179 | appropriateness of using or redistributing the Work and assume any 180 | risks associated with Your exercise of permissions under this License. 181 |

182 |

8. Limitation of Liability. 183 | In no event and under no legal theory, 184 | whether in tort (including negligence), contract, or otherwise, 185 | unless required by applicable law (such as deliberate and grossly 186 | negligent acts) or agreed to in writing, shall any Contributor be 187 | liable to You for damages, including any direct, indirect, special, 188 | incidental, or consequential damages of any character arising as a 189 | result of this License or out of the use or inability to use the 190 | Work (including but not limited to damages for loss of goodwill, 191 | work stoppage, computer failure or malfunction, or any and all 192 | other commercial damages or losses), even if such Contributor 193 | has been advised of the possibility of such damages. 194 |

195 |

9. Accepting Warranty or Additional Liability. 196 | While redistributing 197 | the Work or Derivative Works thereof, You may choose to offer, 198 | and charge a fee for, acceptance of support, warranty, indemnity, 199 | or other liability obligations and/or rights consistent with this 200 | License. However, in accepting such obligations, You may act only 201 | on Your own behalf and on Your sole responsibility, not on behalf 202 | of any other Contributor, and only if You agree to indemnify, 203 | defend, and hold each Contributor harmless for any liability 204 | incurred by, or claims asserted against, such Contributor by reason 205 | of your accepting any such warranty or additional liability. 206 |

207 |

208 | END OF TERMS AND CONDITIONS 209 |

210 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # csv-parsers-comparison 2 | 3 | This project aims to compare all CSV parsers for Java in existence, or at least the ones that seem to work and are 4 | available to the general public. There are too many and the intention here is to help you decide which one is the best 5 | for you. Commercial parsers are welcome in the test. Please send us the details of your commercial parser and we will 6 | include the results. 7 | 8 | Currently, we are only testing parsing performance. As the input file, we will be using the 9 | famous [worldcitiespop.txt](http://www.maxmind.com/download/worldcities/worldcitiespop.txt.gz), which is made available 10 | for free by [Maxmind](http://www.maxmind.com). It contains more than 3 million rows, which should be sufficient for our test. 11 | 12 | ## Building and Running 13 | 14 | Prerequisites: Git, wget, gunzip, GNU Patch, Apache Maven 3, and Java 1.6+. 15 | The appropriate CSV parser library version will be chosen depending on your JDK version (i.e. 1.6, 1.7, or 1.8). 16 | 17 | If you wish to reproduce our performance results: 18 | 19 | ```bash 20 | $ git clone https://github.com/uniVocity/csv-parsers-comparison.git 21 | $ cd csv-parsers-comparison 22 | $ wget http://www.maxmind.com/download/worldcities/worldcitiespop.txt.gz 23 | $ gunzip worldcitiespop.txt.gz 24 | $ git checkout src/ 25 | $ mvn clean package 26 | $ java -jar target/csv-parsers-comparison-1.0-uber.jar . 27 | ``` 28 | 29 | NOTE: the `.` at the end of the last command, this tells Java the folder containing the `worldcitiespop.txt`. You can 30 | alternatively specify a path to any folder that contains a `worldcitiespop.txt` file. 
31 | 32 | If you just want to run from the main() method, download the [worldcitiespop.txt](http://www.maxmind.com/download/worldcities/worldcitiespop.txt.gz) and place 33 | it under `src/main/resources/` before executing the main class [PerformanceComparison.java](./src/main/java/com/univocity/articles/csvcomparison/PerformanceComparison.java). 34 | 35 | Our test is very simple and involves just counting the number of rows read from the input file. The implementation using 36 | each parser is [here](./src/main/java/com/univocity/articles/csvcomparison/parser). 37 | 38 | ### Important 39 | The input file is **not** [RFC 4180](https://www.rfc-editor.org/rfc/rfc4180.txt) compliant. We generate a compliant 40 | version using the [HugeFileGenerator](./src/main/java/com/univocity/articles/csvcomparison/HugeFileGenerator.java) 41 | class to test the parsers against a generated file with the same data, but enclosed within quotes and properly escaped. 42 | 43 | It's important to note that there's no such thing as a CSV standard and we do not recommend using parsers that 44 | follow the RFC strictly, as they will blow up in the face of non-compliant inputs. The reality is: your parser must be 45 | ready to process crooked data instead of going belly up. In the end, your client is the one who tells you what you 46 | must swallow, and in many circumstances it's not up to you to decide how your data is going to be generated. 47 | 48 | We generate an RFC-compliant version to give those sensitive parsers a chance to see how they perform. 49 | Once again, we consider their usage risky. 50 | 51 | ## CSV Parsers 52 | 53 | This is the list of all parsers currently tested. 
54 | 55 | | Parser | Version | Website | 56 | |------------------------------|------------------:|----------------------------------------------------------------------------------------------------| 57 | | uniVocity-parsers' CsvParser | 2.6.0 | [www.univocity.com](http://www.univocity.com) | 58 | | CSVeed | 0.5.0 | [csveed.org](http://csveed.org) | 59 | | Apache Commons CSV | 1.4/1.5 | [commons.apache.org/proper/commons-csv](http://commons.apache.org/proper/commons-csv) | 60 | | OpenCSV | 4.1 | [opencsv.sourceforge.net](http://opencsv.sourceforge.net/) | 61 | | SuperCSV | 2.4.0 | [supercsv.sourceforge.net](http://supercsv.sourceforge.net/) | 62 | | JavaCSV | 2.0 | [sourceforge.net/projects/javacsv](http://sourceforge.net/projects/javacsv) | 63 | | jCSV | 1.4.0 | [code.google.com/p/jcsv](https://code.google.com/p/jcsv/) | 64 | | flatpack | 3.4.2/4.0.1 | [flatpack.sourceforge.net](http://flatpack.sourceforge.net/) | 65 | | SimpleCSV | 2.1 | [github.com/quux00/simplecsv](https://github.com/quux00/simplecsv) | 66 | | gj-csv | 1.0 | ? 
| 67 | | esperio-csv | 5.5.0/7.0.0 | [www.espertech.com](http://www.espertech.com/) | 68 | | way-io | 1.25.0/2.1.0 | [www.objectos.com.br](http://www.objectos.com.br/) | 69 | | beanIO | 2.1.0 | [beanio.org](http://beanio.org/) | 70 | | jackson-dataformat-csv | 2.6.7/2.7.9/2.9.4 | [github.com/FasterXML/jackson-dataformat-csv](http://github.com/FasterXML/jackson-dataformat-csv) | 71 | | OsterMiller Utils | 1.07.00 | [ostermiller.org/utils/CSV.html](http://ostermiller.org/utils/CSV.html) | 72 | | SimpleFlatMapper CSV parser | 3.15.9 | [github.com/arnaudroger/SimpleFlatMapper](https://github.com/arnaudroger/SimpleFlatMapper) | 73 | | Diergo Easy CSV Streamable | 3.1.0 | [github.com/aburmeis/decs](https://github.com/aburmeis/decs) | 74 | | Product Collections | 1.4.5 | [github.com/marklister/product-collections](https://github.com/marklister/product-collections) | 75 | 76 | ## Statistics (updated 28th of February, 2018) 77 | 78 | Results will vary depending on your setup and hardware, here is mine: 79 | 80 | * CPU: AMD Ryzen 7 1700 Eight-Core Processor @ 4.0 GHz 81 | * RAM: 32 GB 82 | * Storage: 1TB SSD drive 83 | * OS: Arch Linux 64-bit 84 | * JDK: 9.0.4 64-bit (Linux) 85 | * JDK: 1.8.0_144 64-bit (Linux) 86 | * JDK: 1.7.0_80 64-bit (Linux) 87 | * JDK: 1.6.0_45 64-bit (Linux) 88 | 89 | *Note* [uniVocity-parsers](http://github.com/uniVocity/univocity-parsers/) provides an option to select the fields you 90 | are interested in, and our parsers will execute faster by not processing values that are not selected. It makes quite 91 | a difference in performance but we removed this test as the other parsers don't have a similar feature. 92 | 93 | ### Processing 3,173,958 rows of non [RFC 4180](https://www.rfc-editor.org/rfc/rfc4180.txt) compliant input. No quoted values. 
94 | 95 | ## JDK 9 96 | | Parser | Average time | % Slower than best | Best time | Worst time | 97 | |--------------------------------------------|-------------------:|-------------------:|----------:|-----------:| 98 | | uniVocity CSV parser | 739 ms | Best time! | 707 ms | 768 ms | 99 | | SimpleFlatMapper CSV parser | 861 ms | 16% | 848 ms | 901 ms | 100 | | Jackson CSV parser | 1212 ms | 64% | 1169 ms | 1238 ms | 101 | | Product Collections parser | 1409 ms | 90% | 1389 ms | 1451 ms | 102 | | Java CSV Parser | 1498 ms | 102% | 1490 ms | 1508 ms | 103 | | JCSV Parser | 1681 ms | 127% | 1660 ms | 1710 ms | 104 | | Oster Miller CSV parser | 1772 ms | 139% | 1762 ms | 1780 ms | 105 | | Gen-Java CSV | 1799 ms | 143% | 1790 ms | 1805 ms | 106 | | Simple CSV parser | 1861 ms | 151% | 1832 ms | 1900 ms | 107 | | SuperCSV | 1893 ms | 156% | 1858 ms | 1964 ms | 108 | | OpenCSV | 2022 ms | 173% | 2007 ms | 2037 ms | 109 | | Apache Commons CSV | 2424 ms | 228% | 2409 ms | 2442 ms | 110 | | Way IO Parser | 2577 ms | 248% | 2532 ms | 2638 ms | 111 | 112 | ## JDK 8 113 | | Parser | Average time | % Slower than best | Best time | Worst time | 114 | |--------------------------------------------|-------------------:|-------------------:|----------:|-----------:| 115 | | uniVocity CSV parser | 723 ms | Best time! 
| 716 ms | 736 ms | 116 | | SimpleFlatMapper CSV parser | 769 ms | 6% | 761 ms | 778 ms | 117 | | Jackson CSV parser | 924 ms | 27% | 914 ms | 944 ms | 118 | | Diergo Easy CSV Streamable | 1186 ms | 64% | 1173 ms | 1217 ms | 119 | | Simple CSV parser | 1273 ms | 76% | 1255 ms | 1297 ms | 120 | | Product Collections parser | 1328 ms | 83% | 1314 ms | 1354 ms | 121 | | JCSV Parser | 1440 ms | 99% | 1432 ms | 1459 ms | 122 | | SuperCSV | 1497 ms | 107% | 1478 ms | 1511 ms | 123 | | Java CSV Parser | 1512 ms | 109% | 1495 ms | 1548 ms | 124 | | Gen-Java CSV | 1517 ms | 109% | 1440 ms | 1541 ms | 125 | | Oster Miller CSV parser | 1639 ms | 126% | 1634 ms | 1649 ms | 126 | | OpenCSV | 1687 ms | 133% | 1676 ms | 1702 ms | 127 | | Apache Commons CSV | 2197 ms | 203% | 2187 ms | 2208 ms | 128 | | Way IO Parser | 2318 ms | 220% | 2260 ms | 2368 ms | 129 | 130 | ## JDK 7 131 | | Parser | Average time | % Slower than best | Best time | Worst time | 132 | |--------------------------------------------|-------------------:|-------------------:|----------:|-----------:| 133 | | uniVocity CSV parser | 771 ms | Best time! 
| 742 ms | 827 ms | 134 | | SimpleFlatMapper CSV parser | 888 ms | 15% | 881 ms | 899 ms | 135 | | Jackson CSV parser | 1015 ms | 31% | 986 ms | 1055 ms | 136 | | JCSV Parser | 1371 ms | 77% | 1362 ms | 1376 ms | 137 | | Product Collections parser | 1405 ms | 82% | 1376 ms | 1482 ms | 138 | | Simple CSV parser | 1450 ms | 88% | 1362 ms | 1594 ms | 139 | | Java CSV Parser | 1477 ms | 91% | 1464 ms | 1518 ms | 140 | | SuperCSV | 1500 ms | 94% | 1477 ms | 1520 ms | 141 | | OpenCSV | 1529 ms | 98% | 1518 ms | 1545 ms | 142 | | Oster Miller CSV parser | 1615 ms | 109% | 1594 ms | 1638 ms | 143 | | Gen-Java CSV | 2096 ms | 171% | 2059 ms | 2195 ms | 144 | | Way IO Parser | 2118 ms | 174% | 2102 ms | 2139 ms | 145 | | Apache Commons CSV | 2204 ms | 185% | 2194 ms | 2226 ms | 146 | 147 | 148 | ## JDK 6 149 | | Parser | Average time | % Slower than best | Best time | Worst time | 150 | |--------------------------------------------|-------------------:|-------------------:|----------:|-----------:| 151 | | uniVocity CSV parser | 824 ms | Best time! | 811 ms | 844 ms | 152 | | SimpleFlatMapper CSV parser | 879 ms | 6% | 871 ms | 887 ms | 153 | | Jackson CSV parser | 1071 ms | 29% | 1062 ms | 1085 ms | 154 | | Product Collections parser | 1368 ms | 66% | 1360 ms | 1375 ms | 155 | | SuperCSV | 1561 ms | 89% | 1537 ms | 1629 ms | 156 | | OpenCSV | 1579 ms | 91% | 1555 ms | 1604 ms | 157 | | Java CSV Parser | 1605 ms | 94% | 1596 ms | 1617 ms | 158 | | Oster Miller CSV parser | 1744 ms | 111% | 1736 ms | 1757 ms | 159 | | JCSV Parser | 1766 ms | 114% | 1758 ms | 1776 ms | 160 | | Simple CSV parser | 1905 ms | 131% | 1888 ms | 1918 ms | 161 | | Apache Commons CSV | 2105 ms | 155% | 2094 ms | 2123 ms | 162 | | Gen-Java CSV | 2135 ms | 159% | 2085 ms | 2200 ms | 163 | | Way IO Parser | 2148 ms | 160% | 2133 ms | 2161 ms | 164 | 165 | 166 | * `Esperio-csv` and `CSVeed` were unable to process the file and threw exceptions. 
167 | * `Flatpack` hung so I had to remove it from the test [here](./src/main/java/com/univocity/articles/csvcomparison/parser/Parsers.java). 168 | * `BeanIO` threw an exception I could understand and debug. Turns out it is unable to parse fields when the quote 169 | character is part of the value, e.g. `value1, val"ue2, value3 `. 170 | 171 | ### Processing 3,173,958 rows of [RFC 4180](https://www.rfc-editor.org/rfc/rfc4180.txt) compliant input. All values quoted. 172 | 173 | **Note:** this input file has all the values enclosed within quotes. We generated the input like this on purpose as 174 | the algorithm to process quotes is a bit different. 175 | 176 | ## JDK 9 177 | | Parser | Average time | % Slower than best | Best time | Worst time | 178 | |--------------------------------------------|-------------------:|-------------------:|-----------:|-----------:| 179 | | uniVocity CSV parser | 982 ms | Best time! | 966 ms | 1002 ms | 180 | | SimpleFlatMapper CSV parser | 1137 ms | 15% | 1125 ms | 1156 ms | 181 | | Jackson CSV parser | 1338 ms | 36% | 1313 ms | 1369 ms | 182 | | Product Collections parser | 1480 ms | 50% | 1467 ms | 1504 ms | 183 | | Java CSV Parser | 1735 ms | 76% | 1718 ms | 1761 ms | 184 | | JCSV Parser | 2043 ms | 108% | 2029 ms | 2055 ms | 185 | | Gen-Java CSV | 2162 ms | 120% | 2134 ms | 2182 ms | 186 | | Oster Miller CSV parser | 2353 ms | 139% | 2335 ms | 2389 ms | 187 | | SuperCSV | 2511 ms | 155% | 2495 ms | 2522 ms | 188 | | Simple CSV parser | 2544 ms | 159% | 2529 ms | 2558 ms | 189 | | OpenCSV | 2665 ms | 171% | 2647 ms | 2686 ms | 190 | | Apache Commons CSV | 2917 ms | 197% | 2851 ms | 3020 ms | 191 | | Way IO Parser | 3243 ms | 230% | 3227 ms | 3252 ms | 192 | | Esperio CSV parser | 3406 ms | 246% | 3349 ms | 3458 ms | 193 | | Bean IO Parser | 3740 ms | 280% | 3681 ms | 3792 ms | 194 | 195 | 196 | ## JDK 8 197 | | Parser | Average time | % Slower than best | Best time | Worst time | 198 | 
|--------------------------------------------|-------------------:|-------------------:|-----------:|-----------:| 199 | | uniVocity CSV parser | 855 ms | Best time! | 839 ms | 870 ms | 200 | | SimpleFlatMapper CSV parser | 964 ms | 12% | 959 ms | 971 ms | 201 | | Jackson CSV parser | 1023 ms | 19% | 1009 ms | 1058 ms | 202 | | Diergo Easy CSV Streamable | 1385 ms | 61% | 1378 ms | 1394 ms | 203 | | Product Collections parser | 1388 ms | 62% | 1386 ms | 1391 ms | 204 | | Java CSV Parser | 1642 ms | 92% | 1635 ms | 1650 ms | 205 | | JCSV Parser | 1756 ms | 105% | 1739 ms | 1765 ms | 206 | | Simple CSV parser | 1813 ms | 112% | 1804 ms | 1841 ms | 207 | | Gen-Java CSV | 1954 ms | 128% | 1950 ms | 1961 ms | 208 | | SuperCSV | 1989 ms | 132% | 1975 ms | 2001 ms | 209 | | Apache Commons CSV | 2152 ms | 151% | 2145 ms | 2161 ms | 210 | | OpenCSV | 2234 ms | 161% | 2227 ms | 2256 ms | 211 | | Oster Miller CSV parser | 2292 ms | 168% | 2285 ms | 2306 ms | 212 | | Way IO Parser | 2915 ms | 240% | 2905 ms | 2932 ms | 213 | | Esperio CSV parser | 2981 ms | 248% | 2944 ms | 3044 ms | 214 | | Bean IO Parser | 3238 ms | 278% | 3219 ms | 3263 ms | 215 | 216 | 217 | ## JDK 7 218 | | Parser | Average time | % Slower than best | Best time | Worst time | 219 | |--------------------------------------------|-------------------:|-------------------:|-----------:|-----------:| 220 | | uniVocity CSV parser | 960 ms | Best time! 
| 945 ms | 984 ms | 221 | | SimpleFlatMapper CSV parser | 1021 ms | 6% | 1018 ms | 1031 ms | 222 | | Jackson CSV parser | 1102 ms | 14% | 1092 ms | 1112 ms | 223 | | Product Collections parser | 1475 ms | 53% | 1467 ms | 1492 ms | 224 | | Java CSV Parser | 1659 ms | 72% | 1652 ms | 1665 ms | 225 | | JCSV Parser | 1709 ms | 78% | 1689 ms | 1728 ms | 226 | | SuperCSV | 1848 ms | 92% | 1837 ms | 1858 ms | 227 | | Simple CSV parser | 1939 ms | 101% | 1887 ms | 2028 ms | 228 | | OpenCSV | 2022 ms | 110% | 2013 ms | 2032 ms | 229 | | Oster Miller CSV parser | 2422 ms | 152% | 2406 ms | 2436 ms | 230 | | Gen-Java CSV | 2551 ms | 165% | 2546 ms | 2562 ms | 231 | | Way IO Parser | 2621 ms | 173% | 2613 ms | 2628 ms | 232 | | Apache Commons CSV | 2804 ms | 192% | 2798 ms | 2814 ms | 233 | | Bean IO Parser | 3460 ms | 260% | 3420 ms | 3564 ms | 234 | | Esperio CSV parser | 4021 ms | 318% | 3996 ms | 4058 ms | 235 | 236 | 237 | ## JDK 6 238 | | Parser | Average time | % Slower than best | Best time | Worst time | 239 | |--------------------------------------------|-------------------:|-------------------:|-----------:|-----------:| 240 | | SimpleFlatMapper CSV parser | 1187 ms | Best time! 
| 1183 ms | 1193 ms | 241 | | uniVocity CSV parser | 1216 ms | 2% | 1206 ms | 1226 ms | 242 | | Jackson CSV parser | 1397 ms | 17% | 1391 ms | 1413 ms | 243 | | Product Collections parser | 1488 ms | 25% | 1478 ms | 1496 ms | 244 | | SuperCSV | 1873 ms | 57% | 1864 ms | 1892 ms | 245 | | OpenCSV | 2205 ms | 85% | 2198 ms | 2214 ms | 246 | | JCSV Parser | 2227 ms | 87% | 2210 ms | 2250 ms | 247 | | Oster Miller CSV parser | 2323 ms | 95% | 2316 ms | 2334 ms | 248 | | Java CSV Parser | 2437 ms | 105% | 2428 ms | 2454 ms | 249 | | Simple CSV parser | 2510 ms | 111% | 2472 ms | 2609 ms | 250 | | Gen-Java CSV | 2590 ms | 118% | 2542 ms | 2609 ms | 251 | | Apache Commons CSV | 2739 ms | 130% | 2734 ms | 2748 ms | 252 | | Way IO Parser | 2831 ms | 138% | 2819 ms | 2843 ms | 253 | | Bean IO Parser | 3685 ms | 210% | 3661 ms | 3721 ms | 254 | | Esperio CSV parser | 4002 ms | 237% | 3986 ms | 4039 ms | 255 | 256 | 257 | 258 | * `CSVeed` was unable to process the file and threw exception with the message "Parsing symbol OTHER_SYMBOL [44] in state ESCAPING". 259 | * `Flatpack` blew up the Java heap space so I had to remove it from the test [here](./src/main/java/com/univocity/articles/csvcomparison/parser/Parsers.java). 260 | 261 | 262 | ## Reliability (updated 9th of October, 2017) 263 | 264 | The following parsers were unable to process the [RFC 4180](https://www.rfc-editor.org/rfc/rfc4180.txt) compliant file 265 | [correctness.csv](./src/main/resources/correctness.csv). 
This test is executed using the class [CorrectnessComparison.java](./src/main/java/com/univocity/articles/csvcomparison/CorrectnessComparison.java). 266 | 267 | | Parser | Error | 268 | |--------------------------------------------|:------| 269 | |CSVeed | CSVeed threw exception "Illegal state transition: Parsing symbol QUOTE_SYMBOL [34] in state INSIDE_FIELD" | 270 | |jCSV Parser | JCSV Parser produced ["Year,Make,Model,Description,Price"] instead of ["Year", "Make", "Model", "Description", "Price"] | 271 | |Simple CSV parser | Simple CSV parser threw exception "The separator, quote, and escape characters must be different!" | 272 | |Way IO Parser | Way IO Parser threw exception "Could not convert to class java.lang.String" | 273 | |Gen-Java CSV | Gen-Java CSV produced 7 rows instead of 6 | 274 | |Flatpack | Flatpack produced 5 rows instead of 6 | 275 | 276 | The exact same errors were reported 3 years ago when I last updated this page. Just avoid these parsers. 277 | 278 | ## Conclusion 279 | 280 | Currently, three parsers stand out as the fastest CSV parsers for Java: 281 | 282 | *uniVocity-parsers*, *SimpleFlatMapper* and *Jackson CSV*. Keep in mind that *SimpleFlatMapper* is a very simple 283 | implementation that does not provide any customization options. Results are affected by a simple change in the JDK version; 284 | however, these three parsers are always at the top. 285 | 286 | We will keep working to improve the performance of our parsers, and will try to update the results of this benchmark 287 | every time a new parser is added to the list. 288 | 289 | Head over to the [uniVocity-parsers GitHub page](http://github.com/uniVocity/univocity-parsers/) to get access to its 290 | source code and documentation. Contributions are welcome. 291 | 292 | #### Commercial support is available for your peace of mind. 
[Click here to learn more.](http://www.univocity.com/products/parsers-support) 293 | 294 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | com.univocity 4 | csv-parsers-comparison 5 | 1.0 6 | csv-parsers-comparison 7 | 8 | A comparison among different CSV parsers for Java 9 | 10 | 11 | uniVocity Software Pty Ltd 12 | www.univocity.com 13 | 14 | 15 | http://github.com/uniVocity/csv-parsers-comparison 16 | 17 | 18 | 19 | Apache 2 20 | http://www.apache.org/licenses/LICENSE-2.0.txt 21 | repo 22 | A business-friendly OSS license 23 | 24 | 25 | 26 | 27 | 28 | GitHub Issues 29 | https://github.com/uniVocity/csv-parsers-comparison/issues 30 | 31 | 32 | 33 | https://github.com/uniVocity/csv-parsers-comparison 34 | scm:git:git://github.com/uniVocity/csv-parsers-comparison.git 35 | scm:git:git@github.com:uniVocity/csv-parsers-comparison.git 36 | 37 | 38 | 39 | 40 | jbax 41 | Jeronimo Backes 42 | jbax@univocity.com 43 | +9:30 44 | 45 | 46 | 47 | dev 48 | uniVocity development team 49 | dev@univocity.com 50 | 51 | 52 | 53 | 54 | UTF-8 55 | UTF-8 56 | 57 | 58 | 59 | 60 | 61 | org.apache.maven.plugins 62 | maven-compiler-plugin 63 | 3.1 64 | 65 | 1.6 66 | 1.6 67 | 68 | 69 | 70 | default-compile 71 | compile 72 | 73 | compile 74 | 75 | 76 | 77 | **/parser8/* 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | org.apache.maven.plugins 86 | maven-source-plugin 87 | 2.2.1 88 | 89 | 90 | attach-sources 91 | 92 | jar 93 | 94 | 95 | 96 | 97 | 98 | 99 | org.apache.maven.plugins 100 | maven-shade-plugin 101 | 3.1.0 102 | 103 | 104 | 105 | *:* 106 | 107 | META-INF/*.SF 108 | META-INF/*.DSA 109 | META-INF/*.RSA 110 | 111 | 112 | 113 | 114 | 115 | com.univocity.articles.csvcomparison.PerformanceComparison 116 | 117 | 118 | META-INF/LICENSE-2.0.html 119 | LICENSE-2.0.html 120 | 121 | 122 | META-INF/README.md 123 | README.md 124 | 125 | 126 | correctness.csv 127 | 
correctness.csv 128 | 129 | 130 | true 131 | uber 132 | 133 | 134 | 135 | package 136 | 137 | shade 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | com.univocity 149 | univocity-parsers 150 | 2.6.0 151 | 152 | 153 | 154 | org.csveed 155 | csveed 156 | 0.5.0 157 | 158 | 159 | 160 | org.apache.commons 161 | commons-csv 162 | 1.4 163 | 164 | 165 | 166 | net.sf.flatpack 167 | flatpack 168 | 3.4.2 169 | 170 | 171 | 172 | net.sf.supercsv 173 | super-csv 174 | 2.4.0 175 | 176 | 177 | 178 | com.googlecode.jcsv 179 | jcsv 180 | 1.4.0 181 | 182 | 183 | 184 | 185 | net.sourceforge.javacsv 186 | javacsv 187 | 2.0 188 | 189 | 190 | 191 | com.espertech 192 | esperio-csv 193 | 5.5.0 194 | 195 | 196 | 197 | br.com.objectos 198 | way-io 199 | 1.25.0 200 | 201 | 202 | 203 | com.opencsv 204 | opencsv 205 | 4.1 206 | 207 | 208 | 209 | genjava 210 | gj-csv 211 | 1.0 212 | 213 | 214 | 215 | net.quux00.simplecsv 216 | simplecsv 217 | 2.1 218 | 219 | 220 | 221 | org.jdom 222 | jdom 223 | 1.1 224 | 225 | 226 | 227 | org.beanio 228 | beanio 229 | 2.1.0 230 | 231 | 232 | 233 | com.fasterxml.jackson.dataformat 234 | jackson-dataformat-csv 235 | 2.6.7 236 | 237 | 238 | com.fasterxml.jackson.core 239 | jackson-core 240 | 2.6.7 241 | 242 | 243 | 244 | org.simpleflatmapper 245 | sfm-csv 246 | 3.13.2 247 | jdk16 248 | 249 | 250 | 251 | org.ostermiller 252 | utils 253 | 1.07.00 254 | 255 | 256 | 257 | com.github.marklister 258 | product-collections_2.11 259 | 1.4.5 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | jdk1.7 268 | 269 | 1.7 270 | 271 | 272 | 273 | 274 | org.apache.commons 275 | commons-csv 276 | 1.5 277 | 278 | 279 | 280 | br.com.objectos 281 | way-io 282 | 2.1.0 283 | 284 | 285 | 286 | com.fasterxml.jackson.dataformat 287 | jackson-dataformat-csv 288 | 2.7.9 289 | 290 | 291 | com.fasterxml.jackson.core 292 | jackson-core 293 | 2.7.9 294 | 295 | 296 | 297 | 298 | 299 | 300 | jdk1.8 301 | 302 | 1.8 303 | 304 | 305 | 306 | 307 | org.apache.commons 308 | commons-csv 309 | 
1.5 310 | 311 | 312 | 313 | br.com.objectos 314 | way-io 315 | 2.1.0 316 | 317 | 318 | 319 | com.fasterxml.jackson.dataformat 320 | jackson-dataformat-csv 321 | 2.9.4 322 | 323 | 324 | com.fasterxml.jackson.core 325 | jackson-core 326 | 2.9.4 327 | 328 | 329 | 330 | net.sf.flatpack 331 | flatpack 332 | 4.0.1 333 | 334 | 335 | 336 | org.simpleflatmapper 337 | sfm-csv 338 | 3.15.9 339 | 340 | 341 | 342 | com.espertech 343 | esperio-csv 344 | 7.0.0 345 | 346 | 347 | 348 | diergo 349 | decs 350 | 3.1.0-RELEASE 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | org.apache.maven.plugins 359 | maven-patch-plugin 360 | 1.2 361 | 362 | 363 | 364 | 365 | 366 | esperio-csv-6.x.patch 367 | flatpack-4.x.patch 368 | 369 | 370 | 371 | 372 | patch 373 | 374 | apply 375 | 376 | 377 | 378 | 379 | 380 | 381 | org.apache.maven.plugins 382 | maven-compiler-plugin 383 | 3.1 384 | 385 | 386 | parsers8-compile 387 | compile 388 | 389 | compile 390 | 391 | 392 | 1.8 393 | 1.8 394 | 395 | **/parser8/* 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | false 411 | 412 | central 413 | bintray 414 | https://jcenter.bintray.com 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/CorrectnessComparison.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | ******************************************************************************/
package com.univocity.articles.csvcomparison;

import java.io.*;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.*;

import com.univocity.articles.csvcomparison.parser.*;

/**
 * Runs every parser returned by {@code ParsersRegistry.getParsers()} against
 * {@code correctness.csv} and prints one message to {@code System.err} for each
 * parser whose output differs from {@link #expectedResult} or that throws.
 */
public class CorrectnessComparison {

    private static final String CORRECTNESS_FILE = "correctness.csv";
    private static final String CORRECTNESS_FILE_ENCODING = "ascii";

    // Rows the parsers are expected to produce. A null entry is compared as an empty string.
    private static String[][] expectedResult = new String[][] {
            { "Year", "Make", "Model", "Description", "Price" },
            { "1997", "Ford", "E350", "ac, abs, moon", "3000.00" },
            { "1999", "Chevy", "Venture \"Extended Edition\"", null, "4900.00" },
            { "1996", "Jeep", "Grand Cherokee", "MUST SELL!\nair, moon roof, loaded", "4799.00" },
            { "1999", "Chevy", "Venture \"Extended Edition, Very Large\"", null, "5000.00" },
            { null, null, "Venture \"Extended Edition\"", null, "4900.00" }
    };

    /**
     * Parses {@code file} with {@code parser} and reports the differences from
     * {@link #expectedResult} on {@code System.err}.
     *
     * A wrong row count or a wrong column count stops the comparison for this parser;
     * individual value mismatches are all reported. Values are trimmed and {@code null}
     * is treated as the empty string on both sides before comparing.
     *
     * @param file         the CSV file to parse
     * @param fileEncoding name of the charset used to decode {@code file}
     * @param parser       the parser under test
     * @throws Exception whatever the parser or the underlying I/O throws
     */
    private static void assertHeadersAndValuesMatch(File file, String fileEncoding, AbstractParser parser) throws Exception {
        Reader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), fileEncoding));

            List<String[]> parsedRows = parser.parseRows(reader);

            if (parsedRows.size() != expectedResult.length) {
                System.err.println("Parser " + parser.getName() + " produced " + parsedRows.size() + " rows instead of " + expectedResult.length);
                return;
            }

            for (int i = 0; i < expectedResult.length; i++) {
                String[] row = parsedRows.get(i);
                String[] expectedRow = expectedResult[i];

                if (row.length != expectedRow.length) {
                    System.err.println("Parser " + parser.getName() + " produced " + Arrays.toString(row) + " rows instead of " + Arrays.toString(expectedRow));
                    return;
                }

                for (int j = 0; j < expectedRow.length; j++) {
                    String value = row[j] == null ? "" : String.valueOf(row[j]).trim();
                    String expected = expectedRow[j] == null ? "" : String.valueOf(expectedRow[j]).trim();

                    if (!value.equals(expected)) {
                        // i is the row index and j the column index; both are needed to locate the value.
                        System.err.println("Parser " + parser.getName() + " produced " + value + " instead of " + expected + ", at row " + i + ", column " + j);
                    }
                }
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }

    /**
     * Locates {@code correctness.csv} on the classpath, or in the folder given as
     * {@code args[0]} when it is not on the classpath, and checks every registered parser.
     *
     * @param args optional; {@code args[0]} is a folder containing {@code correctness.csv}
     * @throws URISyntaxException if the classpath URL of the file cannot be converted to a URI
     * @throws IllegalStateException if the file cannot be found
     */
    public static void main(final String... args) throws URISyntaxException {

        final File input;
        final URL inputUrl = CorrectnessComparison.class.getClassLoader().getResource(CORRECTNESS_FILE);
        if (inputUrl != null) {
            input = new File(inputUrl.toURI());
        } else {
            if (args.length > 0) {
                input = new File(args[0], CORRECTNESS_FILE);
                if (!input.exists()) {
                    throw new IllegalStateException("Could not find '" + CORRECTNESS_FILE + "' in classpath or in folder: " + args[0]);
                }
            } else {
                throw new IllegalStateException("Could not find '" + CORRECTNESS_FILE + "' in classpath");
            }
        }

        for (final AbstractParser parser : ParsersRegistry.getParsers()) {
            try {
                System.out.println("try Parser " + parser.getName());
                assertHeadersAndValuesMatch(input, CORRECTNESS_FILE_ENCODING, parser);
            } catch (Throwable ex) {
                // A failing parser must not stop the comparison of the remaining ones.
                System.err.println("Parser " + parser.getName() + " threw exception: " + ex.getMessage());
            }
        }

    }

}
-------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/HugeFileGenerator.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison; 17 | 18 | import java.io.*; 19 | 20 | import com.univocity.parsers.csv.*; 21 | 22 | public class HugeFileGenerator { 23 | 24 | public static void generateHugeFile(File input, String inputEncoding, int timesToExpand, File hugeFile) throws Exception { 25 | 26 | if (hugeFile.exists()) { 27 | System.out.println("Huge file already generated."); 28 | return; 29 | } 30 | 31 | CsvParserSettings readerSettings = new CsvParserSettings(); 32 | readerSettings.getFormat().setLineSeparator("\n"); 33 | CsvParser parser = new CsvParser(readerSettings); 34 | 35 | CsvWriterSettings settings = new CsvWriterSettings(); 36 | settings.setQuoteAllFields(true); //let's see how all parsers perform when the contents are enclosed within quotes. 
37 | settings.getFormat().setLineSeparator("\n"); 38 | 39 | CsvWriter writer = new CsvWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(hugeFile), inputEncoding)), settings); 40 | long totalTime = 0L; 41 | try { 42 | Object[] row; 43 | for (int i = 0; i < timesToExpand; i++) { 44 | long start = System.currentTimeMillis(); 45 | 46 | parser.beginParsing(new BufferedReader(new InputStreamReader(new FileInputStream(input), inputEncoding))); 47 | while ((row = parser.parseNext()) != null) { 48 | writer.writeRow(row); 49 | } 50 | long loopTime = System.currentTimeMillis() - start; 51 | totalTime += loopTime; 52 | System.out.println("Loop " + (i + 1) + " took " + loopTime + "ms. Total time: " + totalTime + " ms"); 53 | } 54 | } finally { 55 | writer.close(); 56 | } 57 | System.out.println("Finished!"); 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/PerformanceComparison.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison; 17 | 18 | import java.io.*; 19 | import java.net.URL; 20 | import java.util.*; 21 | import java.util.Map.Entry; 22 | 23 | import com.univocity.articles.csvcomparison.parser.*; 24 | 25 | public class PerformanceComparison { 26 | 27 | private static final String WORLDCITIES_FILE = "worldcitiespop.txt"; 28 | private static final String WORLDCITIES_FILE_ENCODING = "ISO-8859-1"; 29 | private static final String WORLDCITIES_HUGE_FILE = "worldcitiespop_huge.txt"; 30 | private static final String WORLDCITIES_HUGE_FILE_ENCODING = "ISO-8859-1"; 31 | 32 | private final File file; 33 | private final String fileEncoding; 34 | 35 | PerformanceComparison(File file, String fileEncoding) { 36 | this.file = file; 37 | this.fileEncoding = fileEncoding; 38 | } 39 | 40 | private long run(AbstractParser parser) throws Exception { 41 | Reader reader = null; 42 | try { 43 | reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), fileEncoding)); 44 | 45 | long start = System.currentTimeMillis(); 46 | 47 | parser.processRows(reader); 48 | 49 | long time = (System.currentTimeMillis() - start); 50 | System.out.println("took " + time + " ms to read " + parser.getRowCount() + " rows. 
"); 51 | parser.resetRowCount(); 52 | System.setProperty("blackhole", parser.getBlackhole()); 53 | return time; 54 | } finally { 55 | if (reader != null) { 56 | reader.close(); 57 | } 58 | } 59 | } 60 | 61 | private TreeMap orderByAverageTime(int loops, Map stats) { 62 | TreeMap averages = new TreeMap(); 63 | 64 | for (Entry parserTimes : stats.entrySet()) { 65 | Long[] times = parserTimes.getValue(); 66 | long average = 0L; 67 | //we are discarding the first recorded time here to take into account JIT optimizations 68 | for (int i = 1; i < times.length; i++) { 69 | average = average + times[i]; 70 | } 71 | average = average / (loops - 1); 72 | averages.put(average, parserTimes.getKey()); 73 | } 74 | 75 | return averages; 76 | } 77 | 78 | private long getBestTime(Long[] times) { 79 | long best = times[1]; 80 | for (int i = 1; i < times.length; i++) { 81 | if (times[i] < best) { 82 | best = times[i]; 83 | } 84 | } 85 | return best; 86 | } 87 | 88 | private long getWorstTime(Long[] times) { 89 | long worst = times[1]; 90 | for (int i = 1; i < times.length; i++) { 91 | if (times[i] > worst) { 92 | worst = times[i]; 93 | } 94 | } 95 | return worst; 96 | } 97 | 98 | private void printResults(int loops, Map stats) { 99 | System.out.println("\n=========\n AVERAGES \n=========\n"); 100 | 101 | Map averages = orderByAverageTime(loops, stats); 102 | long bestTime = 0; 103 | for (Entry average : averages.entrySet()) { 104 | long time = average.getKey(); 105 | String parser = average.getValue(); 106 | System.out.print("| " + parser + " \t | " + time + " ms "); 107 | 108 | if (time == -1) { 109 | System.out.println("Could not execute"); 110 | continue; 111 | } 112 | 113 | if (bestTime != 0) { 114 | long increasePercentage = time * 100 / bestTime - 100; 115 | System.out.print(" \t | " + increasePercentage + "% "); 116 | } else { 117 | bestTime = time; 118 | System.out.print(" \t | Best time! 
"); 119 | } 120 | 121 | long best = getBestTime(stats.get(parser)); 122 | long worst = getWorstTime(stats.get(parser)); 123 | 124 | System.out.println(" \t | " + best + " ms \t | " + worst + " ms |"); 125 | 126 | } 127 | } 128 | 129 | public void execute(final int loops) throws Exception { 130 | Map stats = new HashMap(); 131 | 132 | for (final AbstractParser parser : ParsersRegistry.getParsers()) { 133 | Long[] times = new Long[loops]; 134 | Arrays.fill(times, -1L); 135 | stats.put(parser.getName(), times); 136 | } 137 | 138 | for (int i = 0; i < loops; i++) { 139 | for (final AbstractParser parser : ParsersRegistry.getParsers()) { 140 | try { 141 | System.out.print("Loop " + (i + 1) + " - executing " + parser.getName() + "... "); 142 | long time = run(parser); 143 | 144 | stats.get(parser.getName())[i] = time; 145 | } catch (Throwable ex) { 146 | System.out.println("Parser " + parser.getName() + " threw exception: " + ex.getMessage()); 147 | } 148 | System.gc(); 149 | Thread.sleep(500); 150 | } 151 | } 152 | 153 | printResults(loops, stats); 154 | } 155 | 156 | public static void main(String... 
args) throws Exception { 157 | 158 | int loops = 6; 159 | 160 | File input = null; 161 | final URL inputUrl = PerformanceComparison.class.getClassLoader().getResource(WORLDCITIES_FILE); 162 | 163 | if (inputUrl != null) { 164 | try { 165 | input = new File(inputUrl.toURI()); 166 | } catch (Exception ex) { 167 | System.err.println("Error reading file from " + inputUrl + ": " + ex.getMessage()); 168 | } 169 | } 170 | 171 | if (input == null) { 172 | if (args.length > 0) { 173 | input = new File(args[0], WORLDCITIES_FILE); 174 | if (!input.exists()) { 175 | throw new IllegalStateException("Could not find '" + WORLDCITIES_FILE + "' in classpath or in folder: " + args[0]); 176 | } 177 | } else { 178 | throw new IllegalStateException("Could not find '" + WORLDCITIES_FILE + "' in classpath, or path not specified as arg[0]"); 179 | } 180 | } 181 | 182 | 183 | new PerformanceComparison(input, WORLDCITIES_FILE_ENCODING).execute(loops); 184 | 185 | File hugeInput = null; 186 | final URL hugeInputUrl = PerformanceComparison.class.getClassLoader().getResource(WORLDCITIES_HUGE_FILE); 187 | if (hugeInputUrl != null) { 188 | try { 189 | hugeInput = new File(hugeInputUrl.toURI()); 190 | } catch (Exception ex) { 191 | System.err.println("Error reading file from " + inputUrl + ": " + ex.getMessage()); 192 | } 193 | } 194 | 195 | if (hugeInput == null) { 196 | if (args.length > 0) { 197 | hugeInput = new File(args[0], WORLDCITIES_HUGE_FILE); 198 | } else { 199 | throw new IllegalStateException("Could not find '" + WORLDCITIES_HUGE_FILE + "' in classpath, or path not specified as arg[0]"); 200 | } 201 | } 202 | 203 | 204 | //executes only if the file has not been generated yet. 205 | 206 | 207 | //Previously, we created a huge file with the original input, replicated 15 times. All fields enclosed within quotes. 208 | //It would generate a file with 47,609,385 rows 209 | 210 | //Now, creates a copy of the original input. All fields enclosed within quotes. 
211 | //Overall performance is the similar in percentage terms, regardless of size. No point in melting our CPU's to get the same result. 212 | 213 | HugeFileGenerator.generateHugeFile(input, WORLDCITIES_FILE_ENCODING, 1, hugeInput); 214 | 215 | System.out.println("=================================="); 216 | System.out.println("=== Processing huge input file ==="); 217 | System.out.println("=================================="); 218 | 219 | 220 | new PerformanceComparison(hugeInput, WORLDCITIES_HUGE_FILE_ENCODING).execute(loops); 221 | } 222 | 223 | } 224 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/AbstractParser.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser; 17 | 18 | import java.io.*; 19 | import java.nio.charset.*; 20 | import java.util.*; 21 | 22 | public abstract class AbstractParser { 23 | 24 | private final String name; 25 | private int rowCount; 26 | private int blackhole; //something to keep values from processed objects to avoid unwanted JIT's dead code removal 27 | 28 | protected AbstractParser(String name) { 29 | this.name = name; 30 | } 31 | 32 | public final String getName() { 33 | return name; 34 | } 35 | 36 | 37 | protected boolean process(Object row) { 38 | if(row == null){ 39 | return false; 40 | } 41 | blackhole += System.identityHashCode(row); 42 | rowCount++; 43 | return true; 44 | } 45 | 46 | public void resetRowCount(){ 47 | rowCount = 0; 48 | } 49 | 50 | public int getRowCount() { 51 | return rowCount; 52 | } 53 | 54 | public String getBlackhole(){ 55 | return String.valueOf(blackhole); 56 | } 57 | 58 | public abstract void processRows(Reader reader) throws Exception; 59 | 60 | public abstract List parseRows(Reader reader) throws Exception; 61 | 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/BeanIoParser.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | 21 | import org.beanio.stream.csv.*; 22 | 23 | public class BeanIoParser extends AbstractParser { 24 | 25 | protected BeanIoParser() { 26 | super("Bean IO Parser"); 27 | } 28 | 29 | @Override 30 | public void processRows(final Reader input) throws Exception { 31 | CsvReader reader = new CsvReader(input); 32 | 33 | while (process(reader.read())); 34 | } 35 | 36 | @Override 37 | public List parseRows(final Reader input) throws Exception { 38 | List rows = new ArrayList(); 39 | 40 | CsvParserConfiguration cfg = new CsvParserConfiguration(); 41 | cfg.setMultilineEnabled(true); 42 | cfg.setEscape('"'); 43 | cfg.setQuote('"'); 44 | cfg.setDelimiter(','); 45 | 46 | CsvReader reader = new CsvReader(input, cfg); 47 | 48 | String[] row; 49 | while ((row = reader.read()) != null) { 50 | rows.add(row); 51 | } 52 | 53 | return rows; 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/CSVeedParser.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in 
compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | 21 | import org.csveed.api.*; 22 | 23 | class CSVeedParser extends AbstractParser { 24 | 25 | protected CSVeedParser() { 26 | super("CSVeed"); 27 | } 28 | 29 | @Override 30 | public void processRows(final Reader input) throws Exception { 31 | CsvClient parser = new CsvClientImpl(input); 32 | while (process(parser.readRow())); 33 | } 34 | 35 | @Override 36 | public List parseRows(final Reader input) throws Exception { 37 | List rows = new ArrayList(); 38 | 39 | Row row; 40 | CsvClient parser = new CsvClientImpl(input); 41 | while ((row = parser.readRow()) != null) { 42 | String[] data = new String[row.size()]; 43 | for (int i = 0; i < data.length; i++) { 44 | data[i] = row.get(i); 45 | } 46 | rows.add(data); 47 | } 48 | 49 | return rows; 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/CommonsCsvParser.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | 21 | import org.apache.commons.csv.*; 22 | 23 | class CommonsCsvParser extends AbstractParser { 24 | 25 | protected CommonsCsvParser() { 26 | super("Apache Commons CSV"); 27 | } 28 | 29 | @Override 30 | public void processRows(final Reader input) throws Exception { 31 | CSVFormat format = CSVFormat.RFC4180; 32 | CSVParser parser = new CSVParser(input, format); 33 | for (CSVRecord record : parser) { 34 | process(record); 35 | } 36 | } 37 | 38 | @Override 39 | public List parseRows(final Reader input) throws Exception { 40 | CSVFormat format = CSVFormat.RFC4180; 41 | CSVParser parser = new CSVParser(input, format); 42 | 43 | List rows = new ArrayList(); 44 | 45 | for (CSVRecord record : parser) { 46 | String[] row = new String[record.size()]; 47 | for (int i = 0; i < row.length; i++) { 48 | row[i] = record.get(i); 49 | } 50 | rows.add(row); 51 | } 52 | return rows; 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/DataPipelineCsvParser.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Note: this test depends on the DataPipeline.jar that must be obtained at: http://northconcepts.com/data-pipeline/ 3 | * 4 | * Once obtain this library, add it to your classpath locally. 
5 | */ 6 | /* 7 | package com.univocity.articles.csvcomparison.parser; 8 | 9 | import java.io.*; 10 | import java.util.*; 11 | 12 | import com.northconcepts.datapipeline.core.*; 13 | import com.northconcepts.datapipeline.csv.*; 14 | 15 | public class DataPipelineCsvParser extends AbstractParser { 16 | 17 | protected DataPipelineCsvParser() { 18 | super("Data pipeline"); 19 | } 20 | 21 | @Override 22 | public void processRows(File input) throws Exception { 23 | 24 | DataReader reader = new CSVReader(input).setFieldNamesInFirstRow(true); 25 | reader.open(); 26 | while (process(reader.read())); 27 | reader.close(); 28 | 29 | } 30 | 31 | @Override 32 | public List parseRows(File input) throws Exception { 33 | List rows = new ArrayList(); 34 | 35 | DataReader reader = new CSVReader(input) 36 | .setAllowMultiLineText(true) 37 | .setFieldNamesInFirstRow(false); 38 | 39 | reader.open(); 40 | Record record; 41 | while ((record = reader.read()) != null) { 42 | rows.add(record.getValues().toArray(new String[record.getFieldCount()])); 43 | } 44 | reader.close(); 45 | 46 | return rows; 47 | } 48 | 49 | } 50 | */ -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/EsperioCsvParser.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | 21 | import com.espertech.esperio.*; 22 | import com.espertech.esperio.csv.*; 23 | 24 | class EsperioCsvParser extends AbstractParser { 25 | 26 | protected EsperioCsvParser() { 27 | super("Esperio CSV parser"); 28 | } 29 | 30 | @Override 31 | public void processRows(final Reader input) throws Exception { 32 | 33 | AdapterInputSource adapterInputSource = new AdapterInputSource(input); 34 | CSVReader reader = new CSVReader(adapterInputSource); 35 | try { 36 | while (process(reader.getNextRecord())); 37 | } catch (EOFException ex) { 38 | //end of file, return... lovely implementation 39 | } 40 | } 41 | 42 | @Override 43 | public List parseRows(final Reader input) throws Exception { 44 | List rows = new ArrayList(); 45 | AdapterInputSource adapterInputSource = new AdapterInputSource(input); 46 | CSVReader reader = new CSVReader(adapterInputSource); 47 | String[] row; 48 | try { 49 | while ((row = reader.getNextRecord()) != null) { 50 | rows.add(row); 51 | } 52 | } catch (EOFException ex) { 53 | //end of file, return... lovely implementation 54 | } 55 | return rows; 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/FlatpackParser.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | 21 | import net.sf.flatpack.*; 22 | 23 | class FlatpackParser extends AbstractParser { 24 | 25 | protected FlatpackParser() { 26 | super("Flatpack"); 27 | } 28 | 29 | @Override 30 | public void processRows(final Reader input) throws Exception { 31 | 32 | Parser parser = DefaultParserFactory.getInstance().newDelimitedParser(input, ',', '\n'); 33 | DataSet dataset = parser.parse(); 34 | while (process(dataset.next())); 35 | } 36 | 37 | @Override 38 | public List parseRows(final Reader input) throws Exception { 39 | List rows = new ArrayList(); 40 | Parser parser = DefaultParserFactory.getInstance().newDelimitedParser(input, ',', '"'); 41 | 42 | DataSet dataset = parser.parse(); 43 | 44 | while (dataset.next()) { 45 | Record record = dataset.getRecord(); 46 | String[] row = new String[record.getColumns().length]; 47 | int i = 0; 48 | for (String column : record.getColumns()) { 49 | row[i++] = record.getString(column); 50 | } 51 | rows.add(row); 52 | } 53 | return rows; 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/GenJavaParser.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 
2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | 21 | import com.generationjava.io.*; 22 | 23 | class GenJavaParser extends AbstractParser { 24 | 25 | protected GenJavaParser() { 26 | super("Gen-Java CSV"); 27 | } 28 | 29 | @Override 30 | public void processRows(final Reader input) throws Exception { 31 | CsvReader reader = new CsvReader(input); 32 | while (process(reader.readLine())); 33 | } 34 | 35 | @Override 36 | public List parseRows(final Reader input) throws Exception { 37 | List rows = new ArrayList(); 38 | 39 | CsvReader reader = new CsvReader(input); 40 | String[] row; 41 | while ((row = reader.readLine()) != null) { 42 | rows.add(row); 43 | } 44 | return rows; 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/JCsvParser.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the 
License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | 21 | import com.googlecode.jcsv.reader.*; 22 | import com.googlecode.jcsv.reader.internal.*; 23 | 24 | class JCsvParser extends AbstractParser { 25 | 26 | protected JCsvParser() { 27 | super("JCSV Parser"); 28 | } 29 | 30 | @Override 31 | public void processRows(final Reader input) throws Exception { 32 | 33 | CSVReader reader = CSVReaderBuilder.newDefaultReader(input); 34 | 35 | while (process(reader.readNext())); 36 | } 37 | 38 | @Override 39 | public List parseRows(final Reader input) throws Exception { 40 | List rows = new ArrayList(); 41 | CSVReader reader = CSVReaderBuilder.newDefaultReader(input); 42 | String[] row; 43 | 44 | while ((row = reader.readNext()) != null) { 45 | rows.add(row); 46 | } 47 | return rows; 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/JacksonParser.java: -------------------------------------------------------------------------------- 1 | package com.univocity.articles.csvcomparison.parser; 2 | 3 | import com.fasterxml.jackson.databind.MappingIterator; 4 | import com.fasterxml.jackson.dataformat.csv.CsvMapper; 5 | import com.fasterxml.jackson.dataformat.csv.CsvParser; 6 | 7 | import java.io.Reader; 8 | import java.util.*; 9 | 10 | public class JacksonParser extends 
AbstractParser { 11 | 12 | protected JacksonParser() { 13 | 14 | super("Jackson CSV parser"); 15 | } 16 | 17 | @Override 18 | public void processRows(final Reader input) throws Exception { 19 | 20 | CsvMapper csvMapper = new CsvMapper(); 21 | csvMapper.enable(CsvParser.Feature.WRAP_AS_ARRAY); 22 | 23 | MappingIterator iterator = csvMapper.readerFor(String[].class).readValues(input); 24 | 25 | while (iterator.hasNext()) { 26 | process(iterator.next()); 27 | } 28 | 29 | } 30 | 31 | @Override 32 | public List parseRows(final Reader input) throws Exception { 33 | 34 | CsvMapper csvMapper = new CsvMapper(); 35 | csvMapper.enable(CsvParser.Feature.WRAP_AS_ARRAY); 36 | 37 | MappingIterator iterator = csvMapper.reader(String[].class).readValues(input); 38 | 39 | List values = new ArrayList(); 40 | while (iterator.hasNext()) { 41 | values.add(iterator.next()); 42 | } 43 | 44 | return values; 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/JavaCsvParser.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | 21 | import com.csvreader.*; 22 | 23 | class JavaCsvParser extends AbstractParser { 24 | 25 | protected JavaCsvParser() { 26 | super("Java CSV Parser"); 27 | } 28 | 29 | @Override 30 | public void processRows(final Reader input) throws Exception { 31 | 32 | CsvReader reader = new CsvReader(input); 33 | while (reader.readRecord()){ 34 | process(reader.getValues()); 35 | } 36 | } 37 | 38 | @Override 39 | public List parseRows(final Reader input) throws Exception { 40 | List rows = new ArrayList(); 41 | 42 | CsvReader reader = new CsvReader(input); 43 | while (reader.readRecord()) { 44 | rows.add(reader.getValues()); 45 | } 46 | 47 | return rows; 48 | 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/OpenCsvParser.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | 21 | import au.com.bytecode.opencsv.*; 22 | 23 | class OpenCsvParser extends AbstractParser { 24 | 25 | protected OpenCsvParser() { 26 | super("OpenCSV"); 27 | } 28 | 29 | @Override 30 | public void processRows(final Reader input) throws Exception { 31 | CSVReader reader = new CSVReader(input); 32 | try { 33 | while (process(reader.readNext())); 34 | } finally { 35 | reader.close(); 36 | } 37 | } 38 | 39 | @Override 40 | public List parseRows(final Reader input) throws Exception { 41 | CSVReader reader = new CSVReader(input); 42 | try { 43 | return reader.readAll(); 44 | } finally { 45 | reader.close(); 46 | } 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/OsterMillerParser.java: -------------------------------------------------------------------------------- 1 | package com.univocity.articles.csvcomparison.parser; 2 | 3 | import static java.util.Arrays.*; 4 | 5 | import java.io.*; 6 | import java.util.*; 7 | 8 | import com.Ostermiller.util.*; 9 | 10 | public class OsterMillerParser extends AbstractParser { 11 | 12 | protected OsterMillerParser() { 13 | 14 | super("Oster Miller CSV parser"); 15 | } 16 | 17 | @Override 18 | public void processRows(final Reader input) throws Exception { 19 | CSVParse csvParser = new ExcelCSVParser(input); 20 | while (process(csvParser.getLine())); 21 | } 22 | 23 | @Override 24 | public List parseRows(final Reader input) throws Exception { 25 | final CSVParse csvParser = new ExcelCSVParser(input); 26 | return asList(csvParser.getAllValues()); 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/Parsers.java: 
-------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser; 17 | 18 | import java.util.*; 19 | 20 | public class Parsers { 21 | 22 | private static final List parsers = Arrays.asList( 23 | new CSVeedParser(), 24 | new BeanIoParser(), 25 | new CommonsCsvParser(), 26 | //new DataPipelineCsvParser(), // commercial, depends on license. I can't include on github. 
27 | new EsperioCsvParser(), 28 | //new FlatpackParser(),// appears to hang while processing worldcitiespop.txt & consumes all heap space when parsing a huge file 29 | new GenJavaParser(), 30 | new JavaCsvParser(), 31 | new JCsvParser(), 32 | new OpenCsvParser(), 33 | new SimpleCsvParser(), 34 | new SuperCsvParser(), 35 | new UnivocityParser(), 36 | new WayIoParser(), 37 | new OsterMillerParser(), 38 | new JacksonParser(), 39 | new SimpleFlatMapperParser(), 40 | new ProductCollectionsParser() 41 | ); 42 | 43 | private Parsers() { 44 | } 45 | 46 | public static List list() { 47 | return Collections.unmodifiableList(parsers); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/ParsersRegistry.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser; 17 | 18 | import java.util.ArrayList; 19 | import java.util.Collections; 20 | import java.util.List; 21 | 22 | public class ParsersRegistry { 23 | 24 | private static List parsers = null; 25 | 26 | public static List getParsers() { 27 | if(parsers == null) { 28 | parsers = getAllParsers(); 29 | } 30 | return parsers; 31 | } 32 | 33 | private static List getAllParsers() { 34 | // Get Parsers for current VM version 35 | final List parsers = new ArrayList(Parsers.list()); 36 | 37 | // Also include Java 8 only parsers? 38 | final String javaVersion = System.getProperty("java.version"); 39 | System.out.println("Detected Java version: " + javaVersion); 40 | 41 | if(javaVersion != null && javaVersion.startsWith("1.8.")) { 42 | System.out.println("Also enabling Java 8 only parsers!"); 43 | parsers.addAll(getJava8OnlyParsers()); 44 | } 45 | 46 | return Collections.unmodifiableList(parsers); 47 | } 48 | 49 | private static List getJava8OnlyParsers() { 50 | try { 51 | final Class java8Parsers = Class.forName("com.univocity.articles.csvcomparison.parser8.Parsers"); 52 | return (List) java8Parsers.getMethod("list").invoke(null); 53 | } catch (final LinkageError e) { 54 | throw new RuntimeException("Cannot get Java 8 Only parsers", e); 55 | } catch (final Exception e) { 56 | throw new RuntimeException("Cannot get Java 8 Only parsers", e); 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/ProductCollectionsParser.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use 
this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | 21 | import com.github.marklister.collections.io.*; 22 | 23 | class ProductCollectionsParser extends AbstractParser { 24 | 25 | protected ProductCollectionsParser() { 26 | super("Product Collections parser"); 27 | } 28 | 29 | @Override 30 | public void processRows(final Reader input) throws Exception { 31 | final CSVReader reader = new CSVReader(input,',','"',1); 32 | try { 33 | while(reader.hasNext()) process(reader.next()); 34 | } finally { 35 | reader.reader().close(); 36 | } 37 | } 38 | 39 | @Override 40 | public List parseRows(final Reader input) throws Exception { 41 | final CSVReader reader = new CSVReader(input,',','"',0); 42 | try { 43 | final List values = new ArrayList(); 44 | while (reader.hasNext()) { 45 | values.add(reader.next()); 46 | } 47 | return values; 48 | } finally { 49 | reader.reader().close(); 50 | } 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/SimpleCsvParser.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the 
"License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | 21 | import net.quux00.simplecsv.*; 22 | 23 | class SimpleCsvParser extends AbstractParser { 24 | 25 | protected SimpleCsvParser() { 26 | super("Simple CSV parser"); 27 | } 28 | 29 | @Override 30 | public void processRows(final Reader input) throws Exception { 31 | CsvReader reader = new CsvReader(input); 32 | try { 33 | while (process(reader.readNext())); 34 | } finally { 35 | reader.close(); 36 | } 37 | } 38 | 39 | @Override 40 | public List parseRows(final Reader input) throws Exception { 41 | List rows = new ArrayList(); 42 | 43 | CsvParser parser = new CsvParserBuilder().escapeChar('"').multiLine(true).separator(',').build(); 44 | CsvReader reader = new CsvReader(input, parser); 45 | try { 46 | List row; 47 | while ((row = reader.readNext()) != null) { 48 | rows.add(row.toArray(new String[0])); 49 | } 50 | } finally { 51 | reader.close(); 52 | } 53 | return rows; 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/SimpleFlatMapperParser.java: -------------------------------------------------------------------------------- 1 | package com.univocity.articles.csvcomparison.parser; 2 | 3 | 4 | import java.io.Reader; 5 | import 
java.util.*; 6 | 7 | import org.simpleflatmapper.csv.CsvParser; 8 | 9 | public class SimpleFlatMapperParser extends AbstractParser { 10 | protected SimpleFlatMapperParser() { 11 | 12 | super("SimpleFlatMapper CSV parser"); 13 | } 14 | 15 | @Override 16 | public void processRows(final Reader input) throws Exception { 17 | final Iterator it = CsvParser.iterator(input); 18 | while(it.hasNext()) { 19 | process(it.next()); 20 | } 21 | } 22 | 23 | @Override 24 | public List parseRows(final Reader input) throws Exception { 25 | final List list = new ArrayList(); 26 | final Iterator it = CsvParser.iterator(input); 27 | while(it.hasNext()) { 28 | list.add(it.next()); 29 | } 30 | return list; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/SuperCsvParser.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | 21 | import org.supercsv.io.*; 22 | import org.supercsv.prefs.*; 23 | 24 | class SuperCsvParser extends AbstractParser { 25 | 26 | protected SuperCsvParser() { 27 | super("SuperCSV"); 28 | } 29 | 30 | @Override 31 | public void processRows(final Reader input) throws Exception { 32 | ICsvListReader listReader = new CsvListReader(input, CsvPreference.STANDARD_PREFERENCE); 33 | try { 34 | listReader.getHeader(true); 35 | while (process(listReader.read())); 36 | 37 | } finally { 38 | if (listReader != null) { 39 | listReader.close(); 40 | } 41 | } 42 | } 43 | 44 | @Override 45 | public List parseRows(final Reader input) throws Exception { 46 | List rows = new ArrayList(); 47 | 48 | ICsvListReader listReader = new CsvListReader(input, CsvPreference.STANDARD_PREFERENCE); 49 | try { 50 | List row = null; 51 | while ((row = listReader.read()) != null) { 52 | rows.add(row.toArray(new String[0])); 53 | } 54 | 55 | } finally { 56 | if (listReader != null) { 57 | listReader.close(); 58 | } 59 | } 60 | 61 | return rows; 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/UnivocityParser.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | 21 | import com.univocity.parsers.common.*; 22 | import com.univocity.parsers.common.processor.*; 23 | import com.univocity.parsers.csv.*; 24 | 25 | class UnivocityParser extends AbstractParser { 26 | 27 | protected UnivocityParser() { 28 | super("uniVocity CSV parser"); 29 | } 30 | 31 | @Override 32 | public void processRows(final Reader input) { 33 | CsvParserSettings settings = new CsvParserSettings(); 34 | settings.getFormat().setLineSeparator("\n"); 35 | 36 | //turning off features enabled by default 37 | settings.getFormat().setLineSeparator("\n"); 38 | settings.setIgnoreLeadingWhitespaces(false); 39 | settings.setIgnoreTrailingWhitespaces(false); 40 | settings.setSkipEmptyLines(false); 41 | settings.setColumnReorderingEnabled(false); 42 | 43 | settings.setProcessor(new AbstractRowProcessor() { 44 | @Override 45 | public void rowProcessed(String[] row, ParsingContext context) { 46 | process(row); 47 | } 48 | }); 49 | 50 | CsvParser parser = new CsvParser(settings); 51 | parser.parse(input); 52 | } 53 | 54 | @Override 55 | public List parseRows(final Reader input) { 56 | 57 | CsvParserSettings settings = new CsvParserSettings(); 58 | settings.getFormat().setLineSeparator("\n"); 59 | CsvParser parser = new CsvParser(settings); 60 | 61 | return parser.parseAll(input); 62 | } 63 | 64 | } 65 | 
-------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser/WayIoParser.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | 21 | import br.com.objectos.comuns.io.*; 22 | import br.com.objectos.comuns.io.csv.*; 23 | 24 | class WayIoParser extends AbstractParser { 25 | 26 | protected WayIoParser() { 27 | super("Way IO Parser"); 28 | } 29 | 30 | @Override 31 | public void processRows(final Reader input) throws Exception { 32 | CsvFile reader = CsvFile.parseReader(input); 33 | 34 | ParsedLines lines = reader.getLines(); 35 | for (Line line : lines) { 36 | process(line); 37 | } 38 | 39 | } 40 | 41 | @Override 42 | public List parseRows(final Reader input) throws Exception { 43 | List rows = new ArrayList(); 44 | CsvFile reader = CsvFile.parseReader(input); 45 | 46 | ParsedLines lines = reader.getLines(); 47 | 48 | //the API does not help us to provide the number of rows in each column 49 | final int COLS = 5; 50 | 51 | for (Line line : lines) { 
52 | String[] row = new String[COLS]; 53 | for (int i = 0; i < COLS; i++) { 54 | String value = line.column(i).get(String.class); 55 | row[i] = value; 56 | } 57 | rows.add(row); 58 | } 59 | return rows; 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser8/DecsParser.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser8; 17 | 18 | import com.univocity.articles.csvcomparison.parser.AbstractParser; 19 | import diergo.csv.Row; 20 | 21 | import java.io.Reader; 22 | import java.util.Collection; 23 | import java.util.List; 24 | 25 | import static diergo.csv.CsvParserBuilder.csvParser; 26 | import static diergo.csv.Readers.asLines; 27 | import static java.util.Spliterator.SIZED; 28 | import static java.util.Spliterators.spliterator; 29 | import static java.util.stream.Collectors.toList; 30 | import static java.util.stream.StreamSupport.stream; 31 | 32 | class DecsParser extends AbstractParser { 33 | 34 | public DecsParser() { 35 | super("Diergo Easy CSV Streamable"); 36 | } 37 | 38 | @Override 39 | public void processRows(final Reader input) throws Exception { 40 | asLines(input) 41 | .map(csvParser().separatedBy(',').inLaxMode().build()).flatMap(Collection::stream) 42 | .forEach(this::process); 43 | } 44 | 45 | @Override 46 | public List parseRows(final Reader input) throws Exception { 47 | return asLines(input) 48 | .map(csvParser().separatedBy(',').build()).flatMap(Collection::stream) 49 | .map(this::toStringArray).collect(toList()); 50 | } 51 | 52 | private String[] toStringArray(Row row) { 53 | return stream(spliterator(row.iterator(), row.getLength(), SIZED), false).toArray(String[]::new); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/com/univocity/articles/csvcomparison/parser8/Parsers.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2014 uniVocity Software Pty Ltd 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | ******************************************************************************/ 16 | package com.univocity.articles.csvcomparison.parser8; 17 | 18 | import com.univocity.articles.csvcomparison.parser.*; 19 | 20 | import java.util.Arrays; 21 | import java.util.Collections; 22 | import java.util.List; 23 | 24 | public class Parsers { 25 | 26 | private static final List parsers = Arrays.asList( 27 | new DecsParser() 28 | ); 29 | 30 | private Parsers() { 31 | } 32 | 33 | public static List list() { 34 | return Collections.unmodifiableList(parsers); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/patches/esperio-csv-6.x.patch: -------------------------------------------------------------------------------- 1 | --- com/univocity/articles/csvcomparison/parser/EsperioCsvParser.java 2 | +++ com/univocity/articles/csvcomparison/parser/EsperioCsvParser.java 3 | @@ -18,7 +18,6 @@ package com.univocity.articles.csvcomparison.parser; 4 | import java.io.*; 5 | import java.util.*; 6 | 7 | -import com.espertech.esperio.*; 8 | import com.espertech.esperio.csv.*; 9 | 10 | class EsperioCsvParser extends AbstractParser { 11 | -------------------------------------------------------------------------------- /src/main/patches/flatpack-4.x.patch: -------------------------------------------------------------------------------- 1 | --- com/univocity/articles/csvcomparison/parser/FlatpackParser.java 2 | +++ com/univocity/articles/csvcomparison/parser/FlatpackParser.java 3 | @@ -17,6 
+17,7 @@ package com.univocity.articles.csvcomparison.parser; 4 | 5 | import java.io.*; 6 | import java.util.*; 7 | +import java.util.Optional; 8 | 9 | import net.sf.flatpack.*; 10 | 11 | @@ -42,13 +43,16 @@ class FlatpackParser extends AbstractParser { 12 | DataSet dataset = parser.parse(); 13 | 14 | while (dataset.next()) { 15 | - Record record = dataset.getRecord(); 16 | - String[] row = new String[record.getColumns().length]; 17 | - int i = 0; 18 | - for (String column : record.getColumns()) { 19 | - row[i++] = record.getString(column); 20 | + final Optional maybeRecord = dataset.getRecord(); 21 | + if(maybeRecord.isPresent()) { 22 | + final Record record = maybeRecord.get(); 23 | + String[] row = new String[record.getColumns().length]; 24 | + int i = 0; 25 | + for (String column : record.getColumns()) { 26 | + row[i++] = record.getString(column); 27 | + } 28 | + rows.add(row); 29 | } 30 | - rows.add(row); 31 | } 32 | return rows; 33 | } 34 | -------------------------------------------------------------------------------- /src/main/resources/.gitignore: -------------------------------------------------------------------------------- 1 | /worldcitiespop.txt 2 | /worldcitiespop_huge.txt 3 | -------------------------------------------------------------------------------- /src/main/resources/correctness.csv: -------------------------------------------------------------------------------- 1 | Year,Make,Model,Description,Price 2 | 1997,Ford,E350,"ac, abs, moon",3000.00 3 | 1999,Chevy,"Venture ""Extended Edition""","",4900.00 4 | 1996,Jeep,Grand Cherokee,"MUST SELL! 
5 | air, moon roof, loaded",4799.00 6 | 1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00 7 | ,,"Venture ""Extended Edition""","",4900.00 -------------------------------------------------------------------------------- /src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | --------------------------------------------------------------------------------