├── .gitattributes ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── build.gradle ├── release-notes.md ├── settings.gradle └── src ├── jmh └── java │ └── uk │ └── elementarysoftware │ └── quickcsv │ └── benchmarks │ ├── BenchmarkParserAndMapperInMemory.java │ ├── City.java │ └── OpenCSVParser.java ├── main └── java │ └── uk │ └── elementarysoftware │ └── quickcsv │ ├── api │ ├── ByteArraySource.java │ ├── CSVParser.java │ ├── CSVParserBuilder.java │ ├── CSVRecord.java │ ├── CSVRecordWithHeader.java │ ├── Field.java │ └── StandardMappers.java │ ├── decoder │ ├── Decoder.java │ ├── ParserFactory.java │ ├── doubles │ │ ├── DoubleParser.java │ │ ├── JDKDoubleParserAdapter.java │ │ └── QuickDoubleParser.java │ └── ints │ │ ├── ExceptionHelper.java │ │ ├── IntParser.java │ │ ├── LongParser.java │ │ ├── QuickIntParser.java │ │ └── QuickLongParser.java │ ├── functional │ ├── Pair.java │ └── PrimitiveFunctions.java │ ├── ioutils │ └── IOUtils.java │ └── parser │ ├── BufferPool.java │ ├── ByteArrayField.java │ ├── ByteSlice.java │ ├── FieldSubsetView.java │ ├── InputStreamToByteArraySourceAdapter.java │ └── QuickCSVParser.java └── test ├── java └── uk │ └── elementarysoftware │ └── quickcsv │ ├── decoder │ ├── doubles │ │ └── DoubleParserTest.java │ └── ints │ │ ├── IntParserTest.java │ │ └── LongParserTest.java │ ├── integration │ ├── CorrectnessTest.java │ ├── HttpStreamTest.java │ └── IntegrationTest.java │ ├── manual │ └── CityManualPerformanceTester.java │ ├── parser │ ├── ByteSliceTest.java │ ├── CharsetHandlingTest.java │ ├── FieldSubsetViewTest.java │ ├── TestParsingSpecialCases.java │ ├── TestParsingWithHeader.java │ ├── TestParsingWithHeaderQuoted.java │ └── simple │ │ └── StraightForwardParser.java │ └── sampledomain │ └── City.java └── resources ├── cities-dos.txt ├── cities-rus-cp1251.txt ├── cities-rus-utf8.txt ├── cities-unix-quoted.txt ├── cities-unix.txt ├── cities-with-header-quoted.txt ├── cities-with-header.txt └── correctness.txt 
/.gitattributes: -------------------------------------------------------------------------------- 1 | cities-dos.txt text eol=crlf 2 | cities-unix.txt text eol=lf -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /bin/ 2 | .classpath 3 | .project 4 | .gradle 5 | .settings 6 | bin 7 | build 8 | private-notes.txt 9 | /keys/ 10 | gradle.properties 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | jdk: 3 | - oraclejdk8 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. 
Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 
55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. 
You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. 
(This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. 
These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 
214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. 
If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 
305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 
340 | 341 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Quick CSV Streamer 2 | ============= 3 | 4 | [![Build Status](https://travis-ci.org/titorenko/quick-csv-streamer.svg?branch=master)](https://travis-ci.org/titorenko/quick-csv-streamer) 5 | [![Maven Central](https://maven-badges.herokuapp.com/maven-central/uk.elementarysoftware/quick-csv-streamer/badge.svg)](https://maven-badges.herokuapp.com/maven-central/uk.elementarysoftware/quick-csv-streamer/) 6 | [![Javadoc](https://javadoc-emblem.rhcloud.com/doc/uk.elementarysoftware/quick-csv-streamer/badge.svg)](http://www.javadoc.io/doc/uk.elementarysoftware/quick-csv-streamer) 7 | 8 | Quick CSV streamer is a high performance CSV parsing library with Java 8 Stream API. 9 | The library operates in "zero-copy" mode and only parses what is required by the client. Amount 10 | of garbage produced is also optimized, reducing pressure on the garbage collector. 11 | Parallel, multi-core parsing is supported transparently via Java Stream API. 12 | 13 | Compared to other open source Java CSV parsing libraries Quick CSV achieves speed ups at 2x - 10x range in sequential, single thread, mode. Naturally parallel mode improves performance further. See benchmarking results below for more details. 14 | 15 | The library is limited to so called "line-optimal" charsets like UTF-8, US-ASCII, ISO-8859-1 and some others. Such line-optimal charsets have the property that line feed ('\n'), carriage return ('\r'), CSV separator are easily identifiable from other encoded characters. 
16 | 17 | 18 | Maven dependency 19 | -------------- 20 | 21 | Available from Maven Central: 22 | 23 | ```xml 24 | <dependency> 25 | <groupId>uk.elementarysoftware</groupId> 26 | <artifactId>quick-csv-streamer</artifactId> 27 | <version>0.2.4</version> 28 | </dependency> 29 | ``` 30 | 31 | Example usage 32 | -------------- 33 | 34 | Suppose following CSV file needs to be parsed 35 | 36 | Country,City,AccentCity,Region,Population,Latitude,Longitude 37 | ad,andorra,Andorra,07,,42.5,1.5166667 38 | gb,city of london,City of London,H9,,51.514125,-.093689 39 | ua,kharkiv,Kharkiv,07,,49.980814,36.252718 40 | 41 | First define Java class to represent the records as follows 42 | 43 | public class City { 44 | private final String city; 45 | private final int population; 46 | private final double latitude; 47 | private final double longitude; 48 | 49 | ... 50 | } 51 | 52 | here we will be sourcing 4 fields from the source file, ignoring other 3. 53 | 54 | Parsing the file is simple 55 | 56 | import uk.elementarysoftware.quickcsv.api.*; 57 | 58 | CSVParser parser = CSVParserBuilder.aParser(City::new, City.CSVFields.class).forRfc4180().build(); 59 | 60 | the parser will be using CSV separators as per RFC 4180, default encoding and will be expecting header as first record in the source. Custom separators, quotes, encodings and header sources are supported. 61 | 62 | Actual mapping is done in `City` constructor 63 | 64 | public class City { 65 | 66 | public static enum CSVFields { 67 | AccentCity, 68 | Population, 69 | Latitude, 70 | Longitude 71 | } 72 | 73 | public City(CSVRecordWithHeader r) { 74 | this.city = r.getField(CSVFields.AccentCity).asString(); 75 | this.population = r.getField(CSVFields.Population).asInt(); 76 | this.latitude = r.getField(CSVFields.Latitude).asDouble(); 77 | this.longitude = r.getField(CSVFields.Longitude).asDouble(); 78 | } 79 | 80 | first `CSVFields` enum specifies which fields should be sourced and only these fields will be actually parsed. 
After that `CSVRecordWithHeader` instance is used to populate `City` instance fields, referring to CSV fields by enum values. 81 | 82 | Of course mapping can also be done outside domain class constructor, just pass different `Function` to `CSVParserBuilder`. 83 | 84 | Resulting stream can be processed in parallel or sequentially with usual Java stream API. For example to parse sequentially on a single thread 85 | 86 | Stream stream = parser.parse(source).sequential(); 87 | stream.forEach(System.out::println); 88 | 89 | By default parser will operate in parallel mode. 90 | 91 | Please see [sample project](https://github.com/titorenko/quick-csv-streamer-cities-sample) for full source code of the above example. 92 | 93 | Special cases for headers 94 | -------------- 95 | 96 | When header contains special characters the fields can not be simply encoded by enum literals. In such cases `toString` should be overridden, for example 97 | 98 | enum Fields { 99 | Latitude("City Latitude"), 100 | Longitude("City Longitude"), 101 | City("City name"), 102 | Population("City Population"); 103 | 104 | private final String headerFieldName; 105 | 106 | private Fields(String headerFieldName) { 107 | this.headerFieldName = headerFieldName; 108 | } 109 | 110 | @Override public String toString() { 111 | return headerFieldName; 112 | } 113 | } 114 | 115 | If header is missing from the source it can be supplied during parser construction 116 | 117 | CSVParserBuilder 118 | .aParser(City::new, City.CSVFields.class) 119 | .usingExplicitHeader("Country", "City", "AccentCity", "Region", "Population", "Latitude", "Longitude") 120 | .build(); 121 | 122 | 123 | Advanced usage 124 | -------------- 125 | About 10% performance improvement compared to normal usage can be achieved by referencing the fields by position instead of name. 
In this case parser construction is even simpler 126 | 127 | CSVParser parser = CSVParserBuilder.aParser(City::new).build(); 128 | 129 | as enumeration specifying field names is not needed. However now constructor will be using `CSVRecord` interface 130 | 131 | public City(CSVRecord r) { 132 | r.skipFields(2); 133 | this.city = r.getNextField().asString(); 134 | r.skipField(); 135 | this.population = r.getNextField().asInt(); 136 | this.latitude = r.getNextField().asDouble(); 137 | this.longitude = r.getNextField().asDouble(); 138 | } 139 | 140 | effectively this encodes field order in the CSV source. 141 | 142 | Performance 143 | -------------- 144 | 145 | Best way to check performance of the library is to run benchmark on your target system with 146 | 147 | gradle jmh 148 | 149 | reports can be then found in build/reports/jmh. 150 | 151 | It is very important to appreciate that performance might vary dramatically depending on the actual CSV content. As a very rough guideline see below sample output of "gradle jmh" on i7 2700k Ubuntu system, which uses `cities.txt` similar to example above, expanded to have 3173800 rows and 157 MB in size: 152 | 153 | |Benchmark |Mode |Cnt | Score | Error |Units| 154 | | ----------------------------- | ---- | --- | ------- | --------- | --- | 155 | |OpenCSVParser |avgt | 5 |2393.921 |± 262.347 |ms/op| 156 | |Quick CSV Parallel with header |avgt | 5 | 205.013 |± 1.739 |ms/op| 157 | |Quick CSV Parallel (advanced) |avgt | 5 | 177.262 |± 1.739 |ms/op| 158 | |Quick CSV Sequential |avgt | 5 | 648.462 |± 45.991 |ms/op| 159 | 160 | Comparison is done with OpenCSV library v3.8, performance of other libraries can be extrapolated using chart from https://github.com/uniVocity/csv-parsers-comparison 161 | 162 | Prerequisites 163 | -------------- 164 | Quick CSV Streamer library requires Java 8, it has no other dependencies. 
165 | 166 | License 167 | -------------- 168 | Library is licensed under the terms of [GPL v2.0 license](http://www.gnu.org/licenses/gpl-2.0.html). 169 | Please contact me if you wish to use this library under more commercially friendly license or want to extend it, for example to add async parsing or support different file formats. 170 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'me.champeau.gradle.jmh' version '0.3.0' 3 | } 4 | 5 | apply plugin: 'java' 6 | apply plugin: 'maven' 7 | apply plugin: 'signing' 8 | apply plugin: 'me.champeau.gradle.jmh' 9 | apply plugin: "eclipse" 10 | 11 | group = 'uk.elementarysoftware' 12 | version = '0.2.4' 13 | 14 | sourceCompatibility = JavaVersion.VERSION_1_8 15 | 16 | tasks.withType(JavaCompile) { 17 | options.encoding = 'UTF-8' 18 | } 19 | 20 | repositories { 21 | mavenCentral() 22 | } 23 | 24 | dependencies { 25 | testCompile group: 'commons-io', name: 'commons-io', version: '2.5' 26 | testCompile group: 'junit', name: 'junit', version: '4.12' 27 | testCompile group: 'org.eclipse.jetty', name: 'jetty-server', version: '9.4.6.v20170531' 28 | testCompile group: 'org.apache.httpcomponents', name: 'httpclient', version: '4.5.3' 29 | 30 | jmh "commons-io:commons-io:2.4" 31 | jmh "com.opencsv:opencsv:3.8" 32 | } 33 | 34 | jmh { 35 | include = '.*BenchmarkParserAndMapperInMemory.*' 36 | jmhVersion = '1.19' 37 | jvmArgsAppend = '-Xmx1g -XX:+AggressiveOpts' 38 | } 39 | 40 | eclipse { 41 | classpath { 42 | plusConfigurations += [ configurations.jmh ] 43 | } 44 | } 45 | 46 | task javadocJar(type: Jar) { 47 | classifier = 'javadoc' 48 | from javadoc 49 | } 50 | 51 | task sourcesJar(type: Jar) { 52 | classifier = 'sources' 53 | from sourceSets.main.allSource 54 | } 55 | 56 | artifacts { 57 | archives javadocJar, sourcesJar 58 | } 59 | 60 | if (hasProperty('ossrhUsername')) { 61 | 62 | 
signing { 63 | sign configurations.archives 64 | } 65 | 66 | uploadArchives { 67 | repositories { 68 | mavenDeployer { 69 | beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) } 70 | 71 | repository(url: "https://oss.sonatype.org/service/local/staging/deploy/maven2/") { 72 | authentication(userName: ossrhUsername, password: ossrhPassword) 73 | } 74 | 75 | snapshotRepository(url: "https://oss.sonatype.org/content/repositories/snapshots/") { 76 | authentication(userName: ossrhUsername, password: ossrhPassword) 77 | } 78 | 79 | pom.project { 80 | name 'Quick CSV Streamer' 81 | packaging 'jar' 82 | description 'Quick CSV Parser with Java 8 Streams API' 83 | url 'https://github.com/titorenko/quick-csv-streamer' 84 | 85 | scm { 86 | connection 'scm:git:git://github.com/titorenko/quick-csv-streamer.git' 87 | developerConnection 'scm:git:git@github.com:titorenko/quick-csv-streamer.git' 88 | url 'https://github.com/titorenko/quick-csv-streamer' 89 | } 90 | 91 | licenses { 92 | license { 93 | name 'GNU General Public License, version 2' 94 | url 'http://www.gnu.org/licenses/gpl-2.0.html' 95 | } 96 | } 97 | 98 | developers { 99 | developer { 100 | id 'elementarysoftware' 101 | name 'Elementary Software Ltd.' 102 | email 'elementarysoftware@gmail.com' 103 | } 104 | } 105 | } 106 | } 107 | } 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /release-notes.md: -------------------------------------------------------------------------------- 1 | 0.2.4 2 | ========== 3 | Added asBoxedInt, asBoxedDouble convenience methods. 
4 | 5 | Bugfixes: 6 | * Issues #6 and #7 fixed 7 | 8 | 0.2.3 9 | ========== 10 | Improvement in parsing performance for quoted data 11 | Bugfixes 12 | * Additional fix for issue quotes in the end of the line, thanks to https://github.com/jasonk000 13 | 14 | 0.2.2 15 | ========== 16 | Converted tabs to spaces in source files 17 | Bugfixes 18 | * Fix for issue #3 with quotes in the end of the line 19 | 20 | 0.2.1 21 | ========== 22 | Bugfixes 23 | * Fix NPE occurring when first column is included into parsing results using header in the source API. 24 | Issue was reported by Pradeep Jaligama. 25 | 26 | 0.2.0 27 | ========== 28 | New features 29 | * header aware parsing 30 | * charset support 31 | * more flexible input config 32 | * performance improvements: less garbage, better composite slice impl., int and long parsers 33 | * new interface with mapper 34 | 35 | Bugfixes 36 | * issue with skipping records via stream api 37 | * stream completion flag as returned by advance() was not properly calculated 38 | 39 | 0.1.1 40 | ========== 41 | * sample project added 42 | 43 | 0.1.0 44 | ========== 45 | * initial release 46 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'quick-csv-streamer' -------------------------------------------------------------------------------- /src/jmh/java/uk/elementarysoftware/quickcsv/benchmarks/BenchmarkParserAndMapperInMemory.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.benchmarks; 2 | 3 | import java.io.ByteArrayInputStream; 4 | import java.io.File; 5 | import java.io.IOException; 6 | import java.io.UncheckedIOException; 7 | import java.util.concurrent.TimeUnit; 8 | import java.util.stream.Stream; 9 | 10 | import org.apache.commons.io.FileUtils; 11 | import org.openjdk.jmh.annotations.Benchmark; 12 | 
import org.openjdk.jmh.annotations.BenchmarkMode; 13 | import org.openjdk.jmh.annotations.Fork; 14 | import org.openjdk.jmh.annotations.Measurement; 15 | import org.openjdk.jmh.annotations.Mode; 16 | import org.openjdk.jmh.annotations.OutputTimeUnit; 17 | import org.openjdk.jmh.annotations.Scope; 18 | import org.openjdk.jmh.annotations.State; 19 | import org.openjdk.jmh.annotations.Warmup; 20 | import org.openjdk.jmh.infra.Blackhole; 21 | import org.openjdk.jmh.runner.Runner; 22 | import org.openjdk.jmh.runner.options.Options; 23 | import org.openjdk.jmh.runner.options.OptionsBuilder; 24 | 25 | import uk.elementarysoftware.quickcsv.api.CSVParser; 26 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder; 27 | 28 | @BenchmarkMode(Mode.AverageTime) 29 | @Fork(1) 30 | @Warmup(iterations = 3, time = 5000, timeUnit = TimeUnit.MILLISECONDS) 31 | @Measurement(iterations = 5, time = 7000, timeUnit = TimeUnit.MILLISECONDS) 32 | @OutputTimeUnit(TimeUnit.MILLISECONDS) 33 | @State(Scope.Benchmark) 34 | public class BenchmarkParserAndMapperInMemory { 35 | 36 | private static final String TEST_FILE = "src/test/resources/cities-unix.txt"; 37 | private static final String TEST_FILE_QUOTED = "src/test/resources/cities-unix-quoted.txt"; 38 | 39 | @State(Scope.Benchmark) 40 | public static class BenchmarkState { 41 | 42 | byte[] content = loadFile(prepareFile(100, TEST_FILE)); 43 | 44 | byte[] quotedContent = loadFile(prepareFile(100, TEST_FILE_QUOTED)); 45 | 46 | private File prepareFile(int sizeMultiplier, String testFile) { 47 | try { 48 | byte[] content= FileUtils.readFileToByteArray(new File(testFile)); 49 | File result = File.createTempFile("csv", "large"); 50 | for (int i = 0; i < sizeMultiplier; i++) { 51 | FileUtils.writeByteArrayToFile(result, content, true); 52 | } 53 | return result; 54 | } catch (IOException e) { 55 | throw new UncheckedIOException(e); 56 | } 57 | } 58 | 59 | private byte[] loadFile(File file) { 60 | try { 61 | return 
FileUtils.readFileToByteArray(file); 62 | } catch (IOException e) { 63 | throw new UncheckedIOException(e); 64 | } 65 | } 66 | } 67 | 68 | @Benchmark 69 | public void benchmarkParallelParser(BenchmarkState state, Blackhole bh) { 70 | CSVParser parser = CSVParserBuilder.aParser(City.MAPPER).build(); 71 | Stream stream = parser.parse(new ByteArrayInputStream(state.content)); 72 | stream.forEach(c -> bh.consume(c)); 73 | } 74 | 75 | @Benchmark 76 | public void benchmarkParallelParserWithHeader(BenchmarkState state, Blackhole bh) { 77 | CSVParser parser = CSVParserBuilder 78 | .aParser(City.EnumMapper.MAPPER, City.EnumMapper.Fields.class) 79 | .usingExplicitHeader("Country", "City", "AccentCity", "Region", "Population", "Latitude", "Longitude") 80 | .build(); 81 | Stream stream = parser.parse(new ByteArrayInputStream(state.content)); 82 | stream.forEach(c -> bh.consume(c)); 83 | } 84 | 85 | @Benchmark 86 | public void benchmarkSequentialParser(BenchmarkState state, Blackhole bh) { 87 | CSVParser parser = CSVParserBuilder.aParser(City.MAPPER).build(); 88 | Stream stream = parser.parse(new ByteArrayInputStream(state.content)); 89 | stream.sequential().forEach(c -> bh.consume(c)); 90 | } 91 | 92 | 93 | @Benchmark 94 | public void benchmarkSequentialParserWithQuotes(BenchmarkState state, Blackhole bh) { 95 | CSVParser parser = CSVParserBuilder.aParser(City.MAPPER).build(); 96 | Stream stream = parser.parse(new ByteArrayInputStream(state.quotedContent)); 97 | stream.sequential().forEach(c -> bh.consume(c)); 98 | } 99 | 100 | @Benchmark 101 | public void benchmarkOpenCSVParser(BenchmarkState state, Blackhole bh) { 102 | OpenCSVParser parser = new OpenCSVParser(); 103 | Stream stream = parser.parse(new ByteArrayInputStream(state.content)); 104 | stream.forEach(c -> bh.consume(c)); 105 | } 106 | 107 | public static void main(String[] args) throws Exception { 108 | Options opt = new OptionsBuilder() 109 | .include(".*" + 
BenchmarkParserAndMapperInMemory.class.getSimpleName()+".*") 110 | //.addProfiler(LinuxPerfAsmProfiler.class) 111 | //.addProfiler(StackProfiler.class) 112 | .build(); 113 | new Runner(opt).run(); 114 | } 115 | 116 | } -------------------------------------------------------------------------------- /src/jmh/java/uk/elementarysoftware/quickcsv/benchmarks/City.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.benchmarks; 2 | 3 | import java.util.function.Function; 4 | 5 | import uk.elementarysoftware.quickcsv.api.CSVRecord; 6 | import uk.elementarysoftware.quickcsv.api.CSVRecordWithHeader; 7 | 8 | public class City { 9 | 10 | public static final Function MAPPER = City::new; 11 | 12 | public static class EnumMapper { 13 | 14 | public static enum Fields { 15 | AccentCity, 16 | Population, 17 | Latitude, 18 | Longitude 19 | } 20 | 21 | public static final Function, City> MAPPER = r -> { 22 | return new City( 23 | r.getField(Fields.AccentCity).asString(), 24 | r.getField(Fields.Population).asInt(), 25 | r.getField(Fields.Latitude).asDouble(), 26 | r.getField(Fields.Longitude).asDouble() 27 | ); 28 | }; 29 | } 30 | 31 | private static final int CITY_INDEX = 2; 32 | 33 | private final String city; 34 | private final int population; 35 | private final double latitude; 36 | private final double longitude; 37 | 38 | public City(CSVRecord r) { 39 | r.skipFields(CITY_INDEX); 40 | this.city = r.getNextField().asString(); 41 | r.skipField(); 42 | this.population = r.getNextField().asInt(); 43 | this.latitude = r.getNextField().asDouble(); 44 | this.longitude = r.getNextField().asDouble(); 45 | } 46 | 47 | public City(String city, int population, double latitude, double longitude) { 48 | this.city = city; 49 | this.population = population; 50 | this.latitude = latitude; 51 | this.longitude = longitude; 52 | } 53 | 54 | public String getCity() { 55 | return city; 56 | } 57 | 58 | public int 
getPopulation() { 59 | return population; 60 | } 61 | 62 | public double getLatitude() { 63 | return latitude; 64 | } 65 | 66 | public double getLongitude() { 67 | return longitude; 68 | } 69 | 70 | @Override 71 | public int hashCode() { 72 | final int prime = 31; 73 | int result = 1; 74 | result = prime * result + ((city == null) ? 0 : city.hashCode()); 75 | long temp; 76 | temp = Double.doubleToLongBits(latitude); 77 | result = prime * result + (int) (temp ^ (temp >>> 32)); 78 | temp = Double.doubleToLongBits(longitude); 79 | result = prime * result + (int) (temp ^ (temp >>> 32)); 80 | result = prime * result + population; 81 | return result; 82 | } 83 | 84 | @Override 85 | public boolean equals(Object obj) { 86 | if (this == obj) 87 | return true; 88 | if (obj == null) 89 | return false; 90 | if (getClass() != obj.getClass()) 91 | return false; 92 | City other = (City) obj; 93 | if (city == null) { 94 | if (other.city != null) 95 | return false; 96 | } else if (!city.equals(other.city)) 97 | return false; 98 | if (Double.doubleToLongBits(latitude) != Double.doubleToLongBits(other.latitude)) 99 | return false; 100 | if (Double.doubleToLongBits(longitude) != Double.doubleToLongBits(other.longitude)) 101 | return false; 102 | if (population != other.population) 103 | return false; 104 | return true; 105 | } 106 | 107 | @Override 108 | public String toString() { 109 | return "City [city=" + city + ", population=" + population + ", latitude=" + latitude + ", longitude=" + longitude + "]"; 110 | } 111 | 112 | } 113 | -------------------------------------------------------------------------------- /src/jmh/java/uk/elementarysoftware/quickcsv/benchmarks/OpenCSVParser.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.benchmarks; 2 | 3 | import java.io.IOException; 4 | import java.io.InputStream; 5 | import java.io.InputStreamReader; 6 | import java.io.Reader; 7 | import 
java.io.UncheckedIOException; 8 | import java.util.Iterator; 9 | import java.util.Spliterator; 10 | import java.util.Spliterators; 11 | import java.util.stream.Stream; 12 | import java.util.stream.StreamSupport; 13 | 14 | import org.apache.commons.io.IOUtils; 15 | 16 | import com.opencsv.CSVReader; 17 | 18 | 19 | public class OpenCSVParser { 20 | 21 | public Stream parse(InputStream is) { 22 | Reader reader = new InputStreamReader(is); 23 | CSVReader csvReader = new CSVReader(reader); 24 | Iterator iterator = new Iterator() { 25 | private boolean isEndReached = false; 26 | 27 | @Override 28 | public boolean hasNext() { 29 | return !isEndReached; 30 | } 31 | 32 | @Override 33 | public City next() { 34 | try { 35 | String[] values = csvReader.readNext(); 36 | if (values == null) { 37 | isEndReached = true; 38 | return null; 39 | } else { 40 | return toCity(values); 41 | } 42 | } catch (IOException e) { 43 | throw new UncheckedIOException(e); 44 | } 45 | } 46 | }; 47 | Spliterator spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED); 48 | return StreamSupport.stream(spliterator, false).onClose(new Runnable() { 49 | @Override 50 | public void run() { 51 | IOUtils.closeQuietly(csvReader); 52 | } 53 | }); 54 | } 55 | 56 | protected City toCity(String[] values) { 57 | if (values.length < 7) return null; 58 | return new City(values[2], parseInt(values[4]), parseDouble(values[5]), parseDouble(values[6])); 59 | } 60 | 61 | private int parseInt(String value) { 62 | try { 63 | return value.isEmpty() ? 0 : Integer.parseInt(value); 64 | } catch (Exception e) { 65 | return 0; 66 | } 67 | } 68 | 69 | private double parseDouble(String value) { 70 | return value.isEmpty() ? 
0 : Double.parseDouble(value); 71 | } 72 | 73 | } 74 | -------------------------------------------------------------------------------- /src/main/java/uk/elementarysoftware/quickcsv/api/ByteArraySource.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.api; 2 | 3 | import java.util.concurrent.atomic.AtomicInteger; 4 | import java.util.function.Consumer; 5 | 6 | /** 7 | * Abstract source of byte arrays to allow parsing of synchronous or asynchronous streams. 8 | */ 9 | public interface ByteArraySource { 10 | 11 | ByteArrayChunk getNext() throws Exception; 12 | 13 | public abstract static class ReusableChunk { 14 | 15 | private final Runnable onFree; 16 | private final AtomicInteger usageCount = new AtomicInteger(0); 17 | 18 | /** 19 | * @param onFree - callback that will be called when usage count reaches zero 20 | */ 21 | protected ReusableChunk(Runnable onFree) { 22 | this.onFree = onFree; 23 | } 24 | 25 | public void incrementUseCount() { 26 | usageCount.incrementAndGet(); 27 | } 28 | 29 | public void decrementUseCount() { 30 | int value = usageCount.decrementAndGet(); 31 | if (value <= 0) onFree.run(); 32 | } 33 | } 34 | 35 | public static class ByteArrayChunk extends ReusableChunk { 36 | public static final ByteArrayChunk EMPTY = new ByteArrayChunk(new byte[0], 0, false, (b) -> {}); 37 | 38 | private final byte[] data; 39 | private final int length; 40 | private final boolean isLast; 41 | 42 | /** 43 | * @param data - underlying content 44 | * @param length - content length 45 | * @param isLast - is this chunk of is last 46 | * @param onFree - callback that will be called when data from this chunk has been fully consumed. 
47 | */ 48 | public ByteArrayChunk(byte[] data, int length, boolean isLast, Consumer onFree) { 49 | super(() -> onFree.accept(data)); 50 | this.data = data; 51 | this.length = length; 52 | this.isLast = isLast; 53 | } 54 | 55 | public byte[] getData() { 56 | return data; 57 | } 58 | 59 | public int getLength() { 60 | return length; 61 | } 62 | 63 | public boolean isLast() { 64 | return isLast; 65 | } 66 | } 67 | } -------------------------------------------------------------------------------- /src/main/java/uk/elementarysoftware/quickcsv/api/CSVParser.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.api; 2 | 3 | import java.io.File; 4 | import java.io.FileInputStream; 5 | import java.io.IOException; 6 | import java.io.InputStream; 7 | import java.util.stream.Stream; 8 | 9 | import uk.elementarysoftware.quickcsv.ioutils.IOUtils; 10 | 11 | /** 12 | * CSV Parser can parse inputs such as {@link InputStream} or more generally {@link ByteArraySource} to Stream<T>. 
13 | * 14 | * @param - the type of the parsing result 15 | */ 16 | public interface CSVParser { 17 | 18 | public default Stream parse(File file) throws IOException { 19 | InputStream is = new FileInputStream(file); 20 | return parse(is).onClose(() -> IOUtils.closeQuietly(is)); 21 | } 22 | 23 | public Stream parse(InputStream is); 24 | 25 | public Stream parse(ByteArraySource bas); 26 | } -------------------------------------------------------------------------------- /src/main/java/uk/elementarysoftware/quickcsv/api/CSVParserBuilder.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.api; 2 | 3 | import java.nio.charset.Charset; 4 | import java.util.Objects; 5 | import java.util.Optional; 6 | import java.util.function.Function; 7 | 8 | import uk.elementarysoftware.quickcsv.parser.FieldSubsetView; 9 | import uk.elementarysoftware.quickcsv.parser.QuickCSVParser; 10 | 11 | /** 12 | * CSV Parser builder, use this class to construct {@link CSVParser}. 13 | * 14 | * @param - type of object that each record of the CSV data will be mapped to 15 | * @param - type of enumeration that is used to specify fields to be parsed, only relevant for header-aware parser. 16 | */ 17 | public class CSVParserBuilder> { 18 | 19 | private int bufferSize = 512*1024; 20 | 21 | private CSVFileMetadata metadata = CSVFileMetadata.RFC_4180; 22 | 23 | private Function recordMapper; 24 | 25 | private Function, T> recordWithHeaderMapper; 26 | private FieldSubsetView subsetView = null; 27 | 28 | private Charset charset = Charset.defaultCharset(); 29 | 30 | private CSVParserBuilder() { 31 | } 32 | 33 | /** 34 | * Create new parser using supplied mapping function. 35 | * 36 | * Mapping function can not store reference to {@link CSVRecord} object, 37 | * it needs to be a pure function that creates new instance of T. 38 | * CSVRecord could be mutated by the parser when next field or record are processed. 
39 | * 40 | * @param mapper - mapping function from CSVRecord to T 41 | * @param - type of object that each record of the CSV data will be mapped to 42 | * @param - ignored 43 | * @return this parser builder 44 | */ 45 | public static > CSVParserBuilder aParser(Function mapper) { 46 | CSVParserBuilder builder = new CSVParserBuilder(); 47 | builder.recordMapper = mapper; 48 | return builder; 49 | } 50 | 51 | /** 52 | * Create new header-aware parser using supplied mapping function. 53 | * 54 | * Mapping function can not store reference to {@link CSVRecordWithHeader} object, 55 | * it needs to be a pure function that create new instance of T. 56 | * 57 | * CSVRecordWithHeader could be mutated by the parser when next record is processed. 58 | * 59 | * @param mapper - mapping function from CSVRecordWithHeader to T 60 | * @param fields - enumeration specifying fields that should be parsed 61 | * @param - type of object that each record of the CSV data will be mapped to 62 | * @param - type of enumeration that is used to specify fields to be parsed 63 | * 64 | * @return this parser builder 65 | */ 66 | 67 | public static > CSVParserBuilder aParser(Function, T> mapper, Class fields) { 68 | CSVParserBuilder builder = new CSVParserBuilder(); 69 | builder.recordWithHeaderMapper = mapper; 70 | builder.subsetView = FieldSubsetView.forSourceSuppliedHeader(fields); 71 | return builder; 72 | } 73 | 74 | /** 75 | * Use supplied header and do not take header from the source. 76 | * @param header - header fields 77 | * @return this parser builder 78 | */ 79 | public CSVParserBuilder usingExplicitHeader(String... 
header) { 80 | Objects.requireNonNull(subsetView); 81 | this.subsetView = FieldSubsetView.forExplicitHeader(subsetView.getFieldSubset(), header); 82 | return this; 83 | } 84 | 85 | /** 86 | * Use tabs as separator and no quoting 87 | * @return this parser builder 88 | */ 89 | public CSVParserBuilder forTabs() { 90 | this.metadata = CSVFileMetadata.TABS; 91 | return this; 92 | } 93 | 94 | /** 95 | * Use comma as separator and double quotes as quote character as per RFC 4180 document. 96 | * @return this parser builder 97 | */ 98 | public CSVParserBuilder forRfc4180() { 99 | this.metadata = CSVFileMetadata.RFC_4180; 100 | return this; 101 | } 102 | 103 | /** 104 | * Use specified character as field separator. 105 | * @param separator - field separator character 106 | * @return this parser builder 107 | */ 108 | public CSVParserBuilder usingSeparatorWithNoQuotes(char separator) { 109 | this.metadata = new CSVFileMetadata(separator, Optional.empty()); 110 | return this; 111 | } 112 | 113 | /** 114 | * Use specified characters as field separator and quote character. 115 | * Quote character can be escaped by preceding it with another quote character. 116 | * @param separator - field separator character 117 | * @param quote - quote character 118 | * @return this parser builder 119 | */ 120 | public CSVParserBuilder usingSeparatorWithQuote(char separator, char quote) { 121 | this.metadata = new CSVFileMetadata(separator, Optional.of(quote)); 122 | return this; 123 | } 124 | 125 | /** 126 | * Buffer size to use when reading from file and parsing. Each buffer is parsed by single thread. 127 | * @param size - size in bytes 128 | * @return this parser builder 129 | */ 130 | public CSVParserBuilder usingBufferSize(int size) { 131 | this.bufferSize = size; 132 | return this; 133 | } 134 | 135 | 136 | /** 137 | * Specifies charset to use during parsing. By default Charset.defaultCharset() is used. 
138 | * This parser only supports charset that represent separators and digits as single bytes. 139 | * @param charset - charset to use during parsing 140 | * @return this parser builder 141 | */ 142 | public CSVParserBuilder usingCharset(Charset charset) { 143 | this.charset = charset; 144 | return this; 145 | } 146 | 147 | /** 148 | * Specifies charset name to use during parsing. By default Charset.defaultCharset() is used. 149 | * This parser only supports charset that represent separators and digits as single bytes. 150 | * @param charsetName - charset to use during parsing 151 | * @return this parser builder 152 | */ 153 | public CSVParserBuilder usingCharset(String charsetName) { 154 | return usingCharset(Charset.forName(charsetName)); 155 | } 156 | 157 | /** 158 | * Construct parser using current setting 159 | * @return CSV Parser 160 | */ 161 | public CSVParser build() { 162 | return subsetView == null ? 163 | new QuickCSVParser(bufferSize, metadata, recordMapper, charset) : 164 | new QuickCSVParser(bufferSize, metadata, recordWithHeaderMapper, subsetView, charset); 165 | } 166 | 167 | public static class CSVFileMetadata { 168 | 169 | public static CSVFileMetadata RFC_4180 = new CSVFileMetadata(',', Optional.of('"')); 170 | public static CSVFileMetadata TABS = new CSVFileMetadata('\t', Optional.empty()); 171 | 172 | public final char separator; 173 | public final Optional quote; 174 | 175 | public CSVFileMetadata(char separator, Optional quote) { 176 | this.separator = separator; 177 | this.quote = quote; 178 | } 179 | } 180 | } -------------------------------------------------------------------------------- /src/main/java/uk/elementarysoftware/quickcsv/api/CSVRecord.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.api; 2 | 3 | 4 | /** 5 | * Interface to access parsed CSV data in efficient manner. 6 | * Fields are parsed in order they appear in the CSV source. 
7 | */ 8 | public interface CSVRecord { 9 | public void skipField(); 10 | public void skipFields(int nFields); 11 | 12 | public Field getNextField(); 13 | } -------------------------------------------------------------------------------- /src/main/java/uk/elementarysoftware/quickcsv/api/CSVRecordWithHeader.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.api; 2 | 3 | import java.util.List; 4 | 5 | /** 6 | * CSV Record with header that gives access to all fields from enumeration K. 7 | * The fields can be accessed by name using enum values. 8 | * Enum values toString() should match with header column names. 9 | * 10 | * @param <K> - field enumeration 11 | */ 12 | public interface CSVRecordWithHeader> { 13 | 14 | public Field getField(K field); 15 | 16 | public List getHeader(); 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/uk/elementarysoftware/quickcsv/api/Field.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.api; 2 | 3 | import java.nio.ByteBuffer; 4 | 5 | /** 6 | * Interface to access underlying raw data as a particular type. 7 | * 8 | * Usually the data underlying the field is some kind of large byte array and the field maintains a view onto this array. 9 | * The underlying array can be mutated during parsing and the field object itself can be re-used, therefore clients 10 | * should not maintain references to Field instances; instead the client is expected to map the field to its own data 11 | * structure and then no longer use it. 12 | * 13 | * Methods that return primitive types will throw NPE if underlying field is empty. This should be tested with isEmpty() 14 | * call where needed or boxed methods should be used. 
15 | */ 16 | public interface Field { 17 | 18 | public ByteBuffer raw(); 19 | 20 | public String asString(); 21 | 22 | public double asDouble(); 23 | public byte asByte(); 24 | public char asChar(); 25 | public short asShort(); 26 | public int asInt(); 27 | public long asLong(); 28 | 29 | public Integer asBoxedInt(); 30 | public Double asBoxedDouble(); 31 | 32 | public boolean isEmpty(); 33 | 34 | public Field clone(); 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/uk/elementarysoftware/quickcsv/api/StandardMappers.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.api; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.function.Function; 6 | 7 | public class StandardMappers { 8 | /** 9 | * Just convert to string list. Note that is NOT recommended to use this function in high volume scenarios, 10 | * more effective is to directly convert to domain object or array. 
11 | */ 12 | public static final Function> TO_STRING_LIST = new Function>() { 13 | 14 | @Override 15 | public List apply(CSVRecord r) { 16 | List result = new ArrayList<>(); 17 | while(true) { 18 | Field f = r.getNextField(); 19 | if (f == null) break; 20 | result.add(f.asString()); 21 | } 22 | return result; 23 | } 24 | 25 | }; 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/uk/elementarysoftware/quickcsv/decoder/Decoder.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.decoder; 2 | 3 | import java.nio.charset.Charset; 4 | 5 | import uk.elementarysoftware.quickcsv.decoder.ints.IntParser; 6 | import uk.elementarysoftware.quickcsv.decoder.ints.LongParser; 7 | 8 | public class Decoder { 9 | 10 | private final uk.elementarysoftware.quickcsv.decoder.doubles.DoubleParser doubleParser; 11 | private final Charset charset; 12 | private final IntParser intParser; 13 | private final LongParser longParser; 14 | 15 | public Decoder(Charset charset) { 16 | this.charset = charset; 17 | ParserFactory parserFactory = new ParserFactory(); 18 | this.doubleParser = parserFactory.getDoubleParser(); 19 | this.intParser = parserFactory.getIntParser(); 20 | this.longParser = parserFactory.getLongParser(); 21 | } 22 | 23 | public String decodeToString(byte[] buffer, int offset, int length) { 24 | return new String(buffer, offset, length, charset); 25 | } 26 | 27 | public double decodeToDouble(byte[] buffer, int offset, int length) { 28 | if (length == 0) return 0.0; 29 | return doubleParser.parse(buffer, offset, length); 30 | } 31 | 32 | public int decodeToInt(byte[] buffer, int offset, int length) { 33 | if (length == 0) return 0; 34 | return intParser.parse(buffer, offset, length); 35 | } 36 | 37 | public long decodeToLong(byte[] buffer, int offset, int length) { 38 | if (length == 0) return 0L; 39 | return longParser.parse(buffer, offset, length); 
/**
 * Parser of doubles from a region of a byte array.
 */
public interface DoubleParser {

    /**
     * @param in - array holding the textual representation of the number
     * @param startIndex - index of the first byte to parse
     * @param length - number of bytes to parse
     * @return the parsed value
     */
    public double parse(byte[] in, int startIndex, int length);

    /**
     * Convenience overload that parses the bytes of the given string, encoded
     * with the platform default charset.
     *
     * @param s - text to parse
     * @return the parsed value
     */
    default public double parse(String s) {
        byte[] bytes = s.getBytes();
        // Pass the byte count, not s.length(): the two differ as soon as a
        // character is encoded with more than one byte.
        return parse(bytes, 0, bytes.length);
    }
}
-------------------------------------------------------------------------------- /src/main/java/uk/elementarysoftware/quickcsv/decoder/doubles/QuickDoubleParser.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.decoder.doubles; 2 | 3 | 4 | public class QuickDoubleParser implements DoubleParser { 5 | 6 | private static final int RADIX = 10; 7 | private static final int DOT = '.'-'0'; 8 | 9 | private JDKDoubleParserAdapter fallBack = new JDKDoubleParserAdapter(); 10 | 11 | public double parse(byte[] bytes, int offset, int length) { 12 | if (bytes == null || length <=0) 13 | throw new NumberFormatException("Empty input"); 14 | long result = 0; 15 | boolean isNegative = false; 16 | int index = offset, dotIndex=offset+length-1, endIndex = offset+length; 17 | 18 | byte firstByte = bytes[index]; 19 | if (firstByte < '0') { 20 | if (firstByte == '-') { 21 | isNegative = true; 22 | } 23 | index++; 24 | } 25 | int nDigits = 0; 26 | while (index < endIndex) { 27 | int digit = bytes[index] - '0'; 28 | if (digit == DOT) { 29 | dotIndex=index; 30 | }else if (digit < 0 || digit>9) { 31 | throw new NumberFormatException("For: "+new String(bytes, offset, length)); 32 | } else { 33 | result *= RADIX; 34 | result -= digit; 35 | nDigits++; 36 | } 37 | index++; 38 | } 39 | 40 | double mantissa = -result; 41 | int negExponent = length-(dotIndex-offset)-1; 42 | 43 | if (nDigits <= JDKDoubleParser.maxDecimalDigits) { 44 | if (negExponent == 0 || mantissa == 0.0) { 45 | return (isNegative) ? -mantissa : mantissa; 46 | } 47 | double rValue = mantissa / JDKDoubleParser.small10pow[negExponent]; 48 | return (isNegative) ? 
-rValue : rValue; 49 | } else { //harder case, use JDK implementation 50 | return fallBack.parse(bytes, offset, length); 51 | } 52 | } 53 | } -------------------------------------------------------------------------------- /src/main/java/uk/elementarysoftware/quickcsv/decoder/ints/ExceptionHelper.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.decoder.ints; 2 | 3 | class ExceptionHelper { 4 | static NumberFormatException nfExceptionFor(byte[] in, int startIndex, int len) { 5 | return new NumberFormatException("For: "+new String(in, startIndex, len)); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/uk/elementarysoftware/quickcsv/decoder/ints/IntParser.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.decoder.ints; 2 | 3 | public interface IntParser { 4 | public int parse(byte[] in, int startIndex, int length); 5 | 6 | default public int parse(String s) { 7 | return parse(s.getBytes(), 0, s.length()); 8 | }; 9 | } 10 | -------------------------------------------------------------------------------- /src/main/java/uk/elementarysoftware/quickcsv/decoder/ints/LongParser.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.decoder.ints; 2 | 3 | public interface LongParser { 4 | public long parse(byte[] in, int startIndex, int length); 5 | 6 | default public long parse(String s) { 7 | return parse(s.getBytes(), 0, s.length()); 8 | }; 9 | } -------------------------------------------------------------------------------- /src/main/java/uk/elementarysoftware/quickcsv/decoder/ints/QuickIntParser.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.decoder.ints; 2 | import static 
uk.elementarysoftware.quickcsv.decoder.ints.ExceptionHelper.*;

/**
 * Parses a signed decimal {@code int} straight from a range of ASCII bytes, without
 * building an intermediate String.
 */
public class QuickIntParser implements IntParser {

    private static final int radix = 10;

    /**
     * Parses {@code in[startIndex .. startIndex + len)} as a decimal integer with an
     * optional leading '+' or '-'.
     *
     * Every failure (empty range, lone sign, non-digit byte, value out of int range) is
     * reported by throwing whatever {@code ExceptionHelper.nfExceptionFor} builds.
     *
     * @param in         buffer holding the text
     * @param startIndex index of the first byte to read
     * @param len        number of bytes to read
     * @return the parsed value
     */
    @Override
    public int parse(final byte[] in, final int startIndex, final int len) {
        if (len <= 0) {
            throw nfExceptionFor(in, startIndex, len);
        }

        final int end = startIndex + len;
        int index = startIndex;
        boolean negative = false;
        int limit = -Integer.MAX_VALUE;

        final byte firstByte = in[index];
        if (firstByte < '0') { // possible leading "+" or "-"
            if (firstByte == '-') {
                negative = true;
                limit = Integer.MIN_VALUE;
            } else if (firstByte != '+') {
                throw nfExceptionFor(in, startIndex, len);
            }
            if (len == 1) { // cannot have a lone "+" or "-"
                throw nfExceptionFor(in, startIndex, len);
            }
            index++;
        }

        final int multmin = limit / radix;
        int result = 0;
        while (index < end) {
            // Accumulating negatively avoids surprises near MAX_VALUE
            final int digit = in[index++] - '0';
            if (digit < 0 || digit >= radix) {
                throw nfExceptionFor(in, startIndex, len);
            }
            if (result < multmin) { // multiplying by radix would overflow
                throw nfExceptionFor(in, startIndex, len);
            }
            result *= radix;
            if (result < limit + digit) { // subtracting the digit would overflow
                throw nfExceptionFor(in, startIndex, len);
            }
            result -= digit;
        }
        return negative ? result : -result;
    }
}
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/ints/QuickLongParser.java:
--------------------------------------------------------------------------------
package uk.elementarysoftware.quickcsv.decoder.ints;
import static uk.elementarysoftware.quickcsv.decoder.ints.ExceptionHelper.*;

/**
 * Parses a signed decimal {@code long} straight from a range of ASCII bytes, without
 * building an intermediate String.
 */
public class QuickLongParser implements LongParser {

    private static final int radix = 10;

    /**
     * Parses {@code in[startIndex .. startIndex + len)} as a decimal long with an
     * optional leading '+' or '-'.
     *
     * Every failure (empty range, lone sign, non-digit byte, value out of long range) is
     * reported by throwing whatever {@code ExceptionHelper.nfExceptionFor} builds.
     *
     * @param in         buffer holding the text
     * @param startIndex index of the first byte to read
     * @param len        number of bytes to read
     * @return the parsed value
     */
    @Override
    public long parse(final byte[] in, final int startIndex, final int len) {
        if (len <= 0) {
            throw nfExceptionFor(in, startIndex, len);
        }

        final int end = startIndex + len;
        int index = startIndex;
        boolean negative = false;
        long limit = -Long.MAX_VALUE;

        final byte firstByte = in[index];
        if (firstByte < '0') { // possible leading "+" or "-"
            if (firstByte == '-') {
                negative = true;
                limit = Long.MIN_VALUE;
            } else if (firstByte != '+') {
                throw nfExceptionFor(in, startIndex, len);
            }
            if (len == 1) { // cannot have a lone "+" or "-"
                throw nfExceptionFor(in, startIndex, len);
            }
            index++;
        }

        final long multmin = limit / radix;
        long result = 0;
        while (index < end) {
            // Accumulating negatively avoids surprises near MAX_VALUE
            final int digit = in[index++] - '0';
            if (digit < 0 || digit >= radix) {
                throw nfExceptionFor(in, startIndex, len);
            }
            if (result < multmin) { // multiplying by radix would overflow
                throw nfExceptionFor(in, startIndex, len);
            }
            result *= radix;
            if (result < limit + digit) { // subtracting the digit would overflow
                throw nfExceptionFor(in, startIndex, len);
            }
            result -= digit;
        }
        return negative ? result : -result;
    }
}
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/functional/Pair.java:
--------------------------------------------------------------------------------
package uk.elementarysoftware.quickcsv.functional;

import java.util.Objects;

/**
 * Container to ease passing around a tuple of two objects. This object provides a sensible
 * implementation of equals(), returning true if equals() is true on each of the contained
 * objects.
 *
 * @param <F> type of the first element
 * @param <S> type of the second element
 */
public class Pair<F, S> {

    public final F first;
    public final S second;

    /**
     * Constructor for a Pair.
     *
     * @param first the first object in the Pair
     * @param second the second object in the pair
     */
    public Pair(F first, S second) {
        this.first = first;
        this.second = second;
    }

    /**
     * Checks the two objects for equality by delegating to their respective
     * {@link Object#equals(Object)} methods.
     *
     * @param o the {@link Pair} to which this one is to be checked for equality
     * @return true if the underlying objects of the Pair are both considered
     *         equal
     */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof Pair)) {
            return false;
        }
        Pair<?, ?> p = (Pair<?, ?>) o;
        return Objects.equals(p.first, first) && Objects.equals(p.second, second);
    }

    /**
     * Compute a hash code using the hash codes of the underlying objects.
     * The combination is order-sensitive, so (a, b) and (b, a) normally hash differently
     * and (a, a) does not collapse to 0 as a plain XOR would.
     *
     * @return a hashcode of the Pair
     */
    @Override
    public int hashCode() {
        return 31 * Objects.hashCode(first) + Objects.hashCode(second);
    }

    @Override
    public String toString() {
        return first+"="+second;
    }

    /**
     * Convenience method for creating an appropriately typed pair.
     * @param a the first object in the Pair
     * @param b the second object in the pair
     * @param <A> type of left element
     * @param <B> type of right element
     * @return a Pair that is templatized with the types of a and b
     */
    public static <A, B> Pair<A, B> of(A a, B b) {
        return new Pair<A, B>(a, b);
    }
}
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/functional/PrimitiveFunctions.java:
--------------------------------------------------------------------------------
package uk.elementarysoftware.quickcsv.functional;

/**
 * Functional interfaces that take primitive {@code char} arguments, so callers can pass
 * characters without boxing them.
 */
public class PrimitiveFunctions {

    /** Function of one char producing a T. */
    @FunctionalInterface
    public static interface FunCharToT<T> {
        public T apply(char c);
    }

    /** Function of two chars producing a T. */
    @FunctionalInterface
    public static interface FunBiCharToT<T> {
        public T apply(char c, char q);
    }

    /** Predicate over one char. */
    @FunctionalInterface
    public static interface FunCharToBoolean {
        public boolean apply(char c);
    }

    /** Predicate over two chars. */
    @FunctionalInterface
    public static interface FunBiCharToBoolean {
        public boolean apply(char c, char q);
    }
}
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/ioutils/IOUtils.java:
--------------------------------------------------------------------------------
package uk.elementarysoftware.quickcsv.ioutils;

import java.io.Closeable;
import java.io.IOException;

public class IOUtils {

    /**
     * Closes the given resource, ignoring a null argument and swallowing any
     * IOException raised by close(). Deliberately best-effort.
     *
     * @param closeable resource to close, may be null
     */
    public static void closeQuietly(Closeable closeable) {
        try {
            if (closeable != null) {
                closeable.close();
            }
        } catch (IOException ioe) {
            // ignore
        }
    }
}

--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/parser/BufferPool.java:
--------------------------------------------------------------------------------
package
uk.elementarysoftware.quickcsv.parser; 2 | 3 | import java.util.Queue; 4 | import java.util.concurrent.ConcurrentLinkedQueue; 5 | import java.util.concurrent.atomic.AtomicInteger; 6 | 7 | /** Pools large, long-living byte arrays to minimise old generation GC */ 8 | class BufferPool { 9 | 10 | private final int bufferSize; 11 | private final AtomicInteger buffersCreated = new AtomicInteger(0); 12 | private final Queue buffers = new ConcurrentLinkedQueue(); 13 | 14 | BufferPool(int bufferSize) { 15 | this.bufferSize = bufferSize; 16 | } 17 | 18 | byte[] getBuffer() { 19 | byte[] result = buffers.poll(); 20 | if (result == null) { 21 | buffersCreated.incrementAndGet(); 22 | return new byte[bufferSize]; 23 | } else { 24 | return result; 25 | } 26 | } 27 | 28 | void handBack(byte[] buffer) { 29 | buffers.add(buffer); 30 | if (buffers.size() >= buffersCreated.get()) { 31 | clear(); 32 | } 33 | } 34 | 35 | private void clear() { 36 | buffers.clear(); 37 | buffersCreated.set(0); 38 | } 39 | } -------------------------------------------------------------------------------- /src/main/java/uk/elementarysoftware/quickcsv/parser/ByteArrayField.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.parser; 2 | 3 | import java.nio.ByteBuffer; 4 | import java.nio.charset.Charset; 5 | 6 | import uk.elementarysoftware.quickcsv.api.Field; 7 | import uk.elementarysoftware.quickcsv.decoder.Decoder; 8 | 9 | public class ByteArrayField implements Field { 10 | 11 | public static final ByteArrayField EMPTY = new ByteArrayField(new byte[0], 0, 0, null); 12 | 13 | private final Decoder decoder; 14 | 15 | private byte[] buffer; 16 | private int start; 17 | private int end; 18 | private Character quote; //if not null indicates that value was actually quoted 19 | 20 | public ByteArrayField(byte[] buffer, int startIndex, int endIndex, Charset charset) { 21 | this(buffer, startIndex, endIndex, charset, null); 22 | } 23 | 
24 | public ByteArrayField(byte[] buffer, int startIndex, int endIndex, Charset charset, Character quote) { 25 | this.buffer = buffer; 26 | this.start = startIndex; 27 | this.end = endIndex; 28 | this.quote = quote; 29 | this.decoder = new Decoder(charset); 30 | } 31 | 32 | @Override 33 | public ByteBuffer raw() { 34 | return ByteBuffer.wrap(buffer, start, end - start); 35 | } 36 | 37 | @Override 38 | public String asString() { 39 | String result = decoder.decodeToString(buffer, start, end - start); 40 | if (quote != null && result.indexOf(quote) >= 0) { 41 | //TODO: optimise and add more flexible escape character 42 | //flag indicating if an escaped quote was seen can be passed from the parser itself as state 43 | return result.replace(new StringBuffer().append(quote).append(quote), new StringBuffer().append(quote)); 44 | } else { 45 | return result; 46 | } 47 | } 48 | 49 | @Override 50 | public double asDouble() { 51 | return decoder.decodeToDouble(buffer, start, end - start); 52 | } 53 | 54 | @Override 55 | public byte asByte() { 56 | return (byte) asInt(); 57 | } 58 | 59 | @Override 60 | public char asChar() { 61 | return (char) asInt(); 62 | } 63 | 64 | @Override 65 | public short asShort() { 66 | return (short) asInt(); 67 | } 68 | 69 | @Override 70 | public int asInt() { 71 | return decoder.decodeToInt(buffer, start, end - start); 72 | } 73 | 74 | @Override 75 | public long asLong() { 76 | return decoder.decodeToLong(buffer, start, end - start); 77 | } 78 | 79 | void modifyBounds(int start, int end) { //re-use object to reduce GC overhead 80 | this.start = start; 81 | this.end = end; 82 | this.quote = null; 83 | } 84 | 85 | void modifyBounds(int start, int end, Character quote) { 86 | this.start = start; 87 | this.end = end; 88 | this.quote = quote; 89 | } 90 | 91 | public void initFrom(ByteArrayField other) { 92 | this.buffer = other.buffer; 93 | this.start = other.start; 94 | this.end = other.end; 95 | this.quote = other.quote; 96 | } 97 | 98 | @Override 
99 | public Field clone() { 100 | return new ByteArrayField(buffer, start, end, decoder.getCharset(), quote); 101 | } 102 | 103 | @Override 104 | public boolean isEmpty() { 105 | return start >= end; 106 | } 107 | 108 | @Override 109 | public Double asBoxedDouble() { 110 | return isEmpty() ? null : asDouble(); 111 | } 112 | 113 | @Override 114 | public Integer asBoxedInt() { 115 | return isEmpty() ? null : asInt(); 116 | } 117 | } -------------------------------------------------------------------------------- /src/main/java/uk/elementarysoftware/quickcsv/parser/ByteSlice.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.parser; 2 | 3 | import java.nio.charset.Charset; 4 | 5 | import uk.elementarysoftware.quickcsv.api.ByteArraySource.ByteArrayChunk; 6 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder.CSVFileMetadata; 7 | import uk.elementarysoftware.quickcsv.functional.Pair; 8 | import uk.elementarysoftware.quickcsv.functional.PrimitiveFunctions.FunBiCharToBoolean; 9 | import uk.elementarysoftware.quickcsv.functional.PrimitiveFunctions.FunBiCharToT; 10 | import uk.elementarysoftware.quickcsv.functional.PrimitiveFunctions.FunCharToBoolean; 11 | import uk.elementarysoftware.quickcsv.functional.PrimitiveFunctions.FunCharToT; 12 | 13 | 14 | public interface ByteSlice { 15 | static final byte CR = 0xD; 16 | static final byte LF = 0xA; 17 | 18 | public static ByteSlice wrap(ByteArrayChunk it, Charset charset) { 19 | return new SingleByteSlice(it, charset); 20 | } 21 | 22 | public static ByteSlice empty() { 23 | return wrap(ByteArrayChunk.EMPTY, null); 24 | } 25 | 26 | public static ByteSlice join(ByteSlice prefix, ByteSlice suffix) { 27 | return new CompositeByteSlice((SingleByteSlice) prefix, (SingleByteSlice) suffix); 28 | } 29 | 30 | public Pair splitOnLastLineEnd(); 31 | 32 | public boolean nextLine(); 33 | 34 | /** 35 | * Skip until next occurrence of c character. 
False if not found and end of slice is reached 36 | * @param c - character on which to break 37 | * @return true if character was actually found, false if end of slice reached 38 | */ 39 | public boolean skipUntil(final char c); 40 | 41 | public boolean skipUntil(final char c, final char quote); 42 | 43 | /** 44 | * Returns next field and advances to next field. Returns null when end of line or end of slice is reached. 45 | * @param c - character that indicates field boundary 46 | * @return object to access field content 47 | */ 48 | public ByteArrayField nextField(final char c); 49 | 50 | public ByteArrayField nextField(final char c, final char quote); 51 | 52 | public int size(); 53 | 54 | public boolean hasMoreData(); 55 | 56 | default public boolean isEmpty() { 57 | return !hasMoreData(); 58 | } 59 | 60 | /** 61 | * String representation of current line. Mainly for debug purposes, can return broken line when in composite slice. 62 | * @return current line 63 | */ 64 | public String currentLine(); 65 | 66 | default public void skipField(final CSVFileMetadata metadata) { 67 | if (metadata.quote.isPresent()) 68 | skipUntil(metadata.separator, metadata.quote.get()); 69 | else 70 | skipUntil(metadata.separator); 71 | } 72 | 73 | default public ByteArrayField getNextField(final CSVFileMetadata metadata) { 74 | if (metadata.quote.isPresent()) 75 | return nextField(metadata.separator, metadata.quote.get()); 76 | else 77 | return nextField(metadata.separator); 78 | } 79 | 80 | public void incrementUse(); 81 | 82 | public void decremenentUse(); 83 | 84 | } 85 | 86 | final class SingleByteSlice implements ByteSlice { 87 | final int start;//inclusive 88 | final int end;//exclusive 89 | final byte[] buffer; 90 | final ByteArrayField fieldTemplateObject; 91 | final Charset charset; 92 | final ByteArrayChunk src; 93 | 94 | int currentIndex; 95 | 96 | public SingleByteSlice(ByteArrayChunk src, Charset charset) { 97 | this(src, src.getData(), 0, src.getLength(), charset); 98 | 
} 99 | 100 | public SingleByteSlice(ByteArrayChunk src, byte[] buffer, int start, int end, Charset charset) { 101 | this.src = src; 102 | this.buffer = buffer; 103 | this.start = start; 104 | this.end = end; 105 | this.fieldTemplateObject = new ByteArrayField(buffer, 0, 0, charset); 106 | this.currentIndex = start; 107 | this.charset = charset; 108 | } 109 | 110 | @Override 111 | public int size() { 112 | return end - start; 113 | } 114 | 115 | @Override 116 | public boolean hasMoreData() { 117 | return currentIndex < end; 118 | } 119 | 120 | boolean frontTrim() { 121 | boolean seenEOL = false; 122 | for(; hasMoreData() && (buffer[currentIndex]==CR || buffer[currentIndex]==LF); currentIndex++) { 123 | seenEOL = true; 124 | } 125 | return seenEOL; 126 | } 127 | 128 | @Override 129 | public boolean nextLine() { 130 | for(; hasMoreData() && buffer[currentIndex]!=CR && buffer[currentIndex]!=LF; currentIndex++); 131 | return frontTrim(); 132 | } 133 | 134 | public String currentLine() { 135 | int startIdx = currentIndex; 136 | for(; startIdx > start && buffer[startIdx]!=CR && buffer[startIdx]!=LF; startIdx--); 137 | int endIdx = currentIndex; 138 | for(; endIdx < end && buffer[endIdx]!=CR && buffer[endIdx]!=LF; endIdx++); 139 | return new String(buffer, startIdx, endIdx - startIdx); 140 | } 141 | 142 | public Pair splitOnLastLineEnd() { 143 | int i = end-1; 144 | for (;i >=currentIndex && buffer[i] != LF; i--); 145 | SingleByteSlice prefix = new SingleByteSlice(src, buffer, currentIndex, i+1, charset); 146 | SingleByteSlice suffix = new SingleByteSlice(src, buffer, i+1, end, charset); 147 | return Pair.of(prefix, suffix); 148 | } 149 | 150 | public boolean skipUntil(final char c) { 151 | boolean isFound = false; 152 | while(currentIndex < end) { 153 | if (buffer[currentIndex]==c) { 154 | currentIndex++; 155 | isFound = true; 156 | break; 157 | } 158 | currentIndex++; 159 | } 160 | return isFound; 161 | } 162 | 163 | public boolean skipUntil(char c, char q) { 164 | 
boolean inQuote = currentIndex < buffer.length && buffer[currentIndex] == q; 165 | if (!inQuote) return skipUntil(c); 166 | currentIndex++; 167 | boolean isFound = false; 168 | while(currentIndex < end) { 169 | if (buffer[currentIndex]==c && buffer[currentIndex-1] == q) { 170 | currentIndex++; 171 | isFound = true; 172 | break; 173 | } 174 | currentIndex++; 175 | } 176 | return isFound; 177 | } 178 | 179 | public ByteArrayField nextField(final char c) { 180 | int startIndex = currentIndex; 181 | int endIndex = currentIndex; 182 | while(currentIndex < end) { 183 | byte cur = buffer[currentIndex]; 184 | if (cur == c || cur == CR || cur == LF) { 185 | endIndex = currentIndex; 186 | if (cur == c) 187 | currentIndex++; 188 | break; 189 | } else { 190 | currentIndex++; 191 | } 192 | } 193 | if (currentIndex == startIndex) return null; 194 | if (currentIndex == end) endIndex = end; 195 | fieldTemplateObject.modifyBounds(startIndex, endIndex); 196 | return fieldTemplateObject; 197 | } 198 | 199 | @Override 200 | public ByteArrayField nextField(char c, char q) { 201 | boolean inQuote = currentIndex < buffer.length && buffer[currentIndex] == q; 202 | if (!inQuote) return nextField(c); 203 | currentIndex++; 204 | int startIndex = currentIndex; 205 | int endIndex = currentIndex; 206 | while(currentIndex < end) { 207 | byte cur = buffer[currentIndex]; 208 | if ((cur == c || cur == CR || cur == LF) && buffer[currentIndex-1] == q) {//there is an issue when we have escaped quote and then separator, but we ignore it for now 209 | endIndex = currentIndex - 1; 210 | if (cur == c) currentIndex++; //let frontTrim consume linebreaks later 211 | break; 212 | } else { 213 | currentIndex++; 214 | } 215 | } 216 | if (currentIndex == startIndex) return null; 217 | if (currentIndex == end) { 218 | if (buffer[end-1] == q) endIndex = end - 1; else endIndex = end; 219 | } 220 | fieldTemplateObject.modifyBounds(startIndex, endIndex, q); 221 | return fieldTemplateObject; 222 | } 223 | 224 | 
@Override 225 | public String toString() { 226 | return new String(buffer, start, size()); 227 | } 228 | 229 | @Override 230 | public void incrementUse() { 231 | src.incrementUseCount(); 232 | } 233 | 234 | @Override 235 | public void decremenentUse() { 236 | src.decrementUseCount(); 237 | } 238 | } 239 | 240 | final class CompositeByteSlice implements ByteSlice { 241 | 242 | private final SingleByteSlice prefix; 243 | private final SingleByteSlice suffix; 244 | private final ByteArrayField prefixFieldTemplateObject; 245 | private final ByteArrayField suffixFieldTemplateObject; 246 | 247 | private FunCharToT nextFieldFun; 248 | private FunBiCharToT nextFieldFunQuoted; 249 | private FunCharToBoolean skipUntilFun; 250 | private FunBiCharToBoolean skipUntilFunQuoted; 251 | 252 | CompositeByteSlice(SingleByteSlice prefix, SingleByteSlice suffix) { 253 | this.prefix = prefix; 254 | this.suffix = suffix; 255 | this.prefixFieldTemplateObject = new ByteArrayField(prefix.buffer, 0, 0, prefix.charset); 256 | this.suffixFieldTemplateObject = new ByteArrayField(suffix.buffer, 0, 0, suffix.charset); 257 | 258 | this.nextFieldFun = this::nextFieldWithPrefix; 259 | this.nextFieldFunQuoted = this::nextFieldWithPrefix; 260 | this.skipUntilFun = this::skipUntilWithPrefix; 261 | this.skipUntilFunQuoted = this::skipUntilWithPrefix; 262 | } 263 | 264 | /* 265 | * ----------------------------------------------------------- 266 | * Generic functions below work on slice with non-empty prefix, but once prefix has been 267 | * exhausted they will flip to simple suffix delegates. 268 | * Only frequently called functions are implemented that way. 
269 | * ----------------------------------------------------------- 270 | */ 271 | private ByteArrayField nextFieldWithPrefix(char c) { 272 | if (prefix.isEmpty()) { 273 | flip(); 274 | return suffix.nextField(c); 275 | } 276 | int startIndex = currentIndex(); 277 | int endIndex = currentIndex(); 278 | byte cur = 0; 279 | while(hasMoreData()) { 280 | cur = currentByte(); 281 | if (cur == c || cur == CR || cur == LF) { 282 | endIndex = currentIndex(); 283 | if (cur == c) 284 | nextByte(); 285 | break; 286 | } else { 287 | nextByte(); 288 | } 289 | } 290 | if (currentIndex() == startIndex) return null; 291 | if (cur != c && !hasMoreData()) endIndex = prefix.end + suffix.end; 292 | return createField(startIndex, endIndex, null); 293 | } 294 | 295 | private ByteArrayField nextFieldWithPrefix(char c, char quote) { 296 | if (prefix.isEmpty()) { 297 | flip(); 298 | return suffix.nextField(c, quote); 299 | } 300 | boolean inQuote = hasMoreData() && currentByte() == quote; 301 | if (!inQuote) return nextField(c); 302 | nextByte(); 303 | int startIndex = currentIndex(); 304 | int endIndex = currentIndex(); 305 | while(hasMoreData()) { 306 | byte cur = currentByte(); 307 | if ((cur == c || cur == CR || cur == LF) && prevByte() == quote) { 308 | endIndex = currentIndex() - 1; 309 | if (cur == c) 310 | nextByte(); 311 | break; 312 | } else { 313 | nextByte(); 314 | } 315 | } 316 | if (currentIndex() == startIndex) return null; 317 | if (isEmpty()) { 318 | if (prevByte() == quote) endIndex = currentIndex() - 1; else endIndex = currentIndex(); 319 | } 320 | return createField(startIndex, endIndex, quote); 321 | } 322 | 323 | private boolean skipUntilWithPrefix(char c) { 324 | if (prefix.isEmpty()) { 325 | flip(); 326 | return suffix.skipUntil(c); 327 | } 328 | boolean isFound = prefix.skipUntil(c); 329 | if (isFound) { 330 | return true; 331 | } else { 332 | return suffix.skipUntil(c); 333 | } 334 | } 335 | 336 | private boolean skipUntilWithPrefix(char c, char q) { 337 | if 
(prefix.isEmpty()) { 338 | flip(); 339 | return suffix.skipUntil(c, q); 340 | } 341 | boolean isFound = prefix.skipUntil(c, q); 342 | if (isFound) { 343 | return true; 344 | } else { 345 | return suffix.skipUntil(c, q); 346 | } 347 | } 348 | 349 | private void flip() { 350 | this.nextFieldFun = suffix::nextField; 351 | this.nextFieldFunQuoted = suffix::nextField; 352 | this.skipUntilFun = suffix::skipUntil; 353 | this.skipUntilFunQuoted = suffix::skipUntil; 354 | } 355 | /* 356 | * ----------------------------------------------------------- 357 | * end 358 | * ----------------------------------------------------------- 359 | */ 360 | 361 | @Override 362 | public Pair splitOnLastLineEnd() { 363 | Pair sliced = suffix.splitOnLastLineEnd(); 364 | return Pair.of(ByteSlice.join(this.prefix, sliced.first), sliced.second); 365 | } 366 | 367 | @Override 368 | public int size() { 369 | return prefix.size() + suffix.size(); 370 | } 371 | 372 | @Override 373 | public boolean hasMoreData() { 374 | return prefix.hasMoreData() || suffix.hasMoreData(); 375 | } 376 | 377 | @Override 378 | public ByteArrayField nextField(char c) { 379 | return nextFieldFun.apply(c); 380 | } 381 | 382 | @Override 383 | public ByteArrayField nextField(char c, char quote) { 384 | return nextFieldFunQuoted.apply(c, quote); 385 | } 386 | 387 | @Override 388 | public boolean skipUntil(char c) { 389 | return skipUntilFun.apply(c); 390 | } 391 | 392 | @Override 393 | public boolean skipUntil(char c, char q) { 394 | return skipUntilFunQuoted.apply(c, q); 395 | } 396 | 397 | @Override 398 | public boolean nextLine() { 399 | if (prefix.isEmpty()) { 400 | return suffix.nextLine(); 401 | } else { 402 | boolean seenEOL = prefix.nextLine(); 403 | if (seenEOL) { 404 | if (prefix.isEmpty()) suffix.frontTrim(); 405 | return true; 406 | } else { 407 | return suffix.nextLine(); 408 | } 409 | } 410 | } 411 | 412 | boolean frontTrim() { 413 | return prefix.isEmpty() ? 
suffix.frontTrim() : prefix.frontTrim(); 414 | } 415 | 416 | @Override 417 | public String currentLine() { 418 | return prefix.isEmpty() ? suffix.currentLine() : prefix.currentLine(); 419 | } 420 | 421 | private ByteArrayField createField(int startIndex, int endIndex, Character quote) { 422 | if (startIndex >= prefix.end) { 423 | suffixFieldTemplateObject.modifyBounds(startIndex - prefix.end, endIndex - prefix.end, quote); 424 | return suffixFieldTemplateObject; 425 | } 426 | if (endIndex < prefix.end) { 427 | prefixFieldTemplateObject.modifyBounds(startIndex, endIndex, quote); 428 | return prefixFieldTemplateObject; 429 | } 430 | byte[] result = new byte[endIndex - startIndex]; 431 | System.arraycopy(prefix.buffer, startIndex, result, 0, prefix.end - startIndex); 432 | System.arraycopy(suffix.buffer, 0, result, prefix.end - startIndex, endIndex - prefix.end); 433 | return new ByteArrayField(result, 0, result.length, prefix.charset, quote); 434 | } 435 | 436 | @Override 437 | public String toString() { 438 | return new StringBuffer().append(prefix).append(suffix).toString(); 439 | } 440 | 441 | byte prevByte() { 442 | if (suffix.currentIndex > suffix.start) return suffix.buffer[suffix.currentIndex - 1]; 443 | return prefix.buffer[prefix.currentIndex - 1]; 444 | } 445 | 446 | 447 | byte currentByte() { 448 | return prefix.isEmpty() ? 
suffix.buffer[suffix.currentIndex] : prefix.buffer[prefix.currentIndex]; 449 | } 450 | 451 | void nextByte() { 452 | if (prefix.isEmpty()) suffix.currentIndex++; else prefix.currentIndex++; 453 | } 454 | 455 | int currentIndex() { 456 | return prefix.currentIndex + suffix.currentIndex; 457 | } 458 | 459 | @Override 460 | public void decremenentUse() { 461 | prefix.src.decrementUseCount(); 462 | suffix.src.decrementUseCount(); 463 | } 464 | 465 | @Override 466 | public void incrementUse() { 467 | throw new IllegalStateException("Should not be called"); 468 | } 469 | } 470 | -------------------------------------------------------------------------------- /src/main/java/uk/elementarysoftware/quickcsv/parser/FieldSubsetView.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.parser; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Arrays; 5 | import java.util.EnumMap; 6 | import java.util.List; 7 | import java.util.Map; 8 | 9 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder.CSVFileMetadata; 10 | 11 | /** 12 | * Provides view on the CSVRecord that focuses on particular subset of fields. 13 | * 14 | * Within the view fields can be accessed by index in order of the subset or by field enumeration K. 15 | * @param - enum containing list of fields that form the subset 16 | */ 17 | public class FieldSubsetView> { 18 | 19 | private final HeaderSource headerSource; 20 | private final Class fieldSubset; 21 | 22 | private boolean isFirstSlice = true; 23 | 24 | private int[] headerIndexesOfK; 25 | private int[] parseOrderToSourceOrder; 26 | private int[] fieldSkipSchedule; 27 | 28 | private FieldSubsetView(HeaderSource headerSource, Class fieldSubset) { 29 | this.headerSource = headerSource; 30 | this.fieldSubset = fieldSubset; 31 | } 32 | 33 | public static > FieldSubsetView forExplicitHeader(Class fieldsToSource, String... 
header) { 34 | return new FieldSubsetView<>(new HeaderSource.ExplicitHeader(header), fieldsToSource); 35 | } 36 | 37 | public static > FieldSubsetView forSourceSuppliedHeader(Class fieldsToSource) { 38 | return forSourceSuppliedHeader(fieldsToSource, 0); 39 | } 40 | 41 | public static > FieldSubsetView forSourceSuppliedHeader(Class fieldsToSource, int headerRowIndexInFile) { 42 | return new FieldSubsetView<>(new HeaderSource.SourceSuppliedHeader(headerRowIndexInFile), fieldsToSource); 43 | } 44 | 45 | public void onSlice(ByteSlice slice, CSVFileMetadata metadata) { 46 | if (isFirstSlice) { 47 | headerSource.onSlice(slice, metadata); 48 | initLookups(); 49 | isFirstSlice = false; 50 | } 51 | } 52 | 53 | private void initLookups() { 54 | List header = headerSource.getHeader(); 55 | headerIndexesOfK = getHeaderIndexesOfK(header); 56 | Map fieldToHeaderIndex = new EnumMap(fieldSubset); 57 | for (K k : fieldSubset.getEnumConstants()) { 58 | fieldToHeaderIndex.put(k, header.indexOf(k.toString())); 59 | } 60 | 61 | this.fieldSkipSchedule = new int[headerIndexesOfK.length]; 62 | int lastFieldIndex = -1; 63 | for (int i = 0; i < headerIndexesOfK.length; i++) { 64 | int idx = headerIndexesOfK[i]; 65 | int nSkip = idx - lastFieldIndex - 1; 66 | fieldSkipSchedule[i] = nSkip; 67 | lastFieldIndex = idx; 68 | } 69 | 70 | parseOrderToSourceOrder = new int[getFieldSubsetSize()]; 71 | K[] ks = fieldSubset.getEnumConstants(); 72 | for (int i = 0; i < ks.length; i++) { 73 | int headerIdx = fieldToHeaderIndex.get(ks[i]); 74 | parseOrderToSourceOrder[i] = Arrays.binarySearch(headerIndexesOfK, headerIdx); 75 | } 76 | } 77 | 78 | private int[] getHeaderIndexesOfK(List header) { 79 | K[] ks = fieldSubset.getEnumConstants(); 80 | int[] result = new int[ks.length]; 81 | for (int i = 0; i < result.length; i++) { 82 | if ((result[i] = header.indexOf(ks[i].toString())) == -1) { 83 | throw new RuntimeException("Field not found in header: "+ks[i].toString()); 84 | } 85 | } 86 | 
Arrays.sort(result); 87 | return result; 88 | } 89 | 90 | int[] getFieldIndexes() { 91 | return headerIndexesOfK; 92 | } 93 | 94 | public Class getFieldSubset() { 95 | return fieldSubset; 96 | } 97 | 98 | int[] getFieldSkipSchedule() { 99 | return fieldSkipSchedule; 100 | } 101 | 102 | List getHeader() { 103 | return headerSource.getHeader(); 104 | } 105 | 106 | int indexOfInSourceView(int parseIdx) { 107 | return parseOrderToSourceOrder[parseIdx]; 108 | } 109 | 110 | int getFieldSubsetSize() { 111 | return fieldSubset.getEnumConstants().length; 112 | } 113 | 114 | public static abstract class HeaderSource { 115 | 116 | private HeaderSource() {} 117 | 118 | abstract void onSlice(ByteSlice slice, CSVFileMetadata metadata); 119 | abstract List getHeader(); 120 | 121 | private static class ExplicitHeader extends HeaderSource { 122 | private final String[] header; 123 | 124 | public ExplicitHeader(String[] header) { 125 | this.header = header; 126 | } 127 | 128 | @Override 129 | List getHeader() { 130 | return Arrays.asList(header); 131 | } 132 | 133 | @Override 134 | void onSlice(ByteSlice slice, CSVFileMetadata metadata) {} 135 | } 136 | 137 | private static class SourceSuppliedHeader extends HeaderSource { 138 | 139 | private final int headerIndex; 140 | private List header; 141 | 142 | public SourceSuppliedHeader(int headerIndex) { 143 | this.headerIndex = headerIndex; 144 | } 145 | 146 | @Override 147 | void onSlice(ByteSlice slice, CSVFileMetadata metadata) { 148 | for (int i = 0; i < headerIndex; i++) { 149 | slice.nextLine(); 150 | } 151 | List header = new ArrayList<>(); 152 | ByteArrayField field; 153 | while((field = slice.getNextField(metadata)) != null) { 154 | header.add(field.asString()); 155 | } 156 | slice.nextLine(); 157 | this.header = header; 158 | } 159 | 160 | @Override 161 | List getHeader() { 162 | return header; 163 | } 164 | } 165 | } 166 | } -------------------------------------------------------------------------------- 
/src/main/java/uk/elementarysoftware/quickcsv/parser/InputStreamToByteArraySourceAdapter.java:
--------------------------------------------------------------------------------
package uk.elementarysoftware.quickcsv.parser;

import java.io.IOException;
import java.io.InputStream;

import uk.elementarysoftware.quickcsv.api.ByteArraySource;

/**
 * Exposes an InputStream as a ByteArraySource, filling buffers borrowed from a BufferPool.
 */
class InputStreamToByteArraySourceAdapter implements ByteArraySource {

    private final InputStream is;
    private final BufferPool pool;

    public InputStreamToByteArraySourceAdapter(InputStream is, BufferPool pool) {
        this.is = is;
        this.pool = pool;
    }

    /**
     * Reads the next chunk with a single read() call into a pooled buffer.
     * At end of stream the chunk has length 0 and is flagged as the last one.
     * The chunk is given pool::handBack as its callback.
     *
     * @return chunk wrapping the buffer and the number of bytes read into it
     * @throws IOException if the underlying read fails
     */
    @Override
    public ByteArrayChunk getNext() throws IOException {
        final byte[] target = pool.getBuffer();
        final int bytesRead = is.read(target);
        final boolean endOfStream = (bytesRead == -1);
        // read() reports end of stream as -1; the chunk length must not go negative
        final int length = Math.max(0, bytesRead);
        return new ByteArrayChunk(target, length, endOfStream, pool::handBack);
    }
}

--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/parser/QuickCSVParser.java:
--------------------------------------------------------------------------------
package uk.elementarysoftware.quickcsv.parser;

import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Optional;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import uk.elementarysoftware.quickcsv.api.ByteArraySource;
import uk.elementarysoftware.quickcsv.api.ByteArraySource.ByteArrayChunk;
import uk.elementarysoftware.quickcsv.api.CSVParser;
import uk.elementarysoftware.quickcsv.api.CSVParserBuilder.CSVFileMetadata;
import uk.elementarysoftware.quickcsv.functional.Pair;
import
uk.elementarysoftware.quickcsv.api.CSVRecord;
import uk.elementarysoftware.quickcsv.api.CSVRecordWithHeader;
import uk.elementarysoftware.quickcsv.api.Field;

/**
 * {@link CSVParser} implementation that reads the input chunk by chunk, cuts
 * every chunk on its last line end and exposes each resulting slice as an
 * independently processable spliterator.
 *
 * @param <T> type of the objects produced by the record mapper
 * @param <K> enum naming the header fields (only used by the header-aware constructor)
 */
public class QuickCSVParser<T, K extends Enum<K>> implements CSVParser<T> {

    private final CSVFileMetadata metadata;
    private final int bufferSize;
    private final Function<CSVRecord, T> mapper;
    private final Optional<FieldSubsetView<K>> fieldSubsetView;
    private final Charset charset;

    /**
     * Creates a header-aware parser: records are presented to the mapper as
     * {@link CSVRecordWithHeader} restricted to the given field subset.
     */
    public QuickCSVParser(int bufferSize, CSVFileMetadata metadata, Function<CSVRecordWithHeader<K>, T> mapper,
            FieldSubsetView<K> fieldSubsetView, Charset charset) {
        this.metadata = metadata;
        this.bufferSize = bufferSize;
        this.mapper = cast(mapper);
        this.fieldSubsetView = Optional.of(fieldSubsetView);
        this.charset = charset;
    }

    /**
     * Creates a positional parser: the mapper pulls fields one after another
     * from a plain {@link CSVRecord}.
     */
    public QuickCSVParser(int bufferSize, CSVFileMetadata metadata, Function<CSVRecord, T> mapper, Charset charset) {
        this.metadata = metadata;
        this.bufferSize = bufferSize;
        this.mapper = mapper;
        this.fieldSubsetView = Optional.empty();
        this.charset = charset;
    }

    /**
     * Widens a header-aware mapper to the common mapper type. The cast inside
     * is sound because a field subset view is always present whenever this
     * mapper is used, and {@link #sliceSpliterator} then creates records that
     * implement {@link CSVRecordWithHeader}.
     */
    @SuppressWarnings("unchecked")
    private static <T, K extends Enum<K>> Function<CSVRecord, T> cast(Function<CSVRecordWithHeader<K>, T> f) {
        return r -> f.apply((CSVRecordWithHeader<K>) r);
    }

    /** Parses the stream using buffers of the configured size taken from a fresh pool. */
    @Override
    public Stream<T> parse(InputStream is) {
        BufferPool pool = new BufferPool(bufferSize);
        return parse(new InputStreamToByteArraySourceAdapter(is, pool));
    }

    /** Parses the given source; the returned stream is parallel by default. */
    @Override
    public Stream<T> parse(ByteArraySource bas) {
        return StreamSupport.stream(new SplittingSpliterator(bas), true);
    }

    /**
     * Top-level spliterator. Splitting reads the next chunk and hands it out as
     * a slice spliterator; sequential traversal walks the slices one by one.
     */
    class SplittingSpliterator implements Spliterator<T> {

        private final ByteArraySource bas;

        /** Trailing incomplete line of the previous chunk, prepended to the next slice. */
        private ByteSlice prefix = ByteSlice.empty();
        private boolean isEndReached = false;

        private Spliterator<T> sequentialSplitterator = Spliterators.emptySpliterator();

        SplittingSpliterator(ByteArraySource bas) {
            this.bas = bas;
        }

        /**
         * Usually only called in sequential mode. Iterates (instead of
         * recursing) over slices until one produces an element or the source
         * is exhausted.
         */
        @Override
        public boolean tryAdvance(Consumer<? super T> action) {
            while (true) {
                if (sequentialSplitterator.tryAdvance(action)) {
                    return true;
                }
                ByteSlice next = nextSliceWithData();
                if (next == null) {
                    return false;
                }
                this.sequentialSplitterator = sliceSpliterator(next);
            }
        }

        /** Splits off the next slice that has data, or returns null once the source is exhausted. */
        @Override
        public Spliterator<T> trySplit() {
            ByteSlice next = nextSliceWithData();
            return next == null ? null : sliceSpliterator(next);
        }

        /**
         * Reads slices until one has data or the end of the source is reached.
         * A chunk that is not the last one can yield an empty slice (for
         * example when it contains no line end); that must not end the stream,
         * because its bytes are carried over in {@link #prefix}.
         *
         * @return a slice with data, or null when the source is exhausted
         */
        private ByteSlice nextSliceWithData() {
            while (!isEndReached) {
                ByteSlice candidate = nextSlice();
                if (candidate.hasMoreData()) {
                    return candidate;
                }
            }
            return null;
        }

        /**
         * Reads the next chunk and returns the part of it that consists of
         * whole lines, prefixed with the leftover of the previous chunk.
         */
        private ByteSlice nextSlice() {
            ByteSlice bareSlice = nextBareSlice();
            bareSlice.incrementUse();
            if (isEndReached) {
                return ByteSlice.join(prefix, bareSlice);
            } else {
                Pair<ByteSlice, ByteSlice> sliced = bareSlice.splitOnLastLineEnd();
                ByteSlice result = ByteSlice.join(prefix, sliced.first);
                // NOTE(review): if a chunk contains no line end at all, the old prefix may be
                // emitted on its own here - depends on splitOnLastLineEnd(); confirm.
                this.prefix = sliced.second;
                // The chunk is now referenced twice: by the returned slice and by the new prefix.
                bareSlice.incrementUse();
                return result;
            }
        }

        /** Fetches the next raw chunk, records whether it is the last one and notifies the field view. */
        private ByteSlice nextBareSlice() {
            try {
                ByteArrayChunk it = bas.getNext();
                this.isEndReached = it.isLast();
                ByteSlice slice = ByteSlice.wrap(it, charset);
                fieldSubsetView.ifPresent(view -> view.onSlice(slice, metadata));
                return slice;
            } catch (RuntimeException e) {
                throw e;
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }

        @Override
        public long estimateSize() {
            return Long.MAX_VALUE;
        }

        @Override
        public int characteristics() {
            // NONNULL is deliberately not reported: elements come from the user-supplied
            // mapper and nothing here prevents it from returning null.
            return ORDERED | IMMUTABLE;
        }
    }

    /** Picks the header-aware or the plain slice spliterator depending on the parser configuration. */
    Spliterator<T> sliceSpliterator(ByteSlice slice) {
        return fieldSubsetView.isPresent() ? new LensingByteSliceSpliterator(slice) : new ByteSliceSpliterator(slice);
    }

    /**
     * Iterates over the lines of one slice. The spliterator itself is the
     * {@link CSVRecord} passed to the mapper, so no per-record object is created.
     */
    class ByteSliceSpliterator implements Spliterator<T>, CSVRecord {

        protected final ByteSlice slice;

        /** Guards against decrementing the slice use count more than once. */
        private boolean released = false;

        ByteSliceSpliterator(ByteSlice slice) {
            this.slice = slice;//incoming slice should have no broken lines
        }

        @Override
        public boolean tryAdvance(Consumer<? super T> action) {
            if (!slice.hasMoreData()) {
                release();
                return false;
            }
            advance(action);
            return true;
        }

        /** Drops this spliterator's reference to the slice exactly once, even if tryAdvance is called again. */
        private void release() {
            if (!released) {
                released = true;
                slice.decremenentUse();
            }
        }

        /** Maps the current line, passes the result to the action and moves to the next line. */
        protected void advance(Consumer<? super T> action) {
            T t = mapper.apply(this);
            action.accept(t);
            slice.nextLine();
        }

        @Override
        public Spliterator<T> trySplit() {
            return null;
        }

        @Override
        public long estimateSize() {
            return slice.size();
        }

        @Override
        public int characteristics() {
            // See SplittingSpliterator.characteristics(): the mapper may return null.
            return ORDERED | IMMUTABLE;
        }

        @Override
        public void skipField() {
            slice.skipField(metadata);
        }

        @Override
        public void skipFields(int nFields) {
            for (int i = 0; i < nFields; i++) {
                skipField();
            }
        }

        @Override
        public ByteArrayField getNextField() {
            return slice.getNextField(metadata);
        }
    }

    /**
     * Slice spliterator for header-aware parsing: before each record is mapped,
     * the fields selected by the view are parsed into reusable templates that
     * {@link #getField} then serves by enum constant.
     */
    class LensingByteSliceSpliterator extends ByteSliceSpliterator implements CSVRecordWithHeader<K> {

        private final FieldSubsetView<K> view;
        private final ByteArrayField[] fieldTemplates;

        public LensingByteSliceSpliterator(ByteSlice slice) {
            super(slice);
            this.view = fieldSubsetView.get();
            this.fieldTemplates = new ByteArrayField[view.getFieldSubsetSize()];
            for (int i = 0; i < fieldTemplates.length; i++) {
                fieldTemplates[i] = new ByteArrayField(null, -1, -1, charset);
            }
        }

        /** Parses the selected fields of the current line, then maps it like the plain spliterator. */
        @Override
        protected void advance(Consumer<? super T> action) {
            parseFields();
            super.advance(action);
        }

        private void parseFields() {
            int[] skipSchedule = view.getFieldSkipSchedule();
            for (int i = 0; i < skipSchedule.length; i++) {
                skipFields(skipSchedule[i]);
                ByteArrayField field = super.getNextField();//TODO: init into template directly
                if (field != null) {
                    fieldTemplates[i].initFrom(field);
                } else {
                    // When a line ends with a separator it is very difficult to tell that apart
                    // from overflow, where getNextField() also returns null. Here we assume a
                    // correct field schedule and map null to an empty field.
                    fieldTemplates[i].initFrom(ByteArrayField.EMPTY);
                }
            }
        }

        @Override
        public Field getField(K fieldName) {
            return fieldTemplates[view.indexOfInSourceView(fieldName.ordinal())];
        }

        @Override
        public List<String> getHeader() {
            return view.getHeader();
        }
    }
}
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/decoder/doubles/DoubleParserTest.java:
--------------------------------------------------------------------------------
package uk.elementarysoftware.quickcsv.decoder.doubles;

import static org.junit.Assert.*;

import java.io.File;
import java.io.FileInputStream;
import java.net.URL;
import java.nio.charset.Charset;

import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.junit.Test;

import uk.elementarysoftware.quickcsv.decoder.doubles.DoubleParser;
import uk.elementarysoftware.quickcsv.decoder.doubles.JDKDoubleParserAdapter;
import uk.elementarysoftware.quickcsv.decoder.doubles.QuickDoubleParser;


/** Runs the same checks against the JDK-backed and the quick double parser. */
public class DoubleParserTest {

    private static final double TOLERANCE = 1E-14;

    @Test
    public void testSimpleCases() {
        doTestSimpleCases(new JDKDoubleParserAdapter());
        doTestSimpleCases(new QuickDoubleParser());
    }

    @Test
    public void testBigBuffer() {
        doTestBigBuffer(new JDKDoubleParserAdapter());
        doTestBigBuffer(new QuickDoubleParser());
    }

    @Test
    public void testFile() throws Exception {
        doTestFile(new JDKDoubleParserAdapter());
        doTestFile(new QuickDoubleParser());
    }

    private void doTestSimpleCases(DoubleParser parser) {
        assertEquals(0.0, parser.parse("0"), TOLERANCE);
        assertEquals(3.14159265, parser.parse("3.14159265"), TOLERANCE);
        assertEquals(-93231637.47759183, parser.parse("-93231637.47759183"), TOLERANCE);
        assertEquals(-0.3903, parser.parse("-0.3903"), TOLERANCE);
        assertEquals(2.71828183, parser.parse("2.71828183"), TOLERANCE);
    }

    /** Parses a number located in the middle of a larger byte buffer. */
    private void doTestBigBuffer(DoubleParser parser) {
        String prefix = "anything";
        String middle = "2.71828183";
        String suffix = "anything again";

        byte[] buffer = (prefix + middle + suffix).getBytes();
        double result = parser.parse(buffer, prefix.length(), middle.length());
        assertEquals(2.71828183, result, TOLERANCE);
    }

    /** Compares the parser with Double.parseDouble on every field of the first lines of a sample file. */
    private void doTestFile(DoubleParser parser) throws Exception {
        int nLinesToTest = 500;
        URL fileUrl = getClass().getResource("/cities-dos.txt");
        File file = new File(fileUrl.toURI());
        // try-with-resources: the file stream was previously left open.
        try (FileInputStream in = new FileInputStream(file)) {
            LineIterator lines = IOUtils.lineIterator(in, Charset.defaultCharset());
            int lineNumber = 0;
            while (lines.hasNext() && lineNumber < nLinesToTest) {
                for (String value : lines.next().split(",")) {
                    compareParsingResult(parser, value);
                }
                lineNumber++;
            }
        }
    }

    /**
     * Both parsers must either fail with the same exception class or return
     * exactly the same double for the given text.
     */
    private void compareParsingResult(DoubleParser parser, String stringValue) {
        Object actual;
        try {
            actual = parser.parse(stringValue);
        } catch (Exception e) {
            actual = e;
        }
        Object expected;
        try {
            expected = Double.parseDouble(stringValue);
        } catch (Exception e) {
            expected = e;
        }
        assertEquals("Failed for: "+stringValue, expected.getClass(), actual.getClass());
        if (expected instanceof Double) {
            assertEquals("Failed for: "+stringValue, expected, actual);
        }
    }
}

--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/decoder/ints/IntParserTest.java:
--------------------------------------------------------------------------------
package uk.elementarysoftware.quickcsv.decoder.ints;

import static org.junit.Assert.assertEquals;

import java.util.Random;
import java.util.function.Function;

import org.junit.experimental.theories.DataPoints;
import org.junit.experimental.theories.FromDataPoints;
import org.junit.experimental.theories.Theories;
import org.junit.experimental.theories.Theory;
import org.junit.runner.RunWith;

/** Checks that QuickIntParser agrees with Integer.parseInt on valid and invalid input. */
@RunWith(Theories.class)
public class IntParserTest {

    private static final int randomSize = 1000;

    private static final Random rnd = new Random();

    @DataPoints("validInts")
    public static String[] randomInts() {
        return rnd.ints(randomSize).mapToObj(i -> ""+i).toArray(String[]::new);
    }

    @DataPoints("validInts")
    public static String[] specialInts() {
        return new String[] {"0", "-0", "+0", "+1", Integer.MAX_VALUE+"", Integer.MIN_VALUE+""};
    }

    @DataPoints("failingInts")
    public static String[] specialFailingInts() {
        return new String[] {"X0", "-", "+", Long.MAX_VALUE+"", "", "Hello"};
    }

    private QuickIntParser parser = new QuickIntParser();

    @Theory
    public void parsersAreEquivalentOnValidInts(@FromDataPoints("validInts") String intValue) {
        compareParsingResult(intValue, s -> Integer.parseInt(s), s -> parser.parse(s));
    }

    @Theory
    public void parsersAreEquivalentOnFailingInts(@FromDataPoints("failingInts") String intValue) {
        compareParsingResult(intValue, s -> Integer.parseInt(s), s -> parser.parse(s));
    }

    /**
     * Applies both parsers to the value; each outcome is either the parsed
     * number or the exception thrown. The reference outcome is the expected
     * side of the assertions.
     */
    private void compareParsingResult(String value, Function<String, Object> reference, Function<String, Object> candidate) {
        Object expected = outcomeOf(reference, value);
        Object actual = outcomeOf(candidate, value);
        assertEquals("Failed for: "+value, expected.getClass(), actual.getClass());
        if (expected instanceof Integer) {
            assertEquals("Failed for: "+value, expected, actual);
        }
    }

    private static Object outcomeOf(Function<String, Object> parser, String value) {
        try {
            return parser.apply(value);
        } catch (Exception e) {
            return e;
        }
    }
}
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/decoder/ints/LongParserTest.java:
--------------------------------------------------------------------------------
package uk.elementarysoftware.quickcsv.decoder.ints;

import static org.junit.Assert.assertEquals;

import java.util.Arrays;
import java.util.Random;
import java.util.function.Function;

import org.junit.experimental.theories.DataPoints;
import org.junit.experimental.theories.FromDataPoints;
import org.junit.experimental.theories.Theories;
import org.junit.experimental.theories.Theory;
import org.junit.runner.RunWith;

/** Checks that QuickLongParser agrees with Long.parseLong on valid and invalid input. */
@RunWith(Theories.class)
public class LongParserTest {

    private static final int randomSize = 1000;

    private static final Random rnd = new Random();

    @DataPoints("validLongs")
    public static String[] randomLongs() {
        // rnd.longs, not rnd.ints: otherwise no random value outside the int range is ever tested.
        return rnd.longs(randomSize).mapToObj(l -> ""+l).toArray(String[]::new);
    }

    @DataPoints("validLongs")
    public static String[] specialLongs() {
        return new String[] {"0", "-0", "+0", "+1", Long.MAX_VALUE+"", Long.MIN_VALUE+""};
    }

    @DataPoints("failingLongs")
    public static String[] specialFailingLongs() {
        return new String[] {"X0", "-", "+", Double.MAX_VALUE+"", "", "Hello"};
    }

    private QuickLongParser parser = new QuickLongParser();

    @Theory
    public void parsersAreEquivalentOnValidLongs(@FromDataPoints("validLongs") String intValue) {
        compareParsingResult(intValue, s -> Long.parseLong(s), s -> parser.parse(s));
    }

    @Theory
    public void parsersAreEquivalentOnFailingLongs(@FromDataPoints("failingLongs") String intValue) {
        compareParsingResult(intValue, s -> Long.parseLong(s), s -> parser.parse(s));
    }

    /**
     * Applies both parsers to the value; each outcome is either the parsed
     * number or the exception thrown. p1 is the reference, p2 the parser under test.
     */
    private void compareParsingResult(String value, Function<String, Object> p1, Function<String, Object> p2) {
        Object v1 = outcomeOf(p1, value);
        Object v2 = outcomeOf(p2, value);
        assertEquals("Value 2:"+v2+", value 1: "+v1+", source"+value+"; "+Arrays.toString(value.getBytes()), v1.getClass(), v2.getClass());

        if (v1 instanceof Long) {
            assertEquals(v1, v2);
        }
    }

    private static Object outcomeOf(Function<String, Object> parser, String value) {
        try {
            return parser.apply(value);
        } catch (Exception e) {
            return e;
        }
    }

    /**
     * Manual experiment: decodes two raw bytes with the platform charset and
     * feeds the resulting text to Long.parseLong.
     */
    public static void main(String[] args) {
        byte[] x = new byte[] {-39, -94};
        long l = Long.parseLong(new String(x));
        System.out.println(l);
    }
}
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/integration/CorrectnessTest.java:
--------------------------------------------------------------------------------
package uk.elementarysoftware.quickcsv.integration;

import static org.junit.Assert.*;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.stream.Stream;

import org.junit.Test;

import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
import uk.elementarysoftware.quickcsv.api.StandardMappers;

/** Parses a file with quoted, escaped and multi-line fields and checks every row. */
public class CorrectnessTest {

    File input = new File("src/test/resources/correctness.txt");

    @Test
    @SuppressWarnings("unchecked")
    public void testParse() throws IOException {
        List<String>[] rows;
        // Close the stream once the rows are materialised.
        try (Stream<List<String>> stream = CSVParserBuilder.aParser(StandardMappers.TO_STRING_LIST).build().parse(input)) {
            rows = stream.toArray(List[]::new);
        }
        String separ = System.getProperty("line.separator");
        assertRow(rows[0], "Year", "Make", "Model", "Description", "Price");
        assertRow(rows[1], "1997", "Ford", "E350", "ac, abs, moon", "3000.00");
        assertRow(rows[2], "1999", "Chevy", "Venture \"Extended Edition\"", "", "4900.00");
        assertRow(rows[3], "1996", "Jeep", "Grand Cherokee", "MUST SELL!"+separ+"air, moon roof, loaded", "4799.00");
        assertRow(rows[4], "1999", "Chevy", "Venture \"Extended Edition, Very Large\"", "", "5000.00");
        assertRow(rows[5], "", "", "Venture \"Extended Edition\"", "", "4900.00");
    }

    private static void assertRow(List<String> row, String... expected) {
        assertArrayEquals(expected, row.toArray(new String[0]));
    }

}

--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/integration/HttpStreamTest.java:
--------------------------------------------------------------------------------
package uk.elementarysoftware.quickcsv.integration;

import static org.junit.Assert.assertEquals;

import java.io.File;
import java.net.URI;
import java.util.stream.Stream;

import org.apache.commons.io.FileUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.eclipse.jetty.server.Handler;
import org.eclipse.jetty.server.Server;
import org.eclipse.jetty.server.handler.DefaultHandler;
import org.eclipse.jetty.server.handler.HandlerList;
import
org.eclipse.jetty.server.handler.ResourceHandler;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExternalResource;

import uk.elementarysoftware.quickcsv.api.CSVParser;
import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
import uk.elementarysoftware.quickcsv.sampledomain.City;

/** Parses a CSV file served over HTTP by an embedded Jetty server. */
public class HttpStreamTest {

    @Rule
    public final FileServer httpServer = new FileServer();

    private final File testFile = IntegrationTest.inputUnix;
    private final CSVParser<City> parser = CSVParserBuilder.aParser(City.MAPPER).build();

    @Test
    public void testParseHttpResource() throws Exception {
        HttpGet httpGet = new HttpGet(httpServer.getURI().resolve(testFile.getName()));

        // Client, response and record stream are all closed, in reverse order, when the block exits.
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
             CloseableHttpResponse response = httpclient.execute(httpGet);
             Stream<City> stream = parser.parse(response.getEntity().getContent())) {
            assertEquals(FileUtils.readLines(testFile, "UTF-8").size(), stream.count());
        }
    }

    /** JUnit rule that serves src/test/resources from a Jetty server on a free port. */
    static class FileServer extends ExternalResource {

        private Server server;

        @Override
        protected void before() throws Throwable {
            server = new Server(0); // port 0: let the OS pick a free port

            ResourceHandler rh = new ResourceHandler();
            rh.setResourceBase("src/test/resources");

            HandlerList handlers = new HandlerList();
            handlers.setHandlers(new Handler[] { rh, new DefaultHandler() });
            server.setHandler(handlers);

            server.start();
        }


        @Override
        protected void after() {
            try {
                server.stop();
            } catch (Exception e) {
                // best effort: a failure to stop the server must not fail the test
            }
        }

        public URI getURI() {
            return server.getURI();
        }
    }
}

--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/integration/IntegrationTest.java:
--------------------------------------------------------------------------------
package uk.elementarysoftware.quickcsv.integration;

import static org.junit.Assert.*;

import java.io.File;
import java.util.stream.Stream;

import org.junit.Test;

import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
import uk.elementarysoftware.quickcsv.parser.simple.StraightForwardParser;
import uk.elementarysoftware.quickcsv.sampledomain.City;

/** Compares the quick parser with the straightforward reference parser on the sample city files. */
public class IntegrationTest {

    static final File inputDos = new File("src/test/resources/cities-dos.txt");
    static final File inputUnix = new File("src/test/resources/cities-unix.txt");

    static final int[] bufferSizesToTest = new int[] {1024, 11_111, 1_000_000};


    /** The default (parallel) stream must give the reference result for every buffer size. */
    @Test
    public void testMultiThreaded() throws Exception {
        Stream<City> s1 = new StraightForwardParser().parse(inputDos).map(City.MAPPER);
        Object[] expected = drain(s1);
        for (int bufferSize : bufferSizesToTest) {
            Stream<City> s2 = CSVParserBuilder.aParser(City.MAPPER).usingBufferSize(bufferSize).build().parse(inputDos);
            assertArrayEquals(expected, drain(s2));
        }
    }

    @Test
    public void testSingleThreaded() throws Exception {
        Stream<City> s1 = new StraightForwardParser().parse(inputDos).map(City.MAPPER);
        Stream<City> s2 = CSVParserBuilder.aParser(City.MAPPER).build().parse(inputDos).sequential();
        assertArrayEquals(drain(s1), drain(s2));
    }

    /** Files that differ only in line endings must parse to the same records. */
    @Test
    public void testDosVsUnix() throws Exception {
        Stream<City> s1 = CSVParserBuilder.aParser(City.MAPPER).build().parse(inputUnix);
        Stream<City> s2 = CSVParserBuilder.aParser(City.MAPPER).build().parse(inputDos);
        assertArrayEquals(drain(s1), drain(s2.sequential()));
    }

    /** Collects the stream into an array and closes it, so the tests leave no stream open. */
    private static Object[] drain(Stream<?> stream) {
        try (Stream<?> closing = stream) {
            return closing.toArray();
        }
    }

}
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/manual/CityManualPerformanceTester.java:
--------------------------------------------------------------------------------
package uk.elementarysoftware.quickcsv.manual;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.stream.Stream;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;

import uk.elementarysoftware.quickcsv.api.CSVParser;
import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
import uk.elementarysoftware.quickcsv.sampledomain.City;


/**
 * Manual throughput check: builds a large temporary file by repeating the
 * sample city file, parses it several times and prints the observed speed.
 */
public class CityManualPerformanceTester {
    long maxSpeed = 0;

    public void run() throws Exception {
        File file = prepareFile(300);
        try {
            System.out.println("Running file of size "+(file.length() / 1024 / 1024)+ "MB");
            run(file, 30);
        } finally {
            file.delete();
        }
    }

    private void run(File source, int nRuns) throws Exception {
        CSVParser<City> parser = CSVParserBuilder.aParser(City.MAPPER).build();
        //CSVParser<City> parser = CSVParserBuilder.aParser(City.HeaderAwareMapper.MAPPER, City.HeaderAwareMapper.Fields.class).usingExplicitHeader("Country", "City", "AccentCity", "Region", "Population", "Latitude", "Longitude").build();//TODO add that example to docs

        for (int i = 0; i < nRuns; i++) {
            runOnce(parser, source);
        }
    }

    /** Parses the file once and prints the duration and the speed (file bytes / 1024 / elapsed ms). */
    private void runOnce(CSVParser<City> parser, File source) throws IOException {
        long start = System.currentTimeMillis();
        try (Stream<City> records = parser.parse(source)) {
            records.count();
        }
        long duration = System.currentTimeMillis() - start;
        if (duration == 0) return; // too fast to measure; also avoids division by zero
        System.out.println("P2 parsed " +source.getName()+" in "+duration);
        long speed = source.length()/1024/duration;
        if (speed > maxSpeed) maxSpeed = speed;
        System.out.println("P2 speed: "+speed+" MB/s, max: "+maxSpeed);

    }

    /** Writes the sample file sizeMultiplier times into a new temporary file. */
    private File prepareFile(int sizeMultiplier) throws Exception {
        byte[] content;
        try (InputStream is = getClass().getResourceAsStream("/cities-unix.txt")) {
            content = IOUtils.toByteArray(is);
        }
        File result = File.createTempFile("csv", "large");
        for (int i = 0; i < sizeMultiplier; i++) {
            FileUtils.writeByteArrayToFile(result, content, true);
        }
        return result;
    }

    public static void main(String[] args) throws Exception {
        new CityManualPerformanceTester().run();
    }
}

--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/ByteSliceTest.java:
--------------------------------------------------------------------------------
package uk.elementarysoftware.quickcsv.parser;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Supplier;

import org.junit.Test;

import uk.elementarysoftware.quickcsv.api.ByteArraySource.ByteArrayChunk;
import uk.elementarysoftware.quickcsv.api.Field;
import uk.elementarysoftware.quickcsv.functional.Pair;

/** Tests field, line and split handling of single and joined byte slices. */
public class ByteSliceTest {

    private static final String FIELDS22 = "field11,field12\nfield21,field22";
    private static final String FIELDS33 = "field11,field12,field13\nfield21,field22,field23\nfield31,field32,field33";
    private static final String QUOTED = "'field11','field12'\n'field21','field22'\n";

    @Test
    public void testSplitOnLastLineEnd() {
        String content = "line1\nline2\nlastline";
        ByteSlice slice = sliceFor(content.getBytes());
        assertEquals(content, slice.toString());
        Pair<ByteSlice, ByteSlice> sliced = slice.splitOnLastLineEnd();
        assertEquals("line1\nline2\n", sliced.first.toString());
        assertEquals("lastline", sliced.second.toString());
    }


    @Test
    public void testSplitOnLastLineEndWithSkip() {
        String content = "line1\nline2\nlastline";
        ByteSlice slice = sliceFor(content.getBytes());
        slice.nextLine();
        Pair<ByteSlice, ByteSlice> sliced = slice.splitOnLastLineEnd();
        assertEquals("line2\n", sliced.first.toString());
        assertEquals("lastline", sliced.second.toString());
    }

    @Test
    public void testSingleSlice() {
        ByteSlice slice = sliceFor(FIELDS22.getBytes());
        assertEquals("field11,field12", slice.currentLine());
        List<Field> fields = getFields(slice);
        assertArrayEquals(new String[] {"field11","field12","field21","field22"}, fields.stream().map(f -> f.asString()).toArray());
    }

    @Test
    public void testSingleSliceFieldSplitWithQuote() {
        ByteSlice slice = sliceFor("f1,\"f2,f2\",f3,\"f\"\"4\"".getBytes());
        assertEquals("f1", slice.nextField(',', '"').asString());
        assertEquals("f2,f2", slice.nextField(',', '"').asString());
        assertEquals("f3", slice.nextField(',', '"').asString());
        assertEquals("f\"4", slice.nextField(',', '"').asString());
    }

    /** Quoted fields must parse the same wherever the content is cut into two slices. */
    @Test
    public void testMultiSliceQuoteSplit() {
        String content = "f1,\"f2,f2\",f3,\"f\"\"4\"";
        for (int splitIndex = 0; splitIndex < content.length(); splitIndex++) {
            ByteSlice join = splitAndJoin(content, splitIndex);
            assertEquals(content, join.toString());
            List<Field> fields = getFieldsQuoted(join);
            assertArrayEquals(
                "Failed on split index "+splitIndex,
                new String[] {"f1","f2,f2","f3","f\"4"},
                fields.stream().map(f -> f.asString()).toArray());
        }
    }

    @Test
    public void testEmptyFieldHandling() {
        ByteSlice slice = sliceFor("f1,,f2".getBytes());
        assertEquals("f1", slice.nextField(',', '"').asString());
        assertEquals("", slice.nextField(',', '"').asString());
        assertEquals("f2", slice.nextField(',', '"').asString());
        assertNull(slice.nextField(',', '"'));
    }

    @Test
    public void testSkipSlice() {
        ByteSlice slice = sliceFor(FIELDS22.getBytes());
        slice.skipUntil(',');
        assertEquals("field12", slice.nextField(',').asString());
    }

    @Test
    public void testSkipSliceQuoted() {
        ByteSlice slice = sliceFor("f1,\"f2,f2\",f3".getBytes());
        slice.skipUntil(',', '"');
        slice.skipUntil(',', '"');
        assertEquals("f3", slice.nextField(',', '"').asString());
    }


    /** Walks a joined slice byte by byte and checks the bytes come out in the original order. */
    @Test
    public void testMultiSliceIteration() {
        CompositeByteSlice slice = (CompositeByteSlice) splitAndJoin(FIELDS22, 3);
        byte[] result = new byte[slice.size()];
        for (int i = 0; i < result.length; i++) {
            result[i] = slice.currentByte();
            slice.nextByte();
        }
        assertEquals(FIELDS22, new String(result));
    }

    @Test
    public void testMultiSliceFieldSplit() {
        String content = FIELDS33;
        for (int splitIndex = 0; splitIndex < content.length(); splitIndex++) {
            ByteSlice join = splitAndJoin(content, splitIndex);
            assertEquals(content, join.toString());
            List<Field> fields = getFields(join);
            assertArrayEquals(
                "Failed on split index "+splitIndex,
                new String[] {"field11","field12","field13","field21","field22","field23","field31","field32","field33"},
                fields.stream().map(f -> f.asString()).toArray());
        }
    }

    @Test
    public void testMultiSliceSkip() {
        String content = FIELDS33;
        for (int splitIndex = 0; splitIndex < content.length(); splitIndex++) {
            ByteSlice join = splitAndJoin(content, splitIndex);
            assertTrue(join.skipUntil(','));
            assertEquals("field12", join.nextField(',').asString());
            assertTrue(join.nextLine());
            assertEquals("field21", join.nextField(',').asString());
            assertTrue(join.skipUntil(','));
            assertEquals("field23", join.nextField(',').asString());
        }
    }

    @Test
    public void testMultiSliceFieldSplitQuoted() {
        String content = QUOTED;
        for (int splitIndex = 0; splitIndex < content.length(); splitIndex++) {
            ByteSlice join = splitAndJoin(content, splitIndex);
            assertEquals(content, join.toString());
            List<Field> fields = getFieldsQuoted(join, '\'');
            assertArrayEquals(
                "Failed on split index "+splitIndex,
                new String[] {"field11","field12","field21","field22"},
                fields.stream().map(f -> f.asString()).toArray());
        }
    }

    /** Cuts the content at splitIndex, wraps both halves separately and joins them back. */
    private ByteSlice splitAndJoin(String content, int splitIndex) {
        String prefix = content.substring(0, splitIndex);
        String suffix = content.substring(splitIndex);
        return ByteSlice.join(sliceFor(prefix.getBytes()), sliceFor(suffix.getBytes()));
    }

    private ByteSlice sliceFor(byte[] bytes) {
        return ByteSlice.wrap(new ByteArrayChunk(bytes, bytes.length, false, (b) -> {}), Charset.defaultCharset());
    }

    /**
     * Collects copies of all fields of the slice. A null field marks the end
     * of a line; the loop stops when there is no further line.
     */
    private List<Field> collectFields(ByteSlice bs, Supplier<ByteArrayField> nextField) {
        List<Field> result = new ArrayList<>();
        while (true) {
            ByteArrayField f = nextField.get();
            if (f != null) {
                // clone: the test keeps the field after the slice has moved on
                result.add(f.clone());
            } else if (!bs.nextLine()) {
                break;
            }
        }
        return result;
    }

    private List<Field> getFields(ByteSlice bs) {
        return collectFields(bs, () -> bs.nextField(','));
    }

    private List<Field> getFieldsQuoted(ByteSlice bs, char quote) {
        return collectFields(bs, () -> bs.nextField(',', quote));
    }

    private List<Field> getFieldsQuoted(ByteSlice bs) {
        return getFieldsQuoted(bs, '"');
    }
}

--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/CharsetHandlingTest.java:
--------------------------------------------------------------------------------
package uk.elementarysoftware.quickcsv.parser;

import static org.junit.Assert.*;

import java.io.File;
import java.util.function.Function;
import java.util.stream.Stream;

import org.junit.Test;

import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
import uk.elementarysoftware.quickcsv.api.CSVRecordWithHeader;
import uk.elementarysoftware.quickcsv.sampledomain.City;

/** Parses the same data in two encodings, addressing columns by their Russian header names. */
public class CharsetHandlingTest {

    File utf8input = new File("src/test/resources/cities-rus-utf8.txt");
    File cp1251input = new File("src/test/resources/cities-rus-cp1251.txt");

    String[] expected = new String[] {
        "City [city=Андора, population=0, latitude=42.5, longitude=1.5166667]",
        "City [city=City of London, population=0, latitude=51.514125, longitude=-0.093689]",
        "City [city=Харків, population=0, latitude=49.980814, longitude=36.252718]"
    };

    @Test
    public void testUtf8() throws Exception {
        assertArrayEquals(expected, parseToStrings(utf8input, "UTF-8"));
    }

    @Test
    public void testCp1251() throws Exception {
        assertArrayEquals(expected, parseToStrings(cp1251input, "Cp1251"));
    }

    /** Parses the file with the given charset and returns the string form of every city. */
    private String[] parseToStrings(File input, String charsetName) throws Exception {
        try (Stream<City> cities = CSVParserBuilder.aParser(EnumMapper.MAPPER, EnumMapper.RusFields.class)
                .usingCharset(charsetName).build().parse(input)) {
            return cities.map(c -> c.toString()).toArray(String[]::new);
        }
    }

    public static class EnumMapper {

        /** Each constant's toString() is overridden to return the header name given in its constructor. */
        enum RusFields {
            Latitude("Широта"),
            Longitude("Долгота"),
            AccentCity("Город"),
            Population("Население");

            private final String headerFieldName;

            private RusFields(String headerFieldName) {
                this.headerFieldName = headerFieldName;
            }

            @Override
            public String toString() {
                return headerFieldName;
            }
        }

        public static final Function<CSVRecordWithHeader<RusFields>, City> MAPPER = r -> {
            return new City(
                r.getField(RusFields.AccentCity).asString(),
                r.getField(RusFields.Population).asInt(),
                r.getField(RusFields.Latitude).asDouble(),
                r.getField(RusFields.Longitude).asDouble(),
                r.getField(RusFields.Population).asLong()
            );
        };
    }
}
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/FieldSubsetViewTest.java:
--------------------------------------------------------------------------------
package uk.elementarysoftware.quickcsv.parser;

import static org.junit.Assert.*;

import org.junit.Before;
import org.junit.Test;

/** Tests index computation of a field subset declared in a different order than the header. */
public class FieldSubsetViewTest {

    enum FieldSubset {
        C3, C4, C1
    }

    private FieldSubsetView<FieldSubset> fs;

    @Before
    public void init() {
        this.fs = FieldSubsetView.forExplicitHeader(FieldSubset.class, "C1", "C2", "C3", "C4", "C5");
        fs.onSlice(null, null);
    }

    @Test
    public void testFieldIndexIsSortedAndCorrect() {
        assertArrayEquals(new int[] {0, 2, 3}, fs.getFieldIndexes());
    }

    @Test
    public void testIndexOfInSourceView() {
        assertEquals(1, fs.indexOfInSourceView(0));
        assertEquals(2, fs.indexOfInSourceView(1));
        assertEquals(0, fs.indexOfInSourceView(2));
    }
}

--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/TestParsingSpecialCases.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.parser; 2 | 3 | import static org.junit.Assert.assertArrayEquals; 4 | 5 | import java.io.ByteArrayInputStream; 6 | import java.io.InputStream; 7 | import java.util.List; 8 | import java.util.stream.Collectors; 9 | 10 | import org.junit.Test; 11 | 12 | import uk.elementarysoftware.quickcsv.api.CSVParser; 13 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder; 14 | 15 | /** Covers a final record that ends with empty fields and has no trailing line break, for plain and for quoted input. */ public class TestParsingSpecialCases { 16 | 17 | /* Maps every record to the string values of columns A, B and C, looked up through the Fields enum. */ CSVParser parser = 18 | CSVParserBuilder.aParser(r -> new String[] { 19 | r.getField(Fields.A).asString(), 20 | r.getField(Fields.B).asString(), 21 | r.getField(Fields.C).asString() 22 | }, Fields.class).build(); 23 | 24 | @Test 25 | public void testLineEndsWithEmptyField() { 26 | /* Header line plus one data line whose last two fields are empty; the input ends without a newline. */ InputStream csv = new ByteArrayInputStream("A,B,C\na,,".getBytes()); 27 | List result = parser.parse(csv).collect(Collectors.toList()); 28 | assertArrayEquals(new String[] {"a", "", ""}, result.get(0)); 29 | } 30 | 31 | @Test 32 | public void testLineEndsWithEmptyFieldQuoted() { 33 | /* Same data with every field wrapped in double quotes; the expected values carry no quotes. */ InputStream csv = new ByteArrayInputStream("\"A\",\"B\",\"C\"\n\"a\",\"\",\"\"".getBytes()); 34 | List result = parser.parse(csv).collect(Collectors.toList()); 35 | assertArrayEquals(new String[] {"a", "", ""}, result.get(0)); 36 | } 37 | 38 | static enum Fields { 39 | A, B, C; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/test/java/uk/elementarysoftware/quickcsv/parser/TestParsingWithHeader.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.parser; 2 | 3 | import static org.junit.Assert.*; 4 | 5 | import java.io.File; 6 | import java.io.IOException; 7 | import java.util.List; 8 | import java.util.function.Function; 9 | import
java.util.stream.Collectors; 10 | import java.util.stream.Stream; 11 | 12 | import org.junit.Test; 13 | 14 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder; 15 | import uk.elementarysoftware.quickcsv.api.StandardMappers; 16 | import uk.elementarysoftware.quickcsv.sampledomain.City; 17 | 18 | /** Parses cities-with-header.txt through the positional City mapper, the two enum based mappers and the string-list mapper. */ public class TestParsingWithHeader { 19 | 20 | File input = new File("src/test/resources/cities-with-header.txt"); 21 | 22 | String[] expected = new String[] { 23 | "City [city=Andorra, population=0, latitude=42.5, longitude=1.5166667]", 24 | "City [city=City of London, population=0, latitude=51.514125, longitude=-0.093689]", 25 | "City [city=Kharkiv, population=0, latitude=49.980814, longitude=36.252718]" 26 | }; 27 | 28 | @Test 29 | public void testSequential() throws Exception { 30 | /* NOTE(review): no column enum is passed here, so the header line presumably reaches City.MAPPER as data, fails to parse and is turned into null by ignoreErrors - confirm. */ Stream cities = CSVParserBuilder.aParser(ignoreErrors(City.MAPPER)) 31 | .build().parse(input).sequential(); 32 | String[] actual = cities.filter(c -> c != null).map(c -> c.toString()).toArray(String[]::new); 33 | assertArrayEquals(expected, actual); 34 | } 35 | 36 | @Test 37 | public void testSequentialWithEnumApi() throws Exception { 38 | Stream cities = CSVParserBuilder.aParser(City.HeaderAwareMapper.MAPPER, City.HeaderAwareMapper.Fields.class) 39 | .build().parse(input).sequential(); 40 | String[] actual = cities.map(c -> c.toString()).toArray(String[]::new); 41 | assertArrayEquals(expected, actual); 42 | } 43 | 44 | @Test 45 | public void testSequentialWithEnumApiWithFirstColumn() throws Exception { 46 | Stream cities = CSVParserBuilder.aParser(City.HeaderAwareMapper2.MAPPER, City.HeaderAwareMapper2.Fields.class) 47 | .build().parse(input).sequential(); 48 | String[] actual = cities.map(c -> c.toString()).toArray(String[]::new); 49 | /* Only the record count is asserted; the record contents are not compared here. */ assertEquals(3, actual.length); 50 | } 51 | 52 | @Test 53 | public void testParallel() throws Exception { 54 | Stream cities = CSVParserBuilder.aParser(ignoreErrors(City.MAPPER)) 55 | .build().parse(input).parallel(); 56 | String[] actual =
cities.filter(c -> c != null).map(c -> c.toString()).toArray(String[]::new); 57 | assertArrayEquals(expected, actual); 58 | } 59 | 60 | @Test 61 | /** 62 | * Checks that we can skip records on parallel stream. That verifies that the stream is ordered by 63 | * default and behaves normally when being copied by java's skipping stream decorator. 64 | */ 65 | public void testParallelParseWithSkip() throws IOException { 66 | /* skip(1) drops the first record, which is the header line of the input file, leaving its three data rows. */ List> result = CSVParserBuilder.aParser(StandardMappers.TO_STRING_LIST).build() 67 | .parse(input).skip(1).collect(Collectors.toList()); 68 | assertEquals(3, result.size()); 69 | assertArrayEquals(new String[] {"ad","andorra","Andorra","07","","42.5","1.5166667"}, result.get(0).toArray(new String[0])); 70 | } 71 | 72 | /** Wraps a mapper so that any Exception it throws yields null instead of propagating; the callers above filter the nulls out. */ private static Function ignoreErrors(Function f) { 73 | return t -> { 74 | try { 75 | return f.apply(t); 76 | } catch (Exception e) { 77 | return null; 78 | } 79 | }; 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/test/java/uk/elementarysoftware/quickcsv/parser/TestParsingWithHeaderQuoted.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.parser; 2 | 3 | import static org.junit.Assert.*; 4 | 5 | import java.io.File; 6 | import java.util.stream.Stream; 7 | 8 | import org.junit.Test; 9 | 10 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder; 11 | import uk.elementarysoftware.quickcsv.sampledomain.City; 12 | 13 | /** Reads the fully quoted variant of the cities file through the enum based mapper. */ public class TestParsingWithHeaderQuoted { 14 | 15 | File input = new File("src/test/resources/cities-with-header-quoted.txt"); 16 | 17 | String[] expected = new String[] { 18 | "City [city=Andorra, population=0, latitude=42.5, longitude=1.5166667]", 19 | "City [city=City of London, population=0, latitude=51.514125, longitude=-0.093689]", 20 | "City [city=Kharkiv, population=0, latitude=49.980814, longitude=36.252718]" 21 | }; 22 | 23 | 24 | @Test 25 | public void
testSequentialWithEnumApi() throws Exception { 26 | Stream cities = CSVParserBuilder.aParser(City.HeaderAwareMapper.MAPPER, City.HeaderAwareMapper.Fields.class) 27 | .usingSeparatorWithQuote(',', '"') 28 | .build().parse(input).sequential(); 29 | String[] actual = cities.map(c -> c.toString()).toArray(String[]::new); 30 | assertArrayEquals(expected, actual); 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/test/java/uk/elementarysoftware/quickcsv/parser/simple/StraightForwardParser.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.parser.simple; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.io.InputStream; 6 | import java.nio.ByteBuffer; 7 | import java.nio.file.Files; 8 | import java.util.function.Function; 9 | import java.util.stream.Stream; 10 | 11 | import uk.elementarysoftware.quickcsv.api.ByteArraySource; 12 | import uk.elementarysoftware.quickcsv.api.CSVParser; 13 | import uk.elementarysoftware.quickcsv.api.CSVRecord; 14 | import uk.elementarysoftware.quickcsv.api.Field; 15 | 16 | /** Minimal CSVParser built on Files.lines and String.split: every line is split on ',' with no quote handling. Only parse(File) is supported; the other overloads throw UnsupportedOperationException. */ public class StraightForwardParser implements CSVParser { 17 | 18 | @Override @SuppressWarnings("resource") 19 | public Stream parse(File source) throws IOException { 20 | Stream lines = Files.lines(source.toPath()); 21 | return lines.map(l -> l.split(",", -1)).map(toCSVRecord()); /* limit -1 keeps trailing empty fields; split(",") drops them, so a row ending in empty columns came back too short */ 22 | } 23 | 24 | /* Adapter from the split String[] to a SimpleCSVRecord. */ private Function toCSVRecord() { 25 | return new Function() { 26 | @Override 27 | public CSVRecord apply(String[] fields) { 28 | return new SimpleCSVRecord(fields); 29 | } 30 | }; 31 | } 32 | 33 | /** Forward-only cursor over the already split fields of one line. */ public static class SimpleCSVRecord implements CSVRecord { 34 | 35 | private String[] fields; 36 | private int index; 37 | 38 | public SimpleCSVRecord(String[] fields) { 39 | this.index = 0; 40 | this.fields = fields; 41 | } 42 | 43 | @Override 44 | public void skipField() { 45 | index++; 46 | } 47 | 48 | @Override 49 | public
void skipFields(int nFields) { 50 | index+=nFields; 51 | } 52 | 53 | @Override 54 | public Field getNextField() { 55 | return new SimpleField(fields[index++]); 56 | } 57 | 58 | } 59 | 60 | /** String-backed Field. raw(), asByte(), asChar() and asShort() are unimplemented stubs that return null or 0. */ public static class SimpleField implements Field { 61 | String value; 62 | 63 | public SimpleField(String value) { 64 | this.value = value; 65 | } 66 | 67 | @Override 68 | public ByteBuffer raw() { 69 | return null; 70 | } 71 | 72 | @Override 73 | public String asString() { 74 | return value; 75 | } 76 | 77 | @Override 78 | public double asDouble() { 79 | return Double.parseDouble(value); 80 | } 81 | 82 | @Override 83 | public byte asByte() { 84 | return 0; 85 | } 86 | 87 | @Override 88 | public char asChar() { 89 | return 0; 90 | } 91 | 92 | @Override 93 | public short asShort() { 94 | return 0; 95 | } 96 | 97 | @Override 98 | public int asInt() { 99 | if (isEmpty()) return 0; 100 | return Integer.parseInt(value); 101 | } 102 | 103 | @Override 104 | public long asLong() { 105 | return isEmpty() ? 0L : Long.parseLong(value); /* was a stub returning 0; now mirrors asInt(), so an empty field reads as 0 and anything else is parsed */ 106 | } 107 | 108 | @Override 109 | public Field clone() { 110 | return this; 111 | } 112 | 113 | @Override 114 | public boolean isEmpty() { 115 | return value.length() == 0; 116 | } 117 | 118 | @Override 119 | public Double asBoxedDouble() { 120 | return asDouble(); 121 | } 122 | 123 | @Override 124 | public Integer asBoxedInt() { 125 | return asInt(); 126 | } 127 | } 128 | 129 | @Override 130 | public Stream parse(InputStream is) { 131 | throw new UnsupportedOperationException(); 132 | } 133 | 134 | @Override 135 | public Stream parse(ByteArraySource bas) { 136 | throw new UnsupportedOperationException(); 137 | } 138 | } -------------------------------------------------------------------------------- /src/test/java/uk/elementarysoftware/quickcsv/sampledomain/City.java: -------------------------------------------------------------------------------- 1 | package uk.elementarysoftware.quickcsv.sampledomain; 2 | 3 | import java.util.function.Function; 4 | 5 | import
uk.elementarysoftware.quickcsv.api.CSVRecord; 6 | import uk.elementarysoftware.quickcsv.api.CSVRecordWithHeader; 7 | import uk.elementarysoftware.quickcsv.api.Field; 8 | 9 | /** Immutable sample domain object built from one row of the cities test files, either by column position or by header name. */ public class City { 10 | 11 | /* Positional mapper: delegates to the City(CSVRecord) constructor. */ public static final Function MAPPER = City::new; 12 | 13 | /** Mapper that picks columns by header name through the Fields enum. */ public static class HeaderAwareMapper { 14 | 15 | public static enum Fields { 16 | AccentCity, 17 | Latitude, 18 | Longitude, 19 | Population 20 | } 21 | 22 | /* Population is read twice, once as int and once as long. */ public static final Function, City> MAPPER = r -> { 23 | return new City( 24 | r.getField(Fields.AccentCity).asString(), 25 | r.getField(Fields.Population).asInt(), 26 | r.getField(Fields.Latitude).asDouble(), 27 | r.getField(Fields.Longitude).asDouble(), 28 | r.getField(Fields.Population).asLong() 29 | ); 30 | }; 31 | } 32 | 33 | /** Variant whose enum declares six columns; its MAPPER takes the name from the City column and does not read AccentCity or Country. */ public static class HeaderAwareMapper2 { 34 | public static enum Fields { 35 | AccentCity, Population, Latitude, Longitude, Country, City 36 | } 37 | 38 | public static final Function, City> MAPPER = r -> { 39 | return new City( 40 | r.getField(Fields.City).asString(), 41 | r.getField(Fields.Population).asInt(), 42 | r.getField(Fields.Latitude).asDouble(), 43 | r.getField(Fields.Longitude).asDouble(), 44 | r.getField(Fields.Population).asLong() 45 | ); 46 | }; 47 | } 48 | 49 | /* Number of leading fields the positional constructor skips before reading the city name. */ private static final int CITY_INDEX = 2; 50 | 51 | private final String city; 52 | private final int population; 53 | private final double latitude; 54 | private final double longitude; 55 | /* Same source field as population, parsed as long. */ private final long populationL; 56 | 57 | /** Reads the record strictly in order: skips CITY_INDEX fields, reads the name, skips one field, reads the population field as both int and long, then latitude and longitude. With the header used by the test files this is Country, City | AccentCity | Region | Population | Latitude | Longitude. */ public City(CSVRecord r) { 58 | r.skipFields(CITY_INDEX); 59 | this.city = r.getNextField().asString(); 60 | r.skipField(); 61 | Field popField = r.getNextField(); 62 | this.population = popField.asInt(); 63 | this.populationL = popField.asLong(); 64 | this.latitude = r.getNextField().asDouble(); 65 | this.longitude = r.getNextField().asDouble(); 66 | } 67 | 68 | public City(String city, int population, double latitude, double longitude, long populationL) { 69 | this.city = city; 70 | this.population = population; 71 |
this.latitude = latitude; 72 | this.longitude = longitude; 73 | this.populationL = populationL; 74 | } 75 | 76 | public String getCity() { 77 | return city; 78 | } 79 | 80 | public int getPopulation() { 81 | return population; 82 | } 83 | 84 | public double getLatitude() { 85 | return latitude; 86 | } 87 | 88 | public double getLongitude() { 89 | return longitude; 90 | } 91 | 92 | public long getPopulationL() { 93 | return populationL; 94 | } 95 | 96 | /** Hash over city, latitude, longitude and population; populationL is not included, matching equals. */ @Override 97 | public int hashCode() { 98 | final int prime = 31; 99 | int result = 1; 100 | result = prime * result + ((city == null) ? 0 : city.hashCode()); 101 | long temp; 102 | temp = Double.doubleToLongBits(latitude); 103 | result = prime * result + (int) (temp ^ (temp >>> 32)); 104 | temp = Double.doubleToLongBits(longitude); 105 | result = prime * result + (int) (temp ^ (temp >>> 32)); 106 | result = prime * result + population; 107 | return result; 108 | } 109 | 110 | /** Equal when the other object has exactly this class and the same city, latitude, longitude and population; doubles are compared by bit pattern and populationL is not compared. */ @Override 111 | public boolean equals(Object obj) { 112 | if (this == obj) 113 | return true; 114 | if (obj == null) 115 | return false; 116 | if (getClass() != obj.getClass()) 117 | return false; 118 | City other = (City) obj; 119 | if (city == null) { 120 | if (other.city != null) 121 | return false; 122 | } else if (!city.equals(other.city)) 123 | return false; 124 | if (Double.doubleToLongBits(latitude) != Double.doubleToLongBits(other.latitude)) 125 | return false; 126 | if (Double.doubleToLongBits(longitude) != Double.doubleToLongBits(other.longitude)) 127 | return false; 128 | if (population != other.population) 129 | return false; 130 | return true; 131 | } 132 | 133 | /** Renders city, population, latitude and longitude; populationL is omitted. The parser tests compare against this exact format. */ @Override 134 | public String toString() { 135 | return "City [city=" + city + ", population=" + population + ", latitude=" + latitude + ", longitude=" + longitude + "]"; 136 | } 137 | 138 | } 139 | -------------------------------------------------------------------------------- /src/test/resources/cities-rus-cp1251.txt:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/titorenko/quick-csv-streamer/cc11f6e9db6df4f3aac57ca72c4176501667f41d/src/test/resources/cities-rus-cp1251.txt -------------------------------------------------------------------------------- /src/test/resources/cities-rus-utf8.txt: -------------------------------------------------------------------------------- 1 | Страна,Код города,Город,Регион,Население,Широта,Долгота 2 | ad,andorra,Андора,07,,42.5,1.5166667 3 | gb,city of london,City of London,H9,,51.514125,-.093689 4 | ua,kharkiv,Харків,07,,49.980814,36.252718 -------------------------------------------------------------------------------- /src/test/resources/cities-with-header-quoted.txt: -------------------------------------------------------------------------------- 1 | "Country","City","AccentCity","Region","Population","Latitude","Longitude" 2 | "ad","andorra","Andorra","07","","42.5","1.5166667" 3 | "gb","city of london","City of London","H9","","51.514125","-.093689" 4 | "ua","kharkiv","Kharkiv","07","","49.980814","36.252718" -------------------------------------------------------------------------------- /src/test/resources/cities-with-header.txt: -------------------------------------------------------------------------------- 1 | Country,City,AccentCity,Region,Population,Latitude,Longitude 2 | ad,andorra,Andorra,07,,42.5,1.5166667 3 | gb,city of london,City of London,H9,,51.514125,-.093689 4 | ua,kharkiv,Kharkiv,07,,49.980814,36.252718 -------------------------------------------------------------------------------- /src/test/resources/correctness.txt: -------------------------------------------------------------------------------- 1 | Year,Make,Model,Description,Price 2 | 1997,Ford,E350,"ac, abs, moon",3000.00 3 | 1999,Chevy,"Venture ""Extended Edition""","",4900.00 4 | 1996,Jeep,Grand Cherokee,"MUST SELL! 
5 | air, moon roof, loaded",4799.00 6 | 1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00 7 | ,,"Venture ""Extended Edition""","",4900.00 --------------------------------------------------------------------------------