├── .gitattributes
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── build.gradle
├── release-notes.md
├── settings.gradle
└── src
    ├── jmh
        └── java
        │   └── uk
        │       └── elementarysoftware
        │           └── quickcsv
        │               └── benchmarks
        │                   ├── BenchmarkParserAndMapperInMemory.java
        │                   ├── City.java
        │                   └── OpenCSVParser.java
    ├── main
        └── java
        │   └── uk
        │       └── elementarysoftware
        │           └── quickcsv
        │               ├── api
        │                   ├── ByteArraySource.java
        │                   ├── CSVParser.java
        │                   ├── CSVParserBuilder.java
        │                   ├── CSVRecord.java
        │                   ├── CSVRecordWithHeader.java
        │                   ├── Field.java
        │                   └── StandardMappers.java
        │               ├── decoder
        │                   ├── Decoder.java
        │                   ├── ParserFactory.java
        │                   ├── doubles
        │                   │   ├── DoubleParser.java
        │                   │   ├── JDKDoubleParserAdapter.java
        │                   │   └── QuickDoubleParser.java
        │                   └── ints
        │                   │   ├── ExceptionHelper.java
        │                   │   ├── IntParser.java
        │                   │   ├── LongParser.java
        │                   │   ├── QuickIntParser.java
        │                   │   └── QuickLongParser.java
        │               ├── functional
        │                   ├── Pair.java
        │                   └── PrimitiveFunctions.java
        │               ├── ioutils
        │                   └── IOUtils.java
        │               └── parser
        │                   ├── BufferPool.java
        │                   ├── ByteArrayField.java
        │                   ├── ByteSlice.java
        │                   ├── FieldSubsetView.java
        │                   ├── InputStreamToByteArraySourceAdapter.java
        │                   └── QuickCSVParser.java
    └── test
        ├── java
            └── uk
            │   └── elementarysoftware
            │       └── quickcsv
            │           ├── decoder
            │               ├── doubles
            │               │   └── DoubleParserTest.java
            │               └── ints
            │               │   ├── IntParserTest.java
            │               │   └── LongParserTest.java
            │           ├── integration
            │               ├── CorrectnessTest.java
            │               ├── HttpStreamTest.java
            │               └── IntegrationTest.java
            │           ├── manual
            │               └── CityManualPerformanceTester.java
            │           ├── parser
            │               ├── ByteSliceTest.java
            │               ├── CharsetHandlingTest.java
            │               ├── FieldSubsetViewTest.java
            │               ├── TestParsingSpecialCases.java
            │               ├── TestParsingWithHeader.java
            │               ├── TestParsingWithHeaderQuoted.java
            │               └── simple
            │               │   └── StraightForwardParser.java
            │           └── sampledomain
            │               └── City.java
        └── resources
            ├── cities-dos.txt
            ├── cities-rus-cp1251.txt
            ├── cities-rus-utf8.txt
            ├── cities-unix-quoted.txt
            ├── cities-unix.txt
            ├── cities-with-header-quoted.txt
            ├── cities-with-header.txt
            └── correctness.txt


/.gitattributes:
--------------------------------------------------------------------------------
1 | cities-dos.txt text eol=crlf
2 | cities-unix.txt text eol=lf


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /bin/
 2 | .classpath
 3 | .project
 4 | .gradle
 5 | .settings
 6 | bin
 7 | build
 8 | private-notes.txt
 9 | /keys/
10 | gradle.properties
11 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 | jdk:
3 |   - oraclejdk8
4 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     {description}
294 |     Copyright (C) {year}  {fullname}
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   {signature of Ty Coon}, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 
341 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | Quick CSV Streamer  
  2 | =============
  3 | 
  4 | [![Build Status](https://travis-ci.org/titorenko/quick-csv-streamer.svg?branch=master)](https://travis-ci.org/titorenko/quick-csv-streamer)
  5 | [![Maven Central](https://maven-badges.herokuapp.com/maven-central/uk.elementarysoftware/quick-csv-streamer/badge.svg)](https://maven-badges.herokuapp.com/maven-central/uk.elementarysoftware/quick-csv-streamer/)
  6 | [![Javadoc](https://javadoc-emblem.rhcloud.com/doc/uk.elementarysoftware/quick-csv-streamer/badge.svg)](http://www.javadoc.io/doc/uk.elementarysoftware/quick-csv-streamer)
  7 | 
  8 | Quick CSV streamer is a high performance CSV parsing library with Java 8 Stream API.
  9 | The library operates in "zero-copy" mode and only parses what is required by the client. Amount
 10 | of garbage produced is also optimized, reducing pressure on the garbage collector.
 11 | Parallel, multi-core parsing is supported transparently via Java Stream API.
 12 | 
 13 | Compared to other open source Java CSV parsing libraries, Quick CSV achieves speed-ups in the 2x - 10x range in sequential, single-threaded mode. Naturally, parallel mode improves performance further. See the benchmarking results below for more details.
 14 | 
 15 | The library is limited to so-called "line-optimal" charsets like UTF-8, US-ASCII, ISO-8859-1 and some others. Such line-optimal charsets have the property that line feed ('\n'), carriage return ('\r') and the CSV separator are easily distinguishable from other encoded characters.
 16 | 
 17 | 
 18 | Maven dependency
 19 | --------------
 20 | 
 21 | Available from Maven Central:
 22 | 
 23 | ```xml
 24 | <dependency>
 25 |     <groupId>uk.elementarysoftware</groupId>
 26 |     <artifactId>quick-csv-streamer</artifactId>
 27 |     <version>0.2.4</version>
 28 | </dependency>
 29 | ```
 30 | 
 31 | Example usage
 32 | --------------
 33 | 
 34 | Suppose the following CSV file needs to be parsed
 35 | 
 36 |     Country,City,AccentCity,Region,Population,Latitude,Longitude
 37 |     ad,andorra,Andorra,07,,42.5,1.5166667
 38 |     gb,city of london,City of London,H9,,51.514125,-.093689
 39 |     ua,kharkiv,Kharkiv,07,,49.980814,36.252718
 40 | 
 41 | First define Java class to represent the records as follows
 42 | 
 43 |     public class City {
 44 |         private final String city;
 45 |         private final int population;
 46 |         private final double latitude;
 47 |         private final double longitude;
 48 | 
 49 |         ...
 50 |     }
 51 | 
 52 | here we will be sourcing 4 fields from the source file, ignoring other 3.  
 53 | 
 54 | Parsing the file is simple
 55 | 
 56 |     import uk.elementarysoftware.quickcsv.api.*;
 57 | 
 58 |     CSVParser<City> parser = CSVParserBuilder.aParser(City::new, City.CSVFields.class).forRfc4180().build();
 59 | 
 60 | the parser will use CSV separators as per RFC 4180 and the default encoding, and will expect the header as the first record in the source. Custom separators, quotes, encodings and header sources are supported.
 61 | 
 62 | Actual mapping is done in `City` constructor
 63 | 
 64 |     public class City {
 65 | 
 66 |         public static enum CSVFields {
 67 |             AccentCity,
 68 |             Population,
 69 |             Latitude,
 70 |             Longitude
 71 |         }
 72 | 
 73 |         public City(CSVRecordWithHeader<CSVFields> r) {
 74 |             this.city = r.getField(CSVFields.AccentCity).asString();
 75 |             this.population = r.getField(CSVFields.Population).asInt();
 76 |             this.latitude = r.getField(CSVFields.Latitude).asDouble();
 77 |             this.longitude = r.getField(CSVFields.Longitude).asDouble();
 78 |         }
 79 | 
 80 | first, the `CSVFields` enum specifies which fields should be sourced, and only these fields will actually be parsed. After that, the `CSVRecordWithHeader` instance is used to populate the `City` instance fields, referring to CSV fields by enum values.
 81 | 
 82 | Of course mapping can also be done outside domain class constructor, just pass different `Function<CSVRecordWithHeader, City>` to `CSVParserBuilder`.
 83 | 
 84 | The resulting stream can be processed in parallel or sequentially with the usual Java stream API. For example, to parse sequentially on a single thread
 85 | 
 86 |     Stream<City> stream = parser.parse(source).sequential();
 87 |     stream.forEach(System.out::println);    
 88 | 
 89 | By default parser will operate in parallel mode.
 90 | 
 91 | Please see [sample project](https://github.com/titorenko/quick-csv-streamer-cities-sample) for full source code of the above example.
 92 | 
 93 | Special cases for headers
 94 | --------------
 95 | 
 96 | When the header contains special characters, the fields cannot simply be encoded by enum literals. In such cases `toString` should be overridden, for example
 97 | 
 98 |     enum Fields {
 99 |         Latitude("City Latitude"),
100 |         Longitude("City Longitude"),
101 |         City("City name"),
102 |         Population("City Population");
103 | 
104 |         private final String headerFieldName;
105 | 
106 |         private Fields(String headerFieldName) {
107 |             this.headerFieldName = headerFieldName;
108 |         }
109 | 
110 |         @Override public String toString() {
111 |             return headerFieldName;
112 |         }
113 |     }
114 | 
115 | If the header is missing from the source, it can be supplied during parser construction
116 | 
117 |     CSVParserBuilder
118 |         .aParser(City::new, City.CSVFields.class)
119 |         .usingExplicitHeader("Country", "City", "AccentCity", "Region", "Population", "Latitude", "Longitude")
120 |         .build();
121 | 
122 | 
123 | Advanced usage
124 | --------------
125 | About 10% performance improvement compared to normal usage can be achieved by referencing the fields by position instead of name. In this case parser construction is even simpler
126 | 
127 |     CSVParser<City> parser = CSVParserBuilder.aParser(City::new).build();
128 | 
129 | as enumeration specifying field names is not needed. However now constructor will be using `CSVRecord` interface  
130 | 
131 |     public City(CSVRecord r) {
132 |         r.skipFields(2);
133 |         this.city  = r.getNextField().asString();
134 |         r.skipField();        
135 |         this.population = r.getNextField().asInt();        
136 |         this.latitude = r.getNextField().asDouble();
137 |         this.longitude = r.getNextField().asDouble();
138 |     }
139 | 
140 | effectively this encodes field order in the CSV source.
141 | 
142 | Performance
143 | --------------    
144 | 
145 | Best way to check performance of the library is to run benchmark on your target system with
146 | 
147 |     gradle jmh
148 | 
149 | reports can be then found in build/reports/jmh.
150 | 
151 | It is very important to appreciate that performance might vary dramatically depending on the actual CSV content. As a very rough guideline, see below the sample output of "gradle jmh" on an i7 2700k Ubuntu system, which uses `cities.txt` similar to the example above, expanded to have 3173800 rows and 157 MB in size:
152 | 
153 | |Benchmark                      |Mode  |Cnt  |   Score |   Error   |Units|
154 | | ----------------------------- | ---- | --- | ------- | --------- | --- |
155 | |OpenCSVParser                  |avgt  |  5  |2393.921 |± 262.347  |ms/op|
156 | |Quick CSV Parallel with header |avgt  |  5  | 205.013 |±  1.739   |ms/op|
157 | |Quick CSV Parallel (advanced)  |avgt  |  5  | 177.262 |±  1.739   |ms/op|
158 | |Quick CSV Sequential           |avgt  |  5  | 648.462 |± 45.991   |ms/op|
159 | 
160 | Comparison is done with OpenCSV library v3.8, performance of other libraries can be extrapolated using chart from https://github.com/uniVocity/csv-parsers-comparison
161 | 
162 | Prerequisites
163 | --------------
164 | Quick CSV Streamer library requires Java 8, it has no other dependencies.
165 | 
166 | License
167 | --------------
168 | Library is licensed under the terms of [GPL v2.0 license](http://www.gnu.org/licenses/gpl-2.0.html).
169 | Please contact me if you wish to use this library under a more commercially friendly license or want to extend it, for example to add async parsing or support for different file formats.
170 | 


--------------------------------------------------------------------------------
/build.gradle:
--------------------------------------------------------------------------------
  1 | // Build script for the Quick CSV Streamer library: Java 8 compilation, JMH benchmarks
  2 | // and, when the ossrhUsername project property is set, signed upload of the archives.
  3 | plugins {
  4 |    id 'me.champeau.gradle.jmh' version '0.3.0'
  5 | }
  6 | 
  7 | apply plugin: 'java'
  8 | apply plugin: 'maven'
  9 | apply plugin: 'signing'
 10 | apply plugin: 'me.champeau.gradle.jmh'
 11 | apply plugin: "eclipse"
 12 | 
 13 | group = 'uk.elementarysoftware'
 14 | version = '0.2.4'
 15 | 
 16 | sourceCompatibility = JavaVersion.VERSION_1_8
 17 | 
 18 | tasks.withType(JavaCompile) {
 19 |     options.encoding = 'UTF-8'
 20 | }
 21 | 
 22 | repositories {
 23 |     mavenCentral()
 24 | }
 25 | 
 26 | dependencies {
 27 |     testCompile group: 'commons-io', name: 'commons-io', version: '2.5'
 28 |     testCompile group: 'junit', name: 'junit', version: '4.12'
 29 |     testCompile group: 'org.eclipse.jetty', name: 'jetty-server', version: '9.4.6.v20170531'
 30 |     testCompile group: 'org.apache.httpcomponents', name: 'httpclient', version: '4.5.3'
 31 | 
 32 |     // Benchmark-only dependencies; commons-io is kept at the same version as in the test scope.
 33 |     jmh "commons-io:commons-io:2.5"
 34 |     jmh "com.opencsv:opencsv:3.8"
 35 | }
 36 | 
 37 | // "gradle jmh" runs only the benchmarks matching the include pattern below.
 38 | jmh {
 39 |     include = '.*BenchmarkParserAndMapperInMemory.*'
 40 |     jmhVersion = '1.19'
 41 |     jvmArgsAppend = '-Xmx1g -XX:+AggressiveOpts'
 42 | }
 43 | 
 44 | // Adds the jmh configuration to the generated Eclipse classpath.
 45 | eclipse {
 46 |     classpath {
 47 |         plusConfigurations += [ configurations.jmh ]
 48 |     }
 49 | }
 50 | 
 51 | task javadocJar(type: Jar) {
 52 |     classifier = 'javadoc'
 53 |     from javadoc
 54 | }
 55 | 
 56 | task sourcesJar(type: Jar) {
 57 |     classifier = 'sources'
 58 |     from sourceSets.main.allSource
 59 | }
 60 | 
 61 | artifacts {
 62 |     archives javadocJar, sourcesJar
 63 | }
 64 | 
 65 | // Signing and upload are configured only when the ossrhUsername project property is present;
 66 | // ossrhPassword is read from the project properties as well.
 67 | if (hasProperty('ossrhUsername')) {
 68 | 
 69 |     signing {
 70 |         sign configurations.archives
 71 |     }
 72 | 
 73 |     uploadArchives {
 74 |       repositories {
 75 |         mavenDeployer {
 76 |           beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) }
 77 | 
 78 |           repository(url: "https://oss.sonatype.org/service/local/staging/deploy/maven2/") {
 79 |             authentication(userName: ossrhUsername, password: ossrhPassword)
 80 |           }
 81 | 
 82 |           snapshotRepository(url: "https://oss.sonatype.org/content/repositories/snapshots/") {
 83 |             authentication(userName: ossrhUsername, password: ossrhPassword)
 84 |           }
 85 | 
 86 |           pom.project {
 87 |             name 'Quick CSV Streamer'
 88 |             packaging 'jar'
 89 |             description 'Quick CSV Parser with Java 8 Streams API'
 90 |             url 'https://github.com/titorenko/quick-csv-streamer'
 91 | 
 92 |             scm {
 93 |               connection 'scm:git:git://github.com/titorenko/quick-csv-streamer.git'
 94 |               developerConnection 'scm:git:git@github.com:titorenko/quick-csv-streamer.git'
 95 |               url 'https://github.com/titorenko/quick-csv-streamer'
 96 |             }
 97 | 
 98 |             licenses {
 99 |               license {
100 |                 name 'GNU General Public License, version 2'
101 |                 url 'http://www.gnu.org/licenses/gpl-2.0.html'
102 |               }
103 |             }
104 | 
105 |             developers {
106 |               developer {
107 |                 id 'elementarysoftware'
108 |                 name 'Elementary Software Ltd.'
109 |                 email 'elementarysoftware@gmail.com'
110 |               }
111 |             }
112 |           }
113 |         }
114 |       }
115 |     }
116 | }
117 | 


--------------------------------------------------------------------------------
/release-notes.md:
--------------------------------------------------------------------------------
 1 | 0.2.4
 2 | ==========
 3 | Added asBoxedInt, asBoxedDouble convenience methods.
 4 | 
 5 | Bugfixes:
 6 | * Issues #6 and #7 fixed
 7 | 
 8 | 0.2.3
 9 | ==========
10 | Improvement in parsing performance for quoted data
11 | Bugfixes
12 | * Additional fix for the issue with quotes at the end of the line, thanks to https://github.com/jasonk000
13 | 
14 | 0.2.2
15 | ==========
16 | Converted tabs to spaces in source files
17 | Bugfixes
18 | * Fix for issue #3 with quotes in the end of the line
19 | 
20 | 0.2.1
21 | ==========
22 | Bugfixes
23 |  * Fix NPE occurring when the first column is included into parsing results using header in the source API.
24 |    Issue was reported by Pradeep Jaligama.
25 | 
26 | 0.2.0
27 | ==========
28 | New features
29 | * header aware parsing
30 | * charset support
31 | * more flexible input config
32 | * performance improvements: less garbage, better composite slice impl., int and long parsers
33 | * new interface with mapper
34 | 
35 | Bugfixes
36 | * issue with skipping records via stream api
37 | * stream completion flag as returned by advance() was not properly calculated  
38 | 
39 | 0.1.1
40 | ==========
41 | * sample project added
42 | 
43 | 0.1.0
44 | ==========
45 | * initial release
46 | 


--------------------------------------------------------------------------------
/settings.gradle:
--------------------------------------------------------------------------------
1 | rootProject.name = 'quick-csv-streamer'


--------------------------------------------------------------------------------
/src/jmh/java/uk/elementarysoftware/quickcsv/benchmarks/BenchmarkParserAndMapperInMemory.java:
--------------------------------------------------------------------------------
  1 | package uk.elementarysoftware.quickcsv.benchmarks;
  2 | 
  3 | import java.io.ByteArrayInputStream;
  4 | import java.io.File;
  5 | import java.io.IOException;
  6 | import java.io.UncheckedIOException;
  7 | import java.util.concurrent.TimeUnit;
  8 | import java.util.stream.Stream;
  9 | 
 10 | import org.apache.commons.io.FileUtils;
 11 | import org.openjdk.jmh.annotations.Benchmark;
 12 | import org.openjdk.jmh.annotations.BenchmarkMode;
 13 | import org.openjdk.jmh.annotations.Fork;
 14 | import org.openjdk.jmh.annotations.Measurement;
 15 | import org.openjdk.jmh.annotations.Mode;
 16 | import org.openjdk.jmh.annotations.OutputTimeUnit;
 17 | import org.openjdk.jmh.annotations.Scope;
 18 | import org.openjdk.jmh.annotations.State;
 19 | import org.openjdk.jmh.annotations.Warmup;
 20 | import org.openjdk.jmh.infra.Blackhole;
 21 | import org.openjdk.jmh.runner.Runner;
 22 | import org.openjdk.jmh.runner.options.Options;
 23 | import org.openjdk.jmh.runner.options.OptionsBuilder;
 24 | 
 25 | import uk.elementarysoftware.quickcsv.api.CSVParser;
 26 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
 27 | 
 28 | @BenchmarkMode(Mode.AverageTime)
 29 | @Fork(1)
 30 | @Warmup(iterations = 3, time = 5000, timeUnit = TimeUnit.MILLISECONDS)
 31 | @Measurement(iterations = 5, time = 7000, timeUnit = TimeUnit.MILLISECONDS)
 32 | @OutputTimeUnit(TimeUnit.MILLISECONDS)
 33 | @State(Scope.Benchmark)
 34 | public class BenchmarkParserAndMapperInMemory {
 35 |     
 36 |     private static final String TEST_FILE = "src/test/resources/cities-unix.txt"; 
 37 |     private static final String TEST_FILE_QUOTED = "src/test/resources/cities-unix-quoted.txt"; 
 38 |     
 39 |     @State(Scope.Benchmark)
 40 |     public static class BenchmarkState {
 41 |         
 42 |         byte[] content = loadFile(prepareFile(100, TEST_FILE));
 43 |         
 44 |         byte[] quotedContent = loadFile(prepareFile(100, TEST_FILE_QUOTED));
 45 |         
 46 |         private File prepareFile(int sizeMultiplier, String testFile) {
 47 |             try {
 48 |                 byte[] content= FileUtils.readFileToByteArray(new File(testFile));
 49 |                 File result = File.createTempFile("csv", "large");
 50 |                 for (int i = 0; i < sizeMultiplier; i++) {
 51 |                     FileUtils.writeByteArrayToFile(result, content, true);
 52 |                 }
 53 |                 return result;
 54 |             } catch (IOException e) {
 55 |                 throw new UncheckedIOException(e);
 56 |             }
 57 |         }
 58 | 
 59 |         private byte[] loadFile(File file) {
 60 |             try {
 61 |                 return FileUtils.readFileToByteArray(file);
 62 |             } catch (IOException e) {
 63 |                 throw new UncheckedIOException(e);
 64 |             }
 65 |         }
 66 |     }
 67 |     
 68 |     @Benchmark
 69 |     public void benchmarkParallelParser(BenchmarkState state, Blackhole bh) {
 70 |         CSVParser<City> parser = CSVParserBuilder.aParser(City.MAPPER).build();
 71 |         Stream<City> stream = parser.parse(new ByteArrayInputStream(state.content));
 72 |         stream.forEach(c -> bh.consume(c));
 73 |     }
 74 | 
 75 |     @Benchmark
 76 |     public void benchmarkParallelParserWithHeader(BenchmarkState state, Blackhole bh) {
 77 |         CSVParser<City> parser = CSVParserBuilder
 78 |                 .aParser(City.EnumMapper.MAPPER, City.EnumMapper.Fields.class)
 79 |                 .usingExplicitHeader("Country", "City", "AccentCity", "Region", "Population", "Latitude", "Longitude")
 80 |                 .build();
 81 |         Stream<City> stream = parser.parse(new ByteArrayInputStream(state.content));
 82 |         stream.forEach(c -> bh.consume(c));
 83 |     }
 84 |     
 85 |     @Benchmark
 86 |     public void benchmarkSequentialParser(BenchmarkState state, Blackhole bh) {
 87 |         CSVParser<City> parser = CSVParserBuilder.aParser(City.MAPPER).build();
 88 |         Stream<City> stream = parser.parse(new ByteArrayInputStream(state.content));
 89 |         stream.sequential().forEach(c -> bh.consume(c));
 90 |     }
 91 |     
 92 | 
 93 |     @Benchmark
 94 |     public void benchmarkSequentialParserWithQuotes(BenchmarkState state, Blackhole bh) {
 95 |         CSVParser<City> parser = CSVParserBuilder.aParser(City.MAPPER).build();
 96 |         Stream<City> stream = parser.parse(new ByteArrayInputStream(state.quotedContent));
 97 |         stream.sequential().forEach(c -> bh.consume(c));
 98 |     }
 99 |     
100 |     @Benchmark
101 |     public void benchmarkOpenCSVParser(BenchmarkState state, Blackhole bh) {
102 |         OpenCSVParser parser = new OpenCSVParser();
103 |         Stream<City> stream = parser.parse(new ByteArrayInputStream(state.content));
104 |         stream.forEach(c -> bh.consume(c));
105 |     }
106 |     
107 |     public static void main(String[] args) throws Exception {
108 |         Options opt = new OptionsBuilder()
109 |             .include(".*" + BenchmarkParserAndMapperInMemory.class.getSimpleName()+".*")
110 |             //.addProfiler(LinuxPerfAsmProfiler.class)
111 |             //.addProfiler(StackProfiler.class)
112 |             .build();
113 |         new Runner(opt).run();
114 |     }
115 | 
116 | }


--------------------------------------------------------------------------------
/src/jmh/java/uk/elementarysoftware/quickcsv/benchmarks/City.java:
--------------------------------------------------------------------------------
  1 | package uk.elementarysoftware.quickcsv.benchmarks;
  2 | 
  3 | import java.util.function.Function;
  4 | 
  5 | import uk.elementarysoftware.quickcsv.api.CSVRecord;
  6 | import uk.elementarysoftware.quickcsv.api.CSVRecordWithHeader;
  7 | 
  8 | /**
  9 |  * Value object used by the benchmarks, built from one record of the cities CSV file.
 10 |  */
 11 | public class City {
 12 |     
 13 |     /** Position-based mapper: reads the fields in the order they appear in the record. */
 14 |     public static final Function<CSVRecord, City> MAPPER = record -> new City(record);
 15 |     
 16 |     /** Header-based mapping: fields are looked up through the {@link Fields} enumeration. */
 17 |     public static class EnumMapper {
 18 |         
 19 |         public enum Fields {
 20 |             AccentCity,
 21 |             Population,
 22 |             Latitude,
 23 |             Longitude
 24 |         }
 25 |         
 26 |         public static final Function<CSVRecordWithHeader<Fields>, City> MAPPER = r -> new City(
 27 |                 r.getField(Fields.AccentCity).asString(),
 28 |                 r.getField(Fields.Population).asInt(),
 29 |                 r.getField(Fields.Latitude).asDouble(),
 30 |                 r.getField(Fields.Longitude).asDouble()
 31 |         );
 32 |     }
 33 |     
 34 |     /** Number of leading fields skipped before the city name is read. */
 35 |     private static final int CITY_INDEX = 2;
 36 |     
 37 |     private final String city;
 38 |     private final int population; 
 39 |     private final double latitude;
 40 |     private final double longitude;
 41 |     
 42 |     /**
 43 |      * Reads city name, population, latitude and longitude from the record by position:
 44 |      * the first CITY_INDEX fields and the field following the city name are skipped.
 45 |      */
 46 |     public City(CSVRecord r) {
 47 |         r.skipFields(CITY_INDEX);
 48 |         this.city  = r.getNextField().asString();
 49 |         r.skipField();
 50 |         this.population = r.getNextField().asInt();
 51 |         this.latitude = r.getNextField().asDouble();
 52 |         this.longitude = r.getNextField().asDouble();
 53 |     }
 54 |     
 55 |     public City(String city, int population, double latitude, double longitude) {
 56 |         this.city = city;
 57 |         this.population = population;
 58 |         this.latitude = latitude;
 59 |         this.longitude = longitude;
 60 |     }
 61 | 
 62 |     public String getCity() {
 63 |         return city;
 64 |     }
 65 |     
 66 |     public int getPopulation() {
 67 |         return population;
 68 |     }
 69 |     
 70 |     public double getLatitude() {
 71 |         return latitude;
 72 |     }
 73 |     
 74 |     public double getLongitude() {
 75 |         return longitude;
 76 |     }
 77 | 
 78 |     /** Combines city, latitude, longitude and population with the usual 31-based scheme. */
 79 |     @Override
 80 |     public int hashCode() {
 81 |         int hash = 31 + (city == null ? 0 : city.hashCode());
 82 |         hash = 31 * hash + Double.hashCode(latitude);
 83 |         hash = 31 * hash + Double.hashCode(longitude);
 84 |         return 31 * hash + population;
 85 |     }
 86 | 
 87 |     /** Two cities are equal when they have the same class and all four attributes match. */
 88 |     @Override
 89 |     public boolean equals(Object obj) {
 90 |         if (this == obj) {
 91 |             return true;
 92 |         }
 93 |         if (obj == null || getClass() != obj.getClass()) {
 94 |             return false;
 95 |         }
 96 |         City that = (City) obj;
 97 |         boolean sameName = (city == null) ? that.city == null : city.equals(that.city);
 98 |         // Double.compare(..) == 0 is true exactly when doubleToLongBits of both values match
 99 |         return sameName
100 |                 && Double.compare(latitude, that.latitude) == 0
101 |                 && Double.compare(longitude, that.longitude) == 0
102 |                 && population == that.population;
103 |     }
104 | 
105 |     @Override
106 |     public String toString() {
107 |         return "City [city=" + city
108 |                 + ", population=" + population
109 |                 + ", latitude=" + latitude
110 |                 + ", longitude=" + longitude + "]";
111 |     }
112 |     
113 | }
114 | 


--------------------------------------------------------------------------------
/src/jmh/java/uk/elementarysoftware/quickcsv/benchmarks/OpenCSVParser.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.benchmarks;
 2 | 
 3 | import java.io.IOException;
 4 | import java.io.InputStream;
 5 | import java.io.InputStreamReader;
 6 | import java.io.Reader;
 7 | import java.io.UncheckedIOException;
 8 | import java.util.Iterator;
 9 | import java.util.Spliterator;
10 | import java.util.Spliterators;
11 | import java.util.stream.Stream;
12 | import java.util.stream.StreamSupport;
13 | 
14 | import org.apache.commons.io.IOUtils;
15 | 
16 | import com.opencsv.CSVReader;
17 | 
18 | 
19 | /**
20 |  * OpenCSV based parser producing City objects, used as a baseline in the benchmarks.
21 |  */
22 | public class OpenCSVParser {
23 |     
24 |     /**
25 |      * Lazily parses the given input; closing the returned stream closes the CSV reader.
26 |      *
27 |      * @param is - CSV content
28 |      * @return stream with one element per row read, null for rows with fewer than 7 values
29 |      */
30 |     public Stream<City> parse(InputStream is) {
31 |         Reader reader = new InputStreamReader(is);
32 |         CSVReader csvReader = new CSVReader(reader);
33 |         Iterator<City> iterator = new Iterator<City>() {
34 |             // row read ahead by hasNext() that has not been handed out by next() yet
35 |             private String[] pendingRow = null;
36 |             private boolean isEndReached = false;
37 |             
38 |             @Override
39 |             public boolean hasNext() {
40 |                 if (pendingRow == null && !isEndReached) {
41 |                     try {
42 |                         pendingRow = csvReader.readNext();
43 |                     } catch (IOException e) {
44 |                         throw new UncheckedIOException(e);
45 |                     }
46 |                     isEndReached = pendingRow == null;
47 |                 }
48 |                 return pendingRow != null;
49 |             }
50 | 
51 |             @Override
52 |             public City next() {
53 |                 // reading ahead keeps the end of input from showing up as an extra null element
54 |                 if (!hasNext()) {
55 |                     throw new java.util.NoSuchElementException();
56 |                 }
57 |                 String[] values = pendingRow;
58 |                 pendingRow = null;
59 |                 return toCity(values);
60 |             }
61 |         };
62 |         Spliterator<City> spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED);
63 |         return StreamSupport.stream(spliterator, false).onClose(() -> IOUtils.closeQuietly(csvReader));
64 |     }
65 | 
66 |     /**
67 |      * Maps a parsed row to a City using the values at positions 2, 4, 5 and 6.
68 |      *
69 |      * @param values - values of one row
70 |      * @return the city, or null when the row has fewer than 7 values
71 |      */
72 |     protected City toCity(String[] values) {
73 |         if (values.length < 7) return null;
74 |         return new City(values[2], parseInt(values[4]), parseDouble(values[5]), parseDouble(values[6]));
75 |     }
76 | 
77 |     // empty or unparsable values are mapped to 0
78 |     private int parseInt(String value) {
79 |         try {
80 |             return value.isEmpty() ? 0 : Integer.parseInt(value);
81 |         } catch (Exception e) {
82 |             return 0;
83 |         }
84 |     }
85 | 
86 |     // only empty values are mapped to 0, other unparsable values make Double.parseDouble throw
87 |     private double parseDouble(String value) {
88 |         return value.isEmpty() ? 0 : Double.parseDouble(value);
89 |     }
90 | 
91 | }
92 | 


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/api/ByteArraySource.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.api;
 2 | 
 3 | import java.util.concurrent.atomic.AtomicInteger;
 4 | import java.util.function.Consumer;
 5 | 
 6 | /**
 7 |  * Abstract source of byte arrays to allow parsing of synchronous or asynchronous streams.
 8 |  */
 9 | public interface ByteArraySource {
10 | 
11 |     /**
12 |      * @return the next chunk provided by this source
13 |      * @throws Exception declared for implementations that can fail while providing a chunk
14 |      */
15 |     ByteArrayChunk getNext() throws Exception;
16 | 
17 |     /**
18 |      * Use-counted resource: a callback is run whenever a decrement leaves the count at zero or below.
19 |      */
20 |     abstract class ReusableChunk {
21 | 
22 |         private final AtomicInteger usageCount = new AtomicInteger(0);
23 |         private final Runnable onFree;
24 | 
25 |         /**
26 |          * @param onFree - callback that will be called when usage count reaches zero
27 |          */
28 |         protected ReusableChunk(Runnable onFree) {
29 |             this.onFree = onFree;
30 |         }
31 | 
32 |         /** Registers one more user of this chunk. */
33 |         public void incrementUseCount() {
34 |             usageCount.incrementAndGet();
35 |         }
36 | 
37 |         /** Releases one use and runs the free callback if the resulting count is not positive. */
38 |         public void decrementUseCount() {
39 |             if (usageCount.decrementAndGet() <= 0) {
40 |                 onFree.run();
41 |             }
42 |         }
43 |     }
44 | 
45 |     /**
46 |      * Chunk backed by a byte array together with the content length and a last-chunk flag.
47 |      */
48 |     class ByteArrayChunk extends ReusableChunk {
49 |         /** Zero-length chunk that is not flagged as last and whose free callback does nothing. */
50 |         public static final ByteArrayChunk EMPTY = new ByteArrayChunk(new byte[0], 0, false, released -> { });
51 | 
52 |         private final byte[] data;
53 |         private final int length;
54 |         private final boolean isLast;
55 | 
56 |         /**
57 |          * @param data - underlying content
58 |          * @param length - content length
59 |          * @param isLast - whether this chunk is the last one
60 |          * @param onFree - callback that receives data once the usage count of this chunk drops to zero
61 |          */
62 |         public ByteArrayChunk(byte[] data, int length, boolean isLast, Consumer<byte[]> onFree) {
63 |             super(() -> onFree.accept(data));
64 |             this.data = data;
65 |             this.length = length;
66 |             this.isLast = isLast;
67 |         }
68 | 
69 |         public byte[] getData() {
70 |             return data;
71 |         }
72 | 
73 |         public int getLength() {
74 |             return length;
75 |         }
76 | 
77 |         public boolean isLast() {
78 |             return isLast;
79 |         }
80 |     }
81 | }


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/api/CSVParser.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.api;
 2 | 
 3 | import java.io.File;
 4 | import java.io.FileInputStream;
 5 | import java.io.IOException;
 6 | import java.io.InputStream;
 7 | import java.util.stream.Stream;
 8 | 
 9 | import uk.elementarysoftware.quickcsv.ioutils.IOUtils;
10 | 
11 | /**
12 |  * CSV Parser can parse inputs such as {@link InputStream} or more generally {@link ByteArraySource} to Stream&lt;T&gt;.
13 |  * 
14 |  * @param <T> - the type of the parsing result
15 |  */
16 | public interface CSVParser<T> {
17 |     
18 |     public default Stream<T> parse(File file) throws IOException {
19 |         InputStream is = new FileInputStream(file);
20 |         return parse(is).onClose(() -> IOUtils.closeQuietly(is));
21 |     }
22 |     
23 |     public Stream<T> parse(InputStream is);
24 |     
25 |     public Stream<T> parse(ByteArraySource bas);
26 | }


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/api/CSVParserBuilder.java:
--------------------------------------------------------------------------------
  1 | package uk.elementarysoftware.quickcsv.api;
  2 | 
  3 | import java.nio.charset.Charset;
  4 | import java.util.Objects;
  5 | import java.util.Optional;
  6 | import java.util.function.Function;
  7 | 
  8 | import uk.elementarysoftware.quickcsv.parser.FieldSubsetView;
  9 | import uk.elementarysoftware.quickcsv.parser.QuickCSVParser;
 10 | 
 11 | /**
 12 |  * CSV Parser builder, use this class to construct {@link CSVParser}.
 13 |  * 
 14 |  * @param <T> - type of object that each record of the CSV data will be mapped to
 15 |  * @param <K> - type of enumeration that is used to specify fields to be parsed, only relevant for header-aware parser. 
 16 |  */
 17 | public class CSVParserBuilder<T, K extends Enum<K>> {
 18 |     
 19 |     private int bufferSize = 512*1024;
 20 |     
 21 |     private CSVFileMetadata metadata = CSVFileMetadata.RFC_4180;
 22 | 
 23 |     private Function<CSVRecord, T> recordMapper;
 24 | 
 25 |     private Function<CSVRecordWithHeader<K>, T> recordWithHeaderMapper;
 26 |     // stays null unless the builder was created for a header-aware parser
 27 |     private FieldSubsetView<K> subsetView = null;
 28 |     
 29 |     private Charset charset = Charset.defaultCharset();
 30 |     
 31 |     private CSVParserBuilder() {
 32 |     }
 33 | 
 34 |     /**
 35 |      * Create new parser using supplied mapping function. 
 36 |      * 
 37 |      * Mapping function must not store a reference to the {@link CSVRecord} object, 
 38 |      * it needs to be a pure function that creates a new instance of T. 
 39 |      * CSVRecord could be mutated by the parser when next field or record are processed.
 40 |      * 
 41 |      * @param mapper - mapping function from CSVRecord to T
 42 |      * @param <T> - type of object that each record of the CSV data will be mapped to
 43 |      * @param <K> - ignored
 44 |      * @return new parser builder
 45 |      */
 46 |     public static <T, K extends Enum<K>> CSVParserBuilder<T, K> aParser(Function<CSVRecord, T> mapper) {
 47 |         CSVParserBuilder<T, K> builder = new CSVParserBuilder<T, K>();
 48 |         builder.recordMapper = mapper;
 49 |         return builder;
 50 |     }
 51 |     
 52 |     /**
 53 |      * Create new header-aware parser using supplied mapping function. 
 54 |      * 
 55 |      * Mapping function must not store a reference to the {@link CSVRecordWithHeader} object, 
 56 |      * it needs to be a pure function that creates a new instance of T.
 57 |      *  
 58 |      * CSVRecordWithHeader could be mutated by the parser when next record is processed.
 59 |      * 
 60 |      * @param mapper - mapping function from CSVRecordWithHeader to T
 61 |      * @param fields - enumeration specifying fields that should be parsed
 62 |      * @param <T> - type of object that each record of the CSV data will be mapped to
 63 |      * @param <K> - type of enumeration that is used to specify fields to be parsed
 64 |      * 
 65 |      * @return new parser builder
 66 |      */
 67 |     public static <T, K extends Enum<K>> CSVParserBuilder<T, K> aParser(Function<CSVRecordWithHeader<K>, T> mapper, Class<K> fields) {
 68 |         CSVParserBuilder<T, K> builder = new CSVParserBuilder<T, K>();
 69 |         builder.recordWithHeaderMapper = mapper;
 70 |         builder.subsetView = FieldSubsetView.forSourceSuppliedHeader(fields);
 71 |         return builder;
 72 |     }
 73 |     
 74 |     /**
 75 |      * Use supplied header and do not take header from the source.
 76 |      * Only available on builders created with {@link #aParser(Function, Class)}.
 77 |      * @param header - header fields
 78 |      * @return this parser builder
 79 |      * @throws NullPointerException if this builder was not created for a header-aware parser
 80 |      */
 81 |     public CSVParserBuilder<T, K> usingExplicitHeader(String... header) {
 82 |         Objects.requireNonNull(subsetView,
 83 |                 "explicit header requires a header-aware builder, use aParser(mapper, fields)");
 84 |         this.subsetView = FieldSubsetView.forExplicitHeader(subsetView.getFieldSubset(), header);
 85 |         return this;
 86 |     }
 87 |     
 88 |     /**
 89 |      * Use tabs as separator and no quoting
 90 |      * @return this parser builder
 91 |      */
 92 |     public CSVParserBuilder<T, K> forTabs() {
 93 |         this.metadata = CSVFileMetadata.TABS;
 94 |         return this;
 95 |     }
 96 |     
 97 |     /**
 98 |      * Use comma as separator and double quotes as quote character as per RFC 4180 document.
 99 |      * @return this parser builder
100 |      */
101 |     public CSVParserBuilder<T, K> forRfc4180() {
102 |         this.metadata = CSVFileMetadata.RFC_4180;
103 |         return this;
104 |     }
105 |     
106 |     /**
107 |      * Use specified character as field separator.
108 |      * @param separator - field separator character
109 |      * @return this parser builder
110 |      */
111 |     public CSVParserBuilder<T, K> usingSeparatorWithNoQuotes(char separator) {
112 |         this.metadata = new CSVFileMetadata(separator, Optional.empty());
113 |         return this;
114 |     }
115 |     
116 |     /**
117 |      * Use specified characters as field separator and quote character.
118 |      * Quote character can be escaped by preceding it with another quote character.
119 |      * @param separator - field separator character
120 |      * @param quote - quote character
121 |      * @return this parser builder
122 |      */
123 |     public CSVParserBuilder<T, K> usingSeparatorWithQuote(char separator, char quote) {
124 |         this.metadata = new CSVFileMetadata(separator, Optional.of(quote));
125 |         return this;
126 |     }
127 |     
128 |     /**
129 |      * Buffer size to use when reading from file and parsing. Each buffer is parsed by single thread. 
130 |      * @param size - size in bytes
131 |      * @return this parser builder
132 |      */
133 |     public CSVParserBuilder<T, K> usingBufferSize(int size) {
134 |         this.bufferSize = size;
135 |         return this;
136 |     }
137 |     
138 | 
139 |     /**
140 |      * Specifies charset to use during parsing. By default Charset.defaultCharset() is used.
141 |      * This parser only supports charsets that represent separators and digits as single bytes.
142 |      * @param charset - charset to use during parsing
143 |      * @return this parser builder
144 |      */
145 |     public CSVParserBuilder<T, K> usingCharset(Charset charset) {
146 |         this.charset = charset;
147 |         return this;
148 |     }
149 |     
150 |     /**
151 |      * Specifies charset name to use during parsing. By default Charset.defaultCharset() is used.
152 |      * This parser only supports charsets that represent separators and digits as single bytes.
153 |      * @param charsetName - name of the charset to use during parsing
154 |      * @return this parser builder
155 |      */
156 |     public CSVParserBuilder<T, K> usingCharset(String charsetName) {
157 |         return usingCharset(Charset.forName(charsetName));
158 |     }
159 |     
160 |     /**
161 |      * Construct parser using current settings. A header-aware parser is built when the builder
162 |      * was created with a field enumeration, a position-based parser otherwise.
163 |      * @return CSV Parser
164 |      */
165 |     public CSVParser<T> build() {
166 |         return subsetView == null ? 
167 |                 new QuickCSVParser<T,K>(bufferSize, metadata, recordMapper, charset) :
168 |                 new QuickCSVParser<T,K>(bufferSize, metadata, recordWithHeaderMapper, subsetView, charset);
169 |     }
170 |     
171 |     /**
172 |      * Separator and optional quote character describing the CSV dialect.
173 |      */
174 |     public static class CSVFileMetadata {
175 |         
176 |         // shared presets, final so that they can not be reassigned by accident
177 |         public static final CSVFileMetadata RFC_4180 = new CSVFileMetadata(',', Optional.of('"'));
178 |         public static final CSVFileMetadata TABS = new CSVFileMetadata('\t', Optional.empty());
179 |         
180 |         public final char separator;
181 |         public final Optional<Character> quote;
182 | 
183 |         public CSVFileMetadata(char separator, Optional<Character> quote) {
184 |             this.separator = separator;
185 |             this.quote = quote;
186 |         }
187 |     }
188 | }


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/api/CSVRecord.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.api;
 2 | 
 3 | 
 4 | /**
 5 |  * Interface to access parsed CSV data in an efficient manner. 
 6 |  * Fields are parsed in the order they appear in the CSV source.
 7 |  */
 8 | public interface CSVRecord {
 9 | 
10 |     /** Skips the next field of this record. */
11 |     void skipField();
12 | 
13 |     /**
14 |      * Skips several consecutive fields of this record.
15 |      * @param nFields - number of fields to skip
16 |      */
17 |     void skipFields(int nFields);
18 | 
19 |     /** @return the next field of this record */
20 |     Field getNextField();
21 | }


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/api/CSVRecordWithHeader.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.api;
 2 | 
 3 | import java.util.List;
 4 | 
 5 | /**
 6 |  * CSV Record with header that gives access to all fields from enumeration K.
 7 |  * The fields can be accessed by name using enum values.
 8 |  * The toString() of each enum value should match the corresponding header column name.
 9 |  * 
10 |  * @param <K> - field enumeration
11 |  */
12 | public interface CSVRecordWithHeader<K extends Enum<K>> {
13 | 
14 |     /**
15 |      * @param field - enum value identifying the field
16 |      * @return the field of this record identified by the enum value
17 |      */
18 |     Field getField(K field);
19 | 
20 |     /** @return the header column names */
21 |     List<String> getHeader();
22 | }
23 | 


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/api/Field.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.api;
 2 | 
 3 | import java.nio.ByteBuffer;
 4 | 
 5 | /**
 6 |  * Interface to access underlying raw data as a particular type.
 7 |  *
 8 |  * Usually the field is backed by some kind of large byte array and maintains a view onto this array.
 9 |  * The underlying array can be mutated during parsing and the field object itself can be re-used, therefore
10 |  * clients should not maintain references to Field instances; instead a client is expected to map the field
11 |  * to its own data structure and then no longer use it.
12 |  *
13 |  * Primitive accessors cannot express a missing value: call isEmpty() first or use the boxed accessors
14 |  * where the distinction matters.
15 |  * NOTE(review): earlier documentation promised an NPE for empty fields, whereas ByteArrayField (through
16 |  * Decoder) returns 0 for them - confirm which behaviour is intended.
17 |  */
18 | public interface Field {
19 | 
20 |     /** @return the raw data of this field as a ByteBuffer */
21 |     ByteBuffer raw();
22 | 
23 |     /** @return the field content as text */
24 |     String asString();
25 | 
26 |     // Conversions to primitive types.
27 |     double asDouble();
28 |     byte asByte();
29 |     char asChar();
30 |     short asShort();
31 |     int asInt();
32 |     long asLong();
33 | 
34 |     // Boxed conversions; ByteArrayField answers null from these when the field is empty.
35 |     Integer asBoxedInt();
36 |     Double asBoxedDouble();
37 | 
38 |     /** @return true when the field holds no data */
39 |     boolean isEmpty();
40 | 
41 |     /** @return a separate Field instance; ByteArrayField builds it over the same buffer and bounds */
42 |     Field clone();
43 | }
44 | 


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/api/StandardMappers.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.api;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.List;
 5 | import java.util.function.Function;
 6 | 
 7 | public class StandardMappers {
 8 |     /**
 9 |      * Maps a record to the list of its field values as strings, reading fields until the record
10 |      * returns null. Note that it is NOT recommended to use this function in high volume scenarios;
11 |      * converting directly to a domain object or array is more effective.
12 |      */
13 |     public static final Function<CSVRecord, List<String>> TO_STRING_LIST = record -> {
14 |         List<String> values = new ArrayList<>();
15 |         for (Field field = record.getNextField(); field != null; field = record.getNextField()) {
16 |             values.add(field.asString());
17 |         }
18 |         return values;
19 |     };
20 | }
21 | 


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/Decoder.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.decoder;
 2 | 
 3 | import java.nio.charset.Charset;
 4 | 
 5 | import uk.elementarysoftware.quickcsv.decoder.ints.IntParser;
 6 | import uk.elementarysoftware.quickcsv.decoder.ints.LongParser;
 7 | 
 8 | /**
 9 |  * Converts byte ranges of a buffer into strings and numbers. Text is decoded with the charset given
10 |  * at construction; numbers are parsed by the parsers obtained from ParserFactory.
11 |  */
12 | public class Decoder {
13 | 
14 |     private final Charset charset;
15 |     private final uk.elementarysoftware.quickcsv.decoder.doubles.DoubleParser doubleParser;
16 |     private final IntParser intParser;
17 |     private final LongParser longParser;
18 | 
19 |     public Decoder(Charset charset) {
20 |         ParserFactory factory = new ParserFactory();
21 |         this.charset = charset;
22 |         this.doubleParser = factory.getDoubleParser();
23 |         this.intParser = factory.getIntParser();
24 |         this.longParser = factory.getLongParser();
25 |     }
26 | 
27 |     /** Decodes length bytes starting at offset using the charset of this decoder. */
28 |     public String decodeToString(byte[] buffer, int offset, int length) {
29 |         return new String(buffer, offset, length, charset);
30 |     }
31 | 
32 |     /** Parses the byte range as a double; a zero-length range yields 0.0. */
33 |     public double decodeToDouble(byte[] buffer, int offset, int length) {
34 |         return length == 0 ? 0.0 : doubleParser.parse(buffer, offset, length);
35 |     }
36 | 
37 |     /** Parses the byte range as an int; a zero-length range yields 0. */
38 |     public int decodeToInt(byte[] buffer, int offset, int length) {
39 |         return length == 0 ? 0 : intParser.parse(buffer, offset, length);
40 |     }
41 | 
42 |     /** Parses the byte range as a long; a zero-length range yields 0. */
43 |     public long decodeToLong(byte[] buffer, int offset, int length) {
44 |         return length == 0 ? 0L : longParser.parse(buffer, offset, length);
45 |     }
46 | 
47 |     /** @return the charset passed to the constructor */
48 |     public Charset getCharset() {
49 |         return charset;
50 |     }
51 | }


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/ParserFactory.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.decoder;
 2 | 
 3 | import uk.elementarysoftware.quickcsv.decoder.doubles.DoubleParser;
 4 | import uk.elementarysoftware.quickcsv.decoder.doubles.JDKDoubleParserAdapter;
 5 | import uk.elementarysoftware.quickcsv.decoder.doubles.QuickDoubleParser;
 6 | import uk.elementarysoftware.quickcsv.decoder.ints.IntParser;
 7 | import uk.elementarysoftware.quickcsv.decoder.ints.LongParser;
 8 | import uk.elementarysoftware.quickcsv.decoder.ints.QuickIntParser;
 9 | import uk.elementarysoftware.quickcsv.decoder.ints.QuickLongParser;
10 | 
11 | class ParserFactory {
12 |     
13 |     private final boolean useQuickParsers;
14 | 
15 |     ParserFactory() {
16 |         this.useQuickParsers = "true".equals(System.getProperty("uk.elementarysoftware.useQuickParsers", "true"));
17 |     }
18 |     
19 |     public DoubleParser getDoubleParser() {
20 |         if (useQuickParsers) {
21 |             return new QuickDoubleParser();
22 |         } else {
23 |             return new JDKDoubleParserAdapter();
24 |         }
25 |     }
26 | 
27 |     public IntParser getIntParser() {
28 |         return new QuickIntParser();
29 |     }
30 | 
31 |     public LongParser getLongParser() {
32 |         return new QuickLongParser();
33 |     }
34 | }


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/doubles/DoubleParser.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.decoder.doubles;
 2 | 
 3 | 
 4 | public interface DoubleParser {
 5 |     public double parse(byte[] in, int startIndex, int length);
 6 | 
 7 |     default public double parse(String s) {
 8 |         return parse(s.getBytes(), 0, s.length());
 9 |     };
10 | }
11 | 


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/doubles/QuickDoubleParser.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.decoder.doubles;
 2 | 
 3 | 
 4 | public class QuickDoubleParser implements DoubleParser {
 5 | 
 6 |     private static final int RADIX = 10;
 7 |     private static final int DOT = '.'-'0';
 8 |     
 9 |     private JDKDoubleParserAdapter fallBack = new JDKDoubleParserAdapter();
10 |     
11 |     public double parse(byte[] bytes, int offset, int length) {
12 |         if (bytes == null || length <=0)
13 |             throw new NumberFormatException("Empty input");
14 |         long result = 0;
15 |         boolean isNegative = false;
16 |         int index = offset, dotIndex=offset+length-1, endIndex = offset+length;
17 | 
18 |         byte firstByte = bytes[index];
19 |         if (firstByte < '0') {
20 |             if (firstByte == '-') {
21 |                 isNegative = true;
22 |             }
23 |             index++;
24 |         }
25 |         int nDigits = 0;
26 |         while (index < endIndex) {
27 |             int digit = bytes[index] - '0';
28 |             if (digit == DOT) {
29 |                 dotIndex=index;
30 |             }else  if (digit < 0 || digit>9) {
31 |                 throw new NumberFormatException("For: "+new String(bytes, offset, length));
32 |             } else {
33 |                 result *= RADIX;
34 |                 result -= digit;
35 |                 nDigits++; 
36 |             }
37 |             index++;
38 |         }
39 |         
40 |         double mantissa = -result;
41 |         int negExponent = length-(dotIndex-offset)-1;
42 |         
43 |         if (nDigits <= JDKDoubleParser.maxDecimalDigits) {
44 |             if (negExponent == 0 || mantissa == 0.0) {
45 |                 return (isNegative) ? -mantissa : mantissa;
46 |             }  
47 |             double rValue = mantissa / JDKDoubleParser.small10pow[negExponent];
48 |             return (isNegative) ? -rValue : rValue;
49 |         } else { //harder case, use JDK implementation
50 |             return fallBack.parse(bytes, offset, length);
51 |         }
52 |     }
53 | }


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/ints/ExceptionHelper.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.decoder.ints;
2 | 
3 | class ExceptionHelper {
4 |     static NumberFormatException nfExceptionFor(byte[] in, int startIndex, int len) {
5 |         return new NumberFormatException("For: "+new String(in, startIndex, len));
6 |     } 
7 | }
8 | 


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/ints/IntParser.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.decoder.ints;
 2 | 
 3 | public interface IntParser {
 4 |     public int parse(byte[] in, int startIndex, int length);
 5 | 
 6 |     default public int parse(String s) {
 7 |         return parse(s.getBytes(), 0, s.length());
 8 |     };
 9 | }
10 | 


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/ints/LongParser.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.decoder.ints;
2 | 
3 | public interface LongParser {
4 |     public long parse(byte[] in, int startIndex, int length);
5 | 
6 |     default public long parse(String s) {
7 |         return parse(s.getBytes(), 0, s.length());
8 |     };
9 | }


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/ints/QuickIntParser.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.decoder.ints;
 2 | import static uk.elementarysoftware.quickcsv.decoder.ints.ExceptionHelper.*;
 3 | 
 4 | /**
 5 |  * Parses a decimal int, with an optional leading "+" or "-", directly from bytes.
 6 |  * Empty input, a lone sign, a non-digit byte or a value outside the int range results in a
 7 |  * NumberFormatException.
 8 |  */
 9 | public class QuickIntParser implements IntParser {
10 | 
11 |     private static final int radix = 10;
12 | 
13 |     @Override
14 |     public int parse(final byte[] in, final int startIndex, final int len) {
15 |         if (len <= 0) {
16 |             throw nfExceptionFor(in, startIndex, len);
17 |         }
18 | 
19 |         final int end = startIndex + len;
20 |         int pos = startIndex;
21 |         boolean negative = false;
22 |         int limit = -Integer.MAX_VALUE;
23 | 
24 |         final byte lead = in[pos];
25 |         if (lead < '0') { // Possible leading "+" or "-"
26 |             if (lead == '-') {
27 |                 negative = true;
28 |                 limit = Integer.MIN_VALUE;
29 |             } else if (lead != '+') {
30 |                 throw nfExceptionFor(in, startIndex, len);
31 |             }
32 |             if (len == 1) { // Cannot have lone "+" or "-"
33 |                 throw nfExceptionFor(in, startIndex, len);
34 |             }
35 |             pos++;
36 |         }
37 | 
38 |         // The value is accumulated as a negative number, which avoids surprises near MAX_VALUE.
39 |         final int multmin = limit / radix;
40 |         int acc = 0;
41 |         for (; pos < end; pos++) {
42 |             final int digit = in[pos] - '0';
43 |             if (digit < 0 || digit >= radix || acc < multmin) {
44 |                 throw nfExceptionFor(in, startIndex, len);
45 |             }
46 |             acc *= radix;
47 |             if (acc < limit + digit) {
48 |                 throw nfExceptionFor(in, startIndex, len);
49 |             }
50 |             acc -= digit;
51 |         }
52 |         return negative ? acc : -acc;
53 |     }
54 | }


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/ints/QuickLongParser.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.decoder.ints;
 2 | import static uk.elementarysoftware.quickcsv.decoder.ints.ExceptionHelper.*;
 3 | 
 4 | /**
 5 |  * Parses a decimal long, with an optional leading "+" or "-", directly from bytes.
 6 |  * Empty input, a lone sign, a non-digit byte or a value outside the long range results in a
 7 |  * NumberFormatException.
 8 |  */
 9 | public class QuickLongParser implements LongParser {
10 | 
11 |     private static final int radix = 10;
12 | 
13 |     @Override
14 |     public long parse(final byte[] in, final int startIndex, final int len) {
15 |         if (len <= 0) {
16 |             throw nfExceptionFor(in, startIndex, len);
17 |         }
18 | 
19 |         final int end = startIndex + len;
20 |         int pos = startIndex;
21 |         boolean negative = false;
22 |         long limit = -Long.MAX_VALUE;
23 | 
24 |         final byte lead = in[pos];
25 |         if (lead < '0') { // Possible leading "+" or "-"
26 |             if (lead == '-') {
27 |                 negative = true;
28 |                 limit = Long.MIN_VALUE;
29 |             } else if (lead != '+') {
30 |                 throw nfExceptionFor(in, startIndex, len);
31 |             }
32 |             if (len == 1) { // Cannot have lone "+" or "-"
33 |                 throw nfExceptionFor(in, startIndex, len);
34 |             }
35 |             pos++;
36 |         }
37 | 
38 |         // The value is accumulated as a negative number, which avoids surprises near MAX_VALUE.
39 |         final long multmin = limit / radix;
40 |         long acc = 0;
41 |         for (; pos < end; pos++) {
42 |             final int digit = in[pos] - '0';
43 |             if (digit < 0 || digit >= radix || acc < multmin) {
44 |                 throw nfExceptionFor(in, startIndex, len);
45 |             }
46 |             acc *= radix;
47 |             if (acc < limit + digit) {
48 |                 throw nfExceptionFor(in, startIndex, len);
49 |             }
50 |             acc -= digit;
51 |         }
52 |         return negative ? acc : -acc;
53 |     }
54 | }


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/functional/Pair.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.functional;
 2 | 
 3 | import java.util.Objects;
 4 | 
 5 | /**
 6 |  * Immutable tuple of two objects. Two pairs are equal when both of their
 7 |  * elements are equal; either element may be null.
 8 |  */
 9 | public class Pair<F, S> {
10 | 
11 |     public final F first;
12 |     public final S second;
13 | 
14 |     /**
15 |      * Constructor for a Pair.
16 |      *
17 |      * @param first the first object in the Pair
18 |      * @param second the second object in the pair
19 |      */
20 |     public Pair(F first, S second) {
21 |         this.first = first;
22 |         this.second = second;
23 |     }
24 | 
25 |     /**
26 |      * Compares the elements of both pairs via {@link Objects#equals(Object, Object)}.
27 |      *
28 |      * @param o the object to compare with
29 |      * @return true if o is a Pair whose elements are both equal to the elements of this pair
30 |      */
31 |     @Override
32 |     public boolean equals(Object o) {
33 |         if (o instanceof Pair) {
34 |             Pair<?, ?> other = (Pair<?, ?>) o;
35 |             return Objects.equals(other.first, first) && Objects.equals(other.second, second);
36 |         }
37 |         return false;
38 |     }
39 | 
40 |     /**
41 |      * Combines the hash codes of both elements with XOR; a null element contributes 0.
42 |      *
43 |      * @return a hashcode of the Pair
44 |      */
45 |     @Override
46 |     public int hashCode() {
47 |         return Objects.hashCode(first) ^ Objects.hashCode(second);
48 |     }
49 | 
50 |     /** @return both elements joined by "=" */
51 |     @Override
52 |     public String toString() {
53 |         return new StringBuilder().append(first).append("=").append(second).toString();
54 |     }
55 | 
56 |     /**
57 |      * Convenience factory that infers the element types from its arguments.
58 |      * @param a the first object in the Pair
59 |      * @param b the second object in the pair
60 |      * @param <A> type of left element
61 |      * @param <B> type of right element
62 |      * @return a Pair holding a and b
63 |      */
64 |     public static <A, B> Pair<A, B> of(A a, B b) {
65 |         return new Pair<>(a, b);
66 |     }
67 | }


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/functional/PrimitiveFunctions.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.functional;
 2 | 
 3 | /** Functional interfaces over primitive char arguments, avoiding the boxing of java.util.function types. */
 4 | public class PrimitiveFunctions {
 5 | 
 6 |     /** Function from one char to a T. */
 7 |     @FunctionalInterface
 8 |     public interface FunCharToT<T> {
 9 |         T apply(char c);
10 |     }
11 | 
12 |     /** Function from two chars to a T. */
13 |     @FunctionalInterface
14 |     public interface FunBiCharToT<T> {
15 |         T apply(char c, char q);
16 |     }
17 | 
18 |     /** Predicate over one char. */
19 |     @FunctionalInterface
20 |     public interface FunCharToBoolean {
21 |         boolean apply(char c);
22 |     }
23 | 
24 |     /** Predicate over two chars. */
25 |     @FunctionalInterface
26 |     public interface FunBiCharToBoolean {
27 |         boolean apply(char c, char q);
28 |     }
29 | }


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/ioutils/IOUtils.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.ioutils;
 2 | 
 3 | import java.io.Closeable;
 4 | import java.io.IOException;
 5 | 
 6 | public class IOUtils {
 7 | 
 8 |     /**
 9 |      * Closes the given resource, ignoring a null argument and any IOException raised by close().
10 |      *
11 |      * @param closeable resource to close, may be null
12 |      */
13 |     public static void closeQuietly(Closeable closeable) {
14 |         if (closeable == null) {
15 |             return;
16 |         }
17 |         try {
18 |             closeable.close();
19 |         } catch (IOException ignored) {
20 |             // ignore
21 |         }
22 |     }
23 | }
24 | 


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/parser/BufferPool.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.parser;
 2 | 
 3 | import java.util.Queue;
 4 | import java.util.concurrent.ConcurrentLinkedQueue;
 5 | import java.util.concurrent.atomic.AtomicInteger;
 6 | 
 7 | /**
 8 |  * Pools large, long-living byte arrays to minimise old generation GC.
 9 |  * Once the pool holds at least as many buffers as it has created, it drops them all and resets its counter.
10 |  */
11 | class BufferPool {
12 | 
13 |     private final Queue<byte[]> buffers = new ConcurrentLinkedQueue<byte[]>();
14 |     private final AtomicInteger buffersCreated = new AtomicInteger(0);
15 |     private final int bufferSize;
16 | 
17 |     BufferPool(int bufferSize) {
18 |         this.bufferSize = bufferSize;
19 |     }
20 | 
21 |     /** @return a pooled buffer if one is available, otherwise a newly allocated one of bufferSize bytes */
22 |     byte[] getBuffer() {
23 |         byte[] pooled = buffers.poll();
24 |         if (pooled != null) {
25 |             return pooled;
26 |         }
27 |         buffersCreated.incrementAndGet();
28 |         return new byte[bufferSize];
29 |     }
30 | 
31 |     /** Returns a buffer to the pool; empties the pool when every created buffer has been handed back. */
32 |     void handBack(byte[] buffer) {
33 |         buffers.add(buffer);
34 |         boolean allHandedBack = buffers.size() >= buffersCreated.get();
35 |         if (allHandedBack) {
36 |             clear();
37 |         }
38 |     }
39 | 
40 |     private void clear() {
41 |         buffers.clear();
42 |         buffersCreated.set(0);
43 |     }
44 | }


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/parser/ByteArrayField.java:
--------------------------------------------------------------------------------
  1 | package uk.elementarysoftware.quickcsv.parser;
  2 | 
  3 | import java.nio.ByteBuffer;
  4 | import java.nio.charset.Charset;
  5 | 
  6 | import uk.elementarysoftware.quickcsv.api.Field;
  7 | import uk.elementarysoftware.quickcsv.decoder.Decoder;
  8 | 
  9 | /**
 10 |  * {@link Field} implementation backed by a range of a byte array.
 11 |  * Buffer and bounds can be changed after construction so that one instance can be re-used for many values.
 12 |  */
 13 | public class ByteArrayField implements Field {
 14 | 
 15 |     /** Field without content; note that it is created without a charset. */
 16 |     public static final ByteArrayField EMPTY = new ByteArrayField(new byte[0], 0, 0, null);
 17 | 
 18 |     private final Decoder decoder;
 19 | 
 20 |     private byte[] buffer;
 21 |     private int start; //inclusive
 22 |     private int end; //exclusive
 23 |     private Character quote; //if not null indicates that value was actually quoted
 24 | 
 25 |     public ByteArrayField(byte[] buffer, int startIndex, int endIndex, Charset charset) {
 26 |         this(buffer, startIndex, endIndex, charset, null);
 27 |     }
 28 | 
 29 |     public ByteArrayField(byte[] buffer, int startIndex, int endIndex, Charset charset, Character quote) {
 30 |         this.buffer = buffer;
 31 |         this.start = startIndex;
 32 |         this.end = endIndex;
 33 |         this.quote = quote;
 34 |         this.decoder = new Decoder(charset);
 35 |     }
 36 | 
 37 |     @Override
 38 |     public ByteBuffer raw() {
 39 |         return ByteBuffer.wrap(buffer, start, end - start);
 40 |     }
 41 | 
 42 |     /**
 43 |      * Decodes the field with the charset of this field. When the value was quoted, every doubled quote
 44 |      * character is collapsed into a single one.
 45 |      *
 46 |      * @return decoded text, the empty string for an empty field
 47 |      */
 48 |     @Override
 49 |     public String asString() {
 50 |         if (isEmpty()) {
 51 |             // Nothing to decode. Returning early also keeps fields created without a charset, such as
 52 |             // EMPTY, from failing with a NullPointerException inside the String constructor.
 53 |             return "";
 54 |         }
 55 |         String result = decoder.decodeToString(buffer, start, end - start);
 56 |         if (quote == null || result.indexOf(quote) < 0) {
 57 |             return result;
 58 |         }
 59 |         //TODO: optimise and add more flexible escape character
 60 |         //flag indicating if an escaped quote was seen can be passed from the parser itself as state
 61 |         String singleQuote = quote.toString();
 62 |         return result.replace(singleQuote + singleQuote, singleQuote);
 63 |     }
 64 | 
 65 |     @Override
 66 |     public double asDouble() {
 67 |         return decoder.decodeToDouble(buffer, start, end - start);
 68 |     }
 69 | 
 70 |     // asByte, asChar and asShort parse the value as an int and narrow it with a plain cast,
 71 |     // so values outside the target range are truncated rather than rejected.
 72 | 
 73 |     @Override
 74 |     public byte asByte() {
 75 |         return (byte) asInt();
 76 |     }
 77 | 
 78 |     @Override
 79 |     public char asChar() {
 80 |         return (char) asInt();
 81 |     }
 82 | 
 83 |     @Override
 84 |     public short asShort() {
 85 |         return (short) asInt();
 86 |     }
 87 | 
 88 |     @Override
 89 |     public int asInt() {
 90 |         return decoder.decodeToInt(buffer, start, end - start);
 91 |     }
 92 | 
 93 |     @Override
 94 |     public long asLong() {
 95 |         return decoder.decodeToLong(buffer, start, end - start);
 96 |     }
 97 | 
 98 |     void modifyBounds(int start, int end) { //re-use object to reduce GC overhead
 99 |         this.start = start;
100 |         this.end = end;
101 |         this.quote = null;
102 |     }
103 | 
104 |     void modifyBounds(int start, int end, Character quote) {
105 |         this.start = start;
106 |         this.end = end;
107 |         this.quote = quote;
108 |     }
109 | 
110 |     /** Points this field at the buffer, bounds and quote of another field; the decoder is not changed. */
111 |     public void initFrom(ByteArrayField other) {
112 |         this.buffer = other.buffer;
113 |         this.start = other.start;
114 |         this.end = other.end;
115 |         this.quote = other.quote;
116 |     }
117 | 
118 |     /** @return new field over the same buffer, bounds, charset and quote; the buffer itself is not copied */
119 |     @Override
120 |     public Field clone() {
121 |         return new ByteArrayField(buffer, start, end, decoder.getCharset(), quote);
122 |     }
123 | 
124 |     @Override
125 |     public boolean isEmpty() {
126 |         return start >= end;
127 |     }
128 | 
129 |     /** @return null for an empty field, otherwise the parsed value */
130 |     @Override
131 |     public Double asBoxedDouble() {
132 |         return isEmpty() ? null : asDouble();
133 |     }
134 | 
135 |     /** @return null for an empty field, otherwise the parsed value */
136 |     @Override
137 |     public Integer asBoxedInt() {
138 |         return isEmpty() ? null : asInt();
139 |     }
140 | }


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/parser/ByteSlice.java:
--------------------------------------------------------------------------------
  1 | package uk.elementarysoftware.quickcsv.parser;
  2 | 
  3 | import java.nio.charset.Charset;
  4 | 
  5 | import uk.elementarysoftware.quickcsv.api.ByteArraySource.ByteArrayChunk;
  6 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder.CSVFileMetadata;
  7 | import uk.elementarysoftware.quickcsv.functional.Pair;
  8 | import uk.elementarysoftware.quickcsv.functional.PrimitiveFunctions.FunBiCharToBoolean;
  9 | import uk.elementarysoftware.quickcsv.functional.PrimitiveFunctions.FunBiCharToT;
 10 | import uk.elementarysoftware.quickcsv.functional.PrimitiveFunctions.FunCharToBoolean;
 11 | import uk.elementarysoftware.quickcsv.functional.PrimitiveFunctions.FunCharToT;
 12 | 
 13 | 
 14 | public interface ByteSlice {
 15 |     static final byte CR = 0xD;
 16 |     static final byte LF = 0xA;
 17 | 
 18 |     public static ByteSlice wrap(ByteArrayChunk it, Charset charset) {
 19 |         return new SingleByteSlice(it, charset);
 20 |     }
 21 | 
 22 |     public static ByteSlice empty() {
 23 |         return wrap(ByteArrayChunk.EMPTY, null);
 24 |     }
 25 | 
 26 |     public static ByteSlice join(ByteSlice prefix, ByteSlice suffix) {
 27 |         return new CompositeByteSlice((SingleByteSlice) prefix, (SingleByteSlice) suffix);
 28 |     }
 29 | 
 30 |     public Pair<ByteSlice, ByteSlice> splitOnLastLineEnd();
 31 | 
 32 |     public boolean nextLine();
 33 | 
 34 |     /**
 35 |      * Skip until next occurrence of c character. False if not found and end of slice is reached
 36 |      * @param c - character on which to break
 37 |      * @return true if character was actually found, false if end of slice reached
 38 |      */
 39 |     public boolean skipUntil(final char c);
 40 | 
 41 |     public boolean skipUntil(final char c, final char quote);
 42 | 
 43 |     /**
 44 |      * Returns next field and advances to next field. Returns null when end of line or end of slice is reached.
 45 |      * @param c - character that indicates field boundary
 46 |      * @return object to access field content
 47 |      */
 48 |     public ByteArrayField nextField(final char c);
 49 | 
 50 |     public ByteArrayField nextField(final char c, final char quote);
 51 | 
 52 |     public int size();
 53 | 
 54 |     public boolean hasMoreData();
 55 | 
 56 |     default public boolean isEmpty() {
 57 |         return !hasMoreData();
 58 |     }
 59 | 
 60 |     /**
 61 |      * String representation of current line. Mainly for debug purposes, can return broken line when in composite slice.
 62 |      * @return current line
 63 |      */
 64 |     public String currentLine();
 65 | 
 66 |     default public void skipField(final CSVFileMetadata metadata) {
 67 |         if (metadata.quote.isPresent())
 68 |             skipUntil(metadata.separator, metadata.quote.get());
 69 |         else
 70 |             skipUntil(metadata.separator);
 71 |     }
 72 | 
 73 |     default public ByteArrayField getNextField(final CSVFileMetadata metadata) {
 74 |         if (metadata.quote.isPresent())
 75 |             return nextField(metadata.separator, metadata.quote.get());
 76 |         else
 77 |             return nextField(metadata.separator);
 78 |     }
 79 | 
 80 |     public void incrementUse();
 81 | 
 82 |     public void decremenentUse();
 83 | 
 84 | }
 85 | 
 86 | final class SingleByteSlice implements ByteSlice {
 87 |     final int start;//inclusive
 88 |     final int end;//exclusive
 89 |     final byte[] buffer;
 90 |     final ByteArrayField fieldTemplateObject;
 91 |     final Charset charset;
 92 |     final ByteArrayChunk src;
 93 | 
 94 |     int currentIndex;
 95 | 
 96 |     public SingleByteSlice(ByteArrayChunk src, Charset charset) {
 97 |         this(src, src.getData(), 0, src.getLength(), charset);
 98 |     }
 99 | 
100 |     public SingleByteSlice(ByteArrayChunk src, byte[] buffer, int start, int end, Charset charset) {
101 |         this.src = src;
102 |         this.buffer = buffer;
103 |         this.start = start;
104 |         this.end = end;
105 |         this.fieldTemplateObject = new ByteArrayField(buffer, 0, 0, charset);
106 |         this.currentIndex = start;
107 |         this.charset = charset;
108 |     }
109 | 
110 |     @Override
111 |     public int size() {
112 |         return end - start;
113 |     }
114 | 
115 |     @Override
116 |     public boolean hasMoreData() {
117 |         return currentIndex < end;
118 |     }
119 | 
120 |     boolean frontTrim() {
121 |         boolean seenEOL = false;
122 |         for(; hasMoreData() && (buffer[currentIndex]==CR || buffer[currentIndex]==LF); currentIndex++) {
123 |             seenEOL = true;
124 |         }
125 |         return seenEOL;
126 |     }
127 | 
128 |     @Override
129 |     public boolean nextLine() {
130 |         for(; hasMoreData() && buffer[currentIndex]!=CR && buffer[currentIndex]!=LF; currentIndex++);
131 |         return frontTrim();
132 |     }
133 | 
134 |     public String currentLine() {
135 |         int startIdx = currentIndex;
136 |         for(; startIdx > start && buffer[startIdx]!=CR && buffer[startIdx]!=LF; startIdx--);
137 |         int endIdx = currentIndex;
138 |         for(; endIdx < end && buffer[endIdx]!=CR && buffer[endIdx]!=LF; endIdx++);
139 |         return new String(buffer, startIdx, endIdx - startIdx);
140 |     }
141 | 
142 |     public Pair<ByteSlice, ByteSlice> splitOnLastLineEnd() {
143 |         int i = end-1;
144 |         for (;i >=currentIndex && buffer[i] != LF; i--);
145 |         SingleByteSlice prefix = new SingleByteSlice(src, buffer, currentIndex, i+1, charset);
146 |         SingleByteSlice suffix = new SingleByteSlice(src, buffer, i+1, end, charset);
147 |         return Pair.of(prefix, suffix);
148 |     }
149 | 
150 |     public boolean skipUntil(final char c) {
151 |         boolean isFound = false;
152 |         while(currentIndex < end) {
153 |             if (buffer[currentIndex]==c) {
154 |                 currentIndex++;
155 |                 isFound = true;
156 |                 break;
157 |             }
158 |             currentIndex++;
159 |         }
160 |         return isFound;
161 |     }
162 | 
163 |     public boolean skipUntil(char c, char q) {
164 |         boolean inQuote = currentIndex < buffer.length && buffer[currentIndex] == q;
165 |         if (!inQuote) return skipUntil(c);
166 |         currentIndex++;
167 |         boolean isFound = false;
168 |         while(currentIndex < end) {
169 |             if (buffer[currentIndex]==c && buffer[currentIndex-1] == q) {
170 |                 currentIndex++;
171 |                 isFound = true;
172 |                 break;
173 |             }
174 |             currentIndex++;
175 |         }
176 |         return isFound;
177 |     }
178 | 
179 |     public ByteArrayField nextField(final char c) {
180 |         int startIndex = currentIndex;
181 |         int endIndex = currentIndex;
182 |         while(currentIndex < end) {
183 |             byte cur = buffer[currentIndex];
184 |             if (cur == c || cur == CR || cur == LF) {
185 |                 endIndex = currentIndex;
186 |                 if (cur == c)
187 |                     currentIndex++;
188 |                 break;
189 |             } else {
190 |                 currentIndex++;
191 |             }
192 |         }
193 |         if (currentIndex == startIndex) return null;
194 |         if (currentIndex == end) endIndex = end;
195 |         fieldTemplateObject.modifyBounds(startIndex, endIndex);
196 |         return fieldTemplateObject;
197 |     }
198 | 
199 |     @Override
200 |     public ByteArrayField nextField(char c, char q) {
201 |         boolean inQuote = currentIndex < buffer.length && buffer[currentIndex] == q;
202 |         if (!inQuote) return nextField(c);
203 |         currentIndex++;
204 |         int startIndex = currentIndex;
205 |         int endIndex = currentIndex;
206 |         while(currentIndex < end) {
207 |             byte cur = buffer[currentIndex];
208 |             if ((cur == c || cur == CR || cur == LF) && buffer[currentIndex-1] == q) {//there is an issue when we have escaped quote and then separator, but we ignore it for now
209 |                 endIndex = currentIndex - 1;
210 |                 if (cur == c) currentIndex++; //let frontTrim consume linebreaks later
211 |                 break;
212 |             } else {
213 |                 currentIndex++;
214 |             }
215 |         }
216 |         if (currentIndex == startIndex) return null;
217 |         if (currentIndex == end) {
218 |             if (buffer[end-1] == q) endIndex = end - 1; else endIndex = end;
219 |         }
220 |         fieldTemplateObject.modifyBounds(startIndex, endIndex, q);
221 |         return fieldTemplateObject;
222 |     }
223 | 
224 |     @Override
225 |     public String toString() {
226 |         return new String(buffer, start, size());
227 |     }
228 | 
    /** Registers one more user of this slice by incrementing the use count kept by {@code src}. */
    @Override
    public void incrementUse() {
        src.incrementUseCount();
    }
233 | 
    /** Releases one use of this slice by decrementing the use count kept by {@code src}. */
    @Override
    public void decremenentUse() {
        src.decrementUseCount();
    }
238 | }
239 | 
240 | final class CompositeByteSlice implements ByteSlice {
241 | 
242 |     private final SingleByteSlice prefix;
243 |     private final SingleByteSlice suffix;
244 |     private final ByteArrayField prefixFieldTemplateObject;
245 |     private final ByteArrayField suffixFieldTemplateObject;
246 | 
247 |     private FunCharToT<ByteArrayField> nextFieldFun;
248 |     private FunBiCharToT<ByteArrayField> nextFieldFunQuoted;
249 |     private FunCharToBoolean skipUntilFun;
250 |     private FunBiCharToBoolean skipUntilFunQuoted;
251 | 
252 |     CompositeByteSlice(SingleByteSlice prefix, SingleByteSlice suffix) {
253 |         this.prefix = prefix;
254 |         this.suffix = suffix;
255 |         this.prefixFieldTemplateObject = new ByteArrayField(prefix.buffer, 0, 0, prefix.charset);
256 |         this.suffixFieldTemplateObject = new ByteArrayField(suffix.buffer, 0, 0, suffix.charset);
257 | 
258 |         this.nextFieldFun =  this::nextFieldWithPrefix;
259 |         this.nextFieldFunQuoted =  this::nextFieldWithPrefix;
260 |         this.skipUntilFun = this::skipUntilWithPrefix;
261 |         this.skipUntilFunQuoted = this::skipUntilWithPrefix;
262 |     }
263 | 
264 |     /*
265 |      * -----------------------------------------------------------
266 |      * Generic functions below work on slice with non-empty prefix, but once prefix has been
267 |      * exhausted they will flip to simple suffix delegates.
268 |      * Only frequently called functions are implemented that way.
269 |      * -----------------------------------------------------------
270 |      */
271 |     private ByteArrayField nextFieldWithPrefix(char c) {
272 |         if (prefix.isEmpty()) {
273 |             flip();
274 |             return suffix.nextField(c);
275 |         }
276 |         int startIndex = currentIndex();
277 |         int endIndex = currentIndex();
278 |         byte cur = 0;
279 |         while(hasMoreData()) {
280 |             cur = currentByte();
281 |             if (cur == c || cur == CR || cur == LF) {
282 |                 endIndex = currentIndex();
283 |                 if (cur == c)
284 |                     nextByte();
285 |                 break;
286 |             } else {
287 |                 nextByte();
288 |             }
289 |         }
290 |         if (currentIndex() == startIndex) return null;
291 |         if (cur != c && !hasMoreData()) endIndex = prefix.end + suffix.end;
292 |         return createField(startIndex, endIndex, null);
293 |     }
294 | 
295 |     private ByteArrayField nextFieldWithPrefix(char c, char quote) {
296 |         if (prefix.isEmpty()) {
297 |             flip();
298 |             return suffix.nextField(c, quote);
299 |         }
300 |         boolean inQuote = hasMoreData() && currentByte() == quote;
301 |         if (!inQuote) return nextField(c);
302 |         nextByte();
303 |         int startIndex = currentIndex();
304 |         int endIndex = currentIndex();
305 |         while(hasMoreData()) {
306 |             byte cur = currentByte();
307 |             if ((cur == c || cur == CR || cur == LF) && prevByte() == quote) {
308 |                 endIndex = currentIndex() - 1;
309 |                 if (cur == c)
310 |                     nextByte();
311 |                 break;
312 |             } else {
313 |                 nextByte();
314 |             }
315 |         }
316 |         if (currentIndex() == startIndex) return null;
317 |         if (isEmpty()) {
318 |             if (prevByte() == quote) endIndex = currentIndex() - 1; else endIndex = currentIndex();
319 |         }
320 |         return createField(startIndex, endIndex, quote);
321 |     }
322 | 
323 |     private boolean skipUntilWithPrefix(char c) {
324 |         if (prefix.isEmpty()) {
325 |             flip();
326 |             return suffix.skipUntil(c);
327 |         }
328 |         boolean isFound = prefix.skipUntil(c);
329 |         if (isFound) {
330 |             return true;
331 |         } else {
332 |             return suffix.skipUntil(c);
333 |         }
334 |     }
335 | 
336 |     private boolean skipUntilWithPrefix(char c, char q) {
337 |         if (prefix.isEmpty()) {
338 |             flip();
339 |             return suffix.skipUntil(c, q);
340 |         }
341 |         boolean isFound = prefix.skipUntil(c, q);
342 |         if (isFound) {
343 |             return true;
344 |         } else {
345 |             return suffix.skipUntil(c, q);
346 |         }
347 |     }
348 | 
349 |     private void flip() {
350 |         this.nextFieldFun =  suffix::nextField;
351 |         this.nextFieldFunQuoted =  suffix::nextField;
352 |         this.skipUntilFun = suffix::skipUntil;
353 |         this.skipUntilFunQuoted = suffix::skipUntil;
354 |     }
355 |     /*
356 |      * -----------------------------------------------------------
357 |      * end
358 |      * -----------------------------------------------------------
359 |     */
360 | 
361 |     @Override
362 |     public Pair<ByteSlice, ByteSlice> splitOnLastLineEnd() {
363 |         Pair<ByteSlice, ByteSlice> sliced = suffix.splitOnLastLineEnd();
364 |         return Pair.of(ByteSlice.join(this.prefix, sliced.first), sliced.second);
365 |     }
366 | 
367 |     @Override
368 |     public int size() {
369 |         return prefix.size() + suffix.size();
370 |     }
371 | 
372 |     @Override
373 |     public boolean hasMoreData() {
374 |         return prefix.hasMoreData() || suffix.hasMoreData();
375 |     }
376 | 
377 |     @Override
378 |     public ByteArrayField nextField(char c) {
379 |         return nextFieldFun.apply(c);
380 |     }
381 | 
382 |     @Override
383 |     public ByteArrayField nextField(char c, char quote) {
384 |         return nextFieldFunQuoted.apply(c, quote);
385 |     }
386 | 
387 |     @Override
388 |     public boolean skipUntil(char c) {
389 |         return skipUntilFun.apply(c);
390 |     }
391 | 
392 |     @Override
393 |     public boolean skipUntil(char c, char q) {
394 |         return skipUntilFunQuoted.apply(c, q);
395 |     }
396 | 
397 |     @Override
398 |     public boolean nextLine() {
399 |         if (prefix.isEmpty()) {
400 |             return suffix.nextLine();
401 |         } else {
402 |             boolean seenEOL = prefix.nextLine();
403 |             if (seenEOL) {
404 |                 if (prefix.isEmpty()) suffix.frontTrim();
405 |                 return true;
406 |             } else {
407 |                 return suffix.nextLine();
408 |             }
409 |         }
410 |     }
411 | 
412 |     boolean frontTrim() {
413 |         return prefix.isEmpty() ? suffix.frontTrim() : prefix.frontTrim();
414 |     }
415 | 
416 |     @Override
417 |     public String currentLine() {
418 |         return prefix.isEmpty() ? suffix.currentLine() : prefix.currentLine();
419 |     }
420 | 
421 |     private ByteArrayField createField(int startIndex, int endIndex, Character quote) {
422 |         if (startIndex >= prefix.end) {
423 |             suffixFieldTemplateObject.modifyBounds(startIndex - prefix.end, endIndex - prefix.end, quote);
424 |             return suffixFieldTemplateObject;
425 |         }
426 |         if (endIndex < prefix.end) {
427 |             prefixFieldTemplateObject.modifyBounds(startIndex, endIndex, quote);
428 |             return prefixFieldTemplateObject;
429 |         }
430 |         byte[] result = new byte[endIndex - startIndex];
431 |         System.arraycopy(prefix.buffer, startIndex, result, 0, prefix.end - startIndex);
432 |         System.arraycopy(suffix.buffer, 0, result, prefix.end - startIndex, endIndex - prefix.end);
433 |         return new ByteArrayField(result, 0, result.length, prefix.charset, quote);
434 |     }
435 | 
436 |     @Override
437 |     public String toString() {
438 |         return new StringBuffer().append(prefix).append(suffix).toString();
439 |     }
440 | 
441 |     byte prevByte() {
442 |         if (suffix.currentIndex > suffix.start) return suffix.buffer[suffix.currentIndex - 1];
443 |         return prefix.buffer[prefix.currentIndex - 1];
444 |     }
445 | 
446 | 
447 |     byte currentByte() {
448 |         return prefix.isEmpty() ? suffix.buffer[suffix.currentIndex] : prefix.buffer[prefix.currentIndex];
449 |     }
450 | 
451 |     void nextByte() {
452 |         if (prefix.isEmpty()) suffix.currentIndex++; else prefix.currentIndex++;
453 |     }
454 | 
455 |     int currentIndex() {
456 |         return prefix.currentIndex + suffix.currentIndex;
457 |     }
458 | 
459 |     @Override
460 |     public void decremenentUse() {
461 |         prefix.src.decrementUseCount();
462 |         suffix.src.decrementUseCount();
463 |     }
464 | 
465 |     @Override
466 |     public void incrementUse() {
467 |         throw new IllegalStateException("Should not be called");
468 |     }
469 | }
470 | 


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/parser/FieldSubsetView.java:
--------------------------------------------------------------------------------
  1 | package uk.elementarysoftware.quickcsv.parser;
  2 | 
  3 | import java.util.ArrayList;
  4 | import java.util.Arrays;
  5 | import java.util.EnumMap;
  6 | import java.util.List;
  7 | import java.util.Map;
  8 | 
  9 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder.CSVFileMetadata;
 10 | 
 11 | /**
 12 |  * Provides view on the CSVRecord that focuses on particular subset of fields.
 13 |  * 
 14 |  * Within the view fields can be accessed by index in order of the subset or by field enumeration K.   
 15 |  * @param <K> - enum containing list of fields that form the subset
 16 |  */
 17 | public class FieldSubsetView<K extends Enum<K>> {
 18 |     
 19 |     private final HeaderSource headerSource;
 20 |     private final Class<K> fieldSubset;
 21 |     
 22 |     private boolean isFirstSlice = true;
 23 |     
 24 |     private int[] headerIndexesOfK;
 25 |     private int[] parseOrderToSourceOrder;
 26 |     private int[] fieldSkipSchedule;
 27 | 
 28 |     private FieldSubsetView(HeaderSource headerSource, Class<K> fieldSubset) {
 29 |         this.headerSource = headerSource;
 30 |         this.fieldSubset = fieldSubset;
 31 |     }
 32 |     
 33 |     public static <K extends Enum<K>> FieldSubsetView<K> forExplicitHeader(Class<K> fieldsToSource, String... header) {
 34 |         return new FieldSubsetView<>(new HeaderSource.ExplicitHeader(header), fieldsToSource);
 35 |     }
 36 |     
 37 |     public static <K extends Enum<K>> FieldSubsetView<K> forSourceSuppliedHeader(Class<K> fieldsToSource) {
 38 |         return forSourceSuppliedHeader(fieldsToSource, 0);
 39 |     }
 40 |     
 41 |     public static <K extends Enum<K>> FieldSubsetView<K>  forSourceSuppliedHeader(Class<K> fieldsToSource, int headerRowIndexInFile) {
 42 |         return new FieldSubsetView<>(new HeaderSource.SourceSuppliedHeader(headerRowIndexInFile), fieldsToSource);
 43 |     }
 44 |     
 45 |     public void onSlice(ByteSlice slice, CSVFileMetadata metadata) {
 46 |         if (isFirstSlice) {
 47 |             headerSource.onSlice(slice, metadata);
 48 |             initLookups();
 49 |             isFirstSlice = false;
 50 |         }
 51 |     }
 52 |     
 53 |     private void initLookups() {
 54 |         List<String> header = headerSource.getHeader();
 55 |         headerIndexesOfK = getHeaderIndexesOfK(header);
 56 |         Map<K, Integer> fieldToHeaderIndex = new EnumMap<K, Integer>(fieldSubset);
 57 |         for (K k : fieldSubset.getEnumConstants()) {
 58 |             fieldToHeaderIndex.put(k, header.indexOf(k.toString()));
 59 |         }
 60 |         
 61 |         this.fieldSkipSchedule = new int[headerIndexesOfK.length];
 62 |         int lastFieldIndex = -1;
 63 |         for (int i = 0; i < headerIndexesOfK.length; i++) {
 64 |             int idx = headerIndexesOfK[i];
 65 |             int nSkip = idx - lastFieldIndex - 1;
 66 |             fieldSkipSchedule[i] = nSkip;
 67 |             lastFieldIndex = idx;
 68 |         }
 69 |         
 70 |         parseOrderToSourceOrder = new int[getFieldSubsetSize()];
 71 |         K[] ks = fieldSubset.getEnumConstants();
 72 |         for (int i = 0; i < ks.length; i++) {
 73 |             int headerIdx = fieldToHeaderIndex.get(ks[i]);
 74 |             parseOrderToSourceOrder[i] = Arrays.binarySearch(headerIndexesOfK, headerIdx);
 75 |         }
 76 |     }
 77 | 
 78 |     private int[] getHeaderIndexesOfK(List<String> header) {
 79 |         K[] ks = fieldSubset.getEnumConstants();
 80 |         int[] result = new int[ks.length];
 81 |         for (int i = 0; i < result.length; i++) {
 82 |             if ((result[i] = header.indexOf(ks[i].toString())) == -1) {
 83 |                 throw new RuntimeException("Field not found in header: "+ks[i].toString());
 84 |             }
 85 |         }
 86 |         Arrays.sort(result);
 87 |         return result;
 88 |     }
 89 | 
 90 |     int[] getFieldIndexes() {
 91 |         return headerIndexesOfK;
 92 |     }
 93 |     
 94 |     public Class<K> getFieldSubset() {
 95 |         return fieldSubset;
 96 |     }
 97 | 
 98 |     int[] getFieldSkipSchedule() {
 99 |         return fieldSkipSchedule;
100 |     }
101 |     
102 |     List<String> getHeader() {
103 |         return headerSource.getHeader();
104 |     }
105 | 
106 |     int indexOfInSourceView(int parseIdx) {
107 |         return parseOrderToSourceOrder[parseIdx];
108 |     }
109 |     
110 |     int getFieldSubsetSize() {
111 |         return fieldSubset.getEnumConstants().length;
112 |     }
113 |     
114 |     public static abstract class HeaderSource {
115 |         
116 |         private HeaderSource() {}
117 |         
118 |         abstract void onSlice(ByteSlice slice, CSVFileMetadata metadata);
119 |         abstract List<String> getHeader();
120 | 
121 |         private static class ExplicitHeader extends HeaderSource {
122 |             private final String[] header;
123 | 
124 |             public ExplicitHeader(String[] header) {
125 |                 this.header = header;
126 |             }
127 |             
128 |             @Override
129 |             List<String> getHeader() {
130 |                 return Arrays.asList(header);
131 |             }
132 |             
133 |             @Override 
134 |             void onSlice(ByteSlice slice, CSVFileMetadata metadata) {}
135 |         }
136 |         
137 |         private static class SourceSuppliedHeader extends HeaderSource {
138 |             
139 |             private final int headerIndex;
140 |             private List<String> header;
141 | 
142 |             public SourceSuppliedHeader(int headerIndex) {
143 |                 this.headerIndex = headerIndex;
144 |             }
145 |             
146 |             @Override 
147 |             void onSlice(ByteSlice slice, CSVFileMetadata metadata) {
148 |                 for (int i = 0; i < headerIndex; i++) {
149 |                     slice.nextLine();
150 |                 }
151 |                 List<String> header = new ArrayList<>();
152 |                 ByteArrayField field;
153 |                 while((field = slice.getNextField(metadata)) != null) {
154 |                     header.add(field.asString());
155 |                 }
156 |                 slice.nextLine();
157 |                 this.header = header;
158 |             }
159 | 
160 |             @Override
161 |             List<String> getHeader() {
162 |                 return header;
163 |             }
164 |         }
165 |     }
166 | }


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/parser/InputStreamToByteArraySourceAdapter.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.parser;
 2 | 
 3 | import java.io.IOException;
 4 | import java.io.InputStream;
 5 | 
 6 | import uk.elementarysoftware.quickcsv.api.ByteArraySource;
 7 | 
 8 | class InputStreamToByteArraySourceAdapter implements ByteArraySource {
 9 | 
10 |     private final InputStream is;
11 |     private final BufferPool pool;
12 |     
13 |     public InputStreamToByteArraySourceAdapter(InputStream is, BufferPool pool) {
14 |         this.is = is;
15 |         this.pool = pool;
16 |     }
17 |   
18 |     @Override
19 |     public ByteArrayChunk getNext() throws IOException {
20 |         byte[] buffer = pool.getBuffer();
21 |         int read = is.read(buffer);
22 |         boolean isEndReached = read == -1;
23 |         return new ByteArrayChunk(buffer, Math.max(0, read), isEndReached, pool::handBack);
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/parser/QuickCSVParser.java:
--------------------------------------------------------------------------------
  1 | package uk.elementarysoftware.quickcsv.parser;
  2 | 
  3 | import java.io.InputStream;
  4 | import java.nio.charset.Charset;
  5 | import java.util.List;
  6 | import java.util.Optional;
  7 | import java.util.Spliterator;
  8 | import java.util.Spliterators;
  9 | import java.util.function.Consumer;
 10 | import java.util.function.Function;
 11 | import java.util.stream.Stream;
 12 | import java.util.stream.StreamSupport;
 13 | 
 14 | import uk.elementarysoftware.quickcsv.api.ByteArraySource;
 15 | import uk.elementarysoftware.quickcsv.api.ByteArraySource.ByteArrayChunk;
 16 | import uk.elementarysoftware.quickcsv.api.CSVParser;
 17 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder.CSVFileMetadata;
 18 | import uk.elementarysoftware.quickcsv.functional.Pair;
 19 | import uk.elementarysoftware.quickcsv.api.CSVRecord;
 20 | import uk.elementarysoftware.quickcsv.api.CSVRecordWithHeader;
 21 | import uk.elementarysoftware.quickcsv.api.Field;
 22 | 
/**
 * {@link CSVParser} implementation that cuts the input into slices ending on line
 * boundaries and exposes them through a splittable {@link Spliterator}, one slice per split.
 *
 * @param <T> type produced by the record mapper
 * @param <K> enum describing the header subset when a {@link FieldSubsetView} is used
 */
public class QuickCSVParser<T, K extends Enum<K>> implements CSVParser<T> {

    private final CSVFileMetadata metadata;
    private final int bufferSize;
    private final Function<CSVRecord, T> mapper;
    private final Optional<FieldSubsetView<K>> fieldSubsetView;
    private final Charset charset;

    /** Creates a parser whose mapper receives records addressable by header field. */
    public QuickCSVParser(int bufferSize, CSVFileMetadata metadata, Function<CSVRecordWithHeader<K>, T> mapper, 
            FieldSubsetView<K> fieldSubsetView, Charset charset) {
        this.metadata = metadata;
        this.bufferSize = bufferSize;
        this.mapper = cast(mapper);
        this.fieldSubsetView = Optional.of(fieldSubsetView);
        this.charset = charset;
    }
    
    /** Creates a parser whose mapper reads fields positionally; no field subset view is used. */
    public QuickCSVParser(int bufferSize, CSVFileMetadata metadata, Function<CSVRecord, T> mapper, Charset charset) {
        this.metadata = metadata;
        this.bufferSize = bufferSize;
        this.mapper = mapper;
        this.fieldSubsetView = Optional.empty();
        this.charset = charset;
    }
    
    // Adapts a header-aware mapper to the CSVRecord-typed field. The cast is unchecked; when a
    // view is present the records are LensingByteSliceSpliterator instances (see sliceSpliterator).
    @SuppressWarnings("unchecked")
    private static <T, K extends Enum<K>> Function<CSVRecord, T> cast(Function<CSVRecordWithHeader<K>, T> f) {
        return r -> f.apply((CSVRecordWithHeader<K>) r);
    }

    
    /** Parses the stream using buffers from a fresh {@link BufferPool} of {@code bufferSize}. */
    @Override
    public Stream<T> parse(InputStream is) {
        BufferPool pool = new BufferPool(bufferSize);
        return parse(new InputStreamToByteArraySourceAdapter(is, pool));
    }
    
    /** Parses the source; the returned stream is created as a parallel stream. */
    @Override
    public Stream<T> parse(ByteArraySource bas) {
        return StreamSupport.stream(new SplittingSpliterator(bas), true);
    }

    /**
     * Top-level spliterator: every {@link #trySplit()} reads one more chunk from the source
     * and hands it out as an independent slice spliterator.
     */
    class SplittingSpliterator implements Spliterator<T> {
        
        private final ByteArraySource bas;
        
        // bytes after the last line end of the previous chunk, carried over to the next slice
        private ByteSlice prefix = ByteSlice.empty(); 
        private boolean isEndReached = false;

        private Spliterator<T> sequentialSplitterator = Spliterators.emptySpliterator();

        SplittingSpliterator(ByteArraySource bas) {
            this.bas = bas;
        }

        /**
         * Sequential traversal: drains the current slice, then fetches the next one.
         * NOTE(review): returns false when a fetched slice has no data even if the source
         * has not signalled its end - confirm that chunks always contain a line end.
         */
        @Override
        public boolean tryAdvance(Consumer<? super T> action) { //usually only called in sequential mode
            boolean advanced = sequentialSplitterator.tryAdvance(action);
            if (advanced) return true;
            if (isEndReached) return false;
            ByteSlice nextSlice = nextSlice();
            if (!nextSlice.hasMoreData()) return false;
            this.sequentialSplitterator = sliceSpliterator(nextSlice);
            return tryAdvance(action);
        }

        /** Splits off the next slice, or returns {@code null} when there is nothing left to split. */
        @Override
        public Spliterator<T> trySplit() {
            if (isEndReached) return null;
            ByteSlice nextSlice = nextSlice();
            if (!nextSlice.hasMoreData()) return null;
            return sliceSpliterator(nextSlice);
        }
        
        /**
         * Produces the next slice made of whole lines: the carried-over prefix joined with the
         * new chunk up to its last line end. The remainder becomes the new prefix. On the last
         * chunk the whole chunk is joined.
         */
        private ByteSlice nextSlice() {
            ByteSlice bareSlice = nextBareSlice();
            bareSlice.incrementUse();
            if (isEndReached) {
                return ByteSlice.join(prefix, bareSlice);
            } else {
                Pair<ByteSlice, ByteSlice> sliced = bareSlice.splitOnLastLineEnd();
                ByteSlice result = ByteSlice.join(prefix, sliced.first);
                this.prefix = sliced.second;
                // second increment for the retained remainder - presumably so the buffer is
                // released only after both the returned slice and the prefix are done with it
                bareSlice.incrementUse();
                return result;
            }
        }

        /**
         * Reads the next chunk, records whether it is the last one and wraps it in a slice.
         * The slice is offered to the field subset view, which consumes the header on the first call.
         * Checked exceptions are rethrown wrapped in {@link RuntimeException}.
         */
        private ByteSlice nextBareSlice() {
            try {
                ByteArrayChunk it = bas.getNext();
                this.isEndReached = it.isLast();
                ByteSlice slice = ByteSlice.wrap(it, charset);
                if (fieldSubsetView.isPresent()) fieldSubsetView.get().onSlice(slice, metadata);
                return slice;
            } catch (RuntimeException e) {
                throw e;
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }

        // total size is unknown up front
        @Override
        public long estimateSize() {
           return Long.MAX_VALUE;
        }

        @Override
        public int characteristics() {
            return ORDERED | NONNULL | IMMUTABLE;
        }
    }
    
    /** Picks the slice spliterator flavour depending on whether a field subset view is configured. */
    Spliterator<T> sliceSpliterator(ByteSlice slice) {
        return fieldSubsetView.isPresent() ? new LensingByteSliceSpliterator(slice) : new ByteSliceSpliterator(slice);
    }
    
    /**
     * Non-splittable spliterator over one slice. It doubles as the {@link CSVRecord} passed
     * to the mapper, so the mapper reads fields straight from the slice cursor.
     */
    class ByteSliceSpliterator implements Spliterator<T>, CSVRecord {

        protected final ByteSlice slice;

        ByteSliceSpliterator(ByteSlice slice) {
            this.slice = slice;//incoming slice should have no broken lines
        }

        /** Maps the next line; when the slice is exhausted its use count is decremented. */
        @Override
        public boolean tryAdvance(Consumer<? super T> action) {
            if (!slice.hasMoreData()) {
                slice.decremenentUse();
                return false;
            }
            advance(action);
            return true;
        }

        /** Applies the mapper to the current line, passes the result on, then moves to the next line. */
        protected void advance(Consumer<? super T> action) {
            T t = mapper.apply(this);
            action.accept(t);
            slice.nextLine();
        }

        @Override
        public Spliterator<T> trySplit() {
            return null;
        }

        @Override
        public long estimateSize() {
            return slice.size();
        }

        @Override
        public int characteristics() {
            return ORDERED | NONNULL | IMMUTABLE;
        }

        @Override
        public void skipField() {
            slice.skipField(metadata);
        }

        @Override
        public void skipFields(int nFields) {
            for (int i = 0; i < nFields; i++) {
                skipField();
            }
        }

        @Override
        public ByteArrayField getNextField() {
            return slice.getNextField(metadata);
        }
    }
    
    /**
     * Slice spliterator that pre-parses the fields selected by the {@link FieldSubsetView}
     * for each line, so the mapper can fetch them by enum constant.
     */
    class LensingByteSliceSpliterator extends ByteSliceSpliterator implements CSVRecordWithHeader<K> {

        private final FieldSubsetView<K> view;
        // one reusable field per subset column, refilled for every line
        private final ByteArrayField[] fieldTemplates; 

        public LensingByteSliceSpliterator(ByteSlice slice) {
            super(slice);
            this.view = fieldSubsetView.get();
            this.fieldTemplates = new ByteArrayField[view.getFieldSubsetSize()];
            for (int i = 0; i < fieldTemplates.length; i++) {
                fieldTemplates[i] = new ByteArrayField(null, -1, -1, charset);
            }
        }
        
        /** Same as the parent, but the subset fields are parsed before the mapper is invoked. */
        @Override
        public boolean tryAdvance(Consumer<? super T> action) {
            if (!slice.hasMoreData()) {
                slice.decremenentUse();
                return false;
            }
            parseFields();
            super.advance(action);
            return true;
        }

        /** Walks the skip schedule, copying each selected field into its template. */
        private void parseFields() {
            int[] skipSchedule = view.getFieldSkipSchedule();
            for (int i = 0; i < skipSchedule.length; i++) {
                skipFields(skipSchedule[i]);
                ByteArrayField field = super.getNextField();//TODO: init into template directly
                if (field != null) {
                	fieldTemplates[i].initFrom(field);
                } else {
                	//when line ends with separator it is very difficult to distinguish between that and overflow when getNextField() returns null. Here we assume correct field schedule and map null to empty field.
                	fieldTemplates[i].initFrom(ByteArrayField.EMPTY);
                }
            }
        }

        /** Returns the pre-parsed field for the given enum constant. */
        @Override
        public Field getField(K fieldName) {
            return fieldTemplates[view.indexOfInSourceView(fieldName.ordinal())];
        }

        @Override
        public List<String> getHeader() {
            return view.getHeader();
        }
    }
}


--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/decoder/doubles/DoubleParserTest.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.decoder.doubles;
 2 | 
 3 | import static org.junit.Assert.*;
 4 | 
 5 | import java.io.File;
 6 | import java.io.FileInputStream;
 7 | import java.net.URL;
 8 | import java.nio.charset.Charset;
 9 | 
10 | import org.apache.commons.io.IOUtils;
11 | import org.apache.commons.io.LineIterator;
12 | import org.junit.Test;
13 | 
14 | import uk.elementarysoftware.quickcsv.decoder.doubles.DoubleParser;
15 | import uk.elementarysoftware.quickcsv.decoder.doubles.JDKDoubleParserAdapter;
16 | import uk.elementarysoftware.quickcsv.decoder.doubles.QuickDoubleParser;
17 | 
18 | 
19 | /**
20 |  * Checks that every {@link DoubleParser} implementation agrees with the JDK on literals,
21 |  * on a number embedded in a larger buffer and on the tokens of /cities-dos.txt.
22 |  */
23 | public class DoubleParserTest {
24 | 
25 |     @Test
26 |     public void testSimpleCases() {
27 |         doTestSimpleCases(new JDKDoubleParserAdapter());
28 |         doTestSimpleCases(new QuickDoubleParser());
29 |     }
30 | 
31 |     @Test
32 |     public void testBigBuffer() {
33 |         doTestBigBuffer(new JDKDoubleParserAdapter());
34 |         doTestBigBuffer(new QuickDoubleParser());
35 |     }
36 | 
37 |     @Test
38 |     public void testFile() throws Exception {
39 |         doTestFile(new JDKDoubleParserAdapter());
40 |         doTestFile(new QuickDoubleParser());
41 |     }
42 | 
43 |     /** Parses a few literals and compares them with the expected values (tolerance 1E-14). */
44 |     private void doTestSimpleCases(DoubleParser parser) {
45 |         assertEquals(0.0, parser.parse("0"), 1E-14);
46 |         assertEquals(3.14159265, parser.parse("3.14159265"), 1E-14);
47 |         assertEquals(-93231637.47759183, parser.parse("-93231637.47759183"), 1E-14);
48 |         assertEquals(-0.3903, parser.parse("-0.3903"), 1E-14);
49 |         assertEquals(2.71828183, parser.parse("2.71828183"), 1E-14);
50 |     }
51 | 
52 |     /** Parses a number located in the middle of a buffer via the (bytes, offset, length) overload. */
53 |     private void doTestBigBuffer(DoubleParser parser) {
54 |         String prefix = "anything";
55 |         String middle = "2.71828183";
56 |         String suffix = "anything again";
57 |         // Every character is ASCII, so String lengths double as byte offsets.
58 |         byte[] buffer = (prefix + middle + suffix).getBytes();
59 |         double result = parser.parse(buffer, prefix.length(), middle.length());
60 |         assertEquals(2.71828183, result, 1E-14);
61 |     }
62 | 
63 |     /**
64 |      * Feeds every comma-separated token of the first 500 lines of /cities-dos.txt to the
65 |      * parser and compares each outcome with {@link Double#parseDouble(String)}.
66 |      */
67 |     private void doTestFile(DoubleParser parser) throws Exception {
68 |         int nLinesToTest = 500;
69 |         URL fileUrl = getClass().getResource("/cities-dos.txt");
70 |         assertNotNull("Resource /cities-dos.txt is missing from the test classpath", fileUrl);
71 |         File file = new File(fileUrl.toURI());
72 |         // try-with-resources releases the file handle even if an assertion fails mid-file.
73 |         try (FileInputStream in = new FileInputStream(file)) {
74 |             LineIterator lines = IOUtils.lineIterator(in, Charset.defaultCharset());
75 |             int lineNumber = 0;
76 |             while (lines.hasNext() && lineNumber < nLinesToTest) {
77 |                 for (String token : lines.next().split(",")) {
78 |                     compareParsingResult(parser, token);
79 |                 }
80 |                 lineNumber++;
81 |             }
82 |         }
83 |     }
84 | 
85 |     /**
86 |      * Runs the parser under test and the JDK on the same text. Both must end the same
87 |      * way: with an exception of the same class, or with equal {@code Double} values.
88 |      */
89 |     private void compareParsingResult(DoubleParser parser, String stringValue) {
90 |         Object actual, expected;
91 |         try { actual = parser.parse(stringValue); } catch (Exception e) { actual = e; }
92 |         try { expected = Double.parseDouble(stringValue); } catch (Exception e) { expected = e; }
93 |         assertEquals("Outcome type differs for: " + stringValue, expected.getClass(), actual.getClass());
94 |         if (expected instanceof Double) {
95 |             assertEquals("Failed for: "+stringValue, expected, actual);
96 |         }
97 |     }
98 | }
99 | 


--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/decoder/ints/IntParserTest.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.decoder.ints;
 2 | 
 3 | import static org.junit.Assert.assertEquals;
 4 | 
 5 | import java.util.Random;
 6 | import java.util.function.Function;
 7 | 
 8 | import org.junit.experimental.theories.DataPoints;
 9 | import org.junit.experimental.theories.FromDataPoints;
10 | import org.junit.experimental.theories.Theories;
11 | import org.junit.experimental.theories.Theory;
12 | import org.junit.runner.RunWith;
13 | 
14 | @RunWith(Theories.class)
15 | public class IntParserTest {
16 |     
17 |     private static final int randomSize = 1000; 
18 |     
19 |     private static final Random rnd = new Random();
20 |     
21 |     @DataPoints("validInts")
22 |     public static String[] randomInts() {
23 |         return rnd.ints(randomSize).mapToObj(i -> ""+i).toArray(String[]::new);
24 |     }
25 |     
26 |     @DataPoints("validInts")
27 |     public static String[] specialInts() {
28 |         return new String[] {"0", "-0", "+0", "+1", Integer.MAX_VALUE+"", Integer.MIN_VALUE+""};
29 |     }
30 |     
31 |     @DataPoints("failingInts") 
32 |     public static String[] specialFailingInts() {
33 |         return new String[] {"X0", "-", "+", Long.MAX_VALUE+"", "", "Hello"};
34 |     }
35 |     
36 |     private QuickIntParser parser = new QuickIntParser();
37 |     
38 |     @Theory
39 |     public void parsersAreEquivalentOnValidInts(@FromDataPoints("validInts") String intValue) {
40 |         compareParsingResult(intValue, s -> Integer.parseInt(s), s -> parser.parse(s));
41 |     }
42 |     
43 |     @Theory
44 |     public void parsersAreEquivalentOnFailingInts(@FromDataPoints("failingInts") String intValue) {
45 |         compareParsingResult(intValue, s -> Integer.parseInt(s), s -> parser.parse(s));
46 |     }
47 |     
48 |     private void compareParsingResult(String value, Function<String, Integer> p1, Function<String, Integer> p2) {
49 |         Object v1 = null;
50 |         try {
51 |             v1 = p1.apply(value);
52 |         } catch (Exception e) {
53 |             v1 = e;
54 |         }
55 |         Object v2 = null;
56 |         try {
57 |             v2 = p2.apply(value);
58 |         } catch (Exception e) {
59 |             v2 = e;
60 |         }
61 |         assertEquals(v2.getClass(), v1.getClass());
62 |         if (v2 instanceof Integer) {
63 |             assertEquals(v2, v1);
64 |         }
65 |     }
66 | }


--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/decoder/ints/LongParserTest.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.decoder.ints;
 2 | 
 3 | import static org.junit.Assert.assertEquals;
 4 | 
 5 | import java.util.Arrays;
 6 | import java.util.Random;
 7 | import java.util.function.Function;
 8 | 
 9 | import org.junit.experimental.theories.DataPoints;
10 | import org.junit.experimental.theories.FromDataPoints;
11 | import org.junit.experimental.theories.Theories;
12 | import org.junit.experimental.theories.Theory;
13 | import org.junit.runner.RunWith;
14 | 
15 | /**
16 |  * Theory-driven check that {@link QuickLongParser} behaves exactly like
17 |  * {@link Long#parseLong(String)}, both for valid and for malformed input.
18 |  */
19 | @RunWith(Theories.class)
20 | public class LongParserTest {
21 | 
22 |     private static final int randomSize = 1000;
23 | 
24 |     private static final Random rnd = new Random();
25 | 
26 |     /**
27 |      * Supplies {@code randomSize} random longs rendered as decimal strings. Uses
28 |      * {@link Random#longs(long)} so that values outside the int range are exercised too.
29 |      */
30 |     @DataPoints("validLongs")
31 |     public static String[] randomLongs() {
32 |         return rnd.longs(randomSize).mapToObj(l -> ""+l).toArray(String[]::new);
33 |     }
34 | 
35 |     /** Supplies signed zeros, an explicit plus sign and both ends of the long range. */
36 |     @DataPoints("validLongs")
37 |     public static String[] specialLongs() {
38 |         return new String[] {"0", "-0", "+0", "+1", Long.MAX_VALUE+"", Long.MIN_VALUE+""};
39 |     }
40 | 
41 |     /** Supplies inputs used by the failing-input theory (non-digits, bare signs, a double, empty). */
42 |     @DataPoints("failingLongs")
43 |     public static String[] specialFailingLongs() {
44 |         return new String[] {"X0", "-", "+", Double.MAX_VALUE+"", "", "Hello"};
45 |     }
46 | 
47 |     private QuickLongParser parser = new QuickLongParser();
48 | 
49 |     @Theory
50 |     public void parsersAreEquivalentOnValidLongs(@FromDataPoints("validLongs") String intValue) {
51 |         compareParsingResult(intValue, s -> Long.parseLong(s), s -> parser.parse(s));
52 |     }
53 | 
54 |     @Theory
55 |     public void parsersAreEquivalentOnFailingLongs(@FromDataPoints("failingLongs") String intValue) {
56 |         compareParsingResult(intValue, s -> Long.parseLong(s), s -> parser.parse(s));
57 |     }
58 | 
59 |     /**
60 |      * Applies the reference parser {@code p1} and the parser under test {@code p2} to
61 |      * {@code value}. Both must end the same way: with an exception of the same class,
62 |      * or with equal results. The reference outcome is reported as the expected value.
63 |      */
64 |     private void compareParsingResult(String value, Function<String, Long> p1, Function<String, Long> p2) {
65 |         Object expected = outcomeOf(p1, value);
66 |         Object actual = outcomeOf(p2, value);
67 |         // The raw bytes are included so that non-ASCII digits are recognisable in the report.
68 |         assertEquals("Value 2:"+actual+", value 1: "+expected+", source: "+value+"; "+Arrays.toString(value.getBytes()),
69 |                 expected.getClass(), actual.getClass());
70 | 
71 |         if (expected instanceof Long) {
72 |             assertEquals("Parsed value differs for input '" + value + "'", expected, actual);
73 |         }
74 |     }
75 | 
76 |     /** Returns the parsed value, or the exception thrown while parsing. */
77 |     private static Object outcomeOf(Function<String, Long> parseFunction, String value) {
78 |         try {
79 |             return parseFunction.apply(value);
80 |         } catch (Exception e) {
81 |             return e;
82 |         }
83 |     }
84 | 
85 |     /**
86 |      * Scratch entry point: decodes two raw bytes with the platform default charset and
87 |      * prints what {@link Long#parseLong(String)} makes of the resulting text.
88 |      * NOTE(review): 0xD9 0xA2 is the UTF-8 form of an Arabic-Indic digit, so the result
89 |      * depends on the default charset being UTF-8 - confirm before relying on it.
90 |      */
91 |     public static void main(String[] args) {
92 |         byte[] x = new byte[] {-39, -94};
93 |         long l = Long.parseLong(new String(x));
94 |         System.out.println(l);
95 |     }
96 | }


--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/integration/CorrectnessTest.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.integration;
 2 | 
 3 | import static org.junit.Assert.*;
 4 | 
 5 | import java.io.File;
 6 | import java.io.IOException;
 7 | import java.util.List;
 8 | import java.util.stream.Stream;
 9 | 
10 | import org.junit.Test;
11 | 
12 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
13 | import uk.elementarysoftware.quickcsv.api.StandardMappers;
14 | 
15 | /**
16 |  * Parses src/test/resources/correctness.txt into lists of strings and checks the first
17 |  * six rows, whose expected values contain separators, quotes, a line break and empty fields.
18 |  */
19 | public class CorrectnessTest {
20 | 
21 |     File input = new File("src/test/resources/correctness.txt");
22 | 
23 |     @Test
24 |     @SuppressWarnings("unchecked")
25 |     public void testParse() throws IOException {
26 |         List<String>[] rows;
27 |         // Close the stream once the rows are collected so the underlying input is released.
28 |         try (Stream<List<String>> stream = CSVParserBuilder.aParser(StandardMappers.TO_STRING_LIST).build().parse(input)) {
29 |             rows = stream.toArray(List[]::new);
30 |         }
31 |         // Fail with a readable message instead of an ArrayIndexOutOfBoundsException below.
32 |         assertTrue("Expected at least 6 rows but got " + rows.length, rows.length >= 6);
33 |         assertRow(rows[0], "Year", "Make", "Model", "Description", "Price");
34 |         assertRow(rows[1], "1997", "Ford", "E350", "ac, abs, moon", "3000.00");
35 |         assertRow(rows[2], "1999", "Chevy", "Venture \"Extended Edition\"", "", "4900.00");
36 |         // The multi-line field is expected to use the platform line separator.
37 |         String separ = System.getProperty("line.separator");
38 |         assertRow(rows[3], "1996", "Jeep", "Grand Cherokee", "MUST SELL!"+separ+"air, moon roof, loaded", "4799.00");
39 |         assertRow(rows[4], "1999", "Chevy", "Venture \"Extended Edition, Very Large\"", "", "5000.00");
40 |         assertRow(rows[5], "", "", "Venture \"Extended Edition\"", "", "4900.00");
41 |     }
42 | 
43 |     /** Asserts that the parsed row holds exactly the expected field values, in order. */
44 |     private static void assertRow(List<String> actual, String... expected) {
45 |         assertArrayEquals(expected, actual.toArray(new String[0]));
46 |     }
47 | 
48 | }
49 | 


--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/integration/HttpStreamTest.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.integration;
 2 | 
 3 | import static org.junit.Assert.assertEquals;
 4 | 
 5 | import java.io.File;
 6 | import java.net.URI;
 7 | import java.util.stream.Stream;
 8 | 
 9 | import org.apache.commons.io.FileUtils;
10 | import org.apache.http.client.methods.CloseableHttpResponse;
11 | import org.apache.http.client.methods.HttpGet;
12 | import org.apache.http.impl.client.CloseableHttpClient;
13 | import org.apache.http.impl.client.HttpClients;
14 | import org.eclipse.jetty.server.Handler;
15 | import org.eclipse.jetty.server.Server;
16 | import org.eclipse.jetty.server.handler.DefaultHandler;
17 | import org.eclipse.jetty.server.handler.HandlerList;
18 | import org.eclipse.jetty.server.handler.ResourceHandler;
19 | import org.junit.Rule;
20 | import org.junit.Test;
21 | import org.junit.rules.ExternalResource;
22 | 
23 | import uk.elementarysoftware.quickcsv.api.CSVParser;
24 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
25 | import uk.elementarysoftware.quickcsv.sampledomain.City;
26 | 
27 | public class HttpStreamTest {
28 | 	
29 | 	@Rule
30 | 	public final FileServer httpServer = new FileServer();
31 | 	
32 | 	private final File testFile = IntegrationTest.inputUnix;
33 | 	private final CSVParser<City> parser = CSVParserBuilder.aParser(City.MAPPER).build();
34 | 	
35 | 	@Test
36 | 	public void testParseHttpResource() throws Exception {
37 | 		CloseableHttpClient httpclient = HttpClients.createDefault();
38 | 		HttpGet httpGet = new HttpGet(httpServer.getURI().resolve(testFile.getName()));
39 | 		CloseableHttpResponse response = httpclient.execute(httpGet);
40 | 		
41 | 		try(Stream<City> stream = parser.parse(response.getEntity().getContent())) {
42 | 			assertEquals(FileUtils.readLines(testFile, "UTF-8").size(), stream.count());
43 | 		}
44 | 	}
45 | 
46 | 	static class FileServer extends ExternalResource {
47 | 		
48 | 		private Server server;
49 | 
50 | 		@Override
51 | 		protected void before() throws Throwable {
52 | 			server = new Server(0);
53 | 
54 | 			ResourceHandler rh = new ResourceHandler();
55 | 	        rh.setResourceBase("src/test/resources");
56 | 	        
57 | 	        HandlerList handlers = new HandlerList();
58 | 	        handlers.setHandlers(new Handler[] { rh, new DefaultHandler() });
59 | 	        server.setHandler(handlers);
60 | 	        
61 | 	        server.start();
62 | 		}
63 | 		
64 | 
65 | 		@Override
66 | 		protected void after() {
67 | 			try {
68 | 				server.stop();
69 | 			} catch (Exception e) {
70 | 				//no-op
71 | 			}
72 | 		}
73 | 		
74 | 		public URI getURI() {
75 | 			return server.getURI();
76 | 		}	         
77 | 	}
78 | }
79 | 


--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/integration/IntegrationTest.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.integration;
 2 | 
 3 | import static org.junit.Assert.*;
 4 | 
 5 | import java.io.File;
 6 | import java.util.stream.Stream;
 7 | 
 8 | import org.junit.Test;
 9 | 
10 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
11 | import uk.elementarysoftware.quickcsv.parser.simple.StraightForwardParser;
12 | import uk.elementarysoftware.quickcsv.sampledomain.City;
13 | 
14 | public class IntegrationTest {
15 |     
16 |     static final File inputDos = new File("src/test/resources/cities-dos.txt");
17 |     static final File inputUnix = new File("src/test/resources/cities-unix.txt");
18 |     
19 |     static final int[] bufferSizesToTest = new int[] {1024, 11_111, 1_000_000};
20 |     
21 |     
22 |     @Test
23 |     public void testMultiThreaded() throws Exception {
24 |         Stream<City> s1 = new StraightForwardParser().parse(inputDos).map(City.MAPPER);
25 |         Object[] expected = s1.toArray();
26 |         for (int i = 0; i < bufferSizesToTest.length; i++) {
27 |             Stream<City> s2 = CSVParserBuilder.aParser(City.MAPPER).usingBufferSize(bufferSizesToTest[i]).build().parse(inputDos);
28 |             assertArrayEquals(expected, s2.toArray());
29 |         }
30 |     }
31 |     
32 |     @Test
33 |     public void testSingleThreaded() throws Exception {
34 |         Stream<City> s1 = new StraightForwardParser().parse(inputDos).map(City.MAPPER);
35 |         Stream<City> s2 = CSVParserBuilder.aParser(City.MAPPER).build().parse(inputDos).sequential();
36 |         assertArrayEquals(s1.toArray(), s2.sequential().toArray());
37 |     }
38 |     
39 |     @Test
40 |     public void testDosVsUnix() throws Exception {
41 |         Stream<City> s1 = CSVParserBuilder.aParser(City.MAPPER).build().parse(inputUnix);
42 |         Stream<City> s2 = CSVParserBuilder.aParser(City.MAPPER).build().parse(inputDos);
43 |         assertArrayEquals(s1.toArray(), s2.sequential().toArray());
44 |     }
45 |     
46 | }


--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/manual/CityManualPerformanceTester.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.manual;
 2 | 
 3 | import java.io.File;
 4 | import java.io.IOException;
 5 | import java.io.InputStream;
 6 | 
 7 | import org.apache.commons.io.FileUtils;
 8 | import org.apache.commons.io.IOUtils;
 9 | 
10 | import uk.elementarysoftware.quickcsv.api.CSVParser;
11 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
12 | import uk.elementarysoftware.quickcsv.sampledomain.City;
13 | 
14 | 
15 | /**
16 |  * Manual throughput check: builds a large temporary CSV file out of repeated copies of
17 |  * /cities-unix.txt, parses it several times and prints the speed of every run.
18 |  */
19 | public class CityManualPerformanceTester {
20 |     long maxSpeed = 0;
21 | 
22 |     /** Creates the temporary input, runs the measurements and deletes the file again. */
23 |     public void run() throws Exception {
24 |         File file = prepareFile(300);
25 |         try {
26 |             System.out.println("Running file of size "+(file.length() / 1024 / 1024)+ "MB");
27 |             run(file, 30);
28 |         } finally {
29 |             file.delete();
30 |         }
31 |     }
32 | 
33 |     /** Parses {@code source} {@code nRuns} times with one City parser instance. */
34 |     private void run(File source, int nRuns) throws Exception {
35 |         CSVParser<City> parser = CSVParserBuilder.aParser(City.MAPPER).build();
36 |         //CSVParser<City> parser = CSVParserBuilder.aParser(City.HeaderAwareMapper.MAPPER, City.HeaderAwareMapper.Fields.class).usingExplicitHeader("Country", "City", "AccentCity", "Region", "Population", "Latitude", "Longitude").build();//TODO add that example to docs
37 | 
38 |         for (int i = 0; i < nRuns; i++) {
39 |             runOnce(parser, source);
40 |         }
41 |     }
42 | 
43 |     /** Times one full parse and prints the duration plus the current and the best throughput. */
44 |     private void runOnce(CSVParser<City> parser, File source) throws IOException {
45 |         long start = System.currentTimeMillis();
46 |         long duration;
47 |         // Fully qualified because this file does not import Stream; closing releases the input.
48 |         try (java.util.stream.Stream<City> cities = parser.parse(source)) {
49 |             cities.count();
50 |             // Measured before close() so that only the parse itself is timed, as before.
51 |             duration = System.currentTimeMillis() - start;
52 |         }
53 |         if (duration == 0) return;
54 |         System.out.println("P2 parsed " +source.getName()+" in "+duration);
55 |         // bytes per millisecond -> MB per second, with the same 1024-based MB that run() prints.
56 |         long speed = source.length() * 1000 / duration / (1024 * 1024);
57 |         if (speed > maxSpeed) maxSpeed = speed;
58 |         System.out.println("P2 speed: "+speed+" MB/s, max: "+maxSpeed);
59 | 
60 |     }
61 | 
62 |     /**
63 |      * Writes {@code sizeMultiplier} copies of /cities-unix.txt into a new temporary file.
64 |      *
65 |      * @throws IllegalStateException if the resource is not on the classpath
66 |      */
67 |     private File prepareFile(int sizeMultiplier) throws Exception {
68 |         byte[] content;
69 |         try (InputStream is = getClass().getResourceAsStream("/cities-unix.txt")) {
70 |             if (is == null) {
71 |                 throw new IllegalStateException("Resource /cities-unix.txt not found on the classpath");
72 |             }
73 |             content = IOUtils.toByteArray(is);
74 |         }
75 |         File result = File.createTempFile("csv", "large");
76 |         for (int i = 0; i < sizeMultiplier; i++) {
77 |             FileUtils.writeByteArrayToFile(result, content, true);
78 |         }
79 |         return result;
80 |     }
81 | 
82 |     public static void main(String[] args) throws Exception {
83 |         new CityManualPerformanceTester().run();
84 |     }
85 | }
86 | 


--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/ByteSliceTest.java:
--------------------------------------------------------------------------------
  1 | package uk.elementarysoftware.quickcsv.parser;
  2 | 
  3 | import static org.junit.Assert.assertArrayEquals;
  4 | import static org.junit.Assert.assertEquals;
  5 | import static org.junit.Assert.assertNull;
  6 | import static org.junit.Assert.assertTrue;
  7 | 
  8 | import java.nio.charset.Charset;
  9 | import java.util.ArrayList;
 10 | import java.util.List;
 11 | 
 12 | import org.junit.Test;
 13 | 
 14 | import uk.elementarysoftware.quickcsv.api.ByteArraySource.ByteArrayChunk;
 15 | import uk.elementarysoftware.quickcsv.api.Field;
 16 | import uk.elementarysoftware.quickcsv.functional.Pair;
 17 | 
 18 | public class ByteSliceTest {
 19 |     
 20 |     private static final String FIELDS22 = "field11,field12\nfield21,field22";
 21 |     private static final String FIELDS33 = "field11,field12,field13\nfield21,field22,field23\nfield31,field32,field33";
 22 |     private static final String QUOTED = "'field11','field12'\n'field21','field22'\n";
 23 |     
 24 |     @Test
 25 |     public void testSplitOnLastLineEnd() {
 26 |         String content = "line1\nline2\nlastline";
 27 |         ByteSlice slice = sliceFor(content.getBytes());
 28 |         assertEquals(content, slice.toString());
 29 |         Pair<ByteSlice, ByteSlice> sliced = slice.splitOnLastLineEnd();
 30 |         assertEquals("line1\nline2\n", sliced.first.toString());
 31 |         assertEquals("lastline", sliced.second.toString());
 32 |     }
 33 |     
 34 |     
 35 |     @Test
 36 |     public void testSplitOnLastLineEndWithSkip() {
 37 |         String content = "line1\nline2\nlastline";
 38 |         ByteSlice slice = sliceFor(content.getBytes());
 39 |         slice.nextLine();
 40 |         Pair<ByteSlice, ByteSlice> sliced = slice.splitOnLastLineEnd();
 41 |         assertEquals("line2\n", sliced.first.toString());
 42 |         assertEquals("lastline", sliced.second.toString());
 43 |     }
 44 |     
 45 |     @Test
 46 |     public void testSingleSlice() {
 47 |         ByteSlice slice = sliceFor(FIELDS22.getBytes());
 48 |         assertEquals("field11,field12", slice.currentLine());
 49 |         List<Field> fields = getFields(slice);
 50 |         assertArrayEquals(new String[] {"field11","field12","field21","field22"}, fields.stream().map(f -> f.asString()).toArray());
 51 |     }
 52 |     
 53 |     @Test
 54 |     public void testSingleSliceFieldSplitWithQuote() {
 55 |         ByteSlice slice = sliceFor("f1,\"f2,f2\",f3,\"f\"\"4\"".getBytes());
 56 |         assertEquals("f1", slice.nextField(',', '"').asString());
 57 |         assertEquals("f2,f2", slice.nextField(',', '"').asString());
 58 |         assertEquals("f3", slice.nextField(',', '"').asString());
 59 |         assertEquals("f\"4", slice.nextField(',', '"').asString());
 60 |     }
 61 |     
 62 |     @Test
 63 |     public void testMultiSliceQuoteSplit() {
 64 |         String content = "f1,\"f2,f2\",f3,\"f\"\"4\"";
 65 |         for (int splitIndex = 0; splitIndex < content.length(); splitIndex++) {
 66 |             String prefix = content.substring(0, splitIndex);
 67 |             String suffix = content.substring(splitIndex);
 68 |             ByteSlice join = ByteSlice.join(sliceFor(prefix.getBytes()), sliceFor(suffix.getBytes()));
 69 |             assertEquals(content, join.toString());
 70 |             List<Field> fields = getFieldsQuoted(join);
 71 |             assertArrayEquals(
 72 |                     "Failed on split index "+splitIndex,
 73 |                     new String[] {"f1","f2,f2","f3","f\"4"}, 
 74 |                     fields.stream().map(f -> f.asString()).toArray());
 75 |         }
 76 |     }
 77 |     
 78 |     @Test
 79 |     public void testEmptyFieldHandling() {
 80 |         ByteSlice slice = sliceFor("f1,,f2".getBytes());
 81 |         assertEquals("f1", slice.nextField(',', '"').asString());
 82 |         assertEquals("", slice.nextField(',', '"').asString());
 83 |         assertEquals("f2", slice.nextField(',', '"').asString());
 84 |         assertNull(slice.nextField(',', '"'));
 85 |     }
 86 |     
 87 |     @Test
 88 |     public void testSkipSlice() {
 89 |         ByteSlice slice = sliceFor(FIELDS22.getBytes());
 90 |         slice.skipUntil(',');
 91 |         assertEquals("field12", slice.nextField(',').asString());
 92 |     }
 93 |     
 94 |     @Test
 95 |     public void testSkipSliceQuoted() {
 96 |         ByteSlice slice = sliceFor("f1,\"f2,f2\",f3".getBytes());
 97 |         slice.skipUntil(',', '"');
 98 |         slice.skipUntil(',', '"');
 99 |         assertEquals("f3", slice.nextField(',', '"').asString());
100 |     }
101 |     
102 |     
103 |     @Test
104 |     public void testMultiSliceIteration() {
105 |         String content = FIELDS22;
106 |         int splitIndex = 3;
107 |         String prefix = content.substring(0, splitIndex);
108 |         String suffix = content.substring(splitIndex);
109 |         CompositeByteSlice slice = (CompositeByteSlice) ByteSlice.join(sliceFor(prefix.getBytes()), sliceFor(suffix.getBytes()));
110 |         byte[] result = new byte[slice.size()];
111 |         for (int i = 0; i < result.length; i++) {
112 |             result[i] = slice.currentByte();
113 |             slice.nextByte();
114 |         }
115 |         assertEquals(FIELDS22, new String(result));
116 |     }
117 |     
118 |     @Test
119 |     public void testMultiSliceFieldSplit() {
120 |         String content = FIELDS33;
121 |         for (int splitIndex = 0; splitIndex < content.length(); splitIndex++) {
122 |             String prefix = content.substring(0, splitIndex);
123 |             String suffix = content.substring(splitIndex);
124 |             ByteSlice join = ByteSlice.join(sliceFor(prefix.getBytes()), sliceFor(suffix.getBytes()));
125 |             assertEquals(content, join.toString());
126 |             List<Field> fields = getFields(join);
127 |             assertArrayEquals(
128 |                     "Failed on split index "+splitIndex,
129 |                     new String[] {"field11","field12","field13","field21","field22","field23","field31","field32","field33"}, 
130 |                     fields.stream().map(f -> f.asString()).toArray());
131 |         }
132 |     }
133 |     
134 |     @Test
135 |     public void testMultiSliceSkip() {
136 |         String content = FIELDS33;
137 |         for (int splitIndex = 0; splitIndex < content.length(); splitIndex++) {
138 |             String prefix = content.substring(0, splitIndex);
139 |             String suffix = content.substring(splitIndex);
140 |             ByteSlice join = ByteSlice.join(sliceFor(prefix.getBytes()), sliceFor(suffix.getBytes()));
141 |             assertTrue(join.skipUntil(','));
142 |             assertEquals("field12", join.nextField(',').asString());
143 |             assertTrue(join.nextLine());
144 |             assertEquals("field21", join.nextField(',').asString());
145 |             assertTrue(join.skipUntil(','));
146 |             assertEquals("field23", join.nextField(',').asString());
147 |         }
148 |     }
149 |     
150 |     @Test
151 |     public void testMultiSliceFieldSplitQuoted() {
152 |         String content = QUOTED;
153 |         for (int splitIndex = 0; splitIndex < content.length(); splitIndex++) {
154 |             String prefix = content.substring(0, splitIndex);
155 |             String suffix = content.substring(splitIndex);
156 |             ByteSlice join = ByteSlice.join(sliceFor(prefix.getBytes()), sliceFor(suffix.getBytes()));
157 |             assertEquals(content, join.toString());
158 |             List<Field> fields = getFieldsQuoted(join, '\'');
159 |             assertArrayEquals(
160 |                     "Failed on split index "+splitIndex,
161 |                     new String[] {"field11","field12","field21","field22"}, 
162 |                     fields.stream().map(f -> f.asString()).toArray());
163 |         }
164 |     }
165 |     
166 |     private ByteSlice sliceFor(byte[] bytes) {
167 |         return ByteSlice.wrap(new ByteArrayChunk(bytes, bytes.length, false, (b) -> {}), Charset.defaultCharset());
168 |     }
169 | 
170 |     private List<Field> getFields(ByteSlice bs) {
171 |         List<Field> result = new ArrayList<>();
172 |         while(true) {
173 |             ByteArrayField f = bs.nextField(',');
174 |             if (f == null) {
175 |                 if (!bs.nextLine()) break;
176 |             } else {
177 |                 result.add(f.clone());
178 |             }
179 |         }
180 |         return result;
181 |     }
182 |     
183 |     private List<Field> getFieldsQuoted(ByteSlice bs, char quote) {
184 |         List<Field> result = new ArrayList<>();
185 |         while(true) {
186 |             ByteArrayField f = bs.nextField(',', quote);
187 |             if (f == null) {
188 |                 if (!bs.nextLine()) break;
189 |             } else {
190 |                 result.add(f.clone());
191 |             }
192 |         }
193 |         return result;
194 |     }
195 |     
196 |     private List<Field> getFieldsQuoted(ByteSlice bs) {
197 |         return getFieldsQuoted(bs, '"');
198 |     }
199 | }
200 | 


--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/CharsetHandlingTest.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.parser;
 2 | 
 3 | import static org.junit.Assert.*;
 4 | 
 5 | import java.io.File;
 6 | import java.util.function.Function;
 7 | import java.util.stream.Stream;
 8 | 
 9 | import org.junit.Test;
10 | 
11 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
12 | import uk.elementarysoftware.quickcsv.api.CSVRecordWithHeader;
13 | import uk.elementarysoftware.quickcsv.sampledomain.City;
14 | 
15 | public class CharsetHandlingTest {
16 |     
17 |     File utf8input = new File("src/test/resources/cities-rus-utf8.txt");
18 |     File cp1251input = new File("src/test/resources/cities-rus-cp1251.txt");
19 |     
20 |     String[] expected = new String[] {
21 |             "City [city=Андора, population=0, latitude=42.5, longitude=1.5166667]",
22 |             "City [city=City of London, population=0, latitude=51.514125, longitude=-0.093689]",
23 |             "City [city=Харків, population=0, latitude=49.980814, longitude=36.252718]" 
24 |     };
25 |     
26 |     @Test
27 |     public void testUtf8() throws Exception {
28 |         Stream<City> cities = CSVParserBuilder.aParser(EnumMapper.MAPPER, EnumMapper.RusFields.class) 
29 |                 .usingCharset("UTF-8").build().parse(utf8input);
30 |         String[] actual = cities.map(c -> c.toString()).toArray(String[]::new);
31 |         assertArrayEquals(expected, actual);
32 |     }
33 |     
34 |     @Test
35 |     public void testCp1251() throws Exception {
36 |         Stream<City> cities = CSVParserBuilder.aParser(EnumMapper.MAPPER, EnumMapper.RusFields.class)
37 |                 .usingCharset("Cp1251").build().parse(cp1251input);
38 |         String[] actual = cities.map(c -> c.toString()).toArray(String[]::new);
39 |         assertArrayEquals(expected, actual);
40 |     }
41 |     
42 |     public static class EnumMapper { 
43 |         
44 |         enum RusFields {
45 |             Latitude("Широта"),
46 |             Longitude("Долгота"),
47 |             AccentCity("Город"),
48 |             Population("Население");
49 |             
50 |             private final String headerFieldName;
51 | 
52 |             private RusFields(String headerFieldName) {
53 |                 this.headerFieldName = headerFieldName;
54 |             }
55 |             
56 |             @Override
57 |             public String toString() {
58 |                 return headerFieldName;
59 |             }
60 |         }
61 |         
62 |         public static final Function<CSVRecordWithHeader<RusFields>, City> MAPPER = r -> {
63 |             return new City(
64 |                     r.getField(RusFields.AccentCity).asString(),
65 |                     r.getField(RusFields.Population).asInt(),
66 |                     r.getField(RusFields.Latitude).asDouble(),
67 |                     r.getField(RusFields.Longitude).asDouble(),
68 |                     r.getField(RusFields.Population).asLong()
69 |             );
70 |         };
71 |     }
72 | }


--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/FieldSubsetViewTest.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.parser;
 2 | 
 3 | import static org.junit.Assert.*;
 4 | 
 5 | import org.junit.Before;
 6 | import org.junit.Test;
 7 | 
 8 | public class FieldSubsetViewTest {
 9 |     
10 |     enum FieldSubset {
11 |         C3, C4, C1
12 |     }
13 | 
14 |     private FieldSubsetView<FieldSubset> fs;
15 |     
16 |     @Before
17 |     public void init() {
18 |         this.fs = FieldSubsetView.forExplicitHeader(FieldSubset.class, "C1", "C2", "C3", "C4", "C5");
19 |         fs.onSlice(null, null);
20 |     }
21 |     
22 |     @Test
23 |     public void testFieldIndexIsSortedAndCorrect() {
24 |         assertArrayEquals(new int[] {0, 2, 3}, fs.getFieldIndexes());
25 |     }
26 |     
27 |     @Test
28 |     public void testIndexOfInSourceView() {
29 |         assertEquals(1, fs.indexOfInSourceView(0));
30 |         assertEquals(2, fs.indexOfInSourceView(1));
31 |         assertEquals(0, fs.indexOfInSourceView(2));
32 |     }
33 | }
34 | 


--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/TestParsingSpecialCases.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.parser;
 2 | 
 3 | import static org.junit.Assert.assertArrayEquals;
 4 | 
 5 | import java.io.ByteArrayInputStream;
 6 | import java.io.InputStream;
 7 | import java.util.List;
 8 | import java.util.stream.Collectors;
 9 | 
10 | import org.junit.Test;
11 | 
12 | import uk.elementarysoftware.quickcsv.api.CSVParser;
13 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
14 | 
15 | /**
16 |  * Covers records whose last fields are empty and that are not followed by a line break.
17 |  */
18 | public class TestParsingSpecialCases {
19 | 
20 | 	/** Columns the mapper below asks for. */
21 | 	enum Fields {
22 | 		A, B, C;
23 | 	}
24 | 
25 | 	/** Maps every record to the string values of columns A, B and C, in that order. */
26 | 	CSVParser<String[]> parser = CSVParserBuilder.aParser(
27 | 			rec -> new String[] {
28 | 					rec.getField(Fields.A).asString(),
29 | 					rec.getField(Fields.B).asString(),
30 | 					rec.getField(Fields.C).asString()
31 | 			},
32 | 			Fields.class).build();
33 | 
34 | 	/** Unquoted input: a header line followed by a single record ending in two empty fields. */
35 | 	@Test
36 | 	public void testLineEndsWithEmptyField() {
37 | 		String[] first = firstRecordOf("A,B,C\na,,");
38 | 		assertArrayEquals(new String[] {"a", "", ""}, first);
39 | 	}
40 | 
41 | 	/** Same data as above with every value, including the empty ones, wrapped in double quotes. */
42 | 	@Test
43 | 	public void testLineEndsWithEmptyFieldQuoted() {
44 | 		String[] first = firstRecordOf("\"A\",\"B\",\"C\"\n\"a\",\"\",\"\"");
45 | 		assertArrayEquals(new String[] {"a", "", ""}, first);
46 | 	}
47 | 
48 | 	/** Feeds the text to the parser as an in-memory stream and returns the first parsed record. */
49 | 	private String[] firstRecordOf(String csvText) {
50 | 		InputStream in = new ByteArrayInputStream(csvText.getBytes());
51 | 		List<String[]> records = parser.parse(in).collect(Collectors.toList());
52 | 		return records.get(0);
53 | 	}
54 | }
55 | 


--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/TestParsingWithHeader.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.parser;
 2 | 
 3 | import static org.junit.Assert.*;
 4 | 
 5 | import java.io.File;
 6 | import java.io.IOException;
 7 | import java.util.List;
 8 | import java.util.function.Function;
 9 | import java.util.stream.Collectors;
10 | import java.util.stream.Stream;
11 | 
12 | import org.junit.Test;
13 | 
14 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
15 | import uk.elementarysoftware.quickcsv.api.StandardMappers;
16 | import uk.elementarysoftware.quickcsv.sampledomain.City;
17 | 
18 | public class TestParsingWithHeader {
19 | 
20 |     File input = new File("src/test/resources/cities-with-header.txt");
21 | 
22 |     String[] expected = new String[] {
23 |             "City [city=Andorra, population=0, latitude=42.5, longitude=1.5166667]",
24 |             "City [city=City of London, population=0, latitude=51.514125, longitude=-0.093689]",
25 |             "City [city=Kharkiv, population=0, latitude=49.980814, longitude=36.252718]"
26 |     };
27 | 
28 |     @Test
29 |     public void testSequential() throws Exception {
30 |         Stream<City> cities = CSVParserBuilder.aParser(ignoreErrors(City.MAPPER))
31 |                 .build().parse(input).sequential();
32 |         String[] actual = cities.filter(c -> c != null).map(c -> c.toString()).toArray(String[]::new);
33 |         assertArrayEquals(expected, actual);
34 |     }
35 | 
36 |     @Test
37 |     public void testSequentialWithEnumApi() throws Exception {
38 |         Stream<City> cities = CSVParserBuilder.aParser(City.HeaderAwareMapper.MAPPER, City.HeaderAwareMapper.Fields.class)
39 |                 .build().parse(input).sequential();
40 |         String[] actual = cities.map(c -> c.toString()).toArray(String[]::new);
41 |         assertArrayEquals(expected, actual);
42 |     }
43 | 
44 |     @Test
45 |     public void testSequentialWithEnumApiWithFirstColumn() throws Exception {
46 |         Stream<City> cities = CSVParserBuilder.aParser(City.HeaderAwareMapper2.MAPPER, City.HeaderAwareMapper2.Fields.class)
47 |                 .build().parse(input).sequential();
48 |         String[] actual = cities.map(c -> c.toString()).toArray(String[]::new);
49 |         assertEquals(3, actual.length);
50 |     }
51 | 
52 |     @Test
53 |     public void testParallel() throws Exception {
54 |         Stream<City> cities = CSVParserBuilder.aParser(ignoreErrors(City.MAPPER))
55 |                 .build().parse(input).parallel();
56 |         String[] actual = cities.filter(c -> c != null).map(c -> c.toString()).toArray(String[]::new);
57 |         assertArrayEquals(expected, actual);
58 |     }
59 | 
60 |     @Test
61 |     /**
62 |      * Checks that we can skip records on parallel stream. That verifies that the stream is ordered by
63 |      * default and behaves normally when being copied by java's skipping stream decorator.
64 |      */
65 |     public void testParallelParseWithSkip() throws IOException {
66 |         List<List<String>> result = CSVParserBuilder.aParser(StandardMappers.TO_STRING_LIST).build()
67 |                 .parse(input).skip(1).collect(Collectors.toList());
68 |         assertEquals(3, result.size());
69 |         assertArrayEquals(new String[] {"ad","andorra","Andorra","07","","42.5","1.5166667"}, result.get(0).toArray(new String[0]));
70 |     }
71 | 
72 |     private static <T,S> Function<T, S> ignoreErrors(Function<T, S> f) {
73 |         return t -> {
74 |           try {
75 |               return f.apply(t);
76 |           } catch (Exception e) {
77 |               return null;
78 |           }
79 |         };
80 |     }
81 | }
82 | 


--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/TestParsingWithHeaderQuoted.java:
--------------------------------------------------------------------------------
 1 | package uk.elementarysoftware.quickcsv.parser;
 2 | 
 3 | import static org.junit.Assert.*;
 4 | 
 5 | import java.io.File;
 6 | import java.util.stream.Stream;
 7 | 
 8 | import org.junit.Test;
 9 | 
10 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
11 | import uk.elementarysoftware.quickcsv.sampledomain.City;
12 | 
13 | public class TestParsingWithHeaderQuoted {
14 | 
15 |     File input = new File("src/test/resources/cities-with-header-quoted.txt");
16 | 
17 |     String[] expected = new String[] {
18 |             "City [city=Andorra, population=0, latitude=42.5, longitude=1.5166667]",
19 |             "City [city=City of London, population=0, latitude=51.514125, longitude=-0.093689]",
20 |             "City [city=Kharkiv, population=0, latitude=49.980814, longitude=36.252718]"
21 |     };
22 | 
23 | 
24 |     @Test
25 |     public void testSequentialWithEnumApi() throws Exception {
26 |         Stream<City> cities = CSVParserBuilder.aParser(City.HeaderAwareMapper.MAPPER, City.HeaderAwareMapper.Fields.class)
27 |                 .usingSeparatorWithQuote(',', '"')
28 |                 .build().parse(input).sequential();
29 |         String[] actual = cities.map(c -> c.toString()).toArray(String[]::new);
30 |         assertArrayEquals(expected, actual);
31 |     }
32 | 
33 | }
34 | 


--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/simple/StraightForwardParser.java:
--------------------------------------------------------------------------------
  1 | package uk.elementarysoftware.quickcsv.parser.simple;
  2 | 
  3 | import java.io.File;
  4 | import java.io.IOException;
  5 | import java.io.InputStream;
  6 | import java.nio.ByteBuffer;
  7 | import java.nio.file.Files;
  8 | import java.util.function.Function;
  9 | import java.util.stream.Stream;
 10 | 
 11 | import uk.elementarysoftware.quickcsv.api.ByteArraySource;
 12 | import uk.elementarysoftware.quickcsv.api.CSVParser;
 13 | import uk.elementarysoftware.quickcsv.api.CSVRecord;
 14 | import uk.elementarysoftware.quickcsv.api.Field;
 15 | 
 16 | /**
 17 |  * Minimal {@link CSVParser} for tests: reads a file line by line and splits every line on commas.
 18 |  * Quoting is not handled, only {@link File} sources are supported, and several {@link Field}
 19 |  * accessors are stubs.
 20 |  */
 21 | public class StraightForwardParser implements CSVParser<CSVRecord> {
 22 | 
 23 |     /**
 24 |      * Lazily turns each line of the file into a record.
 25 |      * The returned stream keeps the file open until it is closed, so callers should close it.
 26 |      *
 27 |      * @param source file to read
 28 |      * @return one record per line of the file
 29 |      * @throws IOException if the file cannot be opened
 30 |      */
 31 |     @Override @SuppressWarnings("resource")
 32 |     public Stream<CSVRecord> parse(File source) throws IOException {
 33 |         Stream<String> lines = Files.lines(source.toPath());
 34 |         // Limit -1 keeps trailing empty fields: "a,," must give three fields, not one.
 35 |         return lines.map(line -> line.split(",", -1)).map(toCSVRecord());
 36 |     }
 37 | 
 38 |     /** Not supported by this parser. */
 39 |     @Override
 40 |     public Stream<CSVRecord> parse(InputStream is) {
 41 |         throw new UnsupportedOperationException();
 42 |     }
 43 | 
 44 |     /** Not supported by this parser. */
 45 |     @Override
 46 |     public Stream<CSVRecord> parse(ByteArraySource bas) {
 47 |         throw new UnsupportedOperationException();
 48 |     }
 49 | 
 50 |     /** Adapter from the split line to a {@link SimpleCSVRecord}. */
 51 |     private Function<String[], CSVRecord> toCSVRecord() {
 52 |         return SimpleCSVRecord::new;
 53 |     }
 54 | 
 55 |     /** Record over an array of field values with a cursor that only moves forward. */
 56 |     public static class SimpleCSVRecord implements CSVRecord {
 57 | 
 58 |         private final String[] fields;
 59 |         /** Position of the field returned by the next {@link #getNextField()} call. */
 60 |         private int index;
 61 | 
 62 |         public SimpleCSVRecord(String[] fields) {
 63 |             this.index = 0;
 64 |             this.fields = fields;
 65 |         }
 66 | 
 67 |         @Override
 68 |         public void skipField() {
 69 |             index++;
 70 |         }
 71 | 
 72 |         @Override
 73 |         public void skipFields(int nFields) {
 74 |             index += nFields;
 75 |         }
 76 | 
 77 |         /** Returns the field under the cursor and advances; no bounds check beyond the array's own. */
 78 |         @Override
 79 |         public Field getNextField() {
 80 |             return new SimpleField(fields[index++]);
 81 |         }
 82 | 
 83 |     }
 84 | 
 85 |     /**
 86 |      * Field backed by a plain string. Only the string, double, int and long conversions are
 87 |      * implemented; {@link #raw()}, {@link #asByte()}, {@link #asChar()} and {@link #asShort()} are stubs.
 88 |      */
 89 |     public static class SimpleField implements Field {
 90 |         String value;
 91 | 
 92 |         public SimpleField(String value) {
 93 |             this.value = value;
 94 |         }
 95 | 
 96 |         /** Stub: no byte representation is kept, always returns {@code null}. */
 97 |         @Override
 98 |         public ByteBuffer raw() {
 99 |             return null;
100 |         }
101 | 
102 |         @Override
103 |         public String asString() {
104 |             return value;
105 |         }
106 | 
107 |         /** Unlike {@link #asInt()}, an empty value is not special-cased and fails to parse. */
108 |         @Override
109 |         public double asDouble() {
110 |             return Double.parseDouble(value);
111 |         }
112 | 
113 |         /** Stub: always returns 0. */
114 |         @Override
115 |         public byte asByte() {
116 |             return 0;
117 |         }
118 | 
119 |         /** Stub: always returns 0. */
120 |         @Override
121 |         public char asChar() {
122 |             return 0;
123 |         }
124 | 
125 |         /** Stub: always returns 0. */
126 |         @Override
127 |         public short asShort() {
128 |             return 0;
129 |         }
130 | 
131 |         /** Parses the value as an int; an empty value yields 0. */
132 |         @Override
133 |         public int asInt() {
134 |             if (isEmpty()) return 0;
135 |             return Integer.parseInt(value);
136 |         }
137 | 
138 |         /** Parses the value as a long, mirroring {@link #asInt()}: an empty value yields 0. */
139 |         @Override
140 |         public long asLong() {
141 |             if (isEmpty()) return 0L;
142 |             return Long.parseLong(value);
143 |         }
144 | 
145 |         /** Returns this same instance rather than a copy. */
146 |         @Override
147 |         public Field clone() {
148 |             return this;
149 |         }
150 | 
151 |         @Override
152 |         public boolean isEmpty() {
153 |             return value.isEmpty();
154 |         }
155 | 
156 |         @Override
157 |         public Double asBoxedDouble() {
158 |             return asDouble();
159 |         }
160 | 
161 |         @Override
162 |         public Integer asBoxedInt() {
163 |             return asInt();
164 |         }
165 |     }
166 | }


--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/sampledomain/City.java:
--------------------------------------------------------------------------------
  1 | package uk.elementarysoftware.quickcsv.sampledomain;
  2 | 
  3 | import java.util.function.Function;
  4 | 
  5 | import uk.elementarysoftware.quickcsv.api.CSVRecord;
  6 | import uk.elementarysoftware.quickcsv.api.CSVRecordWithHeader;
  7 | import uk.elementarysoftware.quickcsv.api.Field;
  8 | 
  9 | /**
 10 |  * Sample domain object for the parser tests, together with mappers that build it from CSV records.
 11 |  */
 12 | public class City {
 13 | 
 14 |     /** Positional mapper: delegates to {@link #City(CSVRecord)}. */
 15 |     public static final Function<CSVRecord, City> MAPPER = record -> new City(record);
 16 | 
 17 |     /** Mapper that reads the four columns it needs through its {@code Fields} enum. */
 18 |     public static class HeaderAwareMapper {
 19 | 
 20 |         public static enum Fields {
 21 |             AccentCity,
 22 |             Latitude,
 23 |             Longitude,
 24 |             Population
 25 |         }
 26 | 
 27 |         // The Population field is read twice: once as int and once as long.
 28 |         public static final Function<CSVRecordWithHeader<Fields>, City> MAPPER = rec -> new City(
 29 |                 rec.getField(Fields.AccentCity).asString(),
 30 |                 rec.getField(Fields.Population).asInt(),
 31 |                 rec.getField(Fields.Latitude).asDouble(),
 32 |                 rec.getField(Fields.Longitude).asDouble(),
 33 |                 rec.getField(Fields.Population).asLong());
 34 |     }
 35 | 
 36 |     /** Variant whose enum names two more columns and which takes the name from the City column. */
 37 |     public static class HeaderAwareMapper2 {
 38 | 
 39 |         public static enum Fields {
 40 |             AccentCity, Population, Latitude, Longitude, Country, City
 41 |         }
 42 | 
 43 |         public static final Function<CSVRecordWithHeader<Fields>, City> MAPPER = rec -> new City(
 44 |                 rec.getField(Fields.City).asString(),
 45 |                 rec.getField(Fields.Population).asInt(),
 46 |                 rec.getField(Fields.Latitude).asDouble(),
 47 |                 rec.getField(Fields.Longitude).asDouble(),
 48 |                 rec.getField(Fields.Population).asLong());
 49 |     }
 50 | 
 51 |     /** Number of leading fields skipped before the city name is read. */
 52 |     private static final int CITY_INDEX = 2;
 53 | 
 54 |     private final String city;
 55 |     private final int population;
 56 |     private final double latitude;
 57 |     private final double longitude;
 58 |     private final long populationL;
 59 | 
 60 |     /**
 61 |      * Reads the city from a record by position: skips {@code CITY_INDEX} fields, takes the name,
 62 |      * skips one more field, then takes population, latitude and longitude in that order.
 63 |      */
 64 |     public City(CSVRecord r) {
 65 |         r.skipFields(CITY_INDEX);
 66 |         city = r.getNextField().asString();
 67 |         r.skipField();
 68 |         // One field, converted twice: as int and as long.
 69 |         Field populationField = r.getNextField();
 70 |         population = populationField.asInt();
 71 |         populationL = populationField.asLong();
 72 |         latitude = r.getNextField().asDouble();
 73 |         longitude = r.getNextField().asDouble();
 74 |     }
 75 | 
 76 |     public City(String city, int population, double latitude, double longitude, long populationL) {
 77 |         this.city = city;
 78 |         this.population = population;
 79 |         this.latitude = latitude;
 80 |         this.longitude = longitude;
 81 |         this.populationL = populationL;
 82 |     }
 83 | 
 84 |     public String getCity() {
 85 |         return city;
 86 |     }
 87 | 
 88 |     public int getPopulation() {
 89 |         return population;
 90 |     }
 91 | 
 92 |     public double getLatitude() {
 93 |         return latitude;
 94 |     }
 95 | 
 96 |     public double getLongitude() {
 97 |         return longitude;
 98 |     }
 99 | 
100 |     public long getPopulationL() {
101 |         return populationL;
102 |     }
103 | 
104 |     /** Combines city, latitude, longitude and population; {@code populationL} is not included. */
105 |     @Override
106 |     public int hashCode() {
107 |         int hash = 31 + (city == null ? 0 : city.hashCode());
108 |         hash = 31 * hash + Double.hashCode(latitude);
109 |         hash = 31 * hash + Double.hashCode(longitude);
110 |         return 31 * hash + population;
111 |     }
112 | 
113 |     /** Compares the same four fields as {@link #hashCode()}; doubles are compared by their bits. */
114 |     @Override
115 |     public boolean equals(Object obj) {
116 |         if (this == obj) {
117 |             return true;
118 |         }
119 |         if (obj == null || getClass() != obj.getClass()) {
120 |             return false;
121 |         }
122 |         City that = (City) obj;
123 |         boolean sameName = (city == null) ? that.city == null : city.equals(that.city);
124 |         return sameName
125 |                 && Double.doubleToLongBits(latitude) == Double.doubleToLongBits(that.latitude)
126 |                 && Double.doubleToLongBits(longitude) == Double.doubleToLongBits(that.longitude)
127 |                 && population == that.population;
128 |     }
129 | 
130 |     /** Renders city, population, latitude and longitude; {@code populationL} is not printed. */
131 |     @Override
132 |     public String toString() {
133 |         return "City [city=" + city
134 |                 + ", population=" + population
135 |                 + ", latitude=" + latitude
136 |                 + ", longitude=" + longitude
137 |                 + "]";
138 |     }
139 | 
140 | }
141 | 


--------------------------------------------------------------------------------
/src/test/resources/cities-rus-cp1251.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/titorenko/quick-csv-streamer/cc11f6e9db6df4f3aac57ca72c4176501667f41d/src/test/resources/cities-rus-cp1251.txt


--------------------------------------------------------------------------------
/src/test/resources/cities-rus-utf8.txt:
--------------------------------------------------------------------------------
1 | Страна,Код города,Город,Регион,Население,Широта,Долгота
2 | ad,andorra,Андора,07,,42.5,1.5166667
3 | gb,city of london,City of London,H9,,51.514125,-.093689
4 | ua,kharkiv,Харків,07,,49.980814,36.252718


--------------------------------------------------------------------------------
/src/test/resources/cities-with-header-quoted.txt:
--------------------------------------------------------------------------------
1 | "Country","City","AccentCity","Region","Population","Latitude","Longitude"
2 | "ad","andorra","Andorra","07","","42.5","1.5166667"
3 | "gb","city of london","City of London","H9","","51.514125","-.093689"
4 | "ua","kharkiv","Kharkiv","07","","49.980814","36.252718"


--------------------------------------------------------------------------------
/src/test/resources/cities-with-header.txt:
--------------------------------------------------------------------------------
1 | Country,City,AccentCity,Region,Population,Latitude,Longitude
2 | ad,andorra,Andorra,07,,42.5,1.5166667
3 | gb,city of london,City of London,H9,,51.514125,-.093689
4 | ua,kharkiv,Kharkiv,07,,49.980814,36.252718


--------------------------------------------------------------------------------
/src/test/resources/correctness.txt:
--------------------------------------------------------------------------------
1 | Year,Make,Model,Description,Price
2 | 1997,Ford,E350,"ac, abs, moon",3000.00
3 | 1999,Chevy,"Venture ""Extended Edition""","",4900.00
4 | 1996,Jeep,Grand Cherokee,"MUST SELL!
5 | air, moon roof, loaded",4799.00
6 | 1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00
7 | ,,"Venture ""Extended Edition""","",4900.00


--------------------------------------------------------------------------------