├── .gitattributes
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── build.gradle
├── release-notes.md
├── settings.gradle
└── src
├── jmh
└── java
│ └── uk
│ └── elementarysoftware
│ └── quickcsv
│ └── benchmarks
│ ├── BenchmarkParserAndMapperInMemory.java
│ ├── City.java
│ └── OpenCSVParser.java
├── main
└── java
│ └── uk
│ └── elementarysoftware
│ └── quickcsv
│ ├── api
│ ├── ByteArraySource.java
│ ├── CSVParser.java
│ ├── CSVParserBuilder.java
│ ├── CSVRecord.java
│ ├── CSVRecordWithHeader.java
│ ├── Field.java
│ └── StandardMappers.java
│ ├── decoder
│ ├── Decoder.java
│ ├── ParserFactory.java
│ ├── doubles
│ │ ├── DoubleParser.java
│ │ ├── JDKDoubleParserAdapter.java
│ │ └── QuickDoubleParser.java
│ └── ints
│ │ ├── ExceptionHelper.java
│ │ ├── IntParser.java
│ │ ├── LongParser.java
│ │ ├── QuickIntParser.java
│ │ └── QuickLongParser.java
│ ├── functional
│ ├── Pair.java
│ └── PrimitiveFunctions.java
│ ├── ioutils
│ └── IOUtils.java
│ └── parser
│ ├── BufferPool.java
│ ├── ByteArrayField.java
│ ├── ByteSlice.java
│ ├── FieldSubsetView.java
│ ├── InputStreamToByteArraySourceAdapter.java
│ └── QuickCSVParser.java
└── test
├── java
└── uk
│ └── elementarysoftware
│ └── quickcsv
│ ├── decoder
│ ├── doubles
│ │ └── DoubleParserTest.java
│ └── ints
│ │ ├── IntParserTest.java
│ │ └── LongParserTest.java
│ ├── integration
│ ├── CorrectnessTest.java
│ ├── HttpStreamTest.java
│ └── IntegrationTest.java
│ ├── manual
│ └── CityManualPerformanceTester.java
│ ├── parser
│ ├── ByteSliceTest.java
│ ├── CharsetHandlingTest.java
│ ├── FieldSubsetViewTest.java
│ ├── TestParsingSpecialCases.java
│ ├── TestParsingWithHeader.java
│ ├── TestParsingWithHeaderQuoted.java
│ └── simple
│ │ └── StraightForwardParser.java
│ └── sampledomain
│ └── City.java
└── resources
├── cities-dos.txt
├── cities-rus-cp1251.txt
├── cities-rus-utf8.txt
├── cities-unix-quoted.txt
├── cities-unix.txt
├── cities-with-header-quoted.txt
├── cities-with-header.txt
└── correctness.txt
/.gitattributes:
--------------------------------------------------------------------------------
1 | cities-dos.txt text eol=crlf
2 | cities-unix.txt text eol=lf
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /bin/
2 | .classpath
3 | .project
4 | .gradle
5 | .settings
6 | bin
7 | build
8 | private-notes.txt
9 | /keys/
10 | gradle.properties
11 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 | jdk:
3 | - oraclejdk8
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Lesser General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereinafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
282 | How to Apply These Terms to Your New Programs
283 |
284 | If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 |
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 |
293 | {description}
294 | Copyright (C) {year} {fullname}
295 |
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 |
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 |
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 |
310 | Also add information on how to contact you by electronic and paper mail.
311 |
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 |
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 |
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 |
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 |
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 |
332 | {signature of Ty Coon}, 1 April 1989
333 | Ty Coon, President of Vice
334 |
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 |
341 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Quick CSV Streamer
2 | =============
3 |
4 | [Build Status](https://travis-ci.org/titorenko/quick-csv-streamer)
5 | [Maven Central](https://maven-badges.herokuapp.com/maven-central/uk.elementarysoftware/quick-csv-streamer/)
6 | [Javadoc](http://www.javadoc.io/doc/uk.elementarysoftware/quick-csv-streamer)
7 |
8 | Quick CSV streamer is a high performance CSV parsing library with Java 8 Stream API.
9 | The library operates in "zero-copy" mode and only parses what is required by the client. Amount
10 | of garbage produced is also optimized, reducing pressure on the garbage collector.
11 | Parallel, multi-core parsing is supported transparently via Java Stream API.
12 |
13 | Compared to other open source Java CSV parsing libraries, Quick CSV achieves speed-ups in the 2x - 10x range in sequential, single-threaded mode. Naturally, parallel mode improves performance further. See the benchmarking results below for more details.
14 |
15 | The library is limited to so-called "line-optimal" charsets such as UTF-8, US-ASCII, ISO-8859-1 and some others. Such line-optimal charsets have the property that line feed ('\n'), carriage return ('\r') and the CSV separator are easily distinguishable from other encoded characters.
16 |
17 |
18 | Maven dependency
19 | --------------
20 |
21 | Available from Maven Central:
22 |
23 | ```xml
24 | <dependency>
25 |     <groupId>uk.elementarysoftware</groupId>
26 |     <artifactId>quick-csv-streamer</artifactId>
27 |     <version>0.2.4</version>
28 | </dependency>
29 | ```
30 |
31 | Example usage
32 | --------------
33 |
34 | Suppose following CSV file needs to be parsed
35 |
36 | Country,City,AccentCity,Region,Population,Latitude,Longitude
37 | ad,andorra,Andorra,07,,42.5,1.5166667
38 | gb,city of london,City of London,H9,,51.514125,-.093689
39 | ua,kharkiv,Kharkiv,07,,49.980814,36.252718
40 |
41 | First define Java class to represent the records as follows
42 |
43 | public class City {
44 | private final String city;
45 | private final int population;
46 | private final double latitude;
47 | private final double longitude;
48 |
49 | ...
50 | }
51 |
52 | here we will be sourcing 4 fields from the source file, ignoring other 3.
53 |
54 | Parsing the file is simple
55 |
56 | import uk.elementarysoftware.quickcsv.api.*;
57 |
58 | CSVParser parser = CSVParserBuilder.aParser(City::new, City.CSVFields.class).forRfc4180().build();
59 |
60 | the parser will be using CSV separators as per RFC 4180, default encoding and will be expecting header as first record in the source. Custom separators, quotes, encodings and header sources are supported.
61 |
62 | Actual mapping is done in `City` constructor
63 |
64 | public class City {
65 |
66 | public static enum CSVFields {
67 | AccentCity,
68 | Population,
69 | Latitude,
70 | Longitude
71 | }
72 |
73 | public City(CSVRecordWithHeader r) {
74 | this.city = r.getField(CSVFields.AccentCity).asString();
75 | this.population = r.getField(CSVFields.Population).asInt();
76 | this.latitude = r.getField(CSVFields.Latitude).asDouble();
77 | this.longitude = r.getField(CSVFields.Longitude).asDouble();
78 | }
79 |
80 | first, the `CSVFields` enum specifies which fields should be sourced, and only these fields will actually be parsed. After that, the `CSVRecordWithHeader` instance is used to populate `City` instance fields, referring to CSV fields by enum values.
81 |
82 | Of course mapping can also be done outside domain class constructor, just pass different `Function` to `CSVParserBuilder`.
83 |
84 | Resulting stream can be processed in parallel or sequentially with usual Java stream API. For example to parse sequentially on a single thread
85 |
86 | Stream stream = parser.parse(source).sequential();
87 | stream.forEach(System.out::println);
88 |
89 | By default parser will operate in parallel mode.
90 |
91 | Please see [sample project](https://github.com/titorenko/quick-csv-streamer-cities-sample) for full source code of the above example.
92 |
93 | Special cases for headers
94 | --------------
95 |
96 | When the header contains special characters, the fields cannot simply be encoded by enum literals. In such cases `toString` should be overridden, for example
97 |
98 | enum Fields {
99 | Latitude("City Latitude"),
100 | Longitude("City Longitude"),
101 | City("City name"),
102 | Population("City Population");
103 |
104 | private final String headerFieldName;
105 |
106 | private Fields(String headerFieldName) {
107 | this.headerFieldName = headerFieldName;
108 | }
109 |
110 | @Override public String toString() {
111 | return headerFieldName;
112 | }
113 | }
114 |
115 | If the header is missing from the source, it can be supplied during parser construction
116 |
117 | CSVParserBuilder
118 | .aParser(City::new, City.CSVFields.class)
119 | .usingExplicitHeader("Country", "City", "AccentCity", "Region", "Population", "Latitude", "Longitude")
120 | .build();
121 |
122 |
123 | Advanced usage
124 | --------------
125 | About 10% performance improvement compared to normal usage can be achieved by referencing the fields by position instead of name. In this case parser construction is even simpler
126 |
127 | CSVParser parser = CSVParserBuilder.aParser(City::new).build();
128 |
129 | as enumeration specifying field names is not needed. However now constructor will be using `CSVRecord` interface
130 |
131 | public City(CSVRecord r) {
132 | r.skipFields(2);
133 | this.city = r.getNextField().asString();
134 | r.skipField();
135 | this.population = r.getNextField().asInt();
136 | this.latitude = r.getNextField().asDouble();
137 | this.longitude = r.getNextField().asDouble();
138 | }
139 |
140 | effectively this encodes field order in the CSV source.
141 |
142 | Performance
143 | --------------
144 |
145 | Best way to check performance of the library is to run benchmark on your target system with
146 |
147 | gradle jmh
148 |
149 | reports can be then found in build/reports/jmh.
150 |
151 | It is very important to appreciate that performance might vary dramatically depending on the actual CSV content. As a very rough guideline, see below the sample output of "gradle jmh" on an i7 2700k Ubuntu system, which uses `cities.txt` similar to the example above, expanded to have 3173800 rows and 157 MB in size:
152 |
153 | |Benchmark |Mode |Cnt | Score | Error |Units|
154 | | ----------------------------- | ---- | --- | ------- | --------- | --- |
155 | |OpenCSVParser |avgt | 5 |2393.921 |± 262.347 |ms/op|
156 | |Quick CSV Parallel with header |avgt | 5 | 205.013 |± 1.739 |ms/op|
157 | |Quick CSV Parallel (advanced) |avgt | 5 | 177.262 |± 1.739 |ms/op|
158 | |Quick CSV Sequential |avgt | 5 | 648.462 |± 45.991 |ms/op|
159 |
160 | Comparison is done with OpenCSV library v3.8, performance of other libraries can be extrapolated using chart from https://github.com/uniVocity/csv-parsers-comparison
161 |
162 | Prerequisites
163 | --------------
164 | Quick CSV Streamer library requires Java 8, it has no other dependencies.
165 |
166 | License
167 | --------------
168 | Library is licensed under the terms of [GPL v2.0 license](http://www.gnu.org/licenses/gpl-2.0.html).
169 | Please contact me if you wish to use this library under more commercially friendly license or want to extend it, for example to add async parsing or support different file formats.
170 |
--------------------------------------------------------------------------------
/build.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 | id 'me.champeau.gradle.jmh' version '0.3.0'
3 | }
4 |
5 | apply plugin: 'java'
6 | apply plugin: 'maven'
7 | apply plugin: 'signing'
8 | apply plugin: 'me.champeau.gradle.jmh'
9 | apply plugin: "eclipse"
10 |
11 | group = 'uk.elementarysoftware'
12 | version = '0.2.4'
13 |
14 | sourceCompatibility = JavaVersion.VERSION_1_8
15 |
16 | tasks.withType(JavaCompile) {
17 | options.encoding = 'UTF-8'
18 | }
19 |
20 | repositories {
21 | mavenCentral()
22 | }
23 |
24 | dependencies {
25 | testCompile group: 'commons-io', name: 'commons-io', version: '2.5'
26 | testCompile group: 'junit', name: 'junit', version: '4.12'
27 | testCompile group: 'org.eclipse.jetty', name: 'jetty-server', version: '9.4.6.v20170531'
28 | testCompile group: 'org.apache.httpcomponents', name: 'httpclient', version: '4.5.3'
29 |
30 | jmh "commons-io:commons-io:2.4"
31 | jmh "com.opencsv:opencsv:3.8"
32 | }
33 |
34 | jmh {
35 | include = '.*BenchmarkParserAndMapperInMemory.*'
36 | jmhVersion = '1.19'
37 | jvmArgsAppend = '-Xmx1g -XX:+AggressiveOpts'
38 | }
39 |
40 | eclipse {
41 | classpath {
42 | plusConfigurations += [ configurations.jmh ]
43 | }
44 | }
45 |
46 | task javadocJar(type: Jar) {
47 | classifier = 'javadoc'
48 | from javadoc
49 | }
50 |
51 | task sourcesJar(type: Jar) {
52 | classifier = 'sources'
53 | from sourceSets.main.allSource
54 | }
55 |
56 | artifacts {
57 | archives javadocJar, sourcesJar
58 | }
59 |
60 | if (hasProperty('ossrhUsername')) {
61 |
62 | signing {
63 | sign configurations.archives
64 | }
65 |
66 | uploadArchives {
67 | repositories {
68 | mavenDeployer {
69 | beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) }
70 |
71 | repository(url: "https://oss.sonatype.org/service/local/staging/deploy/maven2/") {
72 | authentication(userName: ossrhUsername, password: ossrhPassword)
73 | }
74 |
75 | snapshotRepository(url: "https://oss.sonatype.org/content/repositories/snapshots/") {
76 | authentication(userName: ossrhUsername, password: ossrhPassword)
77 | }
78 |
79 | pom.project {
80 | name 'Quick CSV Streamer'
81 | packaging 'jar'
82 | description 'Quick CSV Parser with Java 8 Streams API'
83 | url 'https://github.com/titorenko/quick-csv-streamer'
84 |
85 | scm {
86 | connection 'scm:git:git://github.com/titorenko/quick-csv-streamer.git'
87 | developerConnection 'scm:git:git@github.com:titorenko/quick-csv-streamer.git'
88 | url 'https://github.com/titorenko/quick-csv-streamer'
89 | }
90 |
91 | licenses {
92 | license {
93 | name 'GNU General Public License, version 2'
94 | url 'http://www.gnu.org/licenses/gpl-2.0.html'
95 | }
96 | }
97 |
98 | developers {
99 | developer {
100 | id 'elementarysoftware'
101 | name 'Elementary Software Ltd.'
102 | email 'elementarysoftware@gmail.com'
103 | }
104 | }
105 | }
106 | }
107 | }
108 | }
109 | }
110 |
--------------------------------------------------------------------------------
/release-notes.md:
--------------------------------------------------------------------------------
1 | 0.2.4
2 | ==========
3 | Added asBoxedInt, asBoxedDouble convenience methods.
4 |
5 | Bugfixes:
6 | * Issues #6 and #7 fixed
7 |
8 | 0.2.3
9 | ==========
10 | Improvement in parsing performance for quoted data
11 | Bugfixes
12 | * Additional fix for the issue with quotes at the end of the line, thanks to https://github.com/jasonk000
13 |
14 | 0.2.2
15 | ==========
16 | Converted tabs to spaces in source files
17 | Bugfixes
18 | * Fix for issue #3 with quotes at the end of the line
19 |
20 | 0.2.1
21 | ==========
22 | Bugfixes
23 | * Fix NPE occurring when the first column is included in parsing results using header in the source API.
24 | Issue was reported by Pradeep Jaligama.
25 |
26 | 0.2.0
27 | ==========
28 | New features
29 | * header aware parsing
30 | * charset support
31 | * more flexible input config
32 | * performance improvements: less garbage, better composite slice impl., int and long parsers
33 | * new interface with mapper
34 |
35 | Bugfixes
36 | * issue with skipping records via stream api
37 | * stream completion flag as returned by advance() was not properly calculated
38 |
39 | 0.1.1
40 | ==========
41 | * sample project added
42 |
43 | 0.1.0
44 | ==========
45 | * initial release
46 |
--------------------------------------------------------------------------------
/settings.gradle:
--------------------------------------------------------------------------------
1 | rootProject.name = 'quick-csv-streamer'
--------------------------------------------------------------------------------
/src/jmh/java/uk/elementarysoftware/quickcsv/benchmarks/BenchmarkParserAndMapperInMemory.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.benchmarks;
2 |
3 | import java.io.ByteArrayInputStream;
4 | import java.io.File;
5 | import java.io.IOException;
6 | import java.io.UncheckedIOException;
7 | import java.util.concurrent.TimeUnit;
8 | import java.util.stream.Stream;
9 |
10 | import org.apache.commons.io.FileUtils;
11 | import org.openjdk.jmh.annotations.Benchmark;
12 | import org.openjdk.jmh.annotations.BenchmarkMode;
13 | import org.openjdk.jmh.annotations.Fork;
14 | import org.openjdk.jmh.annotations.Measurement;
15 | import org.openjdk.jmh.annotations.Mode;
16 | import org.openjdk.jmh.annotations.OutputTimeUnit;
17 | import org.openjdk.jmh.annotations.Scope;
18 | import org.openjdk.jmh.annotations.State;
19 | import org.openjdk.jmh.annotations.Warmup;
20 | import org.openjdk.jmh.infra.Blackhole;
21 | import org.openjdk.jmh.runner.Runner;
22 | import org.openjdk.jmh.runner.options.Options;
23 | import org.openjdk.jmh.runner.options.OptionsBuilder;
24 |
25 | import uk.elementarysoftware.quickcsv.api.CSVParser;
26 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
27 |
28 | @BenchmarkMode(Mode.AverageTime)
29 | @Fork(1)
30 | @Warmup(iterations = 3, time = 5000, timeUnit = TimeUnit.MILLISECONDS)
31 | @Measurement(iterations = 5, time = 7000, timeUnit = TimeUnit.MILLISECONDS)
32 | @OutputTimeUnit(TimeUnit.MILLISECONDS)
33 | @State(Scope.Benchmark)
34 | public class BenchmarkParserAndMapperInMemory {
35 |
36 | private static final String TEST_FILE = "src/test/resources/cities-unix.txt";
37 | private static final String TEST_FILE_QUOTED = "src/test/resources/cities-unix-quoted.txt";
38 |
39 | @State(Scope.Benchmark)
40 | public static class BenchmarkState {
41 |
42 | byte[] content = loadFile(prepareFile(100, TEST_FILE));
43 |
44 | byte[] quotedContent = loadFile(prepareFile(100, TEST_FILE_QUOTED));
45 |
46 | private File prepareFile(int sizeMultiplier, String testFile) {
47 | try {
48 | byte[] content= FileUtils.readFileToByteArray(new File(testFile));
49 | File result = File.createTempFile("csv", "large");
50 | for (int i = 0; i < sizeMultiplier; i++) {
51 | FileUtils.writeByteArrayToFile(result, content, true);
52 | }
53 | return result;
54 | } catch (IOException e) {
55 | throw new UncheckedIOException(e);
56 | }
57 | }
58 |
59 | private byte[] loadFile(File file) {
60 | try {
61 | return FileUtils.readFileToByteArray(file);
62 | } catch (IOException e) {
63 | throw new UncheckedIOException(e);
64 | }
65 | }
66 | }
67 |
68 | @Benchmark
69 | public void benchmarkParallelParser(BenchmarkState state, Blackhole bh) {
70 | CSVParser parser = CSVParserBuilder.aParser(City.MAPPER).build();
71 | Stream stream = parser.parse(new ByteArrayInputStream(state.content));
72 | stream.forEach(c -> bh.consume(c));
73 | }
74 |
75 | @Benchmark
76 | public void benchmarkParallelParserWithHeader(BenchmarkState state, Blackhole bh) {
77 | CSVParser parser = CSVParserBuilder
78 | .aParser(City.EnumMapper.MAPPER, City.EnumMapper.Fields.class)
79 | .usingExplicitHeader("Country", "City", "AccentCity", "Region", "Population", "Latitude", "Longitude")
80 | .build();
81 | Stream stream = parser.parse(new ByteArrayInputStream(state.content));
82 | stream.forEach(c -> bh.consume(c));
83 | }
84 |
85 | @Benchmark
86 | public void benchmarkSequentialParser(BenchmarkState state, Blackhole bh) {
87 | CSVParser parser = CSVParserBuilder.aParser(City.MAPPER).build();
88 | Stream stream = parser.parse(new ByteArrayInputStream(state.content));
89 | stream.sequential().forEach(c -> bh.consume(c));
90 | }
91 |
92 |
93 | @Benchmark
94 | public void benchmarkSequentialParserWithQuotes(BenchmarkState state, Blackhole bh) {
95 | CSVParser parser = CSVParserBuilder.aParser(City.MAPPER).build();
96 | Stream stream = parser.parse(new ByteArrayInputStream(state.quotedContent));
97 | stream.sequential().forEach(c -> bh.consume(c));
98 | }
99 |
100 | @Benchmark
101 | public void benchmarkOpenCSVParser(BenchmarkState state, Blackhole bh) {
102 | OpenCSVParser parser = new OpenCSVParser();
103 | Stream stream = parser.parse(new ByteArrayInputStream(state.content));
104 | stream.forEach(c -> bh.consume(c));
105 | }
106 |
107 | public static void main(String[] args) throws Exception {
108 | Options opt = new OptionsBuilder()
109 | .include(".*" + BenchmarkParserAndMapperInMemory.class.getSimpleName()+".*")
110 | //.addProfiler(LinuxPerfAsmProfiler.class)
111 | //.addProfiler(StackProfiler.class)
112 | .build();
113 | new Runner(opt).run();
114 | }
115 |
116 | }
--------------------------------------------------------------------------------
/src/jmh/java/uk/elementarysoftware/quickcsv/benchmarks/City.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.benchmarks;
2 |
3 | import java.util.function.Function;
4 |
5 | import uk.elementarysoftware.quickcsv.api.CSVRecord;
6 | import uk.elementarysoftware.quickcsv.api.CSVRecordWithHeader;
7 |
8 | public class City {
9 |
10 | public static final Function MAPPER = City::new;
11 |
12 | public static class EnumMapper {
13 |
14 | public static enum Fields {
15 | AccentCity,
16 | Population,
17 | Latitude,
18 | Longitude
19 | }
20 |
21 | public static final Function, City> MAPPER = r -> {
22 | return new City(
23 | r.getField(Fields.AccentCity).asString(),
24 | r.getField(Fields.Population).asInt(),
25 | r.getField(Fields.Latitude).asDouble(),
26 | r.getField(Fields.Longitude).asDouble()
27 | );
28 | };
29 | }
30 |
31 | private static final int CITY_INDEX = 2;
32 |
33 | private final String city;
34 | private final int population;
35 | private final double latitude;
36 | private final double longitude;
37 |
38 | public City(CSVRecord r) {
39 | r.skipFields(CITY_INDEX);
40 | this.city = r.getNextField().asString();
41 | r.skipField();
42 | this.population = r.getNextField().asInt();
43 | this.latitude = r.getNextField().asDouble();
44 | this.longitude = r.getNextField().asDouble();
45 | }
46 |
47 | public City(String city, int population, double latitude, double longitude) {
48 | this.city = city;
49 | this.population = population;
50 | this.latitude = latitude;
51 | this.longitude = longitude;
52 | }
53 |
54 | public String getCity() {
55 | return city;
56 | }
57 |
58 | public int getPopulation() {
59 | return population;
60 | }
61 |
62 | public double getLatitude() {
63 | return latitude;
64 | }
65 |
66 | public double getLongitude() {
67 | return longitude;
68 | }
69 |
70 | @Override
71 | public int hashCode() {
72 | final int prime = 31;
73 | int result = 1;
74 | result = prime * result + ((city == null) ? 0 : city.hashCode());
75 | long temp;
76 | temp = Double.doubleToLongBits(latitude);
77 | result = prime * result + (int) (temp ^ (temp >>> 32));
78 | temp = Double.doubleToLongBits(longitude);
79 | result = prime * result + (int) (temp ^ (temp >>> 32));
80 | result = prime * result + population;
81 | return result;
82 | }
83 |
84 | @Override
85 | public boolean equals(Object obj) {
86 | if (this == obj)
87 | return true;
88 | if (obj == null)
89 | return false;
90 | if (getClass() != obj.getClass())
91 | return false;
92 | City other = (City) obj;
93 | if (city == null) {
94 | if (other.city != null)
95 | return false;
96 | } else if (!city.equals(other.city))
97 | return false;
98 | if (Double.doubleToLongBits(latitude) != Double.doubleToLongBits(other.latitude))
99 | return false;
100 | if (Double.doubleToLongBits(longitude) != Double.doubleToLongBits(other.longitude))
101 | return false;
102 | if (population != other.population)
103 | return false;
104 | return true;
105 | }
106 |
107 | @Override
108 | public String toString() {
109 | return "City [city=" + city + ", population=" + population + ", latitude=" + latitude + ", longitude=" + longitude + "]";
110 | }
111 |
112 | }
113 |
--------------------------------------------------------------------------------
/src/jmh/java/uk/elementarysoftware/quickcsv/benchmarks/OpenCSVParser.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.benchmarks;
2 |
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 | import java.io.InputStreamReader;
6 | import java.io.Reader;
7 | import java.io.UncheckedIOException;
8 | import java.util.Iterator;
9 | import java.util.Spliterator;
10 | import java.util.Spliterators;
11 | import java.util.stream.Stream;
12 | import java.util.stream.StreamSupport;
13 |
14 | import org.apache.commons.io.IOUtils;
15 |
16 | import com.opencsv.CSVReader;
17 |
18 |
19 | public class OpenCSVParser {
20 |
21 | public Stream parse(InputStream is) {
22 | Reader reader = new InputStreamReader(is);
23 | CSVReader csvReader = new CSVReader(reader);
24 | Iterator iterator = new Iterator() {
25 | private boolean isEndReached = false;
26 |
27 | @Override
28 | public boolean hasNext() {
29 | return !isEndReached;
30 | }
31 |
32 | @Override
33 | public City next() {
34 | try {
35 | String[] values = csvReader.readNext();
36 | if (values == null) {
37 | isEndReached = true;
38 | return null;
39 | } else {
40 | return toCity(values);
41 | }
42 | } catch (IOException e) {
43 | throw new UncheckedIOException(e);
44 | }
45 | }
46 | };
47 | Spliterator spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED);
48 | return StreamSupport.stream(spliterator, false).onClose(new Runnable() {
49 | @Override
50 | public void run() {
51 | IOUtils.closeQuietly(csvReader);
52 | }
53 | });
54 | }
55 |
56 | protected City toCity(String[] values) {
57 | if (values.length < 7) return null;
58 | return new City(values[2], parseInt(values[4]), parseDouble(values[5]), parseDouble(values[6]));
59 | }
60 |
61 | private int parseInt(String value) {
62 | try {
63 | return value.isEmpty() ? 0 : Integer.parseInt(value);
64 | } catch (Exception e) {
65 | return 0;
66 | }
67 | }
68 |
69 | private double parseDouble(String value) {
70 | return value.isEmpty() ? 0 : Double.parseDouble(value);
71 | }
72 |
73 | }
74 |
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/api/ByteArraySource.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.api;
2 |
3 | import java.util.concurrent.atomic.AtomicInteger;
4 | import java.util.function.Consumer;
5 |
/**
 * Abstract source of byte arrays to allow parsing of synchronous or asynchronous streams.
 */
public interface ByteArraySource {

    /**
     * @return the next chunk of data from this source
     * @throws Exception if the next chunk can not be obtained
     */
    ByteArrayChunk getNext() throws Exception;

    /**
     * Base class for chunks that track how many users they currently have and notify
     * a callback once they are no longer in use.
     */
    public abstract static class ReusableChunk {

        private final Runnable onFree;
        private final AtomicInteger usageCount = new AtomicInteger(0);

        /**
         * @param onFree - callback that will be called when usage count reaches zero
         */
        protected ReusableChunk(Runnable onFree) {
            this.onFree = onFree;
        }

        /** Registers one more user of this chunk. */
        public void incrementUseCount() {
            usageCount.incrementAndGet();
        }

        /**
         * Unregisters one user of this chunk and runs the {@code onFree} callback when the
         * resulting count is zero or below.
         */
        public void decrementUseCount() {
            int value = usageCount.decrementAndGet();
            if (value <= 0) onFree.run();
        }
    }

    /**
     * Chunk backed by a byte array, of which the first {@link #getLength()} bytes are content.
     */
    public static class ByteArrayChunk extends ReusableChunk {
        public static final ByteArrayChunk EMPTY = new ByteArrayChunk(new byte[0], 0, false, (b) -> {});

        private final byte[] data;
        private final int length;
        private final boolean isLast;

        /**
         * @param data - underlying content
         * @param length - content length
         * @param isLast - whether this chunk is the last one
         * @param onFree - callback that will be called with {@code data} when data from this chunk
         *                 has been fully consumed.
         */
        public ByteArrayChunk(byte[] data, int length, boolean isLast, Consumer<byte[]> onFree) {
            super(() -> onFree.accept(data));
            this.data = data;
            this.length = length;
            this.isLast = isLast;
        }

        public byte[] getData() {
            return data;
        }

        public int getLength() {
            return length;
        }

        public boolean isLast() {
            return isLast;
        }
    }
}
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/api/CSVParser.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.api;
2 |
3 | import java.io.File;
4 | import java.io.FileInputStream;
5 | import java.io.IOException;
6 | import java.io.InputStream;
7 | import java.util.stream.Stream;
8 |
9 | import uk.elementarysoftware.quickcsv.ioutils.IOUtils;
10 |
11 | /**
12 | * CSV Parser can parse inputs such as {@link InputStream} or more generally {@link ByteArraySource} to Stream<T>.
13 | *
14 | * @param - the type of the parsing result
15 | */
16 | public interface CSVParser {
17 |
18 | public default Stream parse(File file) throws IOException {
19 | InputStream is = new FileInputStream(file);
20 | return parse(is).onClose(() -> IOUtils.closeQuietly(is));
21 | }
22 |
23 | public Stream parse(InputStream is);
24 |
25 | public Stream parse(ByteArraySource bas);
26 | }
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/api/CSVParserBuilder.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.api;
2 |
3 | import java.nio.charset.Charset;
4 | import java.util.Objects;
5 | import java.util.Optional;
6 | import java.util.function.Function;
7 |
8 | import uk.elementarysoftware.quickcsv.parser.FieldSubsetView;
9 | import uk.elementarysoftware.quickcsv.parser.QuickCSVParser;
10 |
11 | /**
12 | * CSV Parser builder, use this class to construct {@link CSVParser}.
13 | *
14 | * @param - type of object that each record of the CSV data will be mapped to
15 | * @param - type of enumeration that is used to specify fields to be parsed, only relevant for header-aware parser.
16 | */
17 | public class CSVParserBuilder> {
18 |
19 | private int bufferSize = 512*1024;
20 |
21 | private CSVFileMetadata metadata = CSVFileMetadata.RFC_4180;
22 |
23 | private Function recordMapper;
24 |
25 | private Function, T> recordWithHeaderMapper;
26 | private FieldSubsetView subsetView = null;
27 |
28 | private Charset charset = Charset.defaultCharset();
29 |
30 | private CSVParserBuilder() {
31 | }
32 |
33 | /**
34 | * Create new parser using supplied mapping function.
35 | *
36 | * Mapping function can not store reference to {@link CSVRecord} object,
37 | * it needs to be a pure function that creates new instance of T.
38 | * CSVRecord could be mutated by the parser when next field or record are processed.
39 | *
40 | * @param mapper - mapping function from CSVRecord to T
41 | * @param - type of object that each record of the CSV data will be mapped to
42 | * @param - ignored
43 | * @return this parser builder
44 | */
45 | public static > CSVParserBuilder aParser(Function mapper) {
46 | CSVParserBuilder builder = new CSVParserBuilder();
47 | builder.recordMapper = mapper;
48 | return builder;
49 | }
50 |
51 | /**
52 | * Create new header-aware parser using supplied mapping function.
53 | *
54 | * Mapping function can not store reference to {@link CSVRecordWithHeader} object,
55 | * it needs to be a pure function that create new instance of T.
56 | *
57 | * CSVRecordWithHeader could be mutated by the parser when next record is processed.
58 | *
59 | * @param mapper - mapping function from CSVRecordWithHeader to T
60 | * @param fields - enumeration specifying fields that should be parsed
61 | * @param - type of object that each record of the CSV data will be mapped to
62 | * @param - type of enumeration that is used to specify fields to be parsed
63 | *
64 | * @return this parser builder
65 | */
66 |
67 | public static > CSVParserBuilder aParser(Function, T> mapper, Class fields) {
68 | CSVParserBuilder builder = new CSVParserBuilder();
69 | builder.recordWithHeaderMapper = mapper;
70 | builder.subsetView = FieldSubsetView.forSourceSuppliedHeader(fields);
71 | return builder;
72 | }
73 |
74 | /**
75 | * Use supplied header and do not take header from the source.
76 | * @param header - header fields
77 | * @return this parser builder
78 | */
79 | public CSVParserBuilder usingExplicitHeader(String... header) {
80 | Objects.requireNonNull(subsetView);
81 | this.subsetView = FieldSubsetView.forExplicitHeader(subsetView.getFieldSubset(), header);
82 | return this;
83 | }
84 |
85 | /**
86 | * Use tabs as separator and no quoting
87 | * @return this parser builder
88 | */
89 | public CSVParserBuilder forTabs() {
90 | this.metadata = CSVFileMetadata.TABS;
91 | return this;
92 | }
93 |
94 | /**
95 | * Use comma as separator and double quotes as quote character as per RFC 4180 document.
96 | * @return this parser builder
97 | */
98 | public CSVParserBuilder forRfc4180() {
99 | this.metadata = CSVFileMetadata.RFC_4180;
100 | return this;
101 | }
102 |
103 | /**
104 | * Use specified character as field separator.
105 | * @param separator - field separator character
106 | * @return this parser builder
107 | */
108 | public CSVParserBuilder usingSeparatorWithNoQuotes(char separator) {
109 | this.metadata = new CSVFileMetadata(separator, Optional.empty());
110 | return this;
111 | }
112 |
113 | /**
114 | * Use specified characters as field separator and quote character.
115 | * Quote character can be escaped by preceding it with another quote character.
116 | * @param separator - field separator character
117 | * @param quote - quote character
118 | * @return this parser builder
119 | */
120 | public CSVParserBuilder usingSeparatorWithQuote(char separator, char quote) {
121 | this.metadata = new CSVFileMetadata(separator, Optional.of(quote));
122 | return this;
123 | }
124 |
125 | /**
126 | * Buffer size to use when reading from file and parsing. Each buffer is parsed by single thread.
127 | * @param size - size in bytes
128 | * @return this parser builder
129 | */
130 | public CSVParserBuilder usingBufferSize(int size) {
131 | this.bufferSize = size;
132 | return this;
133 | }
134 |
135 |
136 | /**
137 | * Specifies charset to use during parsing. By default Charset.defaultCharset() is used.
138 | * This parser only supports charset that represent separators and digits as single bytes.
139 | * @param charset - charset to use during parsing
140 | * @return this parser builder
141 | */
142 | public CSVParserBuilder usingCharset(Charset charset) {
143 | this.charset = charset;
144 | return this;
145 | }
146 |
147 | /**
148 | * Specifies charset name to use during parsing. By default Charset.defaultCharset() is used.
149 | * This parser only supports charset that represent separators and digits as single bytes.
150 | * @param charsetName - charset to use during parsing
151 | * @return this parser builder
152 | */
153 | public CSVParserBuilder usingCharset(String charsetName) {
154 | return usingCharset(Charset.forName(charsetName));
155 | }
156 |
157 | /**
158 | * Construct parser using current setting
159 | * @return CSV Parser
160 | */
161 | public CSVParser build() {
162 | return subsetView == null ?
163 | new QuickCSVParser(bufferSize, metadata, recordMapper, charset) :
164 | new QuickCSVParser(bufferSize, metadata, recordWithHeaderMapper, subsetView, charset);
165 | }
166 |
167 | public static class CSVFileMetadata {
168 |
169 | public static CSVFileMetadata RFC_4180 = new CSVFileMetadata(',', Optional.of('"'));
170 | public static CSVFileMetadata TABS = new CSVFileMetadata('\t', Optional.empty());
171 |
172 | public final char separator;
173 | public final Optional quote;
174 |
175 | public CSVFileMetadata(char separator, Optional quote) {
176 | this.separator = separator;
177 | this.quote = quote;
178 | }
179 | }
180 | }
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/api/CSVRecord.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.api;
2 |
3 |
4 | /**
5 | * Interface to access parsed CSV data in efficient manner.
6 | * Fields are parsed in order they appear in the CSV source.
7 | */
8 | public interface CSVRecord {
9 | public void skipField();
10 | public void skipFields(int nFields);
11 |
12 | public Field getNextField();
13 | }
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/api/CSVRecordWithHeader.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.api;
2 |
3 | import java.util.List;
4 |
5 | /**
6 | * CSV Record with header that gives access to all fields from enumeration K.
7 | * The fields can be accessed by name using enum values.
8 | * Enum values toString() should match with header column names.
9 | *
10 | * @param - field enumeration
11 | */
12 | public interface CSVRecordWithHeader> {
13 |
14 | public Field getField(K field);
15 |
16 | public List getHeader();
17 | }
18 |
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/api/Field.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.api;
2 |
3 | import java.nio.ByteBuffer;
4 |
/**
 * Typed accessors over the raw data of a single CSV field.
 *
 * A field is usually a view onto some large underlying byte array. That array can be
 * mutated during parsing and the field object itself can be re-used, so clients should
 * not keep references to Field instances. Instead, a client is expected to map the field
 * to its own data structure and then no longer use it.
 *
 * As per the contract documented for this interface, methods that return primitive types
 * throw NPE if the underlying field is empty. Test with {@link #isEmpty()} where needed,
 * or use the boxed variants.
 */
public interface Field {

    /** @return the raw content of this field */
    ByteBuffer raw();

    /** @return the field content as a string */
    String asString();

    /** @return the field content as a double */
    double asDouble();

    /** @return the field content as a byte */
    byte asByte();

    /** @return the field content as a char */
    char asChar();

    /** @return the field content as a short */
    short asShort();

    /** @return the field content as an int */
    int asInt();

    /** @return the field content as a long */
    long asLong();

    /** @return the field content as a boxed integer */
    Integer asBoxedInt();

    /** @return the field content as a boxed double */
    Double asBoxedDouble();

    /** @return whether this field has no content */
    boolean isEmpty();

    /** @return a copy of this field */
    Field clone();
}
36 |
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/api/StandardMappers.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.api;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 | import java.util.function.Function;
6 |
7 | public class StandardMappers {
8 | /**
9 | * Just convert to string list. Note that is NOT recommended to use this function in high volume scenarios,
10 | * more effective is to directly convert to domain object or array.
11 | */
12 | public static final Function> TO_STRING_LIST = new Function>() {
13 |
14 | @Override
15 | public List apply(CSVRecord r) {
16 | List result = new ArrayList<>();
17 | while(true) {
18 | Field f = r.getNextField();
19 | if (f == null) break;
20 | result.add(f.asString());
21 | }
22 | return result;
23 | }
24 |
25 | };
26 | }
27 |
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/Decoder.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.decoder;
2 |
3 | import java.nio.charset.Charset;
4 |
5 | import uk.elementarysoftware.quickcsv.decoder.ints.IntParser;
6 | import uk.elementarysoftware.quickcsv.decoder.ints.LongParser;
7 |
8 | public class Decoder {
9 |
10 | private final uk.elementarysoftware.quickcsv.decoder.doubles.DoubleParser doubleParser;
11 | private final Charset charset;
12 | private final IntParser intParser;
13 | private final LongParser longParser;
14 |
15 | public Decoder(Charset charset) {
16 | this.charset = charset;
17 | ParserFactory parserFactory = new ParserFactory();
18 | this.doubleParser = parserFactory.getDoubleParser();
19 | this.intParser = parserFactory.getIntParser();
20 | this.longParser = parserFactory.getLongParser();
21 | }
22 |
23 | public String decodeToString(byte[] buffer, int offset, int length) {
24 | return new String(buffer, offset, length, charset);
25 | }
26 |
27 | public double decodeToDouble(byte[] buffer, int offset, int length) {
28 | if (length == 0) return 0.0;
29 | return doubleParser.parse(buffer, offset, length);
30 | }
31 |
32 | public int decodeToInt(byte[] buffer, int offset, int length) {
33 | if (length == 0) return 0;
34 | return intParser.parse(buffer, offset, length);
35 | }
36 |
37 | public long decodeToLong(byte[] buffer, int offset, int length) {
38 | if (length == 0) return 0L;
39 | return longParser.parse(buffer, offset, length);
40 | }
41 |
42 | public Charset getCharset() {
43 | return charset;
44 | }
45 | }
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/ParserFactory.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.decoder;
2 |
3 | import uk.elementarysoftware.quickcsv.decoder.doubles.DoubleParser;
4 | import uk.elementarysoftware.quickcsv.decoder.doubles.JDKDoubleParserAdapter;
5 | import uk.elementarysoftware.quickcsv.decoder.doubles.QuickDoubleParser;
6 | import uk.elementarysoftware.quickcsv.decoder.ints.IntParser;
7 | import uk.elementarysoftware.quickcsv.decoder.ints.LongParser;
8 | import uk.elementarysoftware.quickcsv.decoder.ints.QuickIntParser;
9 | import uk.elementarysoftware.quickcsv.decoder.ints.QuickLongParser;
10 |
11 | class ParserFactory {
12 |
13 | private final boolean useQuickParsers;
14 |
15 | ParserFactory() {
16 | this.useQuickParsers = "true".equals(System.getProperty("uk.elementarysoftware.useQuickParsers", "true"));
17 | }
18 |
19 | public DoubleParser getDoubleParser() {
20 | if (useQuickParsers) {
21 | return new QuickDoubleParser();
22 | } else {
23 | return new JDKDoubleParserAdapter();
24 | }
25 | }
26 |
27 | public IntParser getIntParser() {
28 | return new QuickIntParser();
29 | }
30 |
31 | public LongParser getLongParser() {
32 | return new QuickLongParser();
33 | }
34 | }
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/doubles/DoubleParser.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.decoder.doubles;
2 |
3 |
/**
 * Parses a double directly from a range of bytes.
 */
public interface DoubleParser {

    /**
     * @param in - buffer holding the textual number
     * @param startIndex - index of the first byte of the number
     * @param length - number of bytes that make up the number
     * @return the parsed value
     */
    public double parse(byte[] in, int startIndex, int length);

    /**
     * Convenience overload that parses the bytes of the given string (platform default charset).
     *
     * @param s - textual number
     * @return the parsed value
     */
    default public double parse(String s) {
        byte[] bytes = s.getBytes();
        // use the encoded length: it differs from s.length() whenever a character
        // is not encoded as exactly one byte
        return parse(bytes, 0, bytes.length);
    }
}
11 |
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/doubles/QuickDoubleParser.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.decoder.doubles;
2 |
3 |
4 | public class QuickDoubleParser implements DoubleParser {
5 |
6 | private static final int RADIX = 10;
7 | private static final int DOT = '.'-'0';
8 |
9 | private JDKDoubleParserAdapter fallBack = new JDKDoubleParserAdapter();
10 |
11 | public double parse(byte[] bytes, int offset, int length) {
12 | if (bytes == null || length <=0)
13 | throw new NumberFormatException("Empty input");
14 | long result = 0;
15 | boolean isNegative = false;
16 | int index = offset, dotIndex=offset+length-1, endIndex = offset+length;
17 |
18 | byte firstByte = bytes[index];
19 | if (firstByte < '0') {
20 | if (firstByte == '-') {
21 | isNegative = true;
22 | }
23 | index++;
24 | }
25 | int nDigits = 0;
26 | while (index < endIndex) {
27 | int digit = bytes[index] - '0';
28 | if (digit == DOT) {
29 | dotIndex=index;
30 | }else if (digit < 0 || digit>9) {
31 | throw new NumberFormatException("For: "+new String(bytes, offset, length));
32 | } else {
33 | result *= RADIX;
34 | result -= digit;
35 | nDigits++;
36 | }
37 | index++;
38 | }
39 |
40 | double mantissa = -result;
41 | int negExponent = length-(dotIndex-offset)-1;
42 |
43 | if (nDigits <= JDKDoubleParser.maxDecimalDigits) {
44 | if (negExponent == 0 || mantissa == 0.0) {
45 | return (isNegative) ? -mantissa : mantissa;
46 | }
47 | double rValue = mantissa / JDKDoubleParser.small10pow[negExponent];
48 | return (isNegative) ? -rValue : rValue;
49 | } else { //harder case, use JDK implementation
50 | return fallBack.parse(bytes, offset, length);
51 | }
52 | }
53 | }
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/ints/ExceptionHelper.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.decoder.ints;
2 |
/**
 * Builds the exceptions reported by the integer parsers.
 */
class ExceptionHelper {

    /**
     * Creates a {@link NumberFormatException} whose message quotes the offending input.
     *
     * @param in - buffer holding the input
     * @param startIndex - index of the first byte of the offending input
     * @param len - number of bytes of the offending input
     * @return exception with message {@code "For: <input>"}
     */
    static NumberFormatException nfExceptionFor(byte[] in, int startIndex, int len) {
        final String offending = new String(in, startIndex, len);
        final String message = "For: " + offending;
        return new NumberFormatException(message);
    }
}
8 |
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/ints/IntParser.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.decoder.ints;
2 |
/**
 * Parses an int from its textual form stored in a byte array.
 */
public interface IntParser {

    /**
     * Parses the number stored in {@code in[startIndex .. startIndex + length)}.
     *
     * @param in         buffer holding the text
     * @param startIndex index of the first byte of the number
     * @param length     number of bytes of the number
     * @return parsed value
     */
    public int parse(byte[] in, int startIndex, int length);

    /**
     * Convenience overload that encodes the string and parses all resulting bytes.
     *
     * The byte count is taken from the encoded array rather than from
     * {@code s.length()}: the two differ as soon as a character needs more than one
     * byte. UTF-8 is used explicitly so the result does not depend on the platform
     * default charset.
     *
     * @param s text to parse
     * @return parsed value
     */
    default public int parse(String s) {
        final byte[] bytes = s.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        return parse(bytes, 0, bytes.length);
    }
}
10 |
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/ints/LongParser.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.decoder.ints;
2 |
/**
 * Parses a long from its textual form stored in a byte array.
 */
public interface LongParser {

    /**
     * Parses the number stored in {@code in[startIndex .. startIndex + length)}.
     *
     * @param in         buffer holding the text
     * @param startIndex index of the first byte of the number
     * @param length     number of bytes of the number
     * @return parsed value
     */
    public long parse(byte[] in, int startIndex, int length);

    /**
     * Convenience overload that encodes the string and parses all resulting bytes.
     *
     * The byte count is taken from the encoded array rather than from
     * {@code s.length()}: the two differ as soon as a character needs more than one
     * byte. UTF-8 is used explicitly so the result does not depend on the platform
     * default charset.
     *
     * @param s text to parse
     * @return parsed value
     */
    default public long parse(String s) {
        final byte[] bytes = s.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        return parse(bytes, 0, bytes.length);
    }
}
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/ints/QuickIntParser.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.decoder.ints;
2 | import static uk.elementarysoftware.quickcsv.decoder.ints.ExceptionHelper.*;
3 |
4 | public class QuickIntParser implements IntParser {
5 |
6 | private static final int radix = 10;
7 |
8 | @Override
9 | public int parse(final byte[] in, final int startIndex, final int len) {
10 |
11 | int result = 0;
12 | boolean negative = false;
13 | int index = startIndex;
14 | final int end = startIndex + len;
15 | int limit = -Integer.MAX_VALUE;
16 | int multmin;
17 | int digit;
18 |
19 | if (len > 0) {
20 | byte firstByte = in[index];
21 | if (firstByte < '0') { // Possible leading "+" or "-"
22 | if (firstByte == '-') {
23 | negative = true;
24 | limit = Integer.MIN_VALUE;
25 | } else if (firstByte != '+')
26 | throw nfExceptionFor(in, startIndex, len);
27 |
28 | if (len == 1) // Cannot have lone "+" or "-"
29 | throw nfExceptionFor(in, startIndex, len);
30 | index++;
31 | }
32 | multmin = limit / radix;
33 | while (index < end) {
34 | // Accumulating negatively avoids surprises near MAX_VALUE
35 | digit = in[index++] - '0';
36 | if (digit < 0 || digit >= radix) {
37 | throw nfExceptionFor(in, startIndex, len);
38 | }
39 | if (result < multmin) {
40 | throw nfExceptionFor(in, startIndex, len);
41 | }
42 | result *= radix;
43 | if (result < limit + digit) {
44 | throw nfExceptionFor(in, startIndex, len);
45 | }
46 | result -= digit;
47 | }
48 | } else {
49 | throw nfExceptionFor(in, startIndex, len);
50 | }
51 | return negative ? result : -result;
52 | }
53 | }
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/decoder/ints/QuickLongParser.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.decoder.ints;
2 | import static uk.elementarysoftware.quickcsv.decoder.ints.ExceptionHelper.*;
3 |
4 | public class QuickLongParser implements LongParser {
5 |
6 | private static final int radix = 10;
7 |
8 | @Override
9 | public long parse(final byte[] in, final int startIndex, final int len) {
10 |
11 | long result = 0;
12 | boolean negative = false;
13 | int index = startIndex;
14 | long limit = -Long.MAX_VALUE;
15 | final int end = startIndex + len;
16 | long multmin;
17 | int digit;
18 |
19 | if (len > 0) {
20 | byte firstByte = in[index];
21 | if (firstByte < '0') { // Possible leading "+" or "-"
22 | if (firstByte == '-') {
23 | negative = true;
24 | limit = Long.MIN_VALUE;
25 | } else if (firstByte != '+')
26 | throw nfExceptionFor(in, startIndex, len);
27 |
28 | if (len == 1) // Cannot have lone "+" or "-"
29 | throw nfExceptionFor(in, startIndex, len);
30 | index++;
31 | }
32 | multmin = limit / radix;
33 | while (index < end) {
34 | // Accumulating negatively avoids surprises near MAX_VALUE
35 | digit = in[index++] - '0';
36 | if (digit < 0 || digit >= radix) {
37 | throw nfExceptionFor(in, startIndex, len);
38 | }
39 | if (result < multmin) {
40 | throw nfExceptionFor(in, startIndex, len);
41 | }
42 | result *= radix;
43 | if (result < limit + digit) {
44 | throw nfExceptionFor(in, startIndex, len);
45 | }
46 | result -= digit;
47 | }
48 | } else {
49 | throw nfExceptionFor(in, startIndex, len);
50 | }
51 | return negative ? result : -result;
52 | }
53 | }
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/functional/Pair.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.functional;
2 |
3 | import java.util.Objects;
4 |
/**
 * Container to ease passing around a tuple of two objects. This object provides a sensible
 * implementation of equals(), returning true if equals() is true on each of the contained
 * objects.
 *
 * @param <F> type of the first element
 * @param <S> type of the second element
 */
public class Pair<F, S> {

    public final F first;
    public final S second;

    /**
     * Constructor for a Pair.
     *
     * @param first the first object in the Pair
     * @param second the second object in the pair
     */
    public Pair(F first, S second) {
        this.first = first;
        this.second = second;
    }

    /**
     * Checks the two objects for equality by delegating to their respective
     * {@link Object#equals(Object)} methods.
     *
     * @param o the {@link Pair} to which this one is to be checked for equality
     * @return true if the underlying objects of the Pair are both considered
     *         equal
     */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof Pair)) {
            return false;
        }
        Pair<?, ?> p = (Pair<?, ?>) o;
        return Objects.equals(p.first, first) && Objects.equals(p.second, second);
    }

    /**
     * Compute a hash code using the hash codes of the underlying objects
     *
     * @return a hashcode of the Pair
     */
    @Override
    public int hashCode() {
        return (first == null ? 0 : first.hashCode()) ^ (second == null ? 0 : second.hashCode());
    }

    /**
     * @return the two elements joined by '=', e.g. {@code key=value}
     */
    @Override
    public String toString() {
        return first+"="+second;
    }

    /**
     * Convenience method for creating an appropriately typed pair.
     * @param a the first object in the Pair
     * @param b the second object in the pair
     * @param <A> type of left element
     * @param <B> type of right element
     * @return a Pair that is templatized with the types of a and b
     */
    public static <A, B> Pair<A, B> of(A a, B b) {
        return new Pair<A, B>(a, b);
    }
}
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/functional/PrimitiveFunctions.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.functional;
2 |
/**
 * Functional interfaces that take primitive {@code char} arguments, so callers can pass
 * characters to lambdas and method references without boxing.
 */
public class PrimitiveFunctions {

    /**
     * Function of one char producing a {@code T}.
     *
     * @param <T> result type
     */
    @FunctionalInterface
    public static interface FunCharToT<T> {
        public T apply(char c);
    }

    /**
     * Function of two chars producing a {@code T}.
     *
     * @param <T> result type
     */
    @FunctionalInterface
    public static interface FunBiCharToT<T> {
        public T apply(char c, char q);
    }

    /** Predicate over one char. */
    @FunctionalInterface
    public static interface FunCharToBoolean {
        public boolean apply(char c);
    }

    /** Predicate over two chars. */
    @FunctionalInterface
    public static interface FunBiCharToBoolean {
        public boolean apply(char c, char q);
    }
}
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/ioutils/IOUtils.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.ioutils;
2 |
3 | import java.io.Closeable;
4 | import java.io.IOException;
5 |
/** Small I/O helpers. */
public class IOUtils {

    /**
     * Closes the given resource, discarding any IOException raised by close().
     * A null argument is accepted and ignored.
     *
     * @param closeable resource to close, may be null
     */
    public static void closeQuietly(Closeable closeable) {
        if (closeable == null) {
            return;
        }
        try {
            closeable.close();
        } catch (IOException ioe) {
            // ignore
        }
    }
}
18 |
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/parser/BufferPool.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.parser;
2 |
3 | import java.util.Queue;
4 | import java.util.concurrent.ConcurrentLinkedQueue;
5 | import java.util.concurrent.atomic.AtomicInteger;
6 |
7 | /** Pools large, long-living byte arrays to minimise old generation GC */
8 | class BufferPool {
9 |
10 | private final int bufferSize;
11 | private final AtomicInteger buffersCreated = new AtomicInteger(0);
12 | private final Queue buffers = new ConcurrentLinkedQueue();
13 |
14 | BufferPool(int bufferSize) {
15 | this.bufferSize = bufferSize;
16 | }
17 |
18 | byte[] getBuffer() {
19 | byte[] result = buffers.poll();
20 | if (result == null) {
21 | buffersCreated.incrementAndGet();
22 | return new byte[bufferSize];
23 | } else {
24 | return result;
25 | }
26 | }
27 |
28 | void handBack(byte[] buffer) {
29 | buffers.add(buffer);
30 | if (buffers.size() >= buffersCreated.get()) {
31 | clear();
32 | }
33 | }
34 |
35 | private void clear() {
36 | buffers.clear();
37 | buffersCreated.set(0);
38 | }
39 | }
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/parser/ByteArrayField.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.parser;
2 |
3 | import java.nio.ByteBuffer;
4 | import java.nio.charset.Charset;
5 |
6 | import uk.elementarysoftware.quickcsv.api.Field;
7 | import uk.elementarysoftware.quickcsv.decoder.Decoder;
8 |
9 | public class ByteArrayField implements Field {
10 |
11 | public static final ByteArrayField EMPTY = new ByteArrayField(new byte[0], 0, 0, null);
12 |
13 | private final Decoder decoder;
14 |
15 | private byte[] buffer;
16 | private int start;
17 | private int end;
18 | private Character quote; //if not null indicates that value was actually quoted
19 |
20 | public ByteArrayField(byte[] buffer, int startIndex, int endIndex, Charset charset) {
21 | this(buffer, startIndex, endIndex, charset, null);
22 | }
23 |
24 | public ByteArrayField(byte[] buffer, int startIndex, int endIndex, Charset charset, Character quote) {
25 | this.buffer = buffer;
26 | this.start = startIndex;
27 | this.end = endIndex;
28 | this.quote = quote;
29 | this.decoder = new Decoder(charset);
30 | }
31 |
32 | @Override
33 | public ByteBuffer raw() {
34 | return ByteBuffer.wrap(buffer, start, end - start);
35 | }
36 |
37 | @Override
38 | public String asString() {
39 | String result = decoder.decodeToString(buffer, start, end - start);
40 | if (quote != null && result.indexOf(quote) >= 0) {
41 | //TODO: optimise and add more flexible escape character
42 | //flag indicating if an escaped quote was seen can be passed from the parser itself as state
43 | return result.replace(new StringBuffer().append(quote).append(quote), new StringBuffer().append(quote));
44 | } else {
45 | return result;
46 | }
47 | }
48 |
49 | @Override
50 | public double asDouble() {
51 | return decoder.decodeToDouble(buffer, start, end - start);
52 | }
53 |
54 | @Override
55 | public byte asByte() {
56 | return (byte) asInt();
57 | }
58 |
59 | @Override
60 | public char asChar() {
61 | return (char) asInt();
62 | }
63 |
64 | @Override
65 | public short asShort() {
66 | return (short) asInt();
67 | }
68 |
69 | @Override
70 | public int asInt() {
71 | return decoder.decodeToInt(buffer, start, end - start);
72 | }
73 |
74 | @Override
75 | public long asLong() {
76 | return decoder.decodeToLong(buffer, start, end - start);
77 | }
78 |
79 | void modifyBounds(int start, int end) { //re-use object to reduce GC overhead
80 | this.start = start;
81 | this.end = end;
82 | this.quote = null;
83 | }
84 |
85 | void modifyBounds(int start, int end, Character quote) {
86 | this.start = start;
87 | this.end = end;
88 | this.quote = quote;
89 | }
90 |
91 | public void initFrom(ByteArrayField other) {
92 | this.buffer = other.buffer;
93 | this.start = other.start;
94 | this.end = other.end;
95 | this.quote = other.quote;
96 | }
97 |
98 | @Override
99 | public Field clone() {
100 | return new ByteArrayField(buffer, start, end, decoder.getCharset(), quote);
101 | }
102 |
103 | @Override
104 | public boolean isEmpty() {
105 | return start >= end;
106 | }
107 |
108 | @Override
109 | public Double asBoxedDouble() {
110 | return isEmpty() ? null : asDouble();
111 | }
112 |
113 | @Override
114 | public Integer asBoxedInt() {
115 | return isEmpty() ? null : asInt();
116 | }
117 | }
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/parser/ByteSlice.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.parser;
2 |
3 | import java.nio.charset.Charset;
4 |
5 | import uk.elementarysoftware.quickcsv.api.ByteArraySource.ByteArrayChunk;
6 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder.CSVFileMetadata;
7 | import uk.elementarysoftware.quickcsv.functional.Pair;
8 | import uk.elementarysoftware.quickcsv.functional.PrimitiveFunctions.FunBiCharToBoolean;
9 | import uk.elementarysoftware.quickcsv.functional.PrimitiveFunctions.FunBiCharToT;
10 | import uk.elementarysoftware.quickcsv.functional.PrimitiveFunctions.FunCharToBoolean;
11 | import uk.elementarysoftware.quickcsv.functional.PrimitiveFunctions.FunCharToT;
12 |
13 |
14 | public interface ByteSlice {
15 | static final byte CR = 0xD;
16 | static final byte LF = 0xA;
17 |
18 | public static ByteSlice wrap(ByteArrayChunk it, Charset charset) {
19 | return new SingleByteSlice(it, charset);
20 | }
21 |
22 | public static ByteSlice empty() {
23 | return wrap(ByteArrayChunk.EMPTY, null);
24 | }
25 |
26 | public static ByteSlice join(ByteSlice prefix, ByteSlice suffix) {
27 | return new CompositeByteSlice((SingleByteSlice) prefix, (SingleByteSlice) suffix);
28 | }
29 |
30 | public Pair splitOnLastLineEnd();
31 |
32 | public boolean nextLine();
33 |
34 | /**
35 | * Skip until next occurrence of c character. False if not found and end of slice is reached
36 | * @param c - character on which to break
37 | * @return true if character was actually found, false if end of slice reached
38 | */
39 | public boolean skipUntil(final char c);
40 |
41 | public boolean skipUntil(final char c, final char quote);
42 |
43 | /**
44 | * Returns next field and advances to next field. Returns null when end of line or end of slice is reached.
45 | * @param c - character that indicates field boundary
46 | * @return object to access field content
47 | */
48 | public ByteArrayField nextField(final char c);
49 |
50 | public ByteArrayField nextField(final char c, final char quote);
51 |
52 | public int size();
53 |
54 | public boolean hasMoreData();
55 |
56 | default public boolean isEmpty() {
57 | return !hasMoreData();
58 | }
59 |
60 | /**
61 | * String representation of current line. Mainly for debug purposes, can return broken line when in composite slice.
62 | * @return current line
63 | */
64 | public String currentLine();
65 |
66 | default public void skipField(final CSVFileMetadata metadata) {
67 | if (metadata.quote.isPresent())
68 | skipUntil(metadata.separator, metadata.quote.get());
69 | else
70 | skipUntil(metadata.separator);
71 | }
72 |
73 | default public ByteArrayField getNextField(final CSVFileMetadata metadata) {
74 | if (metadata.quote.isPresent())
75 | return nextField(metadata.separator, metadata.quote.get());
76 | else
77 | return nextField(metadata.separator);
78 | }
79 |
80 | public void incrementUse();
81 |
82 | public void decremenentUse();
83 |
84 | }
85 |
86 | final class SingleByteSlice implements ByteSlice {
87 | final int start;//inclusive
88 | final int end;//exclusive
89 | final byte[] buffer;
90 | final ByteArrayField fieldTemplateObject;
91 | final Charset charset;
92 | final ByteArrayChunk src;
93 |
94 | int currentIndex;
95 |
96 | public SingleByteSlice(ByteArrayChunk src, Charset charset) {
97 | this(src, src.getData(), 0, src.getLength(), charset);
98 | }
99 |
100 | public SingleByteSlice(ByteArrayChunk src, byte[] buffer, int start, int end, Charset charset) {
101 | this.src = src;
102 | this.buffer = buffer;
103 | this.start = start;
104 | this.end = end;
105 | this.fieldTemplateObject = new ByteArrayField(buffer, 0, 0, charset);
106 | this.currentIndex = start;
107 | this.charset = charset;
108 | }
109 |
110 | @Override
111 | public int size() {
112 | return end - start;
113 | }
114 |
115 | @Override
116 | public boolean hasMoreData() {
117 | return currentIndex < end;
118 | }
119 |
120 | boolean frontTrim() {
121 | boolean seenEOL = false;
122 | for(; hasMoreData() && (buffer[currentIndex]==CR || buffer[currentIndex]==LF); currentIndex++) {
123 | seenEOL = true;
124 | }
125 | return seenEOL;
126 | }
127 |
128 | @Override
129 | public boolean nextLine() {
130 | for(; hasMoreData() && buffer[currentIndex]!=CR && buffer[currentIndex]!=LF; currentIndex++);
131 | return frontTrim();
132 | }
133 |
134 | public String currentLine() {
135 | int startIdx = currentIndex;
136 | for(; startIdx > start && buffer[startIdx]!=CR && buffer[startIdx]!=LF; startIdx--);
137 | int endIdx = currentIndex;
138 | for(; endIdx < end && buffer[endIdx]!=CR && buffer[endIdx]!=LF; endIdx++);
139 | return new String(buffer, startIdx, endIdx - startIdx);
140 | }
141 |
142 | public Pair splitOnLastLineEnd() {
143 | int i = end-1;
144 | for (;i >=currentIndex && buffer[i] != LF; i--);
145 | SingleByteSlice prefix = new SingleByteSlice(src, buffer, currentIndex, i+1, charset);
146 | SingleByteSlice suffix = new SingleByteSlice(src, buffer, i+1, end, charset);
147 | return Pair.of(prefix, suffix);
148 | }
149 |
150 | public boolean skipUntil(final char c) {
151 | boolean isFound = false;
152 | while(currentIndex < end) {
153 | if (buffer[currentIndex]==c) {
154 | currentIndex++;
155 | isFound = true;
156 | break;
157 | }
158 | currentIndex++;
159 | }
160 | return isFound;
161 | }
162 |
163 | public boolean skipUntil(char c, char q) {
164 | boolean inQuote = currentIndex < buffer.length && buffer[currentIndex] == q;
165 | if (!inQuote) return skipUntil(c);
166 | currentIndex++;
167 | boolean isFound = false;
168 | while(currentIndex < end) {
169 | if (buffer[currentIndex]==c && buffer[currentIndex-1] == q) {
170 | currentIndex++;
171 | isFound = true;
172 | break;
173 | }
174 | currentIndex++;
175 | }
176 | return isFound;
177 | }
178 |
179 | public ByteArrayField nextField(final char c) {
180 | int startIndex = currentIndex;
181 | int endIndex = currentIndex;
182 | while(currentIndex < end) {
183 | byte cur = buffer[currentIndex];
184 | if (cur == c || cur == CR || cur == LF) {
185 | endIndex = currentIndex;
186 | if (cur == c)
187 | currentIndex++;
188 | break;
189 | } else {
190 | currentIndex++;
191 | }
192 | }
193 | if (currentIndex == startIndex) return null;
194 | if (currentIndex == end) endIndex = end;
195 | fieldTemplateObject.modifyBounds(startIndex, endIndex);
196 | return fieldTemplateObject;
197 | }
198 |
199 | @Override
200 | public ByteArrayField nextField(char c, char q) {
201 | boolean inQuote = currentIndex < buffer.length && buffer[currentIndex] == q;
202 | if (!inQuote) return nextField(c);
203 | currentIndex++;
204 | int startIndex = currentIndex;
205 | int endIndex = currentIndex;
206 | while(currentIndex < end) {
207 | byte cur = buffer[currentIndex];
208 | if ((cur == c || cur == CR || cur == LF) && buffer[currentIndex-1] == q) {//there is an issue when we have escaped quote and then separator, but we ignore it for now
209 | endIndex = currentIndex - 1;
210 | if (cur == c) currentIndex++; //let frontTrim consume linebreaks later
211 | break;
212 | } else {
213 | currentIndex++;
214 | }
215 | }
216 | if (currentIndex == startIndex) return null;
217 | if (currentIndex == end) {
218 | if (buffer[end-1] == q) endIndex = end - 1; else endIndex = end;
219 | }
220 | fieldTemplateObject.modifyBounds(startIndex, endIndex, q);
221 | return fieldTemplateObject;
222 | }
223 |
224 | @Override
225 | public String toString() {
226 | return new String(buffer, start, size());
227 | }
228 |
229 | @Override
230 | public void incrementUse() {
231 | src.incrementUseCount();
232 | }
233 |
234 | @Override
235 | public void decremenentUse() {
236 | src.decrementUseCount();
237 | }
238 | }
239 |
240 | final class CompositeByteSlice implements ByteSlice {
241 |
242 | private final SingleByteSlice prefix;
243 | private final SingleByteSlice suffix;
244 | private final ByteArrayField prefixFieldTemplateObject;
245 | private final ByteArrayField suffixFieldTemplateObject;
246 |
247 | private FunCharToT nextFieldFun;
248 | private FunBiCharToT nextFieldFunQuoted;
249 | private FunCharToBoolean skipUntilFun;
250 | private FunBiCharToBoolean skipUntilFunQuoted;
251 |
252 | CompositeByteSlice(SingleByteSlice prefix, SingleByteSlice suffix) {
253 | this.prefix = prefix;
254 | this.suffix = suffix;
255 | this.prefixFieldTemplateObject = new ByteArrayField(prefix.buffer, 0, 0, prefix.charset);
256 | this.suffixFieldTemplateObject = new ByteArrayField(suffix.buffer, 0, 0, suffix.charset);
257 |
258 | this.nextFieldFun = this::nextFieldWithPrefix;
259 | this.nextFieldFunQuoted = this::nextFieldWithPrefix;
260 | this.skipUntilFun = this::skipUntilWithPrefix;
261 | this.skipUntilFunQuoted = this::skipUntilWithPrefix;
262 | }
263 |
264 | /*
265 | * -----------------------------------------------------------
266 | * Generic functions below work on slice with non-empty prefix, but once prefix has been
267 | * exhausted they will flip to simple suffix delegates.
268 | * Only frequently called functions are implemented that way.
269 | * -----------------------------------------------------------
270 | */
271 | private ByteArrayField nextFieldWithPrefix(char c) {
272 | if (prefix.isEmpty()) {
273 | flip();
274 | return suffix.nextField(c);
275 | }
276 | int startIndex = currentIndex();
277 | int endIndex = currentIndex();
278 | byte cur = 0;
279 | while(hasMoreData()) {
280 | cur = currentByte();
281 | if (cur == c || cur == CR || cur == LF) {
282 | endIndex = currentIndex();
283 | if (cur == c)
284 | nextByte();
285 | break;
286 | } else {
287 | nextByte();
288 | }
289 | }
290 | if (currentIndex() == startIndex) return null;
291 | if (cur != c && !hasMoreData()) endIndex = prefix.end + suffix.end;
292 | return createField(startIndex, endIndex, null);
293 | }
294 |
295 | private ByteArrayField nextFieldWithPrefix(char c, char quote) {
296 | if (prefix.isEmpty()) {
297 | flip();
298 | return suffix.nextField(c, quote);
299 | }
300 | boolean inQuote = hasMoreData() && currentByte() == quote;
301 | if (!inQuote) return nextField(c);
302 | nextByte();
303 | int startIndex = currentIndex();
304 | int endIndex = currentIndex();
305 | while(hasMoreData()) {
306 | byte cur = currentByte();
307 | if ((cur == c || cur == CR || cur == LF) && prevByte() == quote) {
308 | endIndex = currentIndex() - 1;
309 | if (cur == c)
310 | nextByte();
311 | break;
312 | } else {
313 | nextByte();
314 | }
315 | }
316 | if (currentIndex() == startIndex) return null;
317 | if (isEmpty()) {
318 | if (prevByte() == quote) endIndex = currentIndex() - 1; else endIndex = currentIndex();
319 | }
320 | return createField(startIndex, endIndex, quote);
321 | }
322 |
323 | private boolean skipUntilWithPrefix(char c) {
324 | if (prefix.isEmpty()) {
325 | flip();
326 | return suffix.skipUntil(c);
327 | }
328 | boolean isFound = prefix.skipUntil(c);
329 | if (isFound) {
330 | return true;
331 | } else {
332 | return suffix.skipUntil(c);
333 | }
334 | }
335 |
336 | private boolean skipUntilWithPrefix(char c, char q) {
337 | if (prefix.isEmpty()) {
338 | flip();
339 | return suffix.skipUntil(c, q);
340 | }
341 | boolean isFound = prefix.skipUntil(c, q);
342 | if (isFound) {
343 | return true;
344 | } else {
345 | return suffix.skipUntil(c, q);
346 | }
347 | }
348 |
349 | private void flip() {
350 | this.nextFieldFun = suffix::nextField;
351 | this.nextFieldFunQuoted = suffix::nextField;
352 | this.skipUntilFun = suffix::skipUntil;
353 | this.skipUntilFunQuoted = suffix::skipUntil;
354 | }
355 | /*
356 | * -----------------------------------------------------------
357 | * end
358 | * -----------------------------------------------------------
359 | */
360 |
361 | @Override
362 | public Pair splitOnLastLineEnd() {
363 | Pair sliced = suffix.splitOnLastLineEnd();
364 | return Pair.of(ByteSlice.join(this.prefix, sliced.first), sliced.second);
365 | }
366 |
367 | @Override
368 | public int size() {
369 | return prefix.size() + suffix.size();
370 | }
371 |
372 | @Override
373 | public boolean hasMoreData() {
374 | return prefix.hasMoreData() || suffix.hasMoreData();
375 | }
376 |
377 | @Override
378 | public ByteArrayField nextField(char c) {
379 | return nextFieldFun.apply(c);
380 | }
381 |
382 | @Override
383 | public ByteArrayField nextField(char c, char quote) {
384 | return nextFieldFunQuoted.apply(c, quote);
385 | }
386 |
387 | @Override
388 | public boolean skipUntil(char c) {
389 | return skipUntilFun.apply(c);
390 | }
391 |
392 | @Override
393 | public boolean skipUntil(char c, char q) {
394 | return skipUntilFunQuoted.apply(c, q);
395 | }
396 |
397 | @Override
398 | public boolean nextLine() {
399 | if (prefix.isEmpty()) {
400 | return suffix.nextLine();
401 | } else {
402 | boolean seenEOL = prefix.nextLine();
403 | if (seenEOL) {
404 | if (prefix.isEmpty()) suffix.frontTrim();
405 | return true;
406 | } else {
407 | return suffix.nextLine();
408 | }
409 | }
410 | }
411 |
412 | boolean frontTrim() {
413 | return prefix.isEmpty() ? suffix.frontTrim() : prefix.frontTrim();
414 | }
415 |
416 | @Override
417 | public String currentLine() {
418 | return prefix.isEmpty() ? suffix.currentLine() : prefix.currentLine();
419 | }
420 |
421 | private ByteArrayField createField(int startIndex, int endIndex, Character quote) {
422 | if (startIndex >= prefix.end) {
423 | suffixFieldTemplateObject.modifyBounds(startIndex - prefix.end, endIndex - prefix.end, quote);
424 | return suffixFieldTemplateObject;
425 | }
426 | if (endIndex < prefix.end) {
427 | prefixFieldTemplateObject.modifyBounds(startIndex, endIndex, quote);
428 | return prefixFieldTemplateObject;
429 | }
430 | byte[] result = new byte[endIndex - startIndex];
431 | System.arraycopy(prefix.buffer, startIndex, result, 0, prefix.end - startIndex);
432 | System.arraycopy(suffix.buffer, 0, result, prefix.end - startIndex, endIndex - prefix.end);
433 | return new ByteArrayField(result, 0, result.length, prefix.charset, quote);
434 | }
435 |
436 | @Override
437 | public String toString() {
438 | return new StringBuffer().append(prefix).append(suffix).toString();
439 | }
440 |
441 | byte prevByte() {
442 | if (suffix.currentIndex > suffix.start) return suffix.buffer[suffix.currentIndex - 1];
443 | return prefix.buffer[prefix.currentIndex - 1];
444 | }
445 |
446 |
447 | byte currentByte() {
448 | return prefix.isEmpty() ? suffix.buffer[suffix.currentIndex] : prefix.buffer[prefix.currentIndex];
449 | }
450 |
451 | void nextByte() {
452 | if (prefix.isEmpty()) suffix.currentIndex++; else prefix.currentIndex++;
453 | }
454 |
455 | int currentIndex() {
456 | return prefix.currentIndex + suffix.currentIndex;
457 | }
458 |
459 | @Override
460 | public void decremenentUse() {
461 | prefix.src.decrementUseCount();
462 | suffix.src.decrementUseCount();
463 | }
464 |
465 | @Override
466 | public void incrementUse() {
467 | throw new IllegalStateException("Should not be called");
468 | }
469 | }
470 |
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/parser/FieldSubsetView.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.parser;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Arrays;
5 | import java.util.EnumMap;
6 | import java.util.List;
7 | import java.util.Map;
8 |
9 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder.CSVFileMetadata;
10 |
11 | /**
12 | * Provides view on the CSVRecord that focuses on particular subset of fields.
13 | *
14 | * Within the view fields can be accessed by index in order of the subset or by field enumeration K.
15 | * @param - enum containing list of fields that form the subset
16 | */
17 | public class FieldSubsetView> {
18 |
19 | private final HeaderSource headerSource;
20 | private final Class fieldSubset;
21 |
22 | private boolean isFirstSlice = true;
23 |
24 | private int[] headerIndexesOfK;
25 | private int[] parseOrderToSourceOrder;
26 | private int[] fieldSkipSchedule;
27 |
28 | private FieldSubsetView(HeaderSource headerSource, Class fieldSubset) {
29 | this.headerSource = headerSource;
30 | this.fieldSubset = fieldSubset;
31 | }
32 |
33 | public static > FieldSubsetView forExplicitHeader(Class fieldsToSource, String... header) {
34 | return new FieldSubsetView<>(new HeaderSource.ExplicitHeader(header), fieldsToSource);
35 | }
36 |
37 | public static > FieldSubsetView forSourceSuppliedHeader(Class fieldsToSource) {
38 | return forSourceSuppliedHeader(fieldsToSource, 0);
39 | }
40 |
41 | public static > FieldSubsetView forSourceSuppliedHeader(Class fieldsToSource, int headerRowIndexInFile) {
42 | return new FieldSubsetView<>(new HeaderSource.SourceSuppliedHeader(headerRowIndexInFile), fieldsToSource);
43 | }
44 |
45 | public void onSlice(ByteSlice slice, CSVFileMetadata metadata) {
46 | if (isFirstSlice) {
47 | headerSource.onSlice(slice, metadata);
48 | initLookups();
49 | isFirstSlice = false;
50 | }
51 | }
52 |
53 | private void initLookups() {
54 | List header = headerSource.getHeader();
55 | headerIndexesOfK = getHeaderIndexesOfK(header);
56 | Map fieldToHeaderIndex = new EnumMap(fieldSubset);
57 | for (K k : fieldSubset.getEnumConstants()) {
58 | fieldToHeaderIndex.put(k, header.indexOf(k.toString()));
59 | }
60 |
61 | this.fieldSkipSchedule = new int[headerIndexesOfK.length];
62 | int lastFieldIndex = -1;
63 | for (int i = 0; i < headerIndexesOfK.length; i++) {
64 | int idx = headerIndexesOfK[i];
65 | int nSkip = idx - lastFieldIndex - 1;
66 | fieldSkipSchedule[i] = nSkip;
67 | lastFieldIndex = idx;
68 | }
69 |
70 | parseOrderToSourceOrder = new int[getFieldSubsetSize()];
71 | K[] ks = fieldSubset.getEnumConstants();
72 | for (int i = 0; i < ks.length; i++) {
73 | int headerIdx = fieldToHeaderIndex.get(ks[i]);
74 | parseOrderToSourceOrder[i] = Arrays.binarySearch(headerIndexesOfK, headerIdx);
75 | }
76 | }
77 |
78 | private int[] getHeaderIndexesOfK(List header) {
79 | K[] ks = fieldSubset.getEnumConstants();
80 | int[] result = new int[ks.length];
81 | for (int i = 0; i < result.length; i++) {
82 | if ((result[i] = header.indexOf(ks[i].toString())) == -1) {
83 | throw new RuntimeException("Field not found in header: "+ks[i].toString());
84 | }
85 | }
86 | Arrays.sort(result);
87 | return result;
88 | }
89 |
90 | int[] getFieldIndexes() {
91 | return headerIndexesOfK;
92 | }
93 |
94 | public Class getFieldSubset() {
95 | return fieldSubset;
96 | }
97 |
98 | int[] getFieldSkipSchedule() {
99 | return fieldSkipSchedule;
100 | }
101 |
102 | List getHeader() {
103 | return headerSource.getHeader();
104 | }
105 |
106 | int indexOfInSourceView(int parseIdx) {
107 | return parseOrderToSourceOrder[parseIdx];
108 | }
109 |
110 | int getFieldSubsetSize() {
111 | return fieldSubset.getEnumConstants().length;
112 | }
113 |
114 | public static abstract class HeaderSource {
115 |
116 | private HeaderSource() {}
117 |
118 | abstract void onSlice(ByteSlice slice, CSVFileMetadata metadata);
119 | abstract List getHeader();
120 |
121 | private static class ExplicitHeader extends HeaderSource {
122 | private final String[] header;
123 |
124 | public ExplicitHeader(String[] header) {
125 | this.header = header;
126 | }
127 |
128 | @Override
129 | List getHeader() {
130 | return Arrays.asList(header);
131 | }
132 |
133 | @Override
134 | void onSlice(ByteSlice slice, CSVFileMetadata metadata) {}
135 | }
136 |
137 | private static class SourceSuppliedHeader extends HeaderSource {
138 |
139 | private final int headerIndex;
140 | private List header;
141 |
142 | public SourceSuppliedHeader(int headerIndex) {
143 | this.headerIndex = headerIndex;
144 | }
145 |
146 | @Override
147 | void onSlice(ByteSlice slice, CSVFileMetadata metadata) {
148 | for (int i = 0; i < headerIndex; i++) {
149 | slice.nextLine();
150 | }
151 | List header = new ArrayList<>();
152 | ByteArrayField field;
153 | while((field = slice.getNextField(metadata)) != null) {
154 | header.add(field.asString());
155 | }
156 | slice.nextLine();
157 | this.header = header;
158 | }
159 |
160 | @Override
161 | List getHeader() {
162 | return header;
163 | }
164 | }
165 | }
166 | }
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/parser/InputStreamToByteArraySourceAdapter.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.parser;
2 |
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 |
6 | import uk.elementarysoftware.quickcsv.api.ByteArraySource;
7 |
8 | class InputStreamToByteArraySourceAdapter implements ByteArraySource {
9 |
10 | private final InputStream is;
11 | private final BufferPool pool;
12 |
13 | public InputStreamToByteArraySourceAdapter(InputStream is, BufferPool pool) {
14 | this.is = is;
15 | this.pool = pool;
16 | }
17 |
18 | @Override
19 | public ByteArrayChunk getNext() throws IOException {
20 | byte[] buffer = pool.getBuffer();
21 | int read = is.read(buffer);
22 | boolean isEndReached = read == -1;
23 | return new ByteArrayChunk(buffer, Math.max(0, read), isEndReached, pool::handBack);
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/src/main/java/uk/elementarysoftware/quickcsv/parser/QuickCSVParser.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.parser;
2 |
3 | import java.io.InputStream;
4 | import java.nio.charset.Charset;
5 | import java.util.List;
6 | import java.util.Optional;
7 | import java.util.Spliterator;
8 | import java.util.Spliterators;
9 | import java.util.function.Consumer;
10 | import java.util.function.Function;
11 | import java.util.stream.Stream;
12 | import java.util.stream.StreamSupport;
13 |
14 | import uk.elementarysoftware.quickcsv.api.ByteArraySource;
15 | import uk.elementarysoftware.quickcsv.api.ByteArraySource.ByteArrayChunk;
16 | import uk.elementarysoftware.quickcsv.api.CSVParser;
17 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder.CSVFileMetadata;
18 | import uk.elementarysoftware.quickcsv.functional.Pair;
19 | import uk.elementarysoftware.quickcsv.api.CSVRecord;
20 | import uk.elementarysoftware.quickcsv.api.CSVRecordWithHeader;
21 | import uk.elementarysoftware.quickcsv.api.Field;
22 |
23 | public class QuickCSVParser> implements CSVParser {
24 |
25 | private final CSVFileMetadata metadata;
26 | private final int bufferSize;
27 | private final Function mapper;
28 | private final Optional> fieldSubsetView;
29 | private final Charset charset;
30 |
31 | public QuickCSVParser(int bufferSize, CSVFileMetadata metadata, Function, T> mapper,
32 | FieldSubsetView fieldSubsetView, Charset charset) {
33 | this.metadata = metadata;
34 | this.bufferSize = bufferSize;
35 | this.mapper = cast(mapper);
36 | this.fieldSubsetView = Optional.of(fieldSubsetView);
37 | this.charset = charset;
38 | }
39 |
40 | public QuickCSVParser(int bufferSize, CSVFileMetadata metadata, Function mapper, Charset charset) {
41 | this.metadata = metadata;
42 | this.bufferSize = bufferSize;
43 | this.mapper = mapper;
44 | this.fieldSubsetView = Optional.empty();
45 | this.charset = charset;
46 | }
47 |
48 | @SuppressWarnings("unchecked")
49 | private static > Function cast(Function, T> f) {
50 | return r -> f.apply((CSVRecordWithHeader) r);
51 | }
52 |
53 |
54 | @Override
55 | public Stream parse(InputStream is) {
56 | BufferPool pool = new BufferPool(bufferSize);
57 | return parse(new InputStreamToByteArraySourceAdapter(is, pool));
58 | }
59 |
60 | @Override
61 | public Stream parse(ByteArraySource bas) {
62 | return StreamSupport.stream(new SplittingSpliterator(bas), true);
63 | }
64 |
65 | class SplittingSpliterator implements Spliterator {
66 |
67 | private final ByteArraySource bas;
68 |
69 | private ByteSlice prefix = ByteSlice.empty();
70 | private boolean isEndReached = false;
71 |
72 | private Spliterator sequentialSplitterator = Spliterators.emptySpliterator();
73 |
74 | SplittingSpliterator(ByteArraySource bas) {
75 | this.bas = bas;
76 | }
77 |
78 | @Override
79 | public boolean tryAdvance(Consumer super T> action) { //usually only called in sequential mode
80 | boolean advanced = sequentialSplitterator.tryAdvance(action);
81 | if (advanced) return true;
82 | if (isEndReached) return false;
83 | ByteSlice nextSlice = nextSlice();
84 | if (!nextSlice.hasMoreData()) return false;
85 | this.sequentialSplitterator = sliceSpliterator(nextSlice);
86 | return tryAdvance(action);
87 | }
88 |
89 | @Override
90 | public Spliterator trySplit() {
91 | if (isEndReached) return null;
92 | ByteSlice nextSlice = nextSlice();
93 | if (!nextSlice.hasMoreData()) return null;
94 | return sliceSpliterator(nextSlice);
95 | }
96 |
97 | private ByteSlice nextSlice() {
98 | ByteSlice bareSlice = nextBareSlice();
99 | bareSlice.incrementUse();
100 | if (isEndReached) {
101 | return ByteSlice.join(prefix, bareSlice);
102 | } else {
103 | Pair sliced = bareSlice.splitOnLastLineEnd();
104 | ByteSlice result = ByteSlice.join(prefix, sliced.first);
105 | this.prefix = sliced.second;
106 | bareSlice.incrementUse();
107 | return result;
108 | }
109 | }
110 |
111 | private ByteSlice nextBareSlice() {
112 | try {
113 | ByteArrayChunk it = bas.getNext();
114 | this.isEndReached = it.isLast();
115 | ByteSlice slice = ByteSlice.wrap(it, charset);
116 | if (fieldSubsetView.isPresent()) fieldSubsetView.get().onSlice(slice, metadata);
117 | return slice;
118 | } catch (RuntimeException e) {
119 | throw e;
120 | } catch (Exception e) {
121 | throw new RuntimeException(e);
122 | }
123 | }
124 |
125 | @Override
126 | public long estimateSize() {
127 | return Long.MAX_VALUE;
128 | }
129 |
130 | @Override
131 | public int characteristics() {
132 | return ORDERED | NONNULL | IMMUTABLE;
133 | }
134 | }
135 |
136 | Spliterator sliceSpliterator(ByteSlice slice) {
137 | return fieldSubsetView.isPresent() ? new LensingByteSliceSpliterator(slice) : new ByteSliceSpliterator(slice);
138 | }
139 |
140 | class ByteSliceSpliterator implements Spliterator, CSVRecord {
141 |
142 | protected final ByteSlice slice;
143 |
144 | ByteSliceSpliterator(ByteSlice slice) {
145 | this.slice = slice;//incoming slice should have no broken lines
146 | }
147 |
148 | @Override
149 | public boolean tryAdvance(Consumer super T> action) {
150 | if (!slice.hasMoreData()) {
151 | slice.decremenentUse();
152 | return false;
153 | }
154 | advance(action);
155 | return true;
156 | }
157 |
158 | protected void advance(Consumer super T> action) {
159 | T t = mapper.apply(this);
160 | action.accept(t);
161 | slice.nextLine();
162 | }
163 |
164 | @Override
165 | public Spliterator trySplit() {
166 | return null;
167 | }
168 |
169 | @Override
170 | public long estimateSize() {
171 | return slice.size();
172 | }
173 |
174 | @Override
175 | public int characteristics() {
176 | return ORDERED | NONNULL | IMMUTABLE;
177 | }
178 |
179 | @Override
180 | public void skipField() {
181 | slice.skipField(metadata);
182 | }
183 |
184 | @Override
185 | public void skipFields(int nFields) {
186 | for (int i = 0; i < nFields; i++) {
187 | skipField();
188 | }
189 | }
190 |
191 | @Override
192 | public ByteArrayField getNextField() {
193 | return slice.getNextField(metadata);
194 | }
195 | }
196 |
197 | class LensingByteSliceSpliterator extends ByteSliceSpliterator implements CSVRecordWithHeader {
198 |
199 | private final FieldSubsetView view;
200 | private final ByteArrayField[] fieldTemplates;
201 |
202 | public LensingByteSliceSpliterator(ByteSlice slice) {
203 | super(slice);
204 | this.view = fieldSubsetView.get();
205 | this.fieldTemplates = new ByteArrayField[view.getFieldSubsetSize()];
206 | for (int i = 0; i < fieldTemplates.length; i++) {
207 | fieldTemplates[i] = new ByteArrayField(null, -1, -1, charset);
208 | }
209 | }
210 |
211 | @Override
212 | public boolean tryAdvance(Consumer super T> action) {
213 | if (!slice.hasMoreData()) {
214 | slice.decremenentUse();
215 | return false;
216 | }
217 | parseFields();
218 | super.advance(action);
219 | return true;
220 | }
221 |
222 | private void parseFields() {
223 | int[] skipSchedule = view.getFieldSkipSchedule();
224 | for (int i = 0; i < skipSchedule.length; i++) {
225 | skipFields(skipSchedule[i]);
226 | ByteArrayField field = super.getNextField();//TODO: init into template directly
227 | if (field != null) {
228 | fieldTemplates[i].initFrom(field);
229 | } else {
230 | //when line ends with separator it is very difficult to distinguish between that and overflow when getNextField() returns null. Here we assume correct field schedule and map null to empty field.
231 | fieldTemplates[i].initFrom(ByteArrayField.EMPTY);
232 | }
233 | }
234 | }
235 |
236 | @Override
237 | public Field getField(K fieldName) {
238 | return fieldTemplates[view.indexOfInSourceView(fieldName.ordinal())];
239 | }
240 |
241 | @Override
242 | public List getHeader() {
243 | return view.getHeader();
244 | }
245 | }
246 | }
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/decoder/doubles/DoubleParserTest.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.decoder.doubles;
2 |
3 | import static org.junit.Assert.*;
4 |
5 | import java.io.File;
6 | import java.io.FileInputStream;
7 | import java.net.URL;
8 | import java.nio.charset.Charset;
9 |
10 | import org.apache.commons.io.IOUtils;
11 | import org.apache.commons.io.LineIterator;
12 | import org.junit.Test;
13 |
14 | import uk.elementarysoftware.quickcsv.decoder.doubles.DoubleParser;
15 | import uk.elementarysoftware.quickcsv.decoder.doubles.JDKDoubleParserAdapter;
16 | import uk.elementarysoftware.quickcsv.decoder.doubles.QuickDoubleParser;
17 |
18 |
19 | public class DoubleParserTest {
20 |
21 | @Test
22 | public void testSimpleCases() {
23 | doTestSimpleCases(new JDKDoubleParserAdapter());
24 | doTestSimpleCases(new QuickDoubleParser());
25 | }
26 |
27 | @Test
28 | public void testBigBuffer() {
29 | doTestBigBuffer(new JDKDoubleParserAdapter());
30 | doTestBigBuffer(new QuickDoubleParser());
31 | }
32 |
33 | @Test
34 | public void testFile() throws Exception {
35 | doTestFile(new JDKDoubleParserAdapter());
36 | doTestFile(new QuickDoubleParser());
37 | }
38 |
39 | private void doTestSimpleCases(DoubleParser parser) {
40 | assertEquals(0.0, parser.parse("0"), 1E-14);
41 | assertEquals(3.14159265, parser.parse("3.14159265"), 1E-14);
42 | assertEquals(-93231637.47759183, parser.parse("-93231637.47759183"), 1E-14);
43 | assertEquals(-0.3903, parser.parse("-0.3903"), 1E-14);
44 | assertEquals(2.71828183, parser.parse("2.71828183"), 1E-14);
45 | }
46 |
47 | private void doTestBigBuffer(DoubleParser parser) {
48 | String prefix = "anything";
49 | String middle = "2.71828183";
50 | String suffix = "anything again";
51 |
52 | byte[] buffer = (prefix + middle + suffix).getBytes();
53 | double result = parser.parse(buffer, prefix.length(), middle.length());
54 | assertEquals(2.71828183, result, 1E-14);
55 | }
56 |
57 |
58 |
59 | private void doTestFile(DoubleParser parser) throws Exception {
60 | int nLinesToTest = 500;
61 | URL fileUrl = getClass().getResource("/cities-dos.txt");
62 | File file = new File(fileUrl.toURI());
63 | LineIterator lines = IOUtils.lineIterator(new FileInputStream(file), Charset.defaultCharset());
64 | int lineNumber = 0;
65 | while (lines.hasNext() && lineNumber < nLinesToTest) {
66 | String[] data = lines.next().split(",");
67 | for (int i = 0; i < data.length; i++) {
68 | compareParsingResult(parser, data[i]);
69 | }
70 | lineNumber ++;
71 | }
72 | }
73 |
74 | private void compareParsingResult(DoubleParser parser, String stringValue) {
75 | Object d1 = null;
76 | try {
77 | d1 = parser.parse(stringValue);
78 | } catch (Exception e) {
79 | d1 = e;
80 | }
81 | Object d2 = null;
82 | try {
83 | d2 = Double.parseDouble(stringValue);
84 | } catch (Exception e) {
85 | d2 = e;
86 | }
87 | assertEquals(d2.getClass(), d1.getClass());
88 | if (d2 instanceof Double) {
89 | assertEquals("Failed for: "+stringValue, d2, d1);
90 | }
91 | }
92 | }
93 |
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/decoder/ints/IntParserTest.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.decoder.ints;
2 |
3 | import static org.junit.Assert.assertEquals;
4 |
5 | import java.util.Random;
6 | import java.util.function.Function;
7 |
8 | import org.junit.experimental.theories.DataPoints;
9 | import org.junit.experimental.theories.FromDataPoints;
10 | import org.junit.experimental.theories.Theories;
11 | import org.junit.experimental.theories.Theory;
12 | import org.junit.runner.RunWith;
13 |
14 | @RunWith(Theories.class)
15 | public class IntParserTest {
16 |
17 | private static final int randomSize = 1000;
18 |
19 | private static final Random rnd = new Random();
20 |
21 | @DataPoints("validInts")
22 | public static String[] randomInts() {
23 | return rnd.ints(randomSize).mapToObj(i -> ""+i).toArray(String[]::new);
24 | }
25 |
26 | @DataPoints("validInts")
27 | public static String[] specialInts() {
28 | return new String[] {"0", "-0", "+0", "+1", Integer.MAX_VALUE+"", Integer.MIN_VALUE+""};
29 | }
30 |
31 | @DataPoints("failingInts")
32 | public static String[] specialFailingInts() {
33 | return new String[] {"X0", "-", "+", Long.MAX_VALUE+"", "", "Hello"};
34 | }
35 |
36 | private QuickIntParser parser = new QuickIntParser();
37 |
38 | @Theory
39 | public void parsersAreEquivalentOnValidInts(@FromDataPoints("validInts") String intValue) {
40 | compareParsingResult(intValue, s -> Integer.parseInt(s), s -> parser.parse(s));
41 | }
42 |
43 | @Theory
44 | public void parsersAreEquivalentOnFailingInts(@FromDataPoints("failingInts") String intValue) {
45 | compareParsingResult(intValue, s -> Integer.parseInt(s), s -> parser.parse(s));
46 | }
47 |
48 | private void compareParsingResult(String value, Function p1, Function p2) {
49 | Object v1 = null;
50 | try {
51 | v1 = p1.apply(value);
52 | } catch (Exception e) {
53 | v1 = e;
54 | }
55 | Object v2 = null;
56 | try {
57 | v2 = p2.apply(value);
58 | } catch (Exception e) {
59 | v2 = e;
60 | }
61 | assertEquals(v2.getClass(), v1.getClass());
62 | if (v2 instanceof Integer) {
63 | assertEquals(v2, v1);
64 | }
65 | }
66 | }
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/decoder/ints/LongParserTest.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.decoder.ints;
2 |
3 | import static org.junit.Assert.assertEquals;
4 |
5 | import java.util.Arrays;
6 | import java.util.Random;
7 | import java.util.function.Function;
8 |
9 | import org.junit.experimental.theories.DataPoints;
10 | import org.junit.experimental.theories.FromDataPoints;
11 | import org.junit.experimental.theories.Theories;
12 | import org.junit.experimental.theories.Theory;
13 | import org.junit.runner.RunWith;
14 |
15 | @RunWith(Theories.class)
16 | public class LongParserTest {
17 |
18 | private static final int randomSize = 1000;
19 |
20 | private static final Random rnd = new Random();
21 |
22 | @DataPoints("validLongs")
23 | public static String[] randomLongs() {
24 | return rnd.ints(randomSize).mapToObj(i -> ""+i).toArray(String[]::new);
25 | }
26 |
27 | @DataPoints("validLongs")
28 | public static String[] specialLongs() {
29 | return new String[] {"0", "-0", "+0", "+1", Long.MAX_VALUE+"", Long.MIN_VALUE+""};
30 | }
31 |
32 | @DataPoints("failingLongs")
33 | public static String[] specialFailingLongs() {
34 | return new String[] {"X0", "-", "+", Double.MAX_VALUE+"", "", "Hello"};
35 | }
36 |
37 | private QuickLongParser parser = new QuickLongParser();
38 |
39 | @Theory
40 | public void parsersAreEquivalentOnValidLongs(@FromDataPoints("validLongs") String intValue) {
41 | compareParsingResult(intValue, s -> Long.parseLong(s), s -> parser.parse(s));
42 | }
43 |
44 | @Theory
45 | public void parsersAreEquivalentOnFailingLongs(@FromDataPoints("failingLongs") String intValue) {
46 | compareParsingResult(intValue, s -> Long.parseLong(s), s -> parser.parse(s));
47 | }
48 |
49 | private void compareParsingResult(String value, Function p1, Function p2) {
50 | Object v1 = null;
51 | try {
52 | v1 = p1.apply(value);
53 | } catch (Exception e) {
54 | v1 = e;
55 | }
56 | Object v2 = null;
57 | try {
58 | v2 = p2.apply(value);
59 | } catch (Exception e) {
60 | v2 = e;
61 | }
62 | assertEquals("Value 2:"+v2+", value 1: "+v1+", source"+value+"; "+Arrays.toString(value.getBytes()), v2.getClass(), v1.getClass());
63 |
64 | if (v2 instanceof Long) {
65 | assertEquals(v2, v1);
66 | }
67 | }
68 |
69 | public static void main(String[] args) {
70 | byte[] x = new byte[] {-39, -94};
71 | long l = Long.parseLong(new String(x));
72 | System.out.println(l);
73 | }
74 | }
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/integration/CorrectnessTest.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.integration;
2 |
3 | import static org.junit.Assert.*;
4 |
5 | import java.io.File;
6 | import java.io.IOException;
7 | import java.util.List;
8 | import java.util.stream.Stream;
9 |
10 | import org.junit.Test;
11 |
12 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
13 | import uk.elementarysoftware.quickcsv.api.StandardMappers;
14 |
15 | public class CorrectnessTest {
16 |
17 | File input = new File("src/test/resources/correctness.txt");
18 |
19 | @Test
20 | @SuppressWarnings("unchecked")
21 | public void testParse() throws IOException {
22 | Stream> stream = CSVParserBuilder.aParser(StandardMappers.TO_STRING_LIST).build().parse(input);
23 | List[] rows = stream.toArray(List[]::new);
24 | assertArrayEquals(new String[] {"Year", "Make", "Model", "Description", "Price"}, rows[0].toArray(new String[0]));
25 | assertArrayEquals(new String[] {"1997", "Ford", "E350", "ac, abs, moon", "3000.00"}, rows[1].toArray(new String[0]));
26 | assertArrayEquals(new String[] {"1999", "Chevy", "Venture \"Extended Edition\"", "", "4900.00"}, rows[2].toArray(new String[0]));
27 | String separ = System.getProperty("line.separator");
28 | assertArrayEquals(new String[] {"1996", "Jeep", "Grand Cherokee", "MUST SELL!"+separ+"air, moon roof, loaded", "4799.00"}, rows[3].toArray(new String[0]));
29 | assertArrayEquals(new String[] {"1999", "Chevy", "Venture \"Extended Edition, Very Large\"", "", "5000.00"}, rows[4].toArray(new String[0]));
30 | assertArrayEquals(new String[] {"", "", "Venture \"Extended Edition\"", "", "4900.00" }, rows[5].toArray(new String[0]));
31 | }
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/integration/HttpStreamTest.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.integration;
2 |
3 | import static org.junit.Assert.assertEquals;
4 |
5 | import java.io.File;
6 | import java.net.URI;
7 | import java.util.stream.Stream;
8 |
9 | import org.apache.commons.io.FileUtils;
10 | import org.apache.http.client.methods.CloseableHttpResponse;
11 | import org.apache.http.client.methods.HttpGet;
12 | import org.apache.http.impl.client.CloseableHttpClient;
13 | import org.apache.http.impl.client.HttpClients;
14 | import org.eclipse.jetty.server.Handler;
15 | import org.eclipse.jetty.server.Server;
16 | import org.eclipse.jetty.server.handler.DefaultHandler;
17 | import org.eclipse.jetty.server.handler.HandlerList;
18 | import org.eclipse.jetty.server.handler.ResourceHandler;
19 | import org.junit.Rule;
20 | import org.junit.Test;
21 | import org.junit.rules.ExternalResource;
22 |
23 | import uk.elementarysoftware.quickcsv.api.CSVParser;
24 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
25 | import uk.elementarysoftware.quickcsv.sampledomain.City;
26 |
27 | public class HttpStreamTest {
28 |
29 | @Rule
30 | public final FileServer httpServer = new FileServer();
31 |
32 | private final File testFile = IntegrationTest.inputUnix;
33 | private final CSVParser parser = CSVParserBuilder.aParser(City.MAPPER).build();
34 |
35 | @Test
36 | public void testParseHttpResource() throws Exception {
37 | CloseableHttpClient httpclient = HttpClients.createDefault();
38 | HttpGet httpGet = new HttpGet(httpServer.getURI().resolve(testFile.getName()));
39 | CloseableHttpResponse response = httpclient.execute(httpGet);
40 |
41 | try(Stream stream = parser.parse(response.getEntity().getContent())) {
42 | assertEquals(FileUtils.readLines(testFile, "UTF-8").size(), stream.count());
43 | }
44 | }
45 |
46 | static class FileServer extends ExternalResource {
47 |
48 | private Server server;
49 |
50 | @Override
51 | protected void before() throws Throwable {
52 | server = new Server(0);
53 |
54 | ResourceHandler rh = new ResourceHandler();
55 | rh.setResourceBase("src/test/resources");
56 |
57 | HandlerList handlers = new HandlerList();
58 | handlers.setHandlers(new Handler[] { rh, new DefaultHandler() });
59 | server.setHandler(handlers);
60 |
61 | server.start();
62 | }
63 |
64 |
65 | @Override
66 | protected void after() {
67 | try {
68 | server.stop();
69 | } catch (Exception e) {
70 | //no-op
71 | }
72 | }
73 |
74 | public URI getURI() {
75 | return server.getURI();
76 | }
77 | }
78 | }
79 |
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/integration/IntegrationTest.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.integration;
2 |
3 | import static org.junit.Assert.*;
4 |
5 | import java.io.File;
6 | import java.util.stream.Stream;
7 |
8 | import org.junit.Test;
9 |
10 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
11 | import uk.elementarysoftware.quickcsv.parser.simple.StraightForwardParser;
12 | import uk.elementarysoftware.quickcsv.sampledomain.City;
13 |
14 | public class IntegrationTest {
15 |
16 | static final File inputDos = new File("src/test/resources/cities-dos.txt");
17 | static final File inputUnix = new File("src/test/resources/cities-unix.txt");
18 |
19 | static final int[] bufferSizesToTest = new int[] {1024, 11_111, 1_000_000};
20 |
21 |
22 | @Test
23 | public void testMultiThreaded() throws Exception {
24 | Stream s1 = new StraightForwardParser().parse(inputDos).map(City.MAPPER);
25 | Object[] expected = s1.toArray();
26 | for (int i = 0; i < bufferSizesToTest.length; i++) {
27 | Stream s2 = CSVParserBuilder.aParser(City.MAPPER).usingBufferSize(bufferSizesToTest[i]).build().parse(inputDos);
28 | assertArrayEquals(expected, s2.toArray());
29 | }
30 | }
31 |
32 | @Test
33 | public void testSingleThreaded() throws Exception {
34 | Stream s1 = new StraightForwardParser().parse(inputDos).map(City.MAPPER);
35 | Stream s2 = CSVParserBuilder.aParser(City.MAPPER).build().parse(inputDos).sequential();
36 | assertArrayEquals(s1.toArray(), s2.sequential().toArray());
37 | }
38 |
39 | @Test
40 | public void testDosVsUnix() throws Exception {
41 | Stream s1 = CSVParserBuilder.aParser(City.MAPPER).build().parse(inputUnix);
42 | Stream s2 = CSVParserBuilder.aParser(City.MAPPER).build().parse(inputDos);
43 | assertArrayEquals(s1.toArray(), s2.sequential().toArray());
44 | }
45 |
46 | }
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/manual/CityManualPerformanceTester.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.manual;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.io.InputStream;
6 |
7 | import org.apache.commons.io.FileUtils;
8 | import org.apache.commons.io.IOUtils;
9 |
10 | import uk.elementarysoftware.quickcsv.api.CSVParser;
11 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
12 | import uk.elementarysoftware.quickcsv.sampledomain.City;
13 |
14 |
15 | public class CityManualPerformanceTester {
16 | long maxSpeed = 0;
17 |
18 | public void run() throws Exception {
19 | File file = prepareFile(300);
20 | try {
21 | System.out.println("Running file of size "+(file.length() / 1024 / 1024)+ "MB");
22 | run(file, 30);
23 | } finally {
24 | file.delete();
25 | }
26 | }
27 |
28 | private void run(File source, int nRuns) throws Exception {
29 | CSVParser parser = CSVParserBuilder.aParser(City.MAPPER).build();
30 | //CSVParser parser = CSVParserBuilder.aParser(City.HeaderAwareMapper.MAPPER, City.HeaderAwareMapper.Fields.class).usingExplicitHeader("Country", "City", "AccentCity", "Region", "Population", "Latitude", "Longitude").build();//TODO add that example to docs
31 |
32 | for (int i = 0; i < nRuns; i++) {
33 | runOnce(parser, source);
34 | }
35 | }
36 |
37 | private void runOnce(CSVParser parser, File source) throws IOException {
38 | long start = System.currentTimeMillis();
39 | parser.parse(source).count();
40 | long duration = System.currentTimeMillis() - start;
41 | if (duration == 0) return;
42 | System.out.println("P2 parsed " +source.getName()+" in "+duration);
43 | long speed = source.length()/1024/duration;
44 | if (speed > maxSpeed) maxSpeed = speed;
45 | System.out.println("P2 speed: "+(source.length()/1024/duration)+" MB/s, max: "+maxSpeed);
46 |
47 | }
48 |
49 | private File prepareFile(int sizeMultiplier) throws Exception {
50 | InputStream is = getClass().getResourceAsStream("/cities-unix.txt");
51 | byte[] content = IOUtils.toByteArray(is);
52 | File result = File.createTempFile("csv", "large");
53 | for (int i = 0; i < sizeMultiplier; i++) {
54 | FileUtils.writeByteArrayToFile(result, content, true);
55 | }
56 | return result;
57 | }
58 |
59 | public static void main(String[] args) throws Exception {
60 | new CityManualPerformanceTester().run();
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/ByteSliceTest.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.parser;
2 |
3 | import static org.junit.Assert.assertArrayEquals;
4 | import static org.junit.Assert.assertEquals;
5 | import static org.junit.Assert.assertNull;
6 | import static org.junit.Assert.assertTrue;
7 |
8 | import java.nio.charset.Charset;
9 | import java.util.ArrayList;
10 | import java.util.List;
11 |
12 | import org.junit.Test;
13 |
14 | import uk.elementarysoftware.quickcsv.api.ByteArraySource.ByteArrayChunk;
15 | import uk.elementarysoftware.quickcsv.api.Field;
16 | import uk.elementarysoftware.quickcsv.functional.Pair;
17 |
18 | public class ByteSliceTest {
19 |
20 | private static final String FIELDS22 = "field11,field12\nfield21,field22";
21 | private static final String FIELDS33 = "field11,field12,field13\nfield21,field22,field23\nfield31,field32,field33";
22 | private static final String QUOTED = "'field11','field12'\n'field21','field22'\n";
23 |
24 | @Test
25 | public void testSplitOnLastLineEnd() {
26 | String content = "line1\nline2\nlastline";
27 | ByteSlice slice = sliceFor(content.getBytes());
28 | assertEquals(content, slice.toString());
29 | Pair sliced = slice.splitOnLastLineEnd();
30 | assertEquals("line1\nline2\n", sliced.first.toString());
31 | assertEquals("lastline", sliced.second.toString());
32 | }
33 |
34 |
35 | @Test
36 | public void testSplitOnLastLineEndWithSkip() {
37 | String content = "line1\nline2\nlastline";
38 | ByteSlice slice = sliceFor(content.getBytes());
39 | slice.nextLine();
40 | Pair sliced = slice.splitOnLastLineEnd();
41 | assertEquals("line2\n", sliced.first.toString());
42 | assertEquals("lastline", sliced.second.toString());
43 | }
44 |
45 | @Test
46 | public void testSingleSlice() {
47 | ByteSlice slice = sliceFor(FIELDS22.getBytes());
48 | assertEquals("field11,field12", slice.currentLine());
49 | List fields = getFields(slice);
50 | assertArrayEquals(new String[] {"field11","field12","field21","field22"}, fields.stream().map(f -> f.asString()).toArray());
51 | }
52 |
53 | @Test
54 | public void testSingleSliceFieldSplitWithQuote() {
55 | ByteSlice slice = sliceFor("f1,\"f2,f2\",f3,\"f\"\"4\"".getBytes());
56 | assertEquals("f1", slice.nextField(',', '"').asString());
57 | assertEquals("f2,f2", slice.nextField(',', '"').asString());
58 | assertEquals("f3", slice.nextField(',', '"').asString());
59 | assertEquals("f\"4", slice.nextField(',', '"').asString());
60 | }
61 |
62 | @Test
63 | public void testMultiSliceQuoteSplit() {
64 | String content = "f1,\"f2,f2\",f3,\"f\"\"4\"";
65 | for (int splitIndex = 0; splitIndex < content.length(); splitIndex++) {
66 | String prefix = content.substring(0, splitIndex);
67 | String suffix = content.substring(splitIndex);
68 | ByteSlice join = ByteSlice.join(sliceFor(prefix.getBytes()), sliceFor(suffix.getBytes()));
69 | assertEquals(content, join.toString());
70 | List fields = getFieldsQuoted(join);
71 | assertArrayEquals(
72 | "Failed on split index "+splitIndex,
73 | new String[] {"f1","f2,f2","f3","f\"4"},
74 | fields.stream().map(f -> f.asString()).toArray());
75 | }
76 | }
77 |
78 | @Test
79 | public void testEmptyFieldHandling() {
80 | ByteSlice slice = sliceFor("f1,,f2".getBytes());
81 | assertEquals("f1", slice.nextField(',', '"').asString());
82 | assertEquals("", slice.nextField(',', '"').asString());
83 | assertEquals("f2", slice.nextField(',', '"').asString());
84 | assertNull(slice.nextField(',', '"'));
85 | }
86 |
87 | @Test
88 | public void testSkipSlice() {
89 | ByteSlice slice = sliceFor(FIELDS22.getBytes());
90 | slice.skipUntil(',');
91 | assertEquals("field12", slice.nextField(',').asString());
92 | }
93 |
94 | @Test
95 | public void testSkipSliceQuoted() {
96 | ByteSlice slice = sliceFor("f1,\"f2,f2\",f3".getBytes());
97 | slice.skipUntil(',', '"');
98 | slice.skipUntil(',', '"');
99 | assertEquals("f3", slice.nextField(',', '"').asString());
100 | }
101 |
102 |
103 | @Test
104 | public void testMultiSliceIteration() {
105 | String content = FIELDS22;
106 | int splitIndex = 3;
107 | String prefix = content.substring(0, splitIndex);
108 | String suffix = content.substring(splitIndex);
109 | CompositeByteSlice slice = (CompositeByteSlice) ByteSlice.join(sliceFor(prefix.getBytes()), sliceFor(suffix.getBytes()));
110 | byte[] result = new byte[slice.size()];
111 | for (int i = 0; i < result.length; i++) {
112 | result[i] = slice.currentByte();
113 | slice.nextByte();
114 | }
115 | assertEquals(FIELDS22, new String(result));
116 | }
117 |
118 | @Test
119 | public void testMultiSliceFieldSplit() {
120 | String content = FIELDS33;
121 | for (int splitIndex = 0; splitIndex < content.length(); splitIndex++) {
122 | String prefix = content.substring(0, splitIndex);
123 | String suffix = content.substring(splitIndex);
124 | ByteSlice join = ByteSlice.join(sliceFor(prefix.getBytes()), sliceFor(suffix.getBytes()));
125 | assertEquals(content, join.toString());
126 | List fields = getFields(join);
127 | assertArrayEquals(
128 | "Failed on split index "+splitIndex,
129 | new String[] {"field11","field12","field13","field21","field22","field23","field31","field32","field33"},
130 | fields.stream().map(f -> f.asString()).toArray());
131 | }
132 | }
133 |
134 | @Test
135 | public void testMultiSliceSkip() {
136 | String content = FIELDS33;
137 | for (int splitIndex = 0; splitIndex < content.length(); splitIndex++) {
138 | String prefix = content.substring(0, splitIndex);
139 | String suffix = content.substring(splitIndex);
140 | ByteSlice join = ByteSlice.join(sliceFor(prefix.getBytes()), sliceFor(suffix.getBytes()));
141 | assertTrue(join.skipUntil(','));
142 | assertEquals("field12", join.nextField(',').asString());
143 | assertTrue(join.nextLine());
144 | assertEquals("field21", join.nextField(',').asString());
145 | assertTrue(join.skipUntil(','));
146 | assertEquals("field23", join.nextField(',').asString());
147 | }
148 | }
149 |
150 | @Test
151 | public void testMultiSliceFieldSplitQuoted() {
152 | String content = QUOTED;
153 | for (int splitIndex = 0; splitIndex < content.length(); splitIndex++) {
154 | String prefix = content.substring(0, splitIndex);
155 | String suffix = content.substring(splitIndex);
156 | ByteSlice join = ByteSlice.join(sliceFor(prefix.getBytes()), sliceFor(suffix.getBytes()));
157 | assertEquals(content, join.toString());
158 | List fields = getFieldsQuoted(join, '\'');
159 | assertArrayEquals(
160 | "Failed on split index "+splitIndex,
161 | new String[] {"field11","field12","field21","field22"},
162 | fields.stream().map(f -> f.asString()).toArray());
163 | }
164 | }
165 |
166 | private ByteSlice sliceFor(byte[] bytes) {
167 | return ByteSlice.wrap(new ByteArrayChunk(bytes, bytes.length, false, (b) -> {}), Charset.defaultCharset());
168 | }
169 |
170 | private List getFields(ByteSlice bs) {
171 | List result = new ArrayList<>();
172 | while(true) {
173 | ByteArrayField f = bs.nextField(',');
174 | if (f == null) {
175 | if (!bs.nextLine()) break;
176 | } else {
177 | result.add(f.clone());
178 | }
179 | }
180 | return result;
181 | }
182 |
183 | private List getFieldsQuoted(ByteSlice bs, char quote) {
184 | List result = new ArrayList<>();
185 | while(true) {
186 | ByteArrayField f = bs.nextField(',', quote);
187 | if (f == null) {
188 | if (!bs.nextLine()) break;
189 | } else {
190 | result.add(f.clone());
191 | }
192 | }
193 | return result;
194 | }
195 |
196 | private List getFieldsQuoted(ByteSlice bs) {
197 | return getFieldsQuoted(bs, '"');
198 | }
199 | }
200 |
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/CharsetHandlingTest.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.parser;
2 |
3 | import static org.junit.Assert.*;
4 |
5 | import java.io.File;
6 | import java.util.function.Function;
7 | import java.util.stream.Stream;
8 |
9 | import org.junit.Test;
10 |
11 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
12 | import uk.elementarysoftware.quickcsv.api.CSVRecordWithHeader;
13 | import uk.elementarysoftware.quickcsv.sampledomain.City;
14 |
15 | public class CharsetHandlingTest {
16 |
17 | File utf8input = new File("src/test/resources/cities-rus-utf8.txt");
18 | File cp1251input = new File("src/test/resources/cities-rus-cp1251.txt");
19 |
20 | String[] expected = new String[] {
21 | "City [city=Андора, population=0, latitude=42.5, longitude=1.5166667]",
22 | "City [city=City of London, population=0, latitude=51.514125, longitude=-0.093689]",
23 | "City [city=Харків, population=0, latitude=49.980814, longitude=36.252718]"
24 | };
25 |
26 | @Test
27 | public void testUtf8() throws Exception {
28 | Stream cities = CSVParserBuilder.aParser(EnumMapper.MAPPER, EnumMapper.RusFields.class)
29 | .usingCharset("UTF-8").build().parse(utf8input);
30 | String[] actual = cities.map(c -> c.toString()).toArray(String[]::new);
31 | assertArrayEquals(expected, actual);
32 | }
33 |
34 | @Test
35 | public void testCp1251() throws Exception {
36 | Stream cities = CSVParserBuilder.aParser(EnumMapper.MAPPER, EnumMapper.RusFields.class)
37 | .usingCharset("Cp1251").build().parse(cp1251input);
38 | String[] actual = cities.map(c -> c.toString()).toArray(String[]::new);
39 | assertArrayEquals(expected, actual);
40 | }
41 |
42 | public static class EnumMapper {
43 |
44 | enum RusFields {
45 | Latitude("Широта"),
46 | Longitude("Долгота"),
47 | AccentCity("Город"),
48 | Population("Население");
49 |
50 | private final String headerFieldName;
51 |
52 | private RusFields(String headerFieldName) {
53 | this.headerFieldName = headerFieldName;
54 | }
55 |
56 | @Override
57 | public String toString() {
58 | return headerFieldName;
59 | }
60 | }
61 |
62 | public static final Function, City> MAPPER = r -> {
63 | return new City(
64 | r.getField(RusFields.AccentCity).asString(),
65 | r.getField(RusFields.Population).asInt(),
66 | r.getField(RusFields.Latitude).asDouble(),
67 | r.getField(RusFields.Longitude).asDouble(),
68 | r.getField(RusFields.Population).asLong()
69 | );
70 | };
71 | }
72 | }
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/FieldSubsetViewTest.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.parser;
2 |
3 | import static org.junit.Assert.*;
4 |
5 | import org.junit.Before;
6 | import org.junit.Test;
7 |
8 | public class FieldSubsetViewTest {
9 |
10 | enum FieldSubset {
11 | C3, C4, C1
12 | }
13 |
14 | private FieldSubsetView fs;
15 |
16 | @Before
17 | public void init() {
18 | this.fs = FieldSubsetView.forExplicitHeader(FieldSubset.class, "C1", "C2", "C3", "C4", "C5");
19 | fs.onSlice(null, null);
20 | }
21 |
22 | @Test
23 | public void testFieldIndexIsSortedAndCorrect() {
24 | assertArrayEquals(new int[] {0, 2, 3}, fs.getFieldIndexes());
25 | }
26 |
27 | @Test
28 | public void testIndexOfInSourceView() {
29 | assertEquals(1, fs.indexOfInSourceView(0));
30 | assertEquals(2, fs.indexOfInSourceView(1));
31 | assertEquals(0, fs.indexOfInSourceView(2));
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/TestParsingSpecialCases.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.parser;
2 |
3 | import static org.junit.Assert.assertArrayEquals;
4 |
5 | import java.io.ByteArrayInputStream;
6 | import java.io.InputStream;
7 | import java.util.List;
8 | import java.util.stream.Collectors;
9 |
10 | import org.junit.Test;
11 |
12 | import uk.elementarysoftware.quickcsv.api.CSVParser;
13 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
14 |
15 | public class TestParsingSpecialCases {
16 |
17 | CSVParser parser =
18 | CSVParserBuilder.aParser(r -> new String[] {
19 | r.getField(Fields.A).asString(),
20 | r.getField(Fields.B).asString(),
21 | r.getField(Fields.C).asString()
22 | }, Fields.class).build();
23 |
24 | @Test
25 | public void testLineEndsWithEmptyField() {
26 | InputStream csv = new ByteArrayInputStream("A,B,C\na,,".getBytes());
27 | List result = parser.parse(csv).collect(Collectors.toList());
28 | assertArrayEquals(new String[] {"a", "", ""}, result.get(0));
29 | }
30 |
31 | @Test
32 | public void testLineEndsWithEmptyFieldQuoted() {
33 | InputStream csv = new ByteArrayInputStream("\"A\",\"B\",\"C\"\n\"a\",\"\",\"\"".getBytes());
34 | List result = parser.parse(csv).collect(Collectors.toList());
35 | assertArrayEquals(new String[] {"a", "", ""}, result.get(0));
36 | }
37 |
38 | static enum Fields {
39 | A, B, C;
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/TestParsingWithHeader.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.parser;
2 |
3 | import static org.junit.Assert.*;
4 |
5 | import java.io.File;
6 | import java.io.IOException;
7 | import java.util.List;
8 | import java.util.function.Function;
9 | import java.util.stream.Collectors;
10 | import java.util.stream.Stream;
11 |
12 | import org.junit.Test;
13 |
14 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
15 | import uk.elementarysoftware.quickcsv.api.StandardMappers;
16 | import uk.elementarysoftware.quickcsv.sampledomain.City;
17 |
18 | public class TestParsingWithHeader {
19 |
20 | File input = new File("src/test/resources/cities-with-header.txt");
21 |
22 | String[] expected = new String[] {
23 | "City [city=Andorra, population=0, latitude=42.5, longitude=1.5166667]",
24 | "City [city=City of London, population=0, latitude=51.514125, longitude=-0.093689]",
25 | "City [city=Kharkiv, population=0, latitude=49.980814, longitude=36.252718]"
26 | };
27 |
28 | @Test
29 | public void testSequential() throws Exception {
30 | Stream cities = CSVParserBuilder.aParser(ignoreErrors(City.MAPPER))
31 | .build().parse(input).sequential();
32 | String[] actual = cities.filter(c -> c != null).map(c -> c.toString()).toArray(String[]::new);
33 | assertArrayEquals(expected, actual);
34 | }
35 |
36 | @Test
37 | public void testSequentialWithEnumApi() throws Exception {
38 | Stream cities = CSVParserBuilder.aParser(City.HeaderAwareMapper.MAPPER, City.HeaderAwareMapper.Fields.class)
39 | .build().parse(input).sequential();
40 | String[] actual = cities.map(c -> c.toString()).toArray(String[]::new);
41 | assertArrayEquals(expected, actual);
42 | }
43 |
44 | @Test
45 | public void testSequentialWithEnumApiWithFirstColumn() throws Exception {
46 | Stream cities = CSVParserBuilder.aParser(City.HeaderAwareMapper2.MAPPER, City.HeaderAwareMapper2.Fields.class)
47 | .build().parse(input).sequential();
48 | String[] actual = cities.map(c -> c.toString()).toArray(String[]::new);
49 | assertEquals(3, actual.length);
50 | }
51 |
52 | @Test
53 | public void testParallel() throws Exception {
54 | Stream cities = CSVParserBuilder.aParser(ignoreErrors(City.MAPPER))
55 | .build().parse(input).parallel();
56 | String[] actual = cities.filter(c -> c != null).map(c -> c.toString()).toArray(String[]::new);
57 | assertArrayEquals(expected, actual);
58 | }
59 |
60 | @Test
61 | /**
62 | * Checks that we can skip records on parallel stream. That verifies that the stream is ordered by
63 | * default and behaves normally when being copied by java's skipping stream decorator.
64 | */
65 | public void testParallelParseWithSkip() throws IOException {
66 | List> result = CSVParserBuilder.aParser(StandardMappers.TO_STRING_LIST).build()
67 | .parse(input).skip(1).collect(Collectors.toList());
68 | assertEquals(3, result.size());
69 | assertArrayEquals(new String[] {"ad","andorra","Andorra","07","","42.5","1.5166667"}, result.get(0).toArray(new String[0]));
70 | }
71 |
72 | private static Function ignoreErrors(Function f) {
73 | return t -> {
74 | try {
75 | return f.apply(t);
76 | } catch (Exception e) {
77 | return null;
78 | }
79 | };
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/TestParsingWithHeaderQuoted.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.parser;
2 |
3 | import static org.junit.Assert.*;
4 |
5 | import java.io.File;
6 | import java.util.stream.Stream;
7 |
8 | import org.junit.Test;
9 |
10 | import uk.elementarysoftware.quickcsv.api.CSVParserBuilder;
11 | import uk.elementarysoftware.quickcsv.sampledomain.City;
12 |
13 | public class TestParsingWithHeaderQuoted {
14 |
15 | File input = new File("src/test/resources/cities-with-header-quoted.txt");
16 |
17 | String[] expected = new String[] {
18 | "City [city=Andorra, population=0, latitude=42.5, longitude=1.5166667]",
19 | "City [city=City of London, population=0, latitude=51.514125, longitude=-0.093689]",
20 | "City [city=Kharkiv, population=0, latitude=49.980814, longitude=36.252718]"
21 | };
22 |
23 |
24 | @Test
25 | public void testSequentialWithEnumApi() throws Exception {
26 | Stream cities = CSVParserBuilder.aParser(City.HeaderAwareMapper.MAPPER, City.HeaderAwareMapper.Fields.class)
27 | .usingSeparatorWithQuote(',', '"')
28 | .build().parse(input).sequential();
29 | String[] actual = cities.map(c -> c.toString()).toArray(String[]::new);
30 | assertArrayEquals(expected, actual);
31 | }
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/parser/simple/StraightForwardParser.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.parser.simple;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.io.InputStream;
6 | import java.nio.ByteBuffer;
7 | import java.nio.file.Files;
8 | import java.util.function.Function;
9 | import java.util.stream.Stream;
10 |
11 | import uk.elementarysoftware.quickcsv.api.ByteArraySource;
12 | import uk.elementarysoftware.quickcsv.api.CSVParser;
13 | import uk.elementarysoftware.quickcsv.api.CSVRecord;
14 | import uk.elementarysoftware.quickcsv.api.Field;
15 |
16 | public class StraightForwardParser implements CSVParser {
17 |
18 | @Override @SuppressWarnings("resource")
19 | public Stream parse(File source) throws IOException {
20 | Stream lines = Files.lines(source.toPath());
21 | return lines.map(l -> l.split(",")).map(toCSVRecord());
22 | }
23 |
24 | private Function toCSVRecord() {
25 | return new Function() {
26 | @Override
27 | public CSVRecord apply(String[] fields) {
28 | return new SimpleCSVRecord(fields);
29 | }
30 | };
31 | }
32 |
33 | public static class SimpleCSVRecord implements CSVRecord {
34 |
35 | private String[] fields;
36 | private int index;
37 |
38 | public SimpleCSVRecord(String[] fields) {
39 | this.index = 0;
40 | this.fields = fields;
41 | }
42 |
43 | @Override
44 | public void skipField() {
45 | index++;
46 | }
47 |
48 | @Override
49 | public void skipFields(int nFields) {
50 | index+=nFields;
51 | }
52 |
53 | @Override
54 | public Field getNextField() {
55 | return new SimpleField(fields[index++]);
56 | }
57 |
58 | }
59 |
60 | public static class SimpleField implements Field {
61 | String value;
62 |
63 | public SimpleField(String value) {
64 | this.value = value;
65 | }
66 |
67 | @Override
68 | public ByteBuffer raw() {
69 | return null;
70 | }
71 |
72 | @Override
73 | public String asString() {
74 | return value;
75 | }
76 |
77 | @Override
78 | public double asDouble() {
79 | return Double.parseDouble(value);
80 | }
81 |
82 | @Override
83 | public byte asByte() {
84 | return 0;
85 | }
86 |
87 | @Override
88 | public char asChar() {
89 | return 0;
90 | }
91 |
92 | @Override
93 | public short asShort() {
94 | return 0;
95 | }
96 |
97 | @Override
98 | public int asInt() {
99 | if (isEmpty()) return 0;
100 | return Integer.parseInt(value);
101 | }
102 |
103 | @Override
104 | public long asLong() {
105 | return 0;
106 | }
107 |
108 | @Override
109 | public Field clone() {
110 | return this;
111 | }
112 |
113 | @Override
114 | public boolean isEmpty() {
115 | return value.length() == 0;
116 | }
117 |
118 | @Override
119 | public Double asBoxedDouble() {
120 | return asDouble();
121 | }
122 |
123 | @Override
124 | public Integer asBoxedInt() {
125 | return asInt();
126 | }
127 | }
128 |
129 | @Override
130 | public Stream parse(InputStream is) {
131 | throw new UnsupportedOperationException();
132 | }
133 |
134 | @Override
135 | public Stream parse(ByteArraySource bas) {
136 | throw new UnsupportedOperationException();
137 | }
138 | }
--------------------------------------------------------------------------------
/src/test/java/uk/elementarysoftware/quickcsv/sampledomain/City.java:
--------------------------------------------------------------------------------
1 | package uk.elementarysoftware.quickcsv.sampledomain;
2 |
3 | import java.util.function.Function;
4 |
5 | import uk.elementarysoftware.quickcsv.api.CSVRecord;
6 | import uk.elementarysoftware.quickcsv.api.CSVRecordWithHeader;
7 | import uk.elementarysoftware.quickcsv.api.Field;
8 |
9 | public class City {
10 |
11 | public static final Function MAPPER = City::new;
12 |
13 | public static class HeaderAwareMapper {
14 |
15 | public static enum Fields {
16 | AccentCity,
17 | Latitude,
18 | Longitude,
19 | Population
20 | }
21 |
22 | public static final Function, City> MAPPER = r -> {
23 | return new City(
24 | r.getField(Fields.AccentCity).asString(),
25 | r.getField(Fields.Population).asInt(),
26 | r.getField(Fields.Latitude).asDouble(),
27 | r.getField(Fields.Longitude).asDouble(),
28 | r.getField(Fields.Population).asLong()
29 | );
30 | };
31 | }
32 |
33 | public static class HeaderAwareMapper2 {
34 | public static enum Fields {
35 | AccentCity, Population, Latitude, Longitude, Country, City
36 | }
37 |
38 | public static final Function, City> MAPPER = r -> {
39 | return new City(
40 | r.getField(Fields.City).asString(),
41 | r.getField(Fields.Population).asInt(),
42 | r.getField(Fields.Latitude).asDouble(),
43 | r.getField(Fields.Longitude).asDouble(),
44 | r.getField(Fields.Population).asLong()
45 | );
46 | };
47 | }
48 |
49 | private static final int CITY_INDEX = 2;
50 |
51 | private final String city;
52 | private final int population;
53 | private final double latitude;
54 | private final double longitude;
55 | private final long populationL;
56 |
57 | public City(CSVRecord r) {
58 | r.skipFields(CITY_INDEX);
59 | this.city = r.getNextField().asString();
60 | r.skipField();
61 | Field popField = r.getNextField();
62 | this.population = popField.asInt();
63 | this.populationL = popField.asLong();
64 | this.latitude = r.getNextField().asDouble();
65 | this.longitude = r.getNextField().asDouble();
66 | }
67 |
68 | public City(String city, int population, double latitude, double longitude, long populationL) {
69 | this.city = city;
70 | this.population = population;
71 | this.latitude = latitude;
72 | this.longitude = longitude;
73 | this.populationL = populationL;
74 | }
75 |
76 | public String getCity() {
77 | return city;
78 | }
79 |
80 | public int getPopulation() {
81 | return population;
82 | }
83 |
84 | public double getLatitude() {
85 | return latitude;
86 | }
87 |
88 | public double getLongitude() {
89 | return longitude;
90 | }
91 |
92 | public long getPopulationL() {
93 | return populationL;
94 | }
95 |
96 | @Override
97 | public int hashCode() {
98 | final int prime = 31;
99 | int result = 1;
100 | result = prime * result + ((city == null) ? 0 : city.hashCode());
101 | long temp;
102 | temp = Double.doubleToLongBits(latitude);
103 | result = prime * result + (int) (temp ^ (temp >>> 32));
104 | temp = Double.doubleToLongBits(longitude);
105 | result = prime * result + (int) (temp ^ (temp >>> 32));
106 | result = prime * result + population;
107 | return result;
108 | }
109 |
110 | @Override
111 | public boolean equals(Object obj) {
112 | if (this == obj)
113 | return true;
114 | if (obj == null)
115 | return false;
116 | if (getClass() != obj.getClass())
117 | return false;
118 | City other = (City) obj;
119 | if (city == null) {
120 | if (other.city != null)
121 | return false;
122 | } else if (!city.equals(other.city))
123 | return false;
124 | if (Double.doubleToLongBits(latitude) != Double.doubleToLongBits(other.latitude))
125 | return false;
126 | if (Double.doubleToLongBits(longitude) != Double.doubleToLongBits(other.longitude))
127 | return false;
128 | if (population != other.population)
129 | return false;
130 | return true;
131 | }
132 |
133 | @Override
134 | public String toString() {
135 | return "City [city=" + city + ", population=" + population + ", latitude=" + latitude + ", longitude=" + longitude + "]";
136 | }
137 |
138 | }
139 |
--------------------------------------------------------------------------------
/src/test/resources/cities-rus-cp1251.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/titorenko/quick-csv-streamer/cc11f6e9db6df4f3aac57ca72c4176501667f41d/src/test/resources/cities-rus-cp1251.txt
--------------------------------------------------------------------------------
/src/test/resources/cities-rus-utf8.txt:
--------------------------------------------------------------------------------
1 | Страна,Код города,Город,Регион,Население,Широта,Долгота
2 | ad,andorra,Андора,07,,42.5,1.5166667
3 | gb,city of london,City of London,H9,,51.514125,-.093689
4 | ua,kharkiv,Харків,07,,49.980814,36.252718
--------------------------------------------------------------------------------
/src/test/resources/cities-with-header-quoted.txt:
--------------------------------------------------------------------------------
1 | "Country","City","AccentCity","Region","Population","Latitude","Longitude"
2 | "ad","andorra","Andorra","07","","42.5","1.5166667"
3 | "gb","city of london","City of London","H9","","51.514125","-.093689"
4 | "ua","kharkiv","Kharkiv","07","","49.980814","36.252718"
--------------------------------------------------------------------------------
/src/test/resources/cities-with-header.txt:
--------------------------------------------------------------------------------
1 | Country,City,AccentCity,Region,Population,Latitude,Longitude
2 | ad,andorra,Andorra,07,,42.5,1.5166667
3 | gb,city of london,City of London,H9,,51.514125,-.093689
4 | ua,kharkiv,Kharkiv,07,,49.980814,36.252718
--------------------------------------------------------------------------------
/src/test/resources/correctness.txt:
--------------------------------------------------------------------------------
1 | Year,Make,Model,Description,Price
2 | 1997,Ford,E350,"ac, abs, moon",3000.00
3 | 1999,Chevy,"Venture ""Extended Edition""","",4900.00
4 | 1996,Jeep,Grand Cherokee,"MUST SELL!
5 | air, moon roof, loaded",4799.00
6 | 1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00
7 | ,,"Venture ""Extended Edition""","",4900.00
--------------------------------------------------------------------------------