├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.rst ├── docs ├── EXAMPLES.rst └── HISTORY.rst ├── es2csv.py ├── es2csv_cli.py ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── es_data ├── docs.json └── docs_with_unicode.json ├── run_tests_in_docker.sh ├── smoke.bats ├── test.sh ├── test_env.dockerfile └── test_env_2.x.dockerfile /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | build 3 | *.egg-info 4 | *.pyc 5 | *.*~ 6 | .DS_Store 7 | *.sublime-* 8 | out.csv 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2015 Taras Layshchuk 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include docs/HISTORY.rst 2 | include docs/EXAMPLES.rst 3 | include requirements.txt 4 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ====== 2 | es2csv 3 | ====== 4 | 5 | A CLI tool for exporting data from Elasticsearch into a CSV file 6 | ---------------------------------------------------------------- 7 | 8 | A command line utility, written in Python, for querying Elasticsearch in Lucene query syntax or Query DSL syntax and exporting the results as documents into a CSV file. This tool can query bulk docs in multiple indices and fetch only selected fields, which reduces query execution time. 9 | 10 | Quick Look Demo 11 | --------------- 12 | .. 
figure:: https://cloud.githubusercontent.com/assets/7491121/12016825/59eb5f82-ad58-11e5-81eb-871a49e39c37.gif 13 | 14 | Requirements 15 | ------------ 16 | | This tool should be used with Elasticsearch 5.x; for older versions, please check the `2.x release `_. 17 | | You also need `Python 2.7.x `_ and `pip `_. 18 | 19 | Installation 20 | ------------ 21 | 22 | From source: 23 | 24 | .. code-block:: bash 25 | 26 | $ pip install git+https://github.com/taraslayshchuk/es2csv.git 27 | 28 | From pip: 29 | 30 | .. code-block:: bash 31 | 32 | $ pip install es2csv 33 | 34 | Usage 35 | ----- 36 | .. code-block:: bash 37 | 38 | $ es2csv [-h] -q QUERY [-u URL] [-a AUTH] [-i INDEX [INDEX ...]] 39 | [-D DOC_TYPE [DOC_TYPE ...]] [-t TAGS [TAGS ...]] -o FILE 40 | [-f FIELDS [FIELDS ...]] [-S FIELDS [FIELDS ...]] [-d DELIMITER] 41 | [-m INTEGER] [-s INTEGER] [-k] [-r] [-e] [--verify-certs] 42 | [--ca-certs CA_CERTS] [--client-cert CLIENT_CERT] 43 | [--client-key CLIENT_KEY] [-v] [--debug] 44 | 45 | Arguments: 46 | -q, --query QUERY Query string in Lucene syntax. [required] 47 | -o, --output-file FILE CSV file location. [required] 48 | -u, --url URL Elasticsearch host URL. Default is http://localhost:9200. 49 | -a, --auth Elasticsearch basic authentication in the form of username:password. 50 | -i, --index-prefixes INDEX [INDEX ...] Index name prefix(es). Default is ['logstash-*']. 51 | -D, --doc-types DOC_TYPE [DOC_TYPE ...] Document type(s). 52 | -t, --tags TAGS [TAGS ...] Query tags. 53 | -f, --fields FIELDS [FIELDS ...] List of selected fields in output. Default is ['_all']. 54 | -S, --sort FIELDS [FIELDS ...] List of field:direction pairs to sort on. Default is []. 55 | -d, --delimiter DELIMITER Delimiter to use in CSV file. Default is ",". 56 | -m, --max INTEGER Maximum number of results to return. Default is 0. 57 | -s, --scroll-size INTEGER Scroll size for each batch of results. Default is 100. 58 | -k, --kibana-nested Format nested fields in Kibana style. 
59 | -r, --raw-query Switch query format in the Query DSL. 60 | -e, --meta-fields Add meta-fields in output. 61 | --verify-certs Verify SSL certificates. Default is False. 62 | --ca-certs CA_CERTS Location of CA bundle. 63 | --client-cert CLIENT_CERT Location of Client Auth cert. 64 | --client-key CLIENT_KEY Location of Client Cert Key. 65 | -v, --version Show version and exit. 66 | --debug Debug mode on. 67 | -h, --help show this help message and exit 68 | 69 | [ `Usage Examples <./docs/EXAMPLES.rst>`_ | `Release Changelog <./docs/HISTORY.rst>`_ ] 70 | -------------------------------------------------------------------------------- /docs/EXAMPLES.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Arguments 3 | ========= 4 | 5 | ============================================================ ==================================================================== 6 | Argument Description 7 | ============================================================ ==================================================================== 8 | `-q, --query <#query>`_ QUERY Query string in Lucene syntax. [required] 9 | `-o, --output-file <#output-file>`_ FILE CSV file location. [required] 10 | `-u, --url <#url>`_ URL Elasticsearch host URL. Default is "http://localhost:9200". 11 | `-a, --auth <#auth>`_ Elasticsearch basic authentication in the form of username:password. 12 | `-i, --index-prefixes <#index-prefixes>`_ INDEX [INDEX ...] Index name prefix(es). Default is ['logstash-\*']. 13 | `-D, --doc-types <#doc-types>`_ DOC_TYPE [DOC_TYPE ...] Document type(s). 14 | `-t, --tags <#tags>`_ TAGS [TAGS ...] Query tags. 15 | `-f, --fields <#fields>`_ FIELDS [FIELDS ...] List of selected fields in output. Default is ['_all']. 16 | `-S, --sort <#sort>`_ FIELDS [FIELDS ...] List of field:direction pairs to sort on. Default is []. 17 | `-d, --delimiter <#delimiter>`_ DELIMITER Delimiter to use in CSV file. Default is ",". 
18 | `-m, --max <#max>`_ INTEGER Maximum number of results to return. Default is 0. 19 | -s, --scroll-size INTEGER Scroll size for each batch of results. Default is 100. 20 | `-k, --kibana-nested <#kibana-nested>`_ Format nested fields in Kibana style. 21 | `-r, --raw-query <#raw-query>`_ Switch query format in the Query DSL. 22 | `-e, --meta-fields <#meta-fields>`_ Add meta-fields in output. 23 | `--verify-certs <#verify-certs>`_ Verify SSL certificates. Default is False. 24 | `--ca-certs CA_CERTS <#ca-certs>`_ Location of CA bundle. 25 | --client-cert CLIENT_CERT Location of Client Auth cert. 26 | --client-key CLIENT_KEY Location of Client Cert Key. 27 | -v, --version Show version and exit. 28 | --debug Debug mode on. 29 | -h, --help show this help message and exit 30 | ============================================================ ==================================================================== 31 | 32 | ======== 33 | Examples 34 | ======== 35 | 36 | query 37 | ----- 38 | Searching on http://localhost:9200, by default 39 | 40 | .. code-block:: bash 41 | 42 | $ es2csv -q 'user: John' -o database.csv 43 | 44 | output-file 45 | ----------- 46 | Save to my_database.csv file 47 | 48 | .. code-block:: bash 49 | 50 | $ es2csv -q 'user: John' -o my_database.csv 51 | 52 | url 53 | --- 54 | On custom Elasticsearch host 55 | 56 | .. code-block:: bash 57 | 58 | $ es2csv -u my.cool.host.com:9200 -q 'user: John' -o database.csv 59 | 60 | You are using secure Elasticsearch with nginx? No problem! 61 | 62 | .. code-block:: bash 63 | 64 | $ es2csv -u http://my.cool.host.com/es/ -q 'user: John' -o database.csv 65 | 66 | Not default port? 67 | 68 | .. code-block:: bash 69 | 70 | $ es2csv -u my.cool.host.com:6666/es/ -q 'user: John' -o database.csv 71 | 72 | auth 73 | ---- 74 | With Authorization 75 | 76 | .. code-block:: bash 77 | 78 | $ es2csv -u http://login:password@my.cool.host.com:6666/es/ -q 'user: John' -o database.csv 79 | 80 | With explicit Authorization 81 | 82 | .. 
code-block:: bash 83 | 84 | $ es2csv -a login:password -u http://my.cool.host.com:6666/es/ -q 'user: John' -o database.csv 85 | 86 | index-prefixes 87 | -------------- 88 | Specifying an index 89 | 90 | .. code-block:: bash 91 | 92 | $ es2csv -i logstash-2015-07-07 -q 'user: John' -o database.csv 93 | 94 | Multiple indices 95 | 96 | .. code-block:: bash 97 | 98 | $ es2csv -i logstash-2015-07-07 logstash-2015-08-08 -q 'user: John' -o database.csv 99 | 100 | Or an index mask 101 | 102 | .. code-block:: bash 103 | 104 | $ es2csv -i logstash-2015-* -q 'user: John' -o database.csv 105 | 106 | And now together 107 | 108 | .. code-block:: bash 109 | 110 | $ es2csv -i logstash-2015-01-0* logstash-2015-01-10 -q 'user: John' -o database.csv 111 | 112 | Collecting all data on all indices 113 | 114 | .. code-block:: bash 115 | 116 | $ es2csv -i _all -q '*' -o database.csv 117 | 118 | doc-types 119 | --------- 120 | Specifying a document type 121 | 122 | .. code-block:: bash 123 | 124 | $ es2csv -D log -i _all -q '*' -o database.csv 125 | 126 | tags 127 | ---- 128 | With a tag 129 | 130 | .. code-block:: bash 131 | 132 | $ es2csv -t dev -q 'user: John' -o database.csv 133 | 134 | More tags 135 | 136 | .. code-block:: bash 137 | 138 | $ es2csv -t dev prod -q 'user: John' -o database.csv 139 | 140 | fields 141 | ------ 142 | Selecting only the fields you are interested in, if you don't need all of them (the query runs faster) 143 | 144 | .. code-block:: bash 145 | 146 | $ es2csv -f host status date -q 'user: John' -o database.csv 147 | 148 | Or a field mask 149 | 150 | .. code-block:: bash 151 | 152 | $ es2csv -f 'ho*' 'st*us' '*ate' -q 'user: John' -o database.csv 153 | 154 | Selecting all fields, by default 155 | 156 | .. code-block:: bash 157 | 158 | $ es2csv -f _all -q 'user: John' -o database.csv 159 | 160 | Selecting nested fields 161 | 162 | .. 
code-block:: bash 163 | 164 | $ es2csv -f comments.comment comments.date comments.name -q '*' -i twitter -o database.csv 165 | 166 | sort 167 | ---- 168 | Sorting by fields in the order you are interested in; an entry may contain just a field name (sorted in ascending order) 169 | 170 | .. code-block:: bash 171 | 172 | $ es2csv -S key -q '*' -o database.csv 173 | 174 | Or a field:direction pair (desc or asc) 175 | 176 | .. code-block:: bash 177 | 178 | $ es2csv -S status:desc -q '*' -o database.csv 179 | 180 | Using multiple pairs 181 | 182 | .. code-block:: bash 183 | 184 | $ es2csv -S key:desc status:asc -q '*' -o database.csv 185 | 186 | Selecting some field(s), but sorting by other(s) 187 | 188 | .. code-block:: bash 189 | 190 | $ es2csv -S key -f user -q '*' -o database.csv 191 | 192 | delimiter 193 | --------- 194 | Changing the column delimiter in the CSV file (',' by default) 195 | 196 | .. code-block:: bash 197 | 198 | $ es2csv -d ';' -q '*' -i twitter -o database.csv 199 | 200 | max 201 | --- 202 | Max results count 203 | 204 | .. code-block:: bash 205 | 206 | $ es2csv -m 6283185 -q '*' -i twitter -o database.csv 207 | 208 | Retrieve 2000 results in just 2 requests (two scrolls of 1000 each): 209 | 210 | .. code-block:: bash 211 | 212 | $ es2csv -m 2000 -s 1000 -q '*' -i twitter -o database.csv 213 | 214 | kibana-nested 215 | ------------- 216 | Changing the nested-column output format to Kibana style 217 | 218 | .. code-block:: bash 219 | 220 | $ es2csv -k -q '*' -i twitter -o database.csv 221 | 222 | A JSON document example 223 | 224 | .. 
code-block:: json 225 | 226 | { 227 | "title": "Nest eggs", 228 | "body": "Making your money work...", 229 | "tags": [ "cash", "shares" ], 230 | "comments": [ 231 | { 232 | "name": "John Smith", 233 | "comment": "Great article", 234 | "age": 28, 235 | "stars": 4, 236 | "date": "2014-09-01" 237 | }, 238 | { 239 | "name": "Alice White", 240 | "comment": "More like this please", 241 | "age": 31, 242 | "stars": 5, 243 | "date": "2014-10-22" 244 | } 245 | ] 246 | } 247 | 248 | A CSV file in Kibana-style format 249 | 250 | .. code-block:: csv 251 | 252 | body,comments.age,comments.comment,comments.date,comments.name,comments.stars,tags,title 253 | Making your money work...,"28,31","Great article,More like this please","2014-09-01,2014-10-22","John Smith,Alice White","4,5","cash,shares",Nest eggs 254 | 255 | A CSV file in default format 256 | 257 | .. code-block:: csv 258 | 259 | body,comments.0.age,comments.0.comment,comments.0.date,comments.0.name,comments.0.stars,comments.1.age,comments.1.comment,comments.1.date,comments.1.name,comments.1.stars,tags.0,tags.1,title 260 | Making your money work...,28,Great article,2014-09-01,John Smith,4,31,More like this please,2014-10-22,Alice White,5,cash,shares,Nest eggs 261 | 262 | raw-query 263 | --------- 264 | Query DSL syntax 265 | 266 | .. code-block:: bash 267 | 268 | $ es2csv -r -q '{"query": {"match": {"user": "John"}}}' -o database.csv 269 | 270 | Very long queries can be read from a file 271 | 272 | .. code-block:: bash 273 | 274 | $ es2csv -r -q @'~/query string file.json' -o database.csv 275 | 276 | meta-fields 277 | ----------- 278 | Selecting meta-fields: _id, _index, _score, _type 279 | 280 | .. code-block:: bash 281 | 282 | $ es2csv -e -f _all -q 'user: John' -o database.csv 283 | 284 | verify-certs 285 | ------------ 286 | With SSL certificate verification enabled (off by default) 287 | 288 | .. 
code-block:: bash 289 | 290 | $ es2csv --verify-certs -u https://my.cool.host.com/es/ -q 'user: John' -o database.csv 291 | 292 | ca-certs 293 | -------- 294 | With your own certificate authority bundle 295 | 296 | .. code-block:: bash 297 | 298 | $ es2csv --ca-certs '/path/to/your/ca_bundle' --verify-certs -u https://host.com -q '*' -o out.csv 299 | -------------------------------------------------------------------------------- /docs/HISTORY.rst: -------------------------------------------------------------------------------- 1 | .. :changelog: 2 | 3 | Release Changelog 4 | ================= 5 | 6 | 5.5.2 (2018-03-21) 7 | ------------------ 8 | - Fixed encoding in field name to UTF-8. (Issue `#35 `_) 9 | - Added --sort(-S) argument for sorting data by selected field. (Issue `#41 `_) 10 | - Added requirement for version of python 2.7.*. (Issue `#8 `_, `#12 `_, `#20 `_, `#29 `_, `#33 `_ and `#38 `_) 11 | - Update documentation with examples. 12 | - Updating version elasticsearch-py to 5.5.*. 13 | 14 | 5.2.1 (2017-04-02) 15 | ------------------ 16 | - Added --verify-certs, --ca-certs, --client-cert, --client-key arguments for SSL configuration. (Issue `#11 `_ and `#24 `_, Pull `#22 `_) 17 | - Added --scroll_size(-s) argument to specify the scroll size of requests. (Pull `#27 `_) 18 | 19 | 5.2.0 (2017-02-16) 20 | ------------------ 21 | - Updating version elasticsearch-py to 5.2.* and added support of Elasticsearch 5. (Issue `#19 `_) 22 | 23 | 2.4.3 (2017-02-15) 24 | ------------------ 25 | - Update doc according to wildcard support in fields naming. 26 | - Added support of old version pip. (Issue `#16 `_) 27 | 28 | 2.4.2 (2017-02-14) 29 | ------------------ 30 | - Added wildcard support in fields naming. 31 | - Removed column sorting. (Issue `#21 `_) 32 | 33 | 2.4.1 (2016-11-10) 34 | ------------------ 35 | - Added --auth(-a) argument for Elasticsearch basic authentication. (Pull `#17 `_) 36 | - Added --doc_types(-D) argument for specifying document type. 
(Pull `#13 `_) 37 | 38 | 2.4.0 (2016-10-26) 39 | ------------------ 40 | - Added JSON validation for raw query. (Issue `#7 `_) 41 | - Added checks to exclude hangs during connection issues. (Issue `#9 `_) 42 | - Updating version elasticsearch-py to 2.4.0 and freeze this dependence according to mask 2.4.*. (Issue `#14 `_) 43 | - Updating version progressbar2 to fix issue with visibility. 44 | 45 | 1.0.3 (2016-06-12) 46 | ------------------ 47 | - Added option to read query string from file --query(-q) @'~/filename.json'. (Issue `#5 `_) 48 | - Added --meta_fields(-e) argument for selecting meta-fields: _id, _index, _score, _type. (Issue `#6 `_) 49 | - Updating version elasticsearch-py to 2.3.0. 50 | 51 | 1.0.2 (2016-04-12) 52 | ------------------ 53 | - Added --raw_query(-r) argument for using the native Query DSL format. 54 | 55 | 1.0.1 (2016-01-22) 56 | ------------------ 57 | - Fixed support elasticsearch-1.4.0. 58 | - Added --version argument. 59 | - Added history changelog. 60 | 61 | 1.0.0.dev1 (2016-01-04) 62 | ----------------------- 63 | - Fixed encoding in CSV to UTF-8. (Issue `#3 `_, Pull `#1 `_) 64 | - Added better progressbar unit names. (Pull `#2 `_) 65 | - Added pip installation instruction. 66 | 67 | 1.0.0.dev0 (2015-12-25) 68 | ----------------------- 69 | - Initial registration. 70 | - Added first dev-release on github. 71 | - Added first release on PyPI. 
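The connection handling referenced in the 2.4.0 entry above ("checks to exclude hangs during connection issues") is implemented in es2csv.py, which follows below, as a `retry` decorator wrapped around every Elasticsearch call. A minimal, self-contained sketch of that pattern; the `flaky_ping` function and the short delay are hypothetical stand-ins for illustration, and unlike the original (which prints an error and calls `exit(1)` after the final attempt), this sketch simply re-raises:

```python
import time
from functools import wraps


def retry(exception_to_check, tries=3, delay=0.01):
    """Retry a function on the given exception, re-raising after the last attempt."""
    def deco_retry(f):
        @wraps(f)
        def f_retry(*args, **kwargs):
            for attempt in range(tries):
                try:
                    return f(*args, **kwargs)
                except exception_to_check:
                    if attempt == tries - 1:
                        raise  # out of attempts: propagate to the caller
                    time.sleep(delay)  # back off before the next attempt
        return f_retry
    return deco_retry


# Hypothetical flaky call: fails twice, then succeeds on the third attempt.
calls = {'n': 0}


@retry(ConnectionError, tries=3)
def flaky_ping():
    calls['n'] += 1
    if calls['n'] < 3:
        raise ConnectionError('transient failure')
    return 'ok'


print(flaky_ping())  # → ok (after two retried failures)
```

The decorator-factory form (`retry(...)` returns `deco_retry`, which returns the wrapped function) is what lets the exception type and attempt count be configured per call site, as es2csv.py does with `elasticsearch.exceptions.ConnectionError`.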
72 | -------------------------------------------------------------------------------- /es2csv.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import json 4 | import codecs 5 | import elasticsearch 6 | import progressbar 7 | from backports import csv 8 | from functools import wraps 9 | 10 | 11 | FLUSH_BUFFER = 1000 # Chunk of docs to flush in temp file 12 | CONNECTION_TIMEOUT = 120 13 | TIMES_TO_TRY = 3 14 | RETRY_DELAY = 60 15 | META_FIELDS = [u'_id', u'_index', u'_score', u'_type'] 16 | 17 | 18 | # Retry decorator for functions with exceptions 19 | def retry(ExceptionToCheck, tries=TIMES_TO_TRY, delay=RETRY_DELAY): 20 | def deco_retry(f): 21 | @wraps(f) 22 | def f_retry(*args, **kwargs): 23 | mtries = tries 24 | while mtries > 0: 25 | try: 26 | return f(*args, **kwargs) 27 | except ExceptionToCheck as e: 28 | print(e) 29 | print('Retrying in {} seconds ...'.format(delay)) 30 | time.sleep(delay) 31 | mtries -= 1 32 | else: 33 | print('Done.') 34 | try: 35 | return f(*args, **kwargs) 36 | except ExceptionToCheck as e: 37 | print('Fatal Error: {}'.format(e)) 38 | exit(1) 39 | 40 | return f_retry 41 | 42 | return deco_retry 43 | 44 | 45 | class Es2csv: 46 | 47 | def __init__(self, opts): 48 | self.opts = opts 49 | 50 | self.num_results = 0 51 | self.scroll_ids = [] 52 | self.scroll_time = '30m' 53 | 54 | self.csv_headers = list(META_FIELDS) if self.opts.meta_fields else [] 55 | self.tmp_file = '{}.tmp'.format(opts.output_file) 56 | 57 | @retry(elasticsearch.exceptions.ConnectionError, tries=TIMES_TO_TRY) 58 | def create_connection(self): 59 | es = elasticsearch.Elasticsearch(self.opts.url, timeout=CONNECTION_TIMEOUT, http_auth=self.opts.auth, 60 | verify_certs=self.opts.verify_certs, ca_certs=self.opts.ca_certs, 61 | client_cert=self.opts.client_cert, client_key=self.opts.client_key) 62 | es.cluster.health() 63 | self.es_conn = es 64 | 65 | @retry(elasticsearch.exceptions.ConnectionError, 
tries=TIMES_TO_TRY) 66 | def check_indexes(self): 67 | indexes = self.opts.index_prefixes 68 | if '_all' in indexes: 69 | indexes = ['_all'] 70 | else: 71 | indexes = [index for index in indexes if self.es_conn.indices.exists(index)] 72 | if not indexes: 73 | print('Any of index(es) {} does not exist in {}.'.format(', '.join(self.opts.index_prefixes), self.opts.url)) 74 | exit(1) 75 | self.opts.index_prefixes = indexes 76 | 77 | @retry(elasticsearch.exceptions.ConnectionError, tries=TIMES_TO_TRY) 78 | def search_query(self): 79 | @retry(elasticsearch.exceptions.ConnectionError, tries=TIMES_TO_TRY) 80 | def next_scroll(scroll_id): 81 | return self.es_conn.scroll(scroll=self.scroll_time, scroll_id=scroll_id) 82 | 83 | search_args = dict( 84 | index=','.join(self.opts.index_prefixes), 85 | sort=','.join(self.opts.sort), 86 | scroll=self.scroll_time, 87 | size=self.opts.scroll_size, 88 | terminate_after=self.opts.max_results 89 | ) 90 | 91 | if self.opts.doc_types: 92 | search_args['doc_type'] = self.opts.doc_types 93 | 94 | if self.opts.query.startswith('@'): 95 | query_file = self.opts.query[1:] 96 | if os.path.exists(query_file): 97 | with codecs.open(query_file, mode='r', encoding='utf-8') as f: 98 | self.opts.query = f.read() 99 | else: 100 | print('No such file: {}.'.format(query_file)) 101 | exit(1) 102 | if self.opts.raw_query: 103 | try: 104 | query = json.loads(self.opts.query) 105 | except ValueError as e: 106 | print('Invalid JSON syntax in query. 
{}'.format(e)) 107 | exit(1) 108 | search_args['body'] = query 109 | else: 110 | query = self.opts.query if not self.opts.tags else '{} AND tags: ({})'.format( 111 | self.opts.query, ' AND '.join(self.opts.tags)) 112 | search_args['q'] = query 113 | 114 | if '_all' not in self.opts.fields: 115 | search_args['_source_include'] = ','.join(self.opts.fields) 116 | self.csv_headers.extend([unicode(field, "utf-8") for field in self.opts.fields if '*' not in field]) 117 | 118 | if self.opts.debug_mode: 119 | print('Using these indices: {}.'.format(', '.join(self.opts.index_prefixes))) 120 | print('Query[{0[0]}]: {0[1]}.'.format( 121 | ('Query DSL', json.dumps(query, ensure_ascii=False).encode('utf8')) if self.opts.raw_query else ('Lucene', query)) 122 | ) 123 | print('Output field(s): {}.'.format(', '.join(self.opts.fields))) 124 | print('Sorting by: {}.'.format(', '.join(self.opts.sort))) 125 | 126 | res = self.es_conn.search(**search_args) 127 | self.num_results = res['hits']['total'] 128 | 129 | print('Found {} results.'.format(self.num_results)) 130 | if self.opts.debug_mode: 131 | print(json.dumps(res, ensure_ascii=False).encode('utf8')) 132 | 133 | if self.num_results > 0: 134 | codecs.open(self.opts.output_file, mode='w', encoding='utf-8').close() 135 | codecs.open(self.tmp_file, mode='w', encoding='utf-8').close() 136 | 137 | hit_list = [] 138 | total_lines = 0 139 | 140 | widgets = ['Run query ', 141 | progressbar.Bar(left='[', marker='#', right=']'), 142 | progressbar.FormatLabel(' [%(value)i/%(max)i] ['), 143 | progressbar.Percentage(), 144 | progressbar.FormatLabel('] [%(elapsed)s] ['), 145 | progressbar.ETA(), '] [', 146 | progressbar.FileTransferSpeed(unit='docs'), ']' 147 | ] 148 | bar = progressbar.ProgressBar(widgets=widgets, maxval=self.num_results).start() 149 | 150 | while total_lines != self.num_results: 151 | if res['_scroll_id'] not in self.scroll_ids: 152 | self.scroll_ids.append(res['_scroll_id']) 153 | 154 | if not res['hits']['hits']: 155 | 
print('Scroll[{}] expired (multiple reads?). Saving loaded data.'.format(res['_scroll_id'])) 156 | break 157 | for hit in res['hits']['hits']: 158 | total_lines += 1 159 | bar.update(total_lines) 160 | hit_list.append(hit) 161 | if len(hit_list) == FLUSH_BUFFER: 162 | self.flush_to_file(hit_list) 163 | hit_list = [] 164 | if self.opts.max_results: 165 | if total_lines == self.opts.max_results: 166 | self.flush_to_file(hit_list) 167 | print('Hit max result limit: {} records.'.format(self.opts.max_results)) 168 | return 169 | res = next_scroll(res['_scroll_id']) 170 | self.flush_to_file(hit_list) 171 | bar.finish() 172 | 173 | def flush_to_file(self, hit_list): 174 | def to_keyvalue_pairs(source, ancestors=[], header_delimiter='.'): 175 | def is_list(arg): 176 | return type(arg) is list 177 | 178 | def is_dict(arg): 179 | return type(arg) is dict 180 | 181 | if is_dict(source): 182 | for key in source.keys(): 183 | to_keyvalue_pairs(source[key], ancestors + [key]) 184 | 185 | elif is_list(source): 186 | if self.opts.kibana_nested: 187 | [to_keyvalue_pairs(item, ancestors) for item in source] 188 | else: 189 | [to_keyvalue_pairs(item, ancestors + [str(index)]) for index, item in enumerate(source)] 190 | else: 191 | header = header_delimiter.join(ancestors) 192 | if header not in self.csv_headers: 193 | self.csv_headers.append(header) 194 | try: 195 | out[header] = '{}{}{}'.format(out[header], self.opts.delimiter, source) 196 | except KeyError: 197 | out[header] = source 198 | 199 | with codecs.open(self.tmp_file, mode='a', encoding='utf-8') as tmp_file: 200 | for hit in hit_list: 201 | out = {field: hit[field] for field in META_FIELDS} if self.opts.meta_fields else {} 202 | if '_source' in hit and len(hit['_source']) > 0: 203 | to_keyvalue_pairs(hit['_source']) 204 | tmp_file.write('{}\n'.format(json.dumps(out))) 205 | tmp_file.close() 206 | 207 | def write_to_csv(self): 208 | if self.num_results > 0: 209 | self.num_results = sum(1 for line in codecs.open(self.tmp_file, 
mode='r', encoding='utf-8')) 210 | if self.num_results > 0: 211 | output_file = codecs.open(self.opts.output_file, mode='a', encoding='utf-8') 212 | csv_writer = csv.DictWriter(output_file, fieldnames=self.csv_headers) 213 | csv_writer.writeheader() 214 | timer = 0 215 | widgets = ['Write to csv ', 216 | progressbar.Bar(left='[', marker='#', right=']'), 217 | progressbar.FormatLabel(' [%(value)i/%(max)i] ['), 218 | progressbar.Percentage(), 219 | progressbar.FormatLabel('] [%(elapsed)s] ['), 220 | progressbar.ETA(), '] [', 221 | progressbar.FileTransferSpeed(unit='lines'), ']' 222 | ] 223 | bar = progressbar.ProgressBar(widgets=widgets, maxval=self.num_results).start() 224 | 225 | for line in codecs.open(self.tmp_file, mode='r', encoding='utf-8'): 226 | timer += 1 227 | bar.update(timer) 228 | csv_writer.writerow(json.loads(line)) 229 | output_file.close() 230 | bar.finish() 231 | else: 232 | print('There are no docs with the selected field(s): {}.'.format(','.join(self.opts.fields))) 233 | os.remove(self.tmp_file) 234 | 235 | def clean_scroll_ids(self): 236 | try: 237 | self.es_conn.clear_scroll(body=','.join(self.scroll_ids)) 238 | except Exception: 239 | pass 240 | -------------------------------------------------------------------------------- /es2csv_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | title: A CLI tool for exporting data from Elasticsearch into a CSV file. 4 | description: Command line utility, written in Python, for querying Elasticsearch in Lucene query syntax or Query DSL syntax and exporting results as documents into a CSV file. 
5 | usage: es2csv -q '*' -i _all -e -o ~/file.csv -k -m 100 6 | es2csv -q '{"query": {"match_all": {}}}' -r -i _all -o ~/file.csv 7 | es2csv -q @'~/long_query_file.json' -r -i _all -o ~/file.csv 8 | es2csv -q '*' -i logstash-2015-01-* -f host status message -o ~/file.csv 9 | es2csv -q 'host: localhost' -i logstash-2015-01-01 logstash-2015-01-02 -f host status message -o ~/file.csv 10 | es2csv -q 'host: localhost AND status: GET' -u http://kibana.com:80/es/ -o ~/file.csv 11 | es2csv -q '*' -t dev prod -u http://login:password@kibana.com:6666/es/ -o ~/file.csv 12 | es2csv -q '{"query": {"match_all": {}}, "filter":{"term": {"tags": "dev"}}}' -r -u http://login:password@kibana.com:6666/es/ -o ~/file.csv 13 | """ 14 | import sys 15 | import argparse 16 | import es2csv 17 | 18 | __version__ = '5.5.2' 19 | 20 | 21 | def main(): 22 | p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) 23 | p.add_argument('-q', '--query', dest='query', type=str, required=True, help='Query string in Lucene syntax.') 24 | p.add_argument('-u', '--url', dest='url', default='http://localhost:9200', type=str, help='Elasticsearch host URL. Default is %(default)s.') 25 | p.add_argument('-a', '--auth', dest='auth', type=str, required=False, help='Elasticsearch basic authentication in the form of username:password.') 26 | p.add_argument('-i', '--index-prefixes', dest='index_prefixes', default=['logstash-*'], type=str, nargs='+', metavar='INDEX', help='Index name prefix(es). 
Default is %(default)s.') 27 | p.add_argument('-D', '--doc-types', dest='doc_types', type=str, nargs='+', metavar='DOC_TYPE', help='Document type(s).') 28 | p.add_argument('-t', '--tags', dest='tags', type=str, nargs='+', help='Query tags.') 29 | p.add_argument('-o', '--output-file', dest='output_file', type=str, required=True, metavar='FILE', help='CSV file location.') 30 | p.add_argument('-f', '--fields', dest='fields', default=['_all'], type=str, nargs='+', help='List of selected fields in output. Default is %(default)s.') 31 | p.add_argument('-S', '--sort', dest='sort', default=[], type=str, nargs='+', metavar='FIELDS', help='List of <field>:<direction> pairs to sort on. Default is %(default)s.') 32 | p.add_argument('-d', '--delimiter', dest='delimiter', default=',', type=str, help='Delimiter to use in CSV file. Default is "%(default)s".') 33 | p.add_argument('-m', '--max', dest='max_results', default=0, type=int, metavar='INTEGER', help='Maximum number of results to return. Default is %(default)s.') 34 | p.add_argument('-s', '--scroll-size', dest='scroll_size', default=100, type=int, metavar='INTEGER', help='Scroll size for each batch of results. Default is %(default)s.') 35 | p.add_argument('-k', '--kibana-nested', dest='kibana_nested', action='store_true', help='Format nested fields in Kibana style.') 36 | p.add_argument('-r', '--raw-query', dest='raw_query', action='store_true', help='Switch query format to the Query DSL.') 37 | p.add_argument('-e', '--meta-fields', dest='meta_fields', action='store_true', help='Add meta-fields to output.') 38 | p.add_argument('--verify-certs', dest='verify_certs', action='store_true', help='Verify SSL certificates. 
Default is %(default)s.') 39 | p.add_argument('--ca-certs', dest='ca_certs', default=None, type=str, help='Location of CA bundle.') 40 | p.add_argument('--client-cert', dest='client_cert', default=None, type=str, help='Location of Client Auth cert.') 41 | p.add_argument('--client-key', dest='client_key', default=None, type=str, help='Location of Client Cert Key.') 42 | p.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__, help='Show version and exit.') 43 | p.add_argument('--debug', dest='debug_mode', action='store_true', help='Debug mode on.') 44 | 45 | if len(sys.argv) == 1: 46 | p.print_help() 47 | exit() 48 | 49 | opts = p.parse_args() 50 | es = es2csv.Es2csv(opts) 51 | es.create_connection() 52 | es.check_indexes() 53 | es.search_query() 54 | es.write_to_csv() 55 | es.clean_scroll_ids() 56 | 57 | 58 | if __name__ == '__main__': 59 | main() 60 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | elasticsearch>=5.5.2,<5.6.0 2 | progressbar2>=3.36.0,<3.37.0 3 | backports.csv>=1.0.5,<1.1.0 4 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | python-tag = py27 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import re 4 | 5 | from setuptools import setup 6 | 7 | classifiers = [ 8 | 'Development Status :: 5 - Production/Stable', 9 | 'Intended Audience :: Developers', 10 | 'Intended Audience :: System Administrators', 11 | 'Environment :: Console', 12 | 'License :: OSI Approved :: Apache Software License', 13 | 'Programming Language :: Python :: 2 :: Only', 14 | 'Programming Language :: Python :: 
2.7', 15 | 'Topic :: System :: Systems Administration', 16 | 'Topic :: Database', 17 | 'Topic :: Text Processing', 18 | 'Topic :: Internet', 19 | 'Topic :: Utilities', 20 | ] 21 | 22 | 23 | def read_file(*paths): 24 | here = os.path.dirname(os.path.abspath(__file__)) 25 | with open(os.path.join(here, *paths)) as f: 26 | return f.read() 27 | 28 | 29 | src_file = read_file('es2csv_cli.py') 30 | url = 'https://github.com/taraslayshchuk/es2csv' 31 | 32 | 33 | def get_version(): 34 | """ 35 | Pull version from module without loading module first. This was lovingly 36 | collected and adapted from 37 | https://github.com/pypa/virtualenv/blob/12.1.1/setup.py#L67. 38 | """ 39 | 40 | version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", 41 | src_file, re.M) 42 | if version_match: 43 | return version_match.group(1) 44 | raise RuntimeError("Unable to find version string.") 45 | 46 | 47 | def get_description(): 48 | try: 49 | return src_file.split('\n')[2].split(':')[1].strip() 50 | except Exception: 51 | raise RuntimeError("Unable to find description string.") 52 | 53 | 54 | version = get_version() 55 | 56 | with open('README.rst') as file_readme: 57 | readme = file_readme.read() 58 | readme = re.sub(r'\.(/docs/[A-Z]+\.rst)', r'%s/blob/%s\1' % (url, version), readme) 59 | 60 | with open('docs/HISTORY.rst') as history_file: 61 | history = history_file.read() 62 | 63 | with open('requirements.txt') as file_requirements: 64 | requirements = file_requirements.read().splitlines() 65 | 66 | settings = dict() 67 | settings.update( 68 | name='es2csv', 69 | version=version, 70 | description=get_description(), 71 | long_description='%s\n\n%s' % (readme, history), 72 | author='Taras Layshchuk', 73 | author_email='taraslayshchuk@gmail.com', 74 | license='Apache 2.0', 75 | url=url, 76 | classifiers=classifiers, 77 | python_requires='==2.7.*', 78 | keywords='elasticsearch export kibana es bulk csv', 79 | py_modules=['es2csv', 'es2csv_cli'], 80 | entry_points={ 81 | 'console_scripts': [ 82 | 
'es2csv = es2csv_cli:main' 83 | ] 84 | }, 85 | install_requires=requirements, 86 | ) 87 | 88 | setup(**settings) 89 | -------------------------------------------------------------------------------- /tests/es_data/docs.json: -------------------------------------------------------------------------------- 1 | {"key": 1, "email": "mirandasampson@exoteric.com", "phone": "+1 (840) 440-3835", "address": "598 Central Avenue, Columbus, Alabama, 3577", "general": {"account": {"isActive": false, "registered": "2015-11-07T06:24:24 -02:00"}, "gender": "male"}, "coords": {"latitude": -53.504984, "longitude": -99.603298 }, "isVisible": true, "tags": ["stg", "prod"]} 2 | {"key": 2, "email": "winniepoole@snips.com", "phone": "+1 (813) 578-3298", "address": "352 Kings Hwy, Bethany, Puerto Rico, 4754", "general": {"account": {"isActive": false, "registered": "2015-11-11T10:31:21 -02:00"}, "gender": "female"}, "coords": {"latitude": 50.856949, "longitude": 63.269778 }, "isVisible": true, "tags": ["qa", "dev"]} 3 | {"key": 3, "email": "russoblake@bostonic.com", "phone": "+1 (985) 560-3188", "address": "951 Miller Place, Irwin, District Of Columbia, 9054", "general": {"account": {"isActive": true, "registered": "2014-06-16T03:30:55 -03:00"}, "gender": "female"}, "coords": {"latitude": -7.603898, "longitude": -27.098493 }, "isVisible": false, "tags": ["dev", "qa"]} 4 | {"key": 4, "email": "petraanthony@prowaste.com", "phone": "+1 (905) 437-2490", "address": "697 Lefferts Place, Coaldale, Alaska, 229", "general": {"account": {"isActive": true, "registered": "2016-01-27T06:38:16 -02:00"}, "gender": "female"}, "coords": {"latitude": -1.803428, "longitude": 106.846802 }, "isVisible": true, "tags": ["prod", "dev"]} 5 | {"key": 5, "email": "hornhebert@escenta.com", "phone": "+1 (850) 473-3789", "address": "612 Denton Place, Carrizo, Missouri, 6506", "general": {"account": {"isActive": true, "registered": "2014-11-01T03:06:16 -02:00"}, "gender": "male"}, "coords": {"latitude": 23.442781, 
"longitude": -104.658652 }, "isVisible": false, "tags": ["dev", "stg"]} 6 | {"key": 6, "email": "lottnoble@cuizine.com", "phone": "+1 (885) 407-3386", "address": "230 Temple Court, Smeltertown, Minnesota, 4842", "general": {"account": {"isActive": false, "registered": "2016-07-14T01:24:11 -03:00"}, "gender": "female"}, "coords": {"latitude": 45.563911, "longitude": -15.95042 }, "isVisible": true, "tags": ["prod", "qa"]} 7 | {"key": 7, "email": "carlydennis@verton.com", "phone": "+1 (985) 423-3850", "address": "354 Cherry Street, Colton, Massachusetts, 832", "general": {"account": {"isActive": false, "registered": "2016-05-31T09:16:56 -03:00"}, "gender": "male"}, "coords": {"latitude": 63.364727, "longitude": 128.906581 }, "isVisible": true, "tags": ["qa", "stg"]} 8 | {"key": 8, "email": "trevinoclark@sportan.com", "phone": "+1 (932) 474-2148", "address": "632 Kane Place, Morgandale, North Carolina, 5583", "general": {"account": {"isActive": false, "registered": "2014-08-09T01:10:37 -03:00"}, "gender": "female"}, "coords": {"latitude": -86.833656, "longitude": 45.665975 }, "isVisible": false, "tags": ["dev", "prod"]} 9 | {"key": 9, "email": "kristinahoffman@voipa.com", "phone": "+1 (912) 474-3457", "address": "412 Perry Terrace, Corinne, Ohio, 6901", "general": {"account": {"isActive": false, "registered": "2015-02-22T12:52:07 -02:00"}, "gender": "male"}, "coords": {"latitude": 79.047492, "longitude": 24.924048 }, "isVisible": false, "tags": ["dev", "prod"]} 10 | {"key": 10,"email": "jaimephillips@xeronk.com", "phone": "+1 (823) 537-3055", "address": "414 Humboldt Street, Whipholt, Pennsylvania, 4066", "general": {"account": {"isActive": false, "registered": "2014-01-08T12:19:09 -02:00"}, "gender": "female"}, "coords": {"latitude": 18.303031, "longitude": 64.729812 }, "isVisible": true, "tags": ["stg", "qa"]} 11 | -------------------------------------------------------------------------------- /tests/es_data/docs_with_unicode.json: 
-------------------------------------------------------------------------------- 1 | {"key": 1, "emåil": "mirandasampson@exoteric.com", "phone": "+1 (840) 440-3835", "address": "598 Central Avenue, Columbus, Alabama, 3577", "general": {"åccount": {"isActive": false, "registered": "2015-11-07T06:24:24 -02:00"}, "⟹": "☺️ ☹ ☝️ ✌️ ✍️ ❤️ ❣️ ☠ ♨️ ✈️ ⌛ ⌚ ♈ ♉ ♊ ♋ ♌ ♍ ♎ ♏ ♐ ♑ ♒ ♓ ☀️ ☁️ ☂️ ❄️ ⛄️ ☄ ♠️ ♥️ ♦️ ♣️ ▶️ ◀️ ☎️ ⌨ ✉️ ✏️ ✒️ ✂️ ↗️ ➡️ ↘️ ↙️ ↖️ ↕️ ↔️ ↩️ ↪️ ✡️ ☸ ☯️ ✝️ ☦ ☪ ☮ ☢ ☣ ☑️ ✔️ ✖️ ✳️ ✴️ ❇️ ‼️ ©️ ®️ ™️ Ⓜ️ ▪️ ▫️ #⃣️ *️⃣ 0⃣️ 1⃣️ 2⃣️ 3⃣️ 4⃣️ 5⃣️ 6⃣️ 7⃣️ 8⃣️ 9⃣️ ⁉️ ℹ️ ⤴️ ⤵️ ♻️ ◻️ ◼️ ◽ ◾ ☕ ⚠️ ☔ ⏏ ⬆️ ⬇️ ⬅️ ⚡ ☘ ⚓ ♿ ⚒ ⚙ ⚗ ⚖ ⚔ ⚰ ⚱ ⚜ ⚛ ⚪ ⚫ 🀄 ⭐ ⬛ ⬜ ⛑ ⛰ ⛪ ⛲ ⛺ ⛽ ⛵ ⛴ ⛔ ⛅ ⛈ ⛱ ⛄ ⚽ ⚾️ ⛳ ⛸ ⛷ ⛹ ⛏ ⛓ ⛩ ⭕ ❗ 🅿️ ❦ ♕ ♛ ♔ ♖ ♜ ☾ → ⇒ ⟹ ⇨ ⇰ ➩ ➪ ➫ ➬ ➭ ➮ ➯ ➲ ➳ ➵ ➸ ➻ ➺ ➼ ➽ ☜ ☟ ➹ ➷ ↶ ↷ ✆ ⌘ ⎋ ⏎ ⏏ ⎈ ⎌ ⍟ ❥ ツ ღ ☻", "gender": "male"}, "coords": {"latitude": -53.504984, "longitüde": -99.603298 }, "isVisible": true, "tags": ["stg", "prod"]} 2 | {"key": 2, "emåil": "winniepoole@snips.com", "phone": "+1 (813) 578-3298", "address": "352 Kings Hwy, Bethany, Puerto Rico, 4754", "general": {"åccount": {"isActive": false, "registered": "2015-11-11T10:31:21 -02:00"}, "⟹": "☺️ ☹ ☝️ ✌️ ✍️ ❤️ ❣️ ☠ ♨️ ✈️ ⌛ ⌚ ♈ ♉ ♊ ♋ ♌ ♍ ♎ ♏ ♐ ♑ ♒ ♓ ☀️ ☁️ ☂️ ❄️ ⛄️ ☄ ♠️ ♥️ ♦️ ♣️ ▶️ ◀️ ☎️ ⌨ ✉️ ✏️ ✒️ ✂️ ↗️ ➡️ ↘️ ↙️ ↖️ ↕️ ↔️ ↩️ ↪️ ✡️ ☸ ☯️ ✝️ ☦ ☪ ☮ ☢ ☣ ☑️ ✔️ ✖️ ✳️ ✴️ ❇️ ‼️ ©️ ®️ ™️ Ⓜ️ ▪️ ▫️ #⃣️ *️⃣ 0⃣️ 1⃣️ 2⃣️ 3⃣️ 4⃣️ 5⃣️ 6⃣️ 7⃣️ 8⃣️ 9⃣️ ⁉️ ℹ️ ⤴️ ⤵️ ♻️ ◻️ ◼️ ◽ ◾ ☕ ⚠️ ☔ ⏏ ⬆️ ⬇️ ⬅️ ⚡ ☘ ⚓ ♿ ⚒ ⚙ ⚗ ⚖ ⚔ ⚰ ⚱ ⚜ ⚛ ⚪ ⚫ 🀄 ⭐ ⬛ ⬜ ⛑ ⛰ ⛪ ⛲ ⛺ ⛽ ⛵ ⛴ ⛔ ⛅ ⛈ ⛱ ⛄ ⚽ ⚾️ ⛳ ⛸ ⛷ ⛹ ⛏ ⛓ ⛩ ⭕ ❗ 🅿️ ❦ ♕ ♛ ♔ ♖ ♜ ☾ → ⇒ ⟹ ⇨ ⇰ ➩ ➪ ➫ ➬ ➭ ➮ ➯ ➲ ➳ ➵ ➸ ➻ ➺ ➼ ➽ ☜ ☟ ➹ ➷ ↶ ↷ ✆ ⌘ ⎋ ⏎ ⏏ ⎈ ⎌ ⍟ ❥ ツ ღ ☻", "gender": "female"}, "coords": {"latitude": 50.856949, "longitüde": 63.269778 }, "isVisible": true, "tags": ["qa", "dev"]} 3 | {"key": 3, "emåil": "russoblake@bostonic.com", "phone": "+1 (985) 560-3188", "address": "951 Miller Place, Irwin, District Of Columbia, 9054", "general": {"åccount": {"isActive": true, "registered": "2014-06-16T03:30:55 -03:00"}, "⟹": "☺️ ☹ ☝️ ✌️ ✍️ ❤️ ❣️ ☠ ♨️ ✈️ 
⌛ ⌚ ♈ ♉ ♊ ♋ ♌ ♍ ♎ ♏ ♐ ♑ ♒ ♓ ☀️ ☁️ ☂️ ❄️ ⛄️ ☄ ♠️ ♥️ ♦️ ♣️ ▶️ ◀️ ☎️ ⌨ ✉️ ✏️ ✒️ ✂️ ↗️ ➡️ ↘️ ↙️ ↖️ ↕️ ↔️ ↩️ ↪️ ✡️ ☸ ☯️ ✝️ ☦ ☪ ☮ ☢ ☣ ☑️ ✔️ ✖️ ✳️ ✴️ ❇️ ‼️ ©️ ®️ ™️ Ⓜ️ ▪️ ▫️ #⃣️ *️⃣ 0⃣️ 1⃣️ 2⃣️ 3⃣️ 4⃣️ 5⃣️ 6⃣️ 7⃣️ 8⃣️ 9⃣️ ⁉️ ℹ️ ⤴️ ⤵️ ♻️ ◻️ ◼️ ◽ ◾ ☕ ⚠️ ☔ ⏏ ⬆️ ⬇️ ⬅️ ⚡ ☘ ⚓ ♿ ⚒ ⚙ ⚗ ⚖ ⚔ ⚰ ⚱ ⚜ ⚛ ⚪ ⚫ 🀄 ⭐ ⬛ ⬜ ⛑ ⛰ ⛪ ⛲ ⛺ ⛽ ⛵ ⛴ ⛔ ⛅ ⛈ ⛱ ⛄ ⚽ ⚾️ ⛳ ⛸ ⛷ ⛹ ⛏ ⛓ ⛩ ⭕ ❗ 🅿️ ❦ ♕ ♛ ♔ ♖ ♜ ☾ → ⇒ ⟹ ⇨ ⇰ ➩ ➪ ➫ ➬ ➭ ➮ ➯ ➲ ➳ ➵ ➸ ➻ ➺ ➼ ➽ ☜ ☟ ➹ ➷ ↶ ↷ ✆ ⌘ ⎋ ⏎ ⏏ ⎈ ⎌ ⍟ ❥ ツ ღ ☻", "gender": "female"}, "coords": {"latitude": -7.603898, "longitüde": -27.098493 }, "isVisible": false, "tags": ["dev", "qa"]} 4 | {"key": 4, "emåil": "petraanthony@prowaste.com", "phone": "+1 (905) 437-2490", "address": "697 Lefferts Place, Coaldale, Alaska, 229", "general": {"åccount": {"isActive": true, "registered": "2016-01-27T06:38:16 -02:00"}, "⟹": "☺️ ☹ ☝️ ✌️ ✍️ ❤️ ❣️ ☠ ♨️ ✈️ ⌛ ⌚ ♈ ♉ ♊ ♋ ♌ ♍ ♎ ♏ ♐ ♑ ♒ ♓ ☀️ ☁️ ☂️ ❄️ ⛄️ ☄ ♠️ ♥️ ♦️ ♣️ ▶️ ◀️ ☎️ ⌨ ✉️ ✏️ ✒️ ✂️ ↗️ ➡️ ↘️ ↙️ ↖️ ↕️ ↔️ ↩️ ↪️ ✡️ ☸ ☯️ ✝️ ☦ ☪ ☮ ☢ ☣ ☑️ ✔️ ✖️ ✳️ ✴️ ❇️ ‼️ ©️ ®️ ™️ Ⓜ️ ▪️ ▫️ #⃣️ *️⃣ 0⃣️ 1⃣️ 2⃣️ 3⃣️ 4⃣️ 5⃣️ 6⃣️ 7⃣️ 8⃣️ 9⃣️ ⁉️ ℹ️ ⤴️ ⤵️ ♻️ ◻️ ◼️ ◽ ◾ ☕ ⚠️ ☔ ⏏ ⬆️ ⬇️ ⬅️ ⚡ ☘ ⚓ ♿ ⚒ ⚙ ⚗ ⚖ ⚔ ⚰ ⚱ ⚜ ⚛ ⚪ ⚫ 🀄 ⭐ ⬛ ⬜ ⛑ ⛰ ⛪ ⛲ ⛺ ⛽ ⛵ ⛴ ⛔ ⛅ ⛈ ⛱ ⛄ ⚽ ⚾️ ⛳ ⛸ ⛷ ⛹ ⛏ ⛓ ⛩ ⭕ ❗ 🅿️ ❦ ♕ ♛ ♔ ♖ ♜ ☾ → ⇒ ⟹ ⇨ ⇰ ➩ ➪ ➫ ➬ ➭ ➮ ➯ ➲ ➳ ➵ ➸ ➻ ➺ ➼ ➽ ☜ ☟ ➹ ➷ ↶ ↷ ✆ ⌘ ⎋ ⏎ ⏏ ⎈ ⎌ ⍟ ❥ ツ ღ ☻", "gender": "female"}, "coords": {"latitude": -1.803428, "longitüde": 106.846802 }, "isVisible": true, "tags": ["prod", "dev"]} 5 | {"key": 5, "emåil": "hornhebert@escenta.com", "phone": "+1 (850) 473-3789", "address": "612 Denton Place, Carrizo, Missouri, 6506", "general": {"åccount": {"isActive": true, "registered": "2014-11-01T03:06:16 -02:00"}, "⟹": "☺️ ☹ ☝️ ✌️ ✍️ ❤️ ❣️ ☠ ♨️ ✈️ ⌛ ⌚ ♈ ♉ ♊ ♋ ♌ ♍ ♎ ♏ ♐ ♑ ♒ ♓ ☀️ ☁️ ☂️ ❄️ ⛄️ ☄ ♠️ ♥️ ♦️ ♣️ ▶️ ◀️ ☎️ ⌨ ✉️ ✏️ ✒️ ✂️ ↗️ ➡️ ↘️ ↙️ ↖️ ↕️ ↔️ ↩️ ↪️ ✡️ ☸ ☯️ ✝️ ☦ ☪ ☮ ☢ ☣ ☑️ ✔️ ✖️ ✳️ ✴️ ❇️ ‼️ ©️ ®️ ™️ Ⓜ️ ▪️ ▫️ #⃣️ *️⃣ 0⃣️ 1⃣️ 2⃣️ 3⃣️ 4⃣️ 5⃣️ 6⃣️ 7⃣️ 8⃣️ 9⃣️ ⁉️ ℹ️ ⤴️ ⤵️ ♻️ ◻️ ◼️ ◽ ◾ ☕ ⚠️ ☔ ⏏ ⬆️ ⬇️ ⬅️ ⚡ ☘ ⚓ ♿ ⚒ ⚙ ⚗ ⚖ ⚔ ⚰ ⚱ ⚜ ⚛ ⚪ ⚫ 🀄 ⭐ ⬛ ⬜ ⛑ ⛰ ⛪ ⛲ ⛺ ⛽ ⛵ ⛴ ⛔ ⛅ ⛈ ⛱ ⛄ ⚽ ⚾️ ⛳ ⛸ ⛷ ⛹ ⛏ ⛓ ⛩ ⭕ ❗ 🅿️ ❦ ♕ 
♛ ♔ ♖ ♜ ☾ → ⇒ ⟹ ⇨ ⇰ ➩ ➪ ➫ ➬ ➭ ➮ ➯ ➲ ➳ ➵ ➸ ➻ ➺ ➼ ➽ ☜ ☟ ➹ ➷ ↶ ↷ ✆ ⌘ ⎋ ⏎ ⏏ ⎈ ⎌ ⍟ ❥ ツ ღ ☻", "gender": "male"}, "coords": {"latitude": 23.442781, "longitüde": -104.658652 }, "isVisible": false, "tags": ["dev", "stg"]} 6 | {"key": 6, "emåil": "lottnoble@cuizine.com", "phone": "+1 (885) 407-3386", "address": "230 Temple Court, Smeltertown, Minnesota, 4842", "general": {"åccount": {"isActive": false, "registered": "2016-07-14T01:24:11 -03:00"}, "⟹": "☺️ ☹ ☝️ ✌️ ✍️ ❤️ ❣️ ☠ ♨️ ✈️ ⌛ ⌚ ♈ ♉ ♊ ♋ ♌ ♍ ♎ ♏ ♐ ♑ ♒ ♓ ☀️ ☁️ ☂️ ❄️ ⛄️ ☄ ♠️ ♥️ ♦️ ♣️ ▶️ ◀️ ☎️ ⌨ ✉️ ✏️ ✒️ ✂️ ↗️ ➡️ ↘️ ↙️ ↖️ ↕️ ↔️ ↩️ ↪️ ✡️ ☸ ☯️ ✝️ ☦ ☪ ☮ ☢ ☣ ☑️ ✔️ ✖️ ✳️ ✴️ ❇️ ‼️ ©️ ®️ ™️ Ⓜ️ ▪️ ▫️ #⃣️ *️⃣ 0⃣️ 1⃣️ 2⃣️ 3⃣️ 4⃣️ 5⃣️ 6⃣️ 7⃣️ 8⃣️ 9⃣️ ⁉️ ℹ️ ⤴️ ⤵️ ♻️ ◻️ ◼️ ◽ ◾ ☕ ⚠️ ☔ ⏏ ⬆️ ⬇️ ⬅️ ⚡ ☘ ⚓ ♿ ⚒ ⚙ ⚗ ⚖ ⚔ ⚰ ⚱ ⚜ ⚛ ⚪ ⚫ 🀄 ⭐ ⬛ ⬜ ⛑ ⛰ ⛪ ⛲ ⛺ ⛽ ⛵ ⛴ ⛔ ⛅ ⛈ ⛱ ⛄ ⚽ ⚾️ ⛳ ⛸ ⛷ ⛹ ⛏ ⛓ ⛩ ⭕ ❗ 🅿️ ❦ ♕ ♛ ♔ ♖ ♜ ☾ → ⇒ ⟹ ⇨ ⇰ ➩ ➪ ➫ ➬ ➭ ➮ ➯ ➲ ➳ ➵ ➸ ➻ ➺ ➼ ➽ ☜ ☟ ➹ ➷ ↶ ↷ ✆ ⌘ ⎋ ⏎ ⏏ ⎈ ⎌ ⍟ ❥ ツ ღ ☻", "gender": "female"}, "coords": {"latitude": 45.563911, "longitüde": -15.95042 }, "isVisible": true, "tags": ["prod", "qa"]} 7 | {"key": 7, "emåil": "cårlydennis@verton.com", "phone": "+1 (985) 423-3850", "address": "354 Cherry Street, Colton, Massachusetts, 832", "general": {"åccount": {"isActive": false, "registered": "2016-05-31T09:16:56 -03:00"}, "⟹": "☺️ ☹ ☝️ ✌️ ✍️ ❤️ ❣️ ☠ ♨️ ✈️ ⌛ ⌚ ♈ ♉ ♊ ♋ ♌ ♍ ♎ ♏ ♐ ♑ ♒ ♓ ☀️ ☁️ ☂️ ❄️ ⛄️ ☄ ♠️ ♥️ ♦️ ♣️ ▶️ ◀️ ☎️ ⌨ ✉️ ✏️ ✒️ ✂️ ↗️ ➡️ ↘️ ↙️ ↖️ ↕️ ↔️ ↩️ ↪️ ✡️ ☸ ☯️ ✝️ ☦ ☪ ☮ ☢ ☣ ☑️ ✔️ ✖️ ✳️ ✴️ ❇️ ‼️ ©️ ®️ ™️ Ⓜ️ ▪️ ▫️ #⃣️ *️⃣ 0⃣️ 1⃣️ 2⃣️ 3⃣️ 4⃣️ 5⃣️ 6⃣️ 7⃣️ 8⃣️ 9⃣️ ⁉️ ℹ️ ⤴️ ⤵️ ♻️ ◻️ ◼️ ◽ ◾ ☕ ⚠️ ☔ ⏏ ⬆️ ⬇️ ⬅️ ⚡ ☘ ⚓ ♿ ⚒ ⚙ ⚗ ⚖ ⚔ ⚰ ⚱ ⚜ ⚛ ⚪ ⚫ 🀄 ⭐ ⬛ ⬜ ⛑ ⛰ ⛪ ⛲ ⛺ ⛽ ⛵ ⛴ ⛔ ⛅ ⛈ ⛱ ⛄ ⚽ ⚾️ ⛳ ⛸ ⛷ ⛹ ⛏ ⛓ ⛩ ⭕ ❗ 🅿️ ❦ ♕ ♛ ♔ ♖ ♜ ☾ → ⇒ ⟹ ⇨ ⇰ ➩ ➪ ➫ ➬ ➭ ➮ ➯ ➲ ➳ ➵ ➸ ➻ ➺ ➼ ➽ ☜ ☟ ➹ ➷ ↶ ↷ ✆ ⌘ ⎋ ⏎ ⏏ ⎈ ⎌ ⍟ ❥ ツ ღ ☻", "gender": "male"}, "coords": {"latitude": 63.364727, "longitüde": 128.906581 }, "isVisible": true, "tags": ["qa", "stg"]} 8 | {"key": 8, "emåil": "trevinoclark@sportan.com", "phone": "+1 (932) 474-2148", "address": "632 Kane Place, Morgandale, North 
Carolina, 5583", "general": {"åccount": {"isActive": false, "registered": "2014-08-09T01:10:37 -03:00"}, "⟹": "☺️ ☹ ☝️ ✌️ ✍️ ❤️ ❣️ ☠ ♨️ ✈️ ⌛ ⌚ ♈ ♉ ♊ ♋ ♌ ♍ ♎ ♏ ♐ ♑ ♒ ♓ ☀️ ☁️ ☂️ ❄️ ⛄️ ☄ ♠️ ♥️ ♦️ ♣️ ▶️ ◀️ ☎️ ⌨ ✉️ ✏️ ✒️ ✂️ ↗️ ➡️ ↘️ ↙️ ↖️ ↕️ ↔️ ↩️ ↪️ ✡️ ☸ ☯️ ✝️ ☦ ☪ ☮ ☢ ☣ ☑️ ✔️ ✖️ ✳️ ✴️ ❇️ ‼️ ©️ ®️ ™️ Ⓜ️ ▪️ ▫️ #⃣️ *️⃣ 0⃣️ 1⃣️ 2⃣️ 3⃣️ 4⃣️ 5⃣️ 6⃣️ 7⃣️ 8⃣️ 9⃣️ ⁉️ ℹ️ ⤴️ ⤵️ ♻️ ◻️ ◼️ ◽ ◾ ☕ ⚠️ ☔ ⏏ ⬆️ ⬇️ ⬅️ ⚡ ☘ ⚓ ♿ ⚒ ⚙ ⚗ ⚖ ⚔ ⚰ ⚱ ⚜ ⚛ ⚪ ⚫ 🀄 ⭐ ⬛ ⬜ ⛑ ⛰ ⛪ ⛲ ⛺ ⛽ ⛵ ⛴ ⛔ ⛅ ⛈ ⛱ ⛄ ⚽ ⚾️ ⛳ ⛸ ⛷ ⛹ ⛏ ⛓ ⛩ ⭕ ❗ 🅿️ ❦ ♕ ♛ ♔ ♖ ♜ ☾ → ⇒ ⟹ ⇨ ⇰ ➩ ➪ ➫ ➬ ➭ ➮ ➯ ➲ ➳ ➵ ➸ ➻ ➺ ➼ ➽ ☜ ☟ ➹ ➷ ↶ ↷ ✆ ⌘ ⎋ ⏎ ⏏ ⎈ ⎌ ⍟ ❥ ツ ღ ☻", "gender": "female"}, "coords": {"latitude": -86.833656, "longitüde": 45.665975 }, "isVisible": false, "tags": ["dev", "prod"]} 9 | {"key": 9, "emåil": "kristinahoffman@voipa.com", "phone": "+1 (912) 474-3457", "address": "412 Perry Terrace, Corinne, Ohio, 6901", "general": {"åccount": {"isActive": false, "registered": "2015-02-22T12:52:07 -02:00"}, "⟹": "☺️ ☹ ☝️ ✌️ ✍️ ❤️ ❣️ ☠ ♨️ ✈️ ⌛ ⌚ ♈ ♉ ♊ ♋ ♌ ♍ ♎ ♏ ♐ ♑ ♒ ♓ ☀️ ☁️ ☂️ ❄️ ⛄️ ☄ ♠️ ♥️ ♦️ ♣️ ▶️ ◀️ ☎️ ⌨ ✉️ ✏️ ✒️ ✂️ ↗️ ➡️ ↘️ ↙️ ↖️ ↕️ ↔️ ↩️ ↪️ ✡️ ☸ ☯️ ✝️ ☦ ☪ ☮ ☢ ☣ ☑️ ✔️ ✖️ ✳️ ✴️ ❇️ ‼️ ©️ ®️ ™️ Ⓜ️ ▪️ ▫️ #⃣️ *️⃣ 0⃣️ 1⃣️ 2⃣️ 3⃣️ 4⃣️ 5⃣️ 6⃣️ 7⃣️ 8⃣️ 9⃣️ ⁉️ ℹ️ ⤴️ ⤵️ ♻️ ◻️ ◼️ ◽ ◾ ☕ ⚠️ ☔ ⏏ ⬆️ ⬇️ ⬅️ ⚡ ☘ ⚓ ♿ ⚒ ⚙ ⚗ ⚖ ⚔ ⚰ ⚱ ⚜ ⚛ ⚪ ⚫ 🀄 ⭐ ⬛ ⬜ ⛑ ⛰ ⛪ ⛲ ⛺ ⛽ ⛵ ⛴ ⛔ ⛅ ⛈ ⛱ ⛄ ⚽ ⚾️ ⛳ ⛸ ⛷ ⛹ ⛏ ⛓ ⛩ ⭕ ❗ 🅿️ ❦ ♕ ♛ ♔ ♖ ♜ ☾ → ⇒ ⟹ ⇨ ⇰ ➩ ➪ ➫ ➬ ➭ ➮ ➯ ➲ ➳ ➵ ➸ ➻ ➺ ➼ ➽ ☜ ☟ ➹ ➷ ↶ ↷ ✆ ⌘ ⎋ ⏎ ⏏ ⎈ ⎌ ⍟ ❥ ツ ღ ☻", "gender": "male"}, "coords": {"latitude": 79.047492, "longitüde": 24.924048 }, "isVisible": false, "tags": ["dev", "prod"]} 10 | {"key": 10,"emåil": "jaimephillips@xeronk.com", "phone": "+1 (823) 537-3055", "address": "414 Humboldt Street, Whipholt, Pennsylvania, 4066", "general": {"åccount": {"isActive": false, "registered": "2014-01-08T12:19:09 -02:00"}, "⟹": "☺️ ☹ ☝️ ✌️ ✍️ ❤️ ❣️ ☠ ♨️ ✈️ ⌛ ⌚ ♈ ♉ ♊ ♋ ♌ ♍ ♎ ♏ ♐ ♑ ♒ ♓ ☀️ ☁️ ☂️ ❄️ ⛄️ ☄ ♠️ ♥️ ♦️ ♣️ ▶️ ◀️ ☎️ ⌨ ✉️ ✏️ ✒️ ✂️ ↗️ ➡️ ↘️ ↙️ ↖️ ↕️ ↔️ ↩️ ↪️ ✡️ ☸ ☯️ ✝️ ☦ ☪ ☮ ☢ ☣ ☑️ ✔️ ✖️ ✳️ ✴️ ❇️ ‼️ ©️ ®️ ™️ Ⓜ️ ▪️ ▫️ #⃣️ *️⃣ 0⃣️ 1⃣️ 2⃣️ 3⃣️ 4⃣️ 5⃣️ 6⃣️ 
7⃣️ 8⃣️ 9⃣️ ⁉️ ℹ️ ⤴️ ⤵️ ♻️ ◻️ ◼️ ◽ ◾ ☕ ⚠️ ☔ ⏏ ⬆️ ⬇️ ⬅️ ⚡ ☘ ⚓ ♿ ⚒ ⚙ ⚗ ⚖ ⚔ ⚰ ⚱ ⚜ ⚛ ⚪ ⚫ 🀄 ⭐ ⬛ ⬜ ⛑ ⛰ ⛪ ⛲ ⛺ ⛽ ⛵ ⛴ ⛔ ⛅ ⛈ ⛱ ⛄ ⚽ ⚾️ ⛳ ⛸ ⛷ ⛹ ⛏ ⛓ ⛩ ⭕ ❗ 🅿️ ❦ ♕ ♛ ♔ ♖ ♜ ☾ → ⇒ ⟹ ⇨ ⇰ ➩ ➪ ➫ ➬ ➭ ➮ ➯ ➲ ➳ ➵ ➸ ➻ ➺ ➼ ➽ ☜ ☟ ➹ ➷ ↶ ↷ ✆ ⌘ ⎋ ⏎ ⏏ ⎈ ⎌ ⍟ ❥ ツ ღ ☻", "gender": "female"}, "coords": {"latitude": 18.303031, "longitüde": 64.729812 }, "isVisible": true, "tags": ["stg", "qa"]} 11 | -------------------------------------------------------------------------------- /tests/run_tests_in_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | while getopts "be:" opt; do 5 | case "$opt" in 6 | b) BUILD_IMAGE=1 ;; 7 | e) ES_VERSION=$OPTARG ;; 8 | esac 9 | done 10 | shift $((OPTIND-1)) 11 | 12 | if [ ! "$ES_VERSION" ]; then 13 | echo 'Elasticsearch version(-e) required.' 14 | exit 1 15 | fi 16 | 17 | if [[ $BUILD_IMAGE == 1 ]]; then 18 | echo "+++ Docker building build image..." 19 | cat ./tests/test_env.dockerfile | docker build --tag es2csv_test_env:"${ES_VERSION}" --build-arg ES_VERSION="${ES_VERSION}" - 20 | echo "+++ Done." 21 | fi 22 | 23 | echo "+++ Docker running tests in docker..." 24 | docker run -it --rm \ 25 | -v `pwd`:/data \ 26 | es2csv_test_env:"${ES_VERSION}" \ 27 | /bin/bash -c 'su elasticsearch "/usr/share/elasticsearch/bin/elasticsearch" > /var/log/elasticsearch.log 2>&1 & \ 28 | ./tests/test.sh' 29 | echo "+++ Done." 
30 | -------------------------------------------------------------------------------- /tests/smoke.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | DOCS_COUNT=$(cat /data/tests/es_data/docs.json | wc -l) 4 | OUT_FILE=/data/out.csv 5 | 6 | @test "prints usage instructions" { 7 | run es2csv -h 8 | [ "$status" -eq 0 ] 9 | [ $(expr "${lines[0]}" : "usage: es2csv.*") -ne 0 ] 10 | } 11 | @test "query result count" { 12 | run es2csv -q '*' -o $OUT_FILE --debug 13 | echo "${output}" 14 | [ "$status" -eq 0 ] 15 | [ "${lines[3]//[^0-9]/}" -eq "$DOCS_COUNT" ] 16 | [ $(expr $(cat "$OUT_FILE" | wc -l) - 1) -eq "$DOCS_COUNT" ] 17 | } 18 | -------------------------------------------------------------------------------- /tests/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pip install -q -e . 4 | while ! echo exit | curl -s localhost:9200; do sleep 10; done 5 | curl -s -H'Content-Type: application/json' -XPOST localhost:9200/logstash-12.12.2012/log/_bulk --data-binary @<(sed 's/^/{"index": {}}\n&/' tests/es_data/docs.json) | jq . 6 | curl -s -H'Content-Type: application/json' -XPOST localhost:9200/unicode-logstash-12.12.2012/log/_bulk --data-binary @<(sed 's/^/{"index": {}}\n&/' tests/es_data/docs_with_unicode.json) | jq . 
7 | bats tests/smoke.bats 8 | -------------------------------------------------------------------------------- /tests/test_env.dockerfile: -------------------------------------------------------------------------------- 1 | ARG ES_VERSION 2 | FROM docker.elastic.co/elasticsearch/elasticsearch:${ES_VERSION} 3 | 4 | USER root 5 | RUN echo 'timeout=1'>> /etc/yum.conf && \ 6 | yum install epel-release -y -q && \ 7 | yum install procps python jq -y -q && \ 8 | yum clean all && \ 9 | rm -rf /var/cache/yum && \ 10 | curl -L "https://bootstrap.pypa.io/get-pip.py" | python - && \ 11 | curl -L "https://github.com/sstephenson/bats/archive/v0.4.0.tar.gz" | tar xz -C "/tmp" && \ 12 | bash /tmp/bats-0.4.0/install.sh /usr/local && \ 13 | rm -rf /tmp/bats-0.4.0 && \ 14 | echo 'xpack.security.enabled: false' >> /usr/share/elasticsearch/config/elasticsearch.yml 15 | 16 | WORKDIR /data 17 | -------------------------------------------------------------------------------- /tests/test_env_2.x.dockerfile: -------------------------------------------------------------------------------- 1 | # Backup for elasticsearch-docker:2.x 2 | ARG ES_VERSION 3 | FROM elasticsearch:${ES_VERSION} 4 | 5 | RUN apt-get update -qq && \ 6 | apt-get install -qqy procps python jq && \ 7 | apt-get clean -qq && \ 8 | rm -rf /var/lib/apt/lists/* && \ 9 | curl -L "https://bootstrap.pypa.io/get-pip.py" | python - && \ 10 | curl -L "https://github.com/sstephenson/bats/archive/v0.4.0.tar.gz" | tar xz -C "/tmp" && \ 11 | bash /tmp/bats-0.4.0/install.sh /usr/local && \ 12 | rm -rf /tmp/bats-0.4.0 13 | 14 | WORKDIR /data 15 | --------------------------------------------------------------------------------