├── .gitignore ├── .ruby-version ├── Gemfile ├── Gemfile.lock ├── LICENSE.txt ├── README.md ├── Rakefile ├── ecs-mapper ├── example ├── README.md ├── beats.yml ├── beats │ ├── config_header.yml │ └── filebeat.yml ├── elasticsearch.json ├── log │ └── sample.log ├── logstash.conf ├── logstash │ ├── 1-input.conf │ ├── 2-ecs-conversion.conf │ └── 3-output.conf └── mapping.csv ├── lib ├── beats_pipeline_generator.rb ├── elasticsearch_pipeline_generator.rb ├── helpers.rb ├── logstash_pipeline_generator.rb ├── mapping_loader.rb └── options_parser.rb └── test └── unit ├── beats_pipeline_generator_test.rb ├── elasticsearch_pipeline_generator_test.rb ├── helpers_test.rb ├── logstash_pipeline_generator_test.rb ├── mapping_loader_test.rb └── options_parser_test.rb /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | -------------------------------------------------------------------------------- /.ruby-version: -------------------------------------------------------------------------------- 1 | 2.6 2 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | source "https://rubygems.org" 3 | 4 | group 'development' do 5 | gem 'minitest' 6 | gem 'pry' 7 | gem 'rake' 8 | end 9 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | coderay (1.1.2) 5 | method_source (0.9.2) 6 | minitest (5.13.0) 7 | pry (0.12.2) 8 | coderay (~> 1.1.0) 9 | method_source (~> 0.9.0) 10 | rake (13.0.1) 11 | 12 | PLATFORMS 13 | ruby 14 | 15 | DEPENDENCIES 16 | minitest 17 | pry 18 | rake 19 | 20 | BUNDLED WITH 21 | 2.1.4 22 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ⚠️ **This tool and repository are no longer maintained. We strongly advise you to [use Kibana to map custom data to ECS fields](https://www.elastic.co/guide/en/ecs/current/ecs-converting.html) instead.** 2 | 3 | --- 4 | 5 | ## Synopsis 6 | 7 | The ECS mapper tool turns a field mapping from a CSV to an equivalent pipeline for: 8 | 9 | - [Beats](https://www.elastic.co/guide/en/beats/filebeat/current/filtering-and-enhancing-data.html) 10 | - [Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest-processors.html) 11 | - [Logstash](https://www.elastic.co/guide/en/logstash/current/filter-plugins.html) 12 | 13 | This tool generates starter pipelines for each solution above to help you 14 | get started quickly in mapping new data sources to ECS. 15 | 16 | A mapping CSV is what you get when you start planning how to map a new data 17 | source to ECS in a spreadsheet. 18 | 19 | Colleagues may collaborate on a spreadsheet that looks like this: 20 | 21 | | source\_field | destination\_field | notes | 22 | |--------------|-------------------|---------------------------------------| 23 | | duration | event.duration | ECS supports nanosecond precision | 24 | | remoteip | source.ip | Hey @Jane do you agree with this one? | 25 | | message | | No need to change this field | 26 | | ... | | | 27 | 28 | You can export your spreadsheet to CSV, run it through the ECS mapper, 29 | and generate your starter pipelines. 30 | 31 | Note that this tool generates starter pipelines. They only do field rename and copy 32 | operations as well as some field format adjustments. It's up to you to integrate them 33 | in a complete pipeline that ingests and outputs the data however you need. 34 | 35 | Scroll down to the [Examples](#examples) section below to get right to a 36 | concrete example you can play with. 37 | 38 | ## CSV Format 39 | 40 | Here are more details on the CSV format supported by this tool. Since mapping 41 | spreadsheets are used by humans, it's totally fine to have as many columns 42 | as you need in your spreadsheets/CSV. Only the following columns will be considered: 43 | 44 | | column name | required | allowed values | notes | 45 | |-------------|----------|----------------|-------| 46 | | source\_field | required | | A dotted Elasticsearch field name. Dots represent JSON nesting. Lines with empty "source\_field" are skipped. | 47 | | destination\_field | required | | A dotted Elasticsearch field name. Dots represent JSON nesting. Can be left empty if there's no copy action (just a type conversion). | 48 | | format\_action | optional | to\_float, to\_integer, to\_string, to\_boolean, to\_array, parse\_timestamp, uppercase, lowercase, (empty) | Simple conversion to apply to the field value. | 49 | | timestamp\_format | optional | UNIX, UNIX\_MS, other format strings, (empty) | Only UNIX and UNIX\_MS formats are supported across all three tools. You may also specify other formats, like ISO8601, TAI64N, or a Java time pattern, but we will not validate whether the format is supported by the tool. | 50 | | copy\_action | optional | rename, copy, (empty) | What to do with the field. If left empty, default action is based on the `--copy-action` flag. | 51 | 52 | You can start from this 53 | [spreadsheet template](https://docs.google.com/spreadsheets/d/1m5JiOTeZtUueW3VOVqS8bFYqNGEEyp0jAsgO12NFkNM).
Make a copy of it in your Google Docs account, or download it as an Excel file. 54 | 55 | When the destination field is @timestamp, then we always enforce an explicit ```format_action``` of ```parse_timestamp``` to avoid conversion problems downstream. If no ```timestamp_format``` is provided, then ```UNIX_MS``` is used. Please note that the timestamp layouts used by the [Filebeat processor for converting timestamps](https://www.elastic.co/guide/en/beats/filebeat/current/processor-timestamp.html) are different from the formats supported by date processors in Logstash and Elasticsearch Ingest Node. 56 | 57 | 58 | 59 | ## Usage and Dependencies 60 | 61 | This is a simple Ruby program with no external dependencies, other than development 62 | dependencies. 63 | 64 | Any modern version of Ruby should be sufficient. If you don't intend to run the 65 | tests or the rake tasks, you can skip right to [usage tips](#using-the-ecs-mapper). 66 | 67 | ### Ruby Setup 68 | 69 | If you want to tweak the code of this script, run the tests or use the rake tasks, 70 | you'll need to install the development dependencies. 71 | 72 | Once you have Ruby installed for your platform, installing the dependencies is simply: 73 | 74 | ```bash 75 | gem install bundler 76 | bundle install 77 | ``` 78 | 79 | Run the tests: 80 | 81 | ```bash 82 | rake test 83 | ``` 84 | 85 | ### Using the ECS Mapper 86 | 87 | Display the help: 88 | 89 | ```bash 90 | ./ecs-mapper --help 91 | Reads a CSV mapping of source field names to destination field names, and generates 92 | Elastic pipelines to help perform the conversion. 93 | 94 | You can have as many columns as you want in your CSV. 95 | Only the following columns will be used by this tool: 96 | source_field, destination_field, format_action, copy_action 97 | 98 | Options: 99 | -f, --file FILE Input CSV file. 100 | -o, --output DIR Output directory. Defaults to parent dir of --file. 101 | --copy-action COPY_ACTION 102 | Default action for field renames. Acceptable values are: copy, rename. Default is copy. 103 | --debug Shorthand for --log-level=debug 104 | -h, --help Display help 105 | ``` 106 | 107 | Process my.csv and output pipelines in the same directory as the CSV. 108 | 109 | ```bash 110 | ./ecs-mapper --file my.csv 111 | ``` 112 | 113 | Process my.csv and output pipelines elsewhere. 114 | 115 | ```bash 116 | ./ecs-mapper --file my.csv --output pipelines/mine/ 117 | ``` 118 | 119 | Process my.csv, renaming fields with an empty value in the "copy\_action" column 120 | instead of copying them (the default). 121 | 122 | ```bash 123 | ./ecs-mapper --file my.csv --copy-action rename 124 | ``` 125 | 126 | ## Examples 127 | 128 | Look at an example CSV mapping and the pipelines generated from it: 129 | 130 | - [example/mapping.csv](example/mapping.csv) 131 | - [example/beats.yml](example/beats.yml) 132 | - [example/elasticsearch.json](example/elasticsearch.json) 133 | - [example/logstash.conf](example/logstash.conf) 134 | 135 | You can try each pipeline easily by following the instructions 136 | in [example/README.md](example/). 137 | 138 | ## Caveats 139 | 140 | * The Beats pipelines don't perform "to\_array", "uppercase", or 141 | "lowercase" transformations. They could be implemented via the "script" processor. 142 | * Only UNIX and UNIX\_MS timestamp formats are supported across Beats, Elasticsearch, 143 | and Logstash. For other timestamp formats, please modify the starter pipeline or add the 144 | appropriate date processor in the generated pipeline by hand.
Refer to the documentation 145 | for [Beats](https://www.elastic.co/guide/en/beats/filebeat/current/processor-timestamp.html), [Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/reference/master/date-processor.html), and [Logstash](https://www.elastic.co/guide/en/logstash/current/plugins-filters-date.html#plugins-filters-date-match). 146 | * This tool does not currently support additional processors, like setting static 147 | field values or dropping events based on a condition. 148 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "rake/testtask" 2 | 3 | Rake::TestTask.new(:test) do |t| 4 | t.libs << "test" 5 | t.libs << "lib" 6 | t.test_files = FileList["test/**/*_test.rb"] 7 | end 8 | 9 | task :default => :test 10 | 11 | task :example do |t| 12 | system("./ecs-mapper --file example/mapping.csv") 13 | system("cp example/logstash.conf example/logstash/2-ecs-conversion.conf") 14 | system("cat example/beats/config_header.yml example/beats.yml > example/beats/filebeat.yml") 15 | end 16 | -------------------------------------------------------------------------------- /ecs-mapper: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'fileutils' 4 | require 'logger' 5 | 6 | require_relative 'lib/options_parser' 7 | require_relative 'lib/mapping_loader' 8 | require_relative 'lib/beats_pipeline_generator' 9 | require_relative 'lib/elasticsearch_pipeline_generator' 10 | require_relative 'lib/logstash_pipeline_generator' 11 | 12 | SYNOPSIS = <<-SYN 13 | Reads a CSV mapping of source field names to destination field names, and generates 14 | Elastic pipelines to help perform the conversion. 15 | 16 | You can have as many columns as you want in your CSV. 17 | Only the following columns will be used by this tool: 18 | #{KNOWN_CSV_HEADERS.join(', ')} 19 | SYN 20 | 21 | VERSION = '0.0.1' 22 | VERSION_STRING = "ecs-mapper #{VERSION}" 23 | 24 | def main(options) 25 | puts "Options: #{options}" if $debug 26 | 27 | FileUtils.mkdir_p(options[:output]) 28 | 29 | raw_mapping = read_csv(options[:file]) 30 | mapping = make_mapping_explicit(raw_mapping, options) 31 | 32 | create_output_directory!(options[:output]) 33 | 34 | elasticsearch_pl = generate_elasticsearch_pipeline(mapping) 35 | puts output_elasticsearch_pipeline(elasticsearch_pl, options[:output]) 36 | 37 | beats_pl = generate_beats_pipeline(mapping) 38 | puts output_beats_pipeline(beats_pl, options[:output]) 39 | 40 | mutations, dates, array_fields = generate_logstash_pipeline(mapping) 41 | puts output_logstash_pipeline(mutations, dates, array_fields, options[:output]) 42 | end 43 | 44 | def create_output_directory!(dir) 45 | unless dir.directory? 46 | FileUtils.mkdir_p(dir) 47 | end 48 | end 49 | 50 | main( parse_options!(ARGV) ) 51 | -------------------------------------------------------------------------------- /example/README.md: -------------------------------------------------------------------------------- 1 | ## Running the Examples 2 | 3 | Here are the instructions to run each of the example pipelines. 4 | 5 | Note that the Beats and Logstash pipelines have been copied to 6 | "example/beats/filebeat.yml" and "example/logstash/2-ecs-conversion.conf" respectively, 7 | in order to produce functional configurations used in the examples below. 8 | 9 | All commands should be run from the root of this repo.
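If you want to regenerate these example pipelines yourself (for instance after editing example/mapping.csv), the Rakefile shown above defines an `example` task that re-runs the mapper and copies the generated files into place. A minimal sketch of that workflow, assuming Ruby and the development dependencies from the Gemfile are installed:

```bash
# Install rake and the other development dependencies declared in the Gemfile.
gem install bundler && bundle install

# Re-run ./ecs-mapper against example/mapping.csv, then refresh the copies under
# example/logstash/ and example/beats/ (this is what the :example task in the Rakefile does).
bundle exec rake example
```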
10 | 11 | ### Elasticsearch ingest pipeline 12 | 13 | In a Kibana console, prepare a "simulate" API call, without the array of processors: 14 | 15 | ```JS 16 | POST _ingest/pipeline/_simulate 17 | { "pipeline": { "processors" : 18 | 19 | } , "docs": 20 | [ { "_source": 21 | { "log_level": "debug", "eventid": 424242, "hostip": "192.0.2.3", 22 | "srcip": "192.0.2.1", "srcport": "42", "destip": "192.0.2.2", "destport": "42", 23 | "timestamp": "now", "action": "Testing", "duration": "1.1", 24 | "successful": "true", "process":{ "args": "--yolo" }, 25 | "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36" 26 | } 27 | } 28 | ] 29 | } 30 | ``` 31 | 32 | Paste the full content of the example/elasticsearch.json file below `"processors":`. 33 | It should now look like this: 34 | 35 | ```JS 36 | POST _ingest/pipeline/_simulate 37 | { "pipeline": { "processors" : 38 | [ 39 | { 40 | "rename": { 41 | "field": "srcip", 42 | "target_field": "source.address", 43 | "ignore_missing": true 44 | } 45 | }, 46 | // ... 47 | ``` 48 | 49 | When you execute the call, you'll see the resulting event, with transformations applied: 50 | 51 | ```JSON 52 | { 53 | "docs" : [ 54 | { 55 | "doc" : { 56 | "_index" : "_index", 57 | "_type" : "_doc", 58 | "_id" : "_id", 59 | "_source" : { 60 | "process" : { 61 | "args" : [ 62 | "--yolo" 63 | ] 64 | }, 65 | "log" : { 66 | "level" : "DEBUG" 67 | }, 68 | ... 69 | ``` 70 | 71 | ### Logstash 72 | 73 | Logstash can load multiple .conf files in alphabetical order, to form a full pipeline. 74 | We already have the generated Logstash pipeline copied to the example configuration 75 | directory, at "example/logstash/2-ecs-conversion.conf": 76 | 77 | ```bash 78 | ls example/logstash/ # 1-input.conf 2-ecs-conversion.conf 3-output.conf 79 | ``` 80 | 81 | Start Logstash: 82 | 83 | ```bash 84 | $logstash_path/bin/logstash -f example/logstash/ 85 | ``` 86 | 87 | Once Logstash is running, paste this document into Logstash's terminal: 88 | 89 | ```json 90 | { "log_level": "debug", "eventid": 424242, "srcip": "192.0.2.1", "srcport": 42, "destip": "192.0.2.2", "destport": 42, "hostip": "192.0.2.42", "ts": "now", "action": "Testing", "duration": "1.1", "process":{ "args": "--yolo" }, "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36" } 91 | ``` 92 | 93 | Logstash outputs the resulting event, with transformations applied: 94 | 95 | ```ruby 96 | { 97 | "host" => { 98 | "ip" => [ 99 | [0] "192.0.2.42" 100 | ] 101 | }, 102 | "process" => { 103 | "args" => [ 104 | [0] "--yolo" 105 | ] 106 | }, 107 | # ... 108 | ``` 109 | 110 | ### Beats 111 | 112 | We already have an example Filebeat configuration file based on the generated 113 | pipeline, at "example/beats/filebeat.yml". This configuration file reads the sample 114 | NDJSON log "example/log/sample.log" and outputs the converted docs to stdout. 115 | 116 | Run Filebeat with this configuration: 117 | 118 | ```bash 119 | $filebeat_path/filebeat -c example/beats/filebeat.yml 120 | ``` 121 | 122 | Filebeat outputs the resulting event, with transformations applied: 123 | 124 | ```JSON 125 | { 126 | "@timestamp": "2020-01-01T01:01:01.001Z", 127 | "process": { 128 | "args": "--yolo" 129 | }, 130 | "host": { 131 | "ip": "192.0.2.42", 132 | "name": "matbook-pro.lan" 133 | }, 134 | ...
135 | ``` 136 | -------------------------------------------------------------------------------- /example/beats.yml: -------------------------------------------------------------------------------- 1 | processors: 2 | - timestamp: 3 | field: some_timestamp_field 4 | target_field: "@timestamp" 5 | layouts: UNIX_MS 6 | timezone: UTC 7 | ignore_missing: true 8 | ignore_failure: true 9 | - timestamp: 10 | field: some_other_timestamp 11 | target_field: "@timestamp" 12 | layouts: UNIX_MS 13 | timezone: UTC 14 | ignore_missing: true 15 | ignore_failure: true 16 | - timestamp: 17 | field: some_new_timestamp 18 | target_field: destination_timestamp 19 | layouts: UNIX 20 | timezone: UTC 21 | ignore_missing: true 22 | ignore_failure: true 23 | - copy_fields: 24 | fields: 25 | - from: srcip 26 | to: source.address 27 | - from: srcip 28 | to: source.ip 29 | - from: new_event.srcip 30 | to: source.ip 31 | - from: destip 32 | to: destination.address 33 | - from: destport 34 | to: destination.port 35 | - from: ts 36 | to: timestamp 37 | ignore_missing: true 38 | fail_on_error: false 39 | - rename: 40 | fields: 41 | - from: srcport 42 | to: source.port 43 | - from: action 44 | to: event.action 45 | - from: duration 46 | to: event.duration 47 | - from: user_agent 48 | to: user_agent.original 49 | - from: log_level 50 | to: log.level 51 | - from: eventid 52 | to: event.id 53 | - from: hostip 54 | to: host.ip 55 | ignore_missing: true 56 | fail_on_error: false 57 | - convert: 58 | fields: 59 | - from: source.port 60 | type: long 61 | - from: destination.port 62 | type: long 63 | - from: event.duration 64 | type: float 65 | - from: event.id 66 | type: string 67 | ignore_missing: true 68 | fail_on_error: false 69 | -------------------------------------------------------------------------------- /example/beats/config_header.yml: -------------------------------------------------------------------------------- 1 | filebeat.inputs: 2 | - type: log 3 | enabled: true 4 | paths: 5 | - example/log/sample.log 6 | processors: 7 | - decode_json_fields: 8 | fields: ["message"] 9 | target: "" 10 | 11 | output.elasticsearch.enabled: false 12 | output.console: 13 | enabled: true 14 | pretty: true 15 | 16 | # Add the generated Beats pipeline below 17 | 18 | -------------------------------------------------------------------------------- /example/beats/filebeat.yml: -------------------------------------------------------------------------------- 1 | filebeat.inputs: 2 | - type: log 3 | enabled: true 4 | paths: 5 | - example/log/sample.log 6 | processors: 7 | - decode_json_fields: 8 | fields: ["message"] 9 | target: "" 10 | 11 | output.elasticsearch.enabled: false 12 | output.console: 13 | enabled: true 14 | pretty: true 15 | 16 | # Add the generated Beats pipeline below 17 | 18 | processors: 19 | - timestamp: 20 | field: some_timestamp_field 21 | target_field: "@timestamp" 22 | layouts: UNIX_MS 23 | timezone: UTC 24 | ignore_missing: true 25 | ignore_failure: true 26 | - timestamp: 27 | field: some_other_timestamp 28 | target_field: "@timestamp" 29 | layouts: UNIX_MS 30 | timezone: UTC 31 | ignore_missing: true 32 | ignore_failure: true 33 | - timestamp: 34 | field: some_new_timestamp 35 | target_field: destination_timestamp 36 | layouts: UNIX 37 | timezone: UTC 38 | ignore_missing: true 39 | ignore_failure: true 40 | - copy_fields: 41 | fields: 42 | - from: srcip 43 | to: source.address 44 | - from: srcip 45 | to: source.ip 46 | - from: new_event.srcip 47 | to: source.ip 48 | - from: destip 49 | to: destination.address 50 | - 
from: destport 51 | to: destination.port 52 | - from: ts 53 | to: timestamp 54 | ignore_missing: true 55 | fail_on_error: false 56 | - rename: 57 | fields: 58 | - from: srcport 59 | to: source.port 60 | - from: action 61 | to: event.action 62 | - from: duration 63 | to: event.duration 64 | - from: user_agent 65 | to: user_agent.original 66 | - from: log_level 67 | to: log.level 68 | - from: eventid 69 | to: event.id 70 | - from: hostip 71 | to: host.ip 72 | ignore_missing: true 73 | fail_on_error: false 74 | - convert: 75 | fields: 76 | - from: source.port 77 | type: long 78 | - from: destination.port 79 | type: long 80 | - from: event.duration 81 | type: float 82 | - from: event.id 83 | type: string 84 | ignore_missing: true 85 | fail_on_error: false 86 | -------------------------------------------------------------------------------- /example/elasticsearch.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "set": { 4 | "field": "source.address", 5 | "value": "{{srcip}}", 6 | "if": "ctx.srcip != null" 7 | } 8 | }, 9 | { 10 | "set": { 11 | "field": "source.ip", 12 | "value": "{{srcip}}", 13 | "if": "ctx.srcip != null" 14 | } 15 | }, 16 | { 17 | "set": { 18 | "field": "source.ip", 19 | "value": "{{new_event.srcip}}", 20 | "if": "ctx.new_event?.srcip != null" 21 | } 22 | }, 23 | { 24 | "date": { 25 | "field": "some_timestamp_field", 26 | "target_field": "@timestamp", 27 | "formats": [ 28 | "UNIX_MS" 29 | ], 30 | "timezone": "UTC", 31 | "ignore_failure": true 32 | } 33 | }, 34 | { 35 | "date": { 36 | "field": "some_other_timestamp", 37 | "target_field": "@timestamp", 38 | "formats": [ 39 | "UNIX_MS" 40 | ], 41 | "timezone": "UTC", 42 | "ignore_failure": true 43 | } 44 | }, 45 | { 46 | "date": { 47 | "field": "some_new_timestamp", 48 | "target_field": "destination_timestamp", 49 | "formats": [ 50 | "UNIX" 51 | ], 52 | "timezone": "UTC", 53 | "ignore_failure": true 54 | } 55 | }, 56 | { 57 | "rename": { 58 | "field": "srcport", 59 | "target_field": "source.port", 60 | "ignore_missing": true 61 | } 62 | }, 63 | { 64 | "convert": { 65 | "field": "source.port", 66 | "type": "long", 67 | "ignore_missing": true, 68 | "ignore_failure": true 69 | } 70 | }, 71 | { 72 | "set": { 73 | "field": "destination.address", 74 | "value": "{{destip}}", 75 | "if": "ctx.destip != null" 76 | } 77 | }, 78 | { 79 | "set": { 80 | "field": "destination.port", 81 | "value": "{{destport}}", 82 | "if": "ctx.destport != null" 83 | } 84 | }, 85 | { 86 | "convert": { 87 | "field": "destination.port", 88 | "type": "long", 89 | "ignore_missing": true, 90 | "ignore_failure": true 91 | } 92 | }, 93 | { 94 | "set": { 95 | "field": "timestamp", 96 | "value": "{{ts}}", 97 | "if": "ctx.ts != null" 98 | } 99 | }, 100 | { 101 | "rename": { 102 | "field": "action", 103 | "target_field": "event.action", 104 | "ignore_missing": true 105 | } 106 | }, 107 | { 108 | "lowercase": { 109 | "field": "event.action", 110 | "ignore_missing": true, 111 | "ignore_failure": true 112 | } 113 | }, 114 | { 115 | "rename": { 116 | "field": "duration", 117 | "target_field": "event.duration", 118 | "ignore_missing": true 119 | } 120 | }, 121 | { 122 | "convert": { 123 | "field": "event.duration", 124 | "type": "float", 125 | "ignore_missing": true, 126 | "ignore_failure": true 127 | } 128 | }, 129 | { 130 | "rename": { 131 | "field": "user_agent", 132 | "target_field": "user_agent.original", 133 | "ignore_missing": true 134 | } 135 | }, 136 | { 137 | "rename": { 138 | "field": "log_level", 139 | 
"target_field": "log.level", 140 | "ignore_missing": true 141 | } 142 | }, 143 | { 144 | "uppercase": { 145 | "field": "log.level", 146 | "ignore_missing": true, 147 | "ignore_failure": true 148 | } 149 | }, 150 | { 151 | "rename": { 152 | "field": "eventid", 153 | "target_field": "event.id", 154 | "ignore_missing": true 155 | } 156 | }, 157 | { 158 | "convert": { 159 | "field": "event.id", 160 | "type": "string", 161 | "ignore_missing": true, 162 | "ignore_failure": true 163 | } 164 | }, 165 | { 166 | "rename": { 167 | "field": "hostip", 168 | "target_field": "host.ip", 169 | "ignore_missing": true 170 | } 171 | }, 172 | { 173 | "append": { 174 | "field": "host.ip", 175 | "value": [ 176 | 177 | ], 178 | "ignore_failure": true, 179 | "if": "ctx.host?.ip != null" 180 | } 181 | } 182 | ] -------------------------------------------------------------------------------- /example/log/sample.log: -------------------------------------------------------------------------------- 1 | { "log_level": "debug", "eventid": 424242, "srcip": "192.0.2.1", "srcport": 42, "destip": "192.0.2.2", "destport": 42, "hostip": "192.0.2.42", "ts": "now", "action": "Testing", "duration": "1.1", "process":{ "args": "--yolo" }, "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36" } 2 | -------------------------------------------------------------------------------- /example/logstash.conf: -------------------------------------------------------------------------------- 1 | filter { 2 | mutate { 3 | copy => { '[srcip]' => '[source][address]' } 4 | copy => { '[srcip]' => '[source][ip]' } 5 | copy => { '[new_event][srcip]' => '[source][ip]' } 6 | rename => { '[srcport]' => '[source][port]' } 7 | convert => { '[source][port]' => 'integer' } 8 | copy => { '[destip]' => '[destination][address]' } 9 | copy => { '[destport]' => '[destination][port]' } 10 | convert => { '[destination][port]' => 'integer' } 11 | copy => { '[ts]' => '[timestamp]' } 12 | rename => { '[action]' => '[event][action]' } 13 | lowercase => [ '[event][action]' ] 14 | rename => { '[duration]' => '[event][duration]' } 15 | convert => { '[event][duration]' => 'float' } 16 | rename => { '[user_agent]' => '[user_agent][original]' } 17 | rename => { '[log_level]' => '[log][level]' } 18 | uppercase => [ '[log][level]' ] 19 | rename => { '[eventid]' => '[event][id]' } 20 | convert => { '[event][id]' => 'string' } 21 | rename => { '[hostip]' => '[host][ip]' } 22 | } 23 | 24 | date { 25 | match => ["[some_timestamp_field]", "UNIX_MS"] 26 | target => "[@timestamp]" 27 | } 28 | 29 | 30 | date { 31 | match => ["[some_other_timestamp]", "UNIX_MS"] 32 | target => "[@timestamp]" 33 | } 34 | 35 | 36 | date { 37 | match => ["[some_new_timestamp]", "UNIX"] 38 | target => "[destination_timestamp]" 39 | } 40 | 41 | if [host][ip] { 42 | ruby { 43 | code => "event.set('[host][ip]', Array(event.get('[host][ip]')) )" 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /example/logstash/1-input.conf: -------------------------------------------------------------------------------- 1 | input { 2 | stdin { codec => json_lines } 3 | file { 4 | codec => json_lines 5 | path => 'example/log-sample.log' 6 | } 7 | } 8 | 9 | filter { 10 | mutate { remove_field => [ 'host' ] } # Avoids conflict with ECS host.* 11 | } 12 | -------------------------------------------------------------------------------- /example/logstash/2-ecs-conversion.conf: 
-------------------------------------------------------------------------------- 1 | filter { 2 | mutate { 3 | copy => { '[srcip]' => '[source][address]' } 4 | copy => { '[srcip]' => '[source][ip]' } 5 | copy => { '[new_event][srcip]' => '[source][ip]' } 6 | rename => { '[srcport]' => '[source][port]' } 7 | convert => { '[source][port]' => 'integer' } 8 | copy => { '[destip]' => '[destination][address]' } 9 | copy => { '[destport]' => '[destination][port]' } 10 | convert => { '[destination][port]' => 'integer' } 11 | copy => { '[ts]' => '[timestamp]' } 12 | rename => { '[action]' => '[event][action]' } 13 | lowercase => [ '[event][action]' ] 14 | rename => { '[duration]' => '[event][duration]' } 15 | convert => { '[event][duration]' => 'float' } 16 | rename => { '[user_agent]' => '[user_agent][original]' } 17 | rename => { '[log_level]' => '[log][level]' } 18 | uppercase => [ '[log][level]' ] 19 | rename => { '[eventid]' => '[event][id]' } 20 | convert => { '[event][id]' => 'string' } 21 | rename => { '[hostip]' => '[host][ip]' } 22 | } 23 | 24 | date { 25 | match => ["[some_timestamp_field]", "UNIX_MS"] 26 | target => "[@timestamp]" 27 | } 28 | 29 | 30 | date { 31 | match => ["[some_other_timestamp]", "UNIX_MS"] 32 | target => "[@timestamp]" 33 | } 34 | 35 | 36 | date { 37 | match => ["[some_new_timestamp]", "UNIX"] 38 | target => "[destination_timestamp]" 39 | } 40 | 41 | if [host][ip] { 42 | ruby { 43 | code => "event.set('[host][ip]', Array(event.get('[host][ip]')) )" 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /example/logstash/3-output.conf: -------------------------------------------------------------------------------- 1 | output { 2 | stdout { 3 | # codec => json_lines # Alternately output JSON events 4 | codec => rubydebug 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /example/mapping.csv: -------------------------------------------------------------------------------- 1 | source_field,copy_action,format_action,timestamp_format,destination_field,Notes 2 | srcip,,,,source.address,Copying srcip to source.address 3 | srcip,,,,source.ip,Copying srcip a second time to source.ip as well 4 | new_event.srcip,,,,source.ip,This new event type could also populate source.ip 5 | some_timestamp_field,,parse_timestamp,,@timestamp,Convert this timestamp to UNIX_MS format 6 | some_other_timestamp,,,,@timestamp,Convert this timestamp to default UNIX_MS 7 | some_new_timestamp,,parse_timestamp,UNIX,destination_timestamp,Convert this timestamp to UNIX format 8 | srcport,rename,to_integer,,source.port, 9 | destip,,,,destination.address, 10 | destport,,to_integer,,destination.port, 11 | ts,copy,,,timestamp, 12 | action,rename,lowercase,,event.action, 13 | duration,rename,to_float,,event.duration, 14 | user_agent,rename,,,user_agent.original, 15 | log_level,rename,uppercase,,log.level, 16 | eventid,rename,to_string,,event.id,IDs should be strings! 
17 | successful,,to_boolean,,,Format source field to boolean type 18 | hostip,rename,to_array,,host.ip, 19 | process.args,,to_array,,,Format source field to an array 20 | -------------------------------------------------------------------------------- /lib/beats_pipeline_generator.rb: -------------------------------------------------------------------------------- 1 | require 'yaml' 2 | require_relative 'helpers' 3 | 4 | def generate_beats_pipeline(mapping) 5 | # copy/rename 6 | fields_to_copy = [] 7 | fields_to_rename = [] 8 | fields_to_convert = [] 9 | pipeline = [] 10 | 11 | mapping.each_pair do |_, row| 12 | if same_field_name?(row) 13 | next if row[:format_action].nil? 14 | end 15 | 16 | source_field = row[:source_field] 17 | 18 | if row[:destination_field] and not ['parse_timestamp'].include?(row[:format_action]) 19 | statement = { 20 | 'from' => source_field, 21 | 'to' => row[:destination_field], 22 | } 23 | if 'copy' == row[:copy_action] 24 | fields_to_copy << statement 25 | else 26 | fields_to_rename << statement 27 | end 28 | end 29 | 30 | if row[:format_action] 31 | affected_field = row[:destination_field] || row[:source_field] 32 | type = case row[:format_action] 33 | when 'to_boolean' 34 | 'boolean' 35 | when 'to_integer' 36 | 'long' 37 | when 'to_string' 38 | 'string' 39 | when 'to_float' 40 | 'float' 41 | end 42 | 43 | if type 44 | statement = { 'from' => affected_field, 'type' => type } 45 | fields_to_convert << statement 46 | 47 | elsif ['parse_timestamp'].include?(row[:format_action]) 48 | pipeline << { 49 | 'timestamp' => { 50 | 'field' => row[:source_field], 51 | 'target_field' => row[:destination_field], 52 | 'layouts' => row[:timestamp_format], 53 | 'timezone' => "UTC", 54 | 'ignore_missing' => true, 55 | 'ignore_failure' => true 56 | } 57 | } 58 | end 59 | end 60 | end 61 | 62 | if fields_to_copy.size > 0 63 | pipeline << { 64 | 'copy_fields' => { 'fields' => fields_to_copy, 65 | 'ignore_missing' => true, 'fail_on_error' => false } 66 | } 67 | end 68 | if fields_to_rename.size > 0 69 | pipeline << { 70 | 'rename' => { 'fields' => fields_to_rename, 71 | 'ignore_missing' => true, 'fail_on_error' => false } 72 | } 73 | end 74 | if fields_to_convert.size > 0 75 | pipeline << { 76 | 'convert' => { 'fields' => fields_to_convert, 77 | 'ignore_missing' => true, 'fail_on_error' => false } 78 | } 79 | end 80 | 81 | return pipeline 82 | end 83 | 84 | def output_beats_pipeline(pipeline, output_dir) 85 | file_name = output_dir.join('beats.yml') 86 | File.open(file_name, 'w') do |f| 87 | yaml = YAML.dump({ 'processors' => pipeline}) 88 | f.write(yaml.gsub(/^---./m, '')) # Making concatenation easier, to build a full Beats config 89 | end 90 | return file_name 91 | end 92 | -------------------------------------------------------------------------------- /lib/elasticsearch_pipeline_generator.rb: -------------------------------------------------------------------------------- 1 | require 'json' 2 | require_relative 'helpers' 3 | 4 | def generate_elasticsearch_pipeline(mapping) 5 | pipeline = [] 6 | mapping.each_pair do |_, row| 7 | if same_field_name?(row) 8 | next if row[:format_action].nil? 
9 | end 10 | 11 | source_field = row[:source_field] 12 | 13 | # copy/rename 14 | if row[:destination_field] and not ['parse_timestamp'].include?(row[:format_action]) 15 | if 'copy' == row[:copy_action] 16 | processor = { 17 | set: { 18 | field: row[:destination_field], 19 | value: '{{' + source_field + '}}', 20 | if: field_presence_predicate(source_field), 21 | } 22 | } 23 | 24 | else 25 | processor = { 26 | rename: { 27 | field: source_field, 28 | target_field: row[:destination_field], 29 | ignore_missing: true 30 | } 31 | } 32 | end 33 | pipeline << processor 34 | end 35 | 36 | processor = nil 37 | if row[:format_action] 38 | # Modify the source_field if there's no destination_field (no rename, just a type change) 39 | affected_field = row[:destination_field] || row[:source_field] 40 | 41 | type = case row[:format_action] 42 | when 'to_boolean' 43 | 'boolean' 44 | when 'to_integer' 45 | 'long' 46 | when 'to_string' 47 | 'string' 48 | when 'to_float' 49 | 'float' 50 | end 51 | if type 52 | processor = { 53 | convert: { 54 | field: affected_field, 55 | type: type, 56 | ignore_missing: true, 57 | ignore_failure: true, 58 | } 59 | } 60 | 61 | elsif ['uppercase', 'lowercase'].include?(row[:format_action]) 62 | processor = { 63 | row[:format_action] => { 64 | field: affected_field, 65 | ignore_missing: true, 66 | ignore_failure: true, 67 | } 68 | } 69 | 70 | elsif ['to_array'].include?(row[:format_action]) 71 | processor = { 72 | 'append' => { 73 | field: affected_field, 74 | value: [], 75 | ignore_failure: true, 76 | if: field_presence_predicate(affected_field), 77 | } 78 | } 79 | 80 | elsif ['parse_timestamp'].include?(row[:format_action]) 81 | processor = { 82 | 'date' => { 83 | field: row[:source_field], 84 | target_field: row[:destination_field], 85 | formats: [ row[:timestamp_format] ], 86 | timezone: "UTC", 87 | ignore_failure: true 88 | } 89 | } 90 | end 91 | 92 | end 93 | pipeline << processor if processor # Skip lower/upper and others not done by convert processor 94 | end 95 | pipeline 96 | end 97 | 98 | def field_presence_predicate(field) 99 | if '@timestamp' == field 100 | return "ctx.containsKey('@timestamp')" 101 | end 102 | field_levels = field.split('.') 103 | if field_levels.size == 1 104 | return "ctx.#{field} != null" 105 | end 106 | 107 | null_safe = field_levels[0..-2].map { |f| "#{f}?" }.join('.') 108 | return "ctx.#{null_safe}.#{field_levels.last} != null" 109 | end 110 | 111 | def output_elasticsearch_pipeline(pipeline, output_dir) 112 | file_name = output_dir.join('elasticsearch.json') 113 | File.open(file_name, 'w') do |f| 114 | f.write JSON.pretty_generate(pipeline) 115 | end 116 | return file_name 117 | end 118 | -------------------------------------------------------------------------------- /lib/helpers.rb: -------------------------------------------------------------------------------- 1 | def same_field_name?(row) 2 | return row[:destination_field].nil? || 3 | row[:source_field] == row[:destination_field] 4 | end 5 | -------------------------------------------------------------------------------- /lib/logstash_pipeline_generator.rb: -------------------------------------------------------------------------------- 1 | require_relative 'helpers' 2 | 3 | def generate_logstash_pipeline(mapping) 4 | mutations = [] # Most things are in the same mutate block 5 | dates = [] 6 | array_fields = [] 7 | mapping.each_pair do |_, row| 8 | if same_field_name?(row) 9 | next if row[:format_action].nil? 
10 | end 11 | 12 | source_field = row[:source_field] 13 | 14 | if row[:destination_field] and not ['parse_timestamp'].include?(row[:format_action]) 15 | if 'copy' == row[:copy_action] 16 | mutations << { 'copy' => { lsf(source_field) => lsf(row[:destination_field]) } } 17 | else 18 | mutations << { 'rename' => { lsf(source_field) => lsf(row[:destination_field]) } } 19 | end 20 | end 21 | 22 | if row[:format_action] 23 | affected_field = row[:destination_field] || row[:source_field] 24 | type = case row[:format_action] 25 | when 'to_boolean' 26 | 'boolean' 27 | when 'to_integer' 28 | 'integer' 29 | when 'to_string' 30 | 'string' 31 | when 'to_float' 32 | 'float' 33 | end 34 | if type 35 | mutations << { 'convert' => { lsf(affected_field) => type } } 36 | elsif 'uppercase' == row[:format_action] 37 | mutations << { 'uppercase' => [lsf(affected_field)] } 38 | elsif 'lowercase' == row[:format_action] 39 | mutations << { 'lowercase' => [lsf(affected_field)] } 40 | elsif 'to_array' == row[:format_action] 41 | array_fields << lsf(affected_field) 42 | elsif ['parse_timestamp'].include?(row[:format_action]) 43 | dates << { 44 | 'date' => { 45 | 'match' => [ lsf(row[:source_field]), row[:timestamp_format] ], 46 | 'target' => lsf(row[:destination_field]) 47 | } 48 | } 49 | end 50 | end 51 | end 52 | 53 | return mutations, dates, array_fields 54 | end 55 | 56 | def render_mutate_line(line) 57 | raise "Expected one key at root of #{line}" if line.keys.size != 1 58 | action = line.keys.first 59 | if line[action].is_a? Hash 60 | key, value = line[action].to_a.flatten 61 | return "#{action} => { '#{key}' => '#{value}' }" 62 | elsif line[action].is_a? Array 63 | return "#{action} => [ '#{line[action].first}' ]" 64 | end 65 | end 66 | 67 | def render_date_line(line) 68 | raise "Expected one key at root of #{line}" if line.keys.size != 1 69 | action = line.keys.first 70 | if line[action].is_a? 
Hash 71 | match = line[action]["match"] 72 | target = line[action]["target"] 73 | return """ 74 | date { 75 | match => #{match} 76 | target => \"#{target}\" 77 | } 78 | """ 79 | end 80 | end 81 | 82 | def lsf(field) 83 | field.split('.').map{|f| "[#{f}]"}.join 84 | end 85 | 86 | def output_logstash_pipeline(mutations, dates, array_fields, output_dir) 87 | file_name = output_dir.join('logstash.conf') 88 | File.open(file_name, 'w') do |f| 89 | 90 | f.write(<<-CONF) 91 | filter { 92 | mutate { 93 | #{mutations.map{|line| render_mutate_line(line)}.join("\n ")} 94 | } 95 | CONF 96 | 97 | if dates.length > 0 98 | f.write(<<-DATES) 99 | #{dates.map{|line| render_date_line(line)}.join("\n ")} 100 | DATES 101 | end 102 | 103 | array_fields.each do |array_field| 104 | f.write(<<-RB) 105 | if #{array_field} { 106 | ruby { 107 | code => "event.set('#{array_field}', Array(event.get('#{array_field}')) )" 108 | } 109 | } 110 | RB 111 | end 112 | f.write("}\n") 113 | end 114 | return file_name 115 | end 116 | -------------------------------------------------------------------------------- /lib/mapping_loader.rb: -------------------------------------------------------------------------------- 1 | require 'csv' 2 | 3 | REQUIRED_CSV_HEADERS = [ 4 | 'source_field', 5 | 'destination_field' 6 | ] 7 | 8 | KNOWN_CSV_HEADERS = REQUIRED_CSV_HEADERS + [ 9 | 'format_action', 10 | 'copy_action', 11 | 'timestamp_format' 12 | ] 13 | 14 | ACCEPTED_FORMAT_ACTIONS = [ 15 | 'uppercase', 16 | 'lowercase', 17 | 'to_boolean', 18 | 'to_integer', 19 | 'to_float', 20 | 'to_array', 21 | 'to_string', 22 | 'parse_timestamp', 23 | ].sort 24 | 25 | def read_csv(file_name) 26 | csv = CSV.read(file_name, headers: true) 27 | unless (REQUIRED_CSV_HEADERS - csv.headers).empty? 28 | abort "Required headers are missing in the CSV.\n" + 29 | " Missing: #{REQUIRED_CSV_HEADERS - csv.headers}.\n" + 30 | " Required: #{REQUIRED_CSV_HEADERS}.\n" + 31 | " Found: #{csv.headers}" 32 | end 33 | return csv_to_mapping(csv) 34 | end 35 | 36 | def csv_to_mapping(csv) 37 | mapping = {} 38 | csv.each do |row| 39 | # skip rows that don't have a source field 40 | next if row['source_field'].nil? || 41 | row['source_field'].strip.empty? 42 | 43 | # skip if no destination field and no format field provided 44 | # since it's possible to reformat a source field by itself 45 | next if ( row['destination_field'].nil? || 46 | row['destination_field'].strip.empty? ) and 47 | ( row['format_field'].nil? || 48 | row['format_field'].strip.empty? 
) 49 | 50 | source_field = row['source_field'].strip 51 | destination_field = row['destination_field'] && row['destination_field'].strip || '' 52 | 53 | mapping[source_field + '+' + destination_field] = { 54 | # required fields 55 | source_field: source_field, 56 | destination_field: destination_field, 57 | # optional fields 58 | copy_action: (row['copy_action'] && row['copy_action'].strip), 59 | format_action: (row['format_action'] && row['format_action'].strip), 60 | timestamp_format: (row['timestamp_format'] && row['timestamp_format'].strip), 61 | } 62 | end 63 | return mapping 64 | end 65 | 66 | def make_mapping_explicit(raw_mapping, options) 67 | mapping = {} 68 | raw_mapping.each_pair do |key, row| 69 | mapping[key] = row.dup 70 | mapping[key][:copy_action] ||= options[:copy_action] 71 | 72 | # If @timestamp is the destination and the user does not 73 | # specify how to format the conversion, assume we're 74 | # converting it to UNIX_MS 75 | if mapping[key][:destination_field] == '@timestamp' and 76 | ( mapping[key][:timestamp_format].nil? || 77 | mapping[key][:timestamp_format].strip.empty? ) 78 | mapping[key][:format_action] = 'parse_timestamp' 79 | mapping[key][:timestamp_format] = 'UNIX_MS' 80 | 81 | # If the destination field is empty but a format action is 82 | # provided, then assume we're formating the source field. 83 | elsif ( mapping[key][:destination_field].nil? || 84 | mapping[key][:destination_field].strip.empty? ) and not 85 | ( mapping[key][:format_action].nil? || 86 | mapping[key][:format_action].strip.empty? ) 87 | puts mapping[key][:source_field].inspect 88 | mapping[key][:destination_field] = mapping[key][:source_field] 89 | end 90 | end 91 | validate_mapping!(mapping) 92 | return mapping 93 | end 94 | 95 | def validate_mapping!(mapping) 96 | mapping.each_pair do |key, row| 97 | if row[:format_action] and not ACCEPTED_FORMAT_ACTIONS.include?(row[:format_action]) 98 | raise "Unsupported format_action: #{row[:format_action]}, expected one of #{ACCEPTED_FORMAT_ACTIONS}" 99 | end 100 | end 101 | end 102 | -------------------------------------------------------------------------------- /lib/options_parser.rb: -------------------------------------------------------------------------------- 1 | require 'optparse' 2 | require 'pathname' 3 | 4 | ## Configuration 5 | 6 | def parse_options!(argv) 7 | # defaults 8 | options = { 9 | :action => :main, 10 | :copy_action => 'copy', 11 | } 12 | 13 | parser = OptionParser.new do |opts| 14 | opts.banner = [ 15 | SYNOPSIS, 16 | "", 17 | ].join("\n") 18 | opts.separator "Options:" 19 | 20 | # More examples at http://apidock.com/ruby/OptionParser 21 | 22 | opts.on('-f', '--file FILE', "Input CSV file.") do |value| 23 | options[:file] = value 24 | end 25 | 26 | opts.on('-o', '--output DIR', "Output directory. Defaults to parent dir of --file.") do |value| 27 | options[:output] = value 28 | end 29 | 30 | opts.on('--copy-action COPY_ACTION', "Default action for field renames. Acceptable values are: copy, rename. Default is copy.") do |value| 31 | options[:copy_action] = value 32 | end 33 | 34 | 35 | # opts.on("--log-level LEVEL", "Log level. Default is info. 
Supports all of Ruby's Logger levels.") do |value| 36 | # level_name = value.upcase 37 | # if Logger.const_defined?(level_name) && Logger.const_get(level_name).is_a?(Integer) 38 | # options[:log_level] = Logger.const_get(level_name) 39 | # end 40 | # end 41 | 42 | opts.on_tail("--debug", "Shorthand for --log-level=debug") do |value| 43 | $debug = true 44 | require 'pry' 45 | # options[:log_level] = Logger::DEBUG 46 | end 47 | 48 | opts.on_tail('-h', '--help', "Display help") do 49 | options[:action] = :help 50 | end 51 | 52 | opts.on_tail('-v', '--version', "Display version and exit") do 53 | options[:action] = :version 54 | end 55 | 56 | end 57 | 58 | parser.parse!(argv) 59 | 60 | if :version == options[:action] 61 | puts VERSION_STRING 62 | exit 63 | end 64 | 65 | if :help == options[:action] 66 | puts VERSION_STRING, '' 67 | puts parser.to_s 68 | exit 69 | end 70 | 71 | if options[:file].nil? 72 | abort "Use flag '--file FILE' to specify the mapping file to convert." 73 | end 74 | 75 | options = smart_output_default(options) 76 | 77 | options 78 | end 79 | 80 | def smart_output_default(raw_options) 81 | options = raw_options.dup 82 | if options[:output] 83 | output = Pathname.new(options[:output]) 84 | else 85 | if options[:file] 86 | output = Pathname.new(options[:file]).parent 87 | else 88 | output = Pathname.new('.') 89 | end 90 | end 91 | options[:output] = output.expand_path 92 | options 93 | end 94 | -------------------------------------------------------------------------------- /test/unit/beats_pipeline_generator_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | require_relative '../../lib/beats_pipeline_generator' 3 | 4 | class BeatsPipelineGeneratorTest < Minitest::Test 5 | def test_copy_and_rename_pipeline 6 | mapping = { 7 | 'old1+new1' => { source_field: 'old1', destination_field: 'new1', copy_action: 'copy' }, 8 | 'old2+new2' => { source_field: 'old2', destination_field: 'new2', copy_action: 'rename' }, 9 | 'old3+new3' => { source_field: 'old3', destination_field: 'new3', copy_action: 'copy' }, 10 | } 11 | pl = generate_beats_pipeline(mapping) 12 | copy_processor = pl[0] 13 | rename_processor = pl[1] 14 | assert_equal( 15 | { 'copy_fields' => { 16 | 'fields' => [ {'from' => 'old1', 'to' => 'new1'}, {'from' => 'old3', 'to' => 'new3'} ], 17 | 'ignore_missing' => true, 'fail_on_error' => false 18 | } }, 19 | copy_processor 20 | ) 21 | assert_equal( 22 | { 'rename' => { 23 | 'fields' => [ {'from' => 'old2', 'to' => 'new2'} ], 24 | 'ignore_missing' => true, 'fail_on_error' => false 25 | } }, 26 | rename_processor 27 | ) 28 | end 29 | 30 | def test_non_renamed_beats 31 | mapping = { 32 | 'field1+field1' => { source_field: 'field1', destination_field: 'field1', copy_action: 'copy' }, 33 | 'field2+' => { source_field: 'field2', destination_field: nil, copy_action: 'copy' }, 34 | } 35 | pl = generate_beats_pipeline(mapping) 36 | assert_equal([], pl, "No rename processor should be added when there's no rename to perform") 37 | end 38 | 39 | def test_duplicate_source_fields_same_destination 40 | mapping = { 41 | 'field1+field3' => { source_field: 'field1', destination_field: 'field3', copy_action: 'copy' }, 42 | 'field2+field3' => { source_field: 'field2', destination_field: 'field3', copy_action: 'copy' }, 43 | 'field4+field5' => { source_field: 'field4', destination_field: 'field5', copy_action: 'copy' }, 44 | 'field4+field6' => { source_field: 'field4', destination_field: 'field6', copy_action: 'copy' }, 45 
| } 46 | 47 | pl = generate_beats_pipeline(mapping) 48 | 49 | assert_equal( 50 | { "copy_fields" => { 51 | "fields" => [ 52 | {"from"=>"field1", "to"=>"field3"}, 53 | {"from"=>"field2", "to"=>"field3"}, 54 | {"from"=>"field4", "to"=>"field5"}, 55 | {"from"=>"field4", "to"=>"field6"}], 56 | "ignore_missing"=>true, 57 | "fail_on_error"=>false}}, 58 | pl.first 59 | ) 60 | end 61 | 62 | def test_dates 63 | mapping = { 64 | 'field1+@timestamp' => 65 | { source_field: 'field1', 66 | destination_field: '@timestamp', 67 | format_action: 'parse_timestamp', 68 | timestamp_format: 'UNIX_MS' }, 69 | 'field2+@timestamp' => 70 | { source_field: 'field2', 71 | destination_field: '@timestamp', 72 | format_action: 'parse_timestamp', 73 | timestamp_format: 'UNIX' }, 74 | } 75 | 76 | pl = generate_beats_pipeline(mapping) 77 | 78 | assert_equal( 79 | pl, 80 | [ 81 | { 82 | "timestamp" => { 83 | "field" => "field1", 84 | "target_field" => "@timestamp", 85 | "layouts" => "UNIX_MS", 86 | "timezone" => "UTC", 87 | "ignore_missing" => true, 88 | "ignore_failure" => true 89 | } 90 | }, 91 | { 92 | "timestamp" => { 93 | "field" => "field2", 94 | "target_field" => "@timestamp", 95 | "layouts" => "UNIX", 96 | "timezone" => "UTC", 97 | "ignore_missing" => true, 98 | "ignore_failure"=>true 99 | } 100 | } 101 | ] 102 | ) 103 | end 104 | 105 | end 106 | -------------------------------------------------------------------------------- /test/unit/elasticsearch_pipeline_generator_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | require_relative '../../lib/elasticsearch_pipeline_generator' 3 | 4 | class ElasticsearchPipelineGeneratorTest < Minitest::Test 5 | def test_copy_processor 6 | mapping = { 'old_field+new_field' => { 7 | source_field: 'old_field', destination_field: 'new_field', copy_action: 'copy' 8 | } } 9 | pl = generate_elasticsearch_pipeline(mapping) 10 | processor = pl.first 11 | assert_equal( 12 | { set: { field: 'new_field', value: '{{old_field}}', if: 'ctx.old_field != null' } }, 13 | processor 14 | ) 15 | end 16 | 17 | def test_rename_processor 18 | mapping = { 'old_field+new_field' => { 19 | source_field: 'old_field', destination_field: 'new_field', copy_action: 'rename' 20 | } } 21 | pl = generate_elasticsearch_pipeline(mapping) 22 | processor = pl.first 23 | assert_equal( 24 | { rename: { field: 'old_field', target_field: 'new_field', ignore_missing: true } }, 25 | processor 26 | ) 27 | end 28 | 29 | def test_non_renamed_elasticsearch 30 | mapping = { 31 | 'field1+field1' => { source_field: 'field1', destination_field: 'field1', copy_action: 'copy' }, 32 | 'field2+' => { source_field: 'field2', destination_field: nil, copy_action: 'copy' }, 33 | } 34 | pl = generate_elasticsearch_pipeline(mapping) 35 | assert_equal([], pl, "No rename processor should be added when there's no rename to perform") 36 | end 37 | 38 | def test_field_presence_predicate 39 | assert_equal('ctx.level != null', 40 | field_presence_predicate('level')) 41 | assert_equal('ctx.suricata?.eve?.http?.hostname != null', 42 | field_presence_predicate('suricata.eve.http.hostname')) 43 | 44 | assert_equal("ctx.containsKey('@timestamp')", 45 | field_presence_predicate('@timestamp')) 46 | end 47 | 48 | def test_duplicate_source_fields_same_destination 49 | mapping = { 50 | 'field1+field3' => { source_field: 'field1', destination_field: 'field3', copy_action: 'copy' }, 51 | 'field2+field3' => { source_field: 'field2', destination_field: 'field3', copy_action: 'copy' }, 52 | 'field4+field5'
=> { source_field: 'field4', destination_field: 'field5', copy_action: 'copy' }, 53 | 'field4+field6' => { source_field: 'field4', destination_field: 'field6', copy_action: 'copy' }, 54 | } 55 | 56 | pl = generate_elasticsearch_pipeline(mapping) 57 | 58 | assert_equal(4, pl.length, "Expected 4 processors") 59 | assert_equal( 60 | {:set=>{:field=>"field3", :value=>"{{field1}}", :if=>"ctx.field1 != null"}}, 61 | pl[0] 62 | ) 63 | assert_equal( 64 | {:set=>{:field=>"field3", :value=>"{{field2}}", :if=>"ctx.field2 != null"}}, 65 | pl[1] 66 | ) 67 | assert_equal( 68 | {:set=>{:field=>"field5", :value=>"{{field4}}", :if=>"ctx.field4 != null"}}, 69 | pl[2] 70 | ) 71 | assert_equal( 72 | {:set=>{:field=>"field6", :value=>"{{field4}}", :if=>"ctx.field4 != null"}}, 73 | pl[3] 74 | ) 75 | end 76 | 77 | def test_dates 78 | mapping = { 79 | 'field1+@timestamp' => 80 | { source_field: 'field1', 81 | destination_field: '@timestamp', 82 | format_action: 'parse_timestamp', 83 | timestamp_format: 'UNIX_MS' }, 84 | 'field2+@timestamp' => 85 | { source_field: 'field2', 86 | destination_field: '@timestamp', 87 | format_action: 'parse_timestamp', 88 | timestamp_format: 'UNIX' }, 89 | } 90 | 91 | pl = generate_elasticsearch_pipeline(mapping) 92 | 93 | assert_equal( 94 | { "date" => { 95 | :field => "field1", 96 | :target_field => "@timestamp", 97 | :formats => ["UNIX_MS"], 98 | :timezone => "UTC", 99 | :ignore_failure => true}}, 100 | pl[0] 101 | ) 102 | 103 | assert_equal( 104 | { "date" => { 105 | :field => "field2", 106 | :target_field => "@timestamp", 107 | :formats => ["UNIX"], 108 | :timezone => "UTC", 109 | :ignore_failure => true}}, 110 | pl[1] 111 | ) 112 | end 113 | end 114 | -------------------------------------------------------------------------------- /test/unit/helpers_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | require_relative '../../lib/helpers' 3 | 4 | class HelpersTest < Minitest::Test 5 | def test_same_field_name 6 | assert same_field_name?({source_field: 'foo', destination_field: nil}) 7 | assert same_field_name?({source_field: 'foo', destination_field: 'foo'}) 8 | 9 | assert_equal false, same_field_name?({source_field: 'foo', destination_field: 'bar'}) 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /test/unit/logstash_pipeline_generator_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | require_relative '../../lib/logstash_pipeline_generator' 3 | 4 | class LogstashPipelineGeneratorTest < Minitest::Test 5 | def test_logstash_pipeline 6 | mapping = { 7 | 'old1' => { source_field: 'old1', destination_field: 'new1', copy_action: 'copy' }, 8 | 'old2' => { source_field: 'old2', destination_field: 'new2', copy_action: 'rename' }, 9 | 'old3' => { source_field: 'old3', destination_field: 'new3', copy_action: 'copy' }, 10 | } 11 | mutations, _, _ = generate_logstash_pipeline(mapping) 12 | old1_processor = mutations[0] 13 | old2_processor = mutations[1] 14 | old3_processor = mutations[2] 15 | assert_equal( { 'copy' => { '[old1]' => '[new1]' } }, old1_processor) 16 | assert_equal( { 'rename' => { '[old2]' => '[new2]' } }, old2_processor) 17 | assert_equal( { 'copy' => { '[old3]' => '[new3]' } }, old3_processor) 18 | end 19 | 20 | def test_non_renamed_ls 21 | mapping = { 22 | 'field1' => { source_field: 'field1', destination_field: 'field1', copy_action: 'copy' }, 23 | 'field2' => { 
source_field: 'field2', destination_field: nil, copy_action: 'copy' }, 24 | } 25 | mutations, _, _ = generate_logstash_pipeline(mapping) 26 | assert_equal([], mutations, "No rename processor should be added when there's no rename to perform") 27 | end 28 | 29 | def test_render_ls_field_name 30 | assert_equal("[field]", lsf("field")) 31 | assert_equal("[@field]", lsf("@field")) 32 | assert_equal("[log][level]", lsf("log.level")) 33 | end 34 | 35 | def test_render_mutate_line_simple_hash 36 | assert_equal( 37 | "copy => { '[src_field]' => '[dest_field]' }", 38 | render_mutate_line('copy' => {'[src_field]' => '[dest_field]'}) 39 | ) 40 | assert_equal( 41 | "convert => { '[event][duration]' => 'float' }", 42 | render_mutate_line('convert' => {'[event][duration]' => 'float'}) 43 | ) 44 | end 45 | 46 | def test_render_mutate_line_array 47 | assert_equal( 48 | "uppercase => [ '[log][level]' ]", 49 | render_mutate_line('uppercase' => ['[log][level]']) 50 | ) 51 | end 52 | 53 | def test_duplicate_source_fields_same_destination 54 | mapping = { 55 | 'field1+field3' => { source_field: 'field1', destination_field: 'field3', copy_action: 'copy' }, 56 | 'field2+field3' => { source_field: 'field2', destination_field: 'field3', copy_action: 'copy' }, 57 | 'field4+field5' => { source_field: 'field4', destination_field: 'field5', copy_action: 'copy' }, 58 | 'field4+field6' => { source_field: 'field4', destination_field: 'field6', copy_action: 'copy' }, 59 | } 60 | 61 | mutations, _, _ = generate_logstash_pipeline(mapping) 62 | 63 | assert_equal( 64 | [ {"copy" => {"[field1]" => "[field3]"}}, 65 | {"copy" => {"[field2]" => "[field3]"}}, 66 | {"copy" => {"[field4]" => "[field5]"}}, 67 | {"copy" => {"[field4]" => "[field6]"}}], 68 | mutations 69 | ) 70 | end 71 | 72 | def test_dates 73 | mapping = { 74 | 'field1+@timestamp' => 75 | { source_field: 'field1', 76 | destination_field: '@timestamp', 77 | format_action: 'parse_timestamp', 78 | timestamp_format: 'UNIX_MS' }, 79 | 'field2+@timestamp' => 80 | { source_field: 'field2', 81 | destination_field: '@timestamp', 82 | format_action: 'parse_timestamp', 83 | timestamp_format: 'UNIX' }, 84 | } 85 | 86 | mutations, dates, array_fields = generate_logstash_pipeline(mapping) 87 | 88 | assert_equal( 89 | [], 90 | mutations 91 | ) 92 | 93 | assert_equal( 94 | [], 95 | array_fields 96 | ) 97 | 98 | assert_equal( 99 | {"date" => { 100 | "match" => ["[field1]", "UNIX_MS"], 101 | "target" => "[@timestamp]" 102 | }}, 103 | dates[0] 104 | ) 105 | 106 | assert_equal( 107 | {"date" => { 108 | "match" => ["[field2]", "UNIX"], 109 | "target" => "[@timestamp]" 110 | }}, 111 | dates[1] 112 | ) 113 | 114 | end 115 | end 116 | -------------------------------------------------------------------------------- /test/unit/mapping_loader_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | require_relative '../../lib/mapping_loader' 3 | 4 | class MappingLoaderTest < Minitest::Test 5 | def test_explicit_mapping_default_rename_action 6 | raw_mapping = { 7 | 'copied_field' => { 8 | source_field: 'copied_field', 9 | destination_field: 'new.copied_field', 10 | copy_action: 'copy' 11 | }, 12 | 'default_field' => { 13 | source_field: 'default_field', 14 | destination_field: 'new.default_field' 15 | }, 16 | 'renamed_field' => { 17 | source_field: 'renamed_field', 18 | destination_field: 'new.renamed_field', 19 | copy_action: 'rename' 20 | }, 21 | } 22 | options = { copy_action: 'copy' } 23 | mapping = 
make_mapping_explicit(raw_mapping, options) 24 | assert_equal('copy', mapping['default_field'][:copy_action]) 25 | assert_equal('copy', mapping['copied_field'][:copy_action]) 26 | assert_equal('rename', mapping['renamed_field'][:copy_action]) 27 | end 28 | 29 | def test_csv_to_mapping_cleans_up_spaces_ignores_unknown_keys 30 | # Note: an instance of CSV behaves a lot like an array of hashes 31 | csv = [{ 'source_field' => ' my_field ', 32 | 'destination_field' => "another_field\t", 33 | 'copy_action' => ' copy', 34 | }] 35 | expected_mapping = { 36 | 'my_field+another_field' => { 37 | source_field: 'my_field', 38 | destination_field: 'another_field', 39 | copy_action: 'copy', 40 | format_action: nil, 41 | timestamp_format: nil 42 | } 43 | } 44 | assert_equal(expected_mapping, csv_to_mapping(csv)) 45 | end 46 | 47 | def test_validate! 48 | assert_raises(RuntimeError) do 49 | validate_mapping!({ 'foo' => {:format_action => 'foo'}}) 50 | end 51 | 52 | # If no errors, this is good (no need for assertion) 53 | validate_mapping!({ 'foo' => {:format_action => 'to_array'}}) 54 | end 55 | 56 | def test_mapping_loader_skips_missing_fields 57 | csv = [ 58 | # skipped 59 | { 'source_field' => nil, 'destination_field' => nil }, 60 | { 'source_field' => nil, 'destination_field' => 'fieldname' }, 61 | { 'source_field' => ' ', 'destination_field' => ' ' }, 62 | { 'source_field' => "\t", 'destination_field' => 'fieldname' }, 63 | { 'source_field' => 'correct_fieldname', 'destination_field' => nil }, 64 | # Not skipped 65 | { 'source_field' => 'original_fieldname', 'destination_field' => 'new_fieldname' }, 66 | ] 67 | 68 | expected_mapping = { 69 | 'original_fieldname+new_fieldname' => { 70 | source_field: 'original_fieldname', 71 | destination_field: 'new_fieldname', 72 | copy_action: nil, 73 | format_action: nil, 74 | timestamp_format: nil 75 | } 76 | } 77 | assert_equal(expected_mapping, csv_to_mapping(csv)) 78 | end 79 | 80 | def test_mapping_timestamp 81 | csv = [ 82 | { 'source_field' => 'some_timestamp_field1', 83 | 'destination_field' => '@timestamp' }, 84 | { 'source_field' => 'some_timestamp_field2', 85 | 'destination_field' => '@timestamp', 86 | 'format_action' => 'parse_timestamp' }, 87 | { 'source_field' => 'some_timestamp_field3', 88 | 'destination_field' => '@timestamp', 89 | 'format_action' => 'parse_timestamp', 90 | 'timestamp_format' => 'UNIX' }, 91 | { 'source_field' => 'some_timestamp_field4', 92 | 'destination_field' => 'some_other_timestamp', 93 | 'format_action' => 'parse_timestamp', 94 | 'timestamp_format' => 'UNIX' }, 95 | { 'source_field' => 'some_timestamp_field5', 96 | 'destination_field' => 'some_other_timestamp', 97 | 'format_action' => 'parse_timestamp', 98 | 'timestamp_format' => 'ISO8601' }, 99 | { 'source_field' => 'some_timestamp_field6', 100 | 'destination_field' => 'some_other_timestamp', 101 | 'format_action' => 'parse_timestamp', 102 | 'timestamp_format' => 'TAI64N' }, 103 | 104 | ] 105 | 106 | options = { copy_action: 'copy' } 107 | mapping = csv_to_mapping(csv) 108 | explicit_mapping = make_mapping_explicit(mapping, options) 109 | 110 | assert_equal(explicit_mapping['some_timestamp_field1+@timestamp'], 111 | { :source_field => "some_timestamp_field1", 112 | :destination_field => "@timestamp", 113 | :copy_action => "copy", 114 | :format_action => "parse_timestamp", 115 | :timestamp_format => "UNIX_MS" 116 | } 117 | ) 118 | assert_equal(explicit_mapping['some_timestamp_field2+@timestamp'], 119 | { :source_field => "some_timestamp_field2", 120 | :destination_field => 
"@timestamp", 121 | :copy_action => "copy", 122 | :format_action => "parse_timestamp", 123 | :timestamp_format => "UNIX_MS" 124 | } 125 | ) 126 | assert_equal(explicit_mapping['some_timestamp_field3+@timestamp'], 127 | { :source_field => "some_timestamp_field3", 128 | :destination_field => "@timestamp", 129 | :copy_action => "copy", 130 | :format_action => "parse_timestamp", 131 | :timestamp_format => "UNIX" 132 | } 133 | ) 134 | assert_equal(explicit_mapping['some_timestamp_field4+some_other_timestamp'], 135 | { :source_field => "some_timestamp_field4", 136 | :destination_field => "some_other_timestamp", 137 | :copy_action => "copy", 138 | :format_action => "parse_timestamp", 139 | :timestamp_format => "UNIX" 140 | } 141 | ) 142 | assert_equal(explicit_mapping['some_timestamp_field5+some_other_timestamp'], 143 | { :source_field => "some_timestamp_field5", 144 | :destination_field => "some_other_timestamp", 145 | :copy_action => "copy", 146 | :format_action => "parse_timestamp", 147 | :timestamp_format => "ISO8601" 148 | } 149 | ) 150 | assert_equal(explicit_mapping['some_timestamp_field6+some_other_timestamp'], 151 | { :source_field => "some_timestamp_field6", 152 | :destination_field => "some_other_timestamp", 153 | :copy_action => "copy", 154 | :format_action => "parse_timestamp", 155 | :timestamp_format => "TAI64N" 156 | } 157 | ) 158 | 159 | end 160 | end 161 | -------------------------------------------------------------------------------- /test/unit/options_parser_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | require_relative '../../lib/options_parser' 3 | 4 | class OptionsParserTest < Minitest::Test 5 | def test_smart_output_from_input_file 6 | assert_equal( 7 | { file: '/home/bob/mapping.csv', output: Pathname.new('/home/bob') }, 8 | smart_output_default({ file: '/home/bob/mapping.csv' }) 9 | ) 10 | end 11 | 12 | def test_smart_output_default_explicit_output 13 | assert_equal( 14 | { file: 'mapping.csv', output: Pathname.new('/tmp') }, 15 | smart_output_default({ file: 'mapping.csv', output: '/tmp' }) 16 | ) 17 | end 18 | 19 | def test_output_dir_is_expanded 20 | current_user_home = Pathname.new('~').expand_path 21 | assert_equal( 22 | { file: 'mapping.csv', output: current_user_home }, 23 | smart_output_default({ file: 'mapping.csv', output: '~' }) 24 | ) 25 | end 26 | end 27 | --------------------------------------------------------------------------------