├── .gitignore ├── .ruby-version ├── Gemfile ├── Gemfile.lock ├── LICENSE.txt ├── README.md ├── Rakefile ├── ecs-mapper ├── example ├── README.md ├── beats.yml ├── beats │ ├── config_header.yml │ └── filebeat.yml ├── elasticsearch.json ├── log │ └── sample.log ├── logstash.conf ├── logstash │ ├── 1-input.conf │ ├── 2-ecs-conversion.conf │ └── 3-output.conf └── mapping.csv ├── lib ├── beats_pipeline_generator.rb ├── elasticsearch_pipeline_generator.rb ├── helpers.rb ├── logstash_pipeline_generator.rb ├── mapping_loader.rb └── options_parser.rb └── test └── unit ├── beats_pipeline_generator_test.rb ├── elasticsearch_pipeline_generator_test.rb ├── helpers_test.rb ├── logstash_pipeline_generator_test.rb ├── mapping_loader_test.rb └── options_parser_test.rb /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | -------------------------------------------------------------------------------- /.ruby-version: -------------------------------------------------------------------------------- 1 | 2.6 2 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | source "https://rubygems.org" 3 | 4 | group 'development' do 5 | gem 'minitest' 6 | gem 'pry' 7 | gem 'rake' 8 | end 9 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | coderay (1.1.2) 5 | method_source (0.9.2) 6 | minitest (5.13.0) 7 | pry (0.12.2) 8 | coderay (~> 1.1.0) 9 | method_source (~> 0.9.0) 10 | rake (13.0.1) 11 | 12 | PLATFORMS 13 | ruby 14 | 15 | DEPENDENCIES 16 | minitest 17 | pry 18 | rake 19 | 20 | BUNDLED WITH 21 | 2.1.4 22 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ⚠️ **This tool and repository are no longer maintained. We strongly advise you to [use Kibana to map custom data to ECS fields](https://www.elastic.co/guide/en/ecs/current/ecs-converting.html) instead.** 2 | 3 | --- 4 | 5 | ## Synopsis 6 | 7 | The ECS mapper tool turns a field mapping from a CSV to an equivalent pipeline for: 8 | 9 | - [Beats](https://www.elastic.co/guide/en/beats/filebeat/current/filtering-and-enhancing-data.html) 10 | - [Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest-processors.html) 11 | - [Logstash](https://www.elastic.co/guide/en/logstash/current/filter-plugins.html) 12 | 13 | This tool generates starter pipelines for each solution above to help you 14 | get started quickly in mapping new data sources to ECS. 15 | 16 | A mapping CSV is what you get when you start planning how to map a new data 17 | source to ECS in a spreadsheet. 18 | 19 | Colleagues may collaborate on a spreadsheet that looks like this: 20 | 21 | | source\_field | destination\_field | notes | 22 | |--------------|-------------------|---------------------------------------| 23 | | duration | event.duration | ECS supports nanosecond precision | 24 | | remoteip | source.ip | Hey @Jane do you agree with this one? | 25 | | message | | No need to change this field | 26 | | ... | | | 27 | 28 | You can export your spreadsheet to CSV, run it through the ECS mapper, 29 | and generate your starter pipelines. 30 | 31 | Note that this tool generates starter pipelines. They only do field rename and copy 32 | operations as well as some field format adjustments. It's up to you to integrate them 33 | in a complete pipeline that ingests and outputs the data however you need. 34 | 35 | Scroll down to the [Examples](#examples) section below to get right to a 36 | concrete example you can play with. 37 | 38 | ## CSV Format 39 | 40 | Here are more details on the CSV format supported by this tool. Since mapping 41 | spreadsheets are used by humans, it's totally fine to have as many columns 42 | as you need in your spreadsheets/CSV. Only the following columns will be considered: 43 | 44 | | column name | required | allowed values | notes | 45 | |-------------|----------|----------------|-------| 46 | | source\_field | required | | A dotted Elasticsearch field name. Dots represent JSON nesting. Lines with empty "source\_field" are skipped. | 47 | | destination\_field | required | | A dotted Elasticsearch field name. Dots represent JSON nesting. Can be left empty if there's no copy action (just a type conversion). | 48 | | format\_action | optional | to\_float, to\_integer, to\_string, to\_boolean, to\_array, parse\_timestamp, uppercase, lowercase, (empty) | Simple conversion to apply to the field value. | 49 | | timestamp\_format | optional | UNIX, UNIX\_MS, other format strings, (empty) | Only UNIX and UNIX\_MS formats are supported across all three tools. You may also specify other formats, like ISO8601, TAI64N, or a Java time pattern, but we will not validate whether the format is supported by the tool. | 50 | | copy\_action | optional | rename, copy, (empty) | What to do with the field. If left empty, default action is based on the `--copy-action` flag. | 51 | 52 | You can start from this 53 | [spreadsheet template](https://docs.google.com/spreadsheets/d/1m5JiOTeZtUueW3VOVqS8bFYqNGEEyp0jAsgO12NFkNM).
Make a copy of it in your Google Docs account, or download it as an Excel file. 54 | 55 | When the destination field is @timestamp, then we always enforce an explicit ```format_action``` of ```parse_timestamp``` to avoid conversion problems downstream. If no ```timestamp_format``` is provided, then ```UNIX_MS``` is used. Please note that the timestamp layouts used by the [Filebeat processor for converting timestamps](https://www.elastic.co/guide/en/beats/filebeat/current/processor-timestamp.html) are different from the formats supported by date processors in Logstash and Elasticsearch Ingest Node. 56 | 57 | 58 | 59 | ## Usage and Dependencies 60 | 61 | This is a simple Ruby program with no external dependencies, other than development 62 | dependencies. 63 | 64 | Any modern version of Ruby should be sufficient. If you don't intend to run the 65 | tests or the rake tasks, you can skip right to [usage tips](#using-the-ecs-mapper). 66 | 67 | ### Ruby Setup 68 | 69 | If you want to tweak the code of this script, run the tests or use the rake tasks, 70 | you'll need to install the development dependencies. 71 | 72 | Once you have Ruby installed for your platform, installing the dependencies is simply: 73 | 74 | ```bash 75 | gem install bundler 76 | bundle install 77 | ``` 78 | 79 | Run the tests: 80 | 81 | ```bash 82 | rake test 83 | ``` 84 | 85 | ### Using the ECS Mapper 86 | 87 | Display the help: 88 | 89 | ```bash 90 | ./ecs-mapper --help 91 | Reads a CSV mapping of source field names to destination field names, and generates 92 | Elastic pipelines to help perform the conversion. 93 | 94 | You can have as many columns as you want in your CSV. 95 | Only the following columns will be used by this tool: 96 | source_field, destination_field, format_action, copy_action 97 | 98 | Options: 99 | -f, --file FILE Input CSV file. 100 | -o, --output DIR Output directory. Defaults to parent dir of --file. 101 | --copy-action COPY_ACTION 102 | Default action for field renames. Acceptable values are: copy, rename. Default is copy. 103 | --debug Shorthand for --log-level=debug 104 | -h, --help Display help 105 | ``` 106 | 107 | Process my.csv and output pipelines in the same directory as the CSV. 108 | 109 | ```bash 110 | ./ecs-mapper --file my.csv 111 | ``` 112 | 113 | Process my.csv and output pipelines elsewhere. 114 | 115 | ```bash 116 | ./ecs-mapper --file my.csv --output pipelines/mine/ 117 | ``` 118 | 119 | Process my.csv, renaming fields with an empty value in the "copy\_action" column 120 | instead of copying them (the default). 121 | 122 | ```bash 123 | ./ecs-mapper --file my.csv --copy-action rename 124 | ``` 125 | 126 | ## Examples 127 | 128 | Look at an example CSV mapping and the pipelines generated from it: 129 | 130 | - [example/mapping.csv](example/mapping.csv) 131 | - [example/beats.yml](example/beats.yml) 132 | - [example/elasticsearch.json](example/elasticsearch.json) 133 | - [example/logstash.conf](example/logstash.conf) 134 | 135 | You can try each pipeline easily by following the instructions 136 | in [example/README.md](example/). 137 | 138 | ## Caveats 139 | 140 | * The Beats pipelines don't perform "to\_array", "uppercase", or 141 | "lowercase" transformations. They could be implemented via the "script" processor. 142 | * Only UNIX and UNIX\_MS timestamp formats are supported across Beats, Elasticsearch, 143 | and Logstash. For other timestamp formats, please modify the starter pipeline or add the 144 | appropriate date processor in the generated pipeline by hand.
Refer to the documentation 145 | for [Beats](https://www.elastic.co/guide/en/beats/filebeat/current/processor-timestamp.html), [Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/reference/master/date-processor.html), and [Logstash](https://www.elastic.co/guide/en/logstash/current/plugins-filters-date.html#plugins-filters-date-match). 146 | * This tool does not currently support additional processors, like setting static 147 | field values or dropping events based on a condition. 148 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "rake/testtask" 2 | 3 | Rake::TestTask.new(:test) do |t| 4 | t.libs << "test" 5 | t.libs << "lib" 6 | t.test_files = FileList["test/**/*_test.rb"] 7 | end 8 | 9 | task :default => :test 10 | 11 | task :example do |t| 12 | system("./ecs-mapper --file example/mapping.csv") 13 | system("cp example/logstash.conf example/logstash/2-ecs-conversion.conf") 14 | system("cat example/beats/config_header.yml example/beats.yml > example/beats/filebeat.yml") 15 | end 16 | -------------------------------------------------------------------------------- /ecs-mapper: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'fileutils' 4 | require 'logger' 5 | 6 | require_relative 'lib/options_parser' 7 | require_relative 'lib/mapping_loader' 8 | require_relative 'lib/beats_pipeline_generator' 9 | require_relative 'lib/elasticsearch_pipeline_generator' 10 | require_relative 'lib/logstash_pipeline_generator' 11 | 12 | SYNOPSIS = <<-SYN 13 | Reads a CSV mapping of source field names to destination field names, and generates 14 | Elastic pipelines to help perform the conversion. 15 | 16 | You can have as many columns as you want in your CSV. 17 | Only the following columns will be used by this tool: 18 | #{KNOWN_CSV_HEADERS.join(', ')} 19 | SYN 20 | 21 | VERSION = '0.0.1' 22 | VERSION_STRING = "ecs-mapper #{VERSION}" 23 | 24 | def main(options) 25 | puts "Options: #{options}" if $debug 26 | 27 | FileUtils.mkdir_p(options[:output]) 28 | 29 | raw_mapping = read_csv(options[:file]) 30 | mapping = make_mapping_explicit(raw_mapping, options) 31 | 32 | create_output_directory!(options[:output]) 33 | 34 | elasticsearch_pl = generate_elasticsearch_pipeline(mapping) 35 | puts output_elasticsearch_pipeline(elasticsearch_pl, options[:output]) 36 | 37 | beats_pl = generate_beats_pipeline(mapping) 38 | puts output_beats_pipeline(beats_pl, options[:output]) 39 | 40 | mutations, dates, array_fields = generate_logstash_pipeline(mapping) 41 | puts output_logstash_pipeline(mutations, dates, array_fields, options[:output]) 42 | end 43 | 44 | def create_output_directory!(dir) 45 | unless dir.directory? 46 | FileUtils.mkdir_p(dir) 47 | end 48 | end 49 | 50 | main( parse_options!(ARGV) ) 51 | -------------------------------------------------------------------------------- /example/README.md: -------------------------------------------------------------------------------- 1 | ## Running the Examples 2 | 3 | Here are the instructions to run each of the example pipelines. 4 | 5 | Note that the Beats and Logstash pipelines have been copied to 6 | "example/beats/filebeat.yml" and "example/logstash/2-ecs-conversion.conf" respectively, 7 | in order to produce functional configurations used in the examples below. 8 | 9 | All commands should be run from the root of this repo.
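If you want to regenerate these example pipelines yourself (for instance after editing example/mapping.csv), the Rakefile shown above defines an `example` task that re-runs the mapper and copies the generated files into place. A minimal sketch of that workflow, assuming Ruby and the development dependencies from the Gemfile are installed:

```bash
# Install rake and the other development dependencies declared in the Gemfile.
gem install bundler && bundle install

# Re-run ./ecs-mapper against example/mapping.csv, then refresh the copies under
# example/logstash/ and example/beats/ (this is what the :example task in the Rakefile does).
bundle exec rake example
```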
10 | 11 | ### Elasticsearch ingest pipeline 12 | 13 | In a Kibana console, prepare a "simulate" API call, without the array of processors: 14 | 15 | ```JS 16 | POST _ingest/pipeline/_simulate 17 | { "pipeline": { "processors" : 18 | 19 | } , "docs": 20 | [ { "_source": 21 | { "log_level": "debug", "eventid": 424242, "hostip": "192.0.2.3", 22 | "srcip": "192.0.2.1", "srcport": "42", "destip": "192.0.2.2", "destport": "42", 23 | "timestamp": "now", "action": "Testing", "duration": "1.1", 24 | "successful": "true", "process":{ "args": "--yolo" }, 25 | "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36" 26 | } 27 | } 28 | ] 29 | } 30 | ``` 31 | 32 | Paste the full content of the example/elasticsearch.json file below `"processors":`. 33 | It should now look like this: 34 | 35 | ```JS 36 | POST _ingest/pipeline/_simulate 37 | { "pipeline": { "processors" : 38 | [ 39 | { 40 | "rename": { 41 | "field": "srcip", 42 | "target_field": "source.address", 43 | "ignore_missing": true 44 | } 45 | }, 46 | // ... 47 | ``` 48 | 49 | When you execute the call, you'll see the resulting event, with transformations applied: 50 | 51 | ```JSON 52 | { 53 | "docs" : [ 54 | { 55 | "doc" : { 56 | "_index" : "_index", 57 | "_type" : "_doc", 58 | "_id" : "_id", 59 | "_source" : { 60 | "process" : { 61 | "args" : [ 62 | "--yolo" 63 | ] 64 | }, 65 | "log" : { 66 | "level" : "DEBUG" 67 | }, 68 | ... 69 | ``` 70 | 71 | ### Logstash 72 | 73 | Logstash can load multiple .conf files in alphabetical order, to form a full pipeline. 74 | We already have the generated Logstash pipeline copied to the example configuration 75 | directory, at "example/logstash/2-ecs-conversion.conf": 76 | 77 | ```bash 78 | ls example/logstash/ # 1-input.conf 2-ecs-conversion.conf 3-output.conf 79 | ``` 80 | 81 | Start Logstash: 82 | 83 | ```bash 84 | $logstash_path/bin/logstash -f example/logstash/ 85 | ``` 86 | 87 | Once Logstash is running, paste this document into Logstash's terminal: 88 | 89 | ```json 90 | { "log_level": "debug", "eventid": 424242, "srcip": "192.0.2.1", "srcport": 42, "destip": "192.0.2.2", "destport": 42, "hostip": "192.0.2.42", "ts": "now", "action": "Testing", "duration": "1.1", "process":{ "args": "--yolo" }, "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36" } 91 | ``` 92 | 93 | Logstash outputs the resulting event, with transformations applied: 94 | 95 | ```ruby 96 | { 97 | "host" => { 98 | "ip" => [ 99 | [0] "192.0.2.42" 100 | ] 101 | }, 102 | "process" => { 103 | "args" => [ 104 | [0] "--yolo" 105 | ] 106 | }, 107 | # ... 108 | ``` 109 | 110 | ### Beats 111 | 112 | We already have an example Filebeat configuration file based on the generated 113 | pipeline, at "example/beats/filebeat.yml". This configuration file reads the sample 114 | NDJSON log "example/log/sample.log" and outputs the converted docs to stdout. 115 | 116 | Run Filebeat with this configuration: 117 | 118 | ```bash 119 | $filebeat_path/filebeat -c example/beats/filebeat.yml 120 | ``` 121 | 122 | Filebeat outputs the resulting event, with transformations applied: 123 | 124 | ```JSON 125 | { 126 | "@timestamp": "2020-01-01T01:01:01.001Z", 127 | "process": { 128 | "args": "--yolo" 129 | }, 130 | "host": { 131 | "ip": "192.0.2.42", 132 | "name": "matbook-pro.lan" 133 | }, 134 | ...
135 | ``` 136 | -------------------------------------------------------------------------------- /example/beats.yml: -------------------------------------------------------------------------------- 1 | processors: 2 | - timestamp: 3 | field: some_timestamp_field 4 | target_field: "@timestamp" 5 | layouts: UNIX_MS 6 | timezone: UTC 7 | ignore_missing: true 8 | ignore_failure: true 9 | - timestamp: 10 | field: some_other_timestamp 11 | target_field: "@timestamp" 12 | layouts: UNIX_MS 13 | timezone: UTC 14 | ignore_missing: true 15 | ignore_failure: true 16 | - timestamp: 17 | field: some_new_timestamp 18 | target_field: destination_timestamp 19 | layouts: UNIX 20 | timezone: UTC 21 | ignore_missing: true 22 | ignore_failure: true 23 | - copy_fields: 24 | fields: 25 | - from: srcip 26 | to: source.address 27 | - from: srcip 28 | to: source.ip 29 | - from: new_event.srcip 30 | to: source.ip 31 | - from: destip 32 | to: destination.address 33 | - from: destport 34 | to: destination.port 35 | - from: ts 36 | to: timestamp 37 | ignore_missing: true 38 | fail_on_error: false 39 | - rename: 40 | fields: 41 | - from: srcport 42 | to: source.port 43 | - from: action 44 | to: event.action 45 | - from: duration 46 | to: event.duration 47 | - from: user_agent 48 | to: user_agent.original 49 | - from: log_level 50 | to: log.level 51 | - from: eventid 52 | to: event.id 53 | - from: hostip 54 | to: host.ip 55 | ignore_missing: true 56 | fail_on_error: false 57 | - convert: 58 | fields: 59 | - from: source.port 60 | type: long 61 | - from: destination.port 62 | type: long 63 | - from: event.duration 64 | type: float 65 | - from: event.id 66 | type: string 67 | ignore_missing: true 68 | fail_on_error: false 69 | -------------------------------------------------------------------------------- /example/beats/config_header.yml: -------------------------------------------------------------------------------- 1 | filebeat.inputs: 2 | - type: log 3 | enabled: true 4 | paths: 5 | - example/log/sample.log 6 | processors: 7 | - decode_json_fields: 8 | fields: ["message"] 9 | target: "" 10 | 11 | output.elasticsearch.enabled: false 12 | output.console: 13 | enabled: true 14 | pretty: true 15 | 16 | # Add the generated Beats pipeline below 17 | 18 | -------------------------------------------------------------------------------- /example/beats/filebeat.yml: -------------------------------------------------------------------------------- 1 | filebeat.inputs: 2 | - type: log 3 | enabled: true 4 | paths: 5 | - example/log/sample.log 6 | processors: 7 | - decode_json_fields: 8 | fields: ["message"] 9 | target: "" 10 | 11 | output.elasticsearch.enabled: false 12 | output.console: 13 | enabled: true 14 | pretty: true 15 | 16 | # Add the generated Beats pipeline below 17 | 18 | processors: 19 | - timestamp: 20 | field: some_timestamp_field 21 | target_field: "@timestamp" 22 | layouts: UNIX_MS 23 | timezone: UTC 24 | ignore_missing: true 25 | ignore_failure: true 26 | - timestamp: 27 | field: some_other_timestamp 28 | target_field: "@timestamp" 29 | layouts: UNIX_MS 30 | timezone: UTC 31 | ignore_missing: true 32 | ignore_failure: true 33 | - timestamp: 34 | field: some_new_timestamp 35 | target_field: destination_timestamp 36 | layouts: UNIX 37 | timezone: UTC 38 | ignore_missing: true 39 | ignore_failure: true 40 | - copy_fields: 41 | fields: 42 | - from: srcip 43 | to: source.address 44 | - from: srcip 45 | to: source.ip 46 | - from: new_event.srcip 47 | to: source.ip 48 | - from: destip 49 | to: destination.address 50 | - 
from: destport 51 | to: destination.port 52 | - from: ts 53 | to: timestamp 54 | ignore_missing: true 55 | fail_on_error: false 56 | - rename: 57 | fields: 58 | - from: srcport 59 | to: source.port 60 | - from: action 61 | to: event.action 62 | - from: duration 63 | to: event.duration 64 | - from: user_agent 65 | to: user_agent.original 66 | - from: log_level 67 | to: log.level 68 | - from: eventid 69 | to: event.id 70 | - from: hostip 71 | to: host.ip 72 | ignore_missing: true 73 | fail_on_error: false 74 | - convert: 75 | fields: 76 | - from: source.port 77 | type: long 78 | - from: destination.port 79 | type: long 80 | - from: event.duration 81 | type: float 82 | - from: event.id 83 | type: string 84 | ignore_missing: true 85 | fail_on_error: false 86 | -------------------------------------------------------------------------------- /example/elasticsearch.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "set": { 4 | "field": "source.address", 5 | "value": "{{srcip}}", 6 | "if": "ctx.srcip != null" 7 | } 8 | }, 9 | { 10 | "set": { 11 | "field": "source.ip", 12 | "value": "{{srcip}}", 13 | "if": "ctx.srcip != null" 14 | } 15 | }, 16 | { 17 | "set": { 18 | "field": "source.ip", 19 | "value": "{{new_event.srcip}}", 20 | "if": "ctx.new_event?.srcip != null" 21 | } 22 | }, 23 | { 24 | "date": { 25 | "field": "some_timestamp_field", 26 | "target_field": "@timestamp", 27 | "formats": [ 28 | "UNIX_MS" 29 | ], 30 | "timezone": "UTC", 31 | "ignore_failure": true 32 | } 33 | }, 34 | { 35 | "date": { 36 | "field": "some_other_timestamp", 37 | "target_field": "@timestamp", 38 | "formats": [ 39 | "UNIX_MS" 40 | ], 41 | "timezone": "UTC", 42 | "ignore_failure": true 43 | } 44 | }, 45 | { 46 | "date": { 47 | "field": "some_new_timestamp", 48 | "target_field": "destination_timestamp", 49 | "formats": [ 50 | "UNIX" 51 | ], 52 | "timezone": "UTC", 53 | "ignore_failure": true 54 | } 55 | }, 56 | { 57 | "rename": { 58 | "field": "srcport", 59 | "target_field": "source.port", 60 | "ignore_missing": true 61 | } 62 | }, 63 | { 64 | "convert": { 65 | "field": "source.port", 66 | "type": "long", 67 | "ignore_missing": true, 68 | "ignore_failure": true 69 | } 70 | }, 71 | { 72 | "set": { 73 | "field": "destination.address", 74 | "value": "{{destip}}", 75 | "if": "ctx.destip != null" 76 | } 77 | }, 78 | { 79 | "set": { 80 | "field": "destination.port", 81 | "value": "{{destport}}", 82 | "if": "ctx.destport != null" 83 | } 84 | }, 85 | { 86 | "convert": { 87 | "field": "destination.port", 88 | "type": "long", 89 | "ignore_missing": true, 90 | "ignore_failure": true 91 | } 92 | }, 93 | { 94 | "set": { 95 | "field": "timestamp", 96 | "value": "{{ts}}", 97 | "if": "ctx.ts != null" 98 | } 99 | }, 100 | { 101 | "rename": { 102 | "field": "action", 103 | "target_field": "event.action", 104 | "ignore_missing": true 105 | } 106 | }, 107 | { 108 | "lowercase": { 109 | "field": "event.action", 110 | "ignore_missing": true, 111 | "ignore_failure": true 112 | } 113 | }, 114 | { 115 | "rename": { 116 | "field": "duration", 117 | "target_field": "event.duration", 118 | "ignore_missing": true 119 | } 120 | }, 121 | { 122 | "convert": { 123 | "field": "event.duration", 124 | "type": "float", 125 | "ignore_missing": true, 126 | "ignore_failure": true 127 | } 128 | }, 129 | { 130 | "rename": { 131 | "field": "user_agent", 132 | "target_field": "user_agent.original", 133 | "ignore_missing": true 134 | } 135 | }, 136 | { 137 | "rename": { 138 | "field": "log_level", 139 | 
"target_field": "log.level", 140 | "ignore_missing": true 141 | } 142 | }, 143 | { 144 | "uppercase": { 145 | "field": "log.level", 146 | "ignore_missing": true, 147 | "ignore_failure": true 148 | } 149 | }, 150 | { 151 | "rename": { 152 | "field": "eventid", 153 | "target_field": "event.id", 154 | "ignore_missing": true 155 | } 156 | }, 157 | { 158 | "convert": { 159 | "field": "event.id", 160 | "type": "string", 161 | "ignore_missing": true, 162 | "ignore_failure": true 163 | } 164 | }, 165 | { 166 | "rename": { 167 | "field": "hostip", 168 | "target_field": "host.ip", 169 | "ignore_missing": true 170 | } 171 | }, 172 | { 173 | "append": { 174 | "field": "host.ip", 175 | "value": [ 176 | 177 | ], 178 | "ignore_failure": true, 179 | "if": "ctx.host?.ip != null" 180 | } 181 | } 182 | ] -------------------------------------------------------------------------------- /example/log/sample.log: -------------------------------------------------------------------------------- 1 | { "log_level": "debug", "eventid": 424242, "srcip": "192.0.2.1", "srcport": 42, "destip": "192.0.2.2", "destport": 42, "hostip": "192.0.2.42", "ts": "now", "action": "Testing", "duration": "1.1", "process":{ "args": "--yolo" }, "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36" } 2 | -------------------------------------------------------------------------------- /example/logstash.conf: -------------------------------------------------------------------------------- 1 | filter { 2 | mutate { 3 | copy => { '[srcip]' => '[source][address]' } 4 | copy => { '[srcip]' => '[source][ip]' } 5 | copy => { '[new_event][srcip]' => '[source][ip]' } 6 | rename => { '[srcport]' => '[source][port]' } 7 | convert => { '[source][port]' => 'integer' } 8 | copy => { '[destip]' => '[destination][address]' } 9 | copy => { '[destport]' => '[destination][port]' } 10 | convert => { '[destination][port]' => 'integer' } 11 | copy => { '[ts]' => '[timestamp]' } 12 | rename => { '[action]' => '[event][action]' } 13 | lowercase => [ '[event][action]' ] 14 | rename => { '[duration]' => '[event][duration]' } 15 | convert => { '[event][duration]' => 'float' } 16 | rename => { '[user_agent]' => '[user_agent][original]' } 17 | rename => { '[log_level]' => '[log][level]' } 18 | uppercase => [ '[log][level]' ] 19 | rename => { '[eventid]' => '[event][id]' } 20 | convert => { '[event][id]' => 'string' } 21 | rename => { '[hostip]' => '[host][ip]' } 22 | } 23 | 24 | date { 25 | match => ["[some_timestamp_field]", "UNIX_MS"] 26 | target => "[@timestamp]" 27 | } 28 | 29 | 30 | date { 31 | match => ["[some_other_timestamp]", "UNIX_MS"] 32 | target => "[@timestamp]" 33 | } 34 | 35 | 36 | date { 37 | match => ["[some_new_timestamp]", "UNIX"] 38 | target => "[destination_timestamp]" 39 | } 40 | 41 | if [host][ip] { 42 | ruby { 43 | code => "event.set('[host][ip]', Array(event.get('[host][ip]')) )" 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /example/logstash/1-input.conf: -------------------------------------------------------------------------------- 1 | input { 2 | stdin { codec => json_lines } 3 | file { 4 | codec => json_lines 5 | path => 'example/log-sample.log' 6 | } 7 | } 8 | 9 | filter { 10 | mutate { remove_field => [ 'host' ] } # Avoids conflict with ECS host.* 11 | } 12 | -------------------------------------------------------------------------------- /example/logstash/2-ecs-conversion.conf: 
-------------------------------------------------------------------------------- 1 | filter { 2 | mutate { 3 | copy => { '[srcip]' => '[source][address]' } 4 | copy => { '[srcip]' => '[source][ip]' } 5 | copy => { '[new_event][srcip]' => '[source][ip]' } 6 | rename => { '[srcport]' => '[source][port]' } 7 | convert => { '[source][port]' => 'integer' } 8 | copy => { '[destip]' => '[destination][address]' } 9 | copy => { '[destport]' => '[destination][port]' } 10 | convert => { '[destination][port]' => 'integer' } 11 | copy => { '[ts]' => '[timestamp]' } 12 | rename => { '[action]' => '[event][action]' } 13 | lowercase => [ '[event][action]' ] 14 | rename => { '[duration]' => '[event][duration]' } 15 | convert => { '[event][duration]' => 'float' } 16 | rename => { '[user_agent]' => '[user_agent][original]' } 17 | rename => { '[log_level]' => '[log][level]' } 18 | uppercase => [ '[log][level]' ] 19 | rename => { '[eventid]' => '[event][id]' } 20 | convert => { '[event][id]' => 'string' } 21 | rename => { '[hostip]' => '[host][ip]' } 22 | } 23 | 24 | date { 25 | match => ["[some_timestamp_field]", "UNIX_MS"] 26 | target => "[@timestamp]" 27 | } 28 | 29 | 30 | date { 31 | match => ["[some_other_timestamp]", "UNIX_MS"] 32 | target => "[@timestamp]" 33 | } 34 | 35 | 36 | date { 37 | match => ["[some_new_timestamp]", "UNIX"] 38 | target => "[destination_timestamp]" 39 | } 40 | 41 | if [host][ip] { 42 | ruby { 43 | code => "event.set('[host][ip]', Array(event.get('[host][ip]')) )" 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /example/logstash/3-output.conf: -------------------------------------------------------------------------------- 1 | output { 2 | stdout { 3 | # codec => json_lines # Alternately output JSON events 4 | codec => rubydebug 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /example/mapping.csv: -------------------------------------------------------------------------------- 1 | source_field,copy_action,format_action,timestamp_format,destination_field,Notes 2 | srcip,,,,source.address,Copying srcip to source.address 3 | srcip,,,,source.ip,Copying srcip a second time to source.ip as well 4 | new_event.srcip,,,,source.ip,This new event type could also populate source.ip 5 | some_timestamp_field,,parse_timestamp,,@timestamp,Convert this timestamp to UNIX_MS format 6 | some_other_timestamp,,,,@timestamp,Convert this timestamp to default UNIX_MS 7 | some_new_timestamp,,parse_timestamp,UNIX,destination_timestamp,Convert this timestamp to UNIX format 8 | srcport,rename,to_integer,,source.port, 9 | destip,,,,destination.address, 10 | destport,,to_integer,,destination.port, 11 | ts,copy,,,timestamp, 12 | action,rename,lowercase,,event.action, 13 | duration,rename,to_float,,event.duration, 14 | user_agent,rename,,,user_agent.original, 15 | log_level,rename,uppercase,,log.level, 16 | eventid,rename,to_string,,event.id,IDs should be strings! 
17 | successful,,to_boolean,,,Format source field to boolean type 18 | hostip,rename,to_array,,host.ip, 19 | process.args,,to_array,,,Format source field to an array 20 | -------------------------------------------------------------------------------- /lib/beats_pipeline_generator.rb: -------------------------------------------------------------------------------- 1 | require 'yaml' 2 | require_relative 'helpers' 3 | 4 | def generate_beats_pipeline(mapping) 5 | # copy/rename 6 | fields_to_copy = [] 7 | fields_to_rename = [] 8 | fields_to_convert = [] 9 | pipeline = [] 10 | 11 | mapping.each_pair do |_, row| 12 | if same_field_name?(row) 13 | next if row[:format_action].nil? 14 | end 15 | 16 | source_field = row[:source_field] 17 | 18 | if row[:destination_field] and not ['parse_timestamp'].include?(row[:format_action]) 19 | statement = { 20 | 'from' => source_field, 21 | 'to' => row[:destination_field], 22 | } 23 | if 'copy' == row[:copy_action] 24 | fields_to_copy << statement 25 | else 26 | fields_to_rename << statement 27 | end 28 | end 29 | 30 | if row[:format_action] 31 | affected_field = row[:destination_field] || row[:source_field] 32 | type = case row[:format_action] 33 | when 'to_boolean' 34 | 'boolean' 35 | when 'to_integer' 36 | 'long' 37 | when 'to_string' 38 | 'string' 39 | when 'to_float' 40 | 'float' 41 | end 42 | 43 | if type 44 | statement = { 'from' => affected_field, 'type' => type } 45 | fields_to_convert << statement 46 | 47 | elsif ['parse_timestamp'].include?(row[:format_action]) 48 | pipeline << { 49 | 'timestamp' => { 50 | 'field' => row[:source_field], 51 | 'target_field' => row[:destination_field], 52 | 'layouts' => row[:timestamp_format], 53 | 'timezone' => "UTC", 54 | 'ignore_missing' => true, 55 | 'ignore_failure' => true 56 | } 57 | } 58 | end 59 | end 60 | end 61 | 62 | if fields_to_copy.size > 0 63 | pipeline << { 64 | 'copy_fields' => { 'fields' => fields_to_copy, 65 | 'ignore_missing' => true, 'fail_on_error' => false } 66 | } 67 | end 68 | if fields_to_rename.size > 0 69 | pipeline << { 70 | 'rename' => { 'fields' => fields_to_rename, 71 | 'ignore_missing' => true, 'fail_on_error' => false } 72 | } 73 | end 74 | if fields_to_convert.size > 0 75 | pipeline << { 76 | 'convert' => { 'fields' => fields_to_convert, 77 | 'ignore_missing' => true, 'fail_on_error' => false } 78 | } 79 | end 80 | 81 | return pipeline 82 | end 83 | 84 | def output_beats_pipeline(pipeline, output_dir) 85 | file_name = output_dir.join('beats.yml') 86 | File.open(file_name, 'w') do |f| 87 | yaml = YAML.dump({ 'processors' => pipeline}) 88 | f.write(yaml.gsub(/^---./m, '')) # Making concatenation easier, to build a full Beats config 89 | end 90 | return file_name 91 | end 92 | -------------------------------------------------------------------------------- /lib/elasticsearch_pipeline_generator.rb: -------------------------------------------------------------------------------- 1 | require 'json' 2 | require_relative 'helpers' 3 | 4 | def generate_elasticsearch_pipeline(mapping) 5 | pipeline = [] 6 | mapping.each_pair do |_, row| 7 | if same_field_name?(row) 8 | next if row[:format_action].nil? 
9 | end 10 | 11 | source_field = row[:source_field] 12 | 13 | # copy/rename 14 | if row[:destination_field] and not ['parse_timestamp'].include?(row[:format_action]) 15 | if 'copy' == row[:copy_action] 16 | processor = { 17 | set: { 18 | field: row[:destination_field], 19 | value: '{{' + source_field + '}}', 20 | if: field_presence_predicate(source_field), 21 | } 22 | } 23 | 24 | else 25 | processor = { 26 | rename: { 27 | field: source_field, 28 | target_field: row[:destination_field], 29 | ignore_missing: true 30 | } 31 | } 32 | end 33 | pipeline << processor 34 | end 35 | 36 | processor = nil 37 | if row[:format_action] 38 | # Modify the source_field if there's no destination_field (no rename, just a type change) 39 | affected_field = row[:destination_field] || row[:source_field] 40 | 41 | type = case row[:format_action] 42 | when 'to_boolean' 43 | 'boolean' 44 | when 'to_integer' 45 | 'long' 46 | when 'to_string' 47 | 'string' 48 | when 'to_float' 49 | 'float' 50 | end 51 | if type 52 | processor = { 53 | convert: { 54 | field: affected_field, 55 | type: type, 56 | ignore_missing: true, 57 | ignore_failure: true, 58 | } 59 | } 60 | 61 | elsif ['uppercase', 'lowercase'].include?(row[:format_action]) 62 | processor = { 63 | row[:format_action] => { 64 | field: affected_field, 65 | ignore_missing: true, 66 | ignore_failure: true, 67 | } 68 | } 69 | 70 | elsif ['to_array'].include?(row[:format_action]) 71 | processor = { 72 | 'append' => { 73 | field: affected_field, 74 | value: [], 75 | ignore_failure: true, 76 | if: field_presence_predicate(affected_field), 77 | } 78 | } 79 | 80 | elsif ['parse_timestamp'].include?(row[:format_action]) 81 | processor = { 82 | 'date' => { 83 | field: row[:source_field], 84 | target_field: row[:destination_field], 85 | formats: [ row[:timestamp_format] ], 86 | timezone: "UTC", 87 | ignore_failure: true 88 | } 89 | } 90 | end 91 | 92 | end 93 | pipeline << processor if processor # Skip lower/upper and others not done by convert processor 94 | end 95 | pipeline 96 | end 97 | 98 | def field_presence_predicate(field) 99 | if '@timestamp' == field 100 | return "ctx.containsKey('@timestamp')" 101 | end 102 | field_levels = field.split('.') 103 | if field_levels.size == 1 104 | return "ctx.#{field} != null" 105 | end 106 | 107 | null_safe = field_levels[0..-2].map { |f| "#{f}?" }.join('.') 108 | return "ctx.#{null_safe}.#{field_levels.last} != null" 109 | end 110 | 111 | def output_elasticsearch_pipeline(pipeline, output_dir) 112 | file_name = output_dir.join('elasticsearch.json') 113 | File.open(file_name, 'w') do |f| 114 | f.write JSON.pretty_generate(pipeline) 115 | end 116 | return file_name 117 | end 118 | -------------------------------------------------------------------------------- /lib/helpers.rb: -------------------------------------------------------------------------------- 1 | def same_field_name?(row) 2 | return row[:destination_field].nil? || 3 | row[:source_field] == row[:destination_field] 4 | end 5 | -------------------------------------------------------------------------------- /lib/logstash_pipeline_generator.rb: -------------------------------------------------------------------------------- 1 | require_relative 'helpers' 2 | 3 | def generate_logstash_pipeline(mapping) 4 | mutations = [] # Most things are in the same mutate block 5 | dates = [] 6 | array_fields = [] 7 | mapping.each_pair do |_, row| 8 | if same_field_name?(row) 9 | next if row[:format_action].nil? 
10 | end 11 | 12 | source_field = row[:source_field] 13 | 14 | if row[:destination_field] and not ['parse_timestamp'].include?(row[:format_action]) 15 | if 'copy' == row[:copy_action] 16 | mutations << { 'copy' => { lsf(source_field) => lsf(row[:destination_field]) } } 17 | else 18 | mutations << { 'rename' => { lsf(source_field) => lsf(row[:destination_field]) } } 19 | end 20 | end 21 | 22 | if row[:format_action] 23 | affected_field = row[:destination_field] || row[:source_field] 24 | type = case row[:format_action] 25 | when 'to_boolean' 26 | 'boolean' 27 | when 'to_integer' 28 | 'integer' 29 | when 'to_string' 30 | 'string' 31 | when 'to_float' 32 | 'float' 33 | end 34 | if type 35 | mutations << { 'convert' => { lsf(affected_field) => type } } 36 | elsif 'uppercase' == row[:format_action] 37 | mutations << { 'uppercase' => [lsf(affected_field)] } 38 | elsif 'lowercase' == row[:format_action] 39 | mutations << { 'lowercase' => [lsf(affected_field)] } 40 | elsif 'to_array' == row[:format_action] 41 | array_fields << lsf(affected_field) 42 | elsif ['parse_timestamp'].include?(row[:format_action]) 43 | dates << { 44 | 'date' => { 45 | 'match' => [ lsf(row[:source_field]), row[:timestamp_format] ], 46 | 'target' => lsf(row[:destination_field]) 47 | } 48 | } 49 | end 50 | end 51 | end 52 | 53 | return mutations, dates, array_fields 54 | end 55 | 56 | def render_mutate_line(line) 57 | raise "Expected one key at root of #{line}" if line.keys.size != 1 58 | action = line.keys.first 59 | if line[action].is_a? Hash 60 | key, value = line[action].to_a.flatten 61 | return "#{action} => { '#{key}' => '#{value}' }" 62 | elsif line[action].is_a? Array 63 | return "#{action} => [ '#{line[action].first}' ]" 64 | end 65 | end 66 | 67 | def render_date_line(line) 68 | raise "Expected one key at root of #{line}" if line.keys.size != 1 69 | action = line.keys.first 70 | if line[action].is_a? 
Hash 71 | match = line[action]["match"] 72 | target = line[action]["target"] 73 | return """ 74 | date { 75 | match => #{match} 76 | target => \"#{target}\" 77 | } 78 | """ 79 | end 80 | end 81 | 82 | def lsf(field) 83 | field.split('.').map{|f| "[#{f}]"}.join 84 | end 85 | 86 | def output_logstash_pipeline(mutations, dates, array_fields, output_dir) 87 | file_name = output_dir.join('logstash.conf') 88 | File.open(file_name, 'w') do |f| 89 | 90 | f.write(<<-CONF) 91 | filter { 92 | mutate { 93 | #{mutations.map{|line| render_mutate_line(line)}.join("\n ")} 94 | } 95 | CONF 96 | 97 | if dates.length > 0 98 | f.write(<<-DATES) 99 | #{dates.map{|line| render_date_line(line)}.join("\n ")} 100 | DATES 101 | end 102 | 103 | array_fields.each do |array_field| 104 | f.write(<<-RB) 105 | if #{array_field} { 106 | ruby { 107 | code => "event.set('#{array_field}', Array(event.get('#{array_field}')) )" 108 | } 109 | } 110 | RB 111 | end 112 | f.write("}\n") 113 | end 114 | return file_name 115 | end 116 | -------------------------------------------------------------------------------- /lib/mapping_loader.rb: -------------------------------------------------------------------------------- 1 | require 'csv' 2 | 3 | REQUIRED_CSV_HEADERS = [ 4 | 'source_field', 5 | 'destination_field' 6 | ] 7 | 8 | KNOWN_CSV_HEADERS = REQUIRED_CSV_HEADERS + [ 9 | 'format_action', 10 | 'copy_action', 11 | 'timestamp_format' 12 | ] 13 | 14 | ACCEPTED_FORMAT_ACTIONS = [ 15 | 'uppercase', 16 | 'lowercase', 17 | 'to_boolean', 18 | 'to_integer', 19 | 'to_float', 20 | 'to_array', 21 | 'to_string', 22 | 'parse_timestamp', 23 | ].sort 24 | 25 | def read_csv(file_name) 26 | csv = CSV.read(file_name, headers: true) 27 | unless (REQUIRED_CSV_HEADERS - csv.headers).empty? 28 | abort "Required headers are missing in the CSV.\n" + 29 | " Missing: #{REQUIRED_CSV_HEADERS - csv.headers}.\n" + 30 | " Required: #{REQUIRED_CSV_HEADERS}.\n" + 31 | " Found: #{csv.headers}" 32 | end 33 | return csv_to_mapping(csv) 34 | end 35 | 36 | def csv_to_mapping(csv) 37 | mapping = {} 38 | csv.each do |row| 39 | # skip rows that don't have a source field 40 | next if row['source_field'].nil? || 41 | row['source_field'].strip.empty? 42 | 43 | # skip if no destination field and no format field provided 44 | # since it's possible to reformat a source field by itself 45 | next if ( row['destination_field'].nil? || 46 | row['destination_field'].strip.empty? ) and 47 | ( row['format_field'].nil? || 48 | row['format_field'].strip.empty? 
) 49 | 50 | source_field = row['source_field'].strip 51 | destination_field = row['destination_field'] && row['destination_field'].strip || '' 52 | 53 | mapping[source_field + '+' + destination_field] = { 54 | # required fields 55 | source_field: source_field, 56 | destination_field: destination_field, 57 | # optional fields 58 | copy_action: (row['copy_action'] && row['copy_action'].strip), 59 | format_action: (row['format_action'] && row['format_action'].strip), 60 | timestamp_format: (row['timestamp_format'] && row['timestamp_format'].strip), 61 | } 62 | end 63 | return mapping 64 | end 65 | 66 | def make_mapping_explicit(raw_mapping, options) 67 | mapping = {} 68 | raw_mapping.each_pair do |key, row| 69 | mapping[key] = row.dup 70 | mapping[key][:copy_action] ||= options[:copy_action] 71 | 72 | # If @timestamp is the destination and the user does not 73 | # specify how to format the conversion, assume we're 74 | # converting it to UNIX_MS 75 | if mapping[key][:destination_field] == '@timestamp' and 76 | ( mapping[key][:timestamp_format].nil? || 77 | mapping[key][:timestamp_format].strip.empty? ) 78 | mapping[key][:format_action] = 'parse_timestamp' 79 | mapping[key][:timestamp_format] = 'UNIX_MS' 80 | 81 | # If the destination field is empty but a format action is 82 | # provided, then assume we're formating the source field. 83 | elsif ( mapping[key][:destination_field].nil? || 84 | mapping[key][:destination_field].strip.empty? ) and not 85 | ( mapping[key][:format_action].nil? || 86 | mapping[key][:format_action].strip.empty? ) 87 | puts mapping[key][:source_field].inspect 88 | mapping[key][:destination_field] = mapping[key][:source_field] 89 | end 90 | end 91 | validate_mapping!(mapping) 92 | return mapping 93 | end 94 | 95 | def validate_mapping!(mapping) 96 | mapping.each_pair do |key, row| 97 | if row[:format_action] and not ACCEPTED_FORMAT_ACTIONS.include?(row[:format_action]) 98 | raise "Unsupported format_action: #{row[:format_action]}, expected one of #{ACCEPTED_FORMAT_ACTIONS}" 99 | end 100 | end 101 | end 102 | -------------------------------------------------------------------------------- /lib/options_parser.rb: -------------------------------------------------------------------------------- 1 | require 'optparse' 2 | require 'pathname' 3 | 4 | ## Configuration 5 | 6 | def parse_options!(argv) 7 | # defaults 8 | options = { 9 | :action => :main, 10 | :copy_action => 'copy', 11 | } 12 | 13 | parser = OptionParser.new do |opts| 14 | opts.banner = [ 15 | SYNOPSIS, 16 | "", 17 | ].join("\n") 18 | opts.separator "Options:" 19 | 20 | # More examples at http://apidock.com/ruby/OptionParser 21 | 22 | opts.on('-f', '--file FILE', "Input CSV file.") do |value| 23 | options[:file] = value 24 | end 25 | 26 | opts.on('-o', '--output DIR', "Output directory. Defaults to parent dir of --file.") do |value| 27 | options[:output] = value 28 | end 29 | 30 | opts.on('--copy-action COPY_ACTION', "Default action for field renames. Acceptable values are: copy, rename. Default is copy.") do |value| 31 | options[:copy_action] = value 32 | end 33 | 34 | 35 | # opts.on("--log-level LEVEL", "Log level. Default is info. 
Supports all of Ruby's Logger levels.") do |value| 36 | # level_name = value.upcase 37 | # if Logger.const_defined?(level_name) && Logger.const_get(level_name).is_a?(Integer) 38 | # options[:log_level] = Logger.const_get(level_name) 39 | # end 40 | # end 41 | 42 | opts.on_tail("--debug", "Shorthand for --log-level=debug") do |value| 43 | $debug = true 44 | require 'pry' 45 | # options[:log_level] = Logger::DEBUG 46 | end 47 | 48 | opts.on_tail('-h', '--help', "Display help") do 49 | options[:action] = :help 50 | end 51 | 52 | opts.on_tail('-v', '--version', "Display version and exit") do 53 | options[:action] = :version 54 | end 55 | 56 | end 57 | 58 | parser.parse!(argv) 59 | 60 | if :version == options[:action] 61 | puts VERSION_STRING 62 | exit 63 | end 64 | 65 | if :help == options[:action] 66 | puts VERSION_STRING, '' 67 | puts parser.to_s 68 | exit 69 | end 70 | 71 | if options[:file].nil? 72 | abort "Use flag '--file FILE' to specify the mapping file to convert." 73 | end 74 | 75 | options = smart_output_default(options) 76 | 77 | options 78 | end 79 | 80 | def smart_output_default(raw_options) 81 | options = raw_options.dup 82 | if options[:output] 83 | output = Pathname.new(options[:output]) 84 | else 85 | if options[:file] 86 | output = Pathname.new(options[:file]).parent 87 | else 88 | output = Pathname.new('.') 89 | end 90 | end 91 | options[:output] = output.expand_path 92 | options 93 | end 94 | -------------------------------------------------------------------------------- /test/unit/beats_pipeline_generator_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | require_relative '../../lib/beats_pipeline_generator' 3 | 4 | class BeatsPipelineGeneratorTest < Minitest::Test 5 | def test_copy_and_rename_pipeline 6 | mapping = { 7 | 'old1+new1' => { source_field: 'old1', destination_field: 'new1', copy_action: 'copy' }, 8 | 'old2+new2' => { source_field: 'old2', destination_field: 'new2', copy_action: 'rename' }, 9 | 'old3+new3' => { source_field: 'old3', destination_field: 'new3', copy_action: 'copy' }, 10 | } 11 | pl = generate_beats_pipeline(mapping) 12 | copy_processor = pl[0] 13 | rename_processor = pl[1] 14 | assert_equal( 15 | { 'copy_fields' => { 16 | 'fields' => [ {'from' => 'old1', 'to' => 'new1'}, {'from' => 'old3', 'to' => 'new3'} ], 17 | 'ignore_missing' => true, 'fail_on_error' => false 18 | } }, 19 | copy_processor 20 | ) 21 | assert_equal( 22 | { 'rename' => { 23 | 'fields' => [ {'from' => 'old2', 'to' => 'new2'} ], 24 | 'ignore_missing' => true, 'fail_on_error' => false 25 | } }, 26 | rename_processor 27 | ) 28 | end 29 | 30 | def test_non_renamed_beats 31 | mapping = { 32 | 'field1+field1' => { source_field: 'field1', destination_field: 'field1', copy_action: 'copy' }, 33 | 'field2+' => { source_field: 'field2', destination_field: nil, copy_action: 'copy' }, 34 | } 35 | pl = generate_beats_pipeline(mapping) 36 | assert_equal([], pl, "No rename processor should be added when there's no rename to perform") 37 | end 38 | 39 | def test_duplicate_source_fields_same_destination 40 | mapping = { 41 | 'field1+field3' => { source_field: 'field1', destination_field: 'field3', copy_action: 'copy' }, 42 | 'field2+field3' => { source_field: 'field2', destination_field: 'field3', copy_action: 'copy' }, 43 | 'field4+field5' => { source_field: 'field4', destination_field: 'field5', copy_action: 'copy' }, 44 | 'field4+field6' => { source_field: 'field4', destination_field: 'field6', copy_action: 'copy' }, 45 
| } 46 | 47 | pl = generate_beats_pipeline(mapping) 48 | 49 | assert_equal( 50 | { "copy_fields" => { 51 | "fields" => [ 52 | {"from"=>"field1", "to"=>"field3"}, 53 | {"from"=>"field2", "to"=>"field3"}, 54 | {"from"=>"field4", "to"=>"field5"}, 55 | {"from"=>"field4", "to"=>"field6"}], 56 | "ignore_missing"=>true, 57 | "fail_on_error"=>false}}, 58 | pl.first 59 | ) 60 | end 61 | 62 | def test_dates 63 | mapping = { 64 | 'field1+@timestamp' => 65 | { source_field: 'field1', 66 | destination_field: '@timestamp', 67 | format_action: 'parse_timestamp', 68 | timestamp_format: 'UNIX_MS' }, 69 | 'field2+@timestamp' => 70 | { source_field: 'field2', 71 | destination_field: '@timestamp', 72 | format_action: 'parse_timestamp', 73 | timestamp_format: 'UNIX' }, 74 | } 75 | 76 | pl = generate_beats_pipeline(mapping) 77 | 78 | assert_equal( 79 | pl, 80 | [ 81 | { 82 | "timestamp" => { 83 | "field" => "field1", 84 | "target_field" => "@timestamp", 85 | "layouts" => "UNIX_MS", 86 | "timezone" => "UTC", 87 | "ignore_missing" => true, 88 | "ignore_failure" => true 89 | } 90 | }, 91 | { 92 | "timestamp" => { 93 | "field" => "field2", 94 | "target_field" => "@timestamp", 95 | "layouts" => "UNIX", 96 | "timezone" => "UTC", 97 | "ignore_missing" => true, 98 | "ignore_failure"=>true 99 | } 100 | } 101 | ] 102 | ) 103 | end 104 | 105 | end 106 | -------------------------------------------------------------------------------- /test/unit/elasticsearch_pipeline_generator_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | require_relative '../../lib/elasticsearch_pipeline_generator' 3 | 4 | class ElasticsearchPipelineGeneratorTest < Minitest::Test 5 | def test_copy_processor 6 | mapping = { 'old_field+new_field' => { 7 | source_field: 'old_field', destination_field: 'new_field', copy_action: 'copy' 8 | } } 9 | pl = generate_elasticsearch_pipeline(mapping) 10 | processor = pl.first 11 | assert_equal( 12 | { set: { field: 'new_field', value: '{{old_field}}', if: 'ctx.old_field != null' } }, 13 | processor 14 | ) 15 | end 16 | 17 | def test_rename_processor 18 | mapping = { 'old_field+new_field' => { 19 | source_field: 'old_field', destination_field: 'new_field', copy_action: 'rename' 20 | } } 21 | pl = generate_elasticsearch_pipeline(mapping) 22 | processor = pl.first 23 | assert_equal( 24 | { rename: { field: 'old_field', target_field: 'new_field', ignore_missing: true } }, 25 | processor 26 | ) 27 | end 28 | 29 | def test_non_renamed_elasticsearch 30 | mapping = { 31 | 'field1+field1' => { source_field: 'field1', destination_field: 'field1', copy_action: 'copy' }, 32 | 'field2+' => { source_field: 'field2', destination_field: nil, copy_action: 'copy' }, 33 | } 34 | pl = generate_elasticsearch_pipeline(mapping) 35 | assert_equal([], pl, "No rename processor should be added when there's no rename to perform") 36 | end 37 | 38 | def test_field_presence_predicate 39 | assert_equal('ctx.level != null', 40 | field_presence_predicate('level')) 41 | assert_equal('ctx.suricata?.eve?.http?.hostname != null', 42 | field_presence_predicate('suricata.eve.http.hostname')) 43 | 44 | assert_equal("ctx.containsKey('@timestamp')", 45 | field_presence_predicate('@timestamp')) 46 | end 47 | 48 | def test_duplicate_source_fields_same_destination 49 | mapping = { 50 | 'field1+field3' => { source_field: 'field1', destination_field: 'field3', copy_action: 'copy' }, 51 | 'field2+field3' => { source_field: 'field2', destination_field: 'field3', copy_action: 'copy' }, 52 | 'field4+field5'
=> { source_field: 'field4', destination_field: 'field5', copy_action: 'copy' }, 53 | 'field4+field6' => { source_field: 'field4', destination_field: 'field6', copy_action: 'copy' }, 54 | } 55 | 56 | pl = generate_elasticsearch_pipeline(mapping) 57 | 58 | assert_equal(4, pl.length, "Expected 4 processors") 59 | assert_equal( 60 | {:set=>{:field=>"field3", :value=>"{{field1}}", :if=>"ctx.field1 != null"}}, 61 | pl[0] 62 | ) 63 | assert_equal( 64 | {:set=>{:field=>"field3", :value=>"{{field2}}", :if=>"ctx.field2 != null"}}, 65 | pl[1] 66 | ) 67 | assert_equal( 68 | {:set=>{:field=>"field5", :value=>"{{field4}}", :if=>"ctx.field4 != null"}}, 69 | pl[2] 70 | ) 71 | assert_equal( 72 | {:set=>{:field=>"field6", :value=>"{{field4}}", :if=>"ctx.field4 != null"}}, 73 | pl[3] 74 | ) 75 | end 76 | 77 | def test_dates 78 | mapping = { 79 | 'field1+@timestamp' => 80 | { source_field: 'field1', 81 | destination_field: '@timestamp', 82 | format_action: 'parse_timestamp', 83 | timestamp_format: 'UNIX_MS' }, 84 | 'field2+@timestamp' => 85 | { source_field: 'field2', 86 | destination_field: '@timestamp', 87 | format_action: 'parse_timestamp', 88 | timestamp_format: 'UNIX' }, 89 | } 90 | 91 | pl = generate_elasticsearch_pipeline(mapping) 92 | 93 | assert_equal( 94 | { "date" => { 95 | :field => "field1", 96 | :target_field => "@timestamp", 97 | :formats => ["UNIX_MS"], 98 | :timezone => "UTC", 99 | :ignore_failure => true}}, 100 | pl[0] 101 | ) 102 | 103 | assert_equal( 104 | { "date" => { 105 | :field => "field2", 106 | :target_field => "@timestamp", 107 | :formats => ["UNIX"], 108 | :timezone => "UTC", 109 | :ignore_failure => true}}, 110 | pl[1] 111 | ) 112 | end 113 | end 114 | -------------------------------------------------------------------------------- /test/unit/helpers_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | require_relative '../../lib/helpers' 3 | 4 | class HelpersTest < Minitest::Test 5 | def test_same_field_name 6 | assert same_field_name?({source_field: 'foo', destination_field: nil}) 7 | assert same_field_name?({source_field: 'foo', destination_field: 'foo'}) 8 | 9 | assert_equal false, same_field_name?({source_field: 'foo', destination_field: 'bar'}) 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /test/unit/logstash_pipeline_generator_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | require_relative '../../lib/logstash_pipeline_generator' 3 | 4 | class LogstashPipelineGeneratorTest < Minitest::Test 5 | def test_logstash_pipeline 6 | mapping = { 7 | 'old1' => { source_field: 'old1', destination_field: 'new1', copy_action: 'copy' }, 8 | 'old2' => { source_field: 'old2', destination_field: 'new2', copy_action: 'rename' }, 9 | 'old3' => { source_field: 'old3', destination_field: 'new3', copy_action: 'copy' }, 10 | } 11 | mutations, _, _ = generate_logstash_pipeline(mapping) 12 | old1_processor = mutations[0] 13 | old2_processor = mutations[1] 14 | old3_processor = mutations[2] 15 | assert_equal( { 'copy' => { '[old1]' => '[new1]' } }, old1_processor) 16 | assert_equal( { 'rename' => { '[old2]' => '[new2]' } }, old2_processor) 17 | assert_equal( { 'copy' => { '[old3]' => '[new3]' } }, old3_processor) 18 | end 19 | 20 | def test_non_renamed_ls 21 | mapping = { 22 | 'field1' => { source_field: 'field1', destination_field: 'field1', copy_action: 'copy' }, 23 | 'field2' => { 
source_field: 'field2', destination_field: nil, copy_action: 'copy' }, 24 | } 25 | mutations, _, _ = generate_logstash_pipeline(mapping) 26 | assert_equal([], mutations, "No rename processor should be added when there's no rename to perform") 27 | end 28 | 29 | def test_render_ls_field_name 30 | assert_equal("[field]", lsf("field")) 31 | assert_equal("[@field]", lsf("@field")) 32 | assert_equal("[log][level]", lsf("log.level")) 33 | end 34 | 35 | def test_render_mutate_line_simple_hash 36 | assert_equal( 37 | "copy => { '[src_field]' => '[dest_field]' }", 38 | render_mutate_line('copy' => {'[src_field]' => '[dest_field]'}) 39 | ) 40 | assert_equal( 41 | "convert => { '[event][duration]' => 'float' }", 42 | render_mutate_line('convert' => {'[event][duration]' => 'float'}) 43 | ) 44 | end 45 | 46 | def test_render_mutate_line_array 47 | assert_equal( 48 | "uppercase => [ '[log][level]' ]", 49 | render_mutate_line('uppercase' => ['[log][level]']) 50 | ) 51 | end 52 | 53 | def test_duplicate_source_fields_same_destination 54 | mapping = { 55 | 'field1+field3' => { source_field: 'field1', destination_field: 'field3', copy_action: 'copy' }, 56 | 'field2+field3' => { source_field: 'field2', destination_field: 'field3', copy_action: 'copy' }, 57 | 'field4+field5' => { source_field: 'field4', destination_field: 'field5', copy_action: 'copy' }, 58 | 'field4+field6' => { source_field: 'field4', destination_field: 'field6', copy_action: 'copy' }, 59 | } 60 | 61 | mutations, _, _ = generate_logstash_pipeline(mapping) 62 | 63 | assert_equal( 64 | [ {"copy" => {"[field1]" => "[field3]"}}, 65 | {"copy" => {"[field2]" => "[field3]"}}, 66 | {"copy" => {"[field4]" => "[field5]"}}, 67 | {"copy" => {"[field4]" => "[field6]"}}], 68 | mutations 69 | ) 70 | end 71 | 72 | def test_dates 73 | mapping = { 74 | 'field1+@timestamp' => 75 | { source_field: 'field1', 76 | destination_field: '@timestamp', 77 | format_action: 'parse_timestamp', 78 | timestamp_format: 'UNIX_MS' }, 79 | 'field2+@timestamp' => 80 | { source_field: 'field2', 81 | destination_field: '@timestamp', 82 | format_action: 'parse_timestamp', 83 | timestamp_format: 'UNIX' }, 84 | } 85 | 86 | mutations, dates, array_fields = generate_logstash_pipeline(mapping) 87 | 88 | assert_equal( 89 | [], 90 | mutations 91 | ) 92 | 93 | assert_equal( 94 | [], 95 | array_fields 96 | ) 97 | 98 | assert_equal( 99 | {"date" => { 100 | "match" => ["[field1]", "UNIX_MS"], 101 | "target" => "[@timestamp]" 102 | }}, 103 | dates[0] 104 | ) 105 | 106 | assert_equal( 107 | {"date" => { 108 | "match" => ["[field2]", "UNIX"], 109 | "target" => "[@timestamp]" 110 | }}, 111 | dates[1] 112 | ) 113 | 114 | end 115 | end 116 | -------------------------------------------------------------------------------- /test/unit/mapping_loader_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | require_relative '../../lib/mapping_loader' 3 | 4 | class MappingLoaderTest < Minitest::Test 5 | def test_explicit_mapping_default_rename_action 6 | raw_mapping = { 7 | 'copied_field' => { 8 | source_field: 'copied_field', 9 | destination_field: 'new.copied_field', 10 | copy_action: 'copy' 11 | }, 12 | 'default_field' => { 13 | source_field: 'default_field', 14 | destination_field: 'new.default_field' 15 | }, 16 | 'renamed_field' => { 17 | source_field: 'renamed_field', 18 | destination_field: 'new.renamed_field', 19 | copy_action: 'rename' 20 | }, 21 | } 22 | options = { copy_action: 'copy' } 23 | mapping = 
make_mapping_explicit(raw_mapping, options) 24 | assert_equal('copy', mapping['default_field'][:copy_action]) 25 | assert_equal('copy', mapping['copied_field'][:copy_action]) 26 | assert_equal('rename', mapping['renamed_field'][:copy_action]) 27 | end 28 | 29 | def test_csv_to_mapping_cleans_up_spaces_ignores_unknown_keys 30 | # Note: an instance of CSV behaves a lot like an array of hashes 31 | csv = [{ 'source_field' => ' my_field ', 32 | 'destination_field' => "another_field\t", 33 | 'copy_action' => ' copy', 34 | }] 35 | expected_mapping = { 36 | 'my_field+another_field' => { 37 | source_field: 'my_field', 38 | destination_field: 'another_field', 39 | copy_action: 'copy', 40 | format_action: nil, 41 | timestamp_format: nil 42 | } 43 | } 44 | assert_equal(expected_mapping, csv_to_mapping(csv)) 45 | end 46 | 47 | def test_validate! 48 | assert_raises(RuntimeError) do 49 | validate_mapping!({ 'foo' => {:format_action => 'foo'}}) 50 | end 51 | 52 | # If no errors, this is good (no need for assertion) 53 | validate_mapping!({ 'foo' => {:format_action => 'to_array'}}) 54 | end 55 | 56 | def test_mapping_loader_skips_missing_fields 57 | csv = [ 58 | # skipped 59 | { 'source_field' => nil, 'destination_field' => nil }, 60 | { 'source_field' => nil, 'destination_field' => 'fieldname' }, 61 | { 'source_field' => ' ', 'destination_field' => ' ' }, 62 | { 'source_field' => "\t", 'destination_field' => 'fieldname' }, 63 | { 'source_field' => 'correct_fieldname', 'destination_field' => nil }, 64 | # Not skipped 65 | { 'source_field' => 'original_fieldname', 'destination_field' => 'new_fieldname' }, 66 | ] 67 | 68 | expected_mapping = { 69 | 'original_fieldname+new_fieldname' => { 70 | source_field: 'original_fieldname', 71 | destination_field: 'new_fieldname', 72 | copy_action: nil, 73 | format_action: nil, 74 | timestamp_format: nil 75 | } 76 | } 77 | assert_equal(expected_mapping, csv_to_mapping(csv)) 78 | end 79 | 80 | def test_mapping_timestamp 81 | csv = [ 82 | { 'source_field' => 'some_timestamp_field1', 83 | 'destination_field' => '@timestamp' }, 84 | { 'source_field' => 'some_timestamp_field2', 85 | 'destination_field' => '@timestamp', 86 | 'format_action' => 'parse_timestamp' }, 87 | { 'source_field' => 'some_timestamp_field3', 88 | 'destination_field' => '@timestamp', 89 | 'format_action' => 'parse_timestamp', 90 | 'timestamp_format' => 'UNIX' }, 91 | { 'source_field' => 'some_timestamp_field4', 92 | 'destination_field' => 'some_other_timestamp', 93 | 'format_action' => 'parse_timestamp', 94 | 'timestamp_format' => 'UNIX' }, 95 | { 'source_field' => 'some_timestamp_field5', 96 | 'destination_field' => 'some_other_timestamp', 97 | 'format_action' => 'parse_timestamp', 98 | 'timestamp_format' => 'ISO8601' }, 99 | { 'source_field' => 'some_timestamp_field6', 100 | 'destination_field' => 'some_other_timestamp', 101 | 'format_action' => 'parse_timestamp', 102 | 'timestamp_format' => 'TAI64N' }, 103 | 104 | ] 105 | 106 | options = { copy_action: 'copy' } 107 | mapping = csv_to_mapping(csv) 108 | explicit_mapping = make_mapping_explicit(mapping, options) 109 | 110 | assert_equal(explicit_mapping['some_timestamp_field1+@timestamp'], 111 | { :source_field => "some_timestamp_field1", 112 | :destination_field => "@timestamp", 113 | :copy_action => "copy", 114 | :format_action => "parse_timestamp", 115 | :timestamp_format => "UNIX_MS" 116 | } 117 | ) 118 | assert_equal(explicit_mapping['some_timestamp_field2+@timestamp'], 119 | { :source_field => "some_timestamp_field2", 120 | :destination_field => 
"@timestamp", 121 | :copy_action => "copy", 122 | :format_action => "parse_timestamp", 123 | :timestamp_format => "UNIX_MS" 124 | } 125 | ) 126 | assert_equal(explicit_mapping['some_timestamp_field3+@timestamp'], 127 | { :source_field => "some_timestamp_field3", 128 | :destination_field => "@timestamp", 129 | :copy_action => "copy", 130 | :format_action => "parse_timestamp", 131 | :timestamp_format => "UNIX" 132 | } 133 | ) 134 | assert_equal(explicit_mapping['some_timestamp_field4+some_other_timestamp'], 135 | { :source_field => "some_timestamp_field4", 136 | :destination_field => "some_other_timestamp", 137 | :copy_action => "copy", 138 | :format_action => "parse_timestamp", 139 | :timestamp_format => "UNIX" 140 | } 141 | ) 142 | assert_equal(explicit_mapping['some_timestamp_field5+some_other_timestamp'], 143 | { :source_field => "some_timestamp_field5", 144 | :destination_field => "some_other_timestamp", 145 | :copy_action => "copy", 146 | :format_action => "parse_timestamp", 147 | :timestamp_format => "ISO8601" 148 | } 149 | ) 150 | assert_equal(explicit_mapping['some_timestamp_field6+some_other_timestamp'], 151 | { :source_field => "some_timestamp_field6", 152 | :destination_field => "some_other_timestamp", 153 | :copy_action => "copy", 154 | :format_action => "parse_timestamp", 155 | :timestamp_format => "TAI64N" 156 | } 157 | ) 158 | 159 | end 160 | end 161 | -------------------------------------------------------------------------------- /test/unit/options_parser_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | require_relative '../../lib/options_parser' 3 | 4 | class OptionsParserTest < Minitest::Test 5 | def test_smart_output_from_input_file 6 | assert_equal( 7 | { file: '/home/bob/mapping.csv', output: Pathname.new('/home/bob') }, 8 | smart_output_default({ file: '/home/bob/mapping.csv' }) 9 | ) 10 | end 11 | 12 | def test_smart_output_default_explicit_output 13 | assert_equal( 14 | { file: 'mapping.csv', output: Pathname.new('/tmp') }, 15 | smart_output_default({ file: 'mapping.csv', output: '/tmp' }) 16 | ) 17 | end 18 | 19 | def test_output_dir_is_expanded 20 | current_user_home = Pathname.new('~').expand_path 21 | assert_equal( 22 | { file: 'mapping.csv', output: current_user_home }, 23 | smart_output_default({ file: 'mapping.csv', output: '~' }) 24 | ) 25 | end 26 | end 27 | --------------------------------------------------------------------------------