├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.yaml
│   │   ├── config.yml
│   │   └── feature_request.yaml
│   ├── dependabot.yml
│   └── workflows
│       ├── linux.yml
│       └── stale-actions.yml
├── .gitignore
├── AUTHORS
├── ChangeLog
├── Gemfile
├── Gemfile.v0.12
├── README.md
├── Rakefile
├── VERSION
├── appveyor.yml
├── docs
│   ├── credentials.md
│   ├── howto.md
│   ├── input.md
│   ├── output.md
│   └── v0.12.md
├── fluent-plugin-s3.gemspec
├── lib
│   └── fluent
│       ├── log-ext.rb
│       └── plugin
│           ├── in_s3.rb
│           ├── out_s3.rb
│           ├── s3_compressor_gzip_command.rb
│           ├── s3_compressor_lzma2.rb
│           ├── s3_compressor_lzo.rb
│           ├── s3_compressor_parquet.rb
│           ├── s3_compressor_zstd.rb
│           ├── s3_extractor_gzip_command.rb
│           ├── s3_extractor_lzma2.rb
│           └── s3_extractor_lzo.rb
└── test
    ├── test_in_s3.rb
    └── test_out_s3.rb
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.yaml:
--------------------------------------------------------------------------------
name: Bug Report
description: Create a report with a procedure for reproducing the bug
labels: "waiting-for-triage"
body:
  - type: markdown
    attributes:
      value: |
        Check the [README](https://github.com/fluent/fluent-plugin-s3#readme) first. The following list helps us investigate the problem.
  - type: textarea
    id: description
    attributes:
      label: Describe the bug
      description: A clear and concise description of what the bug is
    validations:
      required: true
  - type: textarea
    id: reproduce
    attributes:
      label: To Reproduce
      description: Steps to reproduce the behavior
    validations:
      required: true
  - type: textarea
    id: expected
    attributes:
      label: Expected behavior
      description: A clear and concise description of what you expected to happen
    validations:
      required: true
  - type: textarea
    id: environment
    attributes:
      label: Your Environment
      description: |
        - Fluentd or td-agent version: `fluentd --version` or `td-agent --version`
        - Operating system: `cat /etc/os-release`
        - Kernel version: `uname -r`

        Tip: If you hit the problem with an older fluentd version, try the latest version first.
      value: |
        - Fluentd version:
        - TD Agent version:
        - fluent-plugin-s3 version:
        - aws-sdk-s3 version:
        - aws-sdk-sqs version:
        - Operating system:
        - Kernel version:
      render: markdown
    validations:
      required: true
  - type: textarea
    id: configuration
    attributes:
      label: Your Configuration
      description: |
        Write your configuration here. A minimal reproducible fluentd.conf is recommended.
    validations:
      required: true
  - type: textarea
    id: logs
    attributes:
      label: Your Error Log
      description: Write ALL of your error log here
      render: shell
    validations:
      required: true
  - type: textarea
    id: additional-context
    attributes:
      label: Additional context
      description: Add any other context about the problem here.
    validations:
      required: false
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
blank_issues_enabled: false
contact_links:
  - name: Ask a Question
    url: https://discuss.fluentd.org/
    about: I have questions about fluent-plugin-s3. Please ask and answer questions at https://discuss.fluentd.org/.
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yaml:
--------------------------------------------------------------------------------
name: Feature request
description: Suggest an idea for this project
labels: "waiting-for-triage"
body:
  - type: markdown
    attributes:
      value: |
        Check [README.md](https://github.com/fluent/fluent-plugin-s3/blob/master/README.md) first. The following list helps us understand your request.
  - type: textarea
    id: description
    attributes:
      label: Is your feature request related to a problem? Please describe.
      description: |
        A clear and concise description of what the problem is.
        Ex. I'm always frustrated when [...]
    validations:
      required: true
  - type: textarea
    id: solution
    attributes:
      label: Describe the solution you'd like
      description: A clear and concise description of what you want to happen.
    validations:
      required: true
  - type: textarea
    id: alternative
    attributes:
      label: Describe alternatives you've considered
      description: A clear and concise description of any alternative solutions or features you've considered.
    validations:
      required: true
  - type: textarea
    id: additional-context
    attributes:
      label: Additional context
      description: Add any other context or screenshots about the feature request here.
    validations:
      required: false
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
version: 2
updates:
  - package-ecosystem: 'github-actions'
    directory: '/'
    schedule:
      interval: 'monthly'
--------------------------------------------------------------------------------
/.github/workflows/linux.yml:
--------------------------------------------------------------------------------
name: linux
on:
  push:
    branches: [master]
  pull_request:
    branches: [master]
jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        ruby: [ '3.4', '3.3', '3.2', '3.1', '3.0', '2.7' ]
        os:
          - ubuntu-latest
    name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      - uses: ruby/setup-ruby@v1
        with:
          ruby-version: ${{ matrix.ruby }}
      - name: unit testing
        env:
          CI: true
        run: |
          gem install rake
          bundle install --jobs 4 --retry 3
          bundle exec rake test
--------------------------------------------------------------------------------
/.github/workflows/stale-actions.yml:
--------------------------------------------------------------------------------
name: "Mark or close stale issues and PRs"
on:
  schedule:
    - cron: "00 10 * * *"

jobs:
  stale:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/stale@v8
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          days-before-stale: 30
          days-before-close: 7
          stale-issue-message: "This issue has been automatically marked as stale because it has been open 30 days with no activity. Remove the stale label or comment, or this issue will be closed in 7 days."
          stale-pr-message: "This PR has been automatically marked as stale because it has been open 30 days with no activity. Remove the stale label or comment, or this PR will be closed in 7 days."
          close-issue-message: "This issue was automatically closed because it has been stale for 7 days."
          close-pr-message: "This PR was automatically closed because it has been stale for 7 days."
          stale-pr-label: "stale"
          stale-issue-label: "stale"
          exempt-issue-labels: "waiting-for-triage,bug,help wanted,enhancement"
          exempt-pr-labels: "waiting-for-triage,bug,help wanted,enhancement"
          exempt-all-assignees: true
          exempt-all-milestones: true
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
~*
#*
*~
[._]*.s[a-w][a-z]
.DS_Store

*.gem
.bundle
Gemfile.lock
vendor
.ruby-version

test/tmp/
--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
FURUHASHI Sadayuki
MASAHIRO Nakagawa
--------------------------------------------------------------------------------
/ChangeLog:
--------------------------------------------------------------------------------
Release 1.8.3 - 2025/02/18

  * out_s3: Add `sts_http_proxy` and `sts_endpoint_url` to web_identity_credentials (GitHub: #452)

Release 1.8.2 - 2024/12/18

  * out_s3: Add more logging to identify unexpected errors of Tempfile#close.
  * out_s3: Support `checksum_algorithm` parameter to validate the data with a checksum (CRC32, CRC32C, SHA1 and SHA256) during upload/download.

Release 1.8.1 - 2024/11/15

  * dependency: Make the zstd-ruby dependency optional. Install zstd-ruby manually if you want to enable the Zstd compression feature.

Release 1.8.0 - 2024/11/06

  * out_s3: Add zstd compression support

Release 1.7.2 - 2022/10/19

  * in_s3: Add `event_bridge_mode` parameter
  * out_s3: Fix `s3_object_key_format` check to allow `%{hex_random}` as well as `%{uuid_flush}` or `${chunk_id}`

Release 1.7.1 - 2022/07/15

  * in_s3: Add `match_regexp` parameter to selectively download S3 files based on the object key
  * out_s3: Support `ssl_ca_bundle` and `ssl_ca_directory` parameters

Release 1.7.0 - 2022/06/14

  * in_s3: Allow multi workers
  * in_s3: Support alternative AWS key ID and secret for SQS
  * out_s3: Add warning for multi workers
  * out_s3: Support object tagging

Release 1.6.1 - 2021/08/19

  * in_s3/out_s3: Don't raise error when s3_endpoint is used for VPC endpoint (GitHub: #384)

Release 1.6.0 - 2021/04/08

  * out_s3: Add support for Parquet compressor. Use `<compress>` section to configure columnify command behavior.

Release 1.5.1 - 2021/02/16

  * out_s3: Fix assume_role_credentials regression

Release 1.5.0 - 2020/12/18

  * out_s3: Fix IAM credentials handling. Prefer assume_role_credentials and fix region parameter bug.

Release 1.4.0 - 2020/08/02

  * Remove uuidtools dependency
  * in_s3: Add error info to polling retry log

Release 1.3.4 - 2020/07/07

  * Add sts_http_proxy and sts_endpoint_url to assume_role_credentials

Release 1.3.3 - 2020/06/25

  * Allow fips/gov included endpoint
  * Support sts_region parameter

Release 1.3.2 - 2020/05/18

  * out_s3: Show warning message for object conflict case.
Release 1.3.1 - 2020/04/15

  * out_s3: Support S3 Dual-Stack Endpoints in output plugin via enable_dual_stack parameter

Release 1.3.0 - 2020/02/10

  * in_s3/out_s3: Support AssumeRoleWebIdentityCredentials via `web_identity_credentials` section for EKS.

Release 1.2.1 - 2019/11/10

  * in_s3: Support ECSCredentials

Release 1.2.0 - 2019/10/17

  * out_s3: Add bucket_lifecycle_rule section to set bucket's lifecycle

Release 1.1.11 - 2019/06/17

  * in_s3: Add add_object_metadata parameter

Release 1.1.10 - 2019/05/08

  * out_s3: force_path_style is deprecated

Release 1.1.9 - 2019/03/26

  * out_s3: Add enable_transfer_acceleration parameter
  * Update fluentd dependency to v0.14.22 or later

Release 1.1.8 - 2019/01/28

  * in_s3: Restart SQS polling when an error happens
  * out_s3: Show root cause when an error happens during compressor loading

Release 1.1.7 - 2018/11/14

  * in_s3: Fix a bug in proxy_uri parameter setting
  * in_s3: Support cross-account bucket/sqs capability with AssumeRole

Release 1.1.6 - 2018/09/11

  * in_s3: Add s3_endpoint parameter to support S3 compatible services

Release 1.1.5 - 2018/09/04

  * out_s3: Improve check_apikeys performance by specifying `max_keys` parameter

Release 1.1.4 - 2018/07/22

  * out_s3: Support time_slice in `check_object false`. date_slice is kept for backward compatibility

Release 1.1.3 - 2018/04/15

  * in_s3: Fix extraction of gzip's multiple streams.

Release 1.1.2 - 2018/04/10

  * out_s3: Fix memory leak when s3_object_key_format has a time-related placeholder

Release 1.1.1 - 2018/01/10

  * out_s3: Add index_format parameter

Release 1.1.0 - 2017/11/29

  * Use aws-sdk-s3 / aws-sdk-sqs gems instead of aws-sdk gem

Release 1.0.0 - 2017/11/15

  * Use v0.14 API
  * out_s3: Support canonical user id based grant permission
  * out_s3: Use specified s3_object_key_format even if check_object is false
  * out_s3: Add s3_metadata parameter
  * out_s3: Add ssl_verify_peer parameter
  * in_s3: Unescape S3 key
  * Add use_bundled_cert parameter

Release 0.8.0 - 2016/12/20

  * out_s3: Add check_object / check_bucket parameters for only put permission
  * Remove fluent-mixin-config-placeholders dependency


Release 0.7.2 - 2016/10/20

  * in_s3: Replace old parser API with new one
  * in_s3: Don't stop SQS polling when an error happens


Release 0.7.1 - 2016/09/02

  * Support IAM role for Amazon ECS task


Release 0.7.0 - 2016/08/10

  * Add s3 input plugin


Release 0.6.9 - 2016/07/28

  * Support v0.14
  * Fix aws_iam_retries warning
  * Fix race condition at start with buffers


Release 0.6.8 - 2016/04/19

  * Add sse customer key options


Release 0.6.7 - 2016/03/31

  * Add signature_version parameter
  * Add warn_for_delay parameter


Release 0.6.6 - 2016/03/16

  * Fix ACL handling in PUT operation


Release 0.6.5 - 2016/01/13

  * Add description to parameters
  * Use path as prefix in API key check


Release 0.6.4 - 2015/12/03

  * Add secret parameters to role_arn, external_id and ssekms_key_id
  * Fix region handling in assume_role_credentials


Release 0.6.3 - 2015/11/25

  * Add compute_checksums parameter


Release 0.6.2 - 2015/11/24

  * Add ssekms_key_id option to use KMS encryption


Release 0.6.1 - 2015/10/30

  * Fix server_side_encryption error
  * Keep hex random identity on rebooting
  * Fix Tempfile handling on Windows


Release 0.6.0 - 2015/10/09

  * Allow path based calling format
  * Add hex_random placeholder
  * Add overwrite option


Release 0.6.0.pre1 - 2015/09/10

  * Use AWS SDK v2


Release 0.5.11 - 2015/08/04

  * Add acl parameter
  * Fix use_server_side_encryption parameter


Release 0.5.10 - 2015/07/27

  * Add "uuid_flush" placeholder to s3_object_key_format for creating unique objects


Release 0.5.9 - 2015/06/10

  * Add secret option to AWS key related parameters


Release 0.5.8 - 2015/06/09

  * Fix credential provider selection for env vars
  * Add aws_iam_retries parameter to set the number of attempts to the EC2 metadata service


Release 0.5.7 - 2015/03/31

  * Use s3_endpoint instead of endpoint for S3 Client configuration for S3 compatible services
  * Increase the number of retries when using IAM role


Release 0.5.6 - 2015/03/02

  * Force to use aws-sdk-v1, not aws-sdk


Release 0.5.5 - 2015/02/19

  * Revive s3_endpoint parameter for S3 compatible services


Release 0.5.4 - 2015/02/12

  * Add use_server_side_encryption parameter


Release 0.5.3 - 2015/02/06

  * Add error information in API check
  * Add GzipWriter fallback to gzip_command


Release 0.5.2 - 2015/02/05

  * Add experimental gzip_command compressor


Release 0.5.1 - 2014/12/18

  * Skip apikey_check error when auto_create_bucket is true and bucket doesn't exist


Release 0.5.0 - 2014/12/07

  * Make compression algorithm pluggable
  * Remove format_json parameter
  * Remove s3_endpoint parameter
  * Relax fluentd version restriction to support 0.12 or later


Release 0.4.3 - 2014/11/10

  * Change API check message to consider region mismatch


Release 0.4.2 - 2014/10/22

  * Update fluent-mixin-config-placeholders to v0.3.0


Release 0.4.1 - 2014/10/16

  * Add 's3_region' option to specify S3 region
  * Restrict aws-sdk gem dependency version to use v1
  * Fix infinite loop when the same object path is generated


Release 0.4.0 - 2014/06/06

  * Add 'format' option to change one line format
  * Update fluentd dependency to v0.10.49


Release 0.3.7 - 2014/03/07

  * Support lzma2 compression using 'xz' command
    https://github.com/fluent/fluent-plugin-s3/pull/41
  * Relax aws-sdk gem requirement
    https://github.com/fluent/fluent-plugin-s3/pull/42


Release 0.3.6 - 2014/02/05

  * Support 'log_level' option


Release 0.3.5 - 2013/12/05

  * Add 'reduced_redundancy' option to store logs in reduced redundancy
    https://github.com/fluent/fluent-plugin-s3/pull/33


Release 0.3.4 - 2013/07/31

  * Add dynamic path slicing by time formatted string
    https://github.com/fluent/fluent-plugin-s3/pull/24


Release 0.3.3 - 2013/06/18

  * Fix require bug on case-sensitive environments


Release 0.3.2 - 2013/06/18

  * Support lzo mime-type
    https://github.com/fluent/fluent-plugin-s3/pull/29
  * Add proxy_uri option
    https://github.com/fluent/fluent-plugin-s3/issues/25
  * Add check_apikey_on_start option
    https://github.com/fluent/fluent-plugin-s3/pull/28


Release 0.3.1 - 2013/03/28

  * Support json and text mime-types
    https://github.com/fluent/fluent-plugin-s3/pull/20


Release 0.3.0 - 2013/02/19

  * Enable dynamic and configurable S3 object keys
    https://github.com/fluent/fluent-plugin-s3/pull/12
  * Fix an issue where a lot of temporary files were left in /tmp when the plugin failed to write to S3
    https://github.com/fluent/fluent-plugin-s3/pull/15
  * Enable fluent-mixin-config-placeholders to support hostname, uuid and other parameters in configuration
    https://github.com/fluent/fluent-plugin-s3/pull/19
  * Update 'aws-sdk' version requirement to '~> 1.8.2'
    https://github.com/fluent/fluent-plugin-s3/pull/21
  * Create new S3 bucket if it does not exist
    https://github.com/fluent/fluent-plugin-s3/pull/22
  * Check the permission and bucket existence at start method, not write method.


Release 0.2.6 - 2013/01/15

  * Add use_ssl option


Release 0.2.5 - 2012/12/06

  * Add format_json and time/tag mixin options [#9]


Release 0.2.4 - 2012/11/21

  * Set content type when writing files to s3


Release 0.2.3 - 2012/11/19

  * Loosen 'aws-sdk' version requirement from "~> 1.1.3" to "~> 1.1"
  * Support aws-sdk facility to load credentials from ENV vars or IAM Instance Profile by making the credentials non-mandatory
  * Use Yajl instead of to_json so that exceptions are not raised on invalid UTF-8 bytes.


Release 0.2.2 - 2011/12/15

  * Add s3_endpoint option


Release 0.2.1 - 2011/10/24

  * Add sequential number to the file to avoid overwriting
  * Use bundler instead of jeweler for packaging
  * Updated README


Release 0.2.0 - 2011/10/16

  * Updated to fluentd-0.10.0


Release 0.1.1 - 2011/09/27

  * First release

--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
source "http://rubygems.org"

gemspec
--------------------------------------------------------------------------------
/Gemfile.v0.12:
--------------------------------------------------------------------------------
source "http://rubygems.org"

gem 'json', '= 1.8.3'
gem 'fluentd', '~> 0.12.0'

gemspec
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Amazon S3 plugin for [Fluentd](http://github.com/fluent/fluentd)

[Build Status](https://travis-ci.org/fluent/fluent-plugin-s3)
[Code Climate](https://codeclimate.com/github/fluent/fluent-plugin-s3)

## Overview

The **s3** output plugin buffers event logs in local files and uploads them to S3
periodically.

This plugin splits files exactly by the time of the event logs (not the time
when the logs are received). For example, if a log '2011-01-02 message A'
arrives, and then another log '2011-01-03 message B' arrives in this order,
the former is stored in the "20110102.gz" file, and the latter in the
"20110103.gz" file.

The **s3** input plugin reads data from S3 periodically. This plugin uses an
SQS queue in the same region as the S3 bucket.
You must set up the SQS queue and S3 event notification before using this plugin.

:warning: Be sure to keep a close eye on S3 costs, as a few users have reported [unexpectedly high costs](https://github.com/fluent/fluent-plugin-s3/issues/160).

## Requirements

| fluent-plugin-s3 | fluentd    | ruby   |
|------------------|------------|--------|
| >= 1.0.0         | >= v0.14.0 | >= 2.1 |
| < 1.0.0          | >= v0.12.0 | >= 1.9 |

## Installation

Simply use RubyGems:

    # install the latest version
    $ gem install fluent-plugin-s3 --no-document # for fluentd v1.0 or later
    # If you need to install a specific version, use the -v option
    $ gem install fluent-plugin-s3 -v 1.3.0 --no-document
    # For v0.12. This is for old v0.12 users. Don't use v0.12 for new deployments
    $ gem install fluent-plugin-s3 -v "~> 0.8" --no-document # for fluentd v0.12


## Configuration: credentials

Both the S3 input and output plugins provide several credential methods for authentication/authorization.

See [Configuration: credentials](docs/credentials.md) for details.

## Output Plugin

See [Configuration: Output](docs/output.md) for details.

## Input Plugin

See [Configuration: Input](docs/input.md) for details.

## Tips and How to

* [Object Metadata Added To Records](docs/howto.md#object-metadata-added-to-records)
* [IAM Policy](docs/howto.md#iam-policy)
* [Use your (de)compression algorithm](docs/howto.md#use-your-decompression-algorithm)

## Migration guide

See [Migration guide from v0.12](docs/v0.12.md) for details.

## Website, license, et al.

| Web site          | http://fluentd.org/                       |
|-------------------|-------------------------------------------|
| Documents         | http://docs.fluentd.org/                  |
| Source repository | http://github.com/fluent/fluent-plugin-s3 |
| Discussion        | http://groups.google.com/group/fluentd    |
| Author            | Sadayuki Furuhashi                        |
| Copyright         | (c) 2011 FURUHASHI Sadayuki               |
| License           | Apache License, Version 2.0               |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------

require 'bundler'
Bundler::GemHelper.install_tasks

require 'rake/testtask'

Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.test_files = FileList['test/test_*.rb']
  test.verbose = true
end

task :default => [:build]

--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1.8.3
--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
version: '{build}'

install:
  - SET PATH=C:\Ruby%ruby_version%\bin;%PATH%
  - "%devkit%\\devkitvars.bat"
  - ruby --version
  - gem --version
  - bundle install
build: off
test_script:
  - bundle exec rake test TESTOPTS=-v

environment:
  matrix:
    - ruby_version: "22-x64"
      devkit: C:\Ruby21-x64\DevKit
    - ruby_version: "22"
      devkit: C:\Ruby21\DevKit
    - ruby_version: "21-x64"
      devkit: C:\Ruby21-x64\DevKit
    - ruby_version: "21"
      devkit: C:\Ruby21\DevKit
matrix:
  allow_failures:
    - ruby_version: "21"
--------------------------------------------------------------------------------
/docs/credentials.md:
--------------------------------------------------------------------------------
# Configuration: credentials

Both the S3 input and output plugins provide several credential methods for authentication/authorization.

## AWS key and secret authentication

These parameters are required when your agent is not running on an EC2 instance with an IAM Role. When using an IAM role, make sure to configure `instance_profile_credentials`; usage can be found below.

### aws_key_id

AWS access key id.

### aws_sec_key

AWS secret key.
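
Under the hood, these two parameters become a static credentials object for the AWS SDK. The following is a minimal plain-Ruby sketch of that mapping (the key values and region are placeholders, and this is an illustration rather than the plugin's exact code):

    require 'aws-sdk-s3'

    # Static credentials built from aws_key_id / aws_sec_key,
    # then handed to the S3 client.
    credentials = Aws::Credentials.new('YOUR_AWS_KEY_ID', 'YOUR_AWS_SECRET_KEY')
    s3 = Aws::S3::Client.new(region: 'ap-northeast-1', credentials: credentials)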

## \<assume_role_credentials\> section

Typically, you use AssumeRole for cross-account access or federation.

    <match>
      @type s3

      <assume_role_credentials>
        role_arn          ROLE_ARN
        role_session_name ROLE_SESSION_NAME
      </assume_role_credentials>
    </match>

See also:

* [Using IAM Roles - AWS Identity and Access
  Management](http://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use.html)
* [Aws::STS::Client](http://docs.aws.amazon.com/sdkforruby/api/Aws/STS/Client.html)
* [Aws::AssumeRoleCredentials](http://docs.aws.amazon.com/sdkforruby/api/Aws/AssumeRoleCredentials.html)

### role_arn (required)

The Amazon Resource Name (ARN) of the role to assume.

### role_session_name (required)

An identifier for the assumed role session.

### policy

An IAM policy in JSON format.

### duration_seconds

The duration, in seconds, of the role session. The value can range from
900 seconds (15 minutes) to 3600 seconds (1 hour). By default, the value
is set to 3600 seconds.

### external_id

A unique identifier that is used by third parties when assuming roles in
their customers' accounts.

## \<web_identity_credentials\> section

Similar to assume_role_credentials, but for usage in EKS.

    <match>
      @type s3

      <web_identity_credentials>
        role_arn                ROLE_ARN
        role_session_name       ROLE_SESSION_NAME
        web_identity_token_file AWS_WEB_IDENTITY_TOKEN_FILE
      </web_identity_credentials>
    </match>

See also:

* [Using IAM Roles - AWS Identity and Access
  Management](http://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use.html)
* [IAM Roles For Service Accounts](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts-technical-overview.html)
* [Aws::STS::Client](http://docs.aws.amazon.com/sdkforruby/api/Aws/STS/Client.html)
* [Aws::AssumeRoleWebIdentityCredentials](https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/AssumeRoleWebIdentityCredentials.html)

### role_arn (required)

The Amazon Resource Name (ARN) of the role to assume.

### role_session_name (required)

An identifier for the assumed role session.

### web_identity_token_file (required)

The absolute path to the file on disk containing the OIDC token.

### policy

An IAM policy in JSON format.

### duration_seconds

The duration, in seconds, of the role session. The value can range from
900 seconds (15 minutes) to 43200 seconds (12 hours). By default, the value
is set to 3600 seconds.


## \<instance_profile_credentials\> section

Retrieve temporary security credentials via HTTP request. This is useful on
EC2 instances.

    <match>
      @type s3

      <instance_profile_credentials>
        ip_address IP_ADDRESS
        port       PORT
      </instance_profile_credentials>
    </match>

See also:

* [Aws::InstanceProfileCredentials](http://docs.aws.amazon.com/sdkforruby/api/Aws/InstanceProfileCredentials.html)
* [Temporary Security Credentials - AWS Identity and Access
  Management](http://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp.html)
* [Instance Metadata and User Data - Amazon Elastic Compute
  Cloud](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html)

### retries

Number of times to retry when retrieving credentials. Default is nil.

### ip_address

Default is 169.254.169.254.

### port

Default is 80.

### http_open_timeout

Default is 5.

### http_read_timeout

Default is 5.
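
For reference, these parameters map onto the options of `Aws::InstanceProfileCredentials`. A rough sketch of the equivalent plain-Ruby setup, using the defaults listed above (illustrative only, not the plugin's actual code):

    require 'aws-sdk-s3'

    # Sketch of what <instance_profile_credentials> roughly translates to.
    credentials = Aws::InstanceProfileCredentials.new(
      ip_address: '169.254.169.254',  # EC2 metadata endpoint
      port: 80,
      http_open_timeout: 5,
      http_read_timeout: 5
    )
    s3 = Aws::S3::Client.new(credentials: credentials)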

## \<shared_credentials\> section

This loads AWS access credentials from a local ini file. This is useful for
local development.

    <match>
      @type s3

      <shared_credentials>
        path         PATH
        profile_name PROFILE_NAME
      </shared_credentials>
    </match>

See also:

* [Aws::SharedCredentials](http://docs.aws.amazon.com/sdkforruby/api/Aws/SharedCredentials.html)

### path

Path to the shared file. Defaults to "#{Dir.home}/.aws/credentials".

### profile_name

Defaults to 'default', or to the value of `ENV['AWS_PROFILE']` if set.
--------------------------------------------------------------------------------
/docs/howto.md:
--------------------------------------------------------------------------------
# Object Metadata Added To Records

If the [`add_object_metadata`](input.md#add_object_metadata) option is set to true, then the name of the bucket
and the key for a given object will be added to each log record as [`s3_bucket`](input.md#s3_bucket)
and [`s3_key`](input.md#s3_key), respectively. This metadata can be used by filter plugins or other
downstream processors to better identify the source of a given record.

# IAM Policy

The following is an example of an IAM policy needed to write to an S3 bucket (matches my-s3bucket/logs, my-s3bucket/test, etc.).

    {
      "Version": "2012-10-17",
      "Statement": [
        {
          "Effect": "Allow",
          "Action": [
            "s3:ListBucket"
          ],
          "Resource": "arn:aws:s3:::my-s3bucket"
        },
        {
          "Effect": "Allow",
          "Action": [
            "s3:PutObject",
            "s3:GetObject"
          ],
          "Resource": "arn:aws:s3:::my-s3bucket/*"
        }
      ]
    }

Note that the bucket must already exist and **[`auto_create_bucket`](output.md#auto_create_bucket)** has no effect in this case.

`s3:GetObject` is needed for the object check that avoids objects being overwritten.
If you set `check_object false`, `s3:GetObject` is not needed.

Refer to the [AWS
documentation](http://docs.aws.amazon.com/IAM/latest/UserGuide/ExampleIAMPolicies.html) for example policies.

Using [IAM
roles](http://docs.aws.amazon.com/IAM/latest/UserGuide/WorkingWithRoles.html)
with a properly configured IAM policy is preferred over embedding access keys
on EC2 instances.

## Example when `check_bucket false` and `check_object false`

With this configuration, fluentd works with a minimal IAM policy, like:

    "Statement": [{
      "Effect": "Allow",
      "Action": "s3:PutObject",
      "Resource": ["*"]
    }]

# Use your (de)compression algorithm

The s3 plugin has a pluggable compression mechanism like Fluentd's input/output
plugins. If you set 'store_as xxx', the `out_s3` plugin searches for
`fluent/plugin/s3_compressor_xxx.rb` and the `in_s3` plugin searches for
`fluent/plugin/s3_extractor_xxx.rb`. You can define your own (de)compression with
the `S3Output::Compressor`/`S3Input::Extractor` classes. The Compressor API is:

    module Fluent # Since fluent-plugin-s3 v1.0.0 or later, use Fluent::Plugin instead of Fluent
      class S3Output
        class XXXCompressor < Compressor
          S3Output.register_compressor('xxx', self)

          # Used for the file extension
          def ext
            'xxx'
          end

          # Used for the file content type
          def content_type
            'application/x-xxx'
          end

          # chunk is the buffer chunk. tmp is the destination file for upload
          def compress(chunk, tmp)
            # call command or something
          end
        end
      end
    end

`Extractor` is similar to `Compressor`; an illustrative skeleton is shown below.
See the bundled `Compressor`/`Extractor` classes for more detail.
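
As a sketch, an Extractor skeleton mirroring the Compressor example above might look like the following. The method names follow the bundled extractors, but treat this as an illustration under those assumptions rather than a drop-in implementation:

    module Fluent # Since fluent-plugin-s3 v1.0.0 or later, use Fluent::Plugin instead of Fluent
      class S3Input
        class XXXExtractor < Extractor
          S3Input.register_extractor('xxx', self)

          # Used for the file extension
          def ext
            'xxx'
          end

          # Used for the file content type
          def content_type
            'application/x-xxx'
          end

          # io is the downloaded S3 object; return the decompressed content
          def extract(io)
            # call command or something
          end
        end
      end
    end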
--------------------------------------------------------------------------------
/docs/input.md:
--------------------------------------------------------------------------------
# Input: Setup

1. Create a new [SQS](https://aws.amazon.com/documentation/sqs/) queue (use the same region as S3)
2. Set proper permission for the new queue
3. [Configure S3 event notification](http://docs.aws.amazon.com/AmazonS3/latest/dev/NotificationHowTo.html)
4. Write a configuration file such as fluent.conf
5. Run fluentd

# Configuration: Input

See also [Configuration: credentials](credentials.md) for common comprehensive parameters.

    <source>
      @type s3

      aws_key_id YOUR_AWS_KEY_ID
      aws_sec_key YOUR_AWS_SECRET_KEY
      s3_bucket YOUR_S3_BUCKET_NAME
      s3_region ap-northeast-1
      add_object_metadata true
      match_regexp production_.*

      <sqs>
        queue_name YOUR_SQS_QUEUE_NAME
      </sqs>
    </source>

## add_object_metadata

Whether or not object metadata should be added to the record. Defaults to `false`. See below for details.

## match_regexp

If provided, process the S3 object only if its key matches the regular expression.

## s3_bucket (required)

S3 bucket name.

## s3_region

S3 region name. For example, US West (Oregon) Region is
"us-west-2". The full list of regions is available here:
http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region. We
recommend using `s3_region` instead of `s3_endpoint`.

## store_as

Archive format on S3. You can use several formats:

* gzip (default)
* json
* text
* lzo (Need lzop command)
* lzma2 (Need xz command)
* gzip_command (Need gzip command)
  * This compressor uses an external gzip command and can therefore utilize multiple CPU cores, unlike `gzip`

See the [Use your (de)compression algorithm](howto.md#use-your-decompression-algorithm) section for adding another format.

## format

Parse each line of the S3 object as this format. Supported formats are
"apache_error", "apache2", "syslog", "json", "tsv", "ltsv", "csv",
"nginx" and "none".

## check_apikey_on_start

Check AWS key on start. Default is true.

## proxy_uri

URI of the proxy environment.

## \<sqs\> section

### queue_name (required)

SQS queue name. The SQS queue must be created in the same region as the S3 bucket.

### queue_owner_aws_account_id

SQS owner account ID.

### aws_key_id

Alternative AWS key id for SQS.

### aws_sec_key

Alternative AWS secret key for SQS.

### skip_delete

When true, messages are not deleted after the polling block. Default is false.

### wait_time_seconds

The long polling interval. Default is 20.

### retry_error_interval

Interval to retry polling SQS when polling is unsuccessful, in seconds. Default is 300.

### event_bridge_mode

When true, Amazon S3 Event Notification should be configured using the EventBridge integration. Default is false.
See [Configure S3 event notification using EventBridge](https://docs.aws.amazon.com/AmazonS3/latest/userguide/EventBridge.html) for additional information.
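
Conceptually, the input plugin long-polls the SQS queue for S3 event notifications and then downloads each referenced object. A simplified plain-Ruby sketch of that loop with the aws-sdk-sqs API (the queue name and region are placeholders; this is not the plugin's actual code):

    require 'json'
    require 'aws-sdk-sqs'

    sqs_client = Aws::SQS::Client.new(region: 'ap-northeast-1')
    queue_url = sqs_client.get_queue_url(queue_name: 'YOUR_SQS_QUEUE_NAME').queue_url

    # Long-poll the queue; wait_time_seconds / skip_delete correspond to the
    # <sqs> parameters above.
    poller = Aws::SQS::QueuePoller.new(queue_url, client: sqs_client)
    poller.poll(wait_time_seconds: 20, skip_delete: false) do |message|
      body = JSON.parse(message.body)
      # Each record carries the bucket name and object key that fired the event;
      # the plugin then fetches that object from S3 and parses it.
    end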
--------------------------------------------------------------------------------
/docs/output.md:
--------------------------------------------------------------------------------
# Configuration: Output

Here is a sample configuration and available parameters for fluentd v1 or later.
See also [Configuration: credentials](credentials.md) for common comprehensive parameters.

    <match pattern>
      @type s3

      aws_key_id YOUR_AWS_KEY_ID
      aws_sec_key YOUR_AWS_SECRET_KEY
      s3_bucket YOUR_S3_BUCKET_NAME
      s3_region ap-northeast-1

      path logs/${tag}/%Y/%m/%d/
      s3_object_key_format %{path}%{time_slice}_%{index}.%{file_extension}

      # if you want to use ${tag} or %Y/%m/%d/ like syntax in path / s3_object_key_format,
      # need to specify tag for ${tag} and time for %Y/%m/%d in <buffer> argument.
      <buffer tag,time>
        @type file
        path /var/log/fluent/s3
        timekey 3600 # 1 hour partition
        timekey_wait 10m
        timekey_use_utc true # use utc
      </buffer>
      <format>
        @type json
      </format>
    </match>

For [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section), you can use any record field in `path` / `s3_object_key_format`.

    path logs/${tag}/${foo}
    <buffer tag,foo>
      # parameters...
    </buffer>

See the official article for available parameters and placeholder usage in detail: [Config: Buffer Section](https://docs.fluentd.org/configuration/buffer-section#placeholders)

Note that this configuration doesn't work with fluentd v0.12. See [v0.12](v0.12.md) for the v0.12 style.

## aws_iam_retries

This parameter is deprecated. Use [instance_profile_credentials](credentials.md#instance_profile_credentials) instead.

The number of attempts to make (with exponential backoff) when loading
instance profile credentials from the EC2 metadata service using an IAM
role. Defaults to 5 retries.

## s3_bucket (required)

S3 bucket name.

## s3_region

S3 region name. For example, US West (Oregon) Region is "us-west-2". The
full list of regions is available here:
http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region. We
recommend using `s3_region` instead of [`s3_endpoint`](#s3_endpoint).

## s3_endpoint

Endpoint for S3-compatible services, for example Riak CS based storage or
something similar. This option is deprecated for AWS S3; use [`s3_region`](#s3_region) instead.

See also the AWS article: [Working with Regions](https://aws.amazon.com/blogs/developer/working-with-regions/).

## enable_transfer_acceleration

Enable [S3 Transfer Acceleration](https://docs.aws.amazon.com/AmazonS3/latest/dev/transfer-acceleration.html) for uploads. **IMPORTANT**: For this to work, you must first enable this feature on your destination S3 bucket.

## enable_dual_stack

Enable [Amazon S3 Dual-Stack Endpoints](https://docs.aws.amazon.com/AmazonS3/latest/dev/dual-stack-endpoints.html) for uploads. This makes it possible to use either IPv4 or IPv6 when connecting to S3.

## use_bundled_cert

For cases where the default SSL certificate is unavailable (e.g. Windows), you can set this option to true in order to use the AWS SDK bundled certificate. Default is false.

This fixes the following error often seen on Windows:

    SSL_connect returned=1 errno=0 state=SSLv3 read server certificate B: certificate verify failed (Seahorse::Client::NetworkingError)

## ssl_ca_bundle

Full path to the SSL certificate authority bundle file that should be used when verifying peer certificates. If you do not pass `ssl_ca_bundle` or `ssl_ca_directory`, the system default will be used if available.

## ssl_ca_directory

Full path of the directory that contains the unbundled SSL certificate authority files for verifying peer certificates. If you do not pass `ssl_ca_bundle` or `ssl_ca_directory`, the system default will be used if available.

## ssl_verify_peer

Verify the SSL certificate of the endpoint. Default is true. Set to false when you want to ignore the endpoint's SSL certificate.

## s3_object_key_format

The format of S3 object keys. You can use several built-in variables:

* %{path}
* %{time_slice}
* %{index}
* %{file_extension}
* %{hex_random}
* %{uuid_flush}
* %{hostname}

to decide keys dynamically.

* %{path} is exactly the value of **path** configured in the configuration file.
  E.g., "logs/" in the example configuration above.
* %{time_slice} is the time slice in text, formatted with **time_slice_format**.
* %{index} is a sequential number starting from 0, incremented when multiple files are uploaded to S3 in the same time slice.
* %{file_extension} depends on the **store_as** parameter.
* %{uuid_flush} is a UUID that is regenerated every time the buffer is flushed.
* %{hostname} is replaced with the `Socket.gethostname` result.
* %{hex_random} is a random hex string regenerated for each buffer chunk, not
  assured to be unique. It follows the performance-tuning technique `Add
  a Hex Hash Prefix to Key Name` described in [Request Rate and Performance
  Considerations - Amazon Simple Storage
  Service](https://docs.aws.amazon.com/AmazonS3/latest/dev/request-rate-perf-considerations.html).
  You can configure the length of the string with the
  `hex_random_length` parameter (default: 4).

The default format is `%{path}%{time_slice}_%{index}.%{file_extension}`.
In addition, you can use [buffer placeholders](https://docs.fluentd.org/configuration/buffer-section#placeholders) in this parameter,
so you can embed tag, time and record values like below:

    s3_object_key_format %{path}/events/%Y%m%d/${tag}_%{index}.%{file_extension}
    <buffer tag,time>
      # buffer parameters...
    </buffer>

For instance, using the example configuration above, actual object keys on S3
will be something like:

    "logs/20130111-22_0.gz"
    "logs/20130111-23_0.gz"
    "logs/20130111-23_1.gz"
    "logs/20130112-00_0.gz"

With the configuration:

    s3_object_key_format %{path}/events/ts=%{time_slice}/events_%{index}.%{file_extension}
    path log
    time_slice_format %Y%m%d-%H

You get:

    "log/events/ts=20130111-22/events_0.gz"
    "log/events/ts=20130111-23/events_0.gz"
    "log/events/ts=20130111-23/events_1.gz"
    "log/events/ts=20130112-00/events_0.gz"

NOTE: The ${hostname} placeholder is deprecated since v0.8.
You can get the same result by using [configuration's embedded Ruby code feature](https://docs.fluentd.org/configuration/config-file#embedded-ruby-code).

    s3_object_key_format %{path}%{time_slice}_%{hostname}%{index}.%{file_extension}
    s3_object_key_format "%{path}%{time_slice}_#{Socket.gethostname}%{index}.%{file_extension}"

The above two configurations are equivalent. The important point is that `#{Socket.gethostname}` must be wrapped in `""`.

NOTE: If `check_object` is set to `false`, ensure that the value of `s3_object_key_format` is unique in each write; if not, existing files will be overwritten.
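
To make the expansion concrete, here is a tiny standalone Ruby snippet that mimics how the built-in `%{...}` variables are substituted (illustrative only; the plugin's real implementation also handles buffer placeholders and key-conflict resolution):

    format = "%{path}%{time_slice}_%{index}.%{file_extension}"
    values = {
      "path" => "logs/", "time_slice" => "20130111-22",
      "index" => 0, "file_extension" => "gz"
    }
    # Replace each %{name} with its value, as in the examples above.
    key = format.gsub(/%\{(\w+)\}/) { values[Regexp.last_match(1)].to_s }
    puts key # => "logs/20130111-22_0.gz"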

## force_path_style

:force_path_style (Boolean) — default: false — When set to true, the
bucket name is always left in the request URI and never moved to the host
as a sub-domain. See Plugins::S3BucketDns for more details.

This parameter is deprecated. See the AWS announcement: https://aws.amazon.com/blogs/aws/amazon-s3-path-deprecation-plan-the-rest-of-the-story/

## store_as

Archive format on S3. You can use several formats:

* gzip (default)
* json
* text
* lzo (Need lzop command)
* lzma2 (Need xz command)
* gzip_command (Need gzip command)
  * This compressor uses an external gzip command and can therefore
    utilize multiple CPU cores, unlike `gzip`
* parquet (Need columnify command)
  * This compressor uses an external [columnify](https://github.com/reproio/columnify) command.
  * Use the [`<compress>`](#compress-for-parquet-compressor-only) section to configure columnify command behavior.

See the [Use your (de)compression algorithm](howto.md#use-your-decompression-algorithm) section for adding another format.

## \<compress\> (for parquet compressor only) section

### parquet_compression_codec

Parquet compression codec.

* uncompressed
* snappy (default)
* gzip
* lzo (unsupported by columnify)
* brotli (unsupported by columnify)
* lz4 (unsupported by columnify)
* zstd

### parquet_page_size

Parquet file page size. Default: 8192 bytes.

### parquet_row_group_size

Parquet file row group size. Default: 128 MB.

### record_type

Record data format type.

* avro
* csv
* jsonl
* tsv
* json
* msgpack (default)

### schema_type

Schema type.

* avro (default)
* bigquery

### schema_file (required)

Path to the schema file.

## \<format\> section

Change the one-line format in the S3 object. Supported formats are "out_file",
"json", "ltsv", "single_value" and other formatter plugins. See also the [official Formatter article](https://docs.fluentd.org/formatter).

* out_file (default).

        time\ttag\t{..json1..}
        time\ttag\t{..json2..}
        ...

* json

        {..json1..}
        {..json2..}
        ...

  In this format, "time" and "tag" are omitted. But you can add this
  information to the record by setting the `<inject>` option. If you set the following configuration in the
  S3 output:

      <format>
        @type json
      </format>
      <inject>
        time_key log_time
      </inject>

  then the record has a log_time field.

      {"log_time":"time string",...}

  See also the [official Inject Section article](https://docs.fluentd.org/configuration/inject-section).

* ltsv

        key1:value1\tkey2:value2
        key1:value1\tkey2:value2
        ...

* single_value

  Use the specified value instead of the entire record. If you get '{"message":"my
  log"}', then the contents are

      my log1
      my log2
      ...

  You can change the key name with the "message_key" option.

## auto_create_bucket

Create the S3 bucket if it does not exist. Default is true.

## check_bucket

Check whether the specified bucket exists in AWS. Default is true.

When false, fluentd will not check AWS S3 for the existence of the specified bucket.
This is for the case where the bucket is pre-created before running fluentd.

## check_object

Check before creation whether the object already exists. Default is true.

When false, s3_object_key_format defaults to %{path}%{time_slice}_%{hms_slice}.%{file_extension}, where
hms_slice is a time slice in hhmmss format, so that each object is unique.
Example object name, assuming it is created on 2016/11/16 3:30:54 PM: 20161116_153054.txt (the extension can be anything per the user's choice).

## check_apikey_on_start

Check AWS key on start. Default is true.

## proxy_uri

URI of the proxy environment.

## path

Path prefix of the files on S3. Default is "" (no prefix).
[Buffer placeholders](https://docs.fluentd.org/configuration/buffer-section#placeholders) are supported,
so you can embed tag, time and record values like below.

    path logs/%Y%m%d/${tag}/
    <buffer tag,time>
      # buffer parameters...
    </buffer>

## utc

Use UTC instead of local time.

## storage_class

Set storage class. Possible values are `STANDARD`, `REDUCED_REDUNDANCY`, `STANDARD_IA` from the [Ruby SDK](http://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Object.html#storage_class-instance_method).

Note that reduced redundancy is [not recommended](https://serverfault.com/a/1010951/512362).

## reduced_redundancy

Use S3 reduced redundancy storage for 33% cheaper pricing. Default is
false.

This is deprecated. Use `storage_class REDUCED_REDUNDANCY` instead.

## acl

Permission for the object in S3. This is useful for cross-account access
using IAM roles. Valid values are:

* private (default)
* public-read
* public-read-write (not recommended - see [Canned
  ACL](http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl))
* authenticated-read
* bucket-owner-read
* bucket-owner-full-control

To use cross-account access, you will need to create a bucket policy granting
the specific access required. Refer to the [AWS
documentation](http://docs.aws.amazon.com/AmazonS3/latest/dev/example-walkthroughs-managing-access-example3.html) for examples.

## grant_full_control

Allows the grantee READ, READ_ACP, and WRITE_ACP permissions on the object.
This is useful for cross-account access using IAM roles.

Valid values are `id="Grantee-CanonicalUserID"`. Please specify the grantee's canonical user ID.

e.g. `id="79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be"`

Note that a canonical user ID is different from an AWS account ID.
Please refer to the [AWS documentation](https://docs.aws.amazon.com/general/latest/gr/acct-identifiers.html) for more details.
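
For reference, this value ends up as the `grant_full_control` option of the SDK's `put_object` call. A hedged sketch of that mapping (bucket, key, body and the canonical user ID are placeholder values; not the plugin's actual code):

    require 'aws-sdk-s3'

    s3 = Aws::S3::Client.new(region: 'ap-northeast-1')
    s3.put_object(
      bucket: 'my-s3bucket',
      key: 'logs/20130111-22_0.gz',
      body: 'data',
      # Same id="..." syntax as the configuration value above
      grant_full_control: 'id="79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be"'
    )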
`id="79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be"` 369 | 370 | Note that a canonical user ID is different from an AWS account ID. 371 | Please refer to [AWS documentation](https://docs.aws.amazon.com/general/latest/gr/acct-identifiers.html) for more details. 372 | 373 | ## grant_read 374 | 375 | Allows grantee to read the object data and its metadata. 376 | Valid values are `id="Grantee-CanonicalUserID"`. Please specify the grantee's canonical user ID. 377 | 378 | e.g. `id="79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be"` 379 | 380 | ## grant_read_acp 381 | 382 | Allows grantee to read the object ACL. 383 | Valid values are `id="Grantee-CanonicalUserID"`. Please specify the grantee's canonical user ID. 384 | 385 | e.g. `id="79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be"` 386 | 387 | ## grant_write_acp 388 | 389 | Allows grantee to write the ACL for the applicable object. 390 | Valid values are `id="Grantee-CanonicalUserID"`. Please specify the grantee's canonical user ID. 391 | 392 | e.g. `id="79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be"` 393 | 394 | ## hex_random_length 395 | 396 | The length of `%{hex_random}` placeholder. Default is 4 as written in 397 | [Request Rate and Performance Considerations - Amazon Simple Storage 398 | Service](https://docs.aws.amazon.com/AmazonS3/latest/dev/request-rate-perf-considerations.html). 399 | The maximum length is 16. 400 | 401 | ## index_format 402 | 403 | `%{index}` is formatted by [sprintf](http://ruby-doc.org/core-2.2.0/Kernel.html#method-i-sprintf) using this format_string. Default is '%d'. Zero padding is supported e.g. `%04d` to ensure minimum length four digits. `%{index}` can be in lowercase or uppercase hex using '%x' or '%X' 404 | 405 | ## overwrite 406 | 407 | Overwrite already existing path. Default is false, which raises an error 408 | if a s3 object of the same path already exists, or increment the 409 | `%{index}` placeholder until finding an absent path. 410 | 411 | ## use_server_side_encryption 412 | 413 | The Server-side encryption algorithm used when storing this object in S3 414 | (e.g., AES256, aws:kms) 415 | 416 | ## ssekms_key_id 417 | 418 | Specifies the AWS KMS key ID to use for object encryption. You have to 419 | set "aws:kms" to [`use_server_side_encryption`](#use_server_side_encryption) to use the KMS encryption. 420 | 421 | ## sse_customer_algorithm 422 | 423 | Specifies the algorithm to use to when encrypting the object (e.g., AES256). 424 | 425 | ## sse_customer_key 426 | 427 | Specifies the AWS KMS key ID to use for object encryption. 428 | 429 | ## sse_customer_key_md5 430 | 431 | Specifies the 128-bit MD5 digest of the encryption key according to RFC 1321. 432 | 433 | ## checksum_algorithm 434 | 435 | AWS allows to calculate the integrity checksum server side. The additional checksum is 436 | used to validate the data during upload or download. The following 4 SHA and CRC algorithms are supported: 437 | 438 | * CRC32 439 | * CRC32C 440 | * SHA1 441 | * SHA256 442 | 443 | For more info refer to [object integrity](https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html). 444 | 445 | ## compute_checksums 446 | 447 | AWS SDK uses MD5 for API request/response by default. On FIPS enabled environment, 448 | OpenSSL returns an error because MD5 is disabled. If you want to use 449 | this plugin on FIPS enabled environment, set `compute_checksums false`. 

## signature_version

Signature version for API requests. `s3` means signature version 2 and
`v4` means signature version 4. Default is `nil` (follows the SDK's default).
This is useful when you use S3-compatible storage that accepts only signature version 2.

## warn_for_delay

Given a threshold to treat events as delayed, output warning logs if delayed events were put into S3.

## tagging

The S3 tag-set for the object. The tag-set must be encoded as URL query parameters. (For example, "Key1=Value1").

## \<bucket_lifecycle_rule\> section

Specify one or more lifecycle rules for the bucket.

    <bucket_lifecycle_rule>
      id UNIQUE_ID_FOR_THE_RULE
      prefix OPTIONAL_PREFIX # Objects whose keys begin with this prefix will be affected by the rule. If not specified, all objects of the bucket will be affected.
      expiration_days NUMBER_OF_DAYS # The number of days before the object will expire
    </bucket_lifecycle_rule>
--------------------------------------------------------------------------------
/docs/v0.12.md:
--------------------------------------------------------------------------------
# Configuration: Output (v0.12 style)

Here is a sample configuration for old fluentd v0.12. It works with fluentd v1 too, but is not recommended there.

    <match pattern>
      @type s3

      aws_key_id YOUR_AWS_KEY_ID
      aws_sec_key YOUR_AWS_SECRET_KEY
      s3_bucket YOUR_S3_BUCKET_NAME
      s3_region ap-northeast-1

      path logs/
      s3_object_key_format %{path}%{time_slice}_%{index}.%{file_extension}
      buffer_path /var/log/fluent/s3
      time_slice_format %Y%m%d-%H
      time_slice_wait 10m
      utc
      format json
    </match>

If you want to embed a tag in [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format), you need to use the `fluent-plugin-forest` plugin.

The following explanations cover the differences from v1. Other parameters are the same as v1; see [Configuration: Output](output.md) for them.

## format (for v0.12)

    @format json
    include_time_key true
    time_key log_time # default is time

This parameter is for v0.12. Use [`<format>`](https://docs.fluentd.org/configuration/format-section) and [`<inject>`](https://docs.fluentd.org/configuration/inject-section) in v1.

## buffer_path (for v0.12)

Path prefix of the files used to buffer logs.

This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `path` in v1.

## time_slice_format (for v0.12)

Format of the time used in the file name. Default is '%Y%m%d'. Use
'%Y%m%d%H' to split files hourly.

This parameter is for v0.12. Use [buffer placeholders](https://docs.fluentd.org/configuration/buffer-section#placeholders) for [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format) in v1.

## time_slice_wait (for v0.12)

The time to wait for old logs. Default is 10 minutes. Specify a larger value if
old logs may arrive late.

This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `timekey_wait` in v1.
53 | -------------------------------------------------------------------------------- /fluent-plugin-s3.gemspec: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | $:.push File.expand_path('../lib', __FILE__) 3 | 4 | Gem::Specification.new do |gem| 5 | gem.name = "fluent-plugin-s3" 6 | gem.description = "Amazon S3 output plugin for Fluentd event collector" 7 | gem.license = "Apache-2.0" 8 | gem.homepage = "https://github.com/fluent/fluent-plugin-s3" 9 | gem.summary = gem.description 10 | gem.version = File.read("VERSION").strip 11 | gem.authors = ["Sadayuki Furuhashi", "Masahiro Nakagawa"] 12 | gem.email = "frsyuki@gmail.com" 13 | #gem.platform = Gem::Platform::RUBY 14 | gem.files = `git ls-files`.split("\n") 15 | gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") 16 | gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) } 17 | gem.require_paths = ['lib'] 18 | 19 | gem.add_dependency "fluentd", [">= 0.14.22", "< 2"] 20 | gem.add_dependency "aws-sdk-s3", "~> 1.60" 21 | gem.add_dependency "aws-sdk-sqs", "~> 1.23" 22 | gem.add_development_dependency "rake", ">= 0.9.2" 23 | gem.add_development_dependency "test-unit", ">= 3.0.8" 24 | gem.add_development_dependency "test-unit-rr", ">= 1.0.3" 25 | gem.add_development_dependency "timecop" 26 | # aws-sdk-core requires one of ox, oga, libxml, nokogiri or rexml, 27 | # and rexml is no longer default gem as of Ruby 3.0. 28 | gem.add_development_dependency "rexml" 29 | gem.add_development_dependency "zstd-ruby" 30 | end 31 | -------------------------------------------------------------------------------- /lib/fluent/log-ext.rb: -------------------------------------------------------------------------------- 1 | require 'fluent/log' 2 | # For Fluentd v0.14.13 or earlier 3 | # logger for Aws::S3::Client and Aws::SQS::Client required `#<<` method 4 | module Fluent 5 | class Log 6 | unless method_defined?(:<<) 7 | def <<(message) 8 | write(message) 9 | end 10 | end 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /lib/fluent/plugin/in_s3.rb: -------------------------------------------------------------------------------- 1 | require 'fluent/plugin/input' 2 | require 'fluent/log-ext' 3 | 4 | require 'aws-sdk-s3' 5 | require 'aws-sdk-sqs' 6 | require 'aws-sdk-sqs/queue_poller' 7 | require 'cgi/util' 8 | require 'zlib' 9 | require 'time' 10 | require 'tempfile' 11 | 12 | module Fluent::Plugin 13 | class S3Input < Input 14 | Fluent::Plugin.register_input('s3', self) 15 | 16 | helpers :compat_parameters, :parser, :thread 17 | 18 | def initialize 19 | super 20 | @extractor = nil 21 | end 22 | 23 | DEFAULT_PARSE_TYPE = "none" 24 | 25 | desc "Use aws-sdk-ruby bundled cert" 26 | config_param :use_bundled_cert, :bool, default: false 27 | desc "Add object metadata to the records parsed out of a given object" 28 | config_param :add_object_metadata, :bool, default: false 29 | desc "AWS access key id" 30 | config_param :aws_key_id, :string, default: nil, secret: true 31 | desc "AWS secret key." 
32 | config_param :aws_sec_key, :string, default: nil, secret: true 33 | config_section :assume_role_credentials, multi: false do 34 | desc "The Amazon Resource Name (ARN) of the role to assume" 35 | config_param :role_arn, :string 36 | desc "An identifier for the assumed role session" 37 | config_param :role_session_name, :string 38 | desc "An IAM policy in JSON format" 39 | config_param :policy, :string, default: nil 40 | desc "The duration, in seconds, of the role session (900-3600)" 41 | config_param :duration_seconds, :integer, default: nil 42 | desc "A unique identifier that is used by third parties when assuming roles in their customers' accounts." 43 | config_param :external_id, :string, default: nil 44 | end 45 | # See the following link for additional params that could be added: 46 | # https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/STS/Client.html#assume_role_with_web_identity-instance_method 47 | config_section :web_identity_credentials, multi: false do 48 | desc "The Amazon Resource Name (ARN) of the role to assume" 49 | config_param :role_arn, :string # required 50 | desc "An identifier for the assumed role session" 51 | config_param :role_session_name, :string #required 52 | desc "The absolute path to the file on disk containing the OIDC token" 53 | config_param :web_identity_token_file, :string #required 54 | desc "An IAM policy in JSON format" 55 | config_param :policy, :string, default: nil 56 | desc "The duration, in seconds, of the role session (900-43200)" 57 | config_param :duration_seconds, :integer, default: nil 58 | end 59 | config_section :instance_profile_credentials, multi: false do 60 | desc "Number of times to retry when retrieving credentials" 61 | config_param :retries, :integer, default: nil 62 | desc "IP address (default:169.254.169.254)" 63 | config_param :ip_address, :string, default: nil 64 | desc "Port number (default:80)" 65 | config_param :port, :integer, default: nil 66 | desc "Number of seconds to wait for the connection to open" 67 | config_param :http_open_timeout, :float, default: nil 68 | desc "Number of seconds to wait for one block to be read" 69 | config_param :http_read_timeout, :float, default: nil 70 | # config_param :delay, :integer or :proc, :default => nil 71 | # config_param :http_degub_output, :io, :default => nil 72 | end 73 | config_section :shared_credentials, multi: false do 74 | desc "Path to the shared file. (default: $HOME/.aws/credentials)" 75 | config_param :path, :string, default: nil 76 | desc "Profile name. 
Default to 'default' or ENV['AWS_PROFILE']" 77 | config_param :profile_name, :string, default: nil 78 | end 79 | desc "S3 bucket name" 80 | config_param :s3_bucket, :string 81 | desc "S3 region name" 82 | config_param :s3_region, :string, default: ENV["AWS_REGION"] || "us-east-1" 83 | desc "Use 's3_region' instead" 84 | config_param :s3_endpoint, :string, default: nil 85 | desc "If true, the bucket name is always left in the request URI and never moved to the host as a sub-domain" 86 | config_param :force_path_style, :bool, default: false 87 | desc "Archive format on S3" 88 | config_param :store_as, :string, default: "gzip" 89 | desc "Check AWS key on start" 90 | config_param :check_apikey_on_start, :bool, default: true 91 | desc "URI of proxy environment" 92 | config_param :proxy_uri, :string, default: nil 93 | desc "Optional RegEx to match incoming messages" 94 | config_param :match_regexp, :regexp, default: nil 95 | 96 | config_section :sqs, required: true, multi: false do 97 | desc "SQS queue name" 98 | config_param :queue_name, :string, default: nil 99 | desc "SQS Owner Account ID" 100 | config_param :queue_owner_aws_account_id, :string, default: nil 101 | desc "Use 's3_region' instead" 102 | config_param :endpoint, :string, default: nil 103 | desc "AWS access key id for SQS user" 104 | config_param :aws_key_id, :string, default: nil, secret: true 105 | desc "AWS secret key for SQS user." 106 | config_param :aws_sec_key, :string, default: nil, secret: true 107 | desc "Skip message deletion" 108 | config_param :skip_delete, :bool, default: false 109 | desc "The long polling interval." 110 | config_param :wait_time_seconds, :integer, default: 20 111 | desc "Polling error retry interval." 112 | config_param :retry_error_interval, :integer, default: 300 113 | desc "Event bridge mode" 114 | config_param :event_bridge_mode, :bool, default: false 115 | end 116 | 117 | desc "Tag string" 118 | config_param :tag, :string, default: "input.s3" 119 | 120 | config_section :parse do 121 | config_set_default :@type, DEFAULT_PARSE_TYPE 122 | end 123 | 124 | attr_reader :bucket 125 | 126 | def reject_s3_endpoint? 127 | @s3_endpoint && !@s3_endpoint.end_with?('vpce.amazonaws.com') && 128 | @s3_endpoint.end_with?('amazonaws.com') && !['fips', 'gov'].any? { |e| @s3_endpoint.include?(e) } 129 | end 130 | 131 | def configure(conf) 132 | super 133 | 134 | if reject_s3_endpoint? 135 | raise Fluent::ConfigError, "s3_endpoint parameter is not supported for S3, use s3_region instead. This parameter is for S3 compatible services" 136 | end 137 | 138 | if @sqs.endpoint && (@sqs.endpoint.end_with?('amazonaws.com') && !['fips', 'gov'].any? { |e| @sqs.endpoint.include?(e) }) 139 | raise Fluent::ConfigError, "sqs/endpoint parameter is not supported for SQS, use s3_region instead. This parameter is for SQS compatible services" 140 | end 141 | 142 | parser_config = conf.elements("parse").first 143 | unless @sqs.queue_name 144 | raise Fluent::ConfigError, "sqs/queue_name is required" 145 | end 146 | 147 | if !!@aws_key_id ^ !!@aws_sec_key 148 | raise Fluent::ConfigError, "aws_key_id or aws_sec_key is missing" 149 | end 150 | 151 | if !!@sqs.aws_key_id ^ !!@sqs.aws_sec_key 152 | raise Fluent::ConfigError, "sqs/aws_key_id or sqs/aws_sec_key is missing" 153 | end 154 | 155 | Aws.use_bundled_cert! 
if @use_bundled_cert 156 | 157 | @extractor = EXTRACTOR_REGISTRY.lookup(@store_as).new(log: log) 158 | @extractor.configure(conf) 159 | 160 | @parser = parser_create(conf: parser_config, default_type: DEFAULT_PARSE_TYPE) 161 | end 162 | 163 | def multi_workers_ready? 164 | true 165 | end 166 | 167 | def start 168 | super 169 | 170 | s3_client = create_s3_client 171 | log.debug("Succeeded to create S3 client") 172 | @s3 = Aws::S3::Resource.new(client: s3_client) 173 | @bucket = @s3.bucket(@s3_bucket) 174 | 175 | raise "#{@bucket.name} is not found." unless @bucket.exists? 176 | 177 | check_apikeys if @check_apikey_on_start 178 | 179 | sqs_client = create_sqs_client 180 | log.debug("Succeeded to create SQS client") 181 | response = sqs_client.get_queue_url(queue_name: @sqs.queue_name, queue_owner_aws_account_id: @sqs.queue_owner_aws_account_id) 182 | sqs_queue_url = response.queue_url 183 | log.debug("Succeeded to get SQS queue URL") 184 | 185 | @poller = Aws::SQS::QueuePoller.new(sqs_queue_url, client: sqs_client) 186 | 187 | @running = true 188 | thread_create(:in_s3, &method(:run)) 189 | end 190 | 191 | def shutdown 192 | @running = false 193 | super 194 | end 195 | 196 | private 197 | 198 | def run 199 | options = {} 200 | options[:wait_time_seconds] = @sqs.wait_time_seconds 201 | options[:skip_delete] = @sqs.skip_delete 202 | @poller.before_request do |stats| 203 | throw :stop_polling unless @running 204 | end 205 | begin 206 | @poller.poll(options) do |message| 207 | begin 208 | body = Yajl.load(message.body) 209 | log.debug(body) 210 | next unless is_valid_queue(body) # skip test queue 211 | if @match_regexp 212 | raw_key = get_raw_key(body) 213 | key = CGI.unescape(raw_key) 214 | next unless @match_regexp.match?(key) 215 | end 216 | process(body) 217 | rescue => e 218 | log.warn(error: e) 219 | log.warn_backtrace(e.backtrace) 220 | throw :skip_delete 221 | end 222 | end 223 | rescue => e 224 | log.warn("SQS Polling Failed. 
Retry in #{@sqs.retry_error_interval} seconds", error: e) 225 | sleep(@sqs.retry_error_interval) 226 | retry 227 | end 228 | end 229 | 230 | def is_valid_queue(body) 231 | if @sqs.event_bridge_mode 232 | log.debug("checking for eventbridge property") 233 | !!body["detail"] 234 | else 235 | log.debug("checking for Records property") 236 | !!body["Records"] 237 | end 238 | end 239 | 240 | def get_raw_key(body) 241 | if @sqs.event_bridge_mode 242 | body["detail"]["object"]["key"] 243 | else 244 | body["Records"].first["s3"]["object"]["key"] 245 | end 246 | end 247 | 248 | def setup_credentials 249 | options = {} 250 | credentials_options = {} 251 | case 252 | when @aws_key_id && @aws_sec_key 253 | options[:access_key_id] = @aws_key_id 254 | options[:secret_access_key] = @aws_sec_key 255 | when @assume_role_credentials 256 | c = @assume_role_credentials 257 | credentials_options[:role_arn] = c.role_arn 258 | credentials_options[:role_session_name] = c.role_session_name 259 | credentials_options[:policy] = c.policy if c.policy 260 | credentials_options[:duration_seconds] = c.duration_seconds if c.duration_seconds 261 | credentials_options[:external_id] = c.external_id if c.external_id 262 | if @s3_region 263 | credentials_options[:client] = Aws::STS::Client.new(:region => @s3_region) 264 | end 265 | options[:credentials] = Aws::AssumeRoleCredentials.new(credentials_options) 266 | when @web_identity_credentials 267 | c = @web_identity_credentials 268 | credentials_options[:role_arn] = c.role_arn 269 | credentials_options[:role_session_name] = c.role_session_name 270 | credentials_options[:web_identity_token_file] = c.web_identity_token_file 271 | credentials_options[:policy] = c.policy if c.policy 272 | credentials_options[:duration_seconds] = c.duration_seconds if c.duration_seconds 273 | if @s3_region 274 | credentials_options[:client] = Aws::STS::Client.new(:region => @s3_region) 275 | end 276 | options[:credentials] = Aws::AssumeRoleWebIdentityCredentials.new(credentials_options) 277 | when @instance_profile_credentials 278 | c = @instance_profile_credentials 279 | credentials_options[:retries] = c.retries if c.retries 280 | credentials_options[:ip_address] = c.ip_address if c.ip_address 281 | credentials_options[:port] = c.port if c.port 282 | credentials_options[:http_open_timeout] = c.http_open_timeout if c.http_open_timeout 283 | credentials_options[:http_read_timeout] = c.http_read_timeout if c.http_read_timeout 284 | if ENV["AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"] 285 | options[:credentials] = Aws::ECSCredentials.new(credentials_options) 286 | else 287 | options[:credentials] = Aws::InstanceProfileCredentials.new(credentials_options) 288 | end 289 | when @shared_credentials 290 | c = @shared_credentials 291 | credentials_options[:path] = c.path if c.path 292 | credentials_options[:profile_name] = c.profile_name if c.profile_name 293 | options[:credentials] = Aws::SharedCredentials.new(credentials_options) 294 | else 295 | # Use default credentials 296 | # See http://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html 297 | end 298 | options 299 | end 300 | 301 | def create_s3_client 302 | options = setup_credentials 303 | options[:region] = @s3_region if @s3_region 304 | options[:endpoint] = @s3_endpoint if @s3_endpoint 305 | options[:force_path_style] = @force_path_style 306 | options[:http_proxy] = @proxy_uri if @proxy_uri 307 | log.on_trace do 308 | options[:http_wire_trace] = true 309 | options[:logger] = log 310 | end 311 | 312 | Aws::S3::Client.new(options) 313 | end 314 | 315 
| def create_sqs_client 316 | options = setup_credentials 317 | options[:region] = @s3_region if @s3_region 318 | options[:endpoint] = @sqs.endpoint if @sqs.endpoint 319 | options[:http_proxy] = @proxy_uri if @proxy_uri 320 | if @sqs.aws_key_id && @sqs.aws_sec_key 321 | options[:access_key_id] = @sqs.aws_key_id 322 | options[:secret_access_key] = @sqs.aws_sec_key 323 | end 324 | log.on_trace do 325 | options[:http_wire_trace] = true 326 | options[:logger] = log 327 | end 328 | 329 | Aws::SQS::Client.new(options) 330 | end 331 | 332 | def check_apikeys 333 | @bucket.objects.first 334 | log.debug("Succeeded to verify API keys") 335 | rescue => e 336 | raise "can't call S3 API. Please check your credentials or s3_region configuration. error = #{e.inspect}" 337 | end 338 | 339 | def process(body) 340 | raw_key = get_raw_key(body) 341 | key = CGI.unescape(raw_key) 342 | 343 | io = @bucket.object(key).get.body 344 | content = @extractor.extract(io) 345 | es = Fluent::MultiEventStream.new 346 | content.each_line do |line| 347 | @parser.parse(line) do |time, record| 348 | if @add_object_metadata 349 | record['s3_bucket'] = @s3_bucket 350 | record['s3_key'] = raw_key 351 | end 352 | es.add(time, record) 353 | end 354 | end 355 | router.emit_stream(@tag, es) 356 | end 357 | 358 | class Extractor 359 | include Fluent::Configurable 360 | 361 | attr_reader :log 362 | 363 | def initialize(log: $log, **options) 364 | super() 365 | @log = log 366 | end 367 | 368 | def configure(conf) 369 | super 370 | end 371 | 372 | def ext 373 | end 374 | 375 | def content_type 376 | end 377 | 378 | def extract(io) 379 | end 380 | 381 | private 382 | 383 | def check_command(command, algo = nil) 384 | require 'open3' 385 | 386 | algo = command if algo.nil? 387 | begin 388 | Open3.capture3("#{command} -V") 389 | rescue Errno::ENOENT 390 | raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{algo} compression" 391 | end 392 | end 393 | end 394 | 395 | class GzipExtractor < Extractor 396 | def ext 397 | 'gz'.freeze 398 | end 399 | 400 | def content_type 401 | 'application/x-gzip'.freeze 402 | end 403 | 404 | # https://bugs.ruby-lang.org/issues/9790 405 | # https://bugs.ruby-lang.org/issues/11180 406 | # https://github.com/exAspArk/multiple_files_gzip_reader 407 | def extract(io) 408 | parts = [] 409 | loop do 410 | unused = nil 411 | Zlib::GzipReader.wrap(io) do |gz| 412 | parts << gz.read 413 | unused = gz.unused 414 | gz.finish 415 | end 416 | io.pos -= unused ? unused.length : 0 417 | break if io.eof? 
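          # The object may contain multiple concatenated gzip members (see the
          # issue links above); when we are not yet at EOF, loop again to read
          # the next member.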
418 | end 419 | io.close 420 | parts.join 421 | end 422 | end 423 | 424 | class TextExtractor < Extractor 425 | def ext 426 | 'txt'.freeze 427 | end 428 | 429 | def content_type 430 | 'text/plain'.freeze 431 | end 432 | 433 | def extract(io) 434 | io.read 435 | end 436 | end 437 | 438 | class JsonExtractor < TextExtractor 439 | def ext 440 | 'json'.freeze 441 | end 442 | 443 | def content_type 444 | 'application/json'.freeze 445 | end 446 | end 447 | 448 | EXTRACTOR_REGISTRY = Fluent::Registry.new(:s3_extractor_type, 'fluent/plugin/s3_extractor_') 449 | { 450 | 'gzip' => GzipExtractor, 451 | 'text' => TextExtractor, 452 | 'json' => JsonExtractor 453 | }.each do |name, extractor| 454 | EXTRACTOR_REGISTRY.register(name, extractor) 455 | end 456 | 457 | def self.register_extractor(name, extractor) 458 | EXTRACTOR_REGISTRY.register(name, extractor) 459 | end 460 | end 461 | end 462 | -------------------------------------------------------------------------------- /lib/fluent/plugin/out_s3.rb: -------------------------------------------------------------------------------- 1 | require 'fluent/plugin/output' 2 | require 'fluent/log-ext' 3 | require 'fluent/timezone' 4 | require 'aws-sdk-s3' 5 | require 'zlib' 6 | require 'time' 7 | require 'tempfile' 8 | require 'securerandom' 9 | 10 | module Fluent::Plugin 11 | class S3Output < Output 12 | Fluent::Plugin.register_output('s3', self) 13 | 14 | helpers :compat_parameters, :formatter, :inject 15 | 16 | def initialize 17 | super 18 | @compressor = nil 19 | @uuid_flush_enabled = false 20 | end 21 | 22 | desc "Path prefix of the files on S3" 23 | config_param :path, :string, default: "" 24 | desc "The Server-side encryption algorithm used when storing this object in S3 (AES256, aws:kms)" 25 | config_param :use_server_side_encryption, :string, default: nil 26 | desc "Use aws-sdk-ruby bundled cert" 27 | config_param :use_bundled_cert, :bool, default: false 28 | desc "AWS access key id" 29 | config_param :aws_key_id, :string, default: nil, secret: true 30 | desc "AWS secret key." 31 | config_param :aws_sec_key, :string, default: nil, secret: true 32 | config_section :assume_role_credentials, multi: false do 33 | desc "The Amazon Resource Name (ARN) of the role to assume" 34 | config_param :role_arn, :string, secret: true 35 | desc "An identifier for the assumed role session" 36 | config_param :role_session_name, :string 37 | desc "An IAM policy in JSON format" 38 | config_param :policy, :string, default: nil 39 | desc "The duration, in seconds, of the role session (900-3600)" 40 | config_param :duration_seconds, :integer, default: nil 41 | desc "A unique identifier that is used by third parties when assuming roles in their customers' accounts." 42 | config_param :external_id, :string, default: nil, secret: true 43 | desc "The region of the STS endpoint to use." 
44 | config_param :sts_region, :string, default: nil 45 | desc "A http proxy url for requests to aws sts service" 46 | config_param :sts_http_proxy, :string, default: nil, secret: true 47 | desc "A url for a regional sts api endpoint, the default is global" 48 | config_param :sts_endpoint_url, :string, default: nil 49 | end 50 | # See the following link for additional params that could be added: 51 | # https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/STS/Client.html#assume_role_with_web_identity-instance_method 52 | config_section :web_identity_credentials, multi: false do 53 | desc "The Amazon Resource Name (ARN) of the role to assume" 54 | config_param :role_arn, :string # required 55 | desc "An identifier for the assumed role session" 56 | config_param :role_session_name, :string #required 57 | desc "The absolute path to the file on disk containing the OIDC token" 58 | config_param :web_identity_token_file, :string #required 59 | desc "An IAM policy in JSON format" 60 | config_param :policy, :string, default: nil 61 | desc "The duration, in seconds, of the role session (900-43200)" 62 | config_param :duration_seconds, :integer, default: nil 63 | desc "The region of the STS endpoint to use." 64 | config_param :sts_region, :string, default: nil 65 | desc "A http proxy url for requests to aws sts service" 66 | config_param :sts_http_proxy, :string, default: nil, secret: true 67 | desc "A url for a regional sts api endpoint, the default is global" 68 | config_param :sts_endpoint_url, :string, default: nil 69 | end 70 | config_section :instance_profile_credentials, multi: false do 71 | desc "Number of times to retry when retrieving credentials" 72 | config_param :retries, :integer, default: nil 73 | desc "IP address (default:169.254.169.254)" 74 | config_param :ip_address, :string, default: nil 75 | desc "Port number (default:80)" 76 | config_param :port, :integer, default: nil 77 | desc "Number of seconds to wait for the connection to open" 78 | config_param :http_open_timeout, :float, default: nil 79 | desc "Number of seconds to wait for one block to be read" 80 | config_param :http_read_timeout, :float, default: nil 81 | # config_param :delay, :integer or :proc, :default => nil 82 | # config_param :http_degub_output, :io, :default => nil 83 | end 84 | config_section :shared_credentials, multi: false do 85 | desc "Path to the shared file. (default: $HOME/.aws/credentials)" 86 | config_param :path, :string, default: nil 87 | desc "Profile name. Default to 'default' or ENV['AWS_PROFILE']" 88 | config_param :profile_name, :string, default: nil 89 | end 90 | desc "The number of attempts to load instance profile credentials from the EC2 metadata service using IAM role" 91 | config_param :aws_iam_retries, :integer, default: nil, deprecated: "Use 'instance_profile_credentials' instead" 92 | desc "S3 bucket name" 93 | config_param :s3_bucket, :string 94 | desc "S3 region name" 95 | config_param :s3_region, :string, default: ENV["AWS_REGION"] || "us-east-1" 96 | desc "Use 's3_region' instead" 97 | config_param :s3_endpoint, :string, default: nil 98 | desc "If true, S3 Transfer Acceleration will be enabled for uploads. IMPORTANT: You must first enable this feature on your destination S3 bucket" 99 | config_param :enable_transfer_acceleration, :bool, default: false 100 | desc "If true, use Amazon S3 Dual-Stack Endpoints. Will make it possible to use either IPv4 or IPv6 when connecting to S3." 
101 | config_param :enable_dual_stack, :bool, default: false 102 | desc "If false, the certificate of endpoint will not be verified" 103 | config_param :ssl_verify_peer, :bool, :default => true 104 | desc "Full path to the SSL certificate authority bundle file that should be used when verifying peer certificates. If unspecified, defaults to the system CA if available." 105 | config_param :ssl_ca_bundle, :string, :default => nil 106 | desc "Full path of the directory that contains the unbundled SSL certificate authority files for verifying peer certificates. If you do not pass ssl_ca_bundle or ssl_ca_directory the the system default will be used if available." 107 | config_param :ssl_ca_directory, :string, :default => nil 108 | desc "The format of S3 object keys" 109 | config_param :s3_object_key_format, :string, default: "%{path}%{time_slice}_%{index}.%{file_extension}" 110 | desc "If true, the bucket name is always left in the request URI and never moved to the host as a sub-domain" 111 | config_param :force_path_style, :bool, default: false, deprecated: "S3 will drop path style API in 2020: See https://aws.amazon.com/blogs/aws/amazon-s3-path-deprecation-plan-the-rest-of-the-story/" 112 | desc "Archive format on S3" 113 | config_param :store_as, :string, default: "gzip" 114 | desc "Create S3 bucket if it does not exists" 115 | config_param :auto_create_bucket, :bool, default: true 116 | desc "Check AWS key on start" 117 | config_param :check_apikey_on_start, :bool, default: true 118 | desc "URI of proxy environment" 119 | config_param :proxy_uri, :string, default: nil 120 | desc "Use S3 reduced redundancy storage for 33% cheaper pricing. Deprecated. Use storage_class instead" 121 | config_param :reduced_redundancy, :bool, default: false, deprecated: "Use storage_class parameter instead." 
122 | desc "The type of storage to use for the object(STANDARD,REDUCED_REDUNDANCY,STANDARD_IA)" 123 | config_param :storage_class, :string, default: "STANDARD" 124 | desc "Permission for the object in S3" 125 | config_param :acl, :string, default: nil 126 | desc "Allows grantee READ, READ_ACP, and WRITE_ACP permissions on the object" 127 | config_param :grant_full_control, :string, default: nil 128 | desc "Allows grantee to read the object data and its metadata" 129 | config_param :grant_read, :string, default: nil 130 | desc "Allows grantee to read the object ACL" 131 | config_param :grant_read_acp, :string, default: nil 132 | desc "Allows grantee to write the ACL for the applicable object" 133 | config_param :grant_write_acp, :string, default: nil 134 | desc "The length of `%{hex_random}` placeholder(4-16)" 135 | config_param :hex_random_length, :integer, default: 4 136 | desc "`sprintf` format for `%{index}`" 137 | config_param :index_format, :string, default: "%d" 138 | desc "Overwrite already existing path" 139 | config_param :overwrite, :bool, default: false 140 | desc "Check bucket if exists or not" 141 | config_param :check_bucket, :bool, default: true 142 | desc "Check object before creation" 143 | config_param :check_object, :bool, default: true 144 | desc "Specifies the AWS KMS key ID to use for object encryption" 145 | config_param :ssekms_key_id, :string, default: nil, secret: true 146 | desc "Specifies the algorithm to use to when encrypting the object" 147 | config_param :sse_customer_algorithm, :string, default: nil 148 | desc "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data" 149 | config_param :sse_customer_key, :string, default: nil, secret: true 150 | desc "Specifies the 128-bit MD5 digest of the encryption key according to RFC 1321" 151 | config_param :sse_customer_key_md5, :string, default: nil, secret: true 152 | desc "AWS SDK uses MD5 for API request/response by default" 153 | config_param :compute_checksums, :bool, default: nil # use nil to follow SDK default configuration 154 | desc "Signature version for API Request (s3,v4)" 155 | config_param :signature_version, :string, default: nil # use nil to follow SDK default configuration 156 | desc "Given a threshold to treat events as delay, output warning logs if delayed events were put into s3" 157 | config_param :warn_for_delay, :time, default: nil 158 | desc "Arbitrary S3 tag-set for the object" 159 | config_param :tagging, :string, default: nil 160 | desc "Arbitrary S3 metadata headers to set for the object" 161 | config_param :checksum_algorithm, :string, default: nil 162 | desc "Indicates the algorithm you want Amazon S3 to use to create the checksum for the object (CRC32,CRC32C,SHA1,SHA256)" 163 | config_param :s3_metadata, :hash, default: nil 164 | config_section :bucket_lifecycle_rule, param_name: :bucket_lifecycle_rules, multi: true do 165 | desc "A unique ID for this rule" 166 | config_param :id, :string 167 | desc "Objects whose keys begin with this prefix will be affected by the rule. 
If not specified all objects of the bucket will be affected" 168 | config_param :prefix, :string, default: '' 169 | desc "The number of days before the object will expire" 170 | config_param :expiration_days, :integer 171 | end 172 | 173 | DEFAULT_FORMAT_TYPE = "out_file" 174 | 175 | config_section :format do 176 | config_set_default :@type, DEFAULT_FORMAT_TYPE 177 | end 178 | 179 | config_section :buffer do 180 | config_set_default :chunk_keys, ['time'] 181 | config_set_default :timekey, (60 * 60 * 24) 182 | end 183 | 184 | attr_reader :bucket 185 | 186 | MAX_HEX_RANDOM_LENGTH = 16 187 | 188 | def reject_s3_endpoint? 189 | @s3_endpoint && !@s3_endpoint.end_with?('vpce.amazonaws.com') && 190 | @s3_endpoint.end_with?('amazonaws.com') && !['fips', 'gov'].any? { |e| @s3_endpoint.include?(e) } 191 | end 192 | 193 | def configure(conf) 194 | compat_parameters_convert(conf, :buffer, :formatter, :inject) 195 | 196 | super 197 | 198 | Aws.use_bundled_cert! if @use_bundled_cert 199 | 200 | if reject_s3_endpoint? 201 | raise Fluent::ConfigError, "s3_endpoint parameter is not supported for S3, use s3_region instead. This parameter is for S3 compatible services" 202 | end 203 | 204 | begin 205 | buffer_type = @buffer_config[:@type] 206 | @compressor = COMPRESSOR_REGISTRY.lookup(@store_as).new(buffer_type: buffer_type, log: log) 207 | rescue => e 208 | log.warn "'#{@store_as}' not supported. Use 'text' instead: error = #{e.message}" 209 | @compressor = TextCompressor.new 210 | end 211 | @compressor.configure(conf) 212 | 213 | @formatter = formatter_create 214 | 215 | if @hex_random_length > MAX_HEX_RANDOM_LENGTH 216 | raise Fluent::ConfigError, "hex_random_length parameter must be less than or equal to #{MAX_HEX_RANDOM_LENGTH}" 217 | end 218 | 219 | unless @index_format =~ /^%(0\d*)?[dxX]$/ 220 | raise Fluent::ConfigError, "index_format parameter should follow `%[flags][width]type`. `0` is the only supported flag, and is mandatory if width is specified. `d`, `x` and `X` are supported types" 221 | end 222 | 223 | if @reduced_redundancy 224 | log.warn "reduced_redundancy parameter is deprecated. Use storage_class parameter instead" 225 | @storage_class = "REDUCED_REDUNDANCY" 226 | end 227 | 228 | @s3_object_key_format = process_s3_object_key_format 229 | if !@check_object 230 | if conf.has_key?('s3_object_key_format') 231 | log.warn "Set 'check_object false' and s3_object_key_format is specified. Check s3_object_key_format is unique in each write. If not, existing file will be overwritten." 232 | else 233 | log.warn "Set 'check_object false' and s3_object_key_format is not specified. Use '%{path}/%{time_slice}_%{hms_slice}.%{file_extension}' for s3_object_key_format" 234 | @s3_object_key_format = "%{path}/%{time_slice}_%{hms_slice}.%{file_extension}" 235 | end 236 | end 237 | 238 | check_s3_path_safety(conf) 239 | 240 | # For backward compatibility 241 | # TODO: Remove time_slice_format when end of support compat_parameters 242 | @configured_time_slice_format = conf['time_slice_format'] 243 | @values_for_s3_object_chunk = {} 244 | @time_slice_with_tz = Fluent::Timezone.formatter(@timekey_zone, @configured_time_slice_format || timekey_to_timeformat(@buffer_config['timekey'])) 245 | end 246 | 247 | def multi_workers_ready? 
248 | true 249 | end 250 | 251 | def start 252 | options = setup_credentials 253 | options[:region] = @s3_region if @s3_region 254 | options[:endpoint] = @s3_endpoint if @s3_endpoint 255 | options[:use_accelerate_endpoint] = @enable_transfer_acceleration 256 | options[:use_dualstack_endpoint] = @enable_dual_stack 257 | options[:http_proxy] = @proxy_uri if @proxy_uri 258 | options[:force_path_style] = @force_path_style 259 | options[:compute_checksums] = @compute_checksums unless @compute_checksums.nil? 260 | options[:signature_version] = @signature_version unless @signature_version.nil? 261 | options[:ssl_verify_peer] = @ssl_verify_peer 262 | options[:ssl_ca_bundle] = @ssl_ca_bundle if @ssl_ca_bundle 263 | options[:ssl_ca_directory] = @ssl_ca_directory if @ssl_ca_directory 264 | log.on_trace do 265 | options[:http_wire_trace] = true 266 | options[:logger] = log 267 | end 268 | 269 | s3_client = Aws::S3::Client.new(options) 270 | @s3 = Aws::S3::Resource.new(client: s3_client) 271 | @bucket = @s3.bucket(@s3_bucket) 272 | 273 | check_apikeys if @check_apikey_on_start 274 | ensure_bucket if @check_bucket 275 | ensure_bucket_lifecycle 276 | 277 | super 278 | end 279 | 280 | def format(tag, time, record) 281 | r = inject_values_to_record(tag, time, record) 282 | @formatter.format(tag, time, r) 283 | end 284 | 285 | def write(chunk) 286 | i = 0 287 | metadata = chunk.metadata 288 | previous_path = nil 289 | time_slice = if metadata.timekey.nil? 290 | ''.freeze 291 | else 292 | @time_slice_with_tz.call(metadata.timekey) 293 | end 294 | 295 | if @check_object 296 | begin 297 | @values_for_s3_object_chunk[chunk.unique_id] ||= { 298 | "%{hex_random}" => hex_random(chunk), 299 | } 300 | values_for_s3_object_key_pre = { 301 | "%{path}" => @path, 302 | "%{file_extension}" => @compressor.ext, 303 | } 304 | values_for_s3_object_key_post = { 305 | "%{time_slice}" => time_slice, 306 | "%{index}" => sprintf(@index_format,i), 307 | }.merge!(@values_for_s3_object_chunk[chunk.unique_id]) 308 | values_for_s3_object_key_post["%{uuid_flush}".freeze] = uuid_random if @uuid_flush_enabled 309 | 310 | s3path = @s3_object_key_format.gsub(%r(%{[^}]+})) do |matched_key| 311 | values_for_s3_object_key_pre.fetch(matched_key, matched_key) 312 | end 313 | s3path = extract_placeholders(s3path, chunk) 314 | s3path = s3path.gsub(%r(%{[^}]+}), values_for_s3_object_key_post) 315 | if (i > 0) && (s3path == previous_path) 316 | if @overwrite 317 | log.warn "#{s3path} already exists, but will overwrite" 318 | break 319 | else 320 | raise "duplicated path is generated. use %{index} in s3_object_key_format: path = #{s3path}" 321 | end 322 | end 323 | 324 | i += 1 325 | previous_path = s3path 326 | end while @bucket.object(s3path).exists? 
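        # At this point s3path refers to a key that does not exist yet, or
        # @overwrite allowed reusing the previously generated path.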
327 | else 328 | if @localtime 329 | hms_slicer = Time.now.strftime("%H%M%S") 330 | else 331 | hms_slicer = Time.now.utc.strftime("%H%M%S") 332 | end 333 | 334 | @values_for_s3_object_chunk[chunk.unique_id] ||= { 335 | "%{hex_random}" => hex_random(chunk), 336 | } 337 | values_for_s3_object_key_pre = { 338 | "%{path}" => @path, 339 | "%{file_extension}" => @compressor.ext, 340 | } 341 | values_for_s3_object_key_post = { 342 | "%{date_slice}" => time_slice, # For backward compatibility 343 | "%{time_slice}" => time_slice, 344 | "%{hms_slice}" => hms_slicer, 345 | }.merge!(@values_for_s3_object_chunk[chunk.unique_id]) 346 | values_for_s3_object_key_post["%{uuid_flush}".freeze] = uuid_random if @uuid_flush_enabled 347 | 348 | s3path = @s3_object_key_format.gsub(%r(%{[^}]+})) do |matched_key| 349 | values_for_s3_object_key_pre.fetch(matched_key, matched_key) 350 | end 351 | s3path = extract_placeholders(s3path, chunk) 352 | s3path = s3path.gsub(%r(%{[^}]+}), values_for_s3_object_key_post) 353 | end 354 | 355 | tmp = Tempfile.new("s3-") 356 | tmp.binmode 357 | begin 358 | @compressor.compress(chunk, tmp) 359 | tmp.rewind 360 | log.debug "out_s3: write chunk #{dump_unique_id_hex(chunk.unique_id)} with metadata #{chunk.metadata} to s3://#{@s3_bucket}/#{s3path}" 361 | 362 | put_options = { 363 | body: tmp, 364 | content_type: @compressor.content_type, 365 | storage_class: @storage_class, 366 | } 367 | put_options[:server_side_encryption] = @use_server_side_encryption if @use_server_side_encryption 368 | put_options[:ssekms_key_id] = @ssekms_key_id if @ssekms_key_id 369 | put_options[:sse_customer_algorithm] = @sse_customer_algorithm if @sse_customer_algorithm 370 | put_options[:sse_customer_key] = @sse_customer_key if @sse_customer_key 371 | put_options[:sse_customer_key_md5] = @sse_customer_key_md5 if @sse_customer_key_md5 372 | put_options[:acl] = @acl if @acl 373 | put_options[:grant_full_control] = @grant_full_control if @grant_full_control 374 | put_options[:grant_read] = @grant_read if @grant_read 375 | put_options[:grant_read_acp] = @grant_read_acp if @grant_read_acp 376 | put_options[:grant_write_acp] = @grant_write_acp if @grant_write_acp 377 | put_options[:checksum_algorithm] = @checksum_algorithm if @checksum_algorithm 378 | put_options[:tagging] = @tagging if @tagging 379 | 380 | if @s3_metadata 381 | put_options[:metadata] = {} 382 | @s3_metadata.each do |k, v| 383 | put_options[:metadata][k] = extract_placeholders(v, chunk).gsub(%r(%{[^}]+}), {"%{index}" => sprintf(@index_format, i - 1)}) 384 | end 385 | end 386 | @bucket.object(s3path).put(put_options) 387 | 388 | log.debug "out_s3: completed to write chunk #{dump_unique_id_hex(chunk.unique_id)} with metadata #{chunk.metadata} to s3://#{@s3_bucket}/#{s3path}" 389 | 390 | @values_for_s3_object_chunk.delete(chunk.unique_id) 391 | 392 | if @warn_for_delay 393 | if Time.at(chunk.metadata.timekey) < Time.now - @warn_for_delay 394 | log.warn "out_s3: delayed events were put to s3://#{@s3_bucket}/#{s3path}" 395 | end 396 | end 397 | ensure 398 | begin 399 | tmp.close(true) 400 | rescue => e 401 | log.info "out_s3: Tempfile#close caused unexpected error", error: e 402 | end 403 | end 404 | end 405 | 406 | private 407 | 408 | def hex_random(chunk) 409 | unique_hex = Fluent::UniqueId.hex(chunk.unique_id) 410 | unique_hex.reverse! 
# unique_hex is like (time_sec, time_usec, rand) => reversing gives more randomness 411 | unique_hex[0...@hex_random_length] 412 | end 413 | 414 | def uuid_random 415 | SecureRandom.uuid 416 | end 417 | 418 | # This is stolen from Fluentd 419 | def timekey_to_timeformat(timekey) 420 | case timekey 421 | when nil then '' 422 | when 0...60 then '%Y%m%d%H%M%S' # 60 exclusive 423 | when 60...3600 then '%Y%m%d%H%M' 424 | when 3600...86400 then '%Y%m%d%H' 425 | else '%Y%m%d' 426 | end 427 | end 428 | 429 | def ensure_bucket 430 | if !@bucket.exists? 431 | if @auto_create_bucket 432 | log.info "Creating bucket #{@s3_bucket} on #{@s3_endpoint}" 433 | @s3.create_bucket(bucket: @s3_bucket) 434 | else 435 | raise "The specified bucket does not exist: bucket = #{@s3_bucket}" 436 | end 437 | end 438 | end 439 | 440 | def ensure_bucket_lifecycle 441 | unless @bucket_lifecycle_rules.empty? 442 | old_rules = get_bucket_lifecycle_rules 443 | new_rules = @bucket_lifecycle_rules.sort_by { |rule| rule.id }.map do |rule| 444 | { id: rule.id, expiration: { days: rule.expiration_days }, prefix: rule.prefix, status: "Enabled" } 445 | end 446 | 447 | unless old_rules == new_rules 448 | log.info "Configuring bucket lifecycle rules for #{@s3_bucket} on #{@s3_endpoint}" 449 | @bucket.lifecycle_configuration.put({ lifecycle_configuration: { rules: new_rules } }) 450 | end 451 | end 452 | end 453 | 454 | def get_bucket_lifecycle_rules 455 | begin 456 | @bucket.lifecycle_configuration.rules.sort_by { |rule| rule[:id] }.map do |rule| 457 | { id: rule[:id], expiration: { days: rule[:expiration][:days] }, prefix: rule[:prefix], status: rule[:status] } 458 | end 459 | rescue Aws::S3::Errors::NoSuchLifecycleConfiguration 460 | [] 461 | end 462 | end 463 | 464 | def process_s3_object_key_format 465 | %W(%{uuid} %{uuid:random} %{uuid:hostname} %{uuid:timestamp}).each { |ph| 466 | if @s3_object_key_format.include?(ph) 467 | raise Fluent::ConfigError, %!#{ph} placeholder in s3_object_key_format is removed! 468 | end 469 | } 470 | 471 | if @s3_object_key_format.include?('%{uuid_flush}') 472 | @uuid_flush_enabled = true 473 | end 474 | 475 | @s3_object_key_format.gsub('%{hostname}') { |expr| 476 | log.warn "%{hostname} will be removed in the future. Use \"\#{Socket.gethostname}\" instead" 477 | Socket.gethostname 478 | } 479 | end 480 | 481 | def check_s3_path_safety(conf) 482 | unless conf.has_key?('s3_object_key_format') 483 | log.warn "The default value of s3_object_key_format will use ${chunk_id} instead of %{index} to avoid object conflict in v2" 484 | end 485 | 486 | is_working_on_parallel = @buffer_config.flush_thread_count > 1 || system_config.workers > 1 487 | if is_working_on_parallel && ['${chunk_id}', '%{uuid_flush}', '%{hex_random}'].none? { |key| @s3_object_key_format.include?(key) } 488 | log.warn "No ${chunk_id}, %{uuid_flush} or %{hex_random} in s3_object_key_format with multiple flush threads or multiple workers. Recommend to set ${chunk_id}, %{uuid_flush} or %{hex_random} to avoid data lost by object conflict" 489 | end 490 | end 491 | 492 | def check_apikeys 493 | @bucket.objects(prefix: @path, :max_keys => 1).first 494 | rescue Aws::S3::Errors::NoSuchBucket 495 | # ignore NoSuchBucket Error because ensure_bucket checks it. 496 | rescue => e 497 | raise "can't call S3 API. Please check your credentials or s3_region configuration. 
error = #{e.inspect}" 498 | end 499 | 500 | def setup_credentials 501 | options = {} 502 | credentials_options = {} 503 | case 504 | when @assume_role_credentials 505 | c = @assume_role_credentials 506 | iam_user_credentials = @aws_key_id && @aws_sec_key ? Aws::Credentials.new(@aws_key_id, @aws_sec_key) : nil 507 | region = c.sts_region || @s3_region 508 | credentials_options[:role_arn] = c.role_arn 509 | credentials_options[:role_session_name] = c.role_session_name 510 | credentials_options[:policy] = c.policy if c.policy 511 | credentials_options[:duration_seconds] = c.duration_seconds if c.duration_seconds 512 | credentials_options[:external_id] = c.external_id if c.external_id 513 | credentials_options[:sts_endpoint_url] = c.sts_endpoint_url if c.sts_endpoint_url 514 | credentials_options[:sts_http_proxy] = c.sts_http_proxy if c.sts_http_proxy 515 | if c.sts_http_proxy && c.sts_endpoint_url 516 | credentials_options[:client] = if iam_user_credentials 517 | Aws::STS::Client.new(region: region, http_proxy: c.sts_http_proxy, endpoint: c.sts_endpoint_url, credentials: iam_user_credentials) 518 | else 519 | Aws::STS::Client.new(region: region, http_proxy: c.sts_http_proxy, endpoint: c.sts_endpoint_url) 520 | end 521 | elsif c.sts_http_proxy 522 | credentials_options[:client] = if iam_user_credentials 523 | Aws::STS::Client.new(region: region, http_proxy: c.sts_http_proxy, credentials: iam_user_credentials) 524 | else 525 | Aws::STS::Client.new(region: region, http_proxy: c.sts_http_proxy) 526 | end 527 | elsif c.sts_endpoint_url 528 | credentials_options[:client] = if iam_user_credentials 529 | Aws::STS::Client.new(region: region, endpoint: c.sts_endpoint_url, credentials: iam_user_credentials) 530 | else 531 | Aws::STS::Client.new(region: region, endpoint: c.sts_endpoint_url) 532 | end 533 | else 534 | credentials_options[:client] = if iam_user_credentials 535 | Aws::STS::Client.new(region: region, credentials: iam_user_credentials) 536 | else 537 | Aws::STS::Client.new(region: region) 538 | end 539 | end 540 | 541 | options[:credentials] = Aws::AssumeRoleCredentials.new(credentials_options) 542 | when @aws_key_id && @aws_sec_key 543 | options[:access_key_id] = @aws_key_id 544 | options[:secret_access_key] = @aws_sec_key 545 | when @web_identity_credentials 546 | c = @web_identity_credentials 547 | region = c.sts_region || @s3_region 548 | credentials_options[:role_arn] = c.role_arn 549 | credentials_options[:role_session_name] = c.role_session_name 550 | credentials_options[:web_identity_token_file] = c.web_identity_token_file 551 | credentials_options[:policy] = c.policy if c.policy 552 | credentials_options[:duration_seconds] = c.duration_seconds if c.duration_seconds 553 | credentials_options[:sts_endpoint_url] = c.sts_endpoint_url if c.sts_endpoint_url 554 | credentials_options[:sts_http_proxy] = c.sts_http_proxy if c.sts_http_proxy 555 | if c.sts_http_proxy && c.sts_endpoint_url 556 | credentials_options[:client] = Aws::STS::Client.new(region: region, http_proxy: c.sts_http_proxy, endpoint: c.sts_endpoint_url) 557 | elsif c.sts_http_proxy 558 | credentials_options[:client] = Aws::STS::Client.new(region: region, http_proxy: c.sts_http_proxy) 559 | elsif c.sts_endpoint_url 560 | credentials_options[:client] = Aws::STS::Client.new(region: region, endpoint: c.sts_endpoint_url) 561 | else 562 | credentials_options[:client] = Aws::STS::Client.new(region: region) 563 | end 564 | options[:credentials] = Aws::AssumeRoleWebIdentityCredentials.new(credentials_options) 565 | when 
@instance_profile_credentials 566 | c = @instance_profile_credentials 567 | credentials_options[:retries] = c.retries if c.retries 568 | credentials_options[:ip_address] = c.ip_address if c.ip_address 569 | credentials_options[:port] = c.port if c.port 570 | credentials_options[:http_open_timeout] = c.http_open_timeout if c.http_open_timeout 571 | credentials_options[:http_read_timeout] = c.http_read_timeout if c.http_read_timeout 572 | if ENV["AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"] 573 | options[:credentials] = Aws::ECSCredentials.new(credentials_options) 574 | else 575 | options[:credentials] = Aws::InstanceProfileCredentials.new(credentials_options) 576 | end 577 | when @shared_credentials 578 | c = @shared_credentials 579 | credentials_options[:path] = c.path if c.path 580 | credentials_options[:profile_name] = c.profile_name if c.profile_name 581 | options[:credentials] = Aws::SharedCredentials.new(credentials_options) 582 | when @aws_iam_retries 583 | log.warn("'aws_iam_retries' parameter is deprecated. Use 'instance_profile_credentials' instead") 584 | credentials_options[:retries] = @aws_iam_retries 585 | if ENV["AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"] 586 | options[:credentials] = Aws::ECSCredentials.new(credentials_options) 587 | else 588 | options[:credentials] = Aws::InstanceProfileCredentials.new(credentials_options) 589 | end 590 | else 591 | # Use default credentials 592 | # See http://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html 593 | end 594 | options 595 | end 596 | 597 | class Compressor 598 | include Fluent::Configurable 599 | 600 | def initialize(opts = {}) 601 | super() 602 | @buffer_type = opts[:buffer_type] 603 | @log = opts[:log] 604 | end 605 | 606 | attr_reader :buffer_type, :log 607 | 608 | def configure(conf) 609 | super 610 | end 611 | 612 | def ext 613 | end 614 | 615 | def content_type 616 | end 617 | 618 | def compress(chunk, tmp) 619 | end 620 | 621 | private 622 | 623 | def check_command(command, algo = nil) 624 | require 'open3' 625 | 626 | algo = command if algo.nil? 
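        # Probe for the external command by running "#{command} -V";
        # Errno::ENOENT here means the binary is not on PATH.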
627 | begin 628 | Open3.capture3("#{command} -V") 629 | rescue Errno::ENOENT 630 | raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{algo} compression" 631 | end 632 | end 633 | end 634 | 635 | class GzipCompressor < Compressor 636 | def ext 637 | 'gz'.freeze 638 | end 639 | 640 | def content_type 641 | 'application/x-gzip'.freeze 642 | end 643 | 644 | def compress(chunk, tmp) 645 | w = Zlib::GzipWriter.new(tmp) 646 | chunk.write_to(w) 647 | w.finish 648 | ensure 649 | w.finish rescue nil 650 | end 651 | end 652 | 653 | class TextCompressor < Compressor 654 | def ext 655 | 'txt'.freeze 656 | end 657 | 658 | def content_type 659 | 'text/plain'.freeze 660 | end 661 | 662 | def compress(chunk, tmp) 663 | chunk.write_to(tmp) 664 | end 665 | end 666 | 667 | class JsonCompressor < TextCompressor 668 | def ext 669 | 'json'.freeze 670 | end 671 | 672 | def content_type 673 | 'application/json'.freeze 674 | end 675 | end 676 | 677 | COMPRESSOR_REGISTRY = Fluent::Registry.new(:s3_compressor_type, 'fluent/plugin/s3_compressor_') 678 | { 679 | 'gzip' => GzipCompressor, 680 | 'json' => JsonCompressor, 681 | 'text' => TextCompressor 682 | }.each { |name, compressor| 683 | COMPRESSOR_REGISTRY.register(name, compressor) 684 | } 685 | 686 | def self.register_compressor(name, compressor) 687 | COMPRESSOR_REGISTRY.register(name, compressor) 688 | end 689 | end 690 | end 691 | -------------------------------------------------------------------------------- /lib/fluent/plugin/s3_compressor_gzip_command.rb: -------------------------------------------------------------------------------- 1 | module Fluent::Plugin 2 | class S3Output 3 | class GzipCommandCompressor < Compressor 4 | S3Output.register_compressor('gzip_command', self) 5 | 6 | config_param :command_parameter, :string, default: '' 7 | 8 | def configure(conf) 9 | super 10 | check_command('gzip') 11 | end 12 | 13 | def ext 14 | 'gz'.freeze 15 | end 16 | 17 | def content_type 18 | 'application/x-gzip'.freeze 19 | end 20 | 21 | def compress(chunk, tmp) 22 | chunk_is_file = @buffer_type == 'file' 23 | path = if chunk_is_file 24 | chunk.path 25 | else 26 | w = Tempfile.new("chunk-gzip-tmp") 27 | w.binmode 28 | chunk.write_to(w) 29 | w.close 30 | w.path 31 | end 32 | 33 | res = system "gzip #{@command_parameter} -c #{path} > #{tmp.path}" 34 | unless res 35 | log.warn "failed to execute gzip command. Fallback to GzipWriter. status = #{$?}" 36 | begin 37 | tmp.truncate(0) 38 | gw = Zlib::GzipWriter.new(tmp) 39 | chunk.write_to(gw) 40 | gw.close 41 | ensure 42 | gw.close rescue nil 43 | end 44 | end 45 | ensure 46 | unless chunk_is_file 47 | w.close(true) rescue nil 48 | end 49 | end 50 | end 51 | end 52 | end 53 | -------------------------------------------------------------------------------- /lib/fluent/plugin/s3_compressor_lzma2.rb: -------------------------------------------------------------------------------- 1 | module Fluent::Plugin 2 | class S3Output 3 | class LZMA2Compressor < Compressor 4 | S3Output.register_compressor('lzma2', self) 5 | 6 | config_param :command_parameter, :string, default: '-qf0' 7 | 8 | def configure(conf) 9 | super 10 | check_command('xz', 'LZMA2') 11 | end 12 | 13 | def ext 14 | 'xz'.freeze 15 | end 16 | 17 | def content_type 18 | 'application/x-xz'.freeze 19 | end 20 | 21 | def compress(chunk, tmp) 22 | w = Tempfile.new("chunk-xz-tmp") 23 | w.binmode 24 | chunk.write_to(w) 25 | w.close 26 | 27 | # We don't check the return code because we can't recover lzop failure. 
28 | system "xz #{@command_parameter} -c #{w.path} > #{tmp.path}" 29 | ensure 30 | w.close rescue nil 31 | w.unlink rescue nil 32 | end 33 | end 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /lib/fluent/plugin/s3_compressor_lzo.rb: -------------------------------------------------------------------------------- 1 | module Fluent::Plugin 2 | class S3Output 3 | class LZOCompressor < Compressor 4 | S3Output.register_compressor('lzo', self) 5 | 6 | config_param :command_parameter, :string, default: '-qf1' 7 | 8 | def configure(conf) 9 | super 10 | check_command('lzop', 'LZO') 11 | end 12 | 13 | def ext 14 | 'lzo'.freeze 15 | end 16 | 17 | def content_type 18 | 'application/x-lzop'.freeze 19 | end 20 | 21 | def compress(chunk, tmp) 22 | w = Tempfile.new("chunk-tmp") 23 | w.binmode 24 | chunk.write_to(w) 25 | w.close 26 | 27 | # We don't check the return code because we can't recover lzop failure. 28 | system "lzop #{@command_parameter} -o #{tmp.path} #{w.path}" 29 | ensure 30 | w.close rescue nil 31 | w.unlink rescue nil 32 | end 33 | end 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /lib/fluent/plugin/s3_compressor_parquet.rb: -------------------------------------------------------------------------------- 1 | require "open3" 2 | 3 | module Fluent::Plugin 4 | class S3Output 5 | class ParquetCompressor < Compressor 6 | S3Output.register_compressor("parquet", self) 7 | 8 | config_section :compress, multi: false do 9 | desc "parquet compression codec" 10 | config_param :parquet_compression_codec, :enum, list: [:uncompressed, :snappy, :gzip, :lzo, :brotli, :lz4, :zstd], default: :snappy 11 | desc "parquet file page size" 12 | config_param :parquet_page_size, :size, default: 8192 13 | desc "parquet file row group size" 14 | config_param :parquet_row_group_size, :size, default: 128 * 1024 * 1024 15 | desc "record data format type" 16 | config_param :record_type, :enum, list: [:avro, :csv, :jsonl, :msgpack, :tsv, :json], default: :msgpack 17 | desc "schema type" 18 | config_param :schema_type, :enum, list: [:avro, :bigquery], default: :avro 19 | desc "path to schema file" 20 | config_param :schema_file, :string 21 | end 22 | 23 | def configure(conf) 24 | super 25 | check_command("columnify", "-h") 26 | 27 | if [:lzo, :brotli, :lz4].include?(@compress.parquet_compression_codec) 28 | raise Fluent::ConfigError, "unsupported compression codec: #{@compress.parquet_compression_codec}" 29 | end 30 | 31 | @parquet_compression_codec = @compress.parquet_compression_codec.to_s.upcase 32 | if @compress.record_type == :json 33 | @record_type = :jsonl 34 | else 35 | @record_type = @compress.record_type 36 | end 37 | end 38 | 39 | def ext 40 | "parquet".freeze 41 | end 42 | 43 | def content_type 44 | "application/octet-stream".freeze 45 | end 46 | 47 | def compress(chunk, tmp) 48 | chunk_is_file = @buffer_type == "file" 49 | path = if chunk_is_file 50 | chunk.path 51 | else 52 | w = Tempfile.new("chunk-parquet-tmp") 53 | w.binmode 54 | chunk.write_to(w) 55 | w.close 56 | w.path 57 | end 58 | stdout, stderr, status = columnify(path, tmp.path) 59 | unless status.success? 60 | raise Fluent::UnrecoverableError, "failed to execute columnify command. 
stdout=#{stdout} stderr=#{stderr} status=#{status.inspect}" 61 | end 62 | ensure 63 | unless chunk_is_file 64 | w.close(true) rescue nil 65 | end 66 | end 67 | 68 | private 69 | 70 | def columnify(src_path, dst_path) 71 | Open3.capture3("columnify", 72 | "-parquetCompressionCodec", @parquet_compression_codec, 73 | "-parquetPageSize", @compress.parquet_page_size.to_s, 74 | "-parquetRowGroupSize", @compress.parquet_row_group_size.to_s, 75 | "-recordType", @record_type.to_s, 76 | "-schemaType", @compress.schema_type.to_s, 77 | "-schemaFile", @compress.schema_file, 78 | "-output", dst_path, 79 | src_path) 80 | end 81 | end 82 | end 83 | end 84 | -------------------------------------------------------------------------------- /lib/fluent/plugin/s3_compressor_zstd.rb: -------------------------------------------------------------------------------- 1 | module Fluent::Plugin 2 | class S3Output 3 | class ZstdCompressor < Compressor 4 | S3Output.register_compressor('zstd', self) 5 | 6 | config_section :compress, param_name: :compress_config, init: true, multi: false do 7 | desc "Compression level for zstd (1-22)" 8 | config_param :level, :integer, default: 3 9 | end 10 | 11 | def initialize(opts = {}) 12 | super 13 | require 'zstd-ruby' 14 | rescue LoadError => e 15 | log.error "failed to load zstd-ruby gem. You need to manually install 'zstd-ruby' gem to use 'zstd'.", error: e.message 16 | raise Fluent::ConfigError, "failed to load 'zstd-ruby' gem" 17 | end 18 | 19 | def ext 20 | 'zst'.freeze 21 | end 22 | 23 | def content_type 24 | 'application/x-zst'.freeze 25 | end 26 | 27 | def compress(chunk, tmp) 28 | compressed = Zstd.compress(chunk.read, level: @compress_config.level) 29 | tmp.write(compressed) 30 | rescue => e 31 | log.warn "zstd compression failed: #{e.message}" 32 | raise 33 | end 34 | end 35 | end 36 | end 37 | -------------------------------------------------------------------------------- /lib/fluent/plugin/s3_extractor_gzip_command.rb: -------------------------------------------------------------------------------- 1 | module Fluent::Plugin 2 | class S3Input 3 | class GzipCommandExtractor < Extractor 4 | S3Input.register_extractor('gzip_command', self) 5 | 6 | config_param :command_parameter, :string, default: '-dc' 7 | 8 | def configure(conf) 9 | super 10 | check_command('gzip') 11 | end 12 | 13 | def ext 14 | 'gz'.freeze 15 | end 16 | 17 | def content_type 18 | 'application/x-gzip'.freeze 19 | end 20 | 21 | def extract(io) 22 | path = if io.respond_to?(:path) 23 | io.path 24 | else 25 | temp = Tempfile.new("gzip-temp") 26 | temp.write(io.read) 27 | temp.close 28 | temp.path 29 | end 30 | 31 | stdout, succeeded = Open3.capture2("gzip #{@command_parameter} #{path}") 32 | if succeeded 33 | stdout 34 | else 35 | log.warn "failed to execute gzip command. Fallback to GzipReader. 
status = #{succeeded}" 36 | begin 37 | io.rewind 38 | Zlib::GzipReader.wrap(io) do |gz| 39 | gz.read 40 | end 41 | end 42 | end 43 | end 44 | end 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /lib/fluent/plugin/s3_extractor_lzma2.rb: -------------------------------------------------------------------------------- 1 | module Fluent::Plugin 2 | class S3Input 3 | class LZMA2Extractor < Extractor 4 | S3Input.register_extractor('lzma2', self) 5 | 6 | config_param :command_parameter, :string, default: '-qdc' 7 | 8 | def configure(conf) 9 | super 10 | check_command('xz', 'LZMA') 11 | end 12 | 13 | def ext 14 | 'xz'.freeze 15 | end 16 | 17 | def content_type 18 | 'application/x-xz'.freeze 19 | end 20 | 21 | def extract(io) 22 | path = if io.respond_to?(path) 23 | io.path 24 | else 25 | temp = Tempfile.new("xz-temp") 26 | temp.write(io.read) 27 | temp.close 28 | temp.path 29 | end 30 | 31 | stdout, succeeded = Open3.capture2("xz #{@command_parameter} #{path}") 32 | if succeeded 33 | stdout 34 | else 35 | raise "Failed to extract #{path} with xz command." 36 | end 37 | end 38 | end 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /lib/fluent/plugin/s3_extractor_lzo.rb: -------------------------------------------------------------------------------- 1 | module Fluent::Plugin 2 | class S3Input 3 | class LZOExtractor < Extractor 4 | S3Input.register_extractor('lzo', self) 5 | 6 | config_param :command_parameter, :string, default: '-qdc' 7 | 8 | def configure(conf) 9 | super 10 | check_command('lzop', 'LZO') 11 | end 12 | 13 | def ext 14 | 'lzo'.freeze 15 | end 16 | 17 | def content_type 18 | 'application/x-lzop'.freeze 19 | end 20 | 21 | def extract(io) 22 | path = if io.respond_to?(path) 23 | io.path 24 | else 25 | temp = Tempfile.new("lzop-temp") 26 | temp.write(io.read) 27 | temp.close 28 | temp.path 29 | end 30 | 31 | stdout, succeeded = Open3.capture2("lzop #{@command_parameter} #{path}") 32 | if succeeded 33 | stdout 34 | else 35 | raise "Failed to extract #{path} with lzop command." 
--------------------------------------------------------------------------------
/lib/fluent/plugin/s3_extractor_lzma2.rb:
--------------------------------------------------------------------------------
1 | module Fluent::Plugin
2 |   class S3Input
3 |     class LZMA2Extractor < Extractor
4 |       S3Input.register_extractor('lzma2', self)
5 | 
6 |       config_param :command_parameter, :string, default: '-qdc'
7 | 
8 |       def configure(conf)
9 |         super
10 |         check_command('xz', 'LZMA')
11 |       end
12 | 
13 |       def ext
14 |         'xz'.freeze
15 |       end
16 | 
17 |       def content_type
18 |         'application/x-xz'.freeze
19 |       end
20 | 
21 |       def extract(io)
22 |         path = if io.respond_to?(:path)
23 |                  io.path
24 |                else
25 |                  temp = Tempfile.new("xz-temp")
26 |                  temp.write(io.read)
27 |                  temp.close
28 |                  temp.path
29 |                end
30 | 
31 |         stdout, status = Open3.capture2("xz #{@command_parameter} #{path}")
32 |         if status.success?
33 |           stdout
34 |         else
35 |           raise "Failed to extract #{path} with xz command."
36 |         end
37 |       end
38 |     end
39 |   end
40 | end
41 | 
--------------------------------------------------------------------------------
/lib/fluent/plugin/s3_extractor_lzo.rb:
--------------------------------------------------------------------------------
1 | module Fluent::Plugin
2 |   class S3Input
3 |     class LZOExtractor < Extractor
4 |       S3Input.register_extractor('lzo', self)
5 | 
6 |       config_param :command_parameter, :string, default: '-qdc'
7 | 
8 |       def configure(conf)
9 |         super
10 |         check_command('lzop', 'LZO')
11 |       end
12 | 
13 |       def ext
14 |         'lzo'.freeze
15 |       end
16 | 
17 |       def content_type
18 |         'application/x-lzop'.freeze
19 |       end
20 | 
21 |       def extract(io)
22 |         path = if io.respond_to?(:path)
23 |                  io.path
24 |                else
25 |                  temp = Tempfile.new("lzop-temp")
26 |                  temp.write(io.read)
27 |                  temp.close
28 |                  temp.path
29 |                end
30 | 
31 |         stdout, status = Open3.capture2("lzop #{@command_parameter} #{path}")
32 |         if status.success?
33 |           stdout
34 |         else
35 |           raise "Failed to extract #{path} with lzop command."
36 |         end
37 |       end
38 |     end
39 |   end
40 | end
41 | 
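Both extractors above, like the gzip_command one before them, share the same shape: if the downloaded object is already backed by a file, hand its path straight to the external command; otherwise spool the IO to a Tempfile first. Note that respond_to? takes a symbol naming the method, as in io.respond_to?(:path). A condensed sketch of the shared pattern (the helper name is illustrative, not part of the plugin's API):

    require 'tempfile'

    def spooled_path(io)
      return io.path if io.respond_to?(:path)  # File, Tempfile, ...

      # not file-backed (e.g. StringIO): spool to disk for the CLI tool
      temp = Tempfile.new("extract-temp")
      temp.write(io.read)
      temp.close
      temp.path
    end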
--------------------------------------------------------------------------------
/test/test_in_s3.rb:
--------------------------------------------------------------------------------
1 | require 'aws-sdk-s3'
2 | require 'aws-sdk-sqs'
3 | require 'aws-sdk-sqs/queue_poller'
4 | 
5 | require 'fluent/test'
6 | require 'fluent/test/helpers'
7 | require 'fluent/test/log'
8 | require 'fluent/test/driver/input'
9 | require 'fluent/plugin/in_s3'
10 | 
11 | require 'test/unit/rr'
12 | require 'zlib'
13 | require 'fileutils'
14 | require 'ostruct'
15 | 
16 | include Fluent::Test::Helpers
17 | 
18 | class S3InputTest < Test::Unit::TestCase
19 |   def setup
20 |     Fluent::Test.setup
21 |     @time = event_time("2015-09-30 13:14:15 UTC")
22 |     Fluent::Engine.now = @time
23 |     if Fluent.const_defined?(:EventTime)
24 |       stub(Fluent::EventTime).now { @time }
25 |     end
26 |   end
27 | 
28 |   CONFIG = %[
29 |     aws_key_id test_key_id
30 |     aws_sec_key test_sec_key
31 |     s3_bucket test_bucket
32 |     buffer_type memory
33 |     <sqs>
34 |       queue_name test_queue
35 |       queue_owner_aws_account_id 123456789123
36 |     </sqs>
37 |   ]
38 | 
39 |   def create_driver(conf = CONFIG)
40 |     Fluent::Test::Driver::Input.new(Fluent::Plugin::S3Input).configure(conf)
41 |   end
42 | 
43 |   class ConfigTest < self
44 |     def test_default
45 |       d = create_driver
46 |       extractor = d.instance.instance_variable_get(:@extractor)
47 |       actual = {
48 |         aws_key_id: d.instance.aws_key_id,
49 |         aws_sec_key: d.instance.aws_sec_key,
50 |         s3_bucket: d.instance.s3_bucket,
51 |         s3_region: d.instance.s3_region,
52 |         sqs_queue_name: d.instance.sqs.queue_name,
53 |         extractor_ext: extractor.ext,
54 |         extractor_content_type: extractor.content_type
55 |       }
56 |       expected = {
57 |         aws_key_id: "test_key_id",
58 |         aws_sec_key: "test_sec_key",
59 |         s3_bucket: "test_bucket",
60 |         s3_region: "us-east-1",
61 |         sqs_queue_name: "test_queue",
62 |         extractor_ext: "gz",
63 |         extractor_content_type: "application/x-gzip"
64 |       }
65 |       assert_equal(expected, actual)
66 |     end
67 | 
68 |     def test_empty
69 |       assert_raise(Fluent::ConfigError) do
70 |         create_driver("")
71 |       end
72 |     end
73 | 
74 |     def test_without_sqs_section
75 |       conf = %[
76 |         aws_key_id test_key_id
77 |         aws_sec_key test_sec_key
78 |         s3_bucket test_bucket
79 |       ]
80 |       assert_raise_message("'<sqs>' sections are required") do
81 |         create_driver(conf)
82 |       end
83 |     end
84 | 
85 |     def test_unknown_store_as
86 |       config = CONFIG + "\nstore_as unknown"
87 |       assert_raise(Fluent::NotFoundPluginError) do
88 |         create_driver(config)
89 |       end
90 |     end
91 | 
92 |     data("json" => ["json", "json", "application/json"],
93 |          "text" => ["text", "txt", "text/plain"],
94 |          "gzip" => ["gzip", "gz", "application/x-gzip"],
95 |          "gzip_command" => ["gzip_command", "gz", "application/x-gzip"],
96 |          "lzo" => ["lzo", "lzo", "application/x-lzop"],
97 |          "lzma2" => ["lzma2", "xz", "application/x-xz"])
98 |     def test_extractor(data)
99 |       store_type, ext, content_type = data
100 |       config = CONFIG + "\nstore_as #{store_type}\n"
101 |       d = create_driver(config)
102 |       extractor = d.instance.instance_variable_get(:@extractor)
103 |       expected = {
104 |         ext: ext,
105 |         content_type: content_type
106 |       }
107 |       actual = {
108 |         ext: extractor.ext,
109 |         content_type: extractor.content_type
110 |       }
111 |       assert_equal(expected, actual)
112 |     rescue Fluent::ConfigError => e
113 |       pend(e.message)
114 |     end
115 |   end
116 | 
117 | 
118 |   data('Normal endpoint' => 'riak-cs.example.com',
119 |        'VPCE endpoint' => 'vpce.amazonaws.com',
120 |        'FIPS endpoint' => 'fips.xxx.amazonaws.com',
121 |        'GOV endpoint' => 'gov.xxx.amazonaws.com')
122 |   def test_s3_endpoint_with_valid_endpoint(endpoint)
123 |     d = create_driver(CONFIG + "s3_endpoint #{endpoint}")
124 |     assert_equal endpoint, d.instance.s3_endpoint
125 |   end
126 | 
127 |   data('US West (Oregon)' => 's3-us-west-2.amazonaws.com',
128 |        'EU (Frankfurt)' => 's3.eu-central-1.amazonaws.com',
129 |        'Asia Pacific (Tokyo)' => 's3-ap-northeast-1.amazonaws.com',
130 |        'Invalid VPCE' => 'vpce.xxx.amazonaws.com')
131 |   def test_s3_endpoint_with_invalid_endpoint(endpoint)
132 |     assert_raise(Fluent::ConfigError, "s3_endpoint parameter is not supported, use s3_region instead. This parameter is for S3 compatible services") {
133 |       create_driver(CONFIG + "s3_endpoint #{endpoint}")
134 |     }
135 |   end
136 | 
137 |   data('US West (Oregon)' => 's3-us-west-2.amazonaws.com',
138 |        'EU (Frankfurt)' => 's3.eu-central-1.amazonaws.com',
139 |        'Asia Pacific (Tokyo)' => 's3-ap-northeast-1.amazonaws.com')
140 |   def test_sqs_endpoint_with_invalid_endpoint(endpoint)
141 |     assert_raise(Fluent::ConfigError, "sqs.endpoint parameter is not supported, use s3_region instead. This parameter is for SQS compatible services") {
142 |       conf = <<"EOS"
143 |         aws_key_id test_key_id
144 |         aws_sec_key test_sec_key
145 |         s3_bucket test_bucket
146 |         buffer_type memory
147 |         <sqs>
148 |           queue_name test_queue
149 |           endpoint #{endpoint}
150 |         </sqs>
151 | EOS
152 |       create_driver(conf)
153 |     }
154 |   end
155 | 
156 |   def test_sqs_with_invalid_keys_missing_secret_key
157 |     assert_raise(Fluent::ConfigError, "sqs/aws_key_id or sqs/aws_sec_key is missing") {
158 |       conf = <<"EOS"
159 |         aws_key_id test_key_id
160 |         aws_sec_key test_sec_key
161 |         s3_bucket test_bucket
162 |         buffer_type memory
163 |         <sqs>
164 |           queue_name test_queue
165 |           endpoint eu-west-1
166 |           aws_key_id sqs_test_key_id
167 |         </sqs>
168 | EOS
169 |       create_driver(conf)
170 |     }
171 |   end
172 | 
173 |   def test_sqs_with_invalid_aws_keys_missing_key_id
174 |     assert_raise(Fluent::ConfigError, "sqs/aws_key_id or sqs/aws_sec_key is missing") {
175 |       conf = <<"EOS"
176 |         aws_key_id test_key_id
177 |         aws_sec_key test_sec_key
178 |         s3_bucket test_bucket
179 |         buffer_type memory
180 |         <sqs>
181 |           queue_name test_queue
182 |           endpoint eu-west-1
183 |           aws_sec_key sqs_test_sec_key
184 |         </sqs>
185 | EOS
186 |       create_driver(conf)
187 |     }
188 |   end
189 | 
190 |   def test_sqs_with_valid_aws_keys_complete_pair
191 |     conf = <<"EOS"
192 |       aws_key_id test_key_id
193 |       aws_sec_key test_sec_key
194 |       s3_bucket test_bucket
195 |       buffer_type memory
196 |       <sqs>
197 |         queue_name test_queue
198 |         endpoint eu-west-1
199 |         aws_key_id sqs_test_key_id
200 |         aws_sec_key sqs_test_sec_key
201 |       </sqs>
202 | EOS
203 |     d = create_driver(conf)
204 |     assert_equal 'sqs_test_key_id', d.instance.sqs.aws_key_id
205 |     assert_equal 'sqs_test_sec_key', d.instance.sqs.aws_sec_key
206 |   end
207 | 
208 |   def test_with_invalid_aws_keys_missing_secret_key
209 |     assert_raise(Fluent::ConfigError, "aws_key_id or aws_sec_key is missing") {
210 |       conf = <<"EOS"
211 |         aws_key_id test_key_id
212 |         s3_bucket test_bucket
213 |         buffer_type memory
214 |         <sqs>
215 |           queue_name test_queue
216 |           endpoint eu-west-1
217 |         </sqs>
218 | EOS
219 |       create_driver(conf)
220 |     }
221 |   end
222 | 
223 |   def test_with_invalid_aws_keys_missing_key_id
224 |     assert_raise(Fluent::ConfigError, "aws_key_id or aws_sec_key is missing") {
225 |       conf = <<"EOS"
226 |         aws_sec_key test_sec_key
227 |         s3_bucket test_bucket
228 |         buffer_type memory
229 |         <sqs>
230 |           queue_name test_queue
231 |           endpoint eu-west-1
232 |         </sqs>
233 | EOS
234 |       create_driver(conf)
235 |     }
236 |   end
237 | 
238 |   def test_with_valid_aws_keys_complete_pair
239 |     conf = <<"EOS"
240 |       aws_key_id test_key_id
241 |       aws_sec_key test_sec_key
242 |       s3_bucket test_bucket
243 |       buffer_type memory
244 |       <sqs>
245 |         queue_name test_queue
246 |         endpoint eu-west-1
247 |       </sqs>
248 | EOS
249 |     d = create_driver(conf)
250 |     assert_equal 'test_key_id', d.instance.aws_key_id
251 |     assert_equal 'test_sec_key', d.instance.aws_sec_key
252 |   end
253 | 
254 |   Struct.new("StubResponse", :queue_url)
255 |   Struct.new("StubMessage", :message_id, :receipt_handle, :body)
256 | 
257 |   def setup_mocks
258 |     @s3_client = stub(Aws::S3::Client.new(stub_responses: true))
259 |     stub(@s3_client).config { OpenStruct.new({region: "us-east-1"}) }
260 |     mock(Aws::S3::Client).new(anything).at_least(0) { @s3_client }
261 |     @s3_resource = mock(Aws::S3::Resource.new(client: @s3_client))
262 |     mock(Aws::S3::Resource).new(client: @s3_client) { @s3_resource }
263 |     @s3_bucket = mock(Aws::S3::Bucket.new(name: "test",
264 |                                           client: @s3_client))
265 |     @s3_bucket.exists? { true }
266 |     @s3_resource.bucket(anything) { @s3_bucket }
267 | 
268 |     test_queue_url = "http://example.com/test_queue"
269 |     @sqs_client = stub(Aws::SQS::Client.new(stub_responses: true))
270 |     @sqs_response = stub(Struct::StubResponse.new(test_queue_url))
271 |     @sqs_client.get_queue_url(queue_name: "test_queue", queue_owner_aws_account_id: "123456789123"){ @sqs_response }
272 |     mock(Aws::SQS::Client).new(anything).once { @sqs_client }
273 |     @real_poller = Aws::SQS::QueuePoller.new(test_queue_url, client: @sqs_client)
274 |     @sqs_poller = stub(@real_poller)
275 |     mock(Aws::SQS::QueuePoller).new(anything, client: @sqs_client) { @sqs_poller }
276 |   end
277 | 
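Every emission test below drives the stubbed queue poller by hand instead of letting it loop: the get_messages stub fires the before_request hook, bumps stats.request_count, and flips the plugin's @running flag off so that d.run returns after a single poll. Reduced to its skeleton (names as used inside the tests), the handshake each test repeats is:

    # skeleton of the stubbed polling loop the tests below all share
    @sqs_poller.get_messages(anything, anything) do |config, stats|
      config.before_request.call(stats) if config.before_request
      stats.request_count += 1
      # stop the input plugin after the first poll so d.run terminates
      d.instance.instance_variable_set(:@running, false) if stats.request_count >= 1
      [message]  # one stubbed SQS message per poll
    end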
278 |   def test_no_records
279 |     setup_mocks
280 |     d = create_driver(CONFIG + "\ncheck_apikey_on_start false\n")
281 |     mock(d.instance).process(anything).never
282 | 
283 |     message = Struct::StubMessage.new(1, 1, "{}")
284 |     @sqs_poller.get_messages(anything, anything) do |config, stats|
285 |       config.before_request.call(stats) if config.before_request
286 |       stats.request_count += 1
287 |       if stats.request_count > 1
288 |         d.instance.instance_variable_set(:@running, false)
289 |       end
290 |       [message]
291 |     end
292 |     assert_nothing_raised do
293 |       d.run {}
294 |     end
295 |   end
296 | 
297 |   def test_one_record
298 |     setup_mocks
299 |     d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as text\nformat none\n")
300 | 
301 |     s3_object = stub(Object.new)
302 |     s3_response = stub(Object.new)
303 |     s3_response.body { StringIO.new("aaa") }
304 |     s3_object.get { s3_response }
305 |     @s3_bucket.object(anything).at_least(1) { s3_object }
306 | 
307 |     body = {
308 |       "Records" => [
309 |         {
310 |           "s3" => {
311 |             "object" => {
312 |               "key" => "test_key"
313 |             }
314 |           }
315 |         }
316 |       ]
317 |     }
318 |     message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
319 |     @sqs_poller.get_messages(anything, anything) do |config, stats|
320 |       config.before_request.call(stats) if config.before_request
321 |       stats.request_count += 1
322 |       if stats.request_count >= 1
323 |         d.instance.instance_variable_set(:@running, false)
324 |       end
325 |       [message]
326 |     end
327 |     d.run(expect_emits: 1)
328 |     events = d.events
329 |     assert_equal({ "message" => "aaa" }, events.first[2])
330 |   end
331 | 
332 |   def test_one_record_with_metadata
333 |     setup_mocks
334 |     d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as text\nformat none\nadd_object_metadata true\n")
335 | 
336 |     s3_object = stub(Object.new)
337 |     s3_response = stub(Object.new)
338 | 
s3_response.body { StringIO.new("aaa") } 339 | s3_object.get { s3_response } 340 | @s3_bucket.object(anything).at_least(1) { s3_object } 341 | 342 | body = { 343 | "Records" => [ 344 | { 345 | "s3" => { 346 | "object" => { 347 | "key" => "test_key" 348 | } 349 | } 350 | } 351 | ] 352 | } 353 | message = Struct::StubMessage.new(1, 1, Yajl.dump(body)) 354 | @sqs_poller.get_messages(anything, anything) do |config, stats| 355 | config.before_request.call(stats) if config.before_request 356 | stats.request_count += 1 357 | if stats.request_count >= 1 358 | d.instance.instance_variable_set(:@running, false) 359 | end 360 | [message] 361 | end 362 | d.run(expect_emits: 1) 363 | events = d.events 364 | assert_equal({ "s3_bucket" => "test_bucket", "s3_key" => "test_key", "message" => "aaa" }, events.first[2]) 365 | end 366 | 367 | def test_one_record_url_encoded 368 | setup_mocks 369 | d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as text\nformat none\n") 370 | 371 | s3_object = stub(Object.new) 372 | s3_response = stub(Object.new) 373 | s3_response.body { StringIO.new("aaa") } 374 | s3_object.get { s3_response } 375 | @s3_bucket.object('test key').at_least(1) { s3_object } 376 | 377 | body = { 378 | "Records" => [ 379 | { 380 | "s3" => { 381 | "object" => { 382 | "key" => "test+key" 383 | } 384 | } 385 | } 386 | ] 387 | } 388 | message = Struct::StubMessage.new(1, 1, Yajl.dump(body)) 389 | @sqs_poller.get_messages(anything, anything) do |config, stats| 390 | config.before_request.call(stats) if config.before_request 391 | stats.request_count += 1 392 | if stats.request_count >= 1 393 | d.instance.instance_variable_set(:@running, false) 394 | end 395 | [message] 396 | end 397 | d.run(expect_emits: 1) 398 | events = d.events 399 | assert_equal({ "message" => "aaa" }, events.first[2]) 400 | end 401 | 402 | def test_one_record_url_encoded_with_metadata 403 | setup_mocks 404 | d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as text\nformat none\nadd_object_metadata true") 405 | 406 | s3_object = stub(Object.new) 407 | s3_response = stub(Object.new) 408 | s3_response.body { StringIO.new("aaa") } 409 | s3_object.get { s3_response } 410 | @s3_bucket.object('test key').at_least(1) { s3_object } 411 | 412 | body = { 413 | "Records" => [ 414 | { 415 | "s3" => { 416 | "object" => { 417 | "key" => "test+key" 418 | } 419 | } 420 | } 421 | ] 422 | } 423 | message = Struct::StubMessage.new(1, 1, Yajl.dump(body)) 424 | @sqs_poller.get_messages(anything, anything) do |config, stats| 425 | config.before_request.call(stats) if config.before_request 426 | stats.request_count += 1 427 | if stats.request_count >= 1 428 | d.instance.instance_variable_set(:@running, false) 429 | end 430 | [message] 431 | end 432 | d.run(expect_emits: 1) 433 | events = d.events 434 | assert_equal({ "s3_bucket" => "test_bucket", "s3_key" => "test+key", "message" => "aaa" }, events.first[2]) 435 | end 436 | 437 | def test_one_record_multi_line 438 | setup_mocks 439 | d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as text\nformat none\n") 440 | 441 | s3_object = stub(Object.new) 442 | s3_response = stub(Object.new) 443 | s3_response.body { StringIO.new("aaa\nbbb\nccc\n") } 444 | s3_object.get { s3_response } 445 | @s3_bucket.object(anything).at_least(1) { s3_object } 446 | 447 | body = { 448 | "Records" => [ 449 | { 450 | "s3" => { 451 | "object" => { 452 | "key" => "test_key" 453 | } 454 | } 455 | } 456 | ] 457 | } 458 | message = Struct::StubMessage.new(1, 1, Yajl.dump(body)) 459 | 
@sqs_poller.get_messages(anything, anything) do |config, stats|
460 |       config.before_request.call(stats) if config.before_request
461 |       stats.request_count += 1
462 |       if stats.request_count >= 1
463 |         d.instance.instance_variable_set(:@running, false)
464 |       end
465 |       [message]
466 |     end
467 |     d.run(expect_emits: 1)
468 |     events = d.events
469 |     expected_records = [
470 |       { "message" => "aaa\n" },
471 |       { "message" => "bbb\n" },
472 |       { "message" => "ccc\n" }
473 |     ]
474 |     assert_equal(expected_records, events.map {|_tag, _time, record| record })
475 |   end
476 | 
477 |   def test_one_record_multi_line_with_metadata
478 |     setup_mocks
479 |     d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as text\nformat none\nadd_object_metadata true")
480 | 
481 |     s3_object = stub(Object.new)
482 |     s3_response = stub(Object.new)
483 |     s3_response.body { StringIO.new("aaa\nbbb\nccc\n") }
484 |     s3_object.get { s3_response }
485 |     @s3_bucket.object(anything).at_least(1) { s3_object }
486 | 
487 |     body = {
488 |       "Records" => [
489 |         {
490 |           "s3" => {
491 |             "object" => {
492 |               "key" => "test_key"
493 |             }
494 |           }
495 |         }
496 |       ]
497 |     }
498 |     message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
499 |     @sqs_poller.get_messages(anything, anything) do |config, stats|
500 |       config.before_request.call(stats) if config.before_request
501 |       stats.request_count += 1
502 |       if stats.request_count >= 1
503 |         d.instance.instance_variable_set(:@running, false)
504 |       end
505 |       [message]
506 |     end
507 |     d.run(expect_emits: 1)
508 |     events = d.events
509 |     expected_records = [
510 |       { "s3_bucket" => "test_bucket", "s3_key" => "test_key", "message" => "aaa\n" },
511 |       { "s3_bucket" => "test_bucket", "s3_key" => "test_key", "message" => "bbb\n" },
512 |       { "s3_bucket" => "test_bucket", "s3_key" => "test_key", "message" => "ccc\n" }
513 |     ]
514 |     assert_equal(expected_records, events.map {|_tag, _time, record| record })
515 |   end
516 | 
517 |   def test_gzip_single_stream
518 |     setup_mocks
519 |     d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as gzip\nformat none\n")
520 | 
521 |     s3_object = stub(Object.new)
522 |     s3_response = stub(Object.new)
523 |     s3_response.body {
524 |       io = StringIO.new
525 |       Zlib::GzipWriter.wrap(io) do |gz|
526 |         gz.write "aaa\nbbb\n"
527 |         gz.finish
528 |       end
529 |       io.rewind
530 |       io
531 |     }
532 |     s3_object.get { s3_response }
533 |     @s3_bucket.object(anything).at_least(1) { s3_object }
534 | 
535 |     body = {
536 |       "Records" => [
537 |         {
538 |           "s3" => {
539 |             "object" => {
540 |               "key" => "test_key"
541 |             }
542 |           }
543 |         }
544 |       ]
545 |     }
546 |     message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
547 |     @sqs_poller.get_messages(anything, anything) do |config, stats|
548 |       config.before_request.call(stats) if config.before_request
549 |       stats.request_count += 1
550 |       if stats.request_count >= 1
551 |         d.instance.instance_variable_set(:@running, false)
552 |       end
553 |       [message]
554 |     end
555 |     d.run(expect_emits: 1)
556 |     events = d.events
557 |     expected_records = [
558 |       { "message" => "aaa\n" },
559 |       { "message" => "bbb\n" }
560 |     ]
561 |     assert_equal(expected_records, events.map {|_tag, _time, record| record })
562 |   end
563 | 
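The next test covers objects holding several concatenated gzip members, which concatenated uploads can produce. Zlib::GzipReader stops at the end of the first member, so reading such an object takes a loop over GzipReader#unused. A sketch of one common way to do it (this is illustrative, not the plugin's exact implementation):

    require 'zlib'
    require 'stringio'

    # read every gzip member in io, not just the first
    def read_all_gzip_members(io)
      out = +""
      loop do
        gz = Zlib::GzipReader.new(io)
        out << gz.read
        unused = gz.unused         # bytes left after this member, or nil
        gz.finish                  # close gz without closing the underlying io
        break if unused.nil? || unused.empty?
        io = StringIO.new(unused)  # keep going with the remainder
      end
      out
    end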
564 |   def test_gzip_multiple_streams
565 |     setup_mocks
566 |     d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as gzip\nformat none\n")
567 | 
568 |     s3_object = stub(Object.new)
569 |     s3_response = stub(Object.new)
570 |     s3_response.body {
571 |       io = StringIO.new
572 |       Zlib::GzipWriter.wrap(io) do |gz|
573 |         gz.write "aaa\nbbb\n"
574 |         gz.finish
575 |       end
576 |       Zlib::GzipWriter.wrap(io) do |gz|
577 |         gz.write "ccc\nddd\n"
578 |         gz.finish
579 |       end
580 |       io.rewind
581 |       io
582 |     }
583 |     s3_object.get { s3_response }
584 |     @s3_bucket.object(anything).at_least(1) { s3_object }
585 | 
586 |     body = {
587 |       "Records" => [
588 |         {
589 |           "s3" => {
590 |             "object" => {
591 |               "key" => "test_key"
592 |             }
593 |           }
594 |         }
595 |       ]
596 |     }
597 |     message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
598 |     @sqs_poller.get_messages(anything, anything) do |config, stats|
599 |       config.before_request.call(stats) if config.before_request
600 |       stats.request_count += 1
601 |       if stats.request_count >= 1
602 |         d.instance.instance_variable_set(:@running, false)
603 |       end
604 |       [message]
605 |     end
606 |     d.run(expect_emits: 1)
607 |     events = d.events
608 |     expected_records = [
609 |       { "message" => "aaa\n" },
610 |       { "message" => "bbb\n" },
611 |       { "message" => "ccc\n" },
612 |       { "message" => "ddd\n" }
613 |     ]
614 |     assert_equal(expected_records, events.map {|_tag, _time, record| record })
615 |   end
616 | 
617 |   def test_regexp_matching
618 |     setup_mocks
619 |     d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as text\nformat none\nmatch_regexp .*_key?")
620 | 
621 |     s3_object = stub(Object.new)
622 |     s3_response = stub(Object.new)
623 |     s3_response.body { StringIO.new("aaa bbb ccc") }
624 |     s3_object.get { s3_response }
625 |     @s3_bucket.object(anything).at_least(1) { s3_object }
626 | 
627 |     body = {
628 |       "Records" => [
629 |         {
630 |           "s3" => {
631 |             "object" => {
632 |               "key" => "test_key"
633 |             }
634 |           }
635 |         }
636 |       ]
637 |     }
638 |     message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
639 |     @sqs_poller.get_messages(anything, anything) do |config, stats|
640 |       config.before_request.call(stats) if config.before_request
641 |       stats.request_count += 1
642 |       if stats.request_count >= 1
643 |         d.instance.instance_variable_set(:@running, false)
644 |       end
645 |       [message]
646 |     end
647 |     d.run(expect_emits: 1)
648 |     events = d.events
649 |     assert_equal({ "message" => "aaa bbb ccc" }, events.first[2])
650 |   end
651 | 
652 |   def test_regexp_not_matching
653 |     setup_mocks
654 |     d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as text\nformat none\nmatch_regexp live?_key")
655 | 
656 |     body = {
657 |       "Records" => [
658 |         {
659 |           "s3" => {
660 |             "object" => {
661 |               "key" => "test_key"
662 |             }
663 |           }
664 |         }
665 |       ]
666 |     }
667 |     message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
668 |     @sqs_poller.get_messages(anything, anything) do |config, stats|
669 |       config.before_request.call(stats) if config.before_request
670 |       stats.request_count += 1
671 |       if stats.request_count >= 1
672 |         d.instance.instance_variable_set(:@running, false)
673 |       end
674 |       [message]
675 |     end
676 |     assert_nothing_raised do
677 |       d.run {}
678 |     end
679 |   end
680 | 
681 |   def test_event_bridge_mode
682 |     setup_mocks
683 |     d = create_driver("
684 |       aws_key_id test_key_id
685 |       aws_sec_key test_sec_key
686 |       s3_bucket test_bucket
687 |       buffer_type memory
688 |       check_apikey_on_start false
689 |       store_as text
690 |       format none
691 |       <sqs>
692 |         event_bridge_mode true
693 |         queue_name test_queue
694 |         queue_owner_aws_account_id 123456789123
695 |       </sqs>
696 |     ")
697 | 
698 |     s3_object = stub(Object.new)
699 |     s3_response = stub(Object.new)
700 |     s3_response.body { StringIO.new("aaa") }
701 |     s3_object.get { s3_response }
702 |     @s3_bucket.object(anything).at_least(1) { s3_object }
703 | 
704 |     body = {
705 |       "detail" => {
706 |         "object" => {
707 |           "key" => "test_key"
708 |         }
709 |       }
710 | } 711 | 712 | message = Struct::StubMessage.new(1, 1, Yajl.dump(body)) 713 | @sqs_poller.get_messages(anything, anything) do |config, stats| 714 | config.before_request.call(stats) if config.before_request 715 | stats.request_count += 1 716 | if stats.request_count >= 1 717 | d.instance.instance_variable_set(:@running, false) 718 | end 719 | [message] 720 | end 721 | d.run(expect_emits: 1) 722 | events = d.events 723 | assert_equal({ "message" => "aaa" }, events.first[2]) 724 | end 725 | 726 | end 727 | -------------------------------------------------------------------------------- /test/test_out_s3.rb: -------------------------------------------------------------------------------- 1 | require 'fluent/test' 2 | require 'fluent/test/helpers' 3 | require 'fluent/test/log' 4 | require 'fluent/test/driver/output' 5 | require 'aws-sdk-s3' 6 | require 'fluent/plugin/out_s3' 7 | 8 | require 'test/unit/rr' 9 | require 'zlib' 10 | require 'fileutils' 11 | require 'timecop' 12 | require 'ostruct' 13 | 14 | include Fluent::Test::Helpers 15 | 16 | class S3OutputTest < Test::Unit::TestCase 17 | def setup 18 | # Fluent::Test.setup 19 | end 20 | 21 | def teardown 22 | Dir.glob('test/tmp/*').each {|file| FileUtils.rm_f(file) } 23 | end 24 | 25 | CONFIG = %[ 26 | aws_key_id test_key_id 27 | aws_sec_key test_sec_key 28 | s3_bucket test_bucket 29 | path log 30 | utc 31 | buffer_type memory 32 | time_slice_format %Y%m%d-%H 33 | ] 34 | 35 | def create_driver(conf = CONFIG) 36 | Fluent::Test::Driver::Output.new(Fluent::Plugin::S3Output) do 37 | def format(tag, time, record) 38 | super 39 | end 40 | 41 | def write(chunk) 42 | chunk.read 43 | end 44 | 45 | private 46 | 47 | def ensure_bucket 48 | end 49 | 50 | def check_apikeys 51 | end 52 | end.configure(conf) 53 | end 54 | 55 | sub_test_case "configure" do 56 | def test_configure 57 | d = create_driver 58 | assert_equal 'test_key_id', d.instance.aws_key_id 59 | assert_equal 'test_sec_key', d.instance.aws_sec_key 60 | assert_equal 'test_bucket', d.instance.s3_bucket 61 | assert_equal 'log', d.instance.path 62 | assert_equal 'gz', d.instance.instance_variable_get(:@compressor).ext 63 | assert_equal 'application/x-gzip', d.instance.instance_variable_get(:@compressor).content_type 64 | assert_equal false, d.instance.force_path_style 65 | assert_equal nil, d.instance.compute_checksums 66 | assert_equal nil, d.instance.signature_version 67 | assert_equal true, d.instance.check_bucket 68 | assert_equal true, d.instance.check_object 69 | end 70 | 71 | def test_s3_endpoint_with_valid_endpoint 72 | d = create_driver(CONFIG + 's3_endpoint riak-cs.example.com') 73 | assert_equal 'riak-cs.example.com', d.instance.s3_endpoint 74 | end 75 | 76 | data('US West (Oregon)' => 's3-us-west-2.amazonaws.com', 77 | 'EU (Frankfurt)' => 's3.eu-central-1.amazonaws.com', 78 | 'Asia Pacific (Tokyo)' => 's3-ap-northeast-1.amazonaws.com') 79 | def test_s3_endpoint_with_invalid_endpoint(endpoint) 80 | assert_raise(Fluent::ConfigError, "s3_endpoint parameter is not supported, use s3_region instead. 
This parameter is for S3 compatible services") {
81 |       create_driver(CONFIG + "s3_endpoint #{endpoint}")
82 |     }
83 |   end
84 | 
85 |   def test_configure_with_mime_type_json
86 |     conf = CONFIG.clone
87 |     conf << "\nstore_as json\n"
88 |     d = create_driver(conf)
89 |     assert_equal 'json', d.instance.instance_variable_get(:@compressor).ext
90 |     assert_equal 'application/json', d.instance.instance_variable_get(:@compressor).content_type
91 |   end
92 | 
93 |   def test_configure_with_mime_type_text
94 |     conf = CONFIG.clone
95 |     conf << "\nstore_as text\n"
96 |     d = create_driver(conf)
97 |     assert_equal 'txt', d.instance.instance_variable_get(:@compressor).ext
98 |     assert_equal 'text/plain', d.instance.instance_variable_get(:@compressor).content_type
99 |   end
100 | 
101 |   def test_configure_with_mime_type_lzo
102 |     conf = CONFIG.clone
103 |     conf << "\nstore_as lzo\n"
104 |     d = create_driver(conf)
105 |     assert_equal 'lzo', d.instance.instance_variable_get(:@compressor).ext
106 |     assert_equal 'application/x-lzop', d.instance.instance_variable_get(:@compressor).content_type
107 |   rescue => e
108 |     # TODO: replace code with disable lzop command
109 |     assert(e.is_a?(Fluent::ConfigError))
110 |   end
111 | 
112 |   data('level default' => nil,
113 |        'level 1' => 1)
114 |   def test_configure_with_mime_type_zstd(level)
115 |     conf = CONFIG.clone
116 |     conf << "\nstore_as zstd\n"
117 |     conf << "\n<compress>\nlevel #{level}\n</compress>\n" if level
118 |     d = create_driver(conf)
119 |     assert_equal 'zst', d.instance.instance_variable_get(:@compressor).ext
120 |     assert_equal 'application/x-zst', d.instance.instance_variable_get(:@compressor).content_type
121 |     assert_equal (level || 3), d.instance.instance_variable_get(:@compressor).instance_variable_get(:@compress_config).level
122 |   end
123 | 
124 |   def test_configure_with_path_style
125 |     conf = CONFIG.clone
126 |     conf << "\nforce_path_style true\n"
127 |     d = create_driver(conf)
128 |     assert d.instance.force_path_style
129 |   end
130 | 
131 |   def test_configure_with_compute_checksums
132 |     conf = CONFIG.clone
133 |     conf << "\ncompute_checksums false\n"
134 |     d = create_driver(conf)
135 |     assert_equal false, d.instance.compute_checksums
136 |   end
137 | 
138 |   def test_configure_with_hex_random_length
139 |     conf = CONFIG.clone
140 |     assert_raise Fluent::ConfigError do
141 |       create_driver(conf + "\nhex_random_length 17\n")
142 |     end
143 |     assert_nothing_raised do
144 |       create_driver(conf + "\nhex_random_length 16\n")
145 |     end
146 |   end
147 | 
148 |   def test_configure_with_no_check_on_s3
149 |     conf = CONFIG.clone
150 |     conf << "\ncheck_bucket false\ncheck_object false\n"
151 |     d = create_driver(conf)
152 |     assert_equal false, d.instance.check_bucket
153 |     assert_equal false, d.instance.check_object
154 |   end
155 | 
156 |   def test_configure_with_grant
157 |     conf = CONFIG.clone
158 |     conf << "\ngrant_full_control id='0123456789'\ngrant_read id='1234567890'\ngrant_read_acp id='2345678901'\ngrant_write_acp id='3456789012'\n"
159 |     d = create_driver(conf)
160 |     assert_equal "id='0123456789'", d.instance.grant_full_control
161 |     assert_equal "id='1234567890'", d.instance.grant_read
162 |     assert_equal "id='2345678901'", d.instance.grant_read_acp
163 |     assert_equal "id='3456789012'", d.instance.grant_write_acp
164 |   end
165 | 
166 |   CONFIG_WITH_OBJECTKEY_DEFAULT = %[
167 |     s3_object_key_format "%{path}%{time_slice}_%{index}.%{file_extension}"
168 |     aws_key_id test_key_id
169 |     aws_sec_key test_sec_key
170 |     s3_bucket test_bucket
171 |     path log
172 |     utc
173 |     buffer_type memory
174 |     time_slice_format %Y%m%d-%H
175 |   ]
176 | 
177 |   CONFIG_WITH_OBJECTKEY_FIXED_FOR_MULTI_THREAD = %[
178 |     s3_object_key_format "%{path}%{time_slice}_${chunk_id}.%{file_extension}"
179 |     aws_key_id test_key_id
180 |     aws_sec_key test_sec_key
181 |     s3_bucket test_bucket
182 |     path log
183 |     utc
184 |     buffer_type memory
185 |     time_slice_format %Y%m%d-%H
186 |   ]
187 | 
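Spelled out, the two object-key patterns above expand as follows, assuming for illustration path "log", time slice "20110102-13", and a first flush:

    %{path}%{time_slice}_%{index}.%{file_extension}      => log20110102-13_0.gz
    %{path}%{time_slice}_${chunk_id}.%{file_extension}   => log20110102-13_<unique chunk id>.gz

%{index} only counts up when a key already exists, so concurrent flushers can race to the same index, while ${chunk_id} is unique per buffer chunk by construction. That difference is exactly what the warning matrix below exercises.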
188 |   data("non_objectkey", {"expected_warning_num" => 1, "conf" => CONFIG, "workers" => 1, "with_multi_buffers" => false})
189 |   data("non_objectkey-multi_buffer", {"expected_warning_num" => 2, "conf" => CONFIG, "workers" => 1, "with_multi_buffers" => true})
190 |   data("non_objectkey-multi_worker", {"expected_warning_num" => 2, "conf" => CONFIG, "workers" => 2, "with_multi_buffers" => false})
191 |   data("default_objectkey", {"expected_warning_num" => 0, "conf" => CONFIG_WITH_OBJECTKEY_DEFAULT, "workers" => 1, "with_multi_buffers" => false})
192 |   data("default_objectkey-multi_buffer", {"expected_warning_num" => 1, "conf" => CONFIG_WITH_OBJECTKEY_DEFAULT, "workers" => 1, "with_multi_buffers" => true})
193 |   data("default_objectkey-multi_worker", {"expected_warning_num" => 1, "conf" => CONFIG_WITH_OBJECTKEY_DEFAULT, "workers" => 2, "with_multi_buffers" => false})
194 |   data("fixed_objectkey", {"expected_warning_num" => 0, "conf" => CONFIG_WITH_OBJECTKEY_FIXED_FOR_MULTI_THREAD, "workers" => 1, "with_multi_buffers" => false})
195 |   data("fixed_objectkey-multi_buffer", {"expected_warning_num" => 0, "conf" => CONFIG_WITH_OBJECTKEY_FIXED_FOR_MULTI_THREAD, "workers" => 1, "with_multi_buffers" => true})
196 |   data("fixed_objectkey-multi_worker", {"expected_warning_num" => 0, "conf" => CONFIG_WITH_OBJECTKEY_FIXED_FOR_MULTI_THREAD, "workers" => 2, "with_multi_buffers" => false})
197 |   def test_configure_warning_on_parallel(data)
198 |     conf = data["conf"].clone
199 |     if data["with_multi_buffers"]
200 |       conf << "\n<buffer>\n@type memory\nflush_thread_count 2\n</buffer>\n"
201 |     end
202 |     assert_rr do
203 |       d = Fluent::Test::Driver::Output.new(Fluent::Plugin::S3Output, opts: {"workers": data["workers"]})
204 |       mock(d.instance.log).warn(anything).times(data["expected_warning_num"])
205 |       d.configure(conf)
206 |     end
207 |   end
208 | end
209 | 
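To make the parallel cases above warning-free, the object key needs a chunk-unique placeholder. A minimal sketch of a parallel-safe configuration (bucket name assumed; the <buffer> block mirrors the one the multi-buffer test injects):

    conf = %[
      aws_key_id test_key_id
      aws_sec_key test_sec_key
      s3_bucket my-log-bucket
      path log
      s3_object_key_format %{path}%{time_slice}_${chunk_id}.%{file_extension}
      time_slice_format %Y%m%d-%H
      <buffer>
        @type memory
        flush_thread_count 2
      </buffer>
    ]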
210 |   def test_format
211 |     d = create_driver
212 | 
213 |     time = event_time("2011-01-02 13:14:15 UTC")
214 |     d.run(default_tag: "test") do
215 |       d.feed(time, { "a" => 1 })
216 |       d.feed(time, { "a" => 2 })
217 |     end
218 |     expected = [
219 |       %[2011-01-02T13:14:15Z\ttest\t{"a":1}\n],
220 |       %[2011-01-02T13:14:15Z\ttest\t{"a":2}\n]
221 |     ]
222 |     assert_equal(expected, d.formatted)
223 |   end
224 | 
225 |   def test_format_included_tag_and_time
226 |     config = [CONFIG, 'include_tag_key true', 'include_time_key true'].join("\n")
227 |     d = create_driver(config)
228 | 
229 |     time = event_time("2011-01-02 13:14:15 UTC")
230 |     d.run(default_tag: "test") do
231 |       d.feed(time, { "a" => 1 })
232 |       d.feed(time, { "a" => 2 })
233 |     end
234 |     expected = [
235 |       %[2011-01-02T13:14:15Z\ttest\t{"a":1,"tag":"test","time":"2011-01-02T13:14:15Z"}\n],
236 |       %[2011-01-02T13:14:15Z\ttest\t{"a":2,"tag":"test","time":"2011-01-02T13:14:15Z"}\n]
237 |     ]
238 |     assert_equal(expected, d.formatted)
239 |   end
240 | 
241 |   def test_format_with_format_ltsv
242 |     config = [CONFIG, 'format ltsv'].join("\n")
243 |     d = create_driver(config)
244 | 
245 |     time = event_time("2011-01-02 13:14:15 UTC")
246 |     d.run(default_tag: "test") do
247 |       d.feed(time, {"a"=>1, "b"=>1})
248 |       d.feed(time, {"a"=>2, "b"=>2})
249 |     end
250 |     expected = [
251 |       %[a:1\tb:1\n],
252 |       %[a:2\tb:2\n]
253 |     ]
254 |     assert_equal(expected, d.formatted)
255 |   end
256 | 
257 |   def test_format_with_format_json
258 |     config = [CONFIG, 'format json'].join("\n")
259 |     d = create_driver(config)
260 | 
261 |     time = event_time("2011-01-02 13:14:15 UTC")
262 |     d.run(default_tag: "test") do
263 |       d.feed(time, {"a"=>1})
264 |       d.feed(time, {"a"=>2})
265 |     end
266 |     expected = [
267 |       %[{"a":1}\n],
268 |       %[{"a":2}\n]
269 |     ]
270 |     assert_equal(expected, d.formatted)
271 |   end
272 | 
273 |   def test_format_with_format_json_included_tag
274 |     config = [CONFIG, 'format json', 'include_tag_key true'].join("\n")
275 |     d = create_driver(config)
276 | 
277 |     time = event_time("2011-01-02 13:14:15 UTC")
278 |     d.run(default_tag: "test") do
279 |       d.feed(time, {"a"=>1})
280 |       d.feed(time, {"a"=>2})
281 |     end
282 |     expected = [
283 |       %[{"a":1,"tag":"test"}\n],
284 |       %[{"a":2,"tag":"test"}\n]
285 |     ]
286 |     assert_equal(expected, d.formatted)
287 |   end
288 | 
289 |   def test_format_with_format_json_included_time
290 |     config = [CONFIG, 'format json', 'include_time_key true'].join("\n")
291 |     d = create_driver(config)
292 | 
293 |     time = event_time("2011-01-02 13:14:15 UTC")
294 |     d.run(default_tag: "test") do
295 |       d.feed(time, {"a"=>1})
296 |       d.feed(time, {"a"=>2})
297 |     end
298 |     expected = [
299 |       %[{"a":1,"time":"2011-01-02T13:14:15Z"}\n],
300 |       %[{"a":2,"time":"2011-01-02T13:14:15Z"}\n]
301 |     ]
302 |     assert_equal(expected, d.formatted)
303 |   end
304 | 
305 |   def test_format_with_format_json_included_tag_and_time
306 |     config = [CONFIG, 'format json', 'include_tag_key true', 'include_time_key true'].join("\n")
307 |     d = create_driver(config)
308 | 
309 |     time = event_time("2011-01-02 13:14:15 UTC")
310 |     d.run(default_tag: "test") do
311 |       d.feed(time, {"a"=>1})
312 |       d.feed(time, {"a"=>2})
313 |     end
314 |     expected = [
315 |       %[{"a":1,"tag":"test","time":"2011-01-02T13:14:15Z"}\n],
316 |       %[{"a":2,"tag":"test","time":"2011-01-02T13:14:15Z"}\n]
317 |     ]
318 |     assert_equal(expected, d.formatted)
319 |   end
320 | 
321 |   # NOTE: the original text from CONFIG_TIME_SLICE down to the head of the
322 |   # first time-sliced write test was lost to extraction (the heredoc marker
323 |   # swallowed everything up to the first "=>"). The constant, the driver
324 |   # helper, and the test head below are reconstructions inferred from how
325 |   # the surviving tests use them: the gsub calls on CONFIG_TIME_SLICE, the
326 |   # mocked object path "log/events/ts=20110102-13/events_0-...", and the
327 |   # assertions that follow. Names and option values here are best-effort
328 |   # inferences, not the original source.
329 |   CONFIG_TIME_SLICE = <<EOS
330 |     aws_key_id test_key_id
331 |     aws_sec_key test_sec_key
332 |     s3_bucket test_bucket
333 |     s3_object_key_format %{path}/events/ts=%{time_slice}/events_%{index}-%{hostname}.%{file_extension}
334 |     time_slice_format %Y%m%d-%H
335 |     path log
336 |     utc
337 |     buffer_type memory
338 |     log_level debug
339 | EOS
340 | 
341 |   def create_time_sliced_driver(conf = CONFIG_TIME_SLICE)
342 |     d = Fluent::Test::Driver::Output.new(Fluent::Plugin::S3Output) do
343 |       def format(tag, time, record)
344 |         super
345 |       end
346 | 
347 |       def write(chunk)
348 |         super
349 |       end
350 | 
351 |       private
352 | 
353 |       def check_apikeys
354 |       end
355 |     end.configure(conf)
356 |     d
357 |   end
358 | 
359 |   def test_write
360 |     # Partial mock the S3Bucket, not to make an actual connection
361 |     # to Amazon S3
362 |     setup_mocks(true)
363 |     s3_local_file_path = "/tmp/s3-test.txt"
364 |     setup_s3_object_mocks(s3_local_file_path: s3_local_file_path)
365 | 
366 |     d = create_time_sliced_driver
367 | 
368 |     time = event_time("2011-01-02 13:14:15 UTC")
369 |     d.run(default_tag: "test") do
370 |       d.feed(time, {"a"=>1})
371 |       d.feed(time, {"a"=>2})
372 |     end
373 | 
374 |     Zlib::GzipReader.open(s3_local_file_path) do |gz|
375 |       data = gz.read
376 |       assert_equal %[2011-01-02T13:14:15Z\ttest\t{"a":1}\n] +
377 |                    %[2011-01-02T13:14:15Z\ttest\t{"a":2}\n],
378 |                    data
379 |     end
380 |     FileUtils.rm_f(s3_local_file_path)
381 |   end
382 | 
383 |   def test_write_with_custom_s3_object_key_format
384 |     # Partial mock the S3Bucket, not to make an actual connection to Amazon S3
385 |     setup_mocks(true)
386 |     s3_local_file_path = "/tmp/s3-test.txt"
387 |     setup_s3_object_mocks(s3_local_file_path: s3_local_file_path)
388 | 
389 |     d = create_time_sliced_driver
390 | 
391 |     time = event_time("2011-01-02 13:14:15 UTC")
392 |     d.run(default_tag: "test") do
393 |       d.feed(time, {"a"=>1})
394 |       d.feed(time, {"a"=>2})
395 |     end
396 | 
397 |     Zlib::GzipReader.open(s3_local_file_path) do |gz|
398 |       data = gz.read
399 |       assert_equal %[2011-01-02T13:14:15Z\ttest\t{"a":1}\n] +
400 |                    %[2011-01-02T13:14:15Z\ttest\t{"a":2}\n],
401 |                    data
402 |     end
403 |     FileUtils.rm_f(s3_local_file_path)
404 |   end
405 | 
406 |   def test_write_with_custom_s3_object_key_format_containing_uuid_flush_placeholder
407 | 
408 |     # Partial mock the S3Bucket, not to make an actual connection to Amazon S3
409 |     setup_mocks(true)
410 | 
411 |     uuid = "5755e23f-9b54-42d8-8818-2ea38c6f279e"
412 |     stub(::SecureRandom).uuid{ uuid }
413 | 
414 |     s3_local_file_path = "/tmp/s3-test.txt"
415 |     s3path = "log/events/ts=20110102-13/events_0-#{uuid}.gz"
416 |     setup_s3_object_mocks(s3_local_file_path:
s3_local_file_path, s3path: s3path) 417 | 418 | config = CONFIG_TIME_SLICE.gsub(/%{hostname}/,"%{uuid_flush}") 419 | d = create_time_sliced_driver(config) 420 | 421 | time = event_time("2011-01-02 13:14:15 UTC") 422 | d.run(default_tag: "test") do 423 | d.feed(time, {"a"=>1}) 424 | d.feed(time, {"a"=>2}) 425 | end 426 | 427 | Zlib::GzipReader.open(s3_local_file_path) do |gz| 428 | data = gz.read 429 | assert_equal %[2011-01-02T13:14:15Z\ttest\t{"a":1}\n] + 430 | %[2011-01-02T13:14:15Z\ttest\t{"a":2}\n], 431 | data 432 | end 433 | FileUtils.rm_f(s3_local_file_path) 434 | Dir.glob('tmp/*').each {|file| FileUtils.rm_f(file) } 435 | end 436 | 437 | # ToDo: need to test hex_random does not change on retry, but it is difficult with 438 | # the current fluentd test helper because it does not provide a way to run with the same chunks 439 | def test_write_with_custom_s3_object_key_format_containing_hex_random_placeholder 440 | unique_hex = "5226c3c4fb3d49b15226c3c4fb3d49b1" 441 | hex_random = unique_hex.reverse[0...5] 442 | 443 | config = CONFIG_TIME_SLICE.gsub(/%{hostname}/,"%{hex_random}") << "\nhex_random_length #{hex_random.length}" 444 | config = config.gsub(/buffer_type memory/, "buffer_type file\nbuffer_path test/tmp/buf") 445 | 446 | # Partial mock the S3Bucket, not to make an actual connection to Amazon S3 447 | setup_mocks(true) 448 | 449 | s3path = "log/events/ts=20110102-13/events_0-#{hex_random}.gz" 450 | s3_local_file_path = "/tmp/s3-test.txt" 451 | setup_s3_object_mocks(s3_local_file_path: s3_local_file_path, s3path: s3path) 452 | 453 | d = create_time_sliced_driver(config) 454 | stub(Fluent::UniqueId).hex(anything) { unique_hex } 455 | 456 | time = event_time("2011-01-02 13:14:15 UTC") 457 | d.run(default_tag: "test") do 458 | d.feed(time, {"a"=>1}) 459 | d.feed(time, {"a"=>2}) 460 | end 461 | 462 | Zlib::GzipReader.open(s3_local_file_path) do |gz| 463 | data = gz.read 464 | assert_equal %[2011-01-02T13:14:15Z\ttest\t{"a":1}\n] + 465 | %[2011-01-02T13:14:15Z\ttest\t{"a":2}\n], 466 | data 467 | end 468 | FileUtils.rm_f(s3_local_file_path) 469 | end 470 | 471 | def test_write_with_zstd 472 | setup_mocks(true) 473 | s3_local_file_path = "/tmp/s3-test.zst" 474 | 475 | expected_s3path = "log/events/ts=20110102-13/events_0-#{Socket.gethostname}.zst" 476 | 477 | setup_s3_object_mocks(s3_local_file_path: s3_local_file_path, s3path: expected_s3path) 478 | 479 | config = CONFIG_TIME_SLICE + "\nstore_as zstd\n" 480 | d = create_time_sliced_driver(config) 481 | 482 | time = event_time("2011-01-02 13:14:15 UTC") 483 | d.run(default_tag: "test") do 484 | d.feed(time, { "a" => 1 }) 485 | d.feed(time, { "a" => 2 }) 486 | end 487 | 488 | File.open(s3_local_file_path, 'rb') do |file| 489 | compressed_data = file.read 490 | uncompressed_data = Zstd.decompress(compressed_data) 491 | expected_data = %[2011-01-02T13:14:15Z\ttest\t{"a":1}\n] + 492 | %[2011-01-02T13:14:15Z\ttest\t{"a":2}\n] 493 | assert_equal expected_data, uncompressed_data 494 | end 495 | FileUtils.rm_f(s3_local_file_path) 496 | end 497 | 498 | class MockResponse 499 | attr_reader :data 500 | 501 | def initialize(data) 502 | @data = data 503 | end 504 | end 505 | 506 | def setup_mocks(exists_return = false) 507 | @s3_client = stub(Aws::S3::Client.new(stub_responses: true)) 508 | stub(@s3_client).config { OpenStruct.new({region: "us-east-1"}) } 509 | # aws-sdk-s3 calls Client#put_object inside Object#put 510 | mock(@s3_client).put_object(anything).at_least(0) { MockResponse.new({}) } 511 | mock(Aws::S3::Client).new(anything).at_least(0) { 
@s3_client } 512 | @s3_resource = mock(Aws::S3::Resource.new(client: @s3_client)) 513 | mock(Aws::S3::Resource).new(client: @s3_client) { @s3_resource } 514 | @s3_bucket = mock(Aws::S3::Bucket.new(name: "test", 515 | client: @s3_client)) 516 | @s3_bucket.exists? { exists_return } 517 | @s3_object = mock(Aws::S3::Object.new(bucket_name: "test_bucket", 518 | key: "test", 519 | client: @s3_client)) 520 | @s3_object.exists?.at_least(0) { false } 521 | @s3_bucket.object(anything).at_least(0) { @s3_object } 522 | @s3_resource.bucket(anything) { @s3_bucket } 523 | end 524 | 525 | def setup_s3_object_mocks(params = {}) 526 | s3path = params[:s3path] || "log/events/ts=20110102-13/events_0-#{Socket.gethostname}.gz" 527 | s3_local_file_path = params[:s3_local_file_path] || "/tmp/s3-test.txt" 528 | 529 | # Assert content of event logs which are being sent to S3 530 | s3obj = stub(Aws::S3::Object.new(bucket_name: "test_bucket", 531 | key: "test", 532 | client: @s3_client)) 533 | s3obj.exists? { false } 534 | 535 | tempfile = File.new(s3_local_file_path, "w") 536 | stub(Tempfile).new("s3-") { tempfile } 537 | s3obj.put(body: tempfile, 538 | content_type: "application/x-gzip", 539 | storage_class: "STANDARD") 540 | 541 | @s3_bucket.object(s3path) { s3obj } 542 | end 543 | 544 | def setup_mocks_hardened_policy() 545 | @s3_client = stub(Aws::S3::Client.new(:stub_responses => true)) 546 | stub(@s3_client).config { OpenStruct.new({region: "us-east-1"}) } 547 | mock(@s3_client).put_object(anything).at_least(0) { MockResponse.new({}) } 548 | mock(Aws::S3::Client).new(anything).at_least(0) { @s3_client } 549 | @s3_resource = mock(Aws::S3::Resource.new(:client => @s3_client)) 550 | mock(Aws::S3::Resource).new(:client => @s3_client) { @s3_resource } 551 | @s3_bucket = mock(Aws::S3::Bucket.new(:name => "test", 552 | :client => @s3_client)) 553 | @s3_object = mock(Aws::S3::Object.new(:bucket_name => "test_bucket", 554 | :key => "test", 555 | :client => @s3_client)) 556 | @s3_bucket.object(anything).at_least(0) { @s3_object } 557 | @s3_resource.bucket(anything) { @s3_bucket } 558 | end 559 | 560 | def setup_s3_object_mocks_hardened_policy(params = {}) 561 | s3_local_file_path = params[:s3_local_file_path] || "/tmp/s3-test.txt" 562 | 563 | # Assert content of event logs which are being sent to S3 564 | s3obj = stub(Aws::S3::Object.new(:bucket_name => "test_bucket", 565 | :key => "test", 566 | :client => @s3_client)) 567 | 568 | tempfile = File.new(s3_local_file_path, "w") 569 | stub(Tempfile).new("s3-") { tempfile } 570 | s3obj.put(:body => tempfile, 571 | :content_type => "application/x-gzip", 572 | :storage_class => "STANDARD") 573 | end 574 | 575 | def test_auto_create_bucket_false_with_non_existence_bucket 576 | setup_mocks 577 | 578 | config = CONFIG_TIME_SLICE + 'auto_create_bucket false' 579 | d = create_time_sliced_driver(config) 580 | assert_raise(RuntimeError, "The specified bucket does not exist: bucket = test_bucket") { 581 | d.run {} 582 | } 583 | end 584 | 585 | def test_auto_create_bucket_true_with_non_existence_bucket 586 | setup_mocks 587 | @s3_resource.create_bucket(bucket: "test_bucket") 588 | 589 | config = CONFIG_TIME_SLICE + 'auto_create_bucket true' 590 | d = create_time_sliced_driver(config) 591 | assert_nothing_raised { d.run {} } 592 | end 593 | 594 | def test_credentials 595 | d = create_time_sliced_driver 596 | assert_nothing_raised { d.run {} } 597 | client = d.instance.instance_variable_get(:@s3).client 598 | credentials = client.config.credentials 599 | 
assert_instance_of(Aws::Credentials, credentials)
600 |   end
601 | 
602 |   def test_assume_role_credentials
603 |     expected_credentials = Aws::Credentials.new("test_key", "test_secret")
604 |     mock(Aws::AssumeRoleCredentials).new({ role_arn: "test_arn",
605 |                                            role_session_name: "test_session",
606 |                                            client: anything }){
607 |       expected_credentials
608 |     }
609 |     config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
610 |     config += %[
611 |       <assume_role_credentials>
612 |         role_arn test_arn
613 |         role_session_name test_session
614 |       </assume_role_credentials>
615 |     ]
616 |     d = create_time_sliced_driver(config)
617 |     assert_nothing_raised { d.run {} }
618 |     client = d.instance.instance_variable_get(:@s3).client
619 |     credentials = client.config.credentials
620 |     assert_equal(expected_credentials, credentials)
621 |   end
622 | 
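The credential tests in this stretch all follow the same recipe: strip the static aws_* keys out of CONFIG_TIME_SLICE, append a credentials section, run the driver, and assert that the S3 client ends up holding the provider that section should construct (here Aws::AssumeRoleCredentials). As standalone configuration, the section exercised above would look roughly like this (the ARN is an assumed example, not a value from this repo):

    config += %[
      <assume_role_credentials>
        role_arn arn:aws:iam::123456789012:role/example-role
        role_session_name fluentd-session
      </assume_role_credentials>
    ]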
623 |   def test_assume_role_credentials_with_region
624 |     expected_credentials = Aws::Credentials.new("test_key", "test_secret")
625 |     sts_client = Aws::STS::Client.new(region: 'ap-northeast-1')
626 |     mock(Aws::STS::Client).new(region: 'ap-northeast-1'){ sts_client }
627 |     mock(Aws::AssumeRoleCredentials).new({ role_arn: "test_arn",
628 |                                            role_session_name: "test_session",
629 |                                            client: sts_client }){
630 |       expected_credentials
631 |     }
632 |     config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
633 |     config += %[
634 |       s3_region ap-northeast-1
635 |       <assume_role_credentials>
636 |         role_arn test_arn
637 |         role_session_name test_session
638 |       </assume_role_credentials>
639 |     ]
640 |     d = create_time_sliced_driver(config)
641 |     assert_nothing_raised { d.run {} }
642 |     client = d.instance.instance_variable_get(:@s3).client
643 |     credentials = client.config.credentials
644 |     assert_equal(expected_credentials, credentials)
645 |   end
646 | 
647 |   def test_assume_role_with_iam_credentials
648 |     expected_credentials = Aws::Credentials.new("test_key_id", "test_sec_key")
649 |     sts_client = Aws::STS::Client.new(region: 'ap-northeast-1', credentials: expected_credentials)
650 |     mock(Aws::Credentials).new("test_key_id", "test_sec_key") { expected_credentials }
651 |     mock(Aws::STS::Client).new(region: 'ap-northeast-1', credentials: expected_credentials){ sts_client }
652 |     mock(Aws::AssumeRoleCredentials).new({ role_arn: "test_arn",
653 |                                            role_session_name: "test_session",
654 |                                            client: sts_client } ){
655 |       expected_credentials
656 |     }
657 |     config = CONFIG_TIME_SLICE
658 |     config += %[
659 |       s3_region ap-northeast-1
660 | 
661 |       <assume_role_credentials>
662 |         role_arn test_arn
663 |         role_session_name test_session
664 |       </assume_role_credentials>
665 |     ]
666 |     d = create_time_sliced_driver(config)
667 |     assert_nothing_raised { d.run {} }
668 |     client = d.instance.instance_variable_get(:@s3).client
669 |     credentials = client.config.credentials
670 |     assert_equal(expected_credentials, credentials)
671 |   end
672 | 
673 |   def test_assume_role_credentials_with_region_and_sts_http_proxy
674 |     expected_credentials = Aws::Credentials.new("test_key", "test_secret")
675 |     expected_region = "ap-northeast-1"
676 |     expected_sts_http_proxy = 'http://example.com'
677 |     sts_client = Aws::STS::Client.new(region: expected_region, http_proxy: expected_sts_http_proxy)
678 |     mock(Aws::STS::Client).new(region:expected_region, http_proxy: expected_sts_http_proxy){ sts_client }
679 |     mock(Aws::AssumeRoleCredentials).new({ role_arn: "test_arn",
680 |                                            role_session_name: "test_session",
681 |                                            client: sts_client,
682 |                                            sts_http_proxy: expected_sts_http_proxy }){
683 |       expected_credentials
684 |     }
685 |     config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
686 |     config += %[
687 |       s3_region #{expected_region}
688 |       <assume_role_credentials>
689 |         role_arn test_arn
690 |         role_session_name test_session
691 |         sts_http_proxy #{expected_sts_http_proxy}
692 |       </assume_role_credentials>
693 |     ]
694 |     d = create_time_sliced_driver(config)
695 |     assert_nothing_raised { d.run {} }
696 |     client = d.instance.instance_variable_get(:@s3).client
697 |     credentials = client.config.credentials
698 |     assert_equal(expected_credentials, credentials)
699 |   end
700 | 
701 |   def test_assume_role_credentials_with_sts_http_proxy
702 |     expected_credentials = Aws::Credentials.new("test_key", "test_secret")
703 |     expected_sts_http_proxy = 'http://example.com'
704 |     sts_client = Aws::STS::Client.new(region: "us-east-1", http_proxy: expected_sts_http_proxy)
705 |     mock(Aws::STS::Client).new(region: "us-east-1", http_proxy: expected_sts_http_proxy){ sts_client }
706 |     mock(Aws::AssumeRoleCredentials).new({ role_arn: "test_arn",
707 |                                            role_session_name: "test_session",
708 |                                            client: sts_client,
709 |                                            sts_http_proxy: expected_sts_http_proxy }){
710 |       expected_credentials
711 |     }
712 |     config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
713 |     config += %[
714 |       <assume_role_credentials>
715 |         role_arn test_arn
716 |         role_session_name test_session
717 |         sts_http_proxy #{expected_sts_http_proxy}
718 |       </assume_role_credentials>
719 |     ]
720 |     d = create_time_sliced_driver(config)
721 |     assert_nothing_raised { d.run {} }
722 |     client = d.instance.instance_variable_get(:@s3).client
723 |     credentials = client.config.credentials
724 |     assert_equal(expected_credentials, credentials)
725 |   end
726 | 
727 |   def test_assume_role_credentials_with_sts_endpoint_url
728 |     expected_credentials = Aws::Credentials.new("test_key", "test_secret")
729 |     expected_sts_endpoint_url = 'http://example.com'
730 |     sts_client = Aws::STS::Client.new(region: "us-east-1", endpoint: expected_sts_endpoint_url)
731 |     mock(Aws::STS::Client).new(region: "us-east-1", endpoint: expected_sts_endpoint_url){ sts_client }
732 |     mock(Aws::AssumeRoleCredentials).new({ role_arn: "test_arn",
733 |                                            role_session_name: "test_session",
734 |                                            client: sts_client,
735 |                                            sts_endpoint_url: expected_sts_endpoint_url }){
736 |       expected_credentials
737 |     }
738 |     config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
739 |     config += %[
740 |       <assume_role_credentials>
741 |         role_arn test_arn
742 |         role_session_name test_session
743 |         sts_endpoint_url #{expected_sts_endpoint_url}
744 |       </assume_role_credentials>
745 |     ]
746 |     d = create_time_sliced_driver(config)
747 |     assert_nothing_raised { d.run {} }
748 |     client = d.instance.instance_variable_get(:@s3).client
749 |     credentials = client.config.credentials
750 |     assert_equal(expected_credentials, credentials)
751 |   end
752 | 
753 |   def test_assume_role_credentials_with_sts_region
754 |     expected_credentials = Aws::Credentials.new("test_key", "test_secret")
755 |     expected_sts_region = 'ap-south-1'
756 |     sts_client = Aws::STS::Client.new(region: expected_sts_region)
757 |     mock(Aws::STS::Client).new(region: expected_sts_region){ sts_client }
758 |     mock(Aws::AssumeRoleCredentials).new({ role_arn: "test_arn",
759 |                                            role_session_name: "test_session",
760 |                                            client: sts_client }){
761 |       expected_credentials
762 |     }
763 |     config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
764 |     config += %[
765 |       <assume_role_credentials>
766 |         role_arn test_arn
767 |         role_session_name test_session
768 |         sts_region #{expected_sts_region}
769 |       </assume_role_credentials>
770 |     ]
771 |     d = create_time_sliced_driver(config)
772 |     assert_nothing_raised { d.run {} }
773 |     client = d.instance.instance_variable_get(:@s3).client
774 |     credentials = client.config.credentials
775 |     assert_equal(expected_credentials, credentials)
776 |   end
777 | 
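The next group switches to Aws::AssumeRoleWebIdentityCredentials, which adds web_identity_token_file to the role parameters; this is the shape used for EKS IAM-roles-for-service-accounts setups. A hypothetical section (the token path is a typical projected-token location, assumed here for illustration):

    config += %[
      <web_identity_credentials>
        role_arn arn:aws:iam::123456789012:role/example-role
        role_session_name fluentd-session
        web_identity_token_file /var/run/secrets/eks.amazonaws.com/serviceaccount/token
      </web_identity_credentials>
    ]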
778 |   def test_web_identity_credentials
779 |     expected_credentials = Aws::Credentials.new("test_key", "test_secret")
780 |     mock(Aws::AssumeRoleWebIdentityCredentials).new(
781 |       {
782 |         role_arn: "test_arn",
783 |         role_session_name: "test_session",
784 |         web_identity_token_file: "test_file",
785 |         client: anything
786 |       }
787 |     ){
788 |       expected_credentials
789 |     }
790 | 
791 |     config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
792 |     config += %[
793 |       <web_identity_credentials>
794 |         role_arn test_arn
795 |         role_session_name test_session
796 |         web_identity_token_file test_file
797 |       </web_identity_credentials>
798 |     ]
799 |     d = create_time_sliced_driver(config)
800 |     assert_nothing_raised { d.run {} }
801 |     client = d.instance.instance_variable_get(:@s3).client
802 |     credentials = client.config.credentials
803 |     assert_equal(expected_credentials, credentials)
804 |   end
805 | 
806 |   def test_web_identity_credentials_with_region_and_sts_http_proxy
807 |     expected_credentials = Aws::Credentials.new("test_key", "test_secret")
808 |     expected_region = "ap-northeast-1"
809 |     expected_sts_http_proxy = 'http://example.com'
810 |     sts_client = Aws::STS::Client.new(region: expected_region, http_proxy: expected_sts_http_proxy)
811 |     mock(Aws::STS::Client).new(region:expected_region, http_proxy: expected_sts_http_proxy){ sts_client }
812 |     mock(Aws::AssumeRoleWebIdentityCredentials).new({ role_arn: "test_arn",
813 |                                                       role_session_name: "test_session",
814 |                                                       web_identity_token_file: "test_file",
815 |                                                       client: sts_client,
816 |                                                       sts_http_proxy: expected_sts_http_proxy }){
817 |       expected_credentials
818 |     }
819 |     config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
820 |     config += %[
821 |       s3_region #{expected_region}
822 |       <web_identity_credentials>
823 |         role_arn test_arn
824 |         role_session_name test_session
825 |         web_identity_token_file test_file
826 |         sts_http_proxy #{expected_sts_http_proxy}
827 |       </web_identity_credentials>
828 |     ]
829 |     d = create_time_sliced_driver(config)
830 |     assert_nothing_raised { d.run {} }
831 |     client = d.instance.instance_variable_get(:@s3).client
832 |     credentials = client.config.credentials
833 |     assert_equal(expected_credentials, credentials)
834 |   end
835 | 
836 |   def test_web_identity_credentials_with_sts_http_proxy
837 |     expected_credentials = Aws::Credentials.new("test_key", "test_secret")
838 |     expected_sts_http_proxy = 'http://example.com'
839 |     sts_client = Aws::STS::Client.new(region: "us-east-1", http_proxy: expected_sts_http_proxy)
840 |     mock(Aws::STS::Client).new(region: "us-east-1", http_proxy: expected_sts_http_proxy){ sts_client }
841 |     mock(Aws::AssumeRoleWebIdentityCredentials).new({ role_arn: "test_arn",
842 |                                                       role_session_name: "test_session",
843 |                                                       web_identity_token_file: "test_file",
844 |                                                       client: sts_client,
845 |                                                       sts_http_proxy: expected_sts_http_proxy }){
846 |       expected_credentials
847 |     }
848 |     config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
849 |     config += %[
850 |       <web_identity_credentials>
851 |         role_arn test_arn
852 |         role_session_name test_session
853 |         web_identity_token_file test_file
854 |         sts_http_proxy #{expected_sts_http_proxy}
855 |       </web_identity_credentials>
856 |     ]
857 |     d = create_time_sliced_driver(config)
858 |     assert_nothing_raised { d.run {} }
859 |     client = d.instance.instance_variable_get(:@s3).client
860 |     credentials = client.config.credentials
861 |     assert_equal(expected_credentials, credentials)
862 |   end
863 | 
"test_secret") 866 | expected_sts_endpoint_url = 'http://example.com' 867 | sts_client = Aws::STS::Client.new(region: "us-east-1", endpoint: expected_sts_endpoint_url) 868 | mock(Aws::STS::Client).new(region: "us-east-1", endpoint: expected_sts_endpoint_url){ sts_client } 869 | mock(Aws::AssumeRoleWebIdentityCredentials).new({ role_arn: "test_arn", 870 | role_session_name: "test_session", 871 | web_identity_token_file: "test_file", 872 | client: sts_client, 873 | sts_endpoint_url: expected_sts_endpoint_url }){ 874 | expected_credentials 875 | } 876 | config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n") 877 | config += %[ 878 | 879 | role_arn test_arn 880 | role_session_name test_session 881 | web_identity_token_file test_file 882 | sts_endpoint_url #{expected_sts_endpoint_url} 883 | 884 | ] 885 | d = create_time_sliced_driver(config) 886 | assert_nothing_raised { d.run {} } 887 | client = d.instance.instance_variable_get(:@s3).client 888 | credentials = client.config.credentials 889 | assert_equal(expected_credentials, credentials) 890 | end 891 | 892 | def test_web_identity_credentials_with_sts_region 893 | expected_credentials = Aws::Credentials.new("test_key", "test_secret") 894 | sts_client = Aws::STS::Client.new(region: 'us-east-1') 895 | mock(Aws::STS::Client).new(region: 'us-east-1'){ sts_client } 896 | mock(Aws::AssumeRoleWebIdentityCredentials).new( 897 | { 898 | role_arn: "test_arn", 899 | role_session_name: "test_session", 900 | web_identity_token_file: "test_file", 901 | client: sts_client 902 | } 903 | ){ 904 | expected_credentials 905 | } 906 | 907 | config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n") 908 | config += %[ 909 | s3_region us-west-2 910 | 911 | role_arn test_arn 912 | role_session_name test_session 913 | web_identity_token_file test_file 914 | sts_region us-east-1 915 | 916 | ] 917 | d = create_time_sliced_driver(config) 918 | assert_nothing_raised { d.run {} } 919 | client = d.instance.instance_variable_get(:@s3).client 920 | credentials = client.config.credentials 921 | assert_equal(expected_credentials, credentials) 922 | end 923 | 924 | def test_instance_profile_credentials 925 | expected_credentials = Aws::Credentials.new("test_key", "test_secret") 926 | mock(Aws::InstanceProfileCredentials).new({}).returns(expected_credentials) 927 | config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n") 928 | config += %[ 929 | 930 | 931 | ] 932 | d = create_time_sliced_driver(config) 933 | assert_nothing_raised { d.run {} } 934 | client = d.instance.instance_variable_get(:@s3).client 935 | credentials = client.config.credentials 936 | assert_equal(expected_credentials, credentials) 937 | end 938 | 939 | def test_ecs_credentials 940 | ENV["AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"] = "/credential_provider_version/credentials?id=task_UUID" 941 | 942 | expected_credentials = Aws::Credentials.new("test_key", "test_secret") 943 | mock(Aws::ECSCredentials).new({}).returns(expected_credentials) 944 | config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n") 945 | config += %[ 946 | 947 | 948 | ] 949 | d = create_time_sliced_driver(config) 950 | assert_nothing_raised { d.run {} } 951 | client = d.instance.instance_variable_get(:@s3).client 952 | credentials = client.config.credentials 953 | assert_equal(expected_credentials, credentials) 954 | 955 | ENV["AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"] = nil 956 | end 957 | 958 | def test_instance_profile_credentials_aws_iam_retries 959 | 
958 |   def test_instance_profile_credentials_aws_iam_retries
959 |     expected_credentials = Aws::Credentials.new("test_key", "test_secret")
960 |     mock(Aws::InstanceProfileCredentials).new({ retries: 10 }).returns(expected_credentials)
961 |     config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
962 |     config += %[
963 |       aws_iam_retries 10
964 |     ]
965 |     d = create_time_sliced_driver(config)
966 |     assert_nothing_raised { d.run {} }
967 |     client = d.instance.instance_variable_get(:@s3).client
968 |     credentials = client.config.credentials
969 |     assert_equal(expected_credentials, credentials)
970 |   end
971 | 
972 |   def test_shared_credentials
973 |     expected_credentials = Aws::Credentials.new("test_key", "test_secret")
974 |     mock(Aws::SharedCredentials).new({}).returns(expected_credentials)
975 |     config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
976 |     config += %[
977 |       <shared_credentials>
978 |       </shared_credentials>
979 |     ]
980 |     d = create_time_sliced_driver(config)
981 |     assert_nothing_raised { d.run {} }
982 |     client = d.instance.instance_variable_get(:@s3).client
983 |     credentials = client.config.credentials
984 |     assert_equal(expected_credentials, credentials)
985 |   end
986 | 
987 |   def test_signature_version
988 |     config = [CONFIG, 'signature_version s3'].join("\n")
989 |     d = create_driver(config)
990 | 
991 |     signature_version = d.instance.instance_variable_get(:@signature_version)
992 |     assert_equal("s3", signature_version)
993 |   end
994 | 
995 |   def test_warn_for_delay
996 |     setup_mocks(true)
997 |     s3_local_file_path = "/tmp/s3-test.txt"
998 |     setup_s3_object_mocks(s3_local_file_path: s3_local_file_path)
999 | 
1000 |     config = CONFIG_TIME_SLICE + 'warn_for_delay 1d'
1001 |     d = create_time_sliced_driver(config)
1002 | 
1003 |     delayed_time = event_time("2011-01-02 13:14:15 UTC")
1004 |     now = delayed_time.to_i + 86000 + 1
1005 |     d.instance.log.out.flush_logs = false
1006 |     Timecop.freeze(Time.at(now)) do
1007 |       d.run(default_tag: "test") do
1008 |         d.feed(delayed_time, {"a"=>1})
1009 |         d.feed(delayed_time, {"a"=>2})
1010 |       end
1011 |     end
1012 |     logs = d.instance.log.out.logs
1013 |     assert_true logs.any? {|log| log.include?('out_s3: delayed events were put') }
1014 |     d.instance.log.out.flush_logs = true
1015 |     d.instance.log.out.reset
1016 |     FileUtils.rm_f(s3_local_file_path)
1017 |   end
1018 | end
1019 | 
--------------------------------------------------------------------------------