├── .circleci └── config.yml ├── .coveragerc ├── .editorconfig ├── .envrc ├── .envrc.local.template ├── .flake8 ├── .github └── CODEOWNERS ├── .gitignore ├── .markdownlintrc ├── .pre-commit-config.yaml ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── clamav.py ├── clamav_test.py ├── common.py ├── common_test.py ├── deploy └── cloudformation.yaml ├── display_infected.py ├── display_infected_test.py ├── images ├── bucket-antivirus-function.png └── s3-event.png ├── metrics.py ├── requirements-dev.txt ├── requirements.txt ├── scan.py ├── scan_bucket.py ├── scan_bucket_test.py ├── scan_test.py ├── scripts ├── run-scan-lambda └── run-update-lambda └── update.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | ############ 2 | # 3 | # Caches 4 | # 5 | # Caches may require a prefix, since caches in CircleCI 2.0 are immutable. 6 | # A prefix provides an easy way to invalidate a cache. See https://circleci.com/docs/2.0/caching/#clearing-cache 7 | # 8 | ############ 9 | 10 | version: "2.1" 11 | 12 | executors: 13 | primary: 14 | docker: 15 | - image: docker:17.05.0-ce 16 | python_test: 17 | docker: 18 | - image: circleci/python:3.7-stretch 19 | pre_commit_test: 20 | docker: 21 | - image: circleci/python:3.7-stretch 22 | 23 | jobs: 24 | 25 | # `build` is used for building the archive 26 | build: 27 | executor: primary 28 | working_directory: ~/src 29 | steps: 30 | - setup_remote_docker: 31 | reusable: true # default - false 32 | exclusive: true # default - true 33 | - run: 34 | name: Dependencies 35 | command: apk add --no-cache make git curl openssh 36 | - checkout 37 | - run: 38 | name: Build 39 | command: make archive 40 | 41 | # `pre_commit_deps` is used for cache pre-commit sources 42 | pre_commit_deps: 43 | executor: pre_commit_test 44 | steps: 45 | - checkout 46 | 47 | - restore_cache: 48 | keys: 49 | - pre-commit-dot-cache-{{ checksum ".pre-commit-config.yaml" }} 50 | - run: sudo pip install 
pre-commit==2.12.1 51 | - run: pre-commit install-hooks 52 | 53 | - save_cache: 54 | key: pre-commit-dot-cache-{{ checksum ".pre-commit-config.yaml" }} 55 | paths: 56 | - ~/.cache/pre-commit 57 | 58 | # `pre_commit_test` is used to run pre-commit hooks on all files 59 | pre_commit_test: 60 | executor: pre_commit_test 61 | steps: 62 | - checkout 63 | - restore_cache: 64 | keys: 65 | - pre-commit-dot-cache-{{ checksum ".pre-commit-config.yaml" }} 66 | - run: sudo pip install pre-commit==2.12.1 67 | - run: 68 | name: Run pre-commit tests 69 | command: pre-commit run --all-files 70 | 71 | # `test` is used to run python tests 72 | test: 73 | executor: python_test 74 | steps: 75 | - checkout 76 | - restore_cache: 77 | keys: 78 | - pre-commit-dot-cache-{{ checksum ".pre-commit-config.yaml" }} 79 | - run: sudo pip install -r requirements.txt 80 | - run: sudo pip install -r requirements-dev.txt 81 | - run: nosetests 82 | 83 | workflows: 84 | version: 2 85 | 86 | main: 87 | jobs: 88 | - pre_commit_deps 89 | - pre_commit_test: 90 | requires: 91 | - pre_commit_deps 92 | - test 93 | - build: 94 | requires: 95 | - pre_commit_test 96 | - test -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | data_file = .coverage 3 | 4 | [report] 5 | omit = 6 | */python?.?/* 7 | */site-packages/nose/* 8 | show_missing = true 9 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 2 6 | tab_width = 2 7 | trim_trailing_whitespace = true 8 | 9 | [Makefile] 10 | indent_style = tab 11 | indent_size = 2 12 | tab_width = 2 13 | 14 | [*.py] 15 | indent_style = space 16 | indent_size = 4 17 | tab_width = 4 18 | 
-------------------------------------------------------------------------------- /.envrc: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | ########################################## 4 | # DO NOT MAKE LOCAL CHANGES TO THIS FILE # 5 | # # 6 | # Vars in this file can be overridden by # 7 | # exporting them in .envrc.local # 8 | ########################################## 9 | 10 | # Add local paths for binaries and scripts 11 | PATH_add ./scripts 12 | 13 | # ShellCheck complains about things like `foo=$(cmd)` because you lose the 14 | # return value of `cmd`. That said, we're not using `set -e`, so we aren't 15 | # really concerned about return values. The following `true`, applies the 16 | # rule to the entire file. 17 | # See: https://github.com/koalaman/shellcheck/wiki/SC2155 18 | # shellcheck disable=SC2155 19 | true 20 | 21 | required_vars=() 22 | var_docs=() 23 | 24 | # Declare an environment variable as required. 25 | # 26 | # require VAR_NAME "Documentation about how to define valid values" 27 | require() { 28 | required_vars+=("$1") 29 | var_docs+=("$2") 30 | } 31 | 32 | # Check all variables declared as required. If any are missing, print a message and 33 | # exit with a non-zero status. 34 | check_required_variables() { 35 | for i in "${!required_vars[@]}"; do 36 | var=${required_vars[i]} 37 | if [[ -z "${!var}" ]]; then 38 | log_status "${var} is not set: ${var_docs[i]}" 39 | missing_var=true 40 | fi 41 | done 42 | 43 | if [[ $missing_var == "true" ]]; then 44 | log_error "Your environment is missing some variables!" 45 | log_error "Set the above variables in .envrc.local and try again." 
46 | fi 47 | } 48 | 49 | ######################### 50 | # Project Configuration # 51 | ######################### 52 | 53 | # Lambda resource constraints (Override in .envrc.local) 54 | # https://docs.docker.com/config/containers/resource_constraints/ 55 | export MEM=1024m 56 | export CPUS=1.0 57 | 58 | require AV_DEFINITION_S3_BUCKET "Add this variable to your .envrc.local" 59 | require AV_DEFINITION_S3_PREFIX "Add this variable to your .envrc.local" 60 | 61 | require TEST_BUCKET "Add this variable to your .envrc.local" 62 | require TEST_KEY "Add this variable to your .envrc.local" 63 | 64 | ############################################## 65 | # Load Local Overrides and Check Environment # 66 | ############################################## 67 | 68 | # Load a local overrides file. Any changes you want to make for your local 69 | # environment should live in that file. 70 | 71 | if [ -e .envrc.local ] 72 | then 73 | source_env .envrc.local 74 | fi 75 | 76 | # Check that all required environment variables are set 77 | check_required_variables 78 | -------------------------------------------------------------------------------- /.envrc.local.template: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | # 4 | # Copy this file `cp .envrc.local.template .envrc.local` and modify the variables below for testing 5 | # 6 | 7 | # Optional AWS Parameters 8 | # WARNING: It's not recommended to keep credentials in this file! 
9 | # export AWS_ACCESS_KEY_ID 10 | # export AWS_DEFAULT_REGION 11 | # export AWS_REGION 12 | # export AWS_SECRET_ACCESS_KEY 13 | # export AWS_SESSION_TOKEN 14 | 15 | # Lambda resource constraints you can override here 16 | # https://docs.docker.com/config/containers/resource_constraints/ 17 | # export MEM=1024m 18 | # export CPUS=1.0 19 | 20 | # Required for both scan and update lambdas scripts 21 | export AV_DEFINITION_S3_BUCKET="" 22 | export AV_DEFINITION_S3_PREFIX="" 23 | 24 | # Required for scan lambda script 25 | export TEST_BUCKET="" 26 | export TEST_KEY="" 27 | 28 | # Uncomment and change as needed for lambda scripts 29 | # export AV_DEFINITION_FILE_PREFIXES 30 | # export AV_DEFINITION_FILE_SUFFIXES 31 | # export AV_DEFINITION_PATH 32 | # export AV_DELETE_INFECTED_FILES 33 | # export AV_PROCESS_ORIGINAL_VERSION_ONLY 34 | # export AV_SCAN_START_METADATA 35 | # export AV_SCAN_START_SNS_ARN 36 | # export AV_SIGNATURE_METADATA 37 | # export AV_SIGNATURE_OK 38 | # export AV_SIGNATURE_UNKNOWN 39 | # export AV_STATUS_CLEAN 40 | # export AV_STATUS_INFECTED 41 | # export AV_STATUS_METADATA 42 | # export AV_STATUS_SNS_ARN 43 | # export AV_STATUS_SNS_PUBLISH_CLEAN 44 | # export AV_STATUS_SNS_PUBLISH_INFECTED 45 | # export AV_TIMESTAMP_METADATA 46 | # export CLAMAVLIB_PATH 47 | # export CLAMSCAN_PATH 48 | # export FRESHCLAM_PATH 49 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E501,F405 3 | max-line-length = 120 4 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @UpsideTravel/full-access 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Idea 2 | 
.idea 3 | *.iml 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | bin/ 32 | compile/*.zip 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # dotenv 89 | .env 90 | 91 | # virtualenv 92 | .venv 93 | venv/ 94 | ENV/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | 109 | # coverage 110 | .coverage 111 | 112 | .DS_Store 113 | tmp/ 114 | 115 | # direnv 116 | .envrc.local 117 | 118 | # EICAR Files 119 | *eicar* 120 | -------------------------------------------------------------------------------- /.markdownlintrc: 
-------------------------------------------------------------------------------- 1 | { 2 | "default": true, 3 | "line_length": false, 4 | "MD024": { 5 | "siblings_only": true 6 | }, 7 | "MD014": false 8 | } -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | 3 | - repo: https://github.com/ambv/black 4 | rev: 19.3b0 5 | hooks: 6 | - id: black 7 | language_version: python3.7 8 | exclude: > 9 | (?x)^( 10 | scripts/gen-docs-index| 11 | )$ 12 | 13 | - repo: git://github.com/pre-commit/pre-commit-hooks 14 | rev: v2.2.3 15 | hooks: 16 | - id: check-ast 17 | - id: check-json 18 | - id: check-merge-conflict 19 | - id: check-yaml 20 | exclude: deploy/ 21 | - id: debug-statements 22 | - id: detect-private-key 23 | - id: fix-encoding-pragma 24 | - id: flake8 25 | - id: trailing-whitespace 26 | 27 | - repo: git://github.com/igorshubovych/markdownlint-cli 28 | rev: v0.17.0 29 | hooks: 30 | - id: markdownlint 31 | entry: markdownlint --ignore .github/*.md 32 | 33 | - repo: git://github.com/aws-cloudformation/cfn-python-lint 34 | rev: v0.49.0 35 | hooks: 36 | - id: cfn-python-lint 37 | files: deploy/ -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM amazonlinux:2 2 | 3 | # Set up working directories 4 | RUN mkdir -p /opt/app 5 | RUN mkdir -p /opt/app/build 6 | RUN mkdir -p /opt/app/bin/ 7 | 8 | # Copy in the lambda source 9 | WORKDIR /opt/app 10 | COPY ./*.py /opt/app/ 11 | COPY requirements.txt /opt/app/requirements.txt 12 | 13 | # Install packages 14 | RUN yum update -y 15 | RUN yum install -y cpio python3-pip yum-utils zip unzip less 16 | RUN yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm 17 | 18 | # This had --no-cache-dir, tracing through multiple tickets 
led to a problem in wheel 19 | RUN pip3 install -r requirements.txt 20 | RUN rm -rf /root/.cache/pip 21 | 22 | # Download libraries we need to run in lambda 23 | WORKDIR /tmp 24 | RUN yumdownloader -x \*i686 --archlist=x86_64 clamav clamav-lib clamav-update json-c pcre2 libprelude gnutls libtasn1 lib64nettle nettle 25 | RUN rpm2cpio clamav-0*.rpm | cpio -idmv 26 | RUN rpm2cpio clamav-lib*.rpm | cpio -idmv 27 | RUN rpm2cpio clamav-update*.rpm | cpio -idmv 28 | RUN rpm2cpio json-c*.rpm | cpio -idmv 29 | RUN rpm2cpio pcre*.rpm | cpio -idmv 30 | RUN rpm2cpio gnutls* | cpio -idmv 31 | RUN rpm2cpio nettle* | cpio -idmv 32 | RUN rpm2cpio lib* | cpio -idmv 33 | RUN rpm2cpio *.rpm | cpio -idmv 34 | RUN rpm2cpio libtasn1* | cpio -idmv 35 | 36 | # Copy over the binaries and libraries 37 | RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /opt/app/bin/ 38 | 39 | # Fix the freshclam.conf settings 40 | RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf 41 | RUN echo "CompressLocalDatabase yes" >> /opt/app/bin/freshclam.conf 42 | 43 | # Create the zip file 44 | WORKDIR /opt/app 45 | RUN zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py bin 46 | 47 | WORKDIR /usr/local/lib/python3.7/site-packages 48 | RUN zip -r9 /opt/app/build/lambda.zip * 49 | 50 | WORKDIR /opt/app 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Upside Travel, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | AMZ_LINUX_VERSION:=2 16 | current_dir := $(shell pwd) 17 | container_dir := /opt/app 18 | circleci := ${CIRCLECI} 19 | 20 | .PHONY: help 21 | help: ## Print the help documentation 22 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 23 | 24 | all: archive ## Build the entire project 25 | 26 | .PHONY: clean 27 | clean: ## Clean build artifacts 28 | rm -rf bin/ 29 | rm -rf build/ 30 | rm -rf tmp/ 31 | rm -f .coverage 32 | find ./ -type d -name '__pycache__' -delete 33 | find ./ -type f -name '*.pyc' -delete 34 | 35 | .PHONY: archive 36 | archive: clean ## Create the archive for AWS lambda 37 | docker build -t bucket-antivirus-function:latest . 38 | mkdir -p ./build/ 39 | docker run -v $(current_dir)/build:/opt/mount --rm --entrypoint cp bucket-antivirus-function:latest /opt/app/build/lambda.zip /opt/mount/lambda.zip 40 | 41 | .PHONY: pre_commit_install ## Ensure that pre-commit hook is installed and kept up to date 42 | pre_commit_install: .git/hooks/pre-commit ## Ensure pre-commit is installed 43 | .git/hooks/pre-commit: /usr/local/bin/pre-commit 44 | pip install pre-commit==2.12.1 45 | pre-commit install 46 | pre-commit install-hooks 47 | 48 | .PHONY: pre_commit_tests 49 | pre_commit_tests: ## Run pre-commit tests 50 | pre-commit run --all-files 51 | 52 | .PHONY: test 53 | test: clean ## Run python tests 54 | nosetests 55 | 56 | .PHONY: coverage 57 | coverage: clean ## Run python tests with coverage 58 | nosetests --with-coverage 59 | 60 | .PHONY: scan 61 | scan: ./build/lambda.zip ## Run scan function locally 62 | scripts/run-scan-lambda $(TEST_BUCKET) $(TEST_KEY) 63 | 64 | .PHONY: update 65 | update: ./build/lambda.zip ## Run update function locally 66 | scripts/run-update-lambda -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 
bucket-antivirus-function 2 | 3 | [![CircleCI](https://circleci.com/gh/upsidetravel/bucket-antivirus-function.svg?style=svg)](https://circleci.com/gh/upsidetravel/bucket-antivirus-function) 4 | 5 | Scan new objects added to any s3 bucket using AWS Lambda. [more details in this post](https://engineering.upside.com/s3-antivirus-scanning-with-lambda-and-clamav-7d33f9c5092e) 6 | 7 | ## Features 8 | 9 | - Easy to install 10 | - Send events from an unlimited number of S3 buckets 11 | - Prevent reading of infected files using S3 bucket policies 12 | - Accesses the end-user’s separate installation of 13 | open source antivirus engine [ClamAV](http://www.clamav.net/) 14 | 15 | ## How It Works 16 | 17 | ![architecture-diagram](../master/images/bucket-antivirus-function.png) 18 | 19 | - Each time a new object is added to a bucket, S3 invokes the Lambda 20 | function to scan the object 21 | - The function package will download (if needed) current antivirus 22 | definitions from a S3 bucket. Transfer speeds between a S3 bucket and 23 | Lambda are typically faster and more reliable than another source 24 | - The object is scanned for viruses and malware. Archive files are 25 | extracted and the files inside scanned also 26 | - The object's tags are updated to reflect the result of the scan, CLEAN 27 | or INFECTED, along with the date and time of the scan. 28 | - Object metadata is updated to reflect the result of the scan (optional) 29 | - Metrics are sent to [DataDog](https://www.datadoghq.com/) (optional) 30 | - Scan results are published to a SNS topic (optional) (Optionally choose to only publish INFECTED results) 31 | - Files found to be INFECTED are automatically deleted (optional) 32 | 33 | ## Installation 34 | 35 | ### Build from Source 36 | 37 | To build the archive to upload to AWS Lambda, run `make all`. The build process is completed using 38 | the [amazonlinux](https://hub.docker.com/_/amazonlinux/) [Docker](https://www.docker.com) 39 | image. 
The resulting archive will be built at `build/lambda.zip`. This file will be 40 | uploaded to AWS for both Lambda functions below. 41 | 42 | ### Create Relevant AWS Infra via CloudFormation 43 | 44 | Use CloudFormation with the `cloudformation.yaml` located in the `deploy/` directory to quickly spin up the AWS infra needed to run this project. CloudFormation will create: 45 | 46 | - An S3 bucket that will store AntiVirus definitions. 47 | - A Lambda Function called `avUpdateDefinitions` that will update the AV Definitions in the S3 Bucket every 3 hours. 48 | This function accesses the user’s above S3 Bucket to download updated definitions using `freshclam`. 49 | - A Lambda Function called `avScanner` that is triggered on each new S3 object creation which scans the object and tags it appropriately. It is created with `1600mb` of memory which should be enough, however if you start to see function timeouts, this memory may have to be bumped up. In the past, we recommended using `1024mb`, but that has started causing Lambda timeouts and bumping this memory has resolved it. 50 | 51 | Running CloudFormation, it will ask for 2 inputs for this stack: 52 | 53 | 1. BucketType: `private` (default) or `public`. This is applied to the S3 bucket that stores the AntiVirus definitions. We recommend to only use `public` when other AWS accounts need access to this bucket. 54 | 2. SourceBucket: [a non-empty string]. The name (do not include `s3://`) of the S3 bucket that will have its objects scanned. _Note - this is just used to create the IAM Policy, you can add/change source buckets later via the IAM Policy that CloudFormation outputs_ 55 | 56 | After the Stack has successfully created, there are 3 manual processes that still have to be done: 57 | 58 | 1. Upload the `build/lambda.zip` file that was created by running `make all` to the `avUpdateDefinitions` and `avScanner` Lambda functions via the Lambda Console. 59 | 2. 
To trigger the Scanner function on new S3 objects, go to the `avScanner` Lambda function console, navigate to `Configuration` -> `Trigger` -> `Add Trigger` -> Search for S3, and choose your bucket(s) and select `All object create events`, then click `Add`. _Note - if you chose more than 1 bucket as the source, or chose a different bucket than the Source Bucket in the CloudFormation parameter, you will have to also edit the IAM Role to reflect these new buckets (see "Adding or Changing Source Buckets")_ 60 | 3. Navigate to the `avUpdateDefinitions` Lambda function and manually trigger the function to get the initial Clam definitions in the bucket (instead of waiting for the 3 hour trigger to happen). Do this by clicking the `Test` section, and then clicking the orange `test` button. The function should take a few seconds to execute, and when finished you should see the `clam_defs` in the `av-definitions` S3 bucket. 61 | 62 | #### Adding or Changing Source Buckets 63 | 64 | Changing or adding Source Buckets is done by editing the `AVScannerLambdaRole` IAM Role. More specifically, the `S3AVScan` and `KmsDecrypt` parts of that IAM Role's policy. 65 | 66 | ### S3 Events 67 | 68 | Configure scanning of additional buckets by adding a new S3 event to 69 | invoke the Lambda function. This is done from the properties of any 70 | bucket in the AWS console. 71 | 72 | ![s3-event](../master/images/s3-event.png) 73 | 74 | Note: If configured to update object metadata, events must only be 75 | configured for `PUT` and `POST`. Metadata is immutable, which requires 76 | the function to copy the object over itself with updated metadata. This 77 | can cause a continuous loop of scanning if improperly configured. 78 | 79 | ## Configuration 80 | 81 | Runtime configuration is accomplished using environment variables. See 82 | the table below for reference. 
83 | 84 | | Variable | Description | Default | Required | 85 | | --- | --- | --- | --- | 86 | | AV_DEFINITION_S3_BUCKET | Bucket containing antivirus definition files | | Yes | 87 | | AV_DEFINITION_S3_PREFIX | Prefix for antivirus definition files | clamav_defs | No | 88 | | AV_DEFINITION_PATH | Path containing files at runtime | /tmp/clamav_defs | No | 89 | | AV_SCAN_START_SNS_ARN | SNS topic ARN to publish notification about start of scan | | No | 90 | | AV_SCAN_START_METADATA | The tag/metadata indicating the start of the scan | av-scan-start | No | 91 | | AV_SIGNATURE_METADATA | The tag/metadata name representing file's AV type | av-signature | No | 92 | | AV_STATUS_CLEAN | The value assigned to clean items inside of tags/metadata | CLEAN | No | 93 | | AV_STATUS_INFECTED | The value assigned to infected items inside of tags/metadata | INFECTED | No | 94 | | AV_STATUS_METADATA | The tag/metadata name representing file's AV status | av-status | No | 95 | | AV_STATUS_SNS_ARN | SNS topic ARN to publish scan results (optional) | | No | 96 | | AV_STATUS_SNS_PUBLISH_CLEAN | Publish AV_STATUS_CLEAN results to AV_STATUS_SNS_ARN | True | No | 97 | | AV_STATUS_SNS_PUBLISH_INFECTED | Publish AV_STATUS_INFECTED results to AV_STATUS_SNS_ARN | True | No | 98 | | AV_TIMESTAMP_METADATA | The tag/metadata name representing file's scan time | av-timestamp | No | 99 | | CLAMAVLIB_PATH | Path to ClamAV library files | ./bin | No | 100 | | CLAMSCAN_PATH | Path to ClamAV clamscan binary | ./bin/clamscan | No | 101 | | FRESHCLAM_PATH | Path to ClamAV freshclam binary | ./bin/freshclam | No | 102 | | DATADOG_API_KEY | API Key for pushing metrics to DataDog (optional) | | No | 103 | | AV_PROCESS_ORIGINAL_VERSION_ONLY | Controls that only original version of an S3 key is processed (if bucket versioning is enabled) | False | No | 104 | | AV_DELETE_INFECTED_FILES | Controls whether infected files should be automatically deleted | False | No | 105 | | EVENT_SOURCE | The source of antivirus
scan event "S3" or "SNS" (optional) | S3 | No | 106 | | S3_ENDPOINT | The Endpoint to use when interacting with S3 | None | No | 107 | | SNS_ENDPOINT | The Endpoint to use when interacting with SNS | None | No | 108 | | LAMBDA_ENDPOINT | The Endpoint to use when interacting with Lambda | None | No | 109 | 110 | ## S3 Bucket Policy Examples 111 | 112 | ### Deny to download the object if not "CLEAN" 113 | 114 | This policy doesn't allow downloading the object until: 115 | 116 | 1. The lambda that runs Clam-AV is finished (so the object has a tag) 117 | 2. The file is marked as CLEAN 118 | 119 | Please make sure to check cloudtrail for the arn:aws:sts, just find the event open it and copy the sts. 120 | It should be in the format provided below: 121 | 122 | ```json 123 | { 124 | "Effect": "Deny", 125 | "NotPrincipal": { 126 | "AWS": [ 127 | "arn:aws:iam::<>:role/<>", 128 | "arn:aws:sts::<>:assumed-role/<>/<>", 129 | "arn:aws:iam::<>:root" 130 | ] 131 | }, 132 | "Action": "s3:GetObject", 133 | "Resource": "arn:aws:s3:::<>/*", 134 | "Condition": { 135 | "StringNotEquals": { 136 | "s3:ExistingObjectTag/av-status": "CLEAN" 137 | } 138 | } 139 | } 140 | ``` 141 | 142 | ### Deny to download and re-tag "INFECTED" object 143 | 144 | ```json 145 | { 146 | "Version": "2012-10-17", 147 | "Statement": [ 148 | { 149 | "Effect": "Deny", 150 | "Action": ["s3:GetObject", "s3:PutObjectTagging"], 151 | "Principal": "*", 152 | "Resource": ["arn:aws:s3:::<>/*"], 153 | "Condition": { 154 | "StringEquals": { 155 | "s3:ExistingObjectTag/av-status": "INFECTED" 156 | } 157 | } 158 | } 159 | ] 160 | } 161 | ``` 162 | 163 | ## Manually Scanning Buckets 164 | 165 | You may want to scan all the objects in a bucket that have not previously been scanned or were created 166 | prior to setting up your lambda functions. To do this you can use the `scan_bucket.py` utility.
167 | 168 | ```sh 169 | pip install boto3 170 | scan_bucket.py --lambda-function-name= --s3-bucket-name= 171 | ``` 172 | 173 | This tool will scan all objects that have not been previously scanned in the bucket and invoke the lambda function 174 | asynchronously. As such you'll have to go to your cloudwatch logs to see the scan results or failures. Additionally, 175 | the script uses the same environment variables you'd use in your lambda so you can configure them similarly. 176 | 177 | ## Testing 178 | 179 | There are two types of tests in this repository. The first is pre-commit tests and the second are python tests. All of 180 | these tests are run by CircleCI. 181 | 182 | ### pre-commit Tests 183 | 184 | The pre-commit tests ensure that code submitted to this repository meet the standards of the repository. To get started 185 | with these tests run `make pre_commit_install`. This will install the pre-commit tool and then install it in this 186 | repository. Then the github pre-commit hook will run these tests before you commit your code. 187 | 188 | To run the tests manually run `make pre_commit_tests` or `pre-commit run -a`. 189 | 190 | ### Python Tests 191 | 192 | The python tests in this repository use `unittest` and are run via the `nose` utility. To run them you will need 193 | to install the developer resources and then run the tests: 194 | 195 | ```sh 196 | pip install -r requirements.txt 197 | pip install -r requirements-dev.txt 198 | make test 199 | ``` 200 | 201 | ### Local lambdas 202 | 203 | You can run the lambdas locally to test out what they are doing without deploying to AWS. This is accomplished 204 | by using docker containers that act similarly to lambda. You will need to have set up some local variables in your 205 | `.envrc.local` file and modify them appropriately first before running `direnv allow`. If you do not have `direnv` 206 | it can be installed with `brew install direnv`. 
207 | 208 | For the Scan lambda you will need a test file uploaded to S3 and the variables `TEST_BUCKET` and `TEST_KEY` 209 | set in your `.envrc.local` file. Then you can run: 210 | 211 | ```sh 212 | direnv allow 213 | make archive scan 214 | ``` 215 | 216 | If you want a file that will be recognized as a virus you can download a test file from the [EICAR](https://www.eicar.org/?page_id=3950) 217 | website and uploaded to your bucket. 218 | 219 | For the Update lambda you can run: 220 | 221 | ```sh 222 | direnv allow 223 | make archive update 224 | ``` 225 | 226 | ## License 227 | 228 | ```text 229 | Upside Travel, Inc. 230 | 231 | Licensed under the Apache License, Version 2.0 (the "License"); 232 | you may not use this file except in compliance with the License. 233 | You may obtain a copy of the License at 234 | 235 | http://www.apache.org/licenses/LICENSE-2.0 236 | 237 | Unless required by applicable law or agreed to in writing, software 238 | distributed under the License is distributed on an "AS IS" BASIS, 239 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 240 | See the License for the specific language governing permissions and 241 | limitations under the License. 242 | ``` 243 | 244 | ClamAV is released under the [GPL Version 2 License](https://github.com/vrtadmin/clamav-devel/blob/master/COPYING) 245 | and all [source for ClamAV](https://github.com/vrtadmin/clamav-devel) is available 246 | for download on Github. 247 | -------------------------------------------------------------------------------- /clamav.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Upside Travel, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import datetime 17 | import hashlib 18 | import os 19 | import pwd 20 | import re 21 | import subprocess 22 | 23 | import boto3 24 | import botocore 25 | from pytz import utc 26 | 27 | from common import AV_DEFINITION_S3_PREFIX, S3_ENDPOINT 28 | from common import AV_DEFINITION_PATH 29 | from common import AV_DEFINITION_FILE_PREFIXES 30 | from common import AV_DEFINITION_FILE_SUFFIXES 31 | from common import AV_SIGNATURE_OK 32 | from common import AV_SIGNATURE_UNKNOWN 33 | from common import AV_STATUS_CLEAN 34 | from common import AV_STATUS_INFECTED 35 | from common import CLAMAVLIB_PATH 36 | from common import CLAMSCAN_PATH 37 | from common import FRESHCLAM_PATH 38 | from common import create_dir 39 | 40 | 41 | RE_SEARCH_DIR = r"SEARCH_DIR\(\"=([A-z0-9\/\-_]*)\"\)" 42 | 43 | 44 | def current_library_search_path(): 45 | ld_verbose = subprocess.check_output(["ld", "--verbose"]).decode("utf-8") 46 | rd_ld = re.compile(RE_SEARCH_DIR) 47 | return rd_ld.findall(ld_verbose) 48 | 49 | 50 | def update_defs_from_s3(s3_client, bucket, prefix): 51 | create_dir(AV_DEFINITION_PATH) 52 | to_download = {} 53 | for file_prefix in AV_DEFINITION_FILE_PREFIXES: 54 | s3_best_time = None 55 | for file_suffix in AV_DEFINITION_FILE_SUFFIXES: 56 | filename = file_prefix + "." 
+ file_suffix 57 | s3_path = os.path.join(AV_DEFINITION_S3_PREFIX, filename) 58 | local_path = os.path.join(AV_DEFINITION_PATH, filename) 59 | s3_md5 = md5_from_s3_tags(s3_client, bucket, s3_path) 60 | s3_time = time_from_s3(s3_client, bucket, s3_path) 61 | 62 | if s3_best_time is not None and s3_time < s3_best_time: 63 | print("Not downloading older file in series: %s" % filename) 64 | continue 65 | else: 66 | s3_best_time = s3_time 67 | 68 | if os.path.exists(local_path) and md5_from_file(local_path) == s3_md5: 69 | print("Not downloading %s because local md5 matches s3." % filename) 70 | continue 71 | if s3_md5: 72 | to_download[file_prefix] = { 73 | "s3_path": s3_path, 74 | "local_path": local_path, 75 | } 76 | return to_download 77 | 78 | 79 | def upload_defs_to_s3(s3_client, bucket, prefix, local_path): 80 | for file_prefix in AV_DEFINITION_FILE_PREFIXES: 81 | for file_suffix in AV_DEFINITION_FILE_SUFFIXES: 82 | filename = file_prefix + "." + file_suffix 83 | local_file_path = os.path.join(local_path, filename) 84 | if os.path.exists(local_file_path): 85 | local_file_md5 = md5_from_file(local_file_path) 86 | if local_file_md5 != md5_from_s3_tags( 87 | s3_client, bucket, os.path.join(prefix, filename) 88 | ): 89 | print( 90 | "Uploading %s to s3://%s" 91 | % (local_file_path, os.path.join(bucket, prefix, filename)) 92 | ) 93 | s3 = boto3.resource("s3", endpoint_url=S3_ENDPOINT) 94 | s3_object = s3.Object(bucket, os.path.join(prefix, filename)) 95 | s3_object.upload_file(os.path.join(local_path, filename)) 96 | s3_client.put_object_tagging( 97 | Bucket=s3_object.bucket_name, 98 | Key=s3_object.key, 99 | Tagging={"TagSet": [{"Key": "md5", "Value": local_file_md5}]}, 100 | ) 101 | else: 102 | print( 103 | "Not uploading %s because md5 on remote matches local." 
104 | % filename 105 | ) 106 | else: 107 | print("File does not exist: %s" % filename) 108 | 109 | 110 | def update_defs_from_freshclam(path, library_path=""): 111 | create_dir(path) 112 | fc_env = os.environ.copy() 113 | if library_path: 114 | fc_env["LD_LIBRARY_PATH"] = "%s:%s" % ( 115 | ":".join(current_library_search_path()), 116 | CLAMAVLIB_PATH, 117 | ) 118 | print("Starting freshclam with defs in %s." % path) 119 | fc_proc = subprocess.Popen( 120 | [ 121 | FRESHCLAM_PATH, 122 | "--config-file=./bin/freshclam.conf", 123 | "-u %s" % pwd.getpwuid(os.getuid())[0], 124 | "--datadir=%s" % path, 125 | ], 126 | stderr=subprocess.STDOUT, 127 | stdout=subprocess.PIPE, 128 | env=fc_env, 129 | ) 130 | output = fc_proc.communicate()[0] 131 | print("freshclam output:\n%s" % output) 132 | if fc_proc.returncode != 0: 133 | print("Unexpected exit code from freshclam: %s." % fc_proc.returncode) 134 | return fc_proc.returncode 135 | 136 | 137 | def md5_from_file(filename): 138 | hash_md5 = hashlib.md5() 139 | with open(filename, "rb") as f: 140 | for chunk in iter(lambda: f.read(4096), b""): 141 | hash_md5.update(chunk) 142 | return hash_md5.hexdigest() 143 | 144 | 145 | def md5_from_s3_tags(s3_client, bucket, key): 146 | try: 147 | tags = s3_client.get_object_tagging(Bucket=bucket, Key=key)["TagSet"] 148 | except botocore.exceptions.ClientError as e: 149 | expected_errors = { 150 | "404", # Object does not exist 151 | "AccessDenied", # Object cannot be accessed 152 | "NoSuchKey", # Object does not exist 153 | "MethodNotAllowed", # Object deleted in bucket with versioning 154 | } 155 | if e.response["Error"]["Code"] in expected_errors: 156 | return "" 157 | else: 158 | raise 159 | for tag in tags: 160 | if tag["Key"] == "md5": 161 | return tag["Value"] 162 | return "" 163 | 164 | 165 | def time_from_s3(s3_client, bucket, key): 166 | try: 167 | time = s3_client.head_object(Bucket=bucket, Key=key)["LastModified"] 168 | except botocore.exceptions.ClientError as e: 169 | 
expected_errors = {"404", "AccessDenied", "NoSuchKey"} 170 | if e.response["Error"]["Code"] in expected_errors: 171 | return datetime.datetime.fromtimestamp(0, utc) 172 | else: 173 | raise 174 | return time 175 | 176 | 177 | # Turn ClamAV Scan output into a JSON formatted data object 178 | def scan_output_to_json(output): 179 | summary = {} 180 | for line in output.split("\n"): 181 | if ":" in line: 182 | key, value = line.split(":", 1) 183 | summary[key] = value.strip() 184 | return summary 185 | 186 | 187 | def scan_file(path): 188 | av_env = os.environ.copy() 189 | av_env["LD_LIBRARY_PATH"] = CLAMAVLIB_PATH 190 | print("Starting clamscan of %s." % path) 191 | av_proc = subprocess.Popen( 192 | [CLAMSCAN_PATH, "-v", "-a", "--stdout", "-d", AV_DEFINITION_PATH, path], 193 | stderr=subprocess.STDOUT, 194 | stdout=subprocess.PIPE, 195 | env=av_env, 196 | ) 197 | output = av_proc.communicate()[0].decode() 198 | print("clamscan output:\n%s" % output) 199 | 200 | # Turn the output into a data source we can read 201 | summary = scan_output_to_json(output) 202 | if av_proc.returncode == 0: 203 | return AV_STATUS_CLEAN, AV_SIGNATURE_OK 204 | elif av_proc.returncode == 1: 205 | signature = summary.get(path, AV_SIGNATURE_UNKNOWN) 206 | return AV_STATUS_INFECTED, signature 207 | else: 208 | msg = "Unexpected exit code from clamscan: %s.\n" % av_proc.returncode 209 | print(msg) 210 | raise Exception(msg) 211 | -------------------------------------------------------------------------------- /clamav_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Upside Travel, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
class TestClamAV(unittest.TestCase):
    """Unit tests for the clamav helper module, with all S3 calls stubbed.

    BUG FIX: every ``assertEquals`` call was replaced with ``assertEqual`` —
    ``assertEquals`` is a deprecated alias that was removed in Python 3.12.
    """

    def setUp(self):
        # Common data
        self.s3_bucket_name = "test_bucket"
        self.s3_key_name = "test_key"

        # Clients and Resources
        self.s3 = boto3.resource("s3")
        self.s3_client = botocore.session.get_session().create_client("s3")
        self.sns_client = botocore.session.get_session().create_client(
            "sns", region_name="us-west-2"
        )

    def test_current_library_search_path(self):
        """RE_SEARCH_DIR extracts every SEARCH_DIR path, in order."""
        # Calling `ld --verbose` returns a lot of text but the line to check is this one:
        search_path = """SEARCH_DIR("=/usr/x86_64-redhat-linux/lib64"); SEARCH_DIR("=/usr/lib64"); SEARCH_DIR("=/usr/local/lib64"); SEARCH_DIR("=/lib64"); SEARCH_DIR("=/usr/x86_64-redhat-linux/lib"); SEARCH_DIR("=/usr/local/lib"); SEARCH_DIR("=/lib"); SEARCH_DIR("=/usr/lib");"""  # noqa
        rd_ld = re.compile(RE_SEARCH_DIR)
        all_search_paths = rd_ld.findall(search_path)
        expected_search_paths = [
            "/usr/x86_64-redhat-linux/lib64",
            "/usr/lib64",
            "/usr/local/lib64",
            "/lib64",
            "/usr/x86_64-redhat-linux/lib",
            "/usr/local/lib",
            "/lib",
            "/usr/lib",
        ]
        self.assertEqual(all_search_paths, expected_search_paths)

    def test_scan_output_to_json_clean(self):
        """A clean scan report parses into {path: OK, "Infected files": "0"}."""
        file_path = "/tmp/test.txt"
        signature = AV_SIGNATURE_OK
        output = textwrap.dedent(
            """\
            Scanning {0}
            {0}: {1}
            ----------- SCAN SUMMARY -----------
            Known viruses: 6305127
            Engine version: 0.101.4
            Scanned directories: 0
            Scanned files: 1
            Infected files: 0
            Data scanned: 0.00 MB
            Data read: 0.00 MB (ratio 0.00:1)
            Time: 80.299 sec (1 m 20 s)
            """.format(
                file_path, signature
            )
        )
        summary = scan_output_to_json(output)
        self.assertEqual(summary[file_path], signature)
        self.assertEqual(summary["Infected files"], "0")

    def test_scan_output_to_json_infected(self):
        """An infected report parses into {path: signature, "Infected files": "1"}."""
        file_path = "/tmp/eicar.com.txt"
        signature = "Eicar-Test-Signature FOUND"
        output = textwrap.dedent(
            """\
            Scanning {0}
            {0}: {1}
            {0}!(0): {1}
            ----------- SCAN SUMMARY -----------
            Known viruses: 6305127
            Engine version: 0.101.4
            Scanned directories: 0
            Scanned files: 1
            Infected files: 1
            Data scanned: 0.00 MB
            Data read: 0.00 MB (ratio 0.00:1)
            Time: 80.299 sec (1 m 20 s)
            """.format(
                file_path, signature
            )
        )
        summary = scan_output_to_json(output)
        self.assertEqual(summary[file_path], signature)
        self.assertEqual(summary["Infected files"], "1")

    def test_md5_from_s3_tags_no_md5(self):
        """An empty TagSet yields an empty md5 string."""
        tag_set = {"TagSet": []}

        s3_stubber = Stubber(self.s3_client)
        get_object_tagging_response = tag_set
        get_object_tagging_expected_params = {
            "Bucket": self.s3_bucket_name,
            "Key": self.s3_key_name,
        }
        s3_stubber.add_response(
            "get_object_tagging",
            get_object_tagging_response,
            get_object_tagging_expected_params,
        )
        with s3_stubber:
            md5_hash = md5_from_s3_tags(
                self.s3_client, self.s3_bucket_name, self.s3_key_name
            )
            self.assertEqual("", md5_hash)

    def test_md5_from_s3_tags_has_md5(self):
        """An 'md5' tag on the object is returned verbatim."""
        expected_md5_hash = "d41d8cd98f00b204e9800998ecf8427e"
        tag_set = {"TagSet": [{"Key": "md5", "Value": expected_md5_hash}]}

        s3_stubber = Stubber(self.s3_client)
        get_object_tagging_response = tag_set
        get_object_tagging_expected_params = {
            "Bucket": self.s3_bucket_name,
            "Key": self.s3_key_name,
        }
        s3_stubber.add_response(
            "get_object_tagging",
            get_object_tagging_response,
            get_object_tagging_expected_params,
        )
        with s3_stubber:
            md5_hash = md5_from_s3_tags(
                self.s3_client, self.s3_bucket_name, self.s3_key_name
            )
            self.assertEqual(expected_md5_hash, md5_hash)

    def test_time_from_s3(self):
        """head_object's LastModified is passed through unchanged."""
        expected_s3_time = datetime.datetime(2019, 1, 1)

        s3_stubber = Stubber(self.s3_client)
        head_object_response = {"LastModified": expected_s3_time}
        head_object_expected_params = {
            "Bucket": self.s3_bucket_name,
            "Key": self.s3_key_name,
        }
        s3_stubber.add_response(
            "head_object", head_object_response, head_object_expected_params
        )
        with s3_stubber:
            s3_time = time_from_s3(
                self.s3_client, self.s3_bucket_name, self.s3_key_name
            )
            self.assertEqual(expected_s3_time, s3_time)

    @mock.patch("clamav.md5_from_file")
    @mock.patch("common.os.path.exists")
    def test_update_defs_from_s3(self, mock_exists, mock_md5_from_file):
        """Differing local md5s mean every (newest) definition is downloaded."""
        expected_md5_hash = "d41d8cd98f00b204e9800998ecf8427e"
        different_md5_hash = "d41d8cd98f00b204e9800998ecf8427f"

        mock_md5_from_file.return_value = different_md5_hash

        tag_set = {"TagSet": [{"Key": "md5", "Value": expected_md5_hash}]}
        expected_s3_time = datetime.datetime(2019, 1, 1)

        s3_stubber = Stubber(self.s3_client)

        key_names = []
        side_effect = []
        for file_prefix in AV_DEFINITION_FILE_PREFIXES:
            for file_suffix in AV_DEFINITION_FILE_SUFFIXES:
                side_effect.extend([True, True])
                filename = file_prefix + "." + file_suffix
                key_names.append(os.path.join(AV_DEFINITION_S3_PREFIX, filename))
        mock_exists.side_effect = side_effect

        for s3_key_name in key_names:
            get_object_tagging_response = tag_set
            get_object_tagging_expected_params = {
                "Bucket": self.s3_bucket_name,
                "Key": s3_key_name,
            }
            s3_stubber.add_response(
                "get_object_tagging",
                get_object_tagging_response,
                get_object_tagging_expected_params,
            )
            head_object_response = {"LastModified": expected_s3_time}
            head_object_expected_params = {
                "Bucket": self.s3_bucket_name,
                "Key": s3_key_name,
            }
            s3_stubber.add_response(
                "head_object", head_object_response, head_object_expected_params
            )

        expected_to_download = {
            "bytecode": {
                "local_path": "/tmp/clamav_defs/bytecode.cvd",
                "s3_path": "clamav_defs/bytecode.cvd",
            },
            "daily": {
                "local_path": "/tmp/clamav_defs/daily.cvd",
                "s3_path": "clamav_defs/daily.cvd",
            },
            "main": {
                "local_path": "/tmp/clamav_defs/main.cvd",
                "s3_path": "clamav_defs/main.cvd",
            },
        }
        with s3_stubber:
            to_download = update_defs_from_s3(
                self.s3_client, self.s3_bucket_name, AV_DEFINITION_S3_PREFIX
            )
            self.assertEqual(expected_to_download, to_download)

    @mock.patch("clamav.md5_from_file")
    @mock.patch("common.os.path.exists")
    def test_update_defs_from_s3_same_hash(self, mock_exists, mock_md5_from_file):
        """Matching local md5s mean nothing is downloaded."""
        expected_md5_hash = "d41d8cd98f00b204e9800998ecf8427e"
        different_md5_hash = expected_md5_hash

        mock_md5_from_file.return_value = different_md5_hash

        tag_set = {"TagSet": [{"Key": "md5", "Value": expected_md5_hash}]}
        expected_s3_time = datetime.datetime(2019, 1, 1)

        s3_stubber = Stubber(self.s3_client)

        key_names = []
        side_effect = []
        for file_prefix in AV_DEFINITION_FILE_PREFIXES:
            for file_suffix in AV_DEFINITION_FILE_SUFFIXES:
                side_effect.extend([True, True])
                filename = file_prefix + "." + file_suffix
                key_names.append(os.path.join(AV_DEFINITION_S3_PREFIX, filename))
        mock_exists.side_effect = side_effect

        for s3_key_name in key_names:
            get_object_tagging_response = tag_set
            get_object_tagging_expected_params = {
                "Bucket": self.s3_bucket_name,
                "Key": s3_key_name,
            }
            s3_stubber.add_response(
                "get_object_tagging",
                get_object_tagging_response,
                get_object_tagging_expected_params,
            )
            head_object_response = {"LastModified": expected_s3_time}
            head_object_expected_params = {
                "Bucket": self.s3_bucket_name,
                "Key": s3_key_name,
            }
            s3_stubber.add_response(
                "head_object", head_object_response, head_object_expected_params
            )

        expected_to_download = {}
        with s3_stubber:
            to_download = update_defs_from_s3(
                self.s3_client, self.s3_bucket_name, AV_DEFINITION_S3_PREFIX
            )
            self.assertEqual(expected_to_download, to_download)

    @mock.patch("clamav.md5_from_file")
    @mock.patch("common.os.path.exists")
    def test_update_defs_from_s3_old_files(self, mock_exists, mock_md5_from_file):
        """Older files within a series are skipped in favor of newer ones."""
        expected_md5_hash = "d41d8cd98f00b204e9800998ecf8427e"
        different_md5_hash = "d41d8cd98f00b204e9800998ecf8427f"

        mock_md5_from_file.return_value = different_md5_hash

        tag_set = {"TagSet": [{"Key": "md5", "Value": expected_md5_hash}]}
        expected_s3_time = datetime.datetime(2019, 1, 1)

        s3_stubber = Stubber(self.s3_client)

        key_names = []
        side_effect = []
        for file_prefix in AV_DEFINITION_FILE_PREFIXES:
            for file_suffix in AV_DEFINITION_FILE_SUFFIXES:
                side_effect.extend([True, True])
                filename = file_prefix + "." + file_suffix
                key_names.append(os.path.join(AV_DEFINITION_S3_PREFIX, filename))
        mock_exists.side_effect = side_effect

        count = 0
        for s3_key_name in key_names:
            get_object_tagging_response = tag_set
            get_object_tagging_expected_params = {
                "Bucket": self.s3_bucket_name,
                "Key": s3_key_name,
            }
            s3_stubber.add_response(
                "get_object_tagging",
                get_object_tagging_response,
                get_object_tagging_expected_params,
            )
            # Each successive key is one hour older, so the first suffix
            # (.cld) in every series is the newest and should win.
            head_object_response = {
                "LastModified": expected_s3_time - datetime.timedelta(hours=count)
            }
            head_object_expected_params = {
                "Bucket": self.s3_bucket_name,
                "Key": s3_key_name,
            }
            s3_stubber.add_response(
                "head_object", head_object_response, head_object_expected_params
            )
            count += 1

        expected_to_download = {
            "bytecode": {
                "local_path": "/tmp/clamav_defs/bytecode.cld",
                "s3_path": "clamav_defs/bytecode.cld",
            },
            "daily": {
                "local_path": "/tmp/clamav_defs/daily.cld",
                "s3_path": "clamav_defs/daily.cld",
            },
            "main": {
                "local_path": "/tmp/clamav_defs/main.cld",
                "s3_path": "clamav_defs/main.cld",
            },
        }
        with s3_stubber:
            to_download = update_defs_from_s3(
                self.s3_client, self.s3_bucket_name, AV_DEFINITION_S3_PREFIX
            )
            self.assertEqual(expected_to_download, to_download)
Travel, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import errno 17 | import datetime 18 | import os 19 | import os.path 20 | 21 | AV_DEFINITION_S3_BUCKET = os.getenv("AV_DEFINITION_S3_BUCKET") 22 | AV_DEFINITION_S3_PREFIX = os.getenv("AV_DEFINITION_S3_PREFIX", "clamav_defs") 23 | AV_DEFINITION_PATH = os.getenv("AV_DEFINITION_PATH", "/tmp/clamav_defs") 24 | AV_SCAN_START_SNS_ARN = os.getenv("AV_SCAN_START_SNS_ARN") 25 | AV_SCAN_START_METADATA = os.getenv("AV_SCAN_START_METADATA", "av-scan-start") 26 | AV_SIGNATURE_METADATA = os.getenv("AV_SIGNATURE_METADATA", "av-signature") 27 | AV_SIGNATURE_OK = "OK" 28 | AV_SIGNATURE_UNKNOWN = "UNKNOWN" 29 | AV_STATUS_CLEAN = os.getenv("AV_STATUS_CLEAN", "CLEAN") 30 | AV_STATUS_INFECTED = os.getenv("AV_STATUS_INFECTED", "INFECTED") 31 | AV_STATUS_METADATA = os.getenv("AV_STATUS_METADATA", "av-status") 32 | AV_STATUS_SNS_ARN = os.getenv("AV_STATUS_SNS_ARN") 33 | AV_STATUS_SNS_PUBLISH_CLEAN = os.getenv("AV_STATUS_SNS_PUBLISH_CLEAN", "True") 34 | AV_STATUS_SNS_PUBLISH_INFECTED = os.getenv("AV_STATUS_SNS_PUBLISH_INFECTED", "True") 35 | AV_TIMESTAMP_METADATA = os.getenv("AV_TIMESTAMP_METADATA", "av-timestamp") 36 | CLAMAVLIB_PATH = os.getenv("CLAMAVLIB_PATH", "./bin") 37 | CLAMSCAN_PATH = os.getenv("CLAMSCAN_PATH", "./bin/clamscan") 38 | FRESHCLAM_PATH = os.getenv("FRESHCLAM_PATH", "./bin/freshclam") 39 | AV_PROCESS_ORIGINAL_VERSION_ONLY = os.getenv( 40 | 
"AV_PROCESS_ORIGINAL_VERSION_ONLY", "False" 41 | ) 42 | AV_DELETE_INFECTED_FILES = os.getenv("AV_DELETE_INFECTED_FILES", "False") 43 | 44 | AV_DEFINITION_FILE_PREFIXES = ["main", "daily", "bytecode"] 45 | AV_DEFINITION_FILE_SUFFIXES = ["cld", "cvd"] 46 | SNS_ENDPOINT = os.getenv("SNS_ENDPOINT", None) 47 | S3_ENDPOINT = os.getenv("S3_ENDPOINT", None) 48 | LAMBDA_ENDPOINT = os.getenv("LAMBDA_ENDPOINT", None) 49 | 50 | 51 | def create_dir(path): 52 | if not os.path.exists(path): 53 | try: 54 | print("Attempting to create directory %s.\n" % path) 55 | os.makedirs(path) 56 | except OSError as exc: 57 | if exc.errno != errno.EEXIST: 58 | raise 59 | 60 | 61 | def get_timestamp(): 62 | return datetime.datetime.utcnow().strftime("%Y/%m/%d %H:%M:%S UTC") 63 | -------------------------------------------------------------------------------- /common_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Upside Travel, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import errno 17 | import unittest 18 | 19 | import mock 20 | 21 | from common import create_dir 22 | 23 | 24 | class TestCommon(unittest.TestCase): 25 | @mock.patch("common.os.path") 26 | @mock.patch("common.os") 27 | def test_create_dir_already_exists(self, mock_os, mock_path): 28 | mock_path.exists.return_value = True 29 | create_dir("testpath") 30 | self.assertFalse( 31 | mock_os.makedirs.called, "Failed to not make directories if path present." 32 | ) 33 | 34 | @mock.patch("common.os.path") 35 | @mock.patch("common.os") 36 | def test_create_dir_doesnt_exist(self, mock_os, mock_path): 37 | mock_path.exists.return_value = False 38 | create_dir("testpath") 39 | self.assertTrue( 40 | mock_os.makedirs.called, "Failed to make directories if path not present." 41 | ) 42 | 43 | @mock.patch("common.os.path") 44 | @mock.patch("common.os") 45 | def test_create_dir_doesnt_exist_no_raises(self, mock_os, mock_path): 46 | mock_path.exists.return_value = False 47 | mock_os.makedirs.side_effect = OSError(errno.EEXIST, "exists") 48 | create_dir("testpath") 49 | self.assertTrue( 50 | mock_os.makedirs.called, "Failed to make directories if path not present." 51 | ) 52 | 53 | @mock.patch("common.os.path") 54 | @mock.patch("common.os") 55 | def test_create_dir_doesnt_exist_but_raises(self, mock_os, mock_path): 56 | mock_path.exists.return_value = False 57 | mock_os.makedirs.side_effect = OSError(errno.ENAMETOOLONG, "nametoolong") 58 | with self.assertRaises(OSError): 59 | create_dir("testpath") 60 | self.assertTrue( 61 | mock_os.makedirs.called, "Failed to make directories if path not present." 
62 | ) 63 | -------------------------------------------------------------------------------- /deploy/cloudformation.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | AWSTemplateFormatVersion: '2010-09-09' 3 | 4 | Description: Bucket Antivirus Quickstart Template 5 | 6 | Parameters: 7 | 8 | AVBucketType: 9 | Type: String 10 | Description: Specifies if the bucket to hold the AV deinitions should be "public" or "private". Only choose "public" if other accounts need to access this bucket." 11 | Default: "private" 12 | AllowedValues: 13 | - "public" 14 | - "private" 15 | 16 | SourceBucket: 17 | Type: String 18 | Description: Name of the source bucket whose objects will be scanned. If more than one source bucket, the others will have to be manually added to the AV Scanner Policy after creation. 19 | Default: "" 20 | AllowedPattern : ".+" 21 | 22 | Conditions: 23 | publicBucket: !Equals [ !Ref AVBucketType, "public" ] 24 | 25 | Resources: 26 | 27 | S3BucketAVDefinitions: 28 | Type: AWS::S3::Bucket 29 | Properties: 30 | BucketName: !Join # Append the CloudFormation StackId for unique bucket naming 31 | - "-" 32 | - - "antivirus-definitions" 33 | - !Select 34 | - 0 35 | - !Split 36 | - "-" 37 | - !Select 38 | - 2 39 | - !Split 40 | - "/" 41 | - !Ref "AWS::StackId" 42 | BucketEncryption: 43 | ServerSideEncryptionConfiguration: 44 | - ServerSideEncryptionByDefault: 45 | SSEAlgorithm: AES256 46 | AccessControl: BucketOwnerFullControl 47 | PublicAccessBlockConfiguration: 48 | BlockPublicAcls: !If [ publicBucket, false, true ] 49 | BlockPublicPolicy: !If [ publicBucket, false, true ] 50 | IgnorePublicAcls: !If [ publicBucket, false, true ] 51 | RestrictPublicBuckets: !If [ publicBucket, false, true ] 52 | Tags: 53 | - Key: Service 54 | Value: bucket-antivirus 55 | VersioningConfiguration: 56 | Status: Suspended 57 | 58 | S3BucketPolicyAVDefinitions: 59 | Type: AWS::S3::BucketPolicy 60 | Condition: publicBucket 61 | Properties: 62 
| Bucket: !Ref S3BucketAVDefinitions 63 | PolicyDocument: 64 | Statement: 65 | - Sid: AllowPublic 66 | Action: 67 | - s3:GetObject 68 | - s3:GetObjectTagging 69 | Effect: Allow 70 | Principal: 71 | AWS: 72 | - "*" 73 | Resource: 74 | - !Sub [ "arn:aws:s3:::${BucketName}/*", { BucketName: !Ref S3BucketAVDefinitions } ] 75 | 76 | IamRoleAVDefinitions: 77 | Type: 'AWS::IAM::Role' 78 | Properties: 79 | RoleName: AVDefinitionsLambdaRole 80 | AssumeRolePolicyDocument: 81 | Version: "2012-10-17" 82 | Statement: 83 | - Effect: Allow 84 | Principal: 85 | Service: 86 | - lambda.amazonaws.com 87 | Action: 88 | - 'sts:AssumeRole' 89 | Tags: 90 | - Key: Service 91 | Value: bucket-antivirus 92 | 93 | IamRoleAVScanner: 94 | Type: 'AWS::IAM::Role' 95 | Properties: 96 | RoleName: AVScannerLambdaRole 97 | AssumeRolePolicyDocument: 98 | Version: "2012-10-17" 99 | Statement: 100 | - Effect: Allow 101 | Principal: 102 | Service: 103 | - lambda.amazonaws.com 104 | Action: 105 | - 'sts:AssumeRole' 106 | Tags: 107 | - Key: Service 108 | Value: bucket-antivirus 109 | 110 | IamPolicyAVDefinitions: 111 | Type: AWS::IAM::Policy 112 | Properties: 113 | PolicyName: AVDefinitionsLambdaPolicy 114 | Roles: 115 | - !Ref IamRoleAVDefinitions 116 | PolicyDocument: 117 | Version: "2012-10-17" 118 | Statement: 119 | - Sid: WriteCloudWatchLogs 120 | Effect: Allow 121 | Action: 122 | - "logs:CreateLogGroup" 123 | - "logs:CreateLogStream" 124 | - "logs:PutLogEvents" 125 | Resource: "*" 126 | - Sid: S3GetAndPutWithTagging 127 | Effect: Allow 128 | Action: 129 | - "s3:GetObject" 130 | - "s3:GetObjectTagging" 131 | - "s3:PutObject" 132 | - "s3:PutObjectTagging" 133 | - "s3:PutObjectVersionTagging" 134 | Resource: 135 | - !Sub [ "arn:aws:s3:::${BucketName}/*", { BucketName: !Ref S3BucketAVDefinitions } ] 136 | - Sid: S3HeadObject 137 | Effect: Allow 138 | Action: 139 | - "s3:ListBucket" 140 | Resource: 141 | - !Sub [ "arn:aws:s3:::${BucketName}/*", { BucketName: !Ref S3BucketAVDefinitions } ] 142 | - !Sub [ 
"arn:aws:s3:::${BucketName}", { BucketName: !Ref S3BucketAVDefinitions } ] 143 | 144 | IamPolicyAVScanner: 145 | Type: AWS::IAM::Policy 146 | Properties: 147 | PolicyName: AVScannerLambdaPolicy 148 | Roles: 149 | - !Ref IamRoleAVScanner 150 | PolicyDocument: 151 | Version: "2012-10-17" 152 | Statement: 153 | - Sid: WriteCloudWatchLogs 154 | Effect: Allow 155 | Action: 156 | - "logs:CreateLogGroup" 157 | - "logs:CreateLogStream" 158 | - "logs:PutLogEvents" 159 | Resource: "*" 160 | - Sid: S3AVScan 161 | Effect: Allow 162 | Action: 163 | - "s3:GetObject" 164 | - "s3:GetObjectTagging" 165 | - "s3:GetObjectVersion" 166 | - "s3:PutObjectTagging" 167 | - "s3:PutObjectVersionTagging" 168 | Resource: 169 | - !Sub [ "arn:aws:s3:::${SourceBucketName}/*", { SourceBucketName: !Ref SourceBucket } ] 170 | - Sid: S3AVDefinitions 171 | Effect: Allow 172 | Action: 173 | - "s3:GetObject" 174 | - "s3:GetObjectTagging" 175 | Resource: 176 | - !Sub [ "arn:aws:s3:::${BucketName}/*", { BucketName: !Ref S3BucketAVDefinitions } ] 177 | - Sid: KmsDecrypt 178 | Effect: Allow 179 | Action: 180 | - "kms:Decrypt" 181 | Resource: 182 | - !Sub [ "arn:aws:s3:::${SourceBucketName}/*", { SourceBucketName: !Ref SourceBucket } ] 183 | - Sid: SNSPublic 184 | Effect: Allow 185 | Action: 186 | - "sns:Publish" 187 | Resource: 188 | - "arn:aws:sns:::" 189 | - "arn:aws:sns:::" 190 | - Sid: S3HeadObject 191 | Effect: Allow 192 | Action: 193 | - "s3:ListBucket" 194 | Resource: 195 | - !Sub [ "arn:aws:s3:::${BucketName}/*", { BucketName: !Ref S3BucketAVDefinitions } ] 196 | - !Sub [ "arn:aws:s3:::${BucketName}", { BucketName: !Ref S3BucketAVDefinitions } ] 197 | 198 | LambdaAVUpdateDefinitions: 199 | Type: AWS::Lambda::Function 200 | Properties: 201 | FunctionName: avUpdateDefinitions 202 | Description: LambdaFunction to update the AntiVirus definitions in the AV Definitions bucket. 
203 | Runtime: python3.7 204 | Code: 205 | ZipFile: | 206 | import json 207 | def lambda_handler(event, context): 208 | return { 209 | 'statusCode': 200, 'body': json.dumps('Hello from Lambda!') 210 | } 211 | Handler: "update.lambda_handler" 212 | MemorySize: 1024 213 | Timeout: 300 214 | Role: !GetAtt [ IamRoleAVDefinitions, Arn ] 215 | Environment: 216 | Variables: 217 | AV_DEFINITION_S3_BUCKET: !Ref S3BucketAVDefinitions 218 | Tags: 219 | - Key: Service 220 | Value: bucket-antivirus 221 | 222 | LambdaAVUpdateDefinitionsSchedule: 223 | Type: "AWS::Events::Rule" 224 | DependsOn: 225 | - LambdaAVUpdateDefinitions 226 | Properties: 227 | Name: LambdaAVUpdateDefinitionsSchedule 228 | Description: A schedule for the AV Update Definitions Lambda function. 229 | ScheduleExpression: rate(3 hours) 230 | State: ENABLED 231 | Targets: 232 | - Arn: !Sub ${LambdaAVUpdateDefinitions.Arn} 233 | Id: LambdaAVUpdateDefinitionsSchedule 234 | 235 | LambdaAVUpdateDefinitionsSchedulePermission: 236 | Type: "AWS::Lambda::Permission" 237 | DependsOn: 238 | - LambdaAVUpdateDefinitionsSchedule 239 | Properties: 240 | Action: 'lambda:InvokeFunction' 241 | FunctionName: !Sub ${LambdaAVUpdateDefinitions.Arn} 242 | Principal: 'events.amazonaws.com' 243 | SourceArn: !Sub ${LambdaAVUpdateDefinitionsSchedule.Arn} 244 | 245 | LambdaAVScanner: 246 | Type: AWS::Lambda::Function 247 | Properties: 248 | FunctionName: avScanner 249 | Description: LambdaFunction to scan newly uploaded objects in S3. 
250 | Runtime: python3.7 251 | Code: 252 | ZipFile: | 253 | import json 254 | def lambda_handler(event, context): 255 | return { 256 | 'statusCode': 200, 'body': json.dumps('Hello from Lambda!') 257 | } 258 | Handler: "scan.lambda_handler" 259 | MemorySize: 1500 260 | Timeout: 300 261 | Role: !GetAtt [ IamRoleAVScanner, Arn ] 262 | Environment: 263 | Variables: 264 | AV_DEFINITION_S3_BUCKET: !Ref S3BucketAVDefinitions 265 | Tags: 266 | - Key: Service 267 | Value: bucket-antivirus 268 | 269 | 270 | 271 | Outputs: 272 | 273 | S3BucketAvDefinitions: 274 | Value: !Ref S3BucketAVDefinitions 275 | Description: S3 Bucket for the AV Definitions 276 | 277 | LambdaAVUpdateDefinitions: 278 | Value: !Ref LambdaAVUpdateDefinitions 279 | Description: Lambda function to update the Antivirus Definitions in its respective bucket 280 | 281 | LambdaAVScanner: 282 | Value: !Ref LambdaAVScanner 283 | Description: Lambda function to scan newly created S3 objects 284 | 285 | IamRoleAVScanner: 286 | Value: !Ref IamRoleAVScanner 287 | Description: IAM Role used by the Lambda Scanner function. Edit its policy to add/change source S3 buckets, and also to enable SNS functionality if desired -------------------------------------------------------------------------------- /display_infected.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Upside Travel, Inc. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import sys

import boto3

from common import AV_SIGNATURE_METADATA, S3_ENDPOINT
from common import AV_SIGNATURE_OK
from common import AV_SIGNATURE_UNKNOWN
from common import AV_STATUS_METADATA
from common import AV_STATUS_CLEAN
from common import AV_STATUS_INFECTED


# Get all objects in an S3 bucket that are infected
def get_objects_and_sigs(s3_client, s3_bucket_name):
    """Walk every page of the bucket listing and collect (key, signature)
    pairs for objects whose tags mark them as infected."""
    infected_objects = []
    continuation_token = None

    while True:
        list_kwargs = {"Bucket": s3_bucket_name}
        if continuation_token:
            list_kwargs["ContinuationToken"] = continuation_token
        page = s3_client.list_objects_v2(**list_kwargs)
        if "Contents" not in page:
            break
        for entry in page["Contents"]:
            # Include only infected objects
            infected, av_signature = object_infected(
                s3_client, s3_bucket_name, entry["Key"]
            )
            if infected:
                infected_objects.append((entry["Key"], av_signature))
        if not page["IsTruncated"]:
            break
        continuation_token = page.get("NextContinuationToken")

    return infected_objects


# Determine if an object has been previously scanned for viruses
def object_infected(s3_client, s3_bucket_name, key_name):
    """Return (True, signature) when the object's tags mark it infected,
    otherwise (False, None). An infected object with no usable signature
    tag reports AV_SIGNATURE_UNKNOWN."""
    response = s3_client.get_object_tagging(Bucket=s3_bucket_name, Key=key_name)
    if "TagSet" not in response:
        return False, None
    tags = {tag["Key"]: tag["Value"] for tag in response["TagSet"]}

    if tags.get(AV_STATUS_METADATA, "") == AV_STATUS_CLEAN:
        return False, None

    signature = tags.get(AV_SIGNATURE_METADATA)
    if signature is not None and signature != AV_SIGNATURE_OK:
        return True, signature

    if tags.get(AV_STATUS_METADATA, "") == AV_STATUS_INFECTED:
        return True, AV_SIGNATURE_UNKNOWN

    return False, None


def main(s3_bucket_name):
    """List every infected object in *s3_bucket_name* on stdout."""
    # Verify the S3 bucket exists
    s3_client = boto3.client("s3", endpoint_url=S3_ENDPOINT)
    try:
        s3_client.head_bucket(Bucket=s3_bucket_name)
    except Exception:
        print("S3 Bucket '{}' does not exist".format(s3_bucket_name))
        sys.exit(1)

    # Report each infected object together with its signature
    for key_name, av_signature in get_objects_and_sigs(s3_client, s3_bucket_name):
        print("Infected: {}/{}, {}".format(s3_bucket_name, key_name, av_signature))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Scan an S3 bucket for infected files."
    )
    parser.add_argument(
        "--s3-bucket-name", required=True, help="The name of the S3 bucket to scan"
    )
    args = parser.parse_args()

    main(args.s3_bucket_name)

# --------------------------------------------------------------------------
# /display_infected_test.py:
# --------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Upside Travel, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import unittest

import botocore.session
from botocore.stub import Stubber

from common import AV_SIGNATURE_METADATA
from common import AV_SIGNATURE_OK
from common import AV_SIGNATURE_UNKNOWN
from common import AV_STATUS_CLEAN
from common import AV_STATUS_METADATA
from common import AV_STATUS_INFECTED
from display_infected import get_objects_and_sigs


class TestDisplayInfected(unittest.TestCase):
    """Exercise get_objects_and_sigs() against a stubbed S3 client that
    lists a single object ('test.txt') and replays canned tag sets."""

    def setUp(self):
        self.s3_bucket_name = "test_bucket"
        self.s3_client = botocore.session.get_session().create_client("s3")
        self.stubber = Stubber(self.s3_client)

        # One non-truncated listing page containing a single object.
        listing = {
            "IsTruncated": False,
            "Contents": [
                {
                    "Key": "test.txt",
                    "LastModified": datetime.datetime(2015, 1, 1),
                    "ETag": '"abc123"',
                    "Size": 123,
                    "StorageClass": "STANDARD",
                    "Owner": {"DisplayName": "myname", "ID": "abc123"},
                }
            ],
            "Name": self.s3_bucket_name,
            "Prefix": "",
            "MaxKeys": 1000,
            "EncodingType": "url",
        }
        self.stubber.add_response(
            "list_objects_v2", listing, {"Bucket": self.s3_bucket_name}
        )

    def _stub_tagging(self, tag_set):
        # Queue the get_object_tagging response for the one listed object.
        self.stubber.add_response(
            "get_object_tagging",
            {"VersionId": "abc123", "TagSet": tag_set},
            {"Bucket": self.s3_bucket_name, "Key": "test.txt"},
        )

    def _run(self):
        # Run the function under test with the stub active.
        with self.stubber:
            return get_objects_and_sigs(self.s3_client, self.s3_bucket_name)

    def test_get_objects_and_sigs_infected_with_sig_unknown(self):
        # Infected status but no signature tag -> signature reported UNKNOWN.
        self._stub_tagging([{"Key": AV_STATUS_METADATA, "Value": AV_STATUS_INFECTED}])
        self.assertEqual(self._run(), [("test.txt", AV_SIGNATURE_UNKNOWN)])

    def test_get_objects_and_sigs_infected_with_sig(self):
        # Infected status with a concrete signature -> signature passed through.
        signature = "Eicar-Test-Signature FOUND"
        self._stub_tagging(
            [
                {"Key": AV_STATUS_METADATA, "Value": AV_STATUS_INFECTED},
                {"Key": AV_SIGNATURE_METADATA, "Value": signature},
            ]
        )
        self.assertEqual(self._run(), [("test.txt", signature)])

    def test_get_objects_and_sigs_infected_with_sig_ok(self):
        # Clean status with an OK signature -> not reported.
        self._stub_tagging(
            [
                {"Key": AV_STATUS_METADATA, "Value": AV_STATUS_CLEAN},
                {"Key": AV_SIGNATURE_METADATA, "Value": AV_SIGNATURE_OK},
            ]
        )
        self.assertEqual(self._run(), [])

    def test_get_objects_and_sigs_clean(self):
        # Clean status alone -> not reported.
        self._stub_tagging([{"Key": AV_STATUS_METADATA, "Value": AV_STATUS_CLEAN}])
        self.assertEqual(self._run(), [])

    def test_get_objects_and_sigs_unscanned(self):
        # No tags at all -> treated as not infected.
        self._stub_tagging([])
        self.assertEqual(self._run(), [])

# --------------------------------------------------------------------------
# /images/bucket-antivirus-function.png:
# https://raw.githubusercontent.com/bluesentry/bucket-antivirus-function/0e86c59ad259b266754f2647ed702fbcb9c216c4/images/bucket-antivirus-function.png
# --------------------------------------------------------------------------
# /images/s3-event.png:
# https://raw.githubusercontent.com/bluesentry/bucket-antivirus-function/0e86c59ad259b266754f2647ed702fbcb9c216c4/images/s3-event.png
# --------------------------------------------------------------------------
# /metrics.py:
# -*- coding: utf-8 -*-
# Upside Travel, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import datadog
from common import AV_STATUS_CLEAN
from common import AV_STATUS_INFECTED


def send(env, bucket, key, status):
    """Report a scan-counter metric and a result metric to Datadog.

    For AV_STATUS_INFECTED results, additionally creates a Datadog event
    naming the infected s3://bucket/key. Does nothing unless the
    DATADOG_API_KEY environment variable is set.

    :param env: deployment environment name, used as a metric tag
    :param bucket: S3 bucket name of the scanned object
    :param key: S3 key of the scanned object
    :param status: scan result, compared against AV_STATUS_CLEAN/INFECTED
    """
    # NOTE(review): indentation reconstructed from a flattened dump — the
    # entire body appears guarded by the API-key check so all Datadog calls
    # are skipped when the key is unset; confirm against upstream history.
    if "DATADOG_API_KEY" in os.environ:
        datadog.initialize()  # by default uses DATADOG_API_KEY

        # Any status other than CLEAN/INFECTED is counted as "unknown".
        result_metric_name = "unknown"

        metric_tags = ["env:%s" % env, "bucket:%s" % bucket, "object:%s" % key]

        if status == AV_STATUS_CLEAN:
            result_metric_name = "clean"
        elif status == AV_STATUS_INFECTED:
            result_metric_name = "infected"
            # Surface infections as a Datadog event, not just a counter.
            datadog.api.Event.create(
                title="Infected S3 Object Found",
                text="Virus found in s3://%s/%s." % (bucket, key),
                tags=metric_tags,
            )

        scanned_metric = {
            "metric": "s3_antivirus.scanned",
            "type": "counter",
            "points": 1,
            "tags": metric_tags,
        }
        result_metric = {
            "metric": "s3_antivirus.%s" % result_metric_name,
            "type": "counter",
            "points": 1,
            "tags": metric_tags,
        }
        print("Sending metrics to Datadog.")
        datadog.api.Metric.send([scanned_metric, result_metric])

# --------------------------------------------------------------------------
# /requirements-dev.txt:
#   # boto3 available by default in AWS but not locally
#   boto3
#   # Test requirements
#   coverage
#   mock==3.0.5
#   nose
# --------------------------------------------------------------------------
# /requirements.txt:
#   certifi==2018.11.29
#   chardet==3.0.4
#   datadog==0.26.0
#   decorator==4.3
#   idna==2.8
#   requests==2.21
#   simplejson==3.16
#   urllib3==1.24.2
#   pytz==2019.3
# --------------------------------------------------------------------------
# /scan.py:
# -*- coding: utf-8 -*-
# Upside Travel, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import json
import os
from urllib.parse import unquote_plus

import boto3

import clamav
import metrics
from common import AV_DEFINITION_S3_BUCKET
from common import AV_DEFINITION_S3_PREFIX
from common import AV_DELETE_INFECTED_FILES
from common import AV_PROCESS_ORIGINAL_VERSION_ONLY
from common import AV_SCAN_START_METADATA
from common import AV_SCAN_START_SNS_ARN
from common import AV_SIGNATURE_METADATA
from common import AV_STATUS_CLEAN
from common import AV_STATUS_INFECTED
from common import AV_STATUS_METADATA
from common import AV_STATUS_SNS_ARN
from common import AV_STATUS_SNS_PUBLISH_CLEAN
from common import AV_STATUS_SNS_PUBLISH_INFECTED
from common import AV_TIMESTAMP_METADATA
from common import SNS_ENDPOINT
from common import S3_ENDPOINT
from common import create_dir
from common import get_timestamp

# NOTE: "from distutils.util import strtobool" was removed — distutils is
# gone as of Python 3.12 (PEP 632). str_to_bool() at the bottom of this
# module is a drop-in replacement with identical accepted values and the
# same ValueError on bad input.


def event_object(event, event_source="s3"):
    """Extract the S3 object referenced by a Lambda trigger event.

    :param event: the raw Lambda event (S3 notification, or an SNS
        envelope wrapping one when event_source is "SNS")
    :param event_source: "s3" (default) or "sns"
    :return: a boto3 s3.Object for the first record's bucket/key
    :raises Exception: when the event has no records, bucket, or key
    """
    # SNS events are slightly different
    if event_source.upper() == "SNS":
        event = json.loads(event["Records"][0]["Sns"]["Message"])

    # Break down the record
    records = event["Records"]
    if len(records) == 0:
        raise Exception("No records found in event!")
    record = records[0]

    s3_obj = record["s3"]

    # Get the bucket name
    if "bucket" not in s3_obj:
        raise Exception("No bucket found in event!")
    bucket_name = s3_obj["bucket"].get("name", None)

    # Get the key name
    if "object" not in s3_obj:
        raise Exception("No key found in event!")
    key_name = s3_obj["object"].get("key", None)

    # S3 event keys are URL-encoded ("+" for space, %xx escapes)
    if key_name:
        key_name = unquote_plus(key_name)

    # Ensure both bucket and key exist
    if (not bucket_name) or (not key_name):
        raise Exception("Unable to retrieve object from event.\n{}".format(event))

    # Create and return the object
    s3 = boto3.resource("s3", endpoint_url=S3_ENDPOINT)
    return s3.Object(bucket_name, key_name)


def verify_s3_object_version(s3, s3_object):
    """Raise unless *s3_object* is the only version of its key.

    Security check: prevents a newer (possibly infected) version uploaded
    mid-scan from being blessed by the clean result of the original.
    Requires versioning to be enabled on the bucket.
    """
    bucket_versioning = s3.BucketVersioning(s3_object.bucket_name)
    if bucket_versioning.status == "Enabled":
        bucket = s3.Bucket(s3_object.bucket_name)
        versions = list(bucket.object_versions.filter(Prefix=s3_object.key))
        if len(versions) > 1:
            raise Exception(
                "Detected multiple object versions in %s.%s, aborting processing"
                % (s3_object.bucket_name, s3_object.key)
            )
    else:
        # misconfigured bucket, left with no or suspended versioning
        raise Exception(
            "Object versioning is not enabled in bucket %s" % s3_object.bucket_name
        )


def get_local_path(s3_object, local_prefix):
    """Return the local scratch path for *s3_object* under *local_prefix*."""
    return os.path.join(local_prefix, s3_object.bucket_name, s3_object.key)


def delete_s3_object(s3_object):
    """Delete an infected object, wrapping failures with a descriptive error."""
    try:
        s3_object.delete()
    except Exception:
        raise Exception(
            "Failed to delete infected file: %s.%s"
            % (s3_object.bucket_name, s3_object.key)
        )
    else:
        print("Infected file deleted: %s.%s" % (s3_object.bucket_name, s3_object.key))


def set_av_metadata(s3_object, scan_result, scan_signature, timestamp):
    """Record scan results in the object's user metadata.

    S3 metadata is immutable, so the object is copied onto itself with
    MetadataDirective=REPLACE; ContentType is preserved explicitly.
    """
    content_type = s3_object.content_type
    metadata = s3_object.metadata
    metadata[AV_SIGNATURE_METADATA] = scan_signature
    metadata[AV_STATUS_METADATA] = scan_result
    metadata[AV_TIMESTAMP_METADATA] = timestamp
    s3_object.copy(
        {"Bucket": s3_object.bucket_name, "Key": s3_object.key},
        ExtraArgs={
            "ContentType": content_type,
            "Metadata": metadata,
            "MetadataDirective": "REPLACE",
        },
    )


def set_av_tags(s3_client, s3_object, scan_result, scan_signature, timestamp):
    """Record scan results as object tags, preserving unrelated tags."""
    curr_tags = s3_client.get_object_tagging(
        Bucket=s3_object.bucket_name, Key=s3_object.key
    )["TagSet"]
    new_tags = copy.copy(curr_tags)
    # Drop any stale AV tags before appending the fresh values.
    for tag in curr_tags:
        if tag["Key"] in [
            AV_SIGNATURE_METADATA,
            AV_STATUS_METADATA,
            AV_TIMESTAMP_METADATA,
        ]:
            new_tags.remove(tag)
    new_tags.append({"Key": AV_SIGNATURE_METADATA, "Value": scan_signature})
    new_tags.append({"Key": AV_STATUS_METADATA, "Value": scan_result})
    new_tags.append({"Key": AV_TIMESTAMP_METADATA, "Value": timestamp})
    s3_client.put_object_tagging(
        Bucket=s3_object.bucket_name, Key=s3_object.key, Tagging={"TagSet": new_tags}
    )


def sns_start_scan(sns_client, s3_object, scan_start_sns_arn, timestamp):
    """Publish a scan-started notification for *s3_object* to SNS."""
    message = {
        "bucket": s3_object.bucket_name,
        "key": s3_object.key,
        "version": s3_object.version_id,
        AV_SCAN_START_METADATA: True,
        AV_TIMESTAMP_METADATA: timestamp,
    }
    sns_client.publish(
        TargetArn=scan_start_sns_arn,
        Message=json.dumps({"default": json.dumps(message)}),
        MessageStructure="json",
    )


def sns_scan_results(
    sns_client, s3_object, sns_arn, scan_result, scan_signature, timestamp
):
    """Publish scan results to SNS, honoring the publish-clean/infected flags.

    Message attributes carry the status and signature so subscribers can
    use SNS filter policies.
    """
    # Don't publish if scan_result is CLEAN and CLEAN results should not be published
    if scan_result == AV_STATUS_CLEAN and not str_to_bool(AV_STATUS_SNS_PUBLISH_CLEAN):
        return
    # Don't publish if scan_result is INFECTED and INFECTED results should not be published
    if scan_result == AV_STATUS_INFECTED and not str_to_bool(
        AV_STATUS_SNS_PUBLISH_INFECTED
    ):
        return
    message = {
        "bucket": s3_object.bucket_name,
        "key": s3_object.key,
        "version": s3_object.version_id,
        AV_SIGNATURE_METADATA: scan_signature,
        AV_STATUS_METADATA: scan_result,
        # BUG FIX: previously called get_timestamp() here, ignoring the
        # timestamp argument — the SNS message carried a different time
        # than the tags/metadata written for the same scan.
        AV_TIMESTAMP_METADATA: timestamp,
    }
    sns_client.publish(
        TargetArn=sns_arn,
        Message=json.dumps({"default": json.dumps(message)}),
        MessageStructure="json",
        MessageAttributes={
            AV_STATUS_METADATA: {"DataType": "String", "StringValue": scan_result},
            AV_SIGNATURE_METADATA: {
                "DataType": "String",
                "StringValue": scan_signature,
            },
        },
    )


def lambda_handler(event, context):
    """Entry point: download the triggering object, scan it with ClamAV,
    and record/publish the result.

    Steps: resolve the object from the event, optionally enforce
    single-version processing, notify scan start, download the object and
    fresh AV definitions, scan, write tags (and metadata when
    AV_UPDATE_METADATA is set), publish results, emit metrics, clean up,
    and optionally delete infected objects.
    """
    s3 = boto3.resource("s3", endpoint_url=S3_ENDPOINT)
    s3_client = boto3.client("s3", endpoint_url=S3_ENDPOINT)
    sns_client = boto3.client("sns", endpoint_url=SNS_ENDPOINT)

    # Get some environment variables
    ENV = os.getenv("ENV", "")
    EVENT_SOURCE = os.getenv("EVENT_SOURCE", "S3")

    start_time = get_timestamp()
    print("Script starting at %s\n" % (start_time))
    s3_object = event_object(event, event_source=EVENT_SOURCE)

    if str_to_bool(AV_PROCESS_ORIGINAL_VERSION_ONLY):
        verify_s3_object_version(s3, s3_object)

    # Publish the start time of the scan
    if AV_SCAN_START_SNS_ARN not in [None, ""]:
        start_scan_time = get_timestamp()
        sns_start_scan(sns_client, s3_object, AV_SCAN_START_SNS_ARN, start_scan_time)

    file_path = get_local_path(s3_object, "/tmp")
    create_dir(os.path.dirname(file_path))
    s3_object.download_file(file_path)

    to_download = clamav.update_defs_from_s3(
        s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX
    )

    for download in to_download.values():
        s3_path = download["s3_path"]
        local_path = download["local_path"]
        print("Downloading definition file %s from s3://%s" % (local_path, s3_path))
        s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path)
        print("Downloading definition file %s complete!" % (local_path))
    scan_result, scan_signature = clamav.scan_file(file_path)
    print(
        "Scan of s3://%s resulted in %s\n"
        % (os.path.join(s3_object.bucket_name, s3_object.key), scan_result)
    )

    result_time = get_timestamp()
    # Set the properties on the object with the scan results
    if "AV_UPDATE_METADATA" in os.environ:
        set_av_metadata(s3_object, scan_result, scan_signature, result_time)
    set_av_tags(s3_client, s3_object, scan_result, scan_signature, result_time)

    # Publish the scan results
    if AV_STATUS_SNS_ARN not in [None, ""]:
        sns_scan_results(
            sns_client,
            s3_object,
            AV_STATUS_SNS_ARN,
            scan_result,
            scan_signature,
            result_time,
        )

    metrics.send(
        env=ENV, bucket=s3_object.bucket_name, key=s3_object.key, status=scan_result
    )
    # Delete downloaded file to free up room on re-usable lambda function container
    try:
        os.remove(file_path)
    except OSError:
        pass
    if str_to_bool(AV_DELETE_INFECTED_FILES) and scan_result == AV_STATUS_INFECTED:
        delete_s3_object(s3_object)
    stop_scan_time = get_timestamp()
    print("Script finished at %s\n" % stop_scan_time)


def str_to_bool(s):
    """Coerce a string-ish flag ("True"/"false"/"1"/"on"/...) to bool.

    Drop-in replacement for bool(distutils.util.strtobool(str(s)))
    (distutils was removed in Python 3.12, PEP 632): same accepted
    values, same ValueError on anything else.
    """
    value = str(s).lower()
    if value in ("y", "yes", "t", "true", "on", "1"):
        return True
    if value in ("n", "no", "f", "false", "off", "0"):
        return False
    raise ValueError("invalid truth value %r" % (value,))

# --------------------------------------------------------------------------
# /scan_bucket.py:
# --------------------------------------------------------------------------
#! /usr/bin/env python
# -*- coding: utf-8 -*-

# Upside Travel, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import argparse 19 | import json 20 | import sys 21 | 22 | import boto3 23 | 24 | from common import AV_STATUS_METADATA, LAMBDA_ENDPOINT 25 | from common import AV_TIMESTAMP_METADATA 26 | from common import S3_ENDPOINT 27 | 28 | 29 | # Get all objects in an S3 bucket that have not been previously scanned 30 | def get_objects(s3_client, s3_bucket_name): 31 | 32 | s3_object_list = [] 33 | 34 | s3_list_objects_result = {"IsTruncated": True} 35 | while s3_list_objects_result["IsTruncated"]: 36 | s3_list_objects_config = {"Bucket": s3_bucket_name} 37 | continuation_token = s3_list_objects_result.get("NextContinuationToken") 38 | if continuation_token: 39 | s3_list_objects_config["ContinuationToken"] = continuation_token 40 | s3_list_objects_result = s3_client.list_objects_v2(**s3_list_objects_config) 41 | if "Contents" not in s3_list_objects_result: 42 | break 43 | for key in s3_list_objects_result["Contents"]: 44 | key_name = key["Key"] 45 | # Don't include objects that have been scanned 46 | if not object_previously_scanned(s3_client, s3_bucket_name, key_name): 47 | s3_object_list.append(key_name) 48 | 49 | return s3_object_list 50 | 51 | 52 | # Determine if an object has been previously scanned for viruses 53 | def object_previously_scanned(s3_client, s3_bucket_name, key_name): 54 | s3_object_tags = s3_client.get_object_tagging(Bucket=s3_bucket_name, Key=key_name) 55 | if "TagSet" not in s3_object_tags: 56 | return False 57 | for tag in s3_object_tags["TagSet"]: 58 | if tag["Key"] in [AV_STATUS_METADATA, 
AV_TIMESTAMP_METADATA]: 59 | return True 60 | return False 61 | 62 | 63 | # Scan an S3 object for viruses by invoking the lambda function 64 | # Skip any objects that have already been scanned 65 | def scan_object(lambda_client, lambda_function_name, s3_bucket_name, key_name): 66 | 67 | print("Scanning: {}/{}".format(s3_bucket_name, key_name)) 68 | s3_event = format_s3_event(s3_bucket_name, key_name) 69 | lambda_invoke_result = lambda_client.invoke( 70 | FunctionName=lambda_function_name, 71 | InvocationType="Event", 72 | Payload=json.dumps(s3_event), 73 | ) 74 | if lambda_invoke_result["ResponseMetadata"]["HTTPStatusCode"] != 202: 75 | print("Error invoking lambda: {}".format(lambda_invoke_result)) 76 | 77 | 78 | # Format an S3 Event to use when invoking the lambda function 79 | # https://docs.aws.amazon.com/AmazonS3/latest/dev/notification-content-structure.html 80 | def format_s3_event(s3_bucket_name, key_name): 81 | s3_event = { 82 | "Records": [ 83 | {"s3": {"bucket": {"name": s3_bucket_name}, "object": {"key": key_name}}} 84 | ] 85 | } 86 | return s3_event 87 | 88 | 89 | def main(lambda_function_name, s3_bucket_name, limit): 90 | # Verify the lambda exists 91 | lambda_client = boto3.client("lambda", endpoint_url=LAMBDA_ENDPOINT) 92 | try: 93 | lambda_client.get_function(FunctionName=lambda_function_name) 94 | except Exception: 95 | print("Lambda Function '{}' does not exist".format(lambda_function_name)) 96 | sys.exit(1) 97 | 98 | # Verify the S3 bucket exists 99 | s3_client = boto3.client("s3", endpoint_url=S3_ENDPOINT) 100 | try: 101 | s3_client.head_bucket(Bucket=s3_bucket_name) 102 | except Exception: 103 | print("S3 Bucket '{}' does not exist".format(s3_bucket_name)) 104 | sys.exit(1) 105 | 106 | # Scan the objects in the bucket 107 | s3_object_list = get_objects(s3_client, s3_bucket_name) 108 | if limit: 109 | s3_object_list = s3_object_list[: min(limit, len(s3_object_list))] 110 | for key_name in s3_object_list: 111 | scan_object(lambda_client, 
lambda_function_name, s3_bucket_name, key_name) 112 | 113 | 114 | if __name__ == "__main__": 115 | parser = argparse.ArgumentParser(description="Scan an S3 bucket for viruses.") 116 | parser.add_argument( 117 | "--lambda-function-name", 118 | required=True, 119 | help="The name of the lambda function to invoke", 120 | ) 121 | parser.add_argument( 122 | "--s3-bucket-name", required=True, help="The name of the S3 bucket to scan" 123 | ) 124 | parser.add_argument("--limit", type=int, help="The number of records to limit to") 125 | args = parser.parse_args() 126 | 127 | main(args.lambda_function_name, args.s3_bucket_name, args.limit) 128 | -------------------------------------------------------------------------------- /scan_bucket_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Upside Travel, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import datetime 17 | import unittest 18 | 19 | import botocore.session 20 | from botocore.stub import Stubber 21 | 22 | from common import AV_STATUS_INFECTED 23 | from common import AV_STATUS_METADATA 24 | from common import AV_TIMESTAMP_METADATA 25 | from common import get_timestamp 26 | from scan_bucket import get_objects 27 | from scan_bucket import format_s3_event 28 | 29 | 30 | class TestDisplayInfected(unittest.TestCase): 31 | def setUp(self): 32 | self.s3_bucket_name = "test_bucket" 33 | self.s3_client = botocore.session.get_session().create_client("s3") 34 | self.stubber = Stubber(self.s3_client) 35 | 36 | list_objects_v2_response = { 37 | "IsTruncated": False, 38 | "Contents": [ 39 | { 40 | "Key": "test.txt", 41 | "LastModified": datetime.datetime(2015, 1, 1), 42 | "ETag": '"abc123"', 43 | "Size": 123, 44 | "StorageClass": "STANDARD", 45 | "Owner": {"DisplayName": "myname", "ID": "abc123"}, 46 | } 47 | ], 48 | "Name": self.s3_bucket_name, 49 | "Prefix": "", 50 | "MaxKeys": 1000, 51 | "EncodingType": "url", 52 | } 53 | list_objects_v2_expected_params = {"Bucket": self.s3_bucket_name} 54 | self.stubber.add_response( 55 | "list_objects_v2", list_objects_v2_response, list_objects_v2_expected_params 56 | ) 57 | 58 | def test_get_objects_previously_scanned_status(self): 59 | 60 | get_object_tagging_response = { 61 | "VersionId": "abc123", 62 | "TagSet": [{"Key": AV_STATUS_METADATA, "Value": AV_STATUS_INFECTED}], 63 | } 64 | get_object_tagging_expected_params = { 65 | "Bucket": self.s3_bucket_name, 66 | "Key": "test.txt", 67 | } 68 | self.stubber.add_response( 69 | "get_object_tagging", 70 | get_object_tagging_response, 71 | get_object_tagging_expected_params, 72 | ) 73 | 74 | with self.stubber: 75 | s3_object_list = get_objects(self.s3_client, self.s3_bucket_name) 76 | expected_object_list = [] 77 | self.assertEqual(s3_object_list, expected_object_list) 78 | 79 | def test_get_objects_previously_scanned_timestamp(self): 80 | 81 | 
get_object_tagging_response = { 82 | "VersionId": "abc123", 83 | "TagSet": [{"Key": AV_TIMESTAMP_METADATA, "Value": get_timestamp()}], 84 | } 85 | get_object_tagging_expected_params = { 86 | "Bucket": self.s3_bucket_name, 87 | "Key": "test.txt", 88 | } 89 | self.stubber.add_response( 90 | "get_object_tagging", 91 | get_object_tagging_response, 92 | get_object_tagging_expected_params, 93 | ) 94 | 95 | with self.stubber: 96 | s3_object_list = get_objects(self.s3_client, self.s3_bucket_name) 97 | expected_object_list = [] 98 | self.assertEqual(s3_object_list, expected_object_list) 99 | 100 | def test_get_objects_unscanned(self): 101 | 102 | get_object_tagging_response = {"VersionId": "abc123", "TagSet": []} 103 | get_object_tagging_expected_params = { 104 | "Bucket": self.s3_bucket_name, 105 | "Key": "test.txt", 106 | } 107 | self.stubber.add_response( 108 | "get_object_tagging", 109 | get_object_tagging_response, 110 | get_object_tagging_expected_params, 111 | ) 112 | 113 | with self.stubber: 114 | s3_object_list = get_objects(self.s3_client, self.s3_bucket_name) 115 | expected_object_list = ["test.txt"] 116 | self.assertEqual(s3_object_list, expected_object_list) 117 | 118 | def test_format_s3_event(self): 119 | key_name = "key" 120 | s3_event = format_s3_event(self.s3_bucket_name, key_name) 121 | expected_s3_event = { 122 | "Records": [ 123 | { 124 | "s3": { 125 | "bucket": {"name": self.s3_bucket_name}, 126 | "object": {"key": key_name}, 127 | } 128 | } 129 | ] 130 | } 131 | self.assertEquals(s3_event, expected_s3_event) 132 | -------------------------------------------------------------------------------- /scan_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Upside Travel, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import datetime 17 | import json 18 | import unittest 19 | 20 | import boto3 21 | import botocore.session 22 | from botocore.stub import Stubber 23 | 24 | from common import AV_SCAN_START_METADATA 25 | from common import AV_SIGNATURE_METADATA 26 | from common import AV_SIGNATURE_OK 27 | from common import AV_STATUS_METADATA 28 | from common import AV_TIMESTAMP_METADATA 29 | from common import get_timestamp 30 | from scan import delete_s3_object 31 | from scan import event_object 32 | from scan import get_local_path 33 | from scan import set_av_metadata 34 | from scan import set_av_tags 35 | from scan import sns_start_scan 36 | from scan import sns_scan_results 37 | from scan import verify_s3_object_version 38 | 39 | 40 | class TestScan(unittest.TestCase): 41 | def setUp(self): 42 | # Common data 43 | self.s3_bucket_name = "test_bucket" 44 | self.s3_key_name = "test_key" 45 | 46 | # Clients and Resources 47 | self.s3 = boto3.resource("s3") 48 | self.s3_client = botocore.session.get_session().create_client("s3") 49 | self.sns_client = botocore.session.get_session().create_client( 50 | "sns", region_name="us-west-2" 51 | ) 52 | 53 | def test_sns_event_object(self): 54 | event = { 55 | "Records": [ 56 | { 57 | "s3": { 58 | "bucket": {"name": self.s3_bucket_name}, 59 | "object": {"key": self.s3_key_name}, 60 | } 61 | } 62 | ] 63 | } 64 | sns_event = {"Records": [{"Sns": {"Message": json.dumps(event)}}]} 65 | s3_obj = event_object(sns_event, event_source="sns") 66 | expected_s3_object = self.s3.Object(self.s3_bucket_name, 
self.s3_key_name) 67 | self.assertEquals(s3_obj, expected_s3_object) 68 | 69 | def test_s3_event_object(self): 70 | event = { 71 | "Records": [ 72 | { 73 | "s3": { 74 | "bucket": {"name": self.s3_bucket_name}, 75 | "object": {"key": self.s3_key_name}, 76 | } 77 | } 78 | ] 79 | } 80 | s3_obj = event_object(event) 81 | expected_s3_object = self.s3.Object(self.s3_bucket_name, self.s3_key_name) 82 | self.assertEquals(s3_obj, expected_s3_object) 83 | 84 | def test_s3_event_object_missing_bucket(self): 85 | event = {"Records": [{"s3": {"object": {"key": self.s3_key_name}}}]} 86 | with self.assertRaises(Exception) as cm: 87 | event_object(event) 88 | self.assertEquals(cm.exception.message, "No bucket found in event!") 89 | 90 | def test_s3_event_object_missing_key(self): 91 | event = {"Records": [{"s3": {"bucket": {"name": self.s3_bucket_name}}}]} 92 | with self.assertRaises(Exception) as cm: 93 | event_object(event) 94 | self.assertEquals(cm.exception.message, "No key found in event!") 95 | 96 | def test_s3_event_object_bucket_key_missing(self): 97 | event = {"Records": [{"s3": {"bucket": {}, "object": {}}}]} 98 | with self.assertRaises(Exception) as cm: 99 | event_object(event) 100 | self.assertEquals( 101 | cm.exception.message, 102 | "Unable to retrieve object from event.\n{}".format(event), 103 | ) 104 | 105 | def test_s3_event_object_no_records(self): 106 | event = {"Records": []} 107 | with self.assertRaises(Exception) as cm: 108 | event_object(event) 109 | self.assertEquals(cm.exception.message, "No records found in event!") 110 | 111 | def test_verify_s3_object_version(self): 112 | s3_obj = self.s3.Object(self.s3_bucket_name, self.s3_key_name) 113 | 114 | # Set up responses 115 | get_bucket_versioning_response = {"Status": "Enabled"} 116 | get_bucket_versioning_expected_params = {"Bucket": self.s3_bucket_name} 117 | s3_stubber_resource = Stubber(self.s3.meta.client) 118 | s3_stubber_resource.add_response( 119 | "get_bucket_versioning", 120 | 
get_bucket_versioning_response, 121 | get_bucket_versioning_expected_params, 122 | ) 123 | list_object_versions_response = { 124 | "Versions": [ 125 | { 126 | "ETag": "string", 127 | "Size": 123, 128 | "StorageClass": "STANDARD", 129 | "Key": "string", 130 | "VersionId": "string", 131 | "IsLatest": True, 132 | "LastModified": datetime.datetime(2015, 1, 1), 133 | "Owner": {"DisplayName": "string", "ID": "string"}, 134 | } 135 | ] 136 | } 137 | list_object_versions_expected_params = { 138 | "Bucket": self.s3_bucket_name, 139 | "Prefix": self.s3_key_name, 140 | } 141 | s3_stubber_resource.add_response( 142 | "list_object_versions", 143 | list_object_versions_response, 144 | list_object_versions_expected_params, 145 | ) 146 | try: 147 | with s3_stubber_resource: 148 | verify_s3_object_version(self.s3, s3_obj) 149 | except Exception as e: 150 | self.fail("verify_s3_object_version() raised Exception unexpectedly!") 151 | raise e 152 | 153 | def test_verify_s3_object_versioning_not_enabled(self): 154 | s3_obj = self.s3.Object(self.s3_bucket_name, self.s3_key_name) 155 | 156 | # Set up responses 157 | get_bucket_versioning_response = {"Status": "Disabled"} 158 | get_bucket_versioning_expected_params = {"Bucket": self.s3_bucket_name} 159 | s3_stubber_resource = Stubber(self.s3.meta.client) 160 | s3_stubber_resource.add_response( 161 | "get_bucket_versioning", 162 | get_bucket_versioning_response, 163 | get_bucket_versioning_expected_params, 164 | ) 165 | with self.assertRaises(Exception) as cm: 166 | with s3_stubber_resource: 167 | verify_s3_object_version(self.s3, s3_obj) 168 | self.assertEquals( 169 | cm.exception.message, 170 | "Object versioning is not enabled in bucket {}".format( 171 | self.s3_bucket_name 172 | ), 173 | ) 174 | 175 | def test_verify_s3_object_version_multiple_versions(self): 176 | s3_obj = self.s3.Object(self.s3_bucket_name, self.s3_key_name) 177 | 178 | # Set up responses 179 | get_bucket_versioning_response = {"Status": "Enabled"} 180 | 
get_bucket_versioning_expected_params = {"Bucket": self.s3_bucket_name} 181 | s3_stubber_resource = Stubber(self.s3.meta.client) 182 | s3_stubber_resource.add_response( 183 | "get_bucket_versioning", 184 | get_bucket_versioning_response, 185 | get_bucket_versioning_expected_params, 186 | ) 187 | list_object_versions_response = { 188 | "Versions": [ 189 | { 190 | "ETag": "string", 191 | "Size": 123, 192 | "StorageClass": "STANDARD", 193 | "Key": "string", 194 | "VersionId": "string", 195 | "IsLatest": True, 196 | "LastModified": datetime.datetime(2015, 1, 1), 197 | "Owner": {"DisplayName": "string", "ID": "string"}, 198 | }, 199 | { 200 | "ETag": "string", 201 | "Size": 123, 202 | "StorageClass": "STANDARD", 203 | "Key": "string", 204 | "VersionId": "string", 205 | "IsLatest": True, 206 | "LastModified": datetime.datetime(2015, 1, 1), 207 | "Owner": {"DisplayName": "string", "ID": "string"}, 208 | }, 209 | ] 210 | } 211 | list_object_versions_expected_params = { 212 | "Bucket": self.s3_bucket_name, 213 | "Prefix": self.s3_key_name, 214 | } 215 | s3_stubber_resource.add_response( 216 | "list_object_versions", 217 | list_object_versions_response, 218 | list_object_versions_expected_params, 219 | ) 220 | with self.assertRaises(Exception) as cm: 221 | with s3_stubber_resource: 222 | verify_s3_object_version(self.s3, s3_obj) 223 | self.assertEquals( 224 | cm.exception.message, 225 | "Detected multiple object versions in {}.{}, aborting processing".format( 226 | self.s3_bucket_name, self.s3_key_name 227 | ), 228 | ) 229 | 230 | def test_sns_start_scan(self): 231 | sns_stubber = Stubber(self.sns_client) 232 | s3_stubber_resource = Stubber(self.s3.meta.client) 233 | 234 | sns_arn = "some_arn" 235 | version_id = "version-id" 236 | timestamp = get_timestamp() 237 | message = { 238 | "bucket": self.s3_bucket_name, 239 | "key": self.s3_key_name, 240 | "version": version_id, 241 | AV_SCAN_START_METADATA: True, 242 | AV_TIMESTAMP_METADATA: timestamp, 243 | } 244 | 
publish_response = {"MessageId": "message_id"} 245 | publish_expected_params = { 246 | "TargetArn": sns_arn, 247 | "Message": json.dumps({"default": json.dumps(message)}), 248 | "MessageStructure": "json", 249 | } 250 | sns_stubber.add_response("publish", publish_response, publish_expected_params) 251 | 252 | head_object_response = {"VersionId": version_id} 253 | head_object_expected_params = { 254 | "Bucket": self.s3_bucket_name, 255 | "Key": self.s3_key_name, 256 | } 257 | s3_stubber_resource.add_response( 258 | "head_object", head_object_response, head_object_expected_params 259 | ) 260 | with sns_stubber, s3_stubber_resource: 261 | s3_obj = self.s3.Object(self.s3_bucket_name, self.s3_key_name) 262 | sns_start_scan(self.sns_client, s3_obj, sns_arn, timestamp) 263 | 264 | def test_get_local_path(self): 265 | local_prefix = "/tmp" 266 | 267 | s3_obj = self.s3.Object(self.s3_bucket_name, self.s3_key_name) 268 | file_path = get_local_path(s3_obj, local_prefix) 269 | expected_file_path = "/tmp/test_bucket/test_key" 270 | self.assertEquals(file_path, expected_file_path) 271 | 272 | def test_set_av_metadata(self): 273 | scan_result = "CLEAN" 274 | scan_signature = AV_SIGNATURE_OK 275 | timestamp = get_timestamp() 276 | 277 | s3_obj = self.s3.Object(self.s3_bucket_name, self.s3_key_name) 278 | s3_stubber_resource = Stubber(self.s3.meta.client) 279 | 280 | # First head call is done to get content type and meta data 281 | head_object_response = {"ContentType": "content", "Metadata": {}} 282 | head_object_expected_params = { 283 | "Bucket": self.s3_bucket_name, 284 | "Key": self.s3_key_name, 285 | } 286 | s3_stubber_resource.add_response( 287 | "head_object", head_object_response, head_object_expected_params 288 | ) 289 | 290 | # Next two calls are done when copy() is called 291 | head_object_response_2 = { 292 | "ContentType": "content", 293 | "Metadata": {}, 294 | "ContentLength": 200, 295 | } 296 | head_object_expected_params_2 = { 297 | "Bucket": self.s3_bucket_name, 
298 | "Key": self.s3_key_name, 299 | } 300 | s3_stubber_resource.add_response( 301 | "head_object", head_object_response_2, head_object_expected_params_2 302 | ) 303 | copy_object_response = {"VersionId": "version_id"} 304 | copy_object_expected_params = { 305 | "Bucket": self.s3_bucket_name, 306 | "Key": self.s3_key_name, 307 | "ContentType": "content", 308 | "CopySource": {"Bucket": self.s3_bucket_name, "Key": self.s3_key_name}, 309 | "Metadata": { 310 | AV_SIGNATURE_METADATA: scan_signature, 311 | AV_STATUS_METADATA: scan_result, 312 | AV_TIMESTAMP_METADATA: timestamp, 313 | }, 314 | "MetadataDirective": "REPLACE", 315 | } 316 | s3_stubber_resource.add_response( 317 | "copy_object", copy_object_response, copy_object_expected_params 318 | ) 319 | 320 | with s3_stubber_resource: 321 | set_av_metadata(s3_obj, scan_result, scan_signature, timestamp) 322 | 323 | def test_set_av_tags(self): 324 | scan_result = "CLEAN" 325 | scan_signature = AV_SIGNATURE_OK 326 | timestamp = get_timestamp() 327 | tag_set = { 328 | "TagSet": [ 329 | {"Key": AV_SIGNATURE_METADATA, "Value": scan_signature}, 330 | {"Key": AV_STATUS_METADATA, "Value": scan_result}, 331 | {"Key": AV_TIMESTAMP_METADATA, "Value": timestamp}, 332 | ] 333 | } 334 | 335 | s3_stubber = Stubber(self.s3_client) 336 | get_object_tagging_response = tag_set 337 | get_object_tagging_expected_params = { 338 | "Bucket": self.s3_bucket_name, 339 | "Key": self.s3_key_name, 340 | } 341 | s3_stubber.add_response( 342 | "get_object_tagging", 343 | get_object_tagging_response, 344 | get_object_tagging_expected_params, 345 | ) 346 | put_object_tagging_response = {} 347 | put_object_tagging_expected_params = { 348 | "Bucket": self.s3_bucket_name, 349 | "Key": self.s3_key_name, 350 | "Tagging": tag_set, 351 | } 352 | s3_stubber.add_response( 353 | "put_object_tagging", 354 | put_object_tagging_response, 355 | put_object_tagging_expected_params, 356 | ) 357 | 358 | with s3_stubber: 359 | s3_obj = self.s3.Object(self.s3_bucket_name, 
self.s3_key_name) 360 | set_av_tags(self.s3_client, s3_obj, scan_result, scan_signature, timestamp) 361 | 362 | def test_sns_scan_results(self): 363 | sns_stubber = Stubber(self.sns_client) 364 | s3_stubber_resource = Stubber(self.s3.meta.client) 365 | 366 | sns_arn = "some_arn" 367 | version_id = "version-id" 368 | scan_result = "CLEAN" 369 | scan_signature = AV_SIGNATURE_OK 370 | timestamp = get_timestamp() 371 | message = { 372 | "bucket": self.s3_bucket_name, 373 | "key": self.s3_key_name, 374 | "version": version_id, 375 | AV_SIGNATURE_METADATA: scan_signature, 376 | AV_STATUS_METADATA: scan_result, 377 | AV_TIMESTAMP_METADATA: timestamp, 378 | } 379 | publish_response = {"MessageId": "message_id"} 380 | publish_expected_params = { 381 | "TargetArn": sns_arn, 382 | "Message": json.dumps({"default": json.dumps(message)}), 383 | "MessageAttributes": { 384 | "av-status": {"DataType": "String", "StringValue": scan_result}, 385 | "av-signature": {"DataType": "String", "StringValue": scan_signature}, 386 | }, 387 | "MessageStructure": "json", 388 | } 389 | sns_stubber.add_response("publish", publish_response, publish_expected_params) 390 | 391 | head_object_response = {"VersionId": version_id} 392 | head_object_expected_params = { 393 | "Bucket": self.s3_bucket_name, 394 | "Key": self.s3_key_name, 395 | } 396 | s3_stubber_resource.add_response( 397 | "head_object", head_object_response, head_object_expected_params 398 | ) 399 | with sns_stubber, s3_stubber_resource: 400 | s3_obj = self.s3.Object(self.s3_bucket_name, self.s3_key_name) 401 | sns_scan_results( 402 | self.sns_client, s3_obj, sns_arn, scan_result, scan_signature, timestamp 403 | ) 404 | 405 | def test_delete_s3_object(self): 406 | s3_stubber = Stubber(self.s3.meta.client) 407 | delete_object_response = {} 408 | delete_object_expected_params = { 409 | "Bucket": self.s3_bucket_name, 410 | "Key": self.s3_key_name, 411 | } 412 | s3_stubber.add_response( 413 | "delete_object", delete_object_response, 
delete_object_expected_params 414 | ) 415 | 416 | with s3_stubber: 417 | s3_obj = self.s3.Object(self.s3_bucket_name, self.s3_key_name) 418 | delete_s3_object(s3_obj) 419 | 420 | def test_delete_s3_object_exception(self): 421 | s3_stubber = Stubber(self.s3.meta.client) 422 | 423 | with self.assertRaises(Exception) as cm: 424 | with s3_stubber: 425 | s3_obj = self.s3.Object(self.s3_bucket_name, self.s3_key_name) 426 | delete_s3_object(s3_obj) 427 | self.assertEquals( 428 | cm.exception.message, 429 | "Failed to delete infected file: {}.{}".format( 430 | self.s3_bucket_name, self.s3_key_name 431 | ), 432 | ) 433 | -------------------------------------------------------------------------------- /scripts/run-scan-lambda: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | set -eu -o pipefail 4 | 5 | # 6 | # Run the scan.lambda_handler locally in a docker container 7 | # 8 | 9 | if [ $# -lt 2 ]; then 10 | echo 1>&2 "$0: not enough arguments. 
Please provide BUCKET and KEY" 11 | exit 1 12 | fi 13 | 14 | BUCKET=$1 15 | KEY=$2 16 | EVENT="{\"Records\": [{\"s3\": {\"bucket\": {\"name\": \"${BUCKET}\"}, \"object\": {\"key\": \"${KEY}\"}}}]}" 17 | echo "Sending S3 event: ${EVENT}" 18 | 19 | # Verify that the file exists first 20 | aws s3 ls "s3://${BUCKET}/${KEY}" 21 | 22 | rm -rf tmp/ 23 | unzip -qq -d ./tmp build/lambda.zip 24 | 25 | NAME="antivirus-scan" 26 | 27 | docker run --rm \ 28 | -v "$(pwd)/tmp/:/var/task" \ 29 | -e AV_DEFINITION_S3_BUCKET \ 30 | -e AV_DEFINITION_S3_PREFIX \ 31 | -e AV_DELETE_INFECTED_FILES \ 32 | -e AV_PROCESS_ORIGINAL_VERSION_ONLY \ 33 | -e AV_SCAN_START_METADATA \ 34 | -e AV_SCAN_START_SNS_ARN \ 35 | -e AV_SIGNATURE_METADATA \ 36 | -e AV_STATUS_CLEAN \ 37 | -e AV_STATUS_INFECTED \ 38 | -e AV_STATUS_METADATA \ 39 | -e AV_STATUS_SNS_ARN \ 40 | -e AV_STATUS_SNS_PUBLISH_CLEAN \ 41 | -e AV_STATUS_SNS_PUBLISH_INFECTED \ 42 | -e AV_TIMESTAMP_METADATA \ 43 | -e AWS_ACCESS_KEY_ID \ 44 | -e AWS_DEFAULT_REGION \ 45 | -e AWS_REGION \ 46 | -e AWS_SECRET_ACCESS_KEY \ 47 | -e AWS_SESSION_TOKEN \ 48 | --memory="${MEM}" \ 49 | --memory-swap="${MEM}" \ 50 | --cpus="${CPUS}" \ 51 | --name="${NAME}" \ 52 | lambci/lambda:python3.7 scan.lambda_handler "${EVENT}" 53 | -------------------------------------------------------------------------------- /scripts/run-update-lambda: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env bash 2 | 3 | set -eu -o pipefail 4 | 5 | # 6 | # Run the update.lambda_handler locally in a docker container 7 | # 8 | 9 | rm -rf tmp/ 10 | unzip -qq -d ./tmp build/lambda.zip 11 | 12 | NAME="antivirus-update" 13 | 14 | docker run --rm \ 15 | -v "$(pwd)/tmp/:/var/task" \ 16 | -e AV_DEFINITION_PATH \ 17 | -e AV_DEFINITION_S3_BUCKET \ 18 | -e AV_DEFINITION_S3_PREFIX \ 19 | -e AWS_ACCESS_KEY_ID \ 20 | -e AWS_DEFAULT_REGION \ 21 | -e AWS_REGION \ 22 | -e AWS_SECRET_ACCESS_KEY \ 23 | -e AWS_SESSION_TOKEN \ 24 | -e CLAMAVLIB_PATH \ 25 | --memory="${MEM}" \ 26 | --memory-swap="${MEM}" \ 27 | --cpus="${CPUS}" \ 28 | --name="${NAME}" \ 29 | lambci/lambda:python3.7 update.lambda_handler 30 | -------------------------------------------------------------------------------- /update.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Upside Travel, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import os 17 | 18 | import boto3 19 | 20 | import clamav 21 | from common import AV_DEFINITION_PATH 22 | from common import AV_DEFINITION_S3_BUCKET 23 | from common import AV_DEFINITION_S3_PREFIX 24 | from common import CLAMAVLIB_PATH 25 | from common import S3_ENDPOINT 26 | from common import get_timestamp 27 | 28 | 29 | def lambda_handler(event, context): 30 | s3 = boto3.resource("s3", endpoint_url=S3_ENDPOINT) 31 | s3_client = boto3.client("s3", endpoint_url=S3_ENDPOINT) 32 | 33 | print("Script starting at %s\n" % (get_timestamp())) 34 | to_download = clamav.update_defs_from_s3( 35 | s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX 36 | ) 37 | 38 | for download in to_download.values(): 39 | s3_path = download["s3_path"] 40 | local_path = download["local_path"] 41 | print("Downloading definition file %s from s3://%s" % (local_path, s3_path)) 42 | s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path) 43 | print("Downloading definition file %s complete!" % (local_path)) 44 | 45 | clamav.update_defs_from_freshclam(AV_DEFINITION_PATH, CLAMAVLIB_PATH) 46 | # If main.cvd gets updated (very rare), we will need to force freshclam 47 | # to download the compressed version to keep file sizes down. 48 | # The existence of main.cud is the trigger to know this has happened. 49 | if os.path.exists(os.path.join(AV_DEFINITION_PATH, "main.cud")): 50 | os.remove(os.path.join(AV_DEFINITION_PATH, "main.cud")) 51 | if os.path.exists(os.path.join(AV_DEFINITION_PATH, "main.cvd")): 52 | os.remove(os.path.join(AV_DEFINITION_PATH, "main.cvd")) 53 | clamav.update_defs_from_freshclam(AV_DEFINITION_PATH, CLAMAVLIB_PATH) 54 | clamav.upload_defs_to_s3( 55 | s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX, AV_DEFINITION_PATH 56 | ) 57 | print("Script finished at %s\n" % get_timestamp()) 58 | --------------------------------------------------------------------------------