├── __init__.py ├── dags ├── __init__.py └── rbac_dag │ ├── __init__.py │ ├── dag_config.py │ └── sample_dag_fine_grain_access.py ├── requirements.txt ├── plugins ├── __init__.py └── rbac_python_operator.py ├── poetry.lock ├── CODE_OF_CONDUCT.md ├── pyproject.toml ├── policy-docs ├── write_access_processed_bucket.json ├── write_access_published_bucket.json └── trust-policy.json ├── LICENSE ├── .gitignore ├── CONTRIBUTING.md └── README.md /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dags/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugins/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dags/rbac_dag/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- 1 | package = [] 2 | 3 | [metadata] 4 | lock-version = "1.1" 5 | python-versions = "^3.8" 6 | content-hash = "fafb334cb038533f851c23d0b63254223abf72ce4f02987e7064b0c95566699a" 7 | 8 | [metadata.files] 9 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "code" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Virendhar Sivaraman "] 6 | 7 | [tool.poetry.dependencies] 8 | python = "^3.8" 9 | 10 | [tool.poetry.dev-dependencies] 11 | 12 | [build-system] 13 | requires = ["poetry-core>=1.0.0"] 14 | build-backend = "poetry.core.masonry.api" 15 | -------------------------------------------------------------------------------- /policy-docs/write_access_processed_bucket.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Sid": "VisualEditor0", 6 | "Effect": "Allow", 7 | "Action": [ 8 | "s3:PutObject", 9 | "s3:DeleteObject" 10 | ], 11 | "Resource": "arn:aws:s3:::--mwaa-processed/*" 12 | } 13 | ] 14 | } -------------------------------------------------------------------------------- /policy-docs/write_access_published_bucket.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Sid": "VisualEditor0", 6 | "Effect": "Allow", 7 | "Action": [ 8 | "s3:PutObject", 9 | "s3:DeleteObject" 10 | ], 11 | "Resource": "arn:aws:s3:::--mwaa-published/*" 12 | } 13 | ] 14 | } -------------------------------------------------------------------------------- /policy-docs/trust-policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Principal": { 7 | "AWS": [ 8 | "arn:aws:iam:::assumed-role//AmazonMWAA-airflow" 9 | ], 10 | 
"Service": "s3.amazonaws.com" 11 | }, 12 | "Action": "sts:AssumeRole" 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 4 | software and associated documentation files (the "Software"), to deal in the Software 5 | without restriction, including without limitation the rights to use, copy, modify, 6 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 10 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 11 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 12 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 13 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 14 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .DS_Store 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # PyBuilder 60 | .pybuilder/ 61 | target/ 62 | 63 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 64 | __pypackages__/ 65 | 66 | # Environments 67 | .env 68 | .venv 69 | env/ 70 | venv/ 71 | ENV/ 72 | env.bak/ 73 | venv.bak/ 74 | 75 | # mypy 76 | .mypy_cache/ 77 | .dmypy.json 78 | dmypy.json 79 | 80 | # Pyre type checker 81 | .pyre/ 82 | 83 | # pytype static type analyzer 84 | .pytype/ 85 | 86 | # Cython debug symbols 87 | cython_debug/ 88 | 89 | # poetry output 90 | **/requirements-poetry.txt 91 | 92 | # whl 93 | **/*.whl 94 | -------------------------------------------------------------------------------- /dags/rbac_dag/dag_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# ---------------------------------------------------------------------------
# Settings for the RBAC sample DAG.
# Edit ACCOUNT_ID and REGION before deploying; the remaining values are
# derived from them.
# ---------------------------------------------------------------------------
ACCOUNT_ID = "123456789012"  # Enter AWS Account Number here
REGION = "us-west-2"  # Update region

# Number of seconds the sample task sleeps to mock a transformation step
MOCK_TRANSFORMATION_IN_SECONDS = 10

# Target S3 buckets, named "<account id>-<region>-mwaa-<stage>"
PROCESSED_S3_BUCKET = f"{ACCOUNT_ID}-{REGION}-mwaa-processed"
PUBLISHED_S3_BUCKET = f"{ACCOUNT_ID}-{REGION}-mwaa-published"

# IAM role ARNs, one per bucket, handed to the tasks of the sample DAG
PROCESSED_IAM_ROLE = f"arn:aws:iam::{ACCOUNT_ID}:role/write_access_processed_bucket"
PUBLISHED_IAM_ROLE = f"arn:aws:iam::{ACCOUNT_ID}:role/write_access_published_bucket"
def task_process(*args, **kwargs):
    """Write a small timestamped JSON control file to the task's S3 bucket.

    Required keyword arguments:
        bucket: name of the S3 bucket to write to (passed via ``op_kwargs``).
        task_session: boto3 session used for the upload. RBACPythonOperator
            adds it to ``op_kwargs`` before the callable runs.

    The object is stored under ``control_file/processed.json`` when ``bucket``
    equals ``cf.PROCESSED_S3_BUCKET`` and under ``control_file/published.json``
    for any other bucket.

    Raises:
        KeyError: if ``bucket`` or ``task_session`` is missing from ``kwargs``.
    """
    bucket = kwargs["bucket"]
    boto_session = kwargs["task_session"]

    # Sleep mocks a time taking process
    sleep(cf.MOCK_TRANSFORMATION_IN_SECONDS)

    # NOTE(review): datetime.now() is naive local time - confirm whether UTC is wanted
    now = datetime.now().strftime("%d/%m/%Y %H:%M:%S")

    # Every bucket other than the processed one is treated as the published bucket
    if bucket == cf.PROCESSED_S3_BUCKET:
        s3_obj = {"processed_dt": now}
        obj_name = "processed"
    else:
        s3_obj = {"published_dt": now}
        obj_name = "published"

    s3 = boto_session.resource("s3")
    # The encoded JSON is already a bytes object, so it is passed as the body
    # directly instead of being wrapped in BytesIO and read straight back out.
    s3.Object(bucket_name=bucket, key=f"control_file/{obj_name}.json").put(
        Body=json.dumps(s3_obj).encode("utf-8")
    )


# Defaults applied to every task of the DAG
dag_default_args = {
    "owner": "airflow",
    "depends_on_past": False,
    "retries": 0,
}

dag = DAG(
    dag_id="rbac_task",
    description="Sample DAG with fine grain access for tasks",
    default_args=dag_default_args,
    start_date=days_ago(0),
    schedule_interval="@once",
    catchup=False,
)

# Both tasks share the same callable; they differ only in the target bucket
# and in the IAM role handed to the operator.
process_task = RBACPythonOperator(
    task_id="process_task",
    python_callable=task_process,
    provide_context=True,
    op_kwargs={"bucket": cf.PROCESSED_S3_BUCKET},
    task_iam_role_arn=cf.PROCESSED_IAM_ROLE,
    dag=dag,
)

publish_task = RBACPythonOperator(
    task_id="publish_task",
    python_callable=task_process,
    provide_context=True,
    op_kwargs={"bucket": cf.PUBLISHED_S3_BUCKET},
    task_iam_role_arn=cf.PUBLISHED_IAM_ROLE,
    dag=dag,
)

# Publish only after processing has finished
process_task >> publish_task
Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /plugins/rbac_python_operator.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: MIT-0 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | software and associated documentation files (the "Software"), to deal in the Software 7 | without restriction, including without limitation the rights to use, copy, modify, 8 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# Value sent as DurationSeconds when the task role is assumed through STS
STS_TOKEN_VALIDITY = 900


class RBACPythonOperator(PythonOperator):
    """
    PythonOperator with role based access control at the task level.

    Before the python callable runs, the operator assumes ``task_iam_role_arn``
    through STS, builds a boto3 session from the returned temporary credentials
    and hands that session to the callable as the ``task_session`` keyword argument.

    :param / :type python_callable: Inherited, refer Airflow's PythonOperator Class
    :param / :type op_args: Inherited, refer Airflow's PythonOperator Class
    :param / :type op_kwargs: Inherited, refer Airflow's PythonOperator Class
    :param / :type provide_context: Inherited, refer Airflow's PythonOperator Class
    :param / :type templates_dict: Inherited, refer Airflow's PythonOperator Class
    :param / :type templates_exts: Inherited, refer Airflow's PythonOperator Class
    :param task_iam_role_arn: IAM role arn that will be associated during task execution
    :type task_iam_role_arn: str
    """

    @apply_defaults
    def __init__(
        self,
        task_iam_role_arn,
        python_callable,
        op_args=None,
        op_kwargs=None,
        provide_context=False,
        templates_dict=None,
        templates_exts=None,
        *args,
        **kwargs
    ):
        # NOTE(review): provide_context is forwarded even when it is False;
        # confirm that every targeted Airflow version accepts the argument.
        super().__init__(
            python_callable=python_callable,
            op_args=op_args,
            op_kwargs=op_kwargs,
            templates_dict=templates_dict,
            templates_exts=templates_exts,
            provide_context=provide_context,
            *args,
            **kwargs
        )
        self.task_iam_role_arn = task_iam_role_arn

    def execute(self, context):
        """Assume the task's IAM role, then run the callable with the scoped session.

        :param context: Airflow task context, passed through to the parent ``execute``
        :return: whatever the parent ``execute`` returns for the callable
        """
        credentials = boto3.client("sts").assume_role(
            RoleArn=self.task_iam_role_arn,
            RoleSessionName="AssumeRoleSession",
            DurationSeconds=STS_TOKEN_VALIDITY,
        )["Credentials"]
        task_session = boto3.session.Session(
            aws_access_key_id=credentials["AccessKeyId"],
            aws_secret_access_key=credentials["SecretAccessKey"],
            aws_session_token=credentials["SessionToken"],
        )
        # Rebind to a copy rather than mutating in place, in case op_kwargs is
        # still the very dict object the DAG author passed to the constructor.
        self.op_kwargs = {**(self.op_kwargs or {}), "task_session": task_session}
        # Propagate the parent's result instead of silently dropping it
        return super().execute(context)
Create the following [AWS Identity and Access Management](https://console.aws.amazon.com/iam/home?region=us-west-2#/roles$new?step=type) Roles 20 | - Write access for the processed bucket 21 | - Role Name : write_access_processed_bucket 22 | - Policy Document: Refer `./policy-docs/write_access_processed_bucket.json` 23 | - Write access for the published bucket 24 | - Role Name : write_access_published_bucket 25 | - Policy Document: Refer `./policy-docs/write_access_published_bucket.json` 26 | 27 | 4. Establish trust relationship with [MWAA](https://us-west-2.console.aws.amazon.com/mwaa/home) execution role (Found in the MWAA environment page) 28 | - Refer `./policy-docs/trust-policy.json` 29 | 30 | 5. Code deployment to [MWAA](https://us-west-2.console.aws.amazon.com/mwaa/home) : 31 | - DAG Deployment 32 | - Code base is present in the `./dags/rbac_dag` directory 33 | - Update the following variables in `./dags/rbac_dag/dag_config.py` file 34 | - REGION ( e.g. us-west-2) 35 | - ACCOUNT_ID 36 | - Deploy to MWAA by copying the DAG files to the appropriate MWAA S3 bucket that was configured in step 1 37 | - Custom Operator 38 | - Code base is present in the `./plugins/`. Create a ZIP and copy to the MWAA's S3 bucket configured in step 1, example s3://plugins/custom_operators.zip 39 | - Deploy to MWAA by editing the MWAA environment and configuring `Plugins file - optional` with the above plugins path 40 | - Update the MWAA environment for the above change to take effect 41 | 42 | 6. DAG Execution 43 | - DAG `rbac_task` should show up in the [MWAA](https://us-west-2.console.aws.amazon.com/mwaa/home) Web UI ( can be accessed from MWAA service page) after a few seconds. 44 | - Click the `Play` button to run the DAG 45 | - Notice that, since the roles are established at the task level, the DAG completes its execution. Now try restricting access via the roles and notice the DAG failing.
46 | 47 | 48 | ## Clean up: 49 | Bring down the services after implementing the above - as they will incur cost if left running, 50 | - MWAA environment 51 | - Delete S3 Buckets 52 | - Remove IAM Roles and Policies 53 | 54 | --------------------------------------------------------------------------------