├── .gitignore ├── LICENSE ├── example.py ├── iam_tester.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020 Alex Chan 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a 4 | copy of this software and associated documentation files (the "Software"), 5 | to deal in the Software without restriction, including without limitation 6 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | and/or sell copies of the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 17 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 18 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 19 | OTHER DEALINGS IN THE SOFTWARE. 
20 | -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Worked example: request temporary credentials whose policy allows s3:List* everywhere but denies it on one bucket, then check both outcomes. 3 | import boto3 4 | 5 | from iam_tester import create_aws_client_from_credentials, temporary_iam_credentials 6 | 7 | 8 | if __name__ == "__main__": 9 | admin_role_arn = "arn:aws:iam::760097843905:role/platform-admin" 10 | 11 | policy_document = { 12 | "Version": "2012-10-17", 13 | "Statement": [ 14 | { 15 | "Effect": "Allow", 16 | "Action": "s3:List*", 17 | "Resource": "*" 18 | }, 19 | { 20 | "Effect": "Deny", 21 | "Action": "s3:List*", 22 | "Resource": "arn:aws:s3:::wellcomecollection-platform-infra" 23 | }, 24 | ], 25 | } 26 | 27 | with temporary_iam_credentials( 28 | admin_role_arn=admin_role_arn, policy_document=policy_document 29 | ) as credentials: 30 | s3_client = create_aws_client_from_credentials("s3", credentials=credentials) 31 | 32 | # Check that we can list objects in any bucket except platform-infra. 33 | # By default, the admin role can list *anything* in the platform account, 34 | # so if we weren't really using the temporary credentials, the second call would succeed. 35 | s3_client.list_objects_v2(Bucket="wellcomecollection-platform-dashboard") 36 | # The Deny statement in the policy above should make this call fail; any exception is accepted as that failure. 37 | try: 38 | s3_client.list_objects_v2(Bucket="wellcomecollection-platform-infra") 39 | except Exception: 40 | pass 41 | else: 42 | assert False, "This ListObjects call did not fail!" 43 | -------------------------------------------------------------------------------- /iam_tester.py: -------------------------------------------------------------------------------- 1 | """ 2 | Create some temporary IAM credentials that use a particular policy document, 3 | then create a client that uses that role. This is useful for rapid testing and 4 | experiments with IAM policy documents.
5 | 6 | from iam_tester import ( 7 | create_aws_client_from_credentials, 8 | temporary_iam_credentials 9 | ) 10 | 11 | admin_role_arn = "arn:aws:iam::760097843905:role/platform-admin" 12 | 13 | policy_document = { 14 | "Version": "2012-10-17", 15 | "Statement": [ 16 | { 17 | "Effect": "Allow", 18 | "Action": "s3:List*", 19 | "Resource": "*" 20 | }, 21 | ] 22 | } 23 | 24 | with temporary_iam_credentials( 25 | admin_role_arn=admin_role_arn, policy_document=policy_document 26 | ) as credentials: 27 | s3_client = create_aws_client_from_credentials("s3", credentials=credentials) 28 | 29 | # do stuff with S3 30 | 31 | """ 32 | 33 | import contextlib 34 | import datetime as dt 35 | import json 36 | import secrets 37 | import time 38 | 39 | import boto3 40 | 41 | 42 | ACCOUNT_ID = "975596993436" 43 | 44 | READ_ONLY_ROLE_ARN = "arn:aws:iam::975596993436:role/storage-read_only" 45 | DEV_ROLE_ARN = "arn:aws:iam::975596993436:role/storage-developer" 46 | ADMIN_ROLE_ARN = "arn:aws:iam::975596993436:role/storage-admin" 47 | 48 | # STS client created at import time; no explicit credentials are passed here. 49 | sts_client = boto3.client("sts") 50 | 51 | 52 | def create_aws_client_from_role_arn(resource, *, role_arn): 53 | """ 54 | Assume ``role_arn`` with the module-level STS client and return a boto3 client for ``resource`` built from the returned credentials. 55 | """ 56 | assumed_role_object = sts_client.assume_role( 57 | RoleArn=role_arn, RoleSessionName="AssumeRoleSession1" 58 | ) 59 | credentials = assumed_role_object["Credentials"] 60 | return create_aws_client_from_credentials(resource, credentials=credentials) 61 | 62 | 63 | def create_aws_client_from_credentials(resource, *, credentials): 64 | """ 65 | Return a boto3 client for ``resource`` that uses the AccessKeyId, SecretAccessKey and SessionToken entries of ``credentials``. 66 | """ 67 | return boto3.client( 68 | resource, 69 | aws_access_key_id=credentials["AccessKeyId"], 70 | aws_secret_access_key=credentials["SecretAccessKey"], 71 | aws_session_token=credentials["SessionToken"], 72 | ) 73 | 74 | 75 | def get_underlying_role_arn(): 76 | """ 77 | Returns the original role ARN. 78 | e.g.
at Wellcome we have a base role, but then we assume roles into different 79 | accounts. This returns the ARN of the base role. 80 | """ 81 | client = boto3.client("sts") 82 | return client.get_caller_identity()["Arn"] 83 | 84 | 85 | @contextlib.contextmanager 86 | def _temporary_role(admin_role_arn): 87 | """ 88 | Create an IAM role which can be assumed by ``admin_role_arn``, yielding ``(role_arn, role_name)``. 89 | 90 | with _temporary_role(admin_role_arn) as (temp_role_arn, temp_role_name): 91 | # do stuff 92 | 93 | The role is deleted when you exit the context manager. 94 | """ 95 | iam_client = create_aws_client_from_role_arn("iam", role_arn=admin_role_arn) 96 | 97 | # Name for the temporary role. Role names must be between 1 and 64 chars 98 | # long, and are case insensitive. The name is a timestamp with one-second resolution. 99 | # See https://docs.aws.amazon.com/IAM/latest/APIReference/API_CreateRole.html 100 | temporary_role_name = dt.datetime.now().strftime("temporary_role_%Y-%m-%d_%H-%M-%S") 101 | 102 | # Create the temporary role. This policy document describes who is allowed 103 | # to assume this role -- since this is a temporary role only meant to be 104 | # used in the current context, we limit it to the admin role. 105 | assume_role_policy_document = { 106 | "Version": "2012-10-17", 107 | "Statement": [ 108 | { 109 | "Effect": "Allow", 110 | "Principal": {"AWS": admin_role_arn}, 111 | "Action": "sts:AssumeRole", 112 | } 113 | ], 114 | } 115 | # NOTE(review): create_role is inside the try, so delete_role in the finally also runs if creation itself failed. 116 | try: 117 | create_role_resp = iam_client.create_role( 118 | RoleName=temporary_role_name, 119 | AssumeRolePolicyDocument=json.dumps(assume_role_policy_document), 120 | Description=f"A temporary role created by {__file__}", 121 | ) 122 | 123 | yield (create_role_resp["Role"]["Arn"], temporary_role_name) 124 | finally: 125 | iam_client.delete_role(RoleName=temporary_role_name) 126 | 127 | 128 | @contextlib.contextmanager 129 | def _temporary_role_policy(iam_client, *, role_name, policy_document): 130 | """ 131 | Temporarily attach a policy document as an inline policy to an IAM role.
132 | """ 133 | temporary_policy_name = dt.datetime.now().strftime( 134 | "temporary_policy_%Y-%m-%d_%H-%M-%S" 135 | ) 136 | # NOTE(review): put_role_policy is inside the try, so delete_role_policy in the finally also runs if the put failed. 137 | try: 138 | iam_client.put_role_policy( 139 | RoleName=role_name, 140 | PolicyName=temporary_policy_name, 141 | PolicyDocument=json.dumps(policy_document), 142 | ) 143 | 144 | yield 145 | finally: 146 | iam_client.delete_role_policy(RoleName=role_name, PolicyName=temporary_policy_name) 147 | 148 | 149 | 150 | @contextlib.contextmanager 151 | def temporary_iam_credentials(*, admin_role_arn, policy_document): 152 | """ 153 | Creates temporary IAM credentials that use a particular policy document. 154 | Requires an IAM role that: 155 | 156 | * The caller is allowed to assume 157 | * Has permission to manage IAM roles 158 | 159 | Use this function as a context manager (both arguments are keyword-only): 160 | 161 | with temporary_iam_credentials(admin_role_arn=admin_role_arn, policy_document=policy_document) as credentials: 162 | # Do stuff with credentials 163 | 164 | It creates a temporary role with the right policy document, and then 165 | cleans up the role once you're finished (even if an exception is thrown 166 | while using the credentials). 167 | 168 | Our storage-dev and storage-admin roles have an explicit, blanket "Deny" on 169 | deleting any objects in our permanent S3 buckets and DynamoDB tables. 170 | This allows us to create a role with a tightly-scoped holepunch through 171 | these Deny policies. 172 | 173 | """ 174 | iam_client = create_aws_client_from_role_arn("iam", role_arn=admin_role_arn) 175 | 176 | with _temporary_role(admin_role_arn) as (temporary_role_arn, temporary_role_name): 177 | 178 | with contextlib.ExitStack() as es: 179 | 180 | # Attach the policy document we want to test to the temporary role.
181 | es.enter_context(_temporary_role_policy( 182 | iam_client, 183 | role_name=temporary_role_name, 184 | policy_document=policy_document 185 | )) 186 | 187 | # Allowing the admin role to assume the temporary role needs symmetric 188 | # IAM permissions: 189 | # 190 | # * The temporary role needs a rule "the admin role can assume me" 191 | # * The admin role needs a rule "I can assume the temporary role" 192 | # 193 | # We created the first rule with the AssumeRolePolicyDocument parameter on 194 | # the temporary role; now add the second rule on the admin role. 195 | admin_role_name = admin_role_arn.split("/")[-1] 196 | admin_policy_name = f"assume-{temporary_role_name}" 197 | # NOTE(review): admin_policy_name is not referenced again in this function; _temporary_role_policy generates its own policy name. 198 | assume_role_policy_document = { 199 | "Version": "2012-10-17", 200 | "Statement": [ 201 | { 202 | "Action": "sts:AssumeRole", 203 | "Resource": temporary_role_arn, 204 | "Effect": "Allow", 205 | } 206 | ], 207 | } 208 | 209 | es.enter_context(_temporary_role_policy( 210 | iam_client, 211 | role_name=admin_role_name, 212 | policy_document=assume_role_policy_document 213 | )) 214 | # This local sts_client shadows the module-level sts_client for the rest of the function. 215 | sts_client = create_aws_client_from_role_arn("sts", role_arn=admin_role_arn) 216 | 217 | # IAM updates don't apply instantaneously, and there may be a short delay 218 | # before we can assume the new role. Even a successful call may be followed 219 | # by a failed call as everything sorts itself out. 220 | # 221 | # If you don't wait, you may get an error: 222 | # 223 | # botocore.exceptions.ClientError: An error occurred (InvalidAccessKeyId) 224 | # when calling the [Operation] operation: The AWS Access Key Id you 225 | # provided does not exist in our records. 226 | # 227 | # This shouldn't be running in a hot path -- it's a tool for experimenting -- 228 | # so wait 15 seconds before retrieving credentials.
229 | time.sleep(15) 230 | 231 | assumed_role_credentials = sts_client.assume_role( 232 | RoleArn=temporary_role_arn, RoleSessionName="AssumeRoleSession1" 233 | ) 234 | 235 | yield assumed_role_credentials["Credentials"] 236 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # iam-policy-document-tester 2 | 3 | **Create short-lived, temporary roles for experimenting with AWS IAM policy documents.** 4 | 5 | This is a Python function for rapidly testing and experimenting with AWS IAM policy documents. 6 | Here's what it looks like: 7 | 8 | ```python 9 | with temporary_iam_credentials(admin_role_arn=admin_role_arn, policy_document=policy_document) as credentials: 10 | # Do stuff with your new credentials, which have the permissions defined by the 11 | # IAM policy document. 12 | ``` 13 | 14 | The function `temporary_iam_credentials()` gives you a set of temporary AWS credentials, which have the permissions defined by the IAM policy document. 15 | You can make API calls using those credentials, and check they behave correctly -- that API calls are allowed or denied as appropriate. 16 | 17 | When you're done, it cleans up after itself, so there are no temporary resources left hanging around in your account. 18 | 19 | I use this in two ways: 20 | 21 | * To dramatically speed up the flow for developing IAM policy documents. 22 | It gives me a fast write-test-debug loop for making changes; much faster than if I was using a more full-featured deployment tool like Terraform or CloudFormation. 23 | 24 | * To create a temporary set of tightly-scoped permissions for a risky operation, which act as an extra guard rail. 25 | 26 | This is why I originally wrote the code: I had an admin role that had blanket "Deny" permissions (on an important S3 bucket), but had full IAM permissions, and so could create new roles with arbitrary permissions.
27 | I used this function to create a temporary set of credentials which only had permission to delete specific objects, minimising the risk that I'd inadvertently delete the wrong thing. 28 | 29 | If you want to select a subset of the permissions in a role you already have (rather than creating a brand new role/permissions set), consider using IAM session policies. 30 | You can apply a policy document when you call AssumeRole, and that policy applies for the duration of your session. 31 | (hat tip [Ben Kehoe](https://twitter.com/ben11kehoe/status/1333885761347731456)) 32 | 33 | 34 | 35 | ## How does it work? 36 | 37 | The function creates a temporary IAM role, and attaches your policy document as an inline policy. 38 | (I considered creating a temporary IAM user, but roles have [a 5x limit on the size of inline policies](https://aws.amazon.com/premiumsupport/knowledge-center/iam-increase-policy-size/).) 39 | 40 | Then it gives your admin role permission to assume the temporary role, assumes it, and gets some credentials using STS. 41 | It hands back those credentials for you to use. 42 | 43 | When you're done, it cleans up the temporary role and associated policies, so there's nothing left hanging around in your account. 44 | 45 | 46 | 47 | ## Interesting ideas: what did I learn? 48 | 49 | Here are some of the interesting things I learnt while writing this code: 50 | 51 | * **Context managers are great for temporary resources.** 52 | Context managers are a Python feature that let you create a resource, and ensure it gets cleaned up afterwards. 53 | An example you've probably used is the `open` function for files: 54 | 55 | ```python 56 | with open('spam.txt', 'r') as f: 57 | print(f.read()) 58 | ``` 59 | 60 | The file will be closed when you're done, even if an exception is thrown inside the `with` block. 
61 | 62 | I'm using [contextlib.contextmanager](https://docs.python.org/3/library/contextlib.html#contextlib.contextmanager) to create a couple of my own context managers for temporary IAM resources, so those resources can always be cleaned up afterwards. 63 | It goes something like: 64 | 65 | ```python 66 | import contextlib 67 | 68 | @contextlib.contextmanager 69 | def temporary_iam_resource(*args, **kwargs): 70 | # Code to create resource 71 | resource = create_iam_resource(*args, **kwargs) 72 | try: 73 | yield resource 74 | finally: 75 | # Code to clean up resource 76 | delete_iam_resource(resource) 77 | ``` 78 | 79 | * **ExitStack is a good way to handle nested context managers.** 80 | This script creates several temporary resources, and the nested context managers start to get unwieldy: 81 | 82 | ```python 83 | with temporary_iam_role() as role1: 84 | with temporary_iam_role() as role2: 85 | with temporary_iam_role_policy(role1, policy_document): 86 | with temporary_iam_role_policy(role2, another_policy_document): 87 | ... 88 | ``` 89 | 90 | I recently read about ExitStack in [a blog post by Nikolaus Rath](https://www.rath.org/on-the-beauty-of-pythons-exitstack.html), which gives a way to nest context managers in a cleaner way: 91 | 92 | ```python 93 | with contextlib.ExitStack() as es: 94 | role1 = es.enter_context(temporary_iam_role()) 95 | role2 = es.enter_context(temporary_iam_role()) 96 | es.enter_context(temporary_iam_role_policy(role1, policy_document)) 97 | es.enter_context(temporary_iam_role_policy(role2, another_policy_document)) 98 | ``` 99 | 100 | Not only does it reduce the amount of indentation, it also lines things up vertically so it's easier to see the similarities between different lines. 
101 | 102 | * **Changes in IAM take a while to propagate.** 103 | In particular: 104 | 105 | * There's a delay between creating a role and being able to assume it 106 | * There's a delay between creating a role, and credentials that use that role being usable 107 | 108 | I have a hard-coded 15 second delay in my function, because that's what worked in my testing. 109 | 110 | These delays shouldn't be surprising -- IAM is a global, distributed system, and changes won't propagate instantly -- but it's the first time I've encountered them, because I don't usually create a role and immediately try to use it. 111 | 112 | * **You can use EC2's [DescribeRegions API](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeRegions.html) to test IAM credentials.** 113 | The API call has a `DryRun` flag, which tells you if the request was authorised without actually making it, and I saw several examples that suggested using it. 114 | 115 | I did try it here, but it wasn't a reliable source of *"is this role ready yet?"* 116 | Sometimes a DescribeRegions call would succeed, then the next call would fail, then the next call would succeed. 117 | Consistency in distributed systems is hard. 118 | 119 | 120 | 121 | ## Motivation: why did I write this? 122 | 123 | I work on an [archival storage service](https://stacks.wellcomecollection.org/building-wellcome-collections-new-archival-storage-service-3f68ff21927e), which keeps a copy of every object in two S3 buckets (our "permanent storage"). 124 | It's important that objects in these buckets are never inadvertently modified or deleted. 125 | 126 | Developers have several IAM roles that we use, which give us different permissions within the account (e.g. *read-only*, *billing*, *developer*, *admin*). 
127 | Although the latter two roles can usually do almost anything in an account, we have [a blanket "Deny" rule](https://github.com/wellcomecollection/storage-service/blob/95e56ae99498e7f6f8d4a3cb430ba4c318d6f645/terraform/critical_prod/delete_protection.tf#L51-L76) that prevents those roles from modifying anything in these permanent storage buckets -- so we can't corrupt the archive by accident. 128 | 129 | However, sometimes we do want to delete objects -- for example, objects that were stored in the wrong place. 130 | 131 | When we do this, I don't want to remove the blanket "Deny" rule, because that puts the archive at risk -- objects that I don't want to change are now vulnerable to an errant DeleteObject call. 132 | Instead, I wanted to create a fine-grained rule that said *"let me delete these three objects, but nothing else"*. 133 | 134 | A "Deny" always beats an "Allow" in an IAM policy document, so I can't modify our developer roles to give us these permissions -- but that developer role can create new IAM roles with arbitrary permissions! 135 | I wrote this function to create a *temporary* role with these permissions, which we could then assume to run the deletion. 136 | There's less risk of accidentally deleting something that we weren't planning to delete, because there shouldn't be an IAM policy anywhere that allows its deletion. 137 | 138 | (We trust each other to use the *"create a role that can do anything"* permission responsibly. 139 | The blanket "Deny" in the roles we use day-to-day is about preventing careless mistakes, not preventing us from ever deleting something.) 140 | 141 | It wasn't until I finished that I realised this could be more general-purpose, and could be used to experiment with IAM policy documents. 142 | 143 | 144 | 145 | ## Usage: how can somebody else use this? 146 | 147 | Read the code in [iam_tester.py](iam_tester.py), then look at the worked example in [example.py](example.py). 148 | Hopefully there's enough there to get started.
149 | 150 | You need an IAM role with administrator access (in particular, one that can create, update and destroy IAM roles). 151 | 152 | 153 | 154 | ## License 155 | 156 | MIT. 157 | --------------------------------------------------------------------------------