├── .eslintrc.yml ├── .gitignore ├── LICENSE ├── README.md ├── config ├── .env.sample ├── cloudformation.template.yml ├── config.example.yml └── kes.js ├── db ├── knexfile.js ├── migrations │ └── 20190122223855_init.js └── setup.sh ├── diagram.png ├── lambda ├── Dockerfile ├── Makefile ├── download_and_predict │ ├── __init__.py │ ├── base.py │ ├── custom_types.py │ ├── handler.py │ └── mercantile.pyi ├── mypy.ini ├── package.zip ├── setup.py └── tests │ ├── __init__.py │ ├── handler.py │ └── test_base.py ├── lambda_examples ├── README.md ├── ml_enabler.py ├── s3_images.py ├── save_image.py ├── sentinel_hub.py └── super_tiles.py ├── package.json ├── scripts ├── csv_to_geojson.py ├── download.js ├── gpu-util.js ├── model.js ├── run-sqs-push.js ├── sqs-push.js ├── tag-cloudwatch-logs.js └── verify.js ├── test └── test_sqs-push.js └── yarn.lock /.eslintrc.yml: -------------------------------------------------------------------------------- 1 | extends: standard 2 | plugins: 3 | - standard 4 | - promise 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | dist 3 | cloudformation.yml 4 | node_modules 5 | .env 6 | __pycache__ 7 | .mypy_cache 8 | .vscode -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Development Seed 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Chip 'n Scale: Queue Arranger 2 | 3 | `chip-n-scale-queue-arranger` helps you run machine learning models over satellite imagery at scale. It is a collection of [AWS CloudFormation](https://aws.amazon.com/cloudformation/) templates deployed by [`kes`](http://devseed.com/kes/), lambda functions, and utility scripts for monitoring and managing the project. 4 | 5 | ![AWS components diagram](diagram.png) 6 | 7 | ## Status 8 | 9 | Currently this is only deployed internally to Development Seed and we are [refactoring a bit](https://github.com/developmentseed/chip-n-scale-queue-arranger/pull/5) for easier reuse, modification, and deployment. Please excuse the dust and feel free to open an issue if you have any questions. 
The current build process for the lambda looks like: 10 | 11 | ```sh 12 | cd lambda 13 | make build 14 | ``` 15 | which produces a package.zip file. This can eventually be built into another script. 16 | 17 | ## Requirements 18 | 19 | - [`python 3.7.x`](https://www.python.org/) 20 | - [`node`](https://nodejs.org/en/) 21 | - [`yarn`](https://yarnpkg.com/en/) (or [`npm`](https://www.npmjs.com/)) 22 | - A [TensorFlow Serving Docker Image](https://www.tensorflow.org/tfx/serving/serving_basic) which accepts base64 encoded images. 23 | - For a walkthrough of this process, check out [this post](https://medium.com/devseed/technical-walkthrough-packaging-ml-models-for-inference-with-tf-serving-2a50f73ce6f8). 24 | - Or if you just have a model, build an image with the included `yarn model` tool 25 | - An [XYZ raster tile endpoint](https://docs.mapbox.com/api/maps/#maps) 26 | - A corresponding list of tiles over the area you'd like to predict on. If you know the extent of your prediction area as [`GeoJSON`](http://geojson.org/), you can use [`geodex`](https://github.com/developmentseed/geodex), [`mercantile`](https://github.com/mapbox/mercantile), or [`tile-cover`](https://github.com/mapbox/tile-cover) 27 | - An [AWS account](https://aws.amazon.com/) with sufficient privileges to deploy `config/cloudformation.template.yml` 28 | 29 | ## Deploying 30 | 31 | To create your own project, first install the `node` dependencies: 32 | 33 | ```sh 34 | yarn install 35 | ``` 36 | 37 | Then add values to `config/.env` and to `config/config.yml` to configure your project. Samples for each are provided and you can find more information on the [`kes` documentation page](http://devseed.com/kes/). 38 | 39 | Once these values are filled in, you can deploy the project to AWS (takes ~10 minutes): 40 | 41 | ```sh 42 | yarn deploy 43 | ... 44 | CF operation is in state of CREATE_COMPLETE 45 | 46 | The stack test-stack is deployed or updated. 47 | - The database is available at: postgres://your-db-string 48 | - The queue is available at https://your-queue-url 49 | 50 | Is this the first time setting up this stack? Run the following command to set up the database: 51 | 52 | $ yarn setup postgres://your-db-string 53 | 54 | ✨ Done in 424.62s. 55 | ``` 56 | 57 | This will return a database string to run a migration: 58 | 59 | ```sh 60 | yarn setup [DB_STRING] 61 | ``` 62 | 63 | If `yarn deploy` fails on the first attempt, you'll need to run `yarn delete` to remove the stack and start again. Otherwise the project will fail on newer updates indicating that it is in the state `ROLLBACK_COMPLETE`. If the first deploy succeeds, you can make future updates by rerunning `yarn deploy`. 64 | 65 | By default, the cloudwatch logs are not tagged for resource tracking. To add `Project` tags to the cloudwatch logs, run the following: 66 | 67 | ```sh 68 | yarn tag-logs 69 | ``` 70 | 71 | If you'd like to confirm the everything is deployed correctly (recommended), run: 72 | 73 | ```sh 74 | yarn verify 75 | ``` 76 | 77 | This will test a few portions of the deployed stack to ensure that it will function correctly. Once you're ready, begin pushing tile messages to the SQS queue. 78 | 79 | ## Running 80 | 81 | Once the stack is deployed, you can kick off the prediction by adding messages to the SQS queue. Each individual message will look like: 82 | 83 | ```json 84 | { "x": 1, "y": 2, "z": 3} 85 | ``` 86 | 87 | where `x`, `y`, and `z` specify [an individual map tile](https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames). 
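If you want to test the pipeline before bulk-loading tiles, a single message can be pushed by hand with `boto3`. This is a minimal sketch rather than part of the repository's tooling; the queue URL below is a placeholder for the `queueURL` output printed by `yarn deploy`:

```python
import json

import boto3

# Placeholder: use the queueURL output printed by `yarn deploy`
QUEUE_URL = "https://sqs.us-east-1.amazonaws.com/123456789012/YourStackTileQueue"

sqs = boto3.client("sqs")

# The message body is exactly the JSON tile index shown above
sqs.send_message(
    QueueUrl=QUEUE_URL,
    MessageBody=json.dumps({"x": 1, "y": 2, "z": 3}),
)
```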
Because pushing these messages into the queue quickly is important to running the prediction at scale, we've included a utility script to assist this process: 88 | 89 | ```sh 90 | yarn sqs-push [tiles.txt] [https://your-queue-url] 91 | ``` 92 | 93 | The first argument, `tiles.txt`, is a line-delimited file containing your tile indices in the format `x-y-z` and the second argument is the URL of your SQS Queue. If you have a lot of tiles to push to the queue, it's best to run this script in the background or on a separate computer. The maximum number of simultaneous inflight SQS requests can be set with the `PROMISE_THRESHOLD` environment variable. 94 | 95 | ## Post processing 96 | 97 | Once the processing is complete, you can pull down the stored results as a simple CSV file. 98 | 99 | ```sh 100 | DATABASE_URL='postgres://myusername:mypassword@your-db-string.rds.amazonaws.com:5432/ResultsDB' yarn download my_csv_filename.csv 101 | ``` 102 | 103 | You can then convert that CSV file to a geojson while thresholding on per-class ML confidence. For example, if you have a binary prediction and only want to keep tiles where confidence in class index 1 was 95% or greater, use something like: 104 | 105 | ```sh 106 | yarn convert-geojson my_csv_filename.csv my_thresholded_features.geojson --thresh_ind 1 --thresh 0.95 107 | ``` 108 | 109 | ## Completion 110 | 111 | After the prediction is complete, you should download the data from the AWS RDS database. Then it's okay to delete the stack: 112 | 113 | ```sh 114 | yarn delete 115 | ``` 116 | 117 | ## Speed, Cost, and GPU Utilization 118 | 119 | The primary costs of running this stack come from Lambda Functions and GPU instances. The Lambdas parallelize the image downloading and database writing; The GPU instances provide the prediction capacity. To run the inference optimally, from a speed and cost perspective, these two resources need to be scaled in tandem. Roughly four scenarios can occur: 120 | - **Lambda concurrency is much higher than GPU prediction capacity**. When too many Lambdas call the prediction endpoint at once, many of them will timeout and fail. The GPU instances will be fully utilized (good) but Lambda costs will be very high running longer and for more times than necessary. This will also hit the satellite imagery tile endpoint more times than needed. If this is happening, Lambda errors will be high, Lambda run time will be high, GPU utilization will be high, and SQS messages will show up in the dead letter queue. To fix it, lower the maximum Lambda concurrency or increase GPU capacity. 121 | - **Lambda concurrency is slightly higher than GPU prediction capacity**. Similar to the above case, if the Lambda concurrency is slightly too high compared to GPU prediction throughput, the Lambdas will run for longer than necessary but not timeout. If this is happening, Lambda errors will be low, Lambda run time will be high, and GPU utilization will be high. To fix it, lower the maximum Lambda concurrency or increase GPU capacity. 122 | - **Lambda concurrency is lower than GPU prediction capacity**. In this case, the Lambda monitoring metrics will look normal (low errors and low run time) but the GPU prediction instances have the capacity to predict many more images. 
To see this, run `yarn gpu-util [ssh key]` which will show the GPU utilization of each instance/GPU in the cluster: 123 | 124 | ```bash 125 | $ yarn gpu-util ~/.ssh/my-key.pem 126 | yarn run v1.3.2 127 | $ node scripts/gpu-util.js ~/.ssh/my-key.pem 128 | ┌────────────────────────┬────────────────────────┬────────────────────────┐ 129 | │ IP Address │ Instance Type │ GPU Utilization │ 130 | ├────────────────────────┼────────────────────────┼────────────────────────┤ 131 | │ 3.89.130.180 │ p3.2xlarge │ 5 % │ 132 | ├────────────────────────┼────────────────────────┼────────────────────────┤ 133 | │ 23.20.130.19 │ p3.2xlarge │ 2 % │ 134 | ├────────────────────────┼────────────────────────┼────────────────────────┤ 135 | │ 54.224.113.60 │ p3.2xlarge │ 3 % │ 136 | ├────────────────────────┼────────────────────────┼────────────────────────┤ 137 | │ 34.204.40.177 │ p3.2xlarge │ 12 % │ 138 | └────────────────────────┴────────────────────────┴────────────────────────┘ 139 | ✨ Done in 3.30s. 140 | ``` 141 | 142 | To fix this, increase the number of concurrent Lambdas or decrease the GPU capacity. (Note that by default, the security group on the instances won't accept SSH connections. To use `gpu-util`, add a new rule to your EC2 security group) 143 | 144 | - **Optimal :tada:** 145 | 146 | High GPU utilization, low Lambda errors, and low Lambda run time. :ship: 147 | 148 | ## Motivation 149 | 150 | Running machine learning inference at scale can be challenging. One bottleneck is that it's often hard to ingest/download images fast enough to keep a GPU fully utilized. This seeks to solve that bottleneck by parallelizing the imagery acquisition on AWS Lambda functions and running that separate from the machine learning predictions. 151 | 152 | ## Acknowledgements 153 | 154 | - [The World Bank](https://www.worldbank.org/), [Nethope](https://nethope.org/), and [UNICEF](https://www.unicef.org/) partnered with us on machine learning projects that provided opportunities to test these capabilities. 155 | - [Digital Globe](https://www.digitalglobe.com/) assisted in using their services to access ultra high-resolution satellite imagery at scale. 156 | - [Azavea's](https://www.azavea.com/) [raster-vision-aws](https://github.com/azavea/raster-vision-aws) repo provides the base AMI for these EC2 instances (`nvidia-docker` + ECS enabled). 
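## Generating a tile list

The Requirements section mentions building a list of tiles over your prediction area. As a minimal sketch (the bounding box and zoom level below are placeholders), [`mercantile`](https://github.com/mapbox/mercantile) can write the `x-y-z` lines that `yarn sqs-push` expects:

```python
import mercantile

# Placeholder area of interest (west, south, east, north) and zoom level
west, south, east, north = -61.8, 17.0, -61.6, 17.2
zoom = 16

with open("tiles.txt", "w") as f:
    for tile in mercantile.tiles(west, south, east, north, zooms=[zoom]):
        # sqs-push expects one x-y-z index per line
        f.write(f"{tile.x}-{tile.y}-{tile.z}\n")
```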
157 | -------------------------------------------------------------------------------- /config/.env.sample: -------------------------------------------------------------------------------- 1 | TILE_ACCESS_TOKEN='string' 2 | RDS_USERNAME='string' 3 | RDS_PASSWORD='string' 4 | -------------------------------------------------------------------------------- /config/cloudformation.template.yml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Description: 'stack: {{stackName}} | stage: {{stage}} | deployed by Kes' 3 | Resources: 4 | 5 | ################################################# 6 | # Lambda config BEGIN 7 | ################################################# 8 | {{#each lambdas}} 9 | {{name}}LambdaFunction: 10 | Type: AWS::Lambda::Function 11 | Properties: 12 | Code: 13 | S3Bucket: {{bucket}} 14 | S3Key: {{remote}} 15 | FunctionName: {{../stackName}}-{{name}} 16 | Environment: 17 | Variables: 18 | stage: {{../stage}} 19 | stackName: {{../stackName}} 20 | PREDICTION_ENDPOINT: 21 | Fn::Join: 22 | - '' 23 | - - 'http://' 24 | - !GetAtt {{../stackNoDash}}LoadBalancer.DNSName 25 | - {{{../predictionPath}}} 26 | - ':predict' 27 | DATABASE_URL: 28 | Fn::Join: 29 | - '' 30 | - - 'postgres://{{../rds.username}}:{{../rds.password}}@' 31 | - !GetAtt {{../stackNoDash}}ResultsDB.Endpoint.Address 32 | - ':' 33 | - !GetAtt {{../stackNoDash}}ResultsDB.Endpoint.Port 34 | - '/' 35 | - ResultsDB 36 | {{#each envs}} 37 | {{@key}}: {{{this}}} 38 | {{/each}} 39 | Handler: {{handler}} 40 | MemorySize: {{memory}} 41 | Role: !GetAtt LambdaProcessingRole.Arn 42 | Runtime: {{runtime}} 43 | {{# if concurrent}} 44 | ReservedConcurrentExecutions: {{concurrent}} 45 | {{/if}} 46 | Timeout: {{timeout}} 47 | Tags: 48 | - Key: Project 49 | Value: {{../projectTag}} 50 | - Key: Stack 51 | Value: {{../stackName}} 52 | 53 | {{name}}LambdaFunctionLogGroup: 54 | Type: AWS::Logs::LogGroup 55 | Properties: 56 | LogGroupName: 57 | Fn::Join: 58 | - '' 59 | - - '/aws/lambda/' 60 | - {{../stackName}} 61 | - '-' 62 | - {{name}} 63 | 64 | {{#if queueTrigger}} 65 | {{../stackNoDash}}{{name}}LambdaEventSourceMapping: 66 | Type: AWS::Lambda::EventSourceMapping 67 | Properties: 68 | Enabled: True 69 | EventSourceArn: !GetAtt {{../stackNoDash}}TileQueue.Arn 70 | FunctionName: !Ref {{name}}LambdaFunction 71 | {{/if}} 72 | {{/each}} 73 | 74 | ################################################# 75 | # Lambda config END 76 | ################################################# 77 | 78 | ################################################# 79 | # SQS config BEGIN 80 | ################################################# 81 | 82 | {{stackNoDash}}TileQueue: 83 | Type: AWS::SQS::Queue 84 | Properties: 85 | QueueName: {{stackNoDash}}TileQueue 86 | VisibilityTimeout: {{sqs.visibilityTimeout}} 87 | RedrivePolicy: 88 | deadLetterTargetArn: !GetAtt {{stackNoDash}}DeadLetterQueue.Arn 89 | maxReceiveCount: {{sqs.maxReceiveCount}} 90 | Tags: 91 | - Key: Project 92 | Value: {{projectTag}} 93 | - Key: Stack 94 | Value: {{stackName}} 95 | 96 | {{stackNoDash}}DeadLetterQueue: 97 | Type: AWS::SQS::Queue 98 | Properties: 99 | QueueName: {{stackNoDash}}DeadLetterQueue 100 | Tags: 101 | - Key: Project 102 | Value: {{projectTag}} 103 | - Key: Stack 104 | Value: {{stackName}} 105 | 106 | ################################################# 107 | # SQS config END 108 | ################################################# 109 | 110 | ################################################# 111 | # ECS config BEGIN 
112 | ################################################# 113 | 114 | {{stackNoDash}}InstanceProfile: 115 | Type: AWS::IAM::InstanceProfile 116 | Properties: 117 | Path: "/" 118 | Roles: 119 | - !Ref ECSRole 120 | 121 | {{stackNoDash}}TaskDefinition: 122 | Type: AWS::ECS::TaskDefinition 123 | Properties: 124 | Family: {{stackName}}-TaskDefinition 125 | ContainerDefinitions: 126 | - Name: {{stackNoDash}}TaskDefinition 127 | Essential: true 128 | Image: {{ecs.image}} 129 | MemoryReservation: {{ecs.memory}} 130 | PortMappings: 131 | - ContainerPort: 8501 132 | LogConfiguration: 133 | LogDriver: awslogs 134 | Options: 135 | awslogs-group: !Ref {{stackNoDash}}DockerLogs 136 | awslogs-region: !Sub ${AWS::Region} 137 | 138 | {{stackNoDash}}DockerLogs: 139 | Type: AWS::Logs::LogGroup 140 | Properties: 141 | LogGroupName: {{stackName}}-ecs-docker 142 | 143 | {{stackNoDash}}ECSService: 144 | Type: AWS::ECS::Service 145 | DependsOn: 146 | - {{stackNoDash}}ECSAutoScalingGroup 147 | Properties: 148 | Cluster: !Ref {{stackNoDash}}ECSCluster 149 | DesiredCount: 1 150 | TaskDefinition: !Ref {{stackNoDash}}TaskDefinition 151 | DeploymentConfiguration: 152 | MaximumPercent: 100 153 | MinimumHealthyPercent: 0 154 | LoadBalancers: 155 | - ContainerName: {{stackNoDash}}TaskDefinition 156 | ContainerPort: 8501 157 | TargetGroupArn: !Ref {{stackNoDash}}TargetGroup 158 | 159 | {{stackNoDash}}ECSCluster: 160 | Type: AWS::ECS::Cluster 161 | 162 | {{stackNoDash}}ContainerInstanceLaunch: 163 | Type: AWS::AutoScaling::LaunchConfiguration 164 | Metadata: 165 | AWS::CloudFormation::Init: 166 | config: 167 | commands: 168 | 01_add_instance_to_cluster: 169 | command: !Sub | 170 | #!/bin/bash 171 | echo ECS_CLUSTER=${ {{stackNoDash}}ECSCluster} >> /etc/ecs/ecs.config 172 | echo ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION=1m >> /etc/ecs/ecs.config 173 | files: 174 | "/etc/cfn/cfn-hup.conf": 175 | content: !Sub | 176 | [main] 177 | stack=${AWS::StackId} 178 | region=${AWS::Region} 179 | mode: '000400' 180 | owner: root 181 | group: root 182 | "/etc/cfn/hooks.d/cfn-auto-reloader.conf": 183 | content: !Sub | 184 | [cfn-auto-reloader-hook] 185 | triggers=post.update 186 | path=Resources.{{stackNoDash}}ContainerInstanceLaunch.Metadata.AWS::CloudFormation::Init 187 | action=/opt/aws/bin/cfn-init -v --stack ${AWS::StackName} --resource {{stackNoDash}}ContainerInstanceLaunch --region ${AWS::Region} 188 | runas=root 189 | services: 190 | sysvinit: 191 | cfn-hup: 192 | enabled: 'true' 193 | ensureRunning: 'true' 194 | files: 195 | - "/etc/cfn/cfn-hup.conf" 196 | - "/etc/cfn/hooks.d/cfn-auto-reloader.conf" 197 | Properties: 198 | SecurityGroups: 199 | - !Ref {{stackNoDash}}ECSHostSecurityGroup 200 | ImageId: !FindInMap [AWSRegionToAMI, !Ref "AWS::Region", AMIID] 201 | InstanceType: {{ecs.instanceType}} 202 | IamInstanceProfile: !Ref {{stackNoDash}}InstanceProfile 203 | BlockDeviceMappings: 204 | - DeviceName: "/dev/xvdcz" 205 | Ebs: 206 | DeleteOnTermination: true 207 | VolumeSize: 100 208 | VolumeType: gp2 209 | KeyName: {{ ecs.keyPairName }} 210 | UserData: 211 | "Fn::Base64": !Join 212 | - "" 213 | - - "#cloud-config\n" 214 | - "\nruncmd:\n" 215 | - " - yum install -y aws-cfn-bootstrap\n" 216 | - !Sub " - /opt/aws/bin/cfn-init -v --stack ${AWS::StackName} --resource {{stackNoDash}}ContainerInstanceLaunch --region ${AWS::Region}\n" 217 | - !Sub " - /opt/aws/bin/cfn-signal -e $? 
--stack ${AWS::StackName} --resource {{stackNoDash}}ECSAutoScalingGroup --region ${AWS::Region}\n" 218 | DependsOn: 219 | - {{stackNoDash}}ECSHostSecurityGroup 220 | 221 | {{stackNoDash}}ECSAutoScalingGroup: 222 | Type: AWS::AutoScaling::AutoScalingGroup 223 | UpdatePolicy: 224 | AutoScalingRollingUpdate: 225 | MinInstancesInService: 0 226 | Properties: 227 | AvailabilityZones: 228 | - {{ecs.availabilityZone}} 229 | LaunchConfigurationName: !Ref {{stackNoDash}}ContainerInstanceLaunch 230 | MinSize: 1 231 | MaxSize: {{ ecs.maxInstances }} 232 | DesiredCapacity: {{ ecs.desiredInstances }} 233 | Tags: 234 | - Key: Name 235 | Value: "{{stackName}} ECS" 236 | PropagateAtLaunch: true 237 | - Key: Project 238 | Value: {{projectTag}} 239 | PropagateAtLaunch: true 240 | - Key: Stack 241 | Value: {{stackName}} 242 | PropagateAtLaunch: true 243 | 244 | {{stackNoDash}}ECSHostSecurityGroup: 245 | Type: AWS::EC2::SecurityGroup 246 | Properties: 247 | VpcId: {{vpc}} 248 | GroupDescription: Access to the ECS hosts and the tasks/containers that run on them 249 | SecurityGroupIngress: 250 | # Only allow inbound access to ECS from the ELB 251 | - SourceSecurityGroupId: !Ref {{stackNoDash}}LoadBalancerSecurityGroup 252 | IpProtocol: -1 253 | 254 | ################################################# 255 | # ECS config END 256 | ################################################# 257 | 258 | ################################################# 259 | # RDS config BEGIN 260 | ################################################# 261 | 262 | {{stackNoDash}}ResultsDB: 263 | Type: AWS::RDS::DBInstance 264 | Properties: 265 | DBName: ResultsDB 266 | AllocatedStorage: {{rds.storage}} 267 | DBInstanceClass: {{rds.instanceType}} 268 | Engine: postgres 269 | EngineVersion: 9.6.2 270 | MasterUsername: {{rds.username}} 271 | MasterUserPassword: {{rds.password}} 272 | Tags: 273 | - Key: Project 274 | Value: {{projectTag}} 275 | - Key: Stack 276 | Value: {{stackName}} 277 | 278 | ################################################# 279 | # RDS config END 280 | ################################################# 281 | 282 | ################################################# 283 | # Load Balancer BEGIN 284 | ################################################# 285 | 286 | {{stackNoDash}}LoadBalancer: 287 | Type: AWS::ElasticLoadBalancingV2::LoadBalancer 288 | Properties: 289 | SecurityGroups: 290 | - !Ref {{stackNoDash}}LoadBalancerSecurityGroup 291 | Subnets: 292 | {{#each subnets}} 293 | - {{{this}}} 294 | {{/each}} 295 | Tags: 296 | - Key: Project 297 | Value: {{projectTag}} 298 | - Key: Stack 299 | Value: {{stackName}} 300 | 301 | {{stackNoDash}}TargetGroup: 302 | Type: AWS::ElasticLoadBalancingV2::TargetGroup 303 | Properties: 304 | VpcId: {{vpc}} 305 | Port: 80 306 | Protocol: HTTP 307 | Matcher: 308 | HttpCode: 200-299 309 | HealthCheckIntervalSeconds: 30 310 | HealthCheckPath: {{predictionPath}} 311 | HealthCheckProtocol: HTTP 312 | HealthCheckTimeoutSeconds: 5 313 | HealthyThresholdCount: 5 314 | DependsOn: 315 | - {{stackNoDash}}LoadBalancer 316 | 317 | {{stackNoDash}}LoadBalancerListener: 318 | Type: AWS::ElasticLoadBalancingV2::Listener 319 | Properties: 320 | LoadBalancerArn: !Ref {{stackNoDash}}LoadBalancer 321 | Port: 80 322 | Protocol: HTTP 323 | DefaultActions: 324 | - Type: forward 325 | TargetGroupArn: !Ref {{stackNoDash}}TargetGroup 326 | 327 | {{stackNoDash}}ListenerRule: 328 | Type: AWS::ElasticLoadBalancingV2::ListenerRule 329 | Properties: 330 | ListenerArn: !Ref {{stackNoDash}}LoadBalancerListener 331 | Priority: 2 332 | 
Conditions: 333 | - Field: path-pattern 334 | Values: 335 | - {{{predictionPath}}} 336 | Actions: 337 | - TargetGroupArn: !Ref {{stackNoDash}}TargetGroup 338 | Type: forward 339 | 340 | {{stackNoDash}}LoadBalancerSecurityGroup: 341 | Type: AWS::EC2::SecurityGroup 342 | Properties: 343 | VpcId: {{vpc}} 344 | GroupDescription: Access to the load balancer that sits in front of ECS 345 | SecurityGroupIngress: 346 | # Allow access from anywhere to our ECS services 347 | - CidrIp: 0.0.0.0/0 348 | IpProtocol: -1 349 | 350 | ################################################# 351 | # Load Balancer END 352 | ################################################# 353 | 354 | 355 | ################################################# 356 | # IAM config BEGIN 357 | ################################################# 358 | 359 | LambdaProcessingRole: 360 | Type: AWS::IAM::Role 361 | Properties: 362 | RoleName: "{{stackName}}-lambda-processing" 363 | AssumeRolePolicyDocument: 364 | Version: '2012-10-17' 365 | Statement: 366 | - Effect: Allow 367 | Principal: 368 | Service: lambda.amazonaws.com 369 | Action: sts:AssumeRole 370 | Path: "/" 371 | Policies: 372 | - PolicyName: ProcessingLambda 373 | PolicyDocument: 374 | Version: '2012-10-17' 375 | Statement: 376 | - Effect: Allow 377 | Action: 378 | - lambda:GetFunction 379 | - lambda:invokeFunction 380 | - logs:CreateLogGroup 381 | - logs:CreateLogStream 382 | - logs:DescribeLogStreams 383 | - logs:PutLogEvents 384 | Resource: "*" 385 | # Allow writing to ingest buckets 386 | - Effect: Allow 387 | Action: 388 | - s3:AbortMultipartUpload 389 | - s3:Get* 390 | - s3:Put* 391 | - s3:List* 392 | - s3:DeleteObject 393 | - s3:DeleteObjectVersion 394 | Resource: 395 | - !Sub "arn:aws:s3:::{{buckets.internal}}" 396 | - !Sub "arn:aws:s3:::{{buckets.internal}}/*" 397 | # Allow access to SQS 398 | - Effect: Allow 399 | Action: 400 | - sqs:SendMessage 401 | - sqs:ReceiveMessage 402 | - sqs:ChangeMessageVisibility 403 | - sqs:DeleteMessage 404 | - sqs:GetQueueUrl 405 | - sqs:GetQueueAttributes 406 | Resource: !Sub "arn:aws:sqs:${AWS::Region}:${AWS::AccountId}:{{stackNoDash}}TileQueue" 407 | 408 | ECSRole: 409 | Type: AWS::IAM::Role 410 | Properties: 411 | RoleName: !Sub "{{stackName}}-ecs-role" 412 | AssumeRolePolicyDocument: 413 | Version: '2012-10-17' 414 | Statement: 415 | - Effect: Allow 416 | Principal: 417 | Service: 418 | - ec2.amazonaws.com 419 | - ecs.amazonaws.com 420 | Action: sts:AssumeRole 421 | Path: "/" 422 | Policies: 423 | - PolicyName: ECSRole 424 | PolicyDocument: 425 | Version: '2012-10-17' 426 | Statement: 427 | - Effect: Allow 428 | Action: 429 | - cloudwatch:GetMetricStatistics 430 | - ecr:BatchCheckLayerAvailability 431 | - ecr:BatchGetImage 432 | - ecr:GetAuthorizationToken 433 | - ecr:GetDownloadUrlForLayer 434 | - ec2:AuthorizeSecurityGroupIngress 435 | - ec2:Describe* 436 | - ecs:DeregisterContainerInstance 437 | - ecs:DescribeClusters 438 | - ecs:DescribeContainerInstances 439 | - ecs:DescribeServices 440 | - ecs:DiscoverPollEndpoint 441 | - ecs:ListContainerInstances 442 | - ecs:ListServices 443 | - ecs:ListTaskDefinitions 444 | - ecs:ListTasks 445 | - ecs:Poll 446 | - ecs:RegisterContainerInstance 447 | - ecs:RunTask 448 | - ecs:StartTelemetrySession 449 | - ecs:Submit* 450 | - lambda:GetFunction 451 | - lambda:invokeFunction 452 | - logs:CreateLogGroup 453 | - logs:CreateLogStream 454 | - logs:DescribeLogStreams 455 | - logs:PutLogEvents 456 | Resource: "*" 457 | 458 | # Allow interaction with internal buckets 459 | - Effect: Allow 460 | Action: 461 
| - s3:AbortMultipartUpload 462 | - s3:Get* 463 | - s3:Put* 464 | - s3:List* 465 | - s3:DeleteObject 466 | - s3:DeleteObjectVersion 467 | Resource: 468 | - !Sub "arn:aws:s3:::{{buckets.internal}}" 469 | - !Sub "arn:aws:s3:::{{buckets.internal}}/*" 470 | 471 | # Allow interaction with the load balancer 472 | - Effect: Allow 473 | Action: 474 | - elasticloadbalancing:* 475 | Resource: "*" 476 | 477 | 478 | 479 | ################################################# 480 | # IAM config END 481 | ################################################# 482 | 483 | Mappings: 484 | AWSRegionToAMI: 485 | DOCS: 486 | LIST: http://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-optimized_AMI.html 487 | us-east-1: 488 | AMIID: ami-07eb64b216d4d3522 # hardcoded, built via https://github.com/azavea/raster-vision-aws#create-the-custom-ami 489 | 490 | Outputs: 491 | dbConnectionString: 492 | Value: 493 | Fn::Join: 494 | - '' 495 | - - 'postgres://{{rds.username}}:{{rds.password}}@' 496 | - !GetAtt {{stackNoDash}}ResultsDB.Endpoint.Address 497 | - ':' 498 | - !GetAtt {{stackNoDash}}ResultsDB.Endpoint.Port 499 | - '/' 500 | - ResultsDB 501 | queueURL: 502 | Value: !Ref {{stackNoDash}}TileQueue 503 | modelEndpoint: 504 | Value: 505 | Fn::Join: 506 | - '' 507 | - - 'http://' 508 | - !GetAtt {{stackNoDash}}LoadBalancer.DNSName 509 | - {{{predictionPath}}} 510 | -------------------------------------------------------------------------------- /config/config.example.yml: -------------------------------------------------------------------------------- 1 | default: 2 | stage: dev 3 | stackName: your-stack 4 | stackNoDash: YourStack 5 | projectTag: project 6 | capabilities: 7 | - CAPABILITY_NAMED_IAM 8 | buckets: 9 | internal: your-bucket # existing s3 bucket to store deployment artifacts 10 | 11 | lambdas: 12 | DownloadAndPredict: 13 | handler: download_and_predict.handler.handler 14 | timeout: 60 15 | memory: 512 16 | runtime: python3.7 17 | source: lambda/package.zip 18 | queueTrigger: true 19 | concurrent: 5 20 | envs: 21 | TILE_ACCESS_TOKEN: '{{TILE_ACCESS_TOKEN}}' 22 | TILE_ENDPOINT: 'https://example.com/{}/{}/{}.jpg?access_token={}' 23 | 24 | rds: 25 | username: '{{RDS_USERNAME}}' 26 | password: '{{RDS_PASSWORD}}' 27 | storage: 20 28 | instanceType: 'db.t2.medium' 29 | 30 | vpc: your-vpc # existing VPC containing the two subnets below 31 | subnets: 32 | - subnet 1 33 | - subnet 2 34 | 35 | ecs: 36 | availabilityZone: us-east-1a 37 | maxInstances: 1 38 | desiredInstances: 1 39 | keyPairName: your-key-pair 40 | instanceType: t2.nano # replace with a GPU instance for faster predictions (and higher costs) 41 | image: tensorflow/serving:latest # docker image containing your inference model built with TF Serving 42 | memory: 1000 # replace with the memory required by your TF Serving docker image 43 | 44 | sqs: 45 | visibilityTimeout: 60 46 | maxReceiveCount: 5 47 | 48 | predictionPath: '/v1/models/your_model' # path to your model on the TF Serving docker image; don't include :predict 49 | -------------------------------------------------------------------------------- /config/kes.js: -------------------------------------------------------------------------------- 1 | const { Kes } = require('kes') 2 | 3 | // Override the KES class to include useful post-deploy helpers 4 | class UpdatedKes extends Kes { 5 | opsStack () { 6 | return super.opsStack() 7 | .then(() => this.describeCF()) 8 | .then((r) => { 9 | let output = r.Stacks[0].Outputs 10 | let dbConnection = output.find(o => o.OutputKey === 
'dbConnectionString')['OutputValue'] 11 | let queueURL = output.find(o => o.OutputKey === 'queueURL')['OutputValue'] 12 | return console.log(` 13 | The stack ${r.Stacks[0].StackName} is deployed or updated. 14 | - The database is available at: ${dbConnection} 15 | - The queue is available at ${queueURL} 16 | 17 | Is this the first time setting up this stack? Run the following command to set up the database: 18 | 19 | $ yarn setup ${dbConnection} 20 | ` 21 | ) 22 | }) 23 | } 24 | } 25 | 26 | module.exports = UpdatedKes 27 | -------------------------------------------------------------------------------- /db/knexfile.js: -------------------------------------------------------------------------------- 1 | var path = require('path') 2 | module.exports = { 3 | remote: { 4 | client: 'pg', 5 | debug: process.env.KNEX_DEBUG || false, 6 | connection: process.env.DATABASE_URL, 7 | migrations: { 8 | directory: path.join(__dirname, 'migrations') 9 | } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /db/migrations/20190122223855_init.js: -------------------------------------------------------------------------------- 1 | exports.up = async function (knex) { 2 | try { 3 | return knex.schema.createTable('results', t => { 4 | t.string('tile').primary() 5 | t.jsonb('output') 6 | }) 7 | } catch (e) { 8 | console.error(e) 9 | } 10 | } 11 | 12 | exports.down = async function (knex) { 13 | return knex.schema.dropTable('results') 14 | } 15 | -------------------------------------------------------------------------------- /db/setup.sh: -------------------------------------------------------------------------------- 1 | DATABASE_URL=$1 2 | DATABASE_URL=$DATABASE_URL knex migrate:latest --env remote --knexfile db/knexfile.js 3 | -------------------------------------------------------------------------------- /diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/developmentseed/chip-n-scale-queue-arranger/541bc104ab895e1751d2f38e6c40868c12209360/diagram.png -------------------------------------------------------------------------------- /lambda/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM lambci/lambda:build-python3.7 2 | 3 | WORKDIR /tmp 4 | 5 | ENV PACKAGE_PREFIX /tmp/python 6 | 7 | ################################################################################ 8 | # CREATE PACKAGE # 9 | ################################################################################ 10 | COPY download_and_predict download_and_predict 11 | COPY setup.py setup.py 12 | 13 | ENV \ 14 | LANG=en_US.UTF-8 \ 15 | LC_ALL=en_US.UTF-8 \ 16 | CFLAGS="--std=c99" 17 | 18 | RUN pip3 install . 
--no-binary numpy -t $PACKAGE_PREFIX -U 19 | 20 | ################################################################################ 21 | # REDUCE PACKAGE SIZE # 22 | ################################################################################ 23 | RUN rm -rdf $PACKAGE_PREFIX/boto3/ \ 24 | && rm -rdf $PACKAGE_PREFIX/botocore/ \ 25 | && rm -rdf $PACKAGE_PREFIX/docutils/ \ 26 | && rm -rdf $PACKAGE_PREFIX/dateutil/ \ 27 | && rm -rdf $PACKAGE_PREFIX/jmespath/ \ 28 | && rm -rdf $PACKAGE_PREFIX/s3transfer/ \ 29 | && rm -rdf $PACKAGE_PREFIX/numpy/doc/ 30 | 31 | # Leave module precompiles for faster Lambda startup 32 | RUN find $PACKAGE_PREFIX -type f -name '*.pyc' | while read f; do n=$(echo $f | sed 's/__pycache__\///' | sed 's/.cpython-[2-3][0-9]//'); cp $f $n; done; 33 | RUN find $PACKAGE_PREFIX -type d -a -name '__pycache__' -print0 | xargs -0 rm -rf 34 | RUN find $PACKAGE_PREFIX -type f -a -name '*.py' -print0 | xargs -0 rm -f 35 | 36 | ################################################################################ 37 | # CREATE ARCHIVE # 38 | ################################################################################ 39 | RUN cd $PACKAGE_PREFIX && zip -r9q /tmp/package.zip * 40 | 41 | # Cleanup 42 | RUN rm -rf $PACKAGE_PREFIX 43 | -------------------------------------------------------------------------------- /lambda/Makefile: -------------------------------------------------------------------------------- 1 | 2 | SHELL = /bin/bash 3 | 4 | build: 5 | docker build --tag lambda:latest . 6 | docker run --name lambda -itd lambda:latest /bin/bash 7 | docker cp lambda:/tmp/package.zip package.zip 8 | docker stop lambda 9 | docker rm lambda -------------------------------------------------------------------------------- /lambda/download_and_predict/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/developmentseed/chip-n-scale-queue-arranger/541bc104ab895e1751d2f38e6c40868c12209360/lambda/download_and_predict/__init__.py -------------------------------------------------------------------------------- /lambda/download_and_predict/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | Lambda for downloading images, packaging them for prediction, sending them 3 | to a remote ML serving image, and saving them 4 | @author:Development Seed 5 | """ 6 | 7 | import json 8 | from functools import reduce 9 | from io import BytesIO 10 | from base64 import b64encode 11 | from urllib.parse import urlparse 12 | from typing import Dict, List, NamedTuple, Callable, Optional, Tuple, Any, Iterator 13 | 14 | from mercantile import Tile 15 | import requests 16 | import pg8000 17 | 18 | from download_and_predict.custom_types import SQSEvent 19 | 20 | class DownloadAndPredict(object): 21 | """ 22 | base object DownloadAndPredict implementing all necessary methods to 23 | make machine learning predictions 24 | """ 25 | 26 | def __init__(self, imagery: str, db: str, prediction_endpoint: str): 27 | super(DownloadAndPredict, self).__init__() 28 | self.imagery = imagery 29 | self.db = db 30 | self.prediction_endpoint = prediction_endpoint 31 | 32 | @staticmethod 33 | def get_tiles(event: SQSEvent) -> List[Tile]: 34 | """ 35 | Return the body of our incoming SQS messages as an array of mercantile Tiles 36 | Expects events of the following format: 37 | 38 | { 'Records': [ { "body": '{ "x": 4, "y": 5, "z":3 }' }] } 39 | 40 | """ 41 | return [ 42 | Tile(*json.loads(record['body']).values()) 43 | 
for record 44 | in event['Records'] 45 | ] 46 | 47 | 48 | @staticmethod 49 | def b64encode_image(image_binary:bytes) -> str: 50 | return b64encode(image_binary).decode('utf-8') 51 | 52 | 53 | def get_images(self, tiles: List[Tile]) -> Iterator[Tuple[Tile, bytes]]: 54 | for tile in tiles: 55 | url = self.imagery.format(x=tile.x, y=tile.y, z=tile.z) 56 | r = requests.get(url) 57 | yield (tile, r.content) 58 | 59 | 60 | def get_prediction_payload(self, tiles:List[Tile]) -> Tuple[List[Tile], str]: 61 | """ 62 | tiles: list mercantile Tiles 63 | imagery: str an imagery API endpoint with three variables {z}/{x}/{y} to replace 64 | 65 | Return: 66 | - an array of b64 encoded images to send to our prediction endpoint 67 | - a corresponding array of tile indices 68 | 69 | These arrays are returned together because they are parallel operations: we 70 | need to match up the tile indicies with their corresponding images 71 | """ 72 | tiles_and_images = self.get_images(tiles) 73 | tile_indices, images = zip(*tiles_and_images) 74 | 75 | instances = [dict(image_bytes=dict(b64=self.b64encode_image(img))) for img in images] 76 | payload = json.dumps(dict(instances=instances)) 77 | 78 | return (list(tile_indices), payload) 79 | 80 | def post_prediction(self, payload:str) -> Dict[str, Any]: 81 | r = requests.post(self.prediction_endpoint, data=payload) 82 | r.raise_for_status() 83 | return r.json() 84 | 85 | def save_to_db(self, tiles:List[Tile], results:List[Any], result_wrapper:Optional[Callable]=None) -> None: 86 | """ 87 | Save our prediction results to the provided database 88 | tiles: list mercantile Tiles 89 | results: list of predictions 90 | db: str database connection string 91 | 92 | """ 93 | db = urlparse(self.db) 94 | 95 | conn = pg8000.connect( 96 | user=db.username, 97 | password=db.password, 98 | host=db.hostname, 99 | database=db.path[1:], 100 | port=db.port 101 | ) 102 | cursor = conn.cursor() 103 | 104 | for i, output in enumerate(results): 105 | result = result_wrapper(output) if result_wrapper else output 106 | cursor.execute("INSERT INTO results VALUES (%s, %s) ON CONFLICT (tile) DO UPDATE SET output = %s", (tiles[i], result, result)) 107 | 108 | conn.commit() 109 | conn.close() 110 | -------------------------------------------------------------------------------- /lambda/download_and_predict/custom_types.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Any 2 | 3 | SQSEvent = Dict[str, List[Dict[str, Any]]] 4 | -------------------------------------------------------------------------------- /lambda/download_and_predict/handler.py: -------------------------------------------------------------------------------- 1 | """Example AWS Lambda function for chip-n-scale""" 2 | 3 | import os 4 | import pg8000 5 | from typing import Dict, Any 6 | 7 | from download_and_predict.base import DownloadAndPredict 8 | from download_and_predict.custom_types import SQSEvent 9 | 10 | def handler(event: SQSEvent, context: Dict[str, Any]) -> None: 11 | # read all our environment variables to throw errors early 12 | imagery = os.getenv('TILE_ENDPOINT') 13 | db = os.getenv('DATABASE_URL') 14 | prediction_endpoint=os.getenv('PREDICTION_ENDPOINT') 15 | 16 | assert(imagery) 17 | assert(db) 18 | assert(prediction_endpoint) 19 | 20 | # instantiate our DownloadAndPredict class 21 | dap = DownloadAndPredict( 22 | imagery=imagery, 23 | db=db, 24 | prediction_endpoint=prediction_endpoint 25 | ) 26 | 27 | # get tiles from our SQS event 28 | tiles = 
dap.get_tiles(event) 29 | 30 | # construct a payload for our prediction endpoint 31 | tile_indices, payload = dap.get_prediction_payload(tiles) 32 | 33 | # send prediction request 34 | content = dap.post_prediction(payload) 35 | 36 | # save prediction request to db 37 | dap.save_to_db( 38 | tile_indices, 39 | content['predictions'], 40 | result_wrapper=lambda x: pg8000.PGJsonb(x) 41 | ) 42 | -------------------------------------------------------------------------------- /lambda/download_and_predict/mercantile.pyi: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Any, Callable 2 | 3 | class Tile(): 4 | @property 5 | def x(self) -> int: ... 6 | @property 7 | def y(self) -> int: ... 8 | @property 9 | def z(self) -> int: ... 10 | 11 | quadkey = Callable[[Any], str] 12 | -------------------------------------------------------------------------------- /lambda/mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | 3 | scripts_are_modules = True 4 | show_traceback = True 5 | ignore_missing_imports = True 6 | mypy_path = lambdas/download_and_predict 7 | namespace_packages = True 8 | -------------------------------------------------------------------------------- /lambda/package.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/developmentseed/chip-n-scale-queue-arranger/541bc104ab895e1751d2f38e6c40868c12209360/lambda/package.zip -------------------------------------------------------------------------------- /lambda/setup.py: -------------------------------------------------------------------------------- 1 | """Setup.""" 2 | 3 | from setuptools import setup, find_packages 4 | 5 | inst_reqs = [ 6 | "mercantile", 7 | "pg8000==1.16.4", 8 | "requests", 9 | "pillow", 10 | "numpy" 11 | ] 12 | extra_reqs = {"test": ["pytest", "pytest-cov"]} 13 | 14 | setup( 15 | name="app", 16 | version="0.0.1", 17 | description=u"Lambda Download and Predict", 18 | python_requires=">=3", 19 | keywords="AWS-Lambda Python", 20 | packages=find_packages(exclude=["ez_setup", "examples", "tests"]), 21 | include_package_data=True, 22 | zip_safe=False, 23 | install_requires=inst_reqs, 24 | extras_require=extra_reqs, 25 | ) 26 | -------------------------------------------------------------------------------- /lambda/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/developmentseed/chip-n-scale-queue-arranger/541bc104ab895e1751d2f38e6c40868c12209360/lambda/tests/__init__.py -------------------------------------------------------------------------------- /lambda/tests/handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/developmentseed/chip-n-scale-queue-arranger/541bc104ab895e1751d2f38e6c40868c12209360/lambda/tests/handler.py -------------------------------------------------------------------------------- /lambda/tests/test_base.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from mercantile import Tile 3 | 4 | from download_and_predict.base import DownloadAndPredict 5 | 6 | def test_get_tiles(): 7 | # create a class with fake environment variables 8 | dap = DownloadAndPredict( 9 | imagery='https://example.com/{z}/{x}/{y}.png', 10 | db='postgres://usr:pw@host:port/database', 11 | 
prediction_endpoint='https://myloadbalancer.com/v1/models/ml:predict' 12 | ) 13 | 14 | # create an example SQS event which invokes a lambda 15 | event = { 'Records': [ { "body": '{ "x": 4, "y": 5, "z":3 }' }] } 16 | 17 | tiles = dap.get_tiles(event) 18 | fixture_tiles = [Tile(x=4, y=5, z=3)] 19 | 20 | assert(tiles == fixture_tiles) 21 | -------------------------------------------------------------------------------- /lambda_examples/README.md: -------------------------------------------------------------------------------- 1 | ## Lambda Examples 2 | *(how to customize this repo for running your ML inference task)* 3 | 4 | The primary functionality of this repository is contained in the lambda function located at `lambda/download_and_predict/handler.py`. It is intentionally very little code to allow for easy configuration: with the imports and assertions removed, there is one class instantiation and four method calls. The class `DownloadAndPredict` provides the base functionality required to run machine learning inference: 5 | - Creates a list of Mercator tiles based on an input SQS event. 6 | - Downloads those tiles from a TMS/XYS tile endpoint and puts them in the proper format for sending them to Tensorflow Serving or an equivalent Docker image. 7 | - Sends the payload to the prediction endpoint. 8 | - Saves the result into a database. 9 | 10 | There are two primary options to customize this workflow: 11 | - Add new code to `handler.py` to manipulate the returned values (`tiles`, `payload`, `content`, etc.) 12 | - Subclass `DownloadAndPredict` to provide alternative methods for the operations listed above. 13 | 14 | Any additional third-party libraries should be added to `lambda/setup.py` for inclusion in the lambda function deployment. 15 | 16 | Examples of customization are listed in this library to show how `chip-n-scale-queue-arranger` can be used with a variety of different tools. 17 | 18 | - [Download imagery from Sentinel Hub](sentinel_hub.py). For more information, check out the [`sentinelhub-py` docs](https://sentinelhub-py.readthedocs.io/en/latest/). 19 | - [Download larger tiles and create smaller tiles for inference](super_tiles.py). This is useful for reducing the load on the imagery/tile endpoint. 20 | - [Save results to `ml-enabler`](ml_enabler.py). For more information, check out the [`ml-enabler` repo](https://github.com/hotosm/ml-enabler). 
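As a minimal sketch of the subclassing approach described above, a custom class that only overrides `get_images` could look like the following. The header-based auth scheme and the `api_key` parameter are hypothetical, included only to show the shape of an override; everything else in the handler stays identical to the base example.

```python
from typing import Iterator, List, Tuple

import requests
from mercantile import Tile

from download_and_predict.base import DownloadAndPredict


class HeaderAuthDownloader(DownloadAndPredict):
    """Hypothetical example: fetch tiles from an endpoint that expects an auth header."""

    def __init__(self, imagery: str, db: str, prediction_endpoint: str, api_key: str):
        super().__init__(imagery=imagery, db=db, prediction_endpoint=prediction_endpoint)
        self.api_key = api_key

    def get_images(self, tiles: List[Tile]) -> Iterator[Tuple[Tile, bytes]]:
        # Same contract as the base class: yield (tile, image bytes) pairs
        for tile in tiles:
            url = self.imagery.format(x=tile.x, y=tile.y, z=tile.z)
            r = requests.get(url, headers={"Authorization": f"Bearer {self.api_key}"})
            yield (tile, r.content)
```

The handler then reads the extra configuration (here, a hypothetical `API_KEY` environment variable) alongside the usual ones and instantiates `HeaderAuthDownloader` instead of `DownloadAndPredict`; the `get_tiles`, `get_prediction_payload`, `post_prediction`, and `save_to_db` calls are unchanged.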
21 | -------------------------------------------------------------------------------- /lambda_examples/ml_enabler.py: -------------------------------------------------------------------------------- 1 | """Example AWS Lambda function for chip-n-scale with ml_enabler""" 2 | 3 | import os 4 | import datetime 5 | from typing import Dict, Any, List, Optional, Callable 6 | from io import BytesIO 7 | from urllib.parse import urlparse 8 | 9 | from download_and_predict.base import DownloadAndPredict 10 | from download_and_predict.custom_types import SQSEvent 11 | 12 | import pg8000 13 | from mercantile import Tile, quadkey 14 | 15 | class MLEnablerSave(DownloadAndPredict): 16 | def __init__(self, imagery: str, db: str, prediction_endpoint: str, prediction_id: str): 17 | # type annotatation error ignored, re: https://github.com/python/mypy/issues/5887 18 | super(DownloadAndPredict, self).__init__(dict( # type: ignore 19 | imagery=imagery, 20 | db=db, 21 | prediction_endpoint=prediction_endpoint 22 | )) # 23 | self.prediction_id = prediction_id 24 | 25 | def save_to_db(self, tiles:List[Tile], results:List[Any], result_wrapper:Optional[Callable]=None) -> None: 26 | db = urlparse(self.db) 27 | 28 | conn = pg8000.connect( 29 | user=db.username, 30 | password=db.password, 31 | host=db.hostname, 32 | database=db.path[1:], 33 | port=db.port 34 | ) 35 | cursor = conn.cursor() 36 | 37 | for i, output in enumerate(results): 38 | quadkey = quadkey(tiles[i]) 39 | # centroid = db.Column(Geometry('POINT', srid=4326)) 40 | predictions = pg8000.PGJsonb(output) 41 | cursor.execute("INSERT INTO mlenabler VALUES (null, %s, %s, %s) ON CONFLICT (id) DO UPDATE SET output = %s", (self.prediction_id, quadkey, predictions, predictions)) 42 | 43 | conn.commit() 44 | conn.close() 45 | 46 | 47 | def handler(event: SQSEvent, context: Dict[str, Any]) -> None: 48 | # read all our environment variables to throw errors early 49 | imagery = os.getenv('TILE_ENDPOINT') 50 | db = os.getenv('DATABASE_URL') 51 | prediction_endpoint=os.getenv('PREDICTION_ENDPOINT') 52 | prediction_id = os.getenv('PREDICTION_ID') 53 | 54 | assert(imagery) 55 | assert(db) 56 | assert(prediction_endpoint) 57 | assert(prediction_id) 58 | 59 | # instantiate our custom DownloadAndPredict class 60 | dap = MLEnablerSave( 61 | imagery=imagery, 62 | db=db, 63 | prediction_endpoint=prediction_endpoint, 64 | prediction_id=prediction_id 65 | ) 66 | 67 | # now that we've defined the behavior of our custom class, all the below 68 | # methods are identical to those in the base example (without the db 69 | # results wrapper) 70 | 71 | # get tiles from our SQS event 72 | tiles = dap.get_tiles(event) 73 | 74 | # construct a payload for our prediction endpoint 75 | tile_indices, payload = dap.get_prediction_payload(tiles) 76 | 77 | # send prediction request 78 | content = dap.post_prediction(payload) 79 | 80 | # save prediction request to db 81 | dap.save_to_db( 82 | tile_indices, 83 | content['predictions'] 84 | ) 85 | -------------------------------------------------------------------------------- /lambda_examples/s3_images.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example AWS Lambda function for chip-n-scale 3 | To read images directly from S3 bucket. 
4 | """ 5 | 6 | import os 7 | from os import path as op 8 | import pg8000 9 | from typing import List, Dict, Any 10 | import boto3 11 | import json 12 | 13 | 14 | from download_and_predict.base import DownloadAndPredict 15 | from download_and_predict.custom_types import SQSEvent 16 | 17 | class S3_DownloadAndPredict(DownloadAndPredict): 18 | """ 19 | base object DownloadAndPredict implementing all necessary methods to 20 | make machine learning predictions 21 | """ 22 | 23 | def __init__(self, bucket: str, db: str, prediction_endpoint: str): 24 | super(DownloadAndPredict, self).__init__() 25 | self.bucket = bucket 26 | self.db = db 27 | self.prediction_endpoint = prediction_endpoint 28 | 29 | 30 | def get_images(self, s3_keys: List): 31 | s3_client=boto3.client('s3') 32 | for s3_file in s3_keys: 33 | key = json.loads(s3_file)['image'] 34 | response = s3_client.get_object(Bucket =self.bucket, Key = key) 35 | yield(key, response["Body"].read()) 36 | 37 | 38 | def handler(event: SQSEvent, context: Dict[str, Any]) -> None: 39 | # read all our environment variables to throw errors early 40 | bucket =os.getenv('BUCKET') 41 | db = os.getenv('DATABASE_URL') 42 | prediction_endpoint=os.getenv('PREDICTION_ENDPOINT') 43 | 44 | assert(bucket) 45 | assert(db) 46 | assert(prediction_endpoint) 47 | 48 | # instantiate our DownloadAndPredict class 49 | dap = S3_DownloadAndPredict( 50 | bucket=bucket, 51 | db=db, 52 | prediction_endpoint=prediction_endpoint 53 | ) 54 | 55 | # construct a payload for our prediction endpoint 56 | s3_keys =[record['body'] for record in event['Records']] 57 | 58 | # sent images fron s3 bucket for inference 59 | tile_indices, payload = dap.get_prediction_payload(s3_keys) 60 | 61 | # send prediction request 62 | content = dap.post_prediction(payload) 63 | 64 | # save prediction request to db 65 | dap.save_to_db( 66 | tile_indices, 67 | content['predictions'], 68 | result_wrapper=lambda x: pg8000.PGJsonb(x) 69 | ) 70 | -------------------------------------------------------------------------------- /lambda_examples/save_image.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example AWS Lambda function for chip-n-scale for saving get_images 3 | 4 | Note that this requires three corresponding changes: 5 | - a prediction docker image which returns image results as a list/array 6 | - an update to the database: column "output" needs type `bytea` 7 | - pillow and numpy need to be added to the lambda requirements 8 | """ 9 | 10 | import os 11 | import pg8000 12 | from typing import Dict, Any, List 13 | from io import BytesIO 14 | 15 | from PIL import Image 16 | import numpy as np 17 | 18 | from download_and_predict.base import DownloadAndPredict 19 | from download_and_predict.custom_types import SQSEvent 20 | 21 | def prediction_to_image(pred: List) -> bytes: 22 | img = Image.fromarray(np.array(pred).astype(np.uint8)) 23 | byts = BytesIO() 24 | img.save(byts, format='png') 25 | return byts.getvalue() 26 | 27 | def handler(event: SQSEvent, context: Dict[str, Any]) -> None: 28 | # read all our environment variables to throw errors early 29 | imagery = os.getenv('TILE_ENDPOINT') 30 | db = os.getenv('DATABASE_URL') 31 | prediction_endpoint=os.getenv('PREDICTION_ENDPOINT') 32 | 33 | assert(imagery) 34 | assert(db) 35 | assert(prediction_endpoint) 36 | 37 | # instantiate our DownloadAndPredict class 38 | dap = DownloadAndPredict( 39 | imagery=imagery, 40 | db=db, 41 | prediction_endpoint=prediction_endpoint 42 | ) 43 | 44 | # get tiles from our 
SQS event 45 | tiles = dap.get_tiles(event) 46 | 47 | # construct a payload for our prediction endpoint 48 | tile_indices, payload = dap.get_prediction_payload(tiles) 49 | 50 | # send prediction request 51 | content = dap.post_prediction(payload) 52 | 53 | # save prediction request to db 54 | dap.save_to_db( 55 | tile_indices, 56 | content['predictions'], 57 | result_wrapper=prediction_to_image 58 | ) 59 | -------------------------------------------------------------------------------- /lambda_examples/sentinel_hub.py: -------------------------------------------------------------------------------- 1 | """Example AWS Lambda function for chip-n-scale with Sentinel Hub""" 2 | 3 | import os 4 | import datetime 5 | from typing import Dict, Any, Tuple, List, Iterator 6 | from io import BytesIO 7 | 8 | from download_and_predict.base import DownloadAndPredict 9 | from download_and_predict.custom_types import SQSEvent 10 | 11 | import pg8000 12 | from sentinelhub import BBox, CRS, WmsRequest, MimeType, DataSource 13 | from PIL import Image 14 | from mercantile import bounds, Tile 15 | 16 | class SentinelHubDownloader(DownloadAndPredict): 17 | def __init__(self, imagery: str, db: str, prediction_endpoint: str, sentinel_wms_kwargs: Dict[str, Any]): 18 | # type annotatation error ignored, re: https://github.com/python/mypy/issues/5887 19 | super(DownloadAndPredict, self).__init__(dict( # type: ignore 20 | imagery=imagery, 21 | db=db, 22 | prediction_endpoint=prediction_endpoint 23 | )) 24 | self.sentinel_wms_kwargs = sentinel_wms_kwargs 25 | 26 | def get_images(self, tiles: List[Tile]) -> Iterator[Tuple[Tile, bytes]]: 27 | for tile in tiles: 28 | # convert the tile index to a BBox with a buffer 29 | x, y, z = tile 30 | bbox = BBox(bounds((x, y, z)), crs=CRS.WGS84) 31 | 32 | # request the data from SentinelHub 33 | request = WmsRequest(**dict(bbox=bbox, **self.sentinel_wms_kwargs)) 34 | image_array = request.get_data(data_filter=[0])[0] 35 | img = Image.fromarray(image_array) 36 | img_bytes = BytesIO() 37 | img.save(img_bytes, format='png') 38 | yield (tile, img_bytes.getvalue()) 39 | 40 | def handler(event: SQSEvent, context: Dict[str, Any]) -> None: 41 | # read all our environment variables to throw errors early 42 | imagery = os.getenv('TILE_ENDPOINT') 43 | db = os.getenv('DATABASE_URL') 44 | prediction_endpoint=os.getenv('PREDICTION_ENDPOINT') 45 | sh_instance_id = os.getenv('SH_INSTANCE_ID') 46 | 47 | assert(imagery) 48 | assert(db) 49 | assert(prediction_endpoint) 50 | assert(sh_instance_id) 51 | 52 | # instantiate our custom DownloadAndPredict class 53 | dap = SentinelHubDownloader( 54 | imagery=imagery, 55 | db=db, 56 | prediction_endpoint=prediction_endpoint, 57 | sentinel_wms_kwargs=dict( 58 | layer='MY-SENTINEL-HUB-LAYER', 59 | width=256, height=256, 60 | maxcc=0.20, 61 | instance_id=sh_instance_id, 62 | time=(f'2019-04-01', f'2019-07-30'), 63 | time_difference=datetime.timedelta(days=21), 64 | ) 65 | ) 66 | 67 | # now that we've defined the behavior of our custom class, all the below 68 | # methods are identical to those in the base example 69 | 70 | # get tiles from our SQS event 71 | tiles = dap.get_tiles(event) 72 | 73 | # construct a payload for our prediction endpoint 74 | tile_indices, payload = dap.get_prediction_payload(tiles) 75 | 76 | # send prediction request 77 | content = dap.post_prediction(payload) 78 | 79 | # save prediction request to db 80 | dap.save_to_db( 81 | tile_indices, 82 | content['predictions'], 83 | result_wrapper=lambda x: pg8000.PGJsonb(x) 84 | ) 85 | 
-------------------------------------------------------------------------------- /lambda_examples/super_tiles.py: -------------------------------------------------------------------------------- 1 | """Example AWS Lambda function for chip-n-scale with super tiles""" 2 | 3 | import os 4 | import datetime 5 | from typing import Dict, Any, Tuple, List, Iterator 6 | from io import BytesIO 7 | 8 | from download_and_predict.base import DownloadAndPredict 9 | from download_and_predict.custom_types import SQSEvent 10 | 11 | import pg8000 12 | from mercantile import Tile, children 13 | from rasterio.io import MemoryFile 14 | from rasterio.windows import Window 15 | import requests 16 | 17 | class SuperTileDownloader(DownloadAndPredict): 18 | def __init__(self, imagery: str, db: str, prediction_endpoint: str, model_image_size: int): 19 | # type annotatation error ignored, re: https://github.com/python/mypy/issues/5887 20 | super(DownloadAndPredict, self).__init__(dict( # type: ignore 21 | imagery=imagery, 22 | db=db, 23 | prediction_endpoint=prediction_endpoint 24 | )) 25 | self.model_image_size = model_image_size 26 | 27 | def get_images(self, tiles: List[Tile]) -> Iterator[Tuple[Tile, bytes]]: 28 | """return images cropped to a given model_image_size from an imagery endpoint""" 29 | for tile in tiles: 30 | url = self.imagery.format(x=tile.x, y=tile.y, z=tile.z) 31 | r = requests.get(url) 32 | with MemoryFile(BytesIO(r.content)) as memfile: 33 | with memfile.open() as dataset: 34 | # because of the tile indexing, we assume all tiles are square 35 | sz = dataset.width 36 | zoom_offset = sz // self.model_image_size - 1 37 | 38 | tile_indices = children(tile, zoom=zoom_offset + tile.z) 39 | tile_indices.sort() 40 | 41 | for i in range (2 ** zoom_offset): 42 | for j in range(2 ** zoom_offset): 43 | window = Window(i * sz, j * sz, (i + 1) * sz, (j + 1) * sz) 44 | yield ( 45 | tile_indices[i + j], 46 | dataset.read(window=window) 47 | ) 48 | 49 | def handler(event: SQSEvent, context: Dict[str, Any]) -> None: 50 | # read all our environment variables to throw errors early 51 | imagery = os.getenv('TILE_ENDPOINT') 52 | db = os.getenv('DATABASE_URL') 53 | prediction_endpoint=os.getenv('PREDICTION_ENDPOINT') 54 | model_image_size = os.getenv('MODEL_IMAGE_SIZE') 55 | 56 | assert(imagery) 57 | assert(db) 58 | assert(prediction_endpoint) 59 | assert(model_image_size) 60 | 61 | # instantiate our custom DownloadAndPredict class 62 | dap = SuperTileDownloader( 63 | imagery=imagery, 64 | db=db, 65 | prediction_endpoint=prediction_endpoint, 66 | model_image_size=int(model_image_size) 67 | ) 68 | 69 | # now that we've defined the behavior of our custom class, all the below 70 | # methods are identical to those in the base example 71 | 72 | # get tiles from our SQS event 73 | tiles = dap.get_tiles(event) 74 | 75 | # construct a payload for our prediction endpoint 76 | tile_indices, payload = dap.get_prediction_payload(tiles) 77 | 78 | # send prediction request 79 | content = dap.post_prediction(payload) 80 | 81 | # save prediction request to db 82 | dap.save_to_db( 83 | tile_indices, 84 | content['predictions'], 85 | result_wrapper=lambda x: pg8000.PGJsonb(x) 86 | ) 87 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ml-inference-cfn", 3 | "version": "0.1.0", 4 | "description": "Kes Deployment for Machine Learning Inference", 5 | "main": "index.js", 6 | "scripts": { 7 | 
"model": "node ./scripts/model.js", 8 | "build": "node_modules/.bin/webpack", 9 | "watch": "node_modules/.bin/webpack -w", 10 | "deploy": "AWS_SDK_LOAD_CONFIG=true kes cf deploy --kes-folder config --kes-class config/kes.js", 11 | "setup": "db/setup.sh", 12 | "delete": "AWS_SDK_LOAD_CONFIG=true kes cf delete --kes-folder config --kes-class config/kes.js", 13 | "gpu-util": "AWS_SDK_LOAD_CONFIG=true node scripts/gpu-util.js", 14 | "verify": "AWS_SDK_LOAD_CONFIG=true node scripts/verify.js", 15 | "sqs-push": "AWS_SDK_LOAD_CONFIG=true node scripts/run-sqs-push.js", 16 | "download": "AWS_SDK_LOAD_CONFIG=true node scripts/download.js", 17 | "convert-geojson": "python scripts/csv_to_geojson.py", 18 | "test": "node test/test_sqs-push.js", 19 | "tag-logs": "AWS_SDK_LOAD_CONFIG=true node scripts/tag-cloudwatch-logs.js" 20 | }, 21 | "author": "Development Seed", 22 | "license": "ISC", 23 | "dependencies": { 24 | "@google-cloud/storage": "^4.2.0", 25 | "aws-sdk": "^2.395.0", 26 | "cli-table": "^0.3.1", 27 | "d3-queue": "^3.0.7", 28 | "js-yaml": "^3.12.1", 29 | "knex": "^0.16.3", 30 | "lodash.flatten": "^4.4.0", 31 | "log-update": "^2.3.0", 32 | "minimist": "^1.2.0", 33 | "mkdirp": "^0.5.1", 34 | "node-ssh": "^5.1.2", 35 | "pg": "^7.8.0", 36 | "split": "^1.0.1", 37 | "through2-batch": "^1.1.1", 38 | "uuid": "^3.3.2" 39 | }, 40 | "devDependencies": { 41 | "axios": "^0.18.0", 42 | "babel-core": "^6.26.0", 43 | "babel-loader": "^7.1.2", 44 | "babel-plugin-transform-async-to-generator": "^6.24.1", 45 | "babel-polyfill": "^6.26.0", 46 | "dotenv": "^6.2.0", 47 | "eslint": "^4.19.1", 48 | "eslint-config-standard": "^11.0.0", 49 | "eslint-plugin-import": "^2.10.0", 50 | "eslint-plugin-node": "^6.0.1", 51 | "eslint-plugin-promise": "^3.7.0", 52 | "eslint-plugin-standard": "^3.0.1", 53 | "kes": "^2.2.7", 54 | "memorystream": "^0.3.1", 55 | "prepend-loader": "^0.0.2", 56 | "proxyquire": "^2.1.3", 57 | "sinon": "^7.4.2", 58 | "tape": "^4.11.0", 59 | "webpack": "^3.5.6" 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /scripts/csv_to_geojson.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simply script to convert a csv file of prediction results into a geojson 3 | 4 | Specify the CSV filepath and your confidence threshold. The CSV file should 5 | contain tile indices in TMS coordinates. The confidence threshold is useful if 6 | you ran prediction over a large area -- it will keep the resulting geojson 7 | much smaller on disk. Set the threshold to 0.0 to include all tile predictions 8 | in the geojson output. 9 | 10 | Requires pygeotile, geojson, and mercantile 11 | """ 12 | 13 | import csv 14 | import json 15 | import argparse 16 | import os.path as op 17 | 18 | from mercantile import feature, Tile 19 | from geojson import Feature 20 | from pygeotile.tile import Tile as Pygeo_tile 21 | 22 | 23 | def convert_csv(fname_csv, fname_geojson, tile_format, thresh_ind, thresh): 24 | """Convert tile indices in CSV file to geojson""" 25 | 26 | if not op.exists(fname_csv): 27 | raise ValueError(f'Cannot find file {fname_csv}') 28 | 29 | # Error check tile format 30 | if tile_format == 'tms': 31 | tile_func = Pygeo_tile.from_tms 32 | elif tile_format == 'google': 33 | tile_func = Pygeo_tile.from_google 34 | else: 35 | raise ValueError(f'Tile format not understood. Got: {tile_format}') 36 | 37 | if not 0 <= thresh <= 1.: 38 | raise ValueError(f"'thresh' must be on interval [0, 1]. 
Got: {thresh}") 39 | 40 | with open(fname_csv, 'r') as csvfile: 41 | with open(fname_geojson, 'w') as results: 42 | reader = csv.reader(csvfile) 43 | first_line = True 44 | 45 | # Create a FeatureCollection 46 | results.write('{"type":"FeatureCollection","features":[') 47 | next(reader) # Skip header 48 | 49 | for row in reader: 50 | 51 | # Load as pygeotile using TMS coords 52 | geot = tile_func(*[int(t) for t in row[0].split('-')]) 53 | 54 | # Create feature with mercantile 55 | feat = feature(Tile(geot.google[0], geot.google[1], geot.zoom)) 56 | 57 | # Get class prediction confidences 58 | pred = json.loads(','.join(row[1:])) 59 | pred_red = list(map(lambda x: round(x, 2), pred)) 60 | if pred_red[thresh_ind] >= thresh: 61 | # Add commas prior to any feature that isn't the first one 62 | if first_line: 63 | first_line = False 64 | else: 65 | results.write(',') 66 | 67 | pred_obj = dict(zip(map(lambda x: 'p%s' % x, 68 | range(len(pred_red))), pred_red)) 69 | 70 | results.write(json.dumps(Feature(geometry=feat['geometry'], 71 | properties=pred_obj))) 72 | 73 | # Finalize the feature FeatureCollection 74 | results.write(']}') 75 | 76 | 77 | if __name__ == "__main__": 78 | 79 | parser = argparse.ArgumentParser(description='Convert CSV of tile predictions to GeoJSON.') 80 | parser.add_argument('fname_csv', type=str, 81 | help='Filepath to CSV file needing conversion.') 82 | parser.add_argument('fname_geojson', type=str, default='results.geojson', 83 | help='Filepath to save geojson file to.') 84 | parser.add_argument('--tile-format', type=str, default='tms', 85 | help='Format of tile indices in CSV file ("tms" or "google").') 86 | parser.add_argument('--thresh-ind', type=int, default=1, 87 | help='Optional threshold class index for including a prediction.') 88 | parser.add_argument('--thresh', type=float, default=0., 89 | help='Optional threshold for including a prediction.') 90 | 91 | args = parser.parse_args() 92 | convert_csv(args.fname_csv, args.fname_geojson, args.tile_format, 93 | args.thresh_ind, args.thresh) 94 | -------------------------------------------------------------------------------- /scripts/download.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const { promisify } = require('util') 3 | 4 | const dbConfig = require('../db/knexfile').remote 5 | const db = require('knex')(dbConfig) 6 | 7 | const writeFile = promisify(fs.writeFile) 8 | const outputFile = process.argv[2] 9 | 10 | db('results').then(results => { 11 | const csv = ['tile,output'].concat(results.map(result => `${result.tile},${JSON.stringify(result.output)}`)).join('\n') 12 | return writeFile(outputFile, csv) 13 | }).then(_ => process.exit(0)) 14 | -------------------------------------------------------------------------------- /scripts/gpu-util.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const yaml = require('js-yaml') 3 | const AWS = require('aws-sdk') 4 | const NodeSSH = require('node-ssh') 5 | const flatten = require('lodash.flatten') 6 | const Table = require('cli-table') 7 | const logUpdate = require('log-update') 8 | 9 | // setup 10 | const ssh = new NodeSSH() 11 | const GPU_UTIL_QUERY = 'nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader' 12 | const getConnectParams = host => { 13 | return { 14 | host, username: 'ec2-user', privateKey: process.argv[2] 15 | } 16 | } 17 | const tableParams = { 18 | head: ['IP Address', 'Instance Type', 'GPU Utilization'], 
19 | colWidths: [24, 24, 24] 20 | } 21 | 22 | // get stackName from our config file 23 | const config = yaml.safeLoad(fs.readFileSync('config/config.yml').toString()) 24 | const stackName = config.default.stackName 25 | 26 | // find all our project EC2s and get their IP 27 | const ec2 = new AWS.EC2() 28 | ec2.describeInstances({ Filters: [{ Name: 'tag:Project', Values: [`${stackName}`] }] }) 29 | .promise() 30 | .then(resp => flatten(resp.Reservations.map(r => r.Instances))) 31 | .then(instances => { 32 | setInterval(() => { 33 | // run our promises in serial so we don't mix up our ssh connection 34 | // from: https://decembersoft.com/posts/promises-in-serial-with-array-reduce/ 35 | instances.reduce((promiseChain, instance) => { 36 | return promiseChain.then(chainResults => { 37 | return ssh.connect(getConnectParams(instance.PublicIpAddress)).then(() => { 38 | return ssh.execCommand(GPU_UTIL_QUERY).then(result => { 39 | ssh.dispose() 40 | return [ ...chainResults, [ 41 | instance.PublicIpAddress, 42 | instance.InstanceType, 43 | result.stdout 44 | ] 45 | ] 46 | }) 47 | }) 48 | }) 49 | }, Promise.resolve([])).then(results => { 50 | let table = new Table(tableParams) 51 | results.forEach(r => table.push(r)) 52 | logUpdate(table.toString()) 53 | }) 54 | }, 5000) 55 | }) 56 | -------------------------------------------------------------------------------- /scripts/model.js: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env node 2 | 3 | const Q = require('d3-queue').queue; 4 | const mkdir = require('mkdirp').sync; 5 | const pipeline = require('stream').pipeline; 6 | const fs = require('fs'); 7 | const os = require('os'); 8 | const CP = require('child_process'); 9 | const tmp = os.tmpdir() + '/' + Math.random().toString(36).substring(2, 15) + Math.random().toString(36).substring(2, 15) 10 | const path = require('path'); 11 | const argv = require('minimist')(process.argv, { 12 | boolean: ['use-gpu', 'help'], 13 | alias: {'use_gpu': 'use-gpu'} 14 | }); 15 | 16 | function help() { 17 | console.error(); 18 | console.error(' Build TFServing docker images for Chip-N-Scale given a GS model location'); 19 | console.error(); 20 | console.error('Usage:'); 21 | console.error(); 22 | console.error(' yarn model [--use_gpu]'); 23 | console.error(); 24 | console.error('Options:'); 25 | console.error(' --use_gpu builds and tags gpu tfserving image, without this flag cpu image will be used'); 26 | console.error(); 27 | } 28 | 29 | let model = argv._[2]; 30 | 31 | if (!model || argv.help) { 32 | return help(); 33 | } 34 | 35 | model = new URL(model); 36 | 37 | if (model.protocol === 's3:') { 38 | console.error('s3: models will be supported in the future'); 39 | process.exit(); 40 | } else if (model.protocol !== 'gs:') { 41 | console.error('Only gs:// protocols are supported'); 42 | process.exit(); 43 | } 44 | 45 | mkdir(tmp + '/001'); 46 | console.error(`ok - tmp dir: ${tmp}`); 47 | 48 | if (model.protocol === 'gs:') { 49 | return gs_get(model, docker); 50 | } 51 | 52 | /** 53 | * Given a Google Storage Folder containing a model, 54 | * fetch and save it to disk 55 | */ 56 | function gs_get(model, cb) { 57 | const gs = new (require('@google-cloud/storage').Storage)(); 58 | const bucket = gs.bucket(model.host); 59 | 60 | if (!process.env.GOOGLE_APPLICATION_CREDENTIALS) { 61 | console.error('GOOGLE_APPLICATION_CREDENTIALS environment var must be set'); 62 | console.error('See: https://cloud.google.com/docs/authentication/getting-started'); 63 | process.exit(); 64 | 
} 65 | 66 | const model_path = model.pathname.replace(/^\//, ''); 67 | 68 | bucket.getFiles({ 69 | prefix: model_path 70 | }, (err, files) => { 71 | if (err) return cb(err); 72 | 73 | const q = new Q(1); 74 | 75 | for (let file of files) { 76 | if (file.name[file.name.length - 1] === '/') continue; 77 | 78 | const name = path.parse(file.name.replace(model_path, '')); 79 | 80 | if (name.dir) { 81 | mkdir(path.resolve(tmp + '/001', name.dir)); 82 | } 83 | 84 | q.defer((file, name, done) => { 85 | console.error(`ok - fetching ${name.dir + '/' + name.base}`); 86 | pipeline( 87 | file.createReadStream(), 88 | fs.createWriteStream(path.resolve(tmp + '/001', name.dir, name.base)), 89 | done 90 | ); 91 | }, file, name); 92 | } 93 | 94 | q.awaitAll(cb); 95 | }); 96 | } 97 | 98 | function docker(err, res) { 99 | if (err) throw err; 100 | if (argv.use_gpu) { 101 | console.error('ok - pulling tensorflow/serving:latest-gpu docker image'); 102 | CP.execSync(` 103 | docker pull tensorflow/serving:latest-gpu 104 | `); 105 | } else { 106 | console.error('ok - pulling tensorflow/serving docker image'); 107 | CP.execSync(` 108 | docker pull tensorflow/serving 109 | `); 110 | } 111 | 112 | // Ignore errors, these are to ensure the next commands don't err 113 | try { 114 | CP.execSync(` 115 | docker kill serving_base 116 | `); 117 | } catch(err) { 118 | console.error('ok - no old task to stop'); 119 | } 120 | 121 | try { 122 | CP.execSync(` 123 | docker rm serving_base 124 | `); 125 | } catch(err) { 126 | console.error('ok - no old image to remove'); 127 | } 128 | 129 | CP.execSync(` 130 | docker run -d --name serving_base tensorflow/serving${argv.use_gpu ? ':latest-gpu' : ''} 131 | `); 132 | 133 | CP.execSync(` 134 | docker cp ${tmp}/ serving_base:/models/default/ \ 135 | `); 136 | 137 | const tag = `developmentseed/default:${Math.random().toString(36).substring(2, 15)}${argv.use_gpu ? 
'-gpu' : ''}`;
138 | 
139 |   CP.execSync(`
140 |     docker commit --change "ENV MODEL_NAME default" serving_base ${tag}
141 |   `);
142 | 
143 |   console.error(`ok - docker: ${tag}`);
144 | 
145 |   console.error();
146 |   console.error(`ok - Run with docker run -p 8501:8501 -t ${tag}`);
147 |   console.error();
148 | 
149 | }
150 | 
--------------------------------------------------------------------------------
/scripts/run-sqs-push.js:
--------------------------------------------------------------------------------
1 | const sqsPush = require('./sqs-push')
2 | sqsPush.run()
3 | 
--------------------------------------------------------------------------------
/scripts/sqs-push.js:
--------------------------------------------------------------------------------
 1 | const fs = require('fs')
 2 | const { Transform, Writable } = require('stream')
 3 | const split = require('split')
 4 | const through2Batch = require('through2-batch')
 5 | const logUpdate = require('log-update')
 6 | const { SQS } = require('aws-sdk')
 7 | const uuidv4 = require('uuid/v4')
 8 | 
 9 | const promiseThreshold = process.env.PROMISE_THRESHOLD || 500
10 | const queue = process.argv[3]
11 | const errors = []
12 | let count = 0
13 | 
14 | const transform = new Transform({
15 |   objectMode: true,
16 |   transform: (data, _, done) => {
17 |     if (!data.toString()) return done(null, null) // don't write empty lines
18 |     const [ x, y, z ] = data.toString().split('-').map(d => Number(d))
19 |     done(null, JSON.stringify({ x, y, z }))
20 |   }
21 | })
22 | 
23 | const counter = new Transform({
24 |   objectMode: true,
25 |   transform: (data, _, done) => {
26 |     logUpdate(`Sending ${++count} messages to queue: ${queue}`)
27 |     done(null, data)
28 |   }
29 | })
30 | 
31 | // simplified from https://github.com/danielyaa5/sqs-write-stream
32 | class SqsWriteStream extends Writable {
33 |   /**
34 |    * Must provide a url property
35 |    * @param {Object} queue - An object with a url property
36 |    */
37 |   constructor (queue, options) {
38 |     super({
39 |       objectMode: true
40 |     })
41 |     this.queueUrl = queue.url
42 |     this.sqs = new SQS()
43 |     this.activePromises = new Map()
44 |     this.decrementActivePromises = this.decrementActivePromises.bind(this)
45 |     this.sendMessages = this.sendMessages.bind(this)
46 |     this.paused = false
47 |     this.buffer = []
48 |   }
49 | 
50 |   decrementActivePromises (id) {
51 |     this.activePromises.delete(id)
52 |     if (this.paused && this.activePromises.size < promiseThreshold / 2) {
53 |       this.paused = false
54 |       this.cb()
55 |     }
56 |   }
57 | 
58 |   sendMessages (Entries) {
59 |     const Id = uuidv4()
60 |     const promise = this.sqs.sendMessageBatch({
61 |       Entries,
62 |       QueueUrl: this.queueUrl
63 |     })
64 |       .promise()
65 |       .then((data) => {
66 |         if (data.Failed && data.Failed.length > 0) {
67 |           data.Failed.forEach((error) => {
68 |             errors.push(error)
69 |           })
70 |         }
71 |         this.decrementActivePromises(Id)
72 |       })
73 |       .catch((error) => {
74 |         errors.push(`Error: ${error}`)
75 |         this.decrementActivePromises(Id)
76 |       })
77 |     this.activePromises.set(Id, promise)
78 |   }
79 | 
80 |   _write (obj, enc, cb) {
81 |     if (this.activePromises.size >= promiseThreshold) {
82 |       this.paused = true
83 |       this.cb = cb
84 |       this.buffer.push(obj)
85 |       return false
86 |     } else {
87 |       try {
88 |         if (this.buffer.length > 0) { // flush batches buffered while the stream was paused
89 |           this.buffer.forEach((bufferedObject) => {
90 |             const Entries = bufferedObject.map((object) => ({
91 |               MessageBody: object,
92 |               Id: uuidv4()
93 |             }))
94 |             this.sendMessages(Entries)
95 |           })
96 |           this.buffer = []
97 |         }
98 |         const Entries = obj.map((object) => ({
99 |           MessageBody: 
object, 100 | Id: uuidv4() 101 | })) 102 | this.sendMessages(Entries) 103 | return cb() 104 | } catch (err) { 105 | errors.push(`Error: ${err}`) 106 | return cb(err) 107 | } 108 | } 109 | } 110 | } 111 | 112 | function run () { 113 | const sqsStream = new SqsWriteStream({ url: queue }) 114 | fs.createReadStream(process.argv[2]) 115 | .pipe(split()) 116 | .pipe(counter) 117 | .pipe(transform) 118 | .pipe(through2Batch.obj({batchSize: 10})) 119 | .pipe(sqsStream) 120 | sqsStream.on('finish', () => { 121 | if (errors.length > 0) { 122 | logUpdate(errors) 123 | } 124 | }) 125 | } 126 | 127 | module.exports = { 128 | run 129 | } 130 | -------------------------------------------------------------------------------- /scripts/tag-cloudwatch-logs.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const AWS = require('aws-sdk') 3 | const yaml = require('js-yaml') 4 | 5 | // get stackName, projectTag from our config file 6 | const config = yaml.safeLoad(fs.readFileSync('config/config.yml').toString()) 7 | const stackName = config.default.stackName 8 | const projectTag = config.default.projectTag 9 | 10 | const cw = new AWS.CloudWatchLogs() 11 | 12 | // helper 13 | function tagWithProject (logGroup) { 14 | console.log(`tagging ${logGroup.logGroupName} with { Project: ${projectTag} }`) 15 | return cw.tagLogGroup({ 16 | logGroupName: logGroup.logGroupName, 17 | tags: { Project: projectTag } 18 | }).promise() 19 | } 20 | 21 | // tag lambda cloudwatch logs 22 | cw.describeLogGroups({ logGroupNamePrefix: `/aws/lambda/${stackName}` }) 23 | .promise() 24 | .then(resp => { 25 | return Promise.all(resp.logGroups.map(tagWithProject)) 26 | }) 27 | 28 | // tag ECS cloudwatch logs 29 | cw.describeLogGroups({ logGroupNamePrefix: stackName }) 30 | .promise() 31 | .then(resp => { 32 | return Promise.all(resp.logGroups.map(tagWithProject)) 33 | }) 34 | -------------------------------------------------------------------------------- /scripts/verify.js: -------------------------------------------------------------------------------- 1 | const assert = require('assert') 2 | const path = require('path') 3 | const fs = require('fs') 4 | const yaml = require('js-yaml') 5 | const AWS = require('aws-sdk') 6 | const dbConfig = require('../db/knexfile').remote 7 | const axios = require('axios') 8 | require('dotenv').config({ path: path.join(process.env.PWD, 'config', '.env') }) 9 | 10 | // get stackName from our config file 11 | const config = yaml.safeLoad(fs.readFileSync('config/config.yml').toString()) 12 | const stackName = config.default.stackName 13 | 14 | // fixtures 15 | const DB_TYPES = [ 16 | { column_name: 'tile', data_type: 'character varying' }, 17 | { column_name: 'output', data_type: 'jsonb' } 18 | ] 19 | 20 | // get output values from the cloudformation stack 21 | async function getStackOutputs (stackName) { 22 | const cf = new AWS.CloudFormation() 23 | return cf.describeStacks({ StackName: stackName }).promise() 24 | .then(resp => resp.Stacks[0].Outputs) 25 | } 26 | 27 | async function verify () { 28 | console.log(`Verifying stack ${stackName}`) 29 | const outputs = await getStackOutputs(stackName) 30 | dbConfig.connection = outputs.find(o => o.OutputKey === 'dbConnectionString').OutputValue 31 | const db = require('knex')(dbConfig) 32 | 33 | // check that our db has the correct columns 34 | await db.select(['column_name', 'data_type']) 35 | .table('information_schema.columns') 36 | .where({ 'table_name': 'results' }) 37 | .then(rows => 
assert.deepStrictEqual(rows, DB_TYPES))
38 |     .catch(err => console.error(err))
39 |     .then(_ => console.log('Database has the correct columns'))
40 | 
41 |   // check that our ALB/GPU endpoint is healthy
42 |   const endpoint = outputs.find(o => o.OutputKey === 'modelEndpoint').OutputValue
43 |   await axios.get(endpoint)
44 |     .then(resp => assert.deepStrictEqual(resp.status, 200))
45 |     .catch(err => console.error(err))
46 |     .then(_ => console.log('TF Serving returns a 200 status from the internal load balancer endpoint'))
47 | 
48 |   // download a tile
49 |   const tile = { x: 184260, y: 107656, z: 18 }
50 |   const url = config.default.lambdas.DownloadAndPredict.envs.TILE_ENDPOINT
51 |     .replace('{}', tile.z)
52 |     .replace('{}', tile.x)
53 |     .replace('{}', tile.y)
54 |     .replace('{}', process.env.TILE_ACCESS_TOKEN)
55 | 
56 |   const img = await axios.get(url, { responseType: 'arraybuffer' })
57 |     .then(resp => {
58 |       assert.deepStrictEqual(resp.status, 200)
59 |       console.log('Tile endpoint returns a 200 status')
60 |       return resp.data.toString('base64')
61 |     })
62 |     .catch(err => console.error(err))
63 | 
64 |   // confirm that we receive a prediction from the endpoint using the tile
65 |   const body = { instances: [{ 'image_bytes': { 'b64': img } }] }
66 |   await axios.post(`${endpoint}:predict`, body)
67 |     .then(resp => resp.data)
68 |     .then(data => assert(Array.isArray(data.predictions)))
69 |     .catch(err => console.error(err))
70 |     .then(_ => console.log('Prediction endpoint response has key "predictions" and it is an array'))
71 | 
72 |   return true
73 | }
74 | 
75 | verify()
76 |   .then(a => console.log('Stack verified'))
77 |   .catch(err => { console.error(err); process.exit(1) })
78 |   .then(_ => process.exit(0))
79 | 
--------------------------------------------------------------------------------
/test/test_sqs-push.js:
--------------------------------------------------------------------------------
 1 | const test = require('tape')
 2 | const sinon = require('sinon')
 3 | const proxyquire = require('proxyquire').noCallThru()
 4 | const MemoryStream = require('memorystream')
 5 | const fs = require('fs')
 6 | 
 7 | test('sqs-push', (t) => {
 8 |   const error = 'error'
 9 |   const sendMessageBatch = sinon.stub()
10 |   sendMessageBatch.onFirstCall().returns({ promise: () => (Promise.reject(error)) })
11 |   const SQS = function () {
12 |     return {
13 |       sendMessageBatch
14 |     }
15 |   }
16 |   const aws = { SQS }
17 | 
18 |   const memStream = new MemoryStream()
19 |   const stubFsCreateReadStream = sinon.stub(fs, 'createReadStream')
20 |   stubFsCreateReadStream.returns(memStream)
21 |   const logUpdate = sinon.stub()
22 | 
23 |   process.argv = [
24 |     'command',
25 |     'empty',
26 |     'file',
27 |     'queueurl'
28 |   ]
29 | 
30 |   const sqsPush = proxyquire(
31 |     '../scripts/sqs-push.js',
32 |     {
33 |       'aws-sdk': aws,
34 |       'log-update': logUpdate,
35 |       'fs': fs
36 |     }
37 |   )
38 |   sqsPush.run()
39 |   memStream.write('9-162-307\n9-161-307\n9-163-307')
40 |   memStream.end('')
41 |   setTimeout(() => {
42 |     t.equal(logUpdate.getCall(3).args[0][0], 'Error: error',
43 |       'Logs error when sqs message promise rejects')
44 |     t.end()
45 |   }, 1)
46 | })
47 | 
--------------------------------------------------------------------------------
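As a closing pointer, `scripts/sqs-push.js` above expects a plain-text file with one `x-y-z` tile index per line. If you have a GeoJSON of your prediction area (see the README requirements), a small helper along these lines can produce that file. This is a sketch only: `aoi.geojson`, `tiles.txt`, and `ZOOM` are placeholders, it assumes a single polygon feature, and it simply tiles the polygon's bounding box with `mercantile`, which over-covers compared to a true tile cover from `geodex` or `tile-cover`.

```python
# Sketch: write an "x-y-z" tile list for scripts/sqs-push.js from a polygon's bounding box.
# aoi.geojson, tiles.txt, and ZOOM are hypothetical -- adjust for your own area and model.
import json

import mercantile

ZOOM = 16  # hypothetical prediction zoom level

with open("aoi.geojson") as f:
    geom = json.load(f)["features"][0]["geometry"]

# bounding box over all rings of the polygon
lons = [pt[0] for ring in geom["coordinates"] for pt in ring]
lats = [pt[1] for ring in geom["coordinates"] for pt in ring]

with open("tiles.txt", "w") as out:
    for tile in mercantile.tiles(min(lons), min(lats), max(lons), max(lats), ZOOM):
        out.write(f"{tile.x}-{tile.y}-{tile.z}\n")
```

The resulting file can then be queued with `yarn sqs-push tiles.txt <your-queue-url>`, matching the `process.argv` positions that `sqs-push.js` reads.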