├── .github
│   └── workflows
│       └── main.yml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── Templates
│   ├── AWS-HPC-Cluster.yaml
│   ├── HPC-AD.yaml
│   ├── HPC-Networking.yaml
│   └── HPC-Storage.yaml
├── docs
│   ├── CF-2VPC.FSx.md
│   ├── CF-2VPC.md
│   ├── CF-3VPC.FSx.md
│   ├── CF-3VPC.md
│   ├── EnginFrame-1Click-Arch.png
│   ├── README.md
│   ├── step2.png
│   ├── step3.png
│   ├── step4.png
│   ├── step5.png
│   ├── step6.png
│   └── step7.png
├── enginframe
│   ├── alb.session.closing.hook.sh
│   ├── alb.session.starting.hook.sh
│   ├── efinstall.config
│   ├── fm.browse.ui
│   ├── mysql
│   │   ├── ef.mysql
│   │   └── efdb.config
│   └── services
│       └── ef-services.Linux Desktop.2022-11-22T10-22-47.zip
├── modules
│   ├── 04.configure.disable.anacron.compute.sh
│   ├── 04.configure.slurm.AllOrNothing.headnode.sh
│   ├── 07.configure.slurm.tagging.headnode.sh
│   ├── 10.install.enginframe.headnode.sh
│   ├── 12.configure.enginframe.alb.headnode.sh
│   ├── 15.install.dcv.broker.headnode.sh
│   ├── 20.install.dcv.slurm.headnode.sh
│   ├── 25.install.dcv-server.compute.sh
│   ├── 25.install.dcv-server.gpu.sh
│   ├── 26.configure.dcv.alb.compute.sh
│   ├── 27.configure.dcv.nat.compute.sh
│   ├── 30.install.dcv-sm-agent.compute.sh
│   ├── 40.install.monitoring.compute.sh
│   └── 40.install.monitoring.headnode.sh
├── monitoring
│   ├── custom-metrics
│   │   ├── 1h-cost-metrics.sh
│   │   ├── 1m-cost-metrics.sh
│   │   └── aws-region.py
│   ├── docker-compose
│   │   ├── docker-compose.compute.gpu.yml
│   │   ├── docker-compose.compute.yml
│   │   └── docker-compose.headnode.yml
│   ├── grafana
│   │   ├── dashboards
│   │   │   ├── ParallelCluster.json
│   │   │   ├── compute-node-details.json
│   │   │   ├── compute-node-list.json
│   │   │   ├── costs.json.OLD
│   │   │   ├── dashboards.yml
│   │   │   ├── gpu.json
│   │   │   ├── headnode-details.json
│   │   │   └── logs.json
│   │   └── datasources
│   │       └── datasource.yml
│   ├── nginx
│   │   └── conf.d
│   │       └── nginx.conf
│   ├── prometheus-slurm-exporter
│   │   └── slurm_exporter.service
│   ├── prometheus
│   │   └── prometheus.yml
│   └── www
│       ├── aws-logo.svg
│       ├── background.png
│       └── index.html
├── parallelcluster
│   ├── config.ap-east-1.sample.yaml
│   ├── config.ap-northeast-1.sample.yaml
│   ├── config.ap-northeast-2.sample.yaml
│   ├── config.ap-south-1.sample.yaml
│   ├── config.ca-central-1.sample.yaml
│   ├── config.eu-central-1.sample.yaml
│   ├── config.eu-north-1.sample.yaml
│   ├── config.eu-south-1.sample.yaml
│   ├── config.eu-west-1.sample.yaml
│   ├── config.us-east-1.sample.yaml
│   ├── config.us-east-2.sample.yaml
│   ├── config.us-west-1.sample.yaml
│   └── config.us-west-2.sample.yaml
└── scripts
    ├── Cloud9-Bootstrap.sh
    ├── motd
    ├── post.install.sh
    └── prolog.sh
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | name: CloudFormation Template S3 upload
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 |
8 | jobs:
9 | deploy:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@master
13 | - uses: jakejarvis/s3-sync-action@master
14 | with:
15 | args: --acl public-read
16 | env:
17 | AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }}
18 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
19 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
20 | AWS_REGION: ${{ secrets.AWS_REGION }}
21 | SOURCE_DIR: ${{ secrets.SOURCE_DIR }}
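          # All values above are supplied as repository secrets rather than being
          # hardcoded; the sync publishes SOURCE_DIR to the S3 bucket (presumably
          # the bucket hosting the CloudFormation templates linked from README.md).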
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing Guidelines
2 |
3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
4 | documentation, we greatly value feedback and contributions from our community.
5 |
6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
7 | information to effectively respond to your bug report or contribution.
8 |
9 |
10 | ## Reporting Bugs/Feature Requests
11 |
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 |
14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 |
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 |
22 |
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 |
26 | 1. You are working against the latest source on the *main* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 |
30 | To send us a pull request, please:
31 |
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 |
39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 |
42 |
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
45 |
46 |
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 |
52 |
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue.
55 |
56 |
57 | ## Licensing
58 |
59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
4 | this software and associated documentation files (the "Software"), to deal in
5 | the Software without restriction, including without limitation the rights to
6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7 | the Software, and to permit persons to whom the Software is furnished to do so.
8 |
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15 |
16 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 1Click-HPC
2 | This project aims at speeding up the deployment of an HPC cluster on AWS.
3 | By following the instructions below, you will create a fully functional, ready-to-use HPC cluster with just one click.
4 |
5 | # Get Started
6 |
7 | ## Step 1
8 | Click the link below corresponding to your preferred [AWS Region](https://aws.amazon.com/about-aws/global-infrastructure/regions_az/).
9 | You will be asked a few questions about services like VPC, FSx, etc.; if you are not sure what these services are or how to answer, just keep the default values.
10 | 1Click-HPC will take care of creating everything needed for your HPC cluster to run.
11 |
12 | | Region | Launch |
13 | |--------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
14 | | US | --- |
15 | | N. Virginia (us-east-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.yaml&stackName=hpc-cluster) |
16 | | Ohio (us-east-2) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-east-2#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.yaml&stackName=hpc-cluster) |
17 | | N. California (us-west-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-west-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.yaml&stackName=hpc-cluster) |
18 | | Oregon (us-west-2) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.yaml&stackName=hpc-cluster) |
19 | | Canada | --- |
20 | | Central (ca-central-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ca-central-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.yaml&stackName=hpc-cluster) |
21 | | EU | --- |
22 | | Frankfurt (eu-central-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=eu-central-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.yaml&stackName=hpc-cluster) |
23 | | Ireland (eu-west-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=eu-west-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.yaml&stackName=hpc-cluster) |
24 | | Stockholm (eu-north-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=eu-north-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.yaml&stackName=hpc-cluster) |
25 | | Milan (eu-south-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=eu-south-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.yaml&stackName=hpc-cluster) |
26 | | APJ | --- |
27 | | Tokyo (ap-northeast-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ap-northeast-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.yaml&stackName=hpc-cluster) |
28 | | Seoul (ap-northeast-2) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ap-northeast-2#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.yaml&stackName=hpc-cluster) |
29 | | Hong Kong (ap-east-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ap-east-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.yaml&stackName=hpc-cluster) |
30 | | Mumbai (ap-south-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ap-south-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.yaml&stackName=hpc-cluster) |
31 |
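
If you prefer the command line, the sketch below launches the same stack with the AWS CLI; it assumes configured credentials, and any required template parameters (such as the admin password asked for in Step 2) would be passed via `--parameters`.

```bash
# Launch the stack from the CLI (region and stack name are examples).
aws cloudformation create-stack \
  --region us-east-1 \
  --stack-name hpc-cluster \
  --template-url https://enginframe.s3.amazonaws.com/AWS-HPC-Cluster.yaml \
  --capabilities CAPABILITY_IAM CAPABILITY_NAMED_IAM
```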
32 |
33 | ## Step 2
34 | 1. Change the "Stack Name" as you like.
35 | 2. Enter the password for the admin user "ec2-user".
36 | 3. Check the checkbox to acknowledge the creation of IAM resources.
37 | 4. Click the "Create Stack" button.
38 |
39 |
40 | 
41 |
42 | ## Step 3
43 | 1. Click on the "Stack Name" to monitor the cluster creation steps.
44 | 2. Wait until all the resources are created.
45 |
46 |
47 | 
48 |
49 | ## Step 4
50 | 1. When the cluster creation is complete, go to the "Outputs" tab.
51 | 2. Click the "EnginFrameURL" to access your HPC cluster using the EnginFrame portal.
52 | 3. Alternatively, click the "Cloud9URL" if you wish to connect to your Cloud9 instance and then ssh into your cluster from there.
53 |
54 |
55 | 
56 |
57 | ## Step 5
58 | You can log in to EnginFrame by using "ec2-user" as the username and the password you chose.
59 | ```Username: ec2-user```
60 |
61 | ```Password: *********```
62 |
63 |
64 | 
65 |
66 | ## Step 6
67 | After you log in, you are redirected to the "List Spoolers" page.
68 | Spoolers are scratch areas located in the /fsx file system; they are managed by EnginFrame and used as the execution directories for your HPC jobs.
69 |
70 |
71 | 
72 |
73 | ## Step 7
74 | We recommend immediately changing the password by using the service shown below.
75 |
76 |
77 | 
78 |
79 | # Architecture
80 | 
81 |
82 | # Additional Docs
83 |
84 | https://github.com/aws-samples/1click-hpc/tree/main/docs
85 |
86 | # License
87 |
88 | This software is licensed under the MIT-0 License. See the LICENSE file.
--------------------------------------------------------------------------------
/Templates/HPC-Networking.yaml:
--------------------------------------------------------------------------------
1 | AWSTemplateFormatVersion: '2010-09-09'
2 | Description: HPC-Networking
3 |
4 | Parameters:
5 | CidrBlock:
6 | AllowedPattern: '((\d{1,3})\.){3}\d{1,3}/\d{1,2}'
7 | Default: 10.3.0.0/16
8 | Description: VPC CIDR Block (eg 10.3.0.0/16)
9 | Type: String
10 | CidrPublicSubnetA:
11 | AllowedPattern: '((\d{1,3})\.){3}\d{1,3}/\d{1,2}'
12 | Default: 10.3.128.0/20
13 | Description: VPC CIDR Block for the Public Subnet A (eg 10.3.128.0/20)
14 | Type: String
15 | CidrPublicSubnetB:
16 | AllowedPattern: '((\d{1,3})\.){3}\d{1,3}/\d{1,2}'
17 | Default: 10.3.144.0/20
18 | Description: VPC CIDR Block for the Public Subnet B (eg 10.3.144.0/20)
19 | Type: String
20 | CidrPrivateSubnetA:
21 | AllowedPattern: '((\d{1,3})\.){3}\d{1,3}/\d{1,2}'
22 | Default: 10.3.0.0/18
23 |     Description: VPC CIDR Block for the Private Subnet A (eg 10.3.0.0/18)
24 | Type: String
25 | CidrPrivateSubnetB:
26 | AllowedPattern: '((\d{1,3})\.){3}\d{1,3}/\d{1,2}'
27 | Default: 10.3.64.0/18
28 | Description: VPC CIDR Block for the Private Subnet B (eg 10.3.64.0/18)
29 | Type: String
30 |
31 | Mappings:
32 | RegionMap:
33 | us-east-1:
34 | ZoneId1: use1-az6
35 | ZoneId2: use1-az4
36 | us-east-2:
37 | ZoneId1: use2-az2
38 | ZoneId2: use2-az3
39 | us-west-1:
40 | ZoneId1: usw1-az1
41 | ZoneId2: usw1-az3
42 | us-west-2:
43 | ZoneId1: usw2-az1
44 | ZoneId2: usw2-az2
45 | eu-central-1:
46 | ZoneId1: euc1-az3
47 | ZoneId2: euc1-az2
48 | eu-west-1:
49 | ZoneId1: euw1-az1
50 | ZoneId2: euw1-az2
51 | eu-north-1:
52 | ZoneId1: eun1-az2
53 | ZoneId2: eun1-az1
54 | ca-central-1:
55 | ZoneId1: cac1-az2
56 | ZoneId2: cac1-az1
57 | eu-south-1:
58 | ZoneId1: eus1-az2
59 | ZoneId2: eus1-az1
60 | ap-east-1:
61 | ZoneId1: ape1-az3
62 | ZoneId2: ape1-az2
63 | ap-northeast-1:
64 | ZoneId1: apne1-az4
65 | ZoneId2: apne1-az1
66 | ap-northeast-2:
67 | ZoneId1: apne2-az1
68 | ZoneId2: apne2-az3
69 | ap-south-1:
70 | ZoneId1: aps1-az2
71 | ZoneId2: aps1-az3
72 |
73 | Resources:
74 |
75 | VPC:
76 | Type: AWS::EC2::VPC
77 | Properties:
78 | CidrBlock: !Ref CidrBlock
79 | EnableDnsHostnames: true
80 | EnableDnsSupport: true
81 | Tags:
82 | - Key: "Name"
83 | Value: !Sub '${AWS::StackName}-HPC-VPC'
84 |
85 | PublicSubnetA:
86 | Type: AWS::EC2::Subnet
87 | Properties:
88 | VpcId: !Ref VPC
89 | CidrBlock: !Ref CidrPublicSubnetA
90 | AvailabilityZone: !GetAtt AvailabiltyZone1.ZoneName
91 | MapPublicIpOnLaunch: true
92 | Tags:
93 | - Key: Name
94 | Value: !Sub '${AWS::StackName}-Public-SubnetA'
95 |
96 | PublicSubnetB:
97 | Type: AWS::EC2::Subnet
98 | Properties:
99 | VpcId: !Ref VPC
100 | CidrBlock: !Ref CidrPublicSubnetB
101 | AvailabilityZone: !GetAtt AvailabiltyZone2.ZoneName
102 | MapPublicIpOnLaunch: true
103 | Tags:
104 | - Key: Name
105 | Value: !Sub '${AWS::StackName}-Public-SubnetB'
106 |
107 | InternetGateway:
108 | Type: AWS::EC2::InternetGateway
109 |
110 | AttachGateway:
111 | Type: AWS::EC2::VPCGatewayAttachment
112 | Properties:
113 | VpcId: !Ref VPC
114 | InternetGatewayId: !Ref InternetGateway
115 |
116 | PublicRouteTable:
117 | Type: AWS::EC2::RouteTable
118 | Properties:
119 | VpcId: !Ref VPC
120 | Tags:
121 | - Key: Name
122 | Value: !Sub '${AWS::StackName}-Public-Route'
123 | PublicRoute1:
124 | Type: AWS::EC2::Route
125 | Properties:
126 | RouteTableId: !Ref PublicRouteTable
127 | DestinationCidrBlock: 0.0.0.0/0
128 | GatewayId: !Ref InternetGateway
129 |
130 | PublicSubnetARouteTableAssociation:
131 | Type: AWS::EC2::SubnetRouteTableAssociation
132 | Properties:
133 | SubnetId: !Ref PublicSubnetA
134 | RouteTableId: !Ref PublicRouteTable
135 |
136 | PublicSubnetBRouteTableAssociation:
137 | Type: AWS::EC2::SubnetRouteTableAssociation
138 | Properties:
139 | SubnetId: !Ref PublicSubnetB
140 | RouteTableId: !Ref PublicRouteTable
141 |
142 | PrivateSubnetA:
143 | Type: AWS::EC2::Subnet
144 | Properties:
145 | VpcId: !Ref VPC
146 | AvailabilityZone: !GetAtt AvailabiltyZone1.ZoneName
147 | CidrBlock: !Ref CidrPrivateSubnetA
148 | MapPublicIpOnLaunch: false
149 | Tags:
150 | - Key: Name
151 | Value: !Sub '${AWS::StackName}-Private-SubnetA'
152 |
153 | PrivateSubnetB:
154 | Type: AWS::EC2::Subnet
155 | Properties:
156 | VpcId: !Ref VPC
157 | AvailabilityZone: !GetAtt AvailabiltyZone2.ZoneName
158 | CidrBlock: !Ref CidrPrivateSubnetB
159 | MapPublicIpOnLaunch: false
160 | Tags:
161 | - Key: Name
162 | Value: !Sub '${AWS::StackName}-Private-SubnetB'
163 |
164 | NatGatewayAEIP:
165 | Type: AWS::EC2::EIP
166 | DependsOn: AttachGateway
167 | Properties:
168 | Domain: vpc
169 |
170 | NatGatewayBEIP:
171 | Type: AWS::EC2::EIP
172 | DependsOn: AttachGateway
173 | Properties:
174 | Domain: vpc
175 |
176 | NatGatewayA:
177 | Type: AWS::EC2::NatGateway
178 | Properties:
179 | AllocationId: !GetAtt NatGatewayAEIP.AllocationId
180 | SubnetId: !Ref PublicSubnetA
181 |
182 | NatGatewayB:
183 | Type: AWS::EC2::NatGateway
184 | Properties:
185 | AllocationId: !GetAtt NatGatewayBEIP.AllocationId
186 | SubnetId: !Ref PublicSubnetB
187 |
188 | PrivateRouteTableA:
189 | Type: AWS::EC2::RouteTable
190 | Properties:
191 | VpcId: !Ref VPC
192 | Tags:
193 | - Key: Name
194 | Value: !Sub '${AWS::StackName}-Private-Route-A'
195 |
196 | PrivateRouteTableB:
197 | Type: AWS::EC2::RouteTable
198 | Properties:
199 | VpcId: !Ref VPC
200 | Tags:
201 | - Key: Name
202 | Value: !Sub '${AWS::StackName}-Private-Route-B'
203 |
204 | DefaultPrivateRouteA:
205 | Type: AWS::EC2::Route
206 | Properties:
207 | RouteTableId: !Ref PrivateRouteTableA
208 | DestinationCidrBlock: 0.0.0.0/0
209 | NatGatewayId: !Ref NatGatewayA
210 |
211 | DefaultPrivateRouteB:
212 | Type: AWS::EC2::Route
213 | Properties:
214 | RouteTableId: !Ref PrivateRouteTableB
215 | DestinationCidrBlock: 0.0.0.0/0
216 | NatGatewayId: !Ref NatGatewayB
217 |
218 | PrivateSubnetARouteTableAssociation:
219 | Type: AWS::EC2::SubnetRouteTableAssociation
220 | Properties:
221 | RouteTableId: !Ref PrivateRouteTableA
222 | SubnetId: !Ref PrivateSubnetA
223 |
224 | PrivateSubnetBRouteTableAssociation:
225 | Type: AWS::EC2::SubnetRouteTableAssociation
226 | Properties:
227 | RouteTableId: !Ref PrivateRouteTableB
228 | SubnetId: !Ref PrivateSubnetB
229 |
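  # The two custom resources below resolve the AZ IDs from RegionMap into AZ
  # names at deploy time by invoking GetAZLambdaFunction (defined further down).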
230 | AvailabiltyZone1:
231 | Type: Custom::AvailabiltyZone
232 | DependsOn: LogGroupGetAZLambdaFunction
233 | Properties:
234 | ServiceToken: !GetAtt GetAZLambdaFunction.Arn
235 | ZoneId: !FindInMap [RegionMap, !Ref "AWS::Region", ZoneId1]
236 |
237 | AvailabiltyZone2:
238 | Type: Custom::AvailabiltyZone
239 | DependsOn: LogGroupGetAZLambdaFunction
240 | Properties:
241 | ServiceToken: !GetAtt GetAZLambdaFunction.Arn
242 | ZoneId: !FindInMap [RegionMap, !Ref "AWS::Region", ZoneId2]
243 |
244 | LogGroupGetAZLambdaFunction:
245 | Type: AWS::Logs::LogGroup
246 | DeletionPolicy: Delete
247 | Properties:
248 | LogGroupName: !Sub /aws/lambda/${GetAZLambdaFunction}
249 | RetentionInDays: 7
250 |
251 | GetAZLambdaFunction:
252 | Type: AWS::Lambda::Function
253 | Properties:
254 | Description: GetAZLambdaFunction
255 | Timeout: 60
256 | Runtime: python3.7
257 | Handler: index.handler
258 | Role: !GetAtt GetAZLambdaRole.Arn
259 | Code:
260 | ZipFile: |
261 | import cfnresponse
262 | from json import dumps
263 | from boto3 import client
264 | EC2 = client('ec2')
265 | def handler(event, context):
266 | if event['RequestType'] in ('Create', 'Update'):
267 | print(dumps(event, default=str))
268 | data = {}
269 |               try:
270 |                 response = EC2.describe_availability_zones(
271 |                   Filters=[{'Name': 'zone-id', 'Values': [event['ResourceProperties']['ZoneId']]}]
272 |                 )
273 |                 print(dumps(response, default=str))
274 |                 data['ZoneName'] = response['AvailabilityZones'][0]['ZoneName']
275 |                 # Send exactly one CloudFormation response: SUCCESS here, or FAILED below.
276 |                 cfnresponse.send(event, context, cfnresponse.SUCCESS, data)
277 |               except Exception as error:
278 |                 cfnresponse.send(event, context, cfnresponse.FAILED, {}, reason=str(error))
279 | else:
280 | cfnresponse.send(event, context, cfnresponse.SUCCESS, {})
281 | Tags:
282 | - Key: Name
283 | Value: !Sub ${AWS::StackName}-GetAZLambdaFunction
284 |
285 | GetAZLambdaRole:
286 | Type: AWS::IAM::Role
287 | Properties:
288 | Path: /
289 | Description: GetAZLambdaFunction
290 | AssumeRolePolicyDocument:
291 | Version: '2012-10-17'
292 | Statement:
293 | - Effect: Allow
294 | Action:
295 | - sts:AssumeRole
296 | Principal:
297 | Service:
298 | - !Sub 'lambda.${AWS::URLSuffix}'
299 | ManagedPolicyArns:
300 | - !Sub 'arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole'
301 | Policies:
302 | - PolicyName: GetAZLambdaFunction
303 | PolicyDocument:
304 | Version: '2012-10-17'
305 | Statement:
306 | - Sid: ec2
307 | Effect: Allow
308 | Action:
309 | - ec2:DescribeAvailabilityZones
310 | Resource:
311 | - '*'
312 | Tags:
313 | - Key: Name
314 | Value: !Sub ${AWS::StackName}-GetAZLambdaFunction
315 |
316 | S3Endpoint:
317 | Type: 'AWS::EC2::VPCEndpoint'
318 | Properties:
319 | VpcEndpointType: 'Gateway'
320 | ServiceName: !Sub 'com.amazonaws.${AWS::Region}.s3'
321 | RouteTableIds:
322 | - !Ref PublicRouteTable
323 | - !Ref PrivateRouteTableA
324 | - !Ref PrivateRouteTableB
325 | VpcId: !Ref VPC
326 |
327 | localSG:
328 | Type: AWS::EC2::SecurityGroup
329 | Properties:
330 | GroupDescription: Allow all traffic from resources in VPC
331 | VpcId:
332 | Ref: VPC
333 | SecurityGroupIngress:
334 | - IpProtocol: -1
335 | CidrIp: !Ref CidrBlock
336 | SecurityGroupEgress:
337 | - IpProtocol: -1
338 | CidrIp: !Ref CidrBlock
339 |
340 | Outputs:
341 | VPC:
342 | Description: The ID of the VPC
343 | Value: !Ref VPC
344 | Export:
345 | Name: !Sub "${AWS::StackName}-VPC"
346 | PrivateSubnetA:
347 | Description: The ID of the PrivateSubnetA
348 | Value: !Ref PrivateSubnetA
349 | Export:
350 | Name: !Sub "${AWS::StackName}-PrivateSubnetA"
351 | PrivateSubnetB:
352 | Description: The ID of the PrivateSubnetB
353 | Value: !Ref PrivateSubnetB
354 | Export:
355 | Name: !Sub "${AWS::StackName}-PrivateSubnetB"
356 | PublicSubnetA:
357 | Description: The ID of the PublicSubnetA
358 | Value: !Ref PublicSubnetA
359 | Export:
360 | Name: !Sub "${AWS::StackName}-PublicSubnetA"
361 | PublicSubnetB:
362 | Description: The ID of the PublicSubnetB
363 | Value: !Ref PublicSubnetB
364 | Export:
365 | Name: !Sub "${AWS::StackName}-PublicSubnetB"
366 | localSG:
367 | Description: The ID of the localSG
368 | Value: !Ref localSG
369 | Export:
370 | Name: !Sub "${AWS::StackName}-localSG"
--------------------------------------------------------------------------------
/Templates/HPC-Storage.yaml:
--------------------------------------------------------------------------------
1 | AWSTemplateFormatVersion: '2010-09-09'
2 | Description: HPC-FSx-Lustre
3 |
4 | Parameters:
5 | PrivateSubnet:
6 | Description: The ID of your private subnet.
7 | Type: String
8 | AllowedPattern: ^(subnet-[0-9a-z]+)$
9 | SecurityGroup:
10 | Description: The ID of the SecurityGroup you want to attach.
11 | Type: String
12 | AllowedPattern: ^(sg-[0-9a-z]+)$
13 |
14 | Resources:
15 |
16 | FSx:
17 | DeletionPolicy: Retain
18 | Type: AWS::FSx::FileSystem
19 | Properties:
20 | FileSystemType: LUSTRE
21 | StorageCapacity: 1200
22 | StorageType: SSD
23 | SubnetIds:
24 | - !Ref PrivateSubnet
25 | SecurityGroupIds:
26 | - !Ref SecurityGroup
27 | LustreConfiguration:
28 | WeeklyMaintenanceStartTime: "4:00:00"
29 | DeploymentType: PERSISTENT_2
30 | PerUnitStorageThroughput: 1000
31 | DataCompressionType: LZ4
32 | FileSystemTypeVersion: "2.12"
--------------------------------------------------------------------------------
/docs/CF-2VPC.FSx.md:
--------------------------------------------------------------------------------
1 | # 1Click-HPC with existing FSx and VPC (Public only)
2 | This CloudFormation template allows you to deploy 1Click-HPC using your existing FSx for Lustre file system, within an existing VPC with 2 public subnets (no private subnet).
3 | 
4 | Click the link below corresponding to your preferred [AWS Region](https://aws.amazon.com/about-aws/global-infrastructure/regions_az/).
5 |
6 | | Region | Launch |
7 | |--------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
8 | | US | --- |
9 | | N. Virginia (us-east-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.FSx.yaml&stackName=hpc-cluster) |
10 | | Ohio (us-east-2) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-east-2#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.FSx.yaml&stackName=hpc-cluster) |
11 | | N. California (us-west-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-west-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.FSx.yaml&stackName=hpc-cluster) |
12 | | Oregon (us-west-2) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.FSx.yaml&stackName=hpc-cluster) |
13 | | Canada | --- |
14 | | Central (ca-central-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ca-central-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.FSx.yaml&stackName=hpc-cluster) |
15 | | EU | --- |
16 | | Frankfurt (eu-central-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=eu-central-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.FSx.yaml&stackName=hpc-cluster) |
17 | | Ireland (eu-west-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=eu-west-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.FSx.yaml&stackName=hpc-cluster) |
18 | | Stockholm (eu-north-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=eu-north-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.FSx.yaml&stackName=hpc-cluster) |
19 | | APJ | --- |
20 | | Tokyo (ap-northeast-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ap-northeast-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.FSx.yaml&stackName=hpc-cluster) |
21 | | Hong Kong (ap-east-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ap-east-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.FSx.yaml&stackName=hpc-cluster) |
22 | | Mumbai (ap-south-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ap-south-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.FSx.yaml&stackName=hpc-cluster) |
23 |
--------------------------------------------------------------------------------
/docs/CF-2VPC.md:
--------------------------------------------------------------------------------
1 | # 1Click-HPC within existing VPC (Public only)
2 | This CloudFormation template allows you to deploy 1Click-HPC within an existing VPC with 2 public subnets (no private subnet).
3 | 
4 | Click the link below corresponding to your preferred [AWS Region](https://aws.amazon.com/about-aws/global-infrastructure/regions_az/).
5 |
6 | | Region | Launch |
7 | |--------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
8 | | US | --- |
9 | | N. Virginia (us-east-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PublicVPC.yaml&stackName=hpc-cluster) |
10 | | Ohio (us-east-2) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-east-2#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PublicVPC.yaml&stackName=hpc-cluster) |
11 | | N. California (us-west-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-west-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PublicVPC.yaml&stackName=hpc-cluster) |
12 | | Oregon (us-west-2) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PublicVPC.yaml&stackName=hpc-cluster) |
13 | | Canada | --- |
14 | | Central (ca-central-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ca-central-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PublicVPC.yaml&stackName=hpc-cluster) |
15 | | EU | --- |
16 | | Frankfurt (eu-central-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=eu-central-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PublicVPC.yaml&stackName=hpc-cluster) |
17 | | Ireland (eu-west-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=eu-west-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PublicVPC.yaml&stackName=hpc-cluster) |
18 | | Stockholm (eu-north-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=eu-north-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PublicVPC.yaml&stackName=hpc-cluster) |
19 | | APJ | --- |
20 | | Tokyo (ap-northeast-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ap-northeast-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PublicVPC.yaml&stackName=hpc-cluster) |
21 | | Hong Kong (ap-east-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ap-east-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PublicVPC.yaml&stackName=hpc-cluster) |
22 | | Mumbai (ap-south-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ap-south-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PublicVPC.yaml&stackName=hpc-cluster) |
23 |
--------------------------------------------------------------------------------
/docs/CF-3VPC.FSx.md:
--------------------------------------------------------------------------------
1 | # 1Click-HPC with existing FSx and VPC (Public & Private)
2 | This CloudFormation template allows you to deploy 1Click-HPC using your existing FSx for Lustre file system, within an existing VPC with 2 public subnets and 1 private subnet.
3 | 
4 | Please note that the cluster is deployed into the private subnet; the public subnets are used to host Cloud9 and the Application Load Balancer.
5 | 
6 | Click the link below corresponding to your preferred [AWS Region](https://aws.amazon.com/about-aws/global-infrastructure/regions_az/).
7 |
8 | | Region | Launch |
9 | |--------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
10 | | US | --- |
11 | | N. Virginia (us-east-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.FSx.yaml&stackName=hpc-cluster) |
12 | | Ohio (us-east-2) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-east-2#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.FSx.yaml&stackName=hpc-cluster) |
13 | | N. California (us-west-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-west-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.FSx.yaml&stackName=hpc-cluster) |
14 | | Oregon (us-west-2) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.FSx.yaml&stackName=hpc-cluster) |
15 | | Canada | --- |
16 | | Central (ca-central-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ca-central-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.FSx.yaml&stackName=hpc-cluster) |
17 | | EU | --- |
18 | | Frankfurt (eu-central-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=eu-central-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.FSx.yaml&stackName=hpc-cluster) |
19 | | Ireland (eu-west-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=eu-west-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.FSx.yaml&stackName=hpc-cluster) |
20 | | Stockholm (eu-north-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=eu-north-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.FSx.yaml&stackName=hpc-cluster) |
21 | | APJ | --- |
22 | | Tokyo (ap-northeast-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ap-northeast-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.FSx.yaml&stackName=hpc-cluster) |
23 | | Hong Kong (ap-east-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ap-east-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.FSx.yaml&stackName=hpc-cluster) |
24 | | Mumbai (ap-south-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ap-south-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.FSx.yaml&stackName=hpc-cluster) |
25 |
--------------------------------------------------------------------------------
/docs/CF-3VPC.md:
--------------------------------------------------------------------------------
1 | # 1Click-HPC within existing VPC (Public & Private)
2 | This CloudFormation template allows you to deploy 1Click-HPC within an existing VPC with 2 public subnets and 1 private subnet.
3 | 
4 | Please note that the cluster is deployed into the private subnet; the public subnets are used to host Cloud9 and the Application Load Balancer.
5 | 
6 | Click the link below corresponding to your preferred [AWS Region](https://aws.amazon.com/about-aws/global-infrastructure/regions_az/).
7 |
8 | | Region | Launch |
9 | |--------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
10 | | US | --- |
11 | | N. Virginia (us-east-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.yaml&stackName=hpc-cluster) |
12 | | Ohio (us-east-2) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-east-2#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.yaml&stackName=hpc-cluster) |
13 | | N. California (us-west-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-west-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.yaml&stackName=hpc-cluster) |
14 | | Oregon (us-west-2) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.yaml&stackName=hpc-cluster) |
15 | | Canada | --- |
16 | | Central (ca-central-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ca-central-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.yaml&stackName=hpc-cluster) |
17 | | EU | --- |
18 | | Frankfurt (eu-central-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=eu-central-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.yaml&stackName=hpc-cluster) |
19 | | Ireland (eu-west-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=eu-west-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.yaml&stackName=hpc-cluster) |
20 | | Stockholm (eu-north-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=eu-north-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.yaml&stackName=hpc-cluster) |
21 | | APJ | --- |
22 | | Tokyo (ap-northeast-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ap-northeast-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.yaml&stackName=hpc-cluster) |
23 | | Hong Kong (ap-east-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ap-east-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.yaml&stackName=hpc-cluster) |
24 | | Mumbai (ap-south-1) | [Launch](https://console.aws.amazon.com/cloudformation/home?region=ap-south-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Fenginframe.s3.amazonaws.com%2FAWS-HPC-Cluster.PrivateVPC.yaml&stackName=hpc-cluster) |
25 |
--------------------------------------------------------------------------------
/docs/EnginFrame-1Click-Arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/1click-hpc/390dfe1e4c131f135cdd8b5f853371e763cd8877/docs/EnginFrame-1Click-Arch.png
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | # QuickStart
2 | In case you do not want to use our 1Click-HPC CloudFormation template, but still want to build your cluster with all the components and modules available in this repository, you can follow the instructions below to set up your ParallelCluster configuration file.
3 | You can create a new cluster using your existing configuration file and just add the following parameters; everything will be installed and configured automatically.
4 | If this is your first approach to AWS ParallelCluster, either go back to the section above or follow all the steps of our [Workshop](https://www.hpcworkshops.com/03-hpc-aws-parallelcluster-workshop.html) and include the following configuration:
5 | ```ini
6 | [cluster yourcluster]
7 | ...
8 | post_install = https://raw.githubusercontent.com/aws-samples/1click-hpc/main/scripts/post.install.sh
9 | post_install_args = "05.install.ldap.server.headnode.sh 06.install.ldap.client.compute.sh 06.install.ldap.client.headnode.sh 10.install.enginframe.headnode.sh 11.install.ldap.enginframe.headnode.sh 20.install.dcv.slurm.headnode.sh 25.install.dcv-server.compute.sh 35.install.dcv.slurm.compute.sh"
10 | extra_json = {"post_install":{"enginframe":{"ef_admin_pass":"Put_Your_Password_HERE"}}}
11 | tags = {"EnginFrame" : "true"}
12 | ...
13 | ```
14 |
15 | **Note:** You need to specify a custom Security Group (one that allows inbound connections to port 8443), defined as the `additional_sg` parameter in the `[vpc]` section of your AWS ParallelCluster config file.
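
Once the snippet is merged into your configuration, creating the cluster is a single command; the sketch below assumes the ParallelCluster 2.x CLI and the default config location ("yourcluster" matches the section name above).

```bash
# Create the cluster defined by the [cluster yourcluster] section.
pcluster create -c ~/.parallelcluster/config yourcluster
```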
16 | # (Optional) QuickStart parameters customization
17 | In addition to the Quickstart deployment, there are a few parameters that you can optionally define to customize the components installed.
18 | These parameters are defined as part of the `extra_json` [parameter](https://docs.aws.amazon.com/parallelcluster/latest/ug/cluster-definition.html#extra-json) in the [cluster section](https://docs.aws.amazon.com/parallelcluster/latest/ug/cluster-definition.html) of the AWS ParallelCluster configuration file.
19 | If the `extra_json` is not specified, all the components will be installed using the default values.
20 | See below an example:
21 | ```json
22 | {
23 | "post_install": {
24 | "enginframe": {
25 | "nice_root": "/fsx/nice",
26 | "ef_admin": "ec2-user",
27 | "ef_conf_root": "/fsx/nice/enginframe/conf",
28 | "ef_data_root": "/fsx/nice/enginframe/data",
29 | "ef_spooler": "/fsx/nice/enginframe/spoolers",
30 | "ef_repository": "/fsx/nice/enginframe/repository",
31 | "ef_admin_pass": "Change_this!"
32 | },
33 | "dcvsm": {
34 | "agent_broker_port": 8445,
35 | "broker_ca": "/home/ec2-user/dcvsmbroker_ca.pem",
36 | "client_broker_port": 8446
37 | },
38 | "dcv": {
39 | "dcv_queue_keyword": "dcv"
40 | }
41 | }
42 | }
43 | ```
44 | * `nice_root` by default `${SHARED_FS_DIR}/nice` , is the base directory where EnginFrame is installed.
45 | * `ef_admin` by default `ec2-user` , is the EnginFrame user with administrative rights.
46 | * `ef_conf_root` by default `${NICE_ROOT}/enginframe/conf`, is the path of the EnginFrame configuration directory.
47 | * `ef_data_root` by default `${NICE_ROOT}/enginframe/data`, is the path of the EnginFrame data directory.
48 | * `ef_spooler` by default `${NICE_ROOT}/enginframe/spoolers`, is the path of the EnginFrame Spoolers. Please consider that the Spoolers are the location where your jobs are executed.
49 | * `ef_repository` by default `${NICE_ROOT}/enginframe/repository`, is the EnginFrame repository directory path.
50 | * `ef_admin_pass` by default `Change_this!` , is the EnginFrame admin password. Use this user and pass for your first login into EnginFrame.
51 | * `agent_broker_port` by default `8445`, is the DCV Session Manager Broker port.
52 | * `broker_ca` by default `/home/ec2-user/dcvsmbroker_ca.pem`, is the location for the DCV Session Manager Broker certificate.
53 | * `client_broker_port` by default `8446` , is the DCV Session Manager Broker port used by the client.
54 | * `dcv_queue_keyword` by default `dcv` , is a keyword that identifies the queues of your cluster where you want to enable DCV.
55 |
56 | **Note:** Because the `extra_json` is a parameter in a `.ini` file, you need to put your custom JSON on a single line.
57 | You can use the following command to convert your JSON into a one-line JSON:
58 | ```bash
59 | tr -d '\n' < your_extra.json
60 | ```
61 | See below an example output.
62 | ```json
63 | { "post_install": { "enginframe": { "nice_root": "/fsx/nice", "ef_admin": "ec2-user", "ef_conf_root": "/fsx/nice/enginframe/conf", "ef_data_root": "/fsx/nice/enginframe/data", "ef_spooler": "/fsx/nice/enginframe/spoolers", "ef_repository": "/fsx/nice/enginframe/repository", "ef_admin_pass": "Change_this!" }, "dcvsm": { "agent_broker_port": 8445, "broker_ca": "/home/ec2-user/dcvsmbroker_ca.pem", "client_broker_port": 8446 }, "dcv": { "dcv_queue_keyword": "dcv" }}}
64 | ```
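
Equivalently, if `jq` is installed, it produces the same single-line form:

```bash
# Compact the JSON to one line (alternative to the tr command above).
jq -c . your_extra.json
```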
65 |
66 | # (Optional) Launch script customization
67 | An additional way to further customize the installation and configuration of your components is by downloading the scripts locally, modifying them, and putting them back onto S3.
68 | ```bash
69 | export S3_BUCKET=
70 |
71 | aws s3 cp --quiet --recursive 1click-hpc/scripts/ s3://$S3_BUCKET/scripts/
72 | aws s3 cp --quiet --recursive 1click-hpc/packages/ s3://$S3_BUCKET/packages/
73 | aws s3 cp --quiet --recursive 1click-hpc/parallelcluster/ s3://$S3_BUCKET/parallelcluster/
74 | aws s3 cp --quiet --recursive 1click-hpc/enginframe/ s3://$S3_BUCKET/enginframe/
75 | ```
76 |
77 | In this case, your AWS ParallelCluster configuration file has the following parameters:
78 | ```ini
79 | post_install = s3:///scripts/post.install.sh
80 | post_install_args = "01.install.enginframe.headnode.sh 03.install.dcv.slurm.headnode.sh 04.install.dcv-server.compute.sh 06.install.dcv.slurm.compute.sh"
81 | ```
82 |
83 | The first one, `post_install`, specifies the S3 bucket you chose to store your post-install bash script.
84 | This is the main script that runs all the secondary scripts for installing EnginFrame, DCV Session Manager, DCV Server, and the other components.
85 | The second parameter, `post_install_args`, contains the scripts being launched for installing the selected components.
86 | EnginFrame, DCV Session Manager Broker, and all the other secondary scripts are built independently, so you can potentially install just one of them.
87 |
88 |
89 | 
90 | **Note:** This procedure has been tested with EnginFrame version 2020.0 and DCV Session Manager Broker version 2020.2. With easy modifications, though, it can work with previous versions; just mind to add the license management.
91 | 
92 | ## Requirements
93 | To perform a successful installation of EnginFrame and DCV Session Manager Broker, you'll need:
94 | 
95 | * An S3 bucket, made accessible to ParallelCluster via its `s3_read_resource` or `s3_read_write_resource` `[cluster]` settings. Refer to the ParallelCluster configuration documentation for details.
96 | * An EnginFrame `efinstall.config` file, containing the desired settings for the EnginFrame installation. This enables the post-install script to install EnginFrame in unattended mode. An example `efinstall.config` is provided in this repository: you can review and modify it according to your preferences. Alternatively, you can generate your own by performing an EnginFrame installation; in that case, an `efinstall.config` containing all your choices will be generated in the folder where you ran the installer.
97 | * A security group allowing the EnginFrame inbound port. By default ParallelCluster creates a new security group with just port 22 publicly opened, so you can either use a replacement (via the ParallelCluster `vpc_security_group_id` setting) or add an additional security group (`additional_sg` setting), as done here.
98 | * A ParallelCluster configuration including `post_install` and `post_install_args`, as mentioned above and described later in more detail.
99 | * (Optionally) the EnginFrame and DCV Session Manager packages, available online from https://download.enginframe.com. Having them in the bucket avoids the need for outgoing internet access from your ParallelCluster headnode to download them: the scripts will copy them from S3 to the headnode.
100 | 
101 | **Note:** Neither EnginFrame 2020 nor DCV Session Manager Broker needs a license when running on EC2 instances. For more details, please refer to their documentation.
102 | 
103 | ## Troubleshooting
104 | A detailed output log is available on the headnode, in `/var/log/cfn-init.log` and `/var/log/cfn-init-cmd.log`. You can reach the headnode via ssh, after getting its IP address from the AWS Console → EC2 → Instances and looking for an instance named `HeadNode`.
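
For example, once logged into the headnode via ssh, you can follow both logs at once:

```bash
sudo tail -f /var/log/cfn-init.log /var/log/cfn-init-cmd.log
```
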
105 | ## Security
106 |
107 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.
108 |
109 | ## License
110 |
111 | This library is licensed under the MIT-0 License. See the LICENSE file.
112 |
113 |
--------------------------------------------------------------------------------
/docs/step2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/1click-hpc/390dfe1e4c131f135cdd8b5f853371e763cd8877/docs/step2.png
--------------------------------------------------------------------------------
/docs/step3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/1click-hpc/390dfe1e4c131f135cdd8b5f853371e763cd8877/docs/step3.png
--------------------------------------------------------------------------------
/docs/step4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/1click-hpc/390dfe1e4c131f135cdd8b5f853371e763cd8877/docs/step4.png
--------------------------------------------------------------------------------
/docs/step5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/1click-hpc/390dfe1e4c131f135cdd8b5f853371e763cd8877/docs/step5.png
--------------------------------------------------------------------------------
/docs/step6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/1click-hpc/390dfe1e4c131f135cdd8b5f853371e763cd8877/docs/step6.png
--------------------------------------------------------------------------------
/docs/step7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/1click-hpc/390dfe1e4c131f135cdd8b5f853371e763cd8877/docs/step7.png
--------------------------------------------------------------------------------
/enginframe/alb.session.closing.hook.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 1999-2021 by Nice, srl.,
4 | # Via Milliavacca, 9
5 | # 14100 Asti - ITALY
6 | # All rights reserved.
7 | #
8 | # This software is the confidential and proprietary information
9 | # of Nice, srl. ("Confidential Information").
10 | # You shall not disclose such Confidential Information
11 | # and shall use it only in accordance with the terms of
12 | # the license agreement you entered into with Nice.
13 |
14 | # This script configures an AWS Application Load Balancer (ALB) to disable a connection to a host
15 | # where an Interactive Session was running.
16 | # This script is meant to be used with DCV 2017 (and later) interactive sessions only.
17 | 
18 | # This script deletes the Target Group containing the instance where the Session was running
19 | # and deletes the previously created Listener Rule.
20 | 
21 | # The Listener Rule has the role of associating the input URL path with the Target Group. This path
22 | # must be the web url path of the DCV server running on the execution node.
23 | # Since it is not possible to do URL path translation with an ALB, every DCV server must have a unique
24 | # web url path configured. It is suggested to use the hostname of the node as the web url path
25 | # for the DCV server running on that node.
26 | 
27 | # The maximum number of Listener Rules per ALB is 100, hence a single ALB can handle at most
28 | # 100 Interactive Sessions running concurrently. To increase this limit, consider adding more ALBs
29 | # to the infrastructure.
30 |
31 | # Prerequisites for:
32 | # EnginFrame node:
33 | # - AWS Command Line Interface (CLI) must be installed
34 | # - Since this script is going to be executed by the user running the EnginFrame Server, i.e. the Apache Tomcat user,
35 | # an AWS CLI profile must be configured for that user, having the permissions to list instances and to manage load balancers.
36 | # (see https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html)
37 | # Or alternatively, if EnginFrame is installed into an EC2 instance, configure the correct AWS role for this instance.
38 | #
39 | # AWS account:
40 | # - AWS Application Load Balancer (ALB) and an HTTPS listener with a Default Target Group must be already configured and running.
41 | #
42 | # DCV server node:
43 | # - configure each DCV server node with a unique web url path (see dcv.conf)
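#     For example (illustrative values, not shipped in this repo), in dcv.conf:
#       [connectivity]
#       web-url-path="/dcv-server1"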
44 |
45 | # Configuration parameters:
46 |
47 | # ALB public DNS name
48 | ALB_PUBLIC_DNS_NAME=
49 | # ALB port
50 | ALB_PORT=443
51 | # AWS default region
52 | export AWS_DEFAULT_REGION=
53 |
54 | _die() {
55 | echo "ERROR: $@"
56 | exit 1
57 | }
58 |
59 | _help() {
60 | _cmd=$(basename "$0")
61 | echo "${_cmd}"
62 | echo "Usage:"
63 |     echo "  ${_cmd} \"<session-id>\" \"<alb-host>\" \"<alb-port>\" \"<target-host>\" \"<target-port>\" \"<target-web-url-path>\""
64 | echo " ${_cmd} \"tmp3569402005256372176\" \"alb-enginframe-xxx.eu-west-1.elb.amazonaws.com\" 443 \"10.0.0.10\" 8443 \"/dcv-server1\""
65 | }
66 |
67 | # Input parameters:
68 | # - $1 session-id
69 | # - $2 alb-host (alb public dnsname)
70 | # - $3 alb-port
71 | # - $4 target-host (private dnsname)
72 | # - $5 target-port
73 | # - $6 target-web-url-path (it must start with the "/" character)
74 | main() {
75 | # parse input parameters
76 | if [[ $# -lt 3 ]] ; then
77 | _help
78 | exit 0
79 | fi
80 | local -- _session_id=$1
81 | local -- _alb_host=$2
82 | local -- _alb_port=$3
83 |
84 | [ -z "${_session_id}" ] && _die "Missing input Session Id parameter."
85 | [ -z "${_alb_host}" ] && _die "Missing input ALB Host parameter."
86 | [ -z "${_alb_port}" ] && _die "Missing input ALB Port parameter."
87 |
88 | # check if AWS Cli is in the path
89 | aws help >/dev/null || _die "AWS Cli is not installed."
90 |
91 | # get ALB Amazon Resource Name (ARN) by dns-name
92 | local -- _alb_arn=$(aws elbv2 describe-load-balancers --query "LoadBalancers[? DNSName == '${_alb_host}'].LoadBalancerArn" --output text)
93 | [ -n "${_alb_arn}" ] || _die "Unable to get ALB identifier for the ALB (${_alb_host})."
94 |
95 | # get Listener arn
96 | local -- _listener_arn=$(aws elbv2 describe-listeners --load-balancer-arn "${_alb_arn}" \
97 | --query 'Listeners[? Port == `'${_alb_port}'`].ListenerArn' --output text)
98 | [ -n "${_listener_arn}" ] || _die "Listener for port (${_alb_port}) does not exist in the ALB (${_alb_host})."
99 |
100 | # get Target Group arn
101 | local -- _target_group_name=$(printf "%s" "${_session_id}" | tr -c 'a-zA-Z0-9' -)
102 | local -- _target_group_arn=$(aws elbv2 describe-target-groups --load-balancer-arn "${_alb_arn}" \
103 | --query "TargetGroups[? TargetGroupName == '${_target_group_name}'].TargetGroupArn" --output text)
104 | [ -n "${_target_group_arn}" ] || _die "Unable to get Target Group (${_target_group_name})"
105 |
106 | # get Rule arn
107 | local -- _rule_arn=$(aws elbv2 describe-rules --listener-arn "${_listener_arn}" \
108 | --query "Rules[? Actions[? TargetGroupArn == '${_target_group_arn}']].RuleArn" --output text)
109 | [ -n "${_rule_arn}" ] || _die "Unable to get Rule for Target Group (${_target_group_arn}) in the Listener (${_listener_arn})."
110 |
111 | # delete Rule
112 | aws elbv2 delete-rule --rule-arn "${_rule_arn}" >/dev/null
113 | [ $? -eq 0 ] || _die "Unable to delete Listener Rule (${_rule_arn})."
114 |
115 | # delete Target Group
116 | aws elbv2 delete-target-group --target-group-arn "${_target_group_arn}" >/dev/null
117 | [ $? -eq 0 ] || _die "Unable to delete Target Group (${_target_group_arn})."
118 | }
119 |
120 | # Check it's a DCV 2017 interactive session.
121 | if [ "${INTERACTIVE_SESSION_REMOTE}" = "dcv2" ]; then
122 | main "${INTERACTIVE_SESSION_REMOTE_SESSION_ID}" "${ALB_PUBLIC_DNS_NAME}" "${ALB_PORT}"
123 | fi
124 |
125 | # ex:ts=4:sw=4:et:ft=sh:
--------------------------------------------------------------------------------
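A note on the hook above: the Target Group is not looked up by tag or ARN but by re-deriving its name from the session ID, replacing every character that is not a letter or digit with a dash (ALB Target Group names only allow alphanumerics and hyphens). A minimal sketch of that derivation, with a hypothetical session ID containing a dot:

    _session_id="tmp123.456"
    printf "%s" "${_session_id}" | tr -c 'a-zA-Z0-9' -   # prints: tmp123-456
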
/enginframe/alb.session.starting.hook.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 1999-2021 by Nice, srl.,
4 | # Via Milliavacca, 9
5 | # 14100 Asti - ITALY
6 | # All rights reserved.
7 | #
8 | # This software is the confidential and proprietary information
9 | # of Nice, srl. ("Confidential Information").
10 | # You shall not disclose such Confidential Information
11 | # and shall use it only in accordance with the terms of
12 | # the license agreement you entered into with Nice.
13 |
14 | # This script configures an AWS Application Load Balancer (ALB) to enable a connection to a host
15 | # where an Interactive Session is running.
16 | # This script is meant to be used with DCV 2017 (and later) interactive sessions only.
17 |
18 | # This script creates a new Target Group containing the instance where the Session is running
19 | # and adds a new Listener Rule to the HTTPS listener of the ALB.
20 |
21 | # The Listener Rule associates the input URL path with the Target Group. This path
22 | # must be the web URL path of the DCV server running on the execution node.
23 | # Since it is not possible to do URL path translation with an ALB, every DCV server must have a unique
24 | # web URL path configured. It is suggested to use the hostname of the node as the web URL path
25 | # for the DCV server running on that node.
26 |
27 | # The maximum number of Listener Rules per ALB is 100, hence a single ALB can handle at most
28 | # 100 Interactive Sessions running concurrently. To go beyond this limit, consider adding more ALBs
29 | # to the infrastructure.
30 |
31 | # Prerequisites for:
32 | # EnginFrame node:
33 | # - AWS Command Line Interface (CLI) must be installed
34 | # - Because this script is executed by the user running the EnginFrame Server, i.e. the Apache Tomcat user,
35 | #   an AWS CLI profile with permissions to list instances and manage load balancers must be configured for that user
36 | #   (see https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html).
37 | #   Alternatively, if EnginFrame runs on an EC2 instance, attach an IAM role with the required permissions to the instance.
38 | #
39 | # AWS account:
40 | # - An AWS Application Load Balancer (ALB) with an HTTPS listener and a default Target Group must already be configured and running.
41 | #
42 | # DCV server node:
43 | # - configure each DCV server node with a unique web URL path (see dcv.conf)
44 |
45 | # Configuration parameters:
46 |
47 | # ALB public DNS name
48 | ALB_PUBLIC_DNS_NAME=
49 | # ALB port
50 | ALB_PORT=443
51 | # AWS default region
52 | export AWS_DEFAULT_REGION=
53 |
54 | _die() {
55 | echo "ERROR: $@"
56 | exit 1
57 | }
58 |
59 | _help() {
60 | _cmd=$(basename "$0")
61 | echo "${_cmd}"
62 | echo "Usage:"
63 | echo " ${_cmd} \"<session-id>\" \"<alb-host>\" \"<alb-port>\" \"<target-host>\" \"<target-port>\" \"<target-web-url-path>\""
64 | echo " ${_cmd} \"tmp3569402005256372176\" \"alb-enginframe-xxx.eu-west-1.elb.amazonaws.com\" 443 \"10.0.0.10\" 8443 \"/dcv-server1\""
65 | }
66 |
67 | # Input parameters:
68 | # - $1 session-id
69 | # - $2 alb-host (alb public dnsname)
70 | # - $3 alb-port
71 | # - $4 target-host (the EC2 instance id of the execution node)
72 | # - $5 target-port
73 | # - $6 target-web-url-path (it must start with the "/" character)
74 | main() {
75 | # parse input parameters
76 | if [[ $# -lt 6 ]] ; then
77 | _help
78 | exit 0
79 | fi
80 | local -- _session_id=$1
81 | local -- _alb_host=$2
82 | local -- _alb_port=$3
83 | local -- _instance_id=$4
84 | local -- _target_port=$5
85 | local -- _target_web_url_path=$6
86 |
87 | [ -z "${_session_id}" ] && _die "Missing input Session Id parameter."
88 | [ -z "${_alb_host}" ] && _die "Missing input ALB Host parameter."
89 | [ -z "${_alb_port}" ] && _die "Missing input ALB Port parameter."
90 | [ -z "${_instance_id}" ] && _die "Missing input Instance Id parameter."
91 | [ -z "${_target_port}" ] && _die "Missing input Target Port parameter."
92 | [ -z "${_target_web_url_path}" ] && _die "Missing input Target Web Url Path parameter."
93 |
94 | # check if the AWS CLI is in the path
95 | aws help >/dev/null || _die "AWS CLI is not installed."
96 |
97 | # get ALB Amazon Resource Name (ARN) by dns-name
98 | local -- _alb_arn=$(aws elbv2 describe-load-balancers --query "LoadBalancers[? DNSName == '${_alb_host}'].LoadBalancerArn" --output text)
99 | [ -n "${_alb_arn}" ] || _die "Unable to get ALB identifier for the ALB (${_alb_host})."
100 |
101 | # detect VPC of the ALB
102 | local -- _vpc_id=$(aws elbv2 describe-load-balancers --load-balancer-arns "${_alb_arn}" \
103 | --query "LoadBalancers[].VpcId" --output text)
104 | [ -n "${_vpc_id}" ] || _die "Unable to detect VPC of the ALB (${_alb_host})."
105 |
106 | # check if the Listener exists
107 | local -- _listener_arn=$(aws elbv2 describe-listeners --load-balancer-arn "${_alb_arn}" \
108 | --query 'Listeners[? Port == `'${_alb_port}'`].ListenerArn' --output text)
109 | [ -n "${_listener_arn}" ] || _die "Listener for port (${_alb_port}) does not exist in the ALB (${_alb_host})."
110 |
111 | # check if Target Group for the given session already exists
112 | local -- _target_group_name=$(printf "%s" "${_session_id}" | tr -c 'a-zA-Z0-9' -)
113 | local -- _target_group_arn=$(aws elbv2 describe-target-groups --load-balancer-arn "${_alb_arn}" \
114 | --query "TargetGroups[? TargetGroupName == '${_target_group_name}'].TargetGroupArn" --output text)
115 | if [ -z "${_target_group_arn}" ]; then
116 |
117 | # create a new Target Group for the given instance (a 404 health-check response is expected from the DCV server)
118 | _target_group_arn=$(aws elbv2 create-target-group --name "${_target_group_name}" --protocol HTTPS --port "${_target_port}" --matcher "HttpCode=404" --vpc-id "${_vpc_id}" \
119 | --query "TargetGroups[0].TargetGroupArn" --output text)
120 | [ -n "${_target_group_arn}" ] || _die "Unable to create Target Group (${_target_group_name}) in the VPC (${_vpc_id})"
121 |
122 | # enable sticky session
123 | #aws elbv2 modify-target-group-attributes --target-group-arn "${_target_group_arn}" --attributes "Key=stickiness.enabled,Value=true" >/dev/null
124 | #[ $? -eq 0 ] || _die "Unable to set sticky session for the Target Group (${_target_group_arn})."
125 |
126 | # register instance in the new target group
127 | aws elbv2 register-targets --target-group-arn "${_target_group_arn}" --targets "Id=${_instance_id}" >/dev/null
128 | [ $? -eq 0 ] || _die "Unable to register Instance (${_instance_id}) in the Target Group (${_target_group_arn})."
129 |
130 | # get current max priority
131 | local -- _current_priority=$(aws elbv2 describe-rules --listener-arn "${_listener_arn}" \
132 | --query "max(Rules[? Priority != 'default'].Priority.to_number(@))" --output text)
133 | [ -n "${_current_priority}" ] || _current_priority=0
134 |
135 | # add target rule to the selected listener
136 | local -- _priority=$((_current_priority+1))
137 | local -- _target_path="${_target_web_url_path}*"
138 |
139 | local -- _rule_arn=$(aws elbv2 create-rule --listener-arn "${_listener_arn}" --priority "${_priority}" \
140 | --conditions Field=path-pattern,Values="${_target_path}" --actions Type=forward,TargetGroupArn=${_target_group_arn} \
141 | --query "Rules[0].RuleArn" --output text)
142 | [ -n "${_rule_arn}" ] || _die "Unable to create Rule for the Listener (${_listener_arn}), Target Group (${_target_group_arn}) and target path (${_target_path})."
143 | fi
144 |
145 | # wait for the new rule and registered target to become effective, avoiding a transient ALB 404 error
146 | sleep 10
147 |
148 | # set output variables
149 | export INTERACTIVE_SESSION_TARGET_HOST="${_alb_host}"
150 | export INTERACTIVE_SESSION_TARGET_PORT="${_alb_port}"
151 | export INTERACTIVE_SESSION_TARGET_WEBURLPATH="${_target_web_url_path}"
152 | }
153 |
154 | # Check it's a DCV 2017 interactive session.
155 | if [ "${INTERACTIVE_SESSION_REMOTE}" = "dcv2" ]; then
156 | main "${INTERACTIVE_SESSION_REMOTE_SESSION_ID}" "${ALB_PUBLIC_DNS_NAME}" "${ALB_PORT}" "${INTERACTIVE_SESSION_DCV2_WEBURLPATH:1}" "${INTERACTIVE_DEFAULT_DCV2_WEB_PORT}" "${INTERACTIVE_SESSION_DCV2_WEBURLPATH}"
157 | fi
158 |
159 | # ex:ts=4:sw=4:et:ft=sh:
--------------------------------------------------------------------------------
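For reference, the Listener Rule that the starting hook builds is equivalent to the following standalone AWS CLI call; the ARNs, priority, and instance ID in the path are illustrative placeholders:

    aws elbv2 create-rule \
        --listener-arn "${LISTENER_ARN}" \
        --priority 2 \
        --conditions Field=path-pattern,Values="/i-0abc1234def567890*" \
        --actions Type=forward,TargetGroupArn="${TARGET_GROUP_ARN}"

The trailing "*" in the path pattern lets a single rule match every sub-path served by that DCV server.
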
/enginframe/efinstall.config:
--------------------------------------------------------------------------------
1 | efinstall.config.version = 1.0
2 | ef.accept.eula = true
3 | kernel.agent.on.same.machine = true
4 | kernel.agent.rmi.port = 9999
5 | kernel.agent.rmi.bind.port = 9998
6 | kernel.ef.admin.user = ec2-user
7 | kernel.server.tomcat.https = true
8 | kernel.ef.tomcat.user = efnobody
9 | kernel.ef.root.context = enginframe
10 | kernel.tomcat.https.port = 8443
11 | kernel.tomcat.shutdown.port = 8005
12 | kernel.start_enginframe_at_boot = true
13 | demo.install = true
14 | default.auth.mgr = pam
15 | pam.service = system-auth
16 | ef.jobmanager = slurm
17 | slurm.binaries.path = /opt/slurm/bin
18 | ef.delegate.dcvsm = true
19 | dcvsm.oauth2.url = https\://sm-hostname\:sm-port/oauth2/token
20 | dcvsm.oauth2.id =
21 | dcvsm.broker.url = https\://sm-hostname\:sm-port
22 | dcvsm.no.strict.tls = false
23 | intro-targets = component_enginframe,component_kernel,component_applets,component_parser,component_http,component_pam,component_ldap,component_activedirectory,component_rss,component_lsf,component_pbs,component_torque,component_sge,component_slurm,component_awsbatch,component_dcvsm,component_demo,component_neutro,component_vdi,component_applications,component_service-manager,component_user-group-manager,component_hpc,component_enginframe_finalizer,
24 | progress-targets = cleanuptarget,
25 | kernel.ef.db = other-db
26 | kernel.ef.derby.db.port = 3306
27 | kernel.ef.db.admin.name = admin
28 | kernel.ef.db.savePasswordInKeystore = true
29 | kernel.ef.db.url = jdbc\:mysql\://admin@${SLURM_DB_ENDPOINT}\:3306/EnginFrameDB
30 |
--------------------------------------------------------------------------------
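This file drives the unattended EnginFrame installation performed by modules/10.install.enginframe.headnode.sh, which appends a few host-specific keys and then runs the installer in batch mode from the directory containing efinstall.config:

    java -jar enginframe-latest.jar --text --batch

The ${SLURM_DB_ENDPOINT} token in the JDBC URL is a placeholder; it is expected to be expanded with the actual database endpoint by the post-install tooling before the installer runs.
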
/enginframe/fm.browse.ui:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | . "${EF_ROOT}/plugins/ef/lib/utils"
4 |
5 | ef_source_conf hydrogen "ui.hydrogen.conf"
6 |
7 | if [ -n "${HY_FM_BROWSE_SORT_BY}" ]; then
8 | sortBy="${HY_FM_BROWSE_SORT_BY}"
9 | else
10 | sortBy="${HY_FM_BROWSE_DEFAULT_SORT_BY}"
11 | fi
12 |
13 | _ui="hydrogen"
14 |
15 | _widget_id="fm-browse"
16 |
17 | # Create File Manager /fsx anchor
18 | _fsx_vroot=$("${EF_ROOT}/plugins/fm/bin/fm.vroot.create" "${FM_BROWSE_SPOOLER}" "fm" "file:///fsx")
19 | if [ $? -ne 0 ]; then
20 | echo "Problem creating vroot for /fsx location. Please check your permissions" >&2
21 | exit 1
22 | fi
23 |
24 | _s3_vroot=$("${EF_ROOT}/plugins/fm/bin/fm.vroot.create" "${FM_BROWSE_SPOOLER}" "fm" "s3://@${S3_BUCKET}/")
25 | if [ $? -ne 0 ]; then
26 | echo "Problem creating vroot for S3. Please check your permissions" >&2
27 | exit 1
28 | fi
29 |
30 | cat << EOF
31 |
32 | EOF
33 |
34 | if [ -n "${_fsx_vroot}" ]; then
35 | cat << EOF
36 |
37 | FSx for Lustre (/fsx)
38 |
39 | EOF
40 | fi
41 |
42 | if [ -n "${_s3_vroot}" ]; then
43 | cat << EOF
44 |
45 | S3 (${S3_BUCKET})
46 |
47 | EOF
48 | fi
49 |
50 | cat << EOF
51 |
52 | EOF
--------------------------------------------------------------------------------
/enginframe/mysql/ef.mysql:
--------------------------------------------------------------------------------
1 | CREATE DATABASE EnginFrameDB DEFAULT CHARACTER SET latin1;
2 | GRANT ALL ON `%`.* TO admin@`%`;
3 | flush privileges;
--------------------------------------------------------------------------------
/enginframe/mysql/efdb.config:
--------------------------------------------------------------------------------
1 | [client]
2 | user=admin
3 | password=${EF_DB_PASS}
4 |
5 | [mysql]
6 | no-auto-rehash
7 | host=${SLURM_DB_ENDPOINT}
8 | port=3306
--------------------------------------------------------------------------------
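These two files bootstrap the EnginFrame database from modules/10.install.enginframe.headnode.sh: envsubst expands the password and endpoint tokens, and the resulting defaults file feeds the mysql client. A condensed sketch of that flow (the password export is illustrative):

    export EF_DB_PASS="..."                        # retrieved from AWS Secrets Manager
    envsubst < efdb.config > efdb.pass.config      # expands ${EF_DB_PASS} and ${SLURM_DB_ENDPOINT}
    mysql --defaults-extra-file=efdb.pass.config < ef.mysql
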
/enginframe/services/ef-services.Linux Desktop.2022-11-22T10-22-47.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/1click-hpc/390dfe1e4c131f135cdd8b5f853371e763cd8877/enginframe/services/ef-services.Linux Desktop.2022-11-22T10-22-47.zip
--------------------------------------------------------------------------------
/modules/04.configure.disable.anacron.compute.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this
7 | # software and associated documentation files (the "Software"), to deal in the Software
8 | # without restriction, including without limitation the rights to use, copy, modify,
9 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so.
11 | #
12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
15 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
16 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 |
19 | set -x
20 | set -e
21 |
22 | # temporary fix to manually disable Anacron, until ParallelCluster handles this natively.
23 | disableAnacron() {
24 | sudo sed 's/^/#/' /etc/anacrontab | sudo tee /etc/anacrontab.tmp
25 | sudo mv -f --backup /etc/anacrontab.tmp /etc/anacrontab
26 | }
27 |
28 | # main
29 | # ----------------------------------------------------------------------------
30 | main() {
31 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 04.configure.disable.anacron.compute.sh: START" >&2
32 | disableAnacron
33 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 04.configure.disable.anacron.compute.sh: STOP" >&2
34 | }
35 |
36 | main "$@"
--------------------------------------------------------------------------------
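disableAnacron comments out every line of /etc/anacrontab (keeping a backup of the previous file via mv --backup). A quick, illustrative way to verify the result:

    grep -c -v '^#' /etc/anacrontab   # prints 0 once every line is commented out
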
/modules/04.configure.slurm.AllOrNothing.headnode.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this
7 | # software and associated documentation files (the "Software"), to deal in the Software
8 | # without restriction, including without limitation the rights to use, copy, modify,
9 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so.
11 | #
12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
15 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
16 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 |
19 | SLURM_RESUME_CONF_FILE="/etc/parallelcluster/slurm_plugin/parallelcluster_slurm_resume.conf"
20 |
21 | set -x
22 | set -e
23 |
24 | # Add all-or-nothing instance launches to the Slurm resume plugin configuration
25 | addAllOrNothingtoSlurmConf() {
26 | echo "all_or_nothing_batch = True" >> "${SLURM_RESUME_CONF_FILE}"
27 | }
28 |
29 | restartSlurmDaemon() {
30 | systemctl restart slurmctld
31 | }
32 |
33 | # main
34 | # ----------------------------------------------------------------------------
35 | main() {
36 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 04.configure.slurm.AllOrNothing.headnode.sh: START" >&2
37 | addAllOrNothingtoSlurmConf
38 | restartSlurmDaemon
39 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 04.configure.slurm.AllOrNothing.headnode.sh: STOP" >&2
40 | }
41 |
42 | main "$@"
--------------------------------------------------------------------------------
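The net effect of this module is a single extra line in the ParallelCluster Slurm resume plugin configuration, which makes the resume program launch the EC2 instances for a scale-up batch on an all-or-nothing basis instead of accepting partial capacity:

    # /etc/parallelcluster/slurm_plugin/parallelcluster_slurm_resume.conf (after the script runs)
    all_or_nothing_batch = True
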
/modules/07.configure.slurm.tagging.headnode.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this
7 | # software and associated documentation files (the "Software"), to deal in the Software
8 | # without restriction, including without limitation the rights to use, copy, modify,
9 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so.
11 | #
12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
15 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
16 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 |
19 | set -x
20 | set -e
21 |
22 | configureSACCT() {
23 | aws s3 cp --quiet "${post_install_base}/scripts/prolog.sh" "${SLURM_ETC}/" --region "${cfn_region}" || exit 1
24 | chmod +x "${SLURM_ETC}/prolog.sh"
25 | echo "Prolog=/opt/slurm/etc/prolog.sh" >> "${SLURM_ETC}/slurm.conf"
26 | }
27 |
28 | restartSlurmDaemons() {
29 | systemctl restart slurmctld
30 | }
31 |
32 | # main
33 | # ----------------------------------------------------------------------------
34 | main() {
35 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 07.configure.slurm.tagging.headnode.sh: START" >&2
36 | configureSACCT
37 | restartSlurmDaemons
38 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 07.configure.slurm.tagging.headnode.sh: STOP" >&2
39 | }
40 |
41 | main "$@"
--------------------------------------------------------------------------------
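Despite its name, configureSACCT installs a Slurm Prolog: it downloads scripts/prolog.sh into the Slurm etc directory and registers it in slurm.conf, so Slurm runs it on each allocated node when a job starts (this is what performs the job-based instance tagging the module name refers to):

    # appended to ${SLURM_ETC}/slurm.conf
    Prolog=/opt/slurm/etc/prolog.sh
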
/modules/10.install.enginframe.headnode.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this
7 | # software and associated documentation files (the "Software"), to deal in the Software
8 | # without restriction, including without limitation the rights to use, copy, modify,
9 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so.
11 | #
12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
15 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
16 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 |
19 |
20 | # Installs EnginFrame on the headnode
21 |
22 | set -x
23 | set -e
24 |
25 | # install EnginFrame
26 | # ----------------------------------------------------------------------------
27 | installEnginFrame() {
28 |
29 | amazon-linux-extras install -y java-openjdk11
30 |
31 | wget -nv -P /tmp/packages https://dn3uclhgxk1jt.cloudfront.net/enginframe/packages/enginframe-latest.jar || exit 1
32 |
33 | aws s3 cp --quiet "${post_install_base}/enginframe/efinstall.config" /tmp/packages/ --region "${cfn_region}" || exit 1
34 |
35 | # set permissions and uncompress
36 | chmod 755 -R /tmp/packages/*
37 | enginframe_jar=$(find /tmp/packages -type f -name 'enginframe-*.jar')
38 | # some checks
39 | [[ -z ${enginframe_jar} ]] && \
40 | echo "[ERROR] missing enginframe jar" && return 1
41 | [[ ! -f /tmp/packages/efinstall.config ]] && \
42 | echo "[ERROR] missing efinstall.config" && return 1
43 |
44 | cat <<-EOF >> /tmp/packages/efinstall.config
45 | kernel.java.home = /usr/lib/jvm/jre-11/
46 | nice.root.dir.ui = ${NICE_ROOT}
47 | ef.spooler.dir = ${NICE_ROOT}/enginframe/spoolers/
48 | ef.repository.dir = ${NICE_ROOT}/enginframe/repository/
49 | ef.sessions.dir = ${NICE_ROOT}/enginframe/sessions/
50 | ef.data.root.dir = ${NICE_ROOT}/enginframe/data/
51 | ef.logs.root.dir = ${NICE_ROOT}/enginframe/logs/
52 | ef.temp.root.dir = ${NICE_ROOT}/enginframe/tmp/
53 | kernel.server.tomcat.https.ef.hostname = ${head_node_hostname}
54 | kernel.ef.db.admin.password = ${ec2user_pass}
55 | EOF
56 |
57 |
58 | # add the EnginFrame service user if it does not already exist
59 | id -u efnobody &>/dev/null || adduser efnobody
60 |
61 | echo "${ec2user_pass}" | passwd ec2-user --stdin
62 |
63 | if [[ -d "${SHARED_FS_DIR}/nice" ]]; then
64 | mv -f "${SHARED_FS_DIR}/nice" "${SHARED_FS_DIR}/nice.$(date "+%d-%m-%Y-%H-%M").BAK"
65 | fi
66 |
67 | # finally, launch EnginFrame installer
68 | ( cd /tmp/packages
69 | /usr/lib/jvm/jre-11/bin/java -jar "${enginframe_jar}" --text --batch )
70 | }
71 |
72 | configureEnginFrameDB(){
73 |
74 | #FIXME: use latest link
75 | wget -nv -P "${EF_ROOT}/WEBAPP/WEB-INF/lib/" https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.28/mysql-connector-java-8.0.28.jar
76 | chown ec2-user:efnobody "${EF_ROOT}/WEBAPP/WEB-INF/lib/mysql-connector-java-8.0.28.jar"
77 |
78 | aws s3 cp --quiet "${post_install_base}/enginframe/mysql/efdb.config" /tmp/ --region "${cfn_region}" || exit 1
79 | aws s3 cp --quiet "${post_install_base}/enginframe/mysql/ef.mysql" /tmp/ --region "${cfn_region}" || exit 1
80 | aws s3 cp --quiet "${post_install_base}/enginframe/mysql/mysql" /tmp/ --region "${cfn_region}" || exit 1
81 |
82 | chown ec2-user:efnobody "/tmp/mysql"
83 | chmod +x "/tmp/mysql"
84 |
85 | export EF_DB_PASS="${ec2user_pass}"
86 | /usr/bin/envsubst < /tmp/efdb.config > /tmp/efdb.pass.config
87 | # use absolute paths: the files above were copied to /tmp, not to the current directory
88 | /tmp/mysql --defaults-extra-file="/tmp/efdb.pass.config" < "/tmp/ef.mysql"
89 | rm /tmp/efdb.pass.config /tmp/efdb.config /tmp/ef.mysql /tmp/mysql
90 | }
91 |
92 | customizeEnginFrame() {
93 | aws s3 cp --quiet "${post_install_base}/enginframe/fm.browse.ui" "${EF_ROOT}/plugins/applications/bin/" --region "${cfn_region}" || exit 1
94 | chown ec2-user:efnobody "${EF_ROOT}/plugins/applications/bin/fm.browse.ui"
95 | chmod 755 "${EF_ROOT}/plugins/applications/bin/fm.browse.ui"
96 |
97 | sed -i \
98 | "s/^HY_CONNECT_SESSION_MAX_WAIT=.*$/HY_CONNECT_SESSION_MAX_WAIT='600'/" \
99 | "${EF_ROOT}/plugins/hydrogen/conf/ui.hydrogen.conf"
100 |
101 | #Fix DCV sessions not working with AD users
102 | sed '2 i id "${USER}"' -i "${EF_ROOT}/plugins/interactive/lib/remote/linux.jobscript.functions"
103 | }
104 |
105 | startEnginFrame() {
106 | systemctl start enginframe
107 | }
108 |
109 |
110 | # main
111 | # ----------------------------------------------------------------------------
112 | main() {
113 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 10.install.enginframe.headnode.sh: START" >&2
114 | export ec2user_pass="$(aws secretsmanager get-secret-value --secret-id "${stack_name}" --query SecretString --output text --region "${cfn_region}")"
115 | installEnginFrame
116 | EF_TOP="${NICE_ROOT}/enginframe"
117 | unset EF_VERSION
118 | source "${EF_TOP}/current-version"
119 | export EF_ROOT="${EF_TOP}/${EF_VERSION}/enginframe"
120 | customizeEnginFrame
121 | configureEnginFrameDB
122 | startEnginFrame
123 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 10.install.enginframe.headnode.sh: STOP" >&2
124 | }
125 |
126 | main "$@"
--------------------------------------------------------------------------------
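main() above depends on the EnginFrame installer writing a current-version file under ${NICE_ROOT}/enginframe that defines EF_VERSION, from which EF_ROOT is derived. Roughly, assuming a hypothetical version string:

    # ${NICE_ROOT}/enginframe/current-version
    EF_VERSION=2021.0-r1667
    # so EF_ROOT resolves to ${NICE_ROOT}/enginframe/2021.0-r1667/enginframe
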
/modules/12.configure.enginframe.alb.headnode.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this
7 | # software and associated documentation files (the "Software"), to deal in the Software
8 | # without restriction, including without limitation the rights to use, copy, modify,
9 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so.
11 | #
12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
15 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
16 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 |
19 | set -x
20 | set -e
21 |
22 | configureEF4ALB() {
23 |
24 | cat <<-EOF >> ${EF_CONF_ROOT}/plugins/interactive/interactive.efconf
25 | INTERACTIVE_SESSION_STARTING_HOOK=${EF_DATA_ROOT}/plugins/interactive/bin/alb.session.starting.hook.sh
26 | INTERACTIVE_SESSION_CLOSING_HOOK=${EF_DATA_ROOT}/plugins/interactive/bin/alb.session.closing.hook.sh
27 | EOF
28 |
29 | cat <<-EOF >> ${EF_CONF_ROOT}/enginframe/agent.conf
30 | ef.download.server.url=https://127.0.0.1:8443/enginframe/download
31 | EOF
32 |
33 | alb_name="$(echo $stack_name | sed 's/hpc-1click-//')"
34 | ALB_PUBLIC_DNS_NAME=$(aws elbv2 describe-load-balancers --names "${alb_name}" --query "LoadBalancers[? LoadBalancerName == '${alb_name}'].DNSName" --output text --region "${cfn_region}")
35 |
36 | pattern='^ALB_PUBLIC_DNS_NAME=.*$'
37 | replace="ALB_PUBLIC_DNS_NAME=${ALB_PUBLIC_DNS_NAME}"
38 | sed -i -e "s|${pattern}|${replace}|" "${EF_DATA_ROOT}/plugins/interactive/bin/alb.session.starting.hook.sh"
39 | sed -i -e "s|${pattern}|${replace}|" "${EF_DATA_ROOT}/plugins/interactive/bin/alb.session.closing.hook.sh"
40 |
41 | pattern='^export AWS_DEFAULT_REGION=.*$'
42 | replace="export AWS_DEFAULT_REGION=${cfn_region}"
43 | sed -i -e "s|${pattern}|${replace}|" "${EF_DATA_ROOT}/plugins/interactive/bin/alb.session.starting.hook.sh"
44 | sed -i -e "s|${pattern}|${replace}|" "${EF_DATA_ROOT}/plugins/interactive/bin/alb.session.closing.hook.sh"
45 |
46 | }
47 |
48 |
49 | downloadALBhooks() {
50 |
51 | aws s3 cp --quiet "${post_install_base}/enginframe/alb.session.closing.hook.sh" "${EF_DATA_ROOT}/plugins/interactive/bin/" --region "${cfn_region}" || exit 1
52 | aws s3 cp --quiet "${post_install_base}/enginframe/alb.session.starting.hook.sh" "${EF_DATA_ROOT}/plugins/interactive/bin/" --region "${cfn_region}" || exit 1
53 |
54 | ### FIX: do not hardcode usernames
55 | chown ec2-user:efnobody "${EF_DATA_ROOT}/plugins/interactive/bin/alb.session.closing.hook.sh"
56 | chmod +x "${EF_DATA_ROOT}/plugins/interactive/bin/alb.session.closing.hook.sh"
57 | chown ec2-user:efnobody "${EF_DATA_ROOT}/plugins/interactive/bin/alb.session.starting.hook.sh"
58 | chmod +x "${EF_DATA_ROOT}/plugins/interactive/bin/alb.session.starting.hook.sh"
59 | }
60 |
61 |
62 | # main
63 | # ----------------------------------------------------------------------------
64 | main() {
65 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 12.configure.enginframe.alb.headnode.sh: START" >&2
66 | downloadALBhooks
67 | configureEF4ALB
68 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 12.configure.enginframe.alb.headnode.sh: STOP" >&2
69 |
70 | }
71 |
72 | main "$@"
73 |
--------------------------------------------------------------------------------
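After the sed substitutions in configureEF4ALB, the configuration header of each ALB hook is filled in; the result looks like the following, with the DNS name and region illustrative:

    ALB_PUBLIC_DNS_NAME=alb-enginframe-xxx.eu-west-1.elb.amazonaws.com
    export AWS_DEFAULT_REGION=eu-west-1
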
/modules/15.install.dcv.broker.headnode.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this
7 | # software and associated documentation files (the "Software"), to deal in the Software
8 | # without restriction, including without limitation the rights to use, copy, modify,
9 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so.
11 | #
12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
15 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
16 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 |
19 | # Installs DCV Session Broker on headnode
20 |
21 | set -x
22 | set -e
23 |
24 | # install DCV Session Broker
25 | installDCVSessionBroker() {
26 |
27 | rpm --import "${NICE_GPG_KEY_URL}"
28 | yum install -y -q https://d1uj6qtbmh3dt5.cloudfront.net/nice-dcv-session-manager-broker.el7.noarch.rpm || exit 1
29 |
30 | # switch broker to 8446 since 8443 is used by EnginFrame
31 | pattern='^ *client-to-broker-connector-https-port *=.*$'
32 | replace="client-to-broker-connector-https-port = ${CLIENT_BROKER_PORT}"
33 | sed -i -e "s|${pattern}|${replace}|" '/etc/dcv-session-manager-broker/session-manager-broker.properties'
34 |
35 | pattern='^ *agent-to-broker-connector-https-port *=.*$'
36 | replace="agent-to-broker-connector-https-port = ${AGENT_BROKER_PORT}"
37 | sed -i -e "s|${pattern}|${replace}|" '/etc/dcv-session-manager-broker/session-manager-broker.properties'
38 |
39 | # optionally switch the broker-to-broker discovery port to 47501, since during the boot phase the default port can be busy
40 | #sed -i 's/broker-to-broker-discovery-port = .*$/broker-to-broker-discovery-port = 47501/' \
41 | # /etc/dcv-session-manager-broker/session-manager-broker.properties
42 | #sed -i 's/broker-to-broker-discovery-addresses = .*$/broker-to-broker-discovery-addresses = 127.0.0.1:47501/' \
43 | # /etc/dcv-session-manager-broker/session-manager-broker.properties
44 | }
45 |
46 |
47 | # start DCV session broker
48 | startDCVSessionBroker() {
49 | local -i attempts=10 wait=1
50 | systemctl enable dcv-session-manager-broker
51 | systemctl start dcv-session-manager-broker
52 | sleep 10 # wait for a correct ignite initialization
53 |
54 | # wait for the broker CA certificate to be available, then copy it to the path referenced by BROKER_CA
55 | while [[ $((attempts--)) -gt 0 ]]; do
56 | if [[ -r /var/lib/dcvsmbroker/security/dcvsmbroker_ca.pem ]]; then
57 | cp /var/lib/dcvsmbroker/security/dcvsmbroker_ca.pem "${BROKER_CA}"
58 | break
59 | else sleep $((wait++))
60 | fi
61 | done
62 | [[ ${attempts} -gt 0 ]] || return 1
63 | }
64 |
65 |
66 | # sets DCV session broker in EnginFrame
67 | # avoid this function if you don't install EnginFrame
68 | setupEFSessionManager() {
69 | local -i attempts=10 wait=1
70 | source "${NICE_ROOT}/enginframe/conf/enginframe.conf"
71 |
72 | # register and set EnginFrame as API client
73 | while [[ $((attempts--)) -gt 0 ]]; do
74 | # run the status check inside the "if" so a transient failure does not trip "set -e"
75 | if systemctl is-active --quiet dcv-session-manager-broker; then
76 | dcv-session-manager-broker register-api-client --client-name EnginFrame \
77 | > /tmp/packages/ef_client_reg || return 1
78 | break
79 | else sleep $((wait++))
80 | fi
81 | done
82 | [[ ${attempts} -gt 0 ]] || return 1
83 |
84 | client_id=$(cat /tmp/packages/ef_client_reg | sed -n 's/^[ \t]*client-id:[ \t]*//p')
85 | client_pw=$(cat /tmp/packages/ef_client_reg | sed -n 's/^[ \t]*client-password:[ \t]*//p')
86 | sed -i "s/^DCVSM_CLUSTER_dcvsm_cluster1_AUTH_ID=.*$/DCVSM_CLUSTER_dcvsm_cluster1_AUTH_ID=${client_id//\//\\/}/" \
87 | "${NICE_ROOT}/enginframe/conf/plugins/dcvsm/clusters.props"
88 | sed -i \
89 | "s/^DCVSM_CLUSTER_dcvsm_cluster1_AUTH_PASSWORD=.*$/DCVSM_CLUSTER_dcvsm_cluster1_AUTH_PASSWORD=${client_pw//\//\\/}/" \
90 | "${NICE_ROOT}/enginframe/conf/plugins/dcvsm/clusters.props"
91 | sed -i \
92 | "s/^DCVSM_CLUSTER_dcvsm_cluster1_AUTH_ENDPOINT=.*$/DCVSM_CLUSTER_dcvsm_cluster1_AUTH_ENDPOINT=https:\/\/${host_name}:${CLIENT_BROKER_PORT}\/oauth2\/token/" \
93 | "${NICE_ROOT}/enginframe/conf/plugins/dcvsm/clusters.props"
94 | sed -i \
95 | "s/^DCVSM_CLUSTER_dcvsm_cluster1_SESSION_MANAGER_ENDPOINT=.*$/DCVSM_CLUSTER_dcvsm_cluster1_SESSION_MANAGER_ENDPOINT=https:\/\/${host_name}:${CLIENT_BROKER_PORT}/" \
96 | "${NICE_ROOT}/enginframe/conf/plugins/dcvsm/clusters.props"
97 |
98 | # add dcvsm certificate to Java keystore
99 | openssl x509 -in /var/lib/dcvsmbroker/security/dcvsmbroker_ca.pem -inform pem \
100 | -out /tmp/packages/dcvsmbroker_ca.der -outform der
101 | keytool -importcert -alias dcvsm \
102 | -keystore "${JAVA_HOME}/lib/security/cacerts" \
103 | -storepass changeit \
104 | -noprompt \
105 | -file /tmp/packages/dcvsmbroker_ca.der
106 | systemctl restart enginframe
107 | }
108 |
109 |
110 | # main
111 | # ----------------------------------------------------------------------------
112 | main() {
113 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 15.install.dcv.broker.headnode.sh: START" >&2
114 | installDCVSessionBroker
115 | startDCVSessionBroker
116 | setupEFSessionManager
117 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 15.install.dcv.broker.headnode.sh: STOP" >&2
118 | }
119 |
120 | main "$@"
--------------------------------------------------------------------------------
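setupEFSessionManager parses /tmp/packages/ef_client_reg with sed, so it assumes the broker's register-api-client output has roughly this shape (the credential values below are made up):

    client-id: 0f9d3cba-cfa7-4b32-a374-6e6f4605e2f0
    client-password: Zm9vYmFyLWV4YW1wbGU

The two values are then written into clusters.props so that EnginFrame can authenticate against the Session Manager broker.
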
/modules/20.install.dcv.slurm.headnode.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this
7 | # software and associated documentation files (the "Software"), to deal in the Software
8 | # without restriction, including without limitation the rights to use, copy, modify,
9 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so.
11 | #
12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
15 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
16 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 |
19 |
20 | # Add the "dcv2" feature requirement to the DCV-enabled Slurm partitions.
21 |
22 | set -x
23 | set -e
24 |
25 | DCV_KEY_WORD="dcv"
26 |
27 | # Add DCV as a feature to the Slurm partitions
28 | addDCVtoSlurmPartitions() {
29 | for conf_file in $(ls ${SLURM_CONF_FILE} | grep "${DCV_KEY_WORD}"); do
30 | sed -i 's/Feature=/Feature=dcv2,/g' "${conf_file}"
31 | done
32 | }
33 |
34 | restartSlurmDaemon() {
35 | systemctl restart slurmctld
36 | }
37 |
38 | # main
39 | # ----------------------------------------------------------------------------
40 | main() {
41 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 20.install.dcv.slurm.headnode.sh: START" >&2
42 | addDCVtoSlurmPartitions
43 | restartSlurmDaemon
44 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 20.install.dcv.slurm.headnode.sh: STOP" >&2
45 | }
46 |
47 | main "$@"
48 |
--------------------------------------------------------------------------------
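The sed in addDCVtoSlurmPartitions prepends dcv2 to whatever Feature list already exists in the matching partition config files; schematically (the node definition is illustrative):

    # before
    NodeName=dcv-dy-g4dn-[1-10] ... Feature=dynamic,g4dn.xlarge
    # after
    NodeName=dcv-dy-g4dn-[1-10] ... Feature=dcv2,dynamic,g4dn.xlarge
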
/modules/25.install.dcv-server.compute.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this
7 | # software and associated documentation files (the "Software"), to deal in the Software
8 | # without restriction, including without limitation the rights to use, copy, modify,
9 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so.
11 | #
12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
15 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
16 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 |
19 | set -x
20 | set -e
21 |
22 | installSimpleExternalAuth() {
23 |
24 | yum -y -q install nice-dcv-*/nice-dcv-simple-external-authenticator-*.rpm
25 |
26 | systemctl start dcvsimpleextauth.service
27 |
28 | }
29 |
30 | installMissingLib() {
31 | yum -y -q install ImageMagick
32 | }
33 |
34 | configureDCVexternalAuth() {
35 |
36 | pattern='\[security\]'
37 | replace='&\n'
38 | replace+="auth-token-verifier=\"http://localhost:8444\""
39 | cp '/etc/dcv/dcv.conf' "/etc/dcv/dcv.conf.$(date --iso=s --utc)"
40 | # remove duplicates if any
41 | #sed -i -e '/^ *\(administrators\|ca-file\|auth-token-verifier\) *=.*$/d' '/etc/dcv/dcv.conf'
42 | sed -i -e "s|${pattern}|${replace}|" '/etc/dcv/dcv.conf'
43 |
44 | }
45 |
46 | restartDCV() {
47 |
48 | systemctl restart dcvserver.service
49 |
50 | }
51 |
52 | # main
53 | # ----------------------------------------------------------------------------
54 | main() {
55 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] install.dcv-server.compute.sh: START" >&2
56 |
57 | wget -nv https://d1uj6qtbmh3dt5.cloudfront.net/nice-dcv-el7-x86_64.tgz
58 | tar zxvf nice-dcv-el7-x86_64.tgz
59 | installSimpleExternalAuth
60 | dcvusbdriverinstaller --quiet
61 |
62 | installMissingLib
63 | configureDCVexternalAuth
64 | restartDCV
65 |
66 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] install.dcv-server.compute.sh: STOP" >&2
67 | }
68 |
69 | main "$@"
--------------------------------------------------------------------------------
/modules/25.install.dcv-server.gpu.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this
7 | # software and associated documentation files (the "Software"), to deal in the Software
8 | # without restriction, including without limitation the rights to use, copy, modify,
9 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so.
11 | #
12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
15 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
16 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 |
19 | set -x
20 | set -e
21 |
22 | installSimpleExternalAuth() {
23 |
24 | yum -y -q install nice-dcv-*/nice-dcv-simple-external-authenticator-*.rpm
25 | systemctl start dcvsimpleextauth.service
26 |
27 | }
28 |
29 | installDCVGLonG4() {
30 |
31 | systemctl stop dcvserver.service
32 | systemctl disable slurmd
33 | systemctl isolate multi-user.target
34 |
35 | nvidia-xconfig --enable-all-gpus --preserve-busid --connected-monitor=DFP-0,DFP-1,DFP-2,DFP-3
36 | nvidia-persistenced
37 | nvidia-smi -ac 5001,1590
38 |
39 | yum -y -q install nice-dcv-*/nice-dcv-gl*.rpm nice-dcv-*/nice-dcv-server*.rpm nice-dcv-*/nice-xdcv*.rpm nice-dcv-*/nice-dcv-web-viewer*.rpm
40 |
41 | systemctl isolate graphical.target
42 | systemctl start dcvserver.service
43 | systemctl enable slurmd
44 | }
45 |
46 | installMissingLib() {
47 | yum -y -q install ImageMagick
48 | }
49 |
50 | configureDCVexternalAuth() {
51 |
52 | pattern='\[security\]'
53 | replace='&\n'
54 | replace+="auth-token-verifier=\"http://localhost:8444\""
55 | cp '/etc/dcv/dcv.conf' "/etc/dcv/dcv.conf.$(date --iso=s --utc)"
56 | # remove duplicates if any
57 | #sed -i -e '/^ *\(administrators\|ca-file\|auth-token-verifier\) *=.*$/d' '/etc/dcv/dcv.conf'
58 | sed -i -e "s|${pattern}|${replace}|" '/etc/dcv/dcv.conf'
59 |
60 | }
61 |
62 | restartDCV() {
63 |
64 | systemctl restart dcvserver.service
65 |
66 | }
67 |
68 | # main
69 | # ----------------------------------------------------------------------------
70 | main() {
71 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] install.dcv-server.gpu.sh: START" >&2
72 |
73 | wget -nv https://d1uj6qtbmh3dt5.cloudfront.net/nice-dcv-el7-x86_64.tgz
74 | tar zxvf nice-dcv-el7-x86_64.tgz
75 | installDCVGLonG4
76 | installSimpleExternalAuth
77 | dcvusbdriverinstaller --quiet
78 |
79 | installMissingLib
80 | configureDCVexternalAuth
81 | restartDCV
82 |
83 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] install.dcv-server.gpu.sh: STOP" >&2
84 | }
85 |
86 | main "$@"
--------------------------------------------------------------------------------
/modules/26.configure.dcv.alb.compute.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this
7 | # software and associated documentation files (the "Software"), to deal in the Software
8 | # without restriction, including without limitation the rights to use, copy, modify,
9 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so.
11 | #
12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
15 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
16 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 |
19 | # Configure DCV on the compute nodes for access through the ALB.
20 |
21 | set -x
22 | set -e
23 |
24 | configureDCVforALB() {
25 | cp '/etc/dcv/dcv.conf' "/etc/dcv/dcv.conf.$(date --iso=s --utc)"
26 | WEB_URL_PATH="$(ec2-metadata -i| awk '{print $2}')"
27 | pattern='^ *#web-url-path *=.*$'
28 | replace="web-url-path=\"/${WEB_URL_PATH}\""
29 | sed -i -e "s|${pattern}|${replace}|" "/etc/dcv/dcv.conf"
30 | }
31 |
32 | restartDCV() {
33 |
34 | systemctl restart dcvserver.service
35 |
36 | }
37 |
38 | # main
39 | # ----------------------------------------------------------------------------
40 | main() {
41 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] configure.dcv.alb.compute.sh: START" >&2
42 | configureDCVforALB
43 | restartDCV
44 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] configure.dcv.alb.compute.sh: STOP" >&2
45 | }
46 |
47 | main "$@"
--------------------------------------------------------------------------------
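With this change each compute node serves DCV under a path equal to its own EC2 instance ID, which is exactly the unique web URL path the ALB hooks rely on; the resulting dcv.conf entry looks like (instance ID illustrative):

    web-url-path="/i-0abc1234def567890"
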
/modules/27.configure.dcv.nat.compute.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this
7 | # software and associated documentation files (the "Software"), to deal in the Software
8 | # without restriction, including without limitation the rights to use, copy, modify,
9 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so.
11 | #
12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
15 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
16 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 |
19 |
20 | # Configure the EnginFrame NAT mapping for DCV on the compute nodes.
21 | EF_NAT_CONF="${NICE_ROOT}/enginframe/conf/plugins/interactive/nat.conf"
22 | DCV_KEY_WORD="dcv"   # used by the partition-file loop below; not inherited from the headnode modules
23 |
24 | set -x
25 | set -e
26 |
27 | fixNat() {
28 |
29 | #fix the nat
30 | h2="${host_name//./\\.}"
31 | sed -i "/^${h2} .*$/d" "${EF_NAT_CONF}"
32 | echo "$host_name $(ec2-metadata -p| awk '{print $2}')" >> "${EF_NAT_CONF}"
33 | }
34 |
35 |
36 | # main
37 | # ----------------------------------------------------------------------------
38 | main() {
39 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 27.configure.dcv.nat.compute.sh: START" >&2
40 | for conf_file in $(ls ${SLURM_CONF_FILE} | grep "${DCV_KEY_WORD}"); do
41 | if [[ ! -z $(grep "${compute_instance_type}" "${conf_file}") ]]; then
42 | fixNat
43 | fi
44 | done
45 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 27.configure.dcv.nat.compute.sh: STOP" >&2
46 | }
47 |
48 | main "$@"
--------------------------------------------------------------------------------
/modules/30.install.dcv-sm-agent.compute.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this
7 | # software and associated documentation files (the "Software"), to deal in the Software
8 | # without restriction, including without limitation the rights to use, copy, modify,
9 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so.
11 | #
12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
15 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
16 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 |
19 | DCV_SM_ROOT="/etc/dcv-session-manager-agent"
20 |
21 | set -x
22 | set -e
23 |
24 | configureDCVforSMAgent() {
25 |
26 | pattern='\[security\]'
27 | replace='&\n'
28 | replace+='administrators=["dcvsmagent"]\n'
29 | replace+='ca-file="/etc/dcv-session-manager-agent/dcvsmbroker_ca.pem"\n'
30 | replace+="auth-token-verifier=\"https://${head_node_hostname}:${AGENT_BROKER_PORT}/agent/validate-authentication-token\""
31 | cp '/etc/dcv/dcv.conf' "/etc/dcv/dcv.conf.$(date --iso=s --utc)"
32 | # remove duplicates if any
33 | #sed -i -e '/^ *\(administrators\|ca-file\|auth-token-verifier\) *=.*$/d' '/etc/dcv/dcv.conf'
34 | sed -i -e "s|${pattern}|${replace}|" '/etc/dcv/dcv.conf'
35 |
36 | }
37 |
38 | installDCVSMAgent() {
39 |
40 | BROKER_CA_NEW="${DCV_SM_ROOT}/dcvsmbroker_ca.pem"
41 | DCV_SM_AGENT_CONF="${DCV_SM_ROOT}/agent.conf"
42 |
43 | rpm --import "${NICE_GPG_KEY_URL}"
44 | yum -y -q install https://d1uj6qtbmh3dt5.cloudfront.net/nice-dcv-session-manager-agent.el7.x86_64.rpm || exit 1
45 |
46 | pattern='^ *broker_host *=.*$'
47 | replace="broker_host = '${head_node_hostname}'"
48 | sed -i -e "s|${pattern}|${replace}|" "${DCV_SM_AGENT_CONF}"
49 |
50 | pattern='^ *#broker_port *=.*$'
51 | replace="broker_port = ${AGENT_BROKER_PORT}"
52 | sed -i -e "s|${pattern}|${replace}|" "${DCV_SM_AGENT_CONF}"
53 |
54 | pattern='^ *#ca_file *=.*$'
55 | replace="ca_file = '${BROKER_CA_NEW}'"
56 | sed -i -e "s|${pattern}|${replace}|" "${DCV_SM_AGENT_CONF}"
57 | cp "${BROKER_CA}" "${BROKER_CA_NEW}"
58 |
59 | }
60 |
61 |
62 | configureAgentTags() {
63 | mkdir -p "${DCV_SM_ROOT}/tags"
64 | echo "AWS_EC2_PUBLIC_HOSTNAME=\"$(ec2-metadata -p| awk '{print $2}')\"" >> "${DCV_SM_ROOT}/tags/agent_tags.toml"
65 | echo "INSTANCE_TYPE=\"$(ec2-metadata -t| awk '{print $2}')\"" >> "${DCV_SM_ROOT}/tags/agent_tags.toml"
66 | echo "AWS_EC2_INSTANCE_ID=\"$(ec2-metadata -i| awk '{print $2}')\"" >> "${DCV_SM_ROOT}/tags/agent_tags.toml"
67 | }
68 |
69 | startServices() {
70 |
71 | systemctl start dcv-session-manager-agent
72 | systemctl restart dcvserver.service
73 |
74 | }
75 |
76 | # main
77 | # ----------------------------------------------------------------------------
78 | main() {
79 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] install.dcv-sm-agent.compute.sh: START" >&2
80 | if [[ ! -z $(grep "${compute_instance_type}" "${conf_file}") ]]; then
81 | configureDCVforSMAgent
82 | installDCVSMAgent
83 | configureAgentTags
84 | startServices
85 | fi
86 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] install.dcv-sm-agent.compute.sh: STOP" >&2
87 | }
88 |
89 | main "$@"
--------------------------------------------------------------------------------
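After installDCVSMAgent, the agent configuration points back at the broker on the head node; the touched keys end up roughly as follows (the hostname is illustrative, and the port value comes from ${AGENT_BROKER_PORT}):

    # /etc/dcv-session-manager-agent/agent.conf
    broker_host = 'ip-10-0-0-10.eu-west-1.compute.internal'
    broker_port = 8445
    ca_file = '/etc/dcv-session-manager-agent/dcvsmbroker_ca.pem'

Note that the grep gate in main() relies on conf_file still being set by the partition-file loop of 27.configure.dcv.nat.compute.sh, since the modules are sourced into the same shell on the compute node.
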
/modules/40.install.monitoring.compute.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this
7 | # software and associated documentation files (the "Software"), to deal in the Software
8 | # without restriction, including without limitation the rights to use, copy, modify,
9 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so.
11 | #
12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
15 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
16 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 |
19 |
20 |
21 | set -x
22 | set -e
23 |
24 | installPreReq() {
25 | yum -y -q install docker golang-bin
26 | service docker start
27 | chkconfig docker on
28 | usermod -a -G docker $cfn_cluster_user
29 |
30 | # to be replaced with "yum -y install docker-compose" once the repository problem is fixed
31 | curl -s -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
32 | chmod +x /usr/local/bin/docker-compose
33 | }
34 |
35 | installMonitoring() {
36 |
37 | gpu_instances="[pg][2-9].*\.[0-9]*[x]*large"
38 |
39 | if [[ $compute_instance_type =~ $gpu_instances ]]; then
40 | distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
41 | curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.repo | tee /etc/yum.repos.d/nvidia-docker.repo
42 | yum -y -q clean expire-cache
43 | yum -y -q install nvidia-docker2
44 | systemctl restart docker
45 | /usr/local/bin/docker-compose -f "${monitoring_home}/docker-compose/docker-compose.compute.gpu.yml" -p monitoring-compute up -d
46 | else
47 | /usr/local/bin/docker-compose -f "${monitoring_home}/docker-compose/docker-compose.compute.yml" -p monitoring-compute up -d
48 | fi
49 | }
50 |
51 | # main
52 | # ----------------------------------------------------------------------------
53 | main() {
54 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 40.install.monitoring.compute.sh: START" >&2
55 |
56 | job_id=$($SLURM_ROOT/bin/squeue -h -w "${host_name}" | awk '{print $1}')
57 | job_comment=$($SLURM_ROOT/bin/scontrol show job $job_id | grep Comment | sed 's/Comment=//' | sed 's/^ *//g')
58 |
59 | if [[ $job_comment == *"Key=Monitoring,Value=ON"* ]]; then
60 | installPreReq
61 | installMonitoring
62 | fi
63 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 40.install.monitoring.compute.sh: STOP" >&2
64 | }
65 | main "$@"
--------------------------------------------------------------------------------
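Monitoring on a compute node is opt-in per job: the script reads the Comment field of the Slurm job allocated on the booting node, so a user enables the monitoring stack at submission time, e.g.:

    sbatch --comment "Key=Monitoring,Value=ON" job.sh
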
/modules/40.install.monitoring.headnode.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this
7 | # software and associated documentation files (the "Software"), to deal in the Software
8 | # without restriction, including without limitation the rights to use, copy, modify,
9 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so.
11 | #
12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
15 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
16 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 |
19 |
20 | #max_queue_size=$(aws cloudformation describe-stacks --stack-name $stack_name --region $cfn_region | jq -r '.Stacks[0].Parameters | map(select(.ParameterKey == "MaxSize"))[0].ParameterValue')
21 |
22 | s3_bucket=$(echo $cfn_postinstall | sed "s/s3:\/\///g;s/\/.*//")
23 |
24 | set -x
25 | set -e
26 |
27 | installPreReq() {
28 | yum -y -q install docker golang-bin
29 | service docker start
30 | chkconfig docker on
31 | usermod -a -G docker $cfn_cluster_user
32 |
33 | # to be replaced with "yum -y install docker-compose" once the repository problem is fixed
34 | curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
35 | chmod +x /usr/local/bin/docker-compose
36 | }
37 |
38 | saveClusterConfigLocally(){
39 |
40 | cluster_s3_bucket=$(jq -r '.cluster.cluster_s3_bucket' "${dna_json}")
41 | cluster_config_s3_key=$(jq -r '.cluster.cluster_config_s3_key' "${dna_json}")
42 | cluster_config_version=$(jq -r '.cluster.cluster_config_version' "${dna_json}")
43 | log_group_names=$(jq -r '.cluster.log_group_name' "${dna_json}")
44 |
45 | mkdir -p "${monitoring_home}/parallelcluster"
46 | aws s3api get-object --bucket $cluster_s3_bucket --key $cluster_config_s3_key --region $cfn_region --version-id $cluster_config_version "${monitoring_home}/parallelcluster/cluster-config.json"
47 | }
48 |
49 | installMonitoring(){
50 |
51 | aws s3 cp --quiet --recursive "${post_install_base}/monitoring" "${monitoring_home}" --region "${cfn_region}" || exit 1
52 | chown $cfn_cluster_user:$cfn_cluster_user -R "${monitoring_home}"
53 | chmod +x ${monitoring_home}/custom-metrics/*
54 |
55 | cp -rp ${monitoring_home}/custom-metrics/* /usr/local/bin/
56 | mv -f "${monitoring_home}/prometheus-slurm-exporter/slurm_exporter.service" /etc/systemd/system/
57 |
58 | cp -rp ${monitoring_home}/www/* "${NICE_ROOT}/enginframe/conf/tomcat/webapps/ROOT/"
59 | }
60 |
61 |
62 |
63 | configureMonitoring() {
64 |
65 | fsx_fs_id=$(jq -r '.cluster.fsx_fs_id' "${dna_json}")
66 | headnode_instance_id=$(ec2-metadata -i | awk '{print $2}')
67 |
68 | #FIXME: the cost dashboard needs to be re-designed.
69 | #(crontab -l -u $cfn_cluster_user; echo "*/1 * * * * /usr/local/bin/1m-cost-metrics.sh") | crontab -u $cfn_cluster_user -
70 | #(crontab -l -u $cfn_cluster_user; echo "*/60 * * * * /usr/local/bin/1h-cost-metrics.sh") | crontab -u $cfn_cluster_user -
71 |
72 | # replace tokens
73 | sed -i "s/_S3_BUCKET_/${s3_bucket}/g" "${monitoring_home}/grafana/dashboards/ParallelCluster.json"
74 | sed -i "s/__INSTANCE_ID__/${headnode_instance_id}/g" "${monitoring_home}/grafana/dashboards/ParallelCluster.json"
75 | sed -i "s/__FSX_ID__/${fsx_fs_id}/g" "${monitoring_home}/grafana/dashboards/ParallelCluster.json"
76 | sed -i "s/__AWS_REGION__/${cfn_region}/g" "${monitoring_home}/grafana/dashboards/ParallelCluster.json"
77 |
78 | sed -i "s/__AWS_REGION__/${cfn_region}/g" "${monitoring_home}/grafana/dashboards/logs.json"
79 | sed -i "s~__LOG_GROUP__NAMES__~${log_group_names}~g" "${monitoring_home}/grafana/dashboards/logs.json"
80 |
81 | sed -i "s/__Application__/${stack_name}/g" "${monitoring_home}/prometheus/prometheus.yml"
82 | sed -i "s/__AWS_REGION__/${cfn_region}/g" "${monitoring_home}/prometheus/prometheus.yml"
83 |
84 | sed -i "s/__INSTANCE_ID__/${headnode_instance_id}/g" "${monitoring_home}/grafana/dashboards/headnode-details.json"
85 | sed -i "s/__INSTANCE_ID__/${headnode_instance_id}/g" "${monitoring_home}/grafana/dashboards/compute-node-list.json"
86 | sed -i "s/__INSTANCE_ID__/${headnode_instance_id}/g" "${monitoring_home}/grafana/dashboards/compute-node-details.json"
87 |
88 | sed -i "s~__MONITORING_DIR__~${monitoring_home}~g" "${monitoring_home}/docker-compose/docker-compose.headnode.yml"
89 | sed -i "s~__GRAFANA_PASSWORD__~${ec2user_pass}~g" "${monitoring_home}/docker-compose/docker-compose.headnode.yml"
90 |
91 | # Download and build prometheus-slurm-exporter
92 | ##### Please note: this software package is under the GPLv3 License #####
93 | # More info here: https://github.com/vpenso/prometheus-slurm-exporter/blob/master/LICENSE
94 | cd "${monitoring_home}"
95 | #FIXME: temporary
96 | rm -rf prometheus-slurm-exporter
97 | git clone https://github.com/vpenso/prometheus-slurm-exporter.git
98 | cd prometheus-slurm-exporter
99 | sed -i 's/NodeList,AllocMem,Memory,CPUsState,StateLong/NodeList: ,AllocMem: ,Memory: ,CPUsState: ,StateLong:/' node.go
100 | GOPATH=/root/go-modules-cache HOME=/root go mod download
101 | GOPATH=/root/go-modules-cache HOME=/root go build
102 | mv -f "${monitoring_home}/prometheus-slurm-exporter/prometheus-slurm-exporter" /usr/bin/prometheus-slurm-exporter
103 | }
104 |
105 |
106 | startMonitoringDaemons() {
107 |
108 | /usr/local/bin/docker-compose --env-file /etc/parallelcluster/cfnconfig -f "${monitoring_home}/docker-compose/docker-compose.headnode.yml" -p monitoring-1click-hpc up -d
109 | systemctl daemon-reload
110 | systemctl enable slurm_exporter
111 | systemctl start slurm_exporter
112 |
113 | }
114 |
115 | # main
116 | # ----------------------------------------------------------------------------
117 | main() {
118 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 40.install.monitoring.headnode.sh: START" >&2
119 | if [[ -d "${monitoring_home}" ]]; then
120 | mv -f "${monitoring_home}" "${monitoring_home}.$(date "+%d-%m-%Y-%H-%M").BAK"
121 | fi
122 | installPreReq
123 | saveClusterConfigLocally
124 | installMonitoring
125 | configureMonitoring
126 | startMonitoringDaemons
127 | echo "[INFO][$(date '+%Y-%m-%d %H:%M:%S')] 40.install.monitoring.headnode.sh: STOP" >&2
128 | }
129 |
130 | main "$@"
--------------------------------------------------------------------------------
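The script above wires everything together by token substitution: the sed calls render __INSTANCE_ID__, __AWS_REGION__, __FSX_ID__, __MONITORING_DIR__ and the other placeholders into the Grafana dashboards, the Prometheus config, and the headnode docker-compose file before the containers start. A quick, hedged sanity check after the post-install runs (assuming the default monitoring_home of /fsx/monitoring) is to look for leftover placeholders in the rendered files:

    # any hit here means a token was not substituted
    grep -n "__[A-Z][A-Z_]*__" \
        /fsx/monitoring/grafana/dashboards/ParallelCluster.json \
        /fsx/monitoring/grafana/dashboards/logs.json \
        /fsx/monitoring/prometheus/prometheus.yml \
        /fsx/monitoring/docker-compose/docker-compose.headnode.yml \
        && echo "WARNING: unreplaced tokens remain"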
/monitoring/custom-metrics/1h-cost-metrics.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | #
4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
5 | # SPDX-License-Identifier: MIT-0
6 | #
7 | #
8 |
9 | #source the AWS ParallelCluster profile
10 | . /etc/parallelcluster/cfnconfig
11 |
12 | export AWS_DEFAULT_REGION=$cfn_region
13 | aws_region_long_name=$(python /usr/local/bin/aws-region.py $cfn_region)
14 | aws_region_long_name=${aws_region_long_name/Europe/EU}
15 |
16 | headnodeInstanceType=$(ec2-metadata -t | awk '{print $2}')
17 | headnodeInstanceId=$(ec2-metadata -i | awk '{print $2}')
18 | s3_bucket=$(echo $cfn_postinstall | sed "s/s3:\/\///g;s/\/.*//")
19 | s3_size_gb=$(echo "$(aws s3api list-objects --bucket $s3_bucket --output json --query "[sum(Contents[].Size)]"| sed -n 2p | tr -d ' ') / 1024 / 1024 / 1024" | bc)
20 |
21 |
22 | #select the S3 pricing tier (endRange) that matches the bucket size
23 | if [[ $s3_size_gb -le 51200 ]]; then
24 | s3_range=51200
25 | elif [[ $s3_size_gb -le 512000 ]]; then
26 | s3_range=512000
27 | else
28 | s3_range="Inf"
29 | fi
30 |
31 | ####################### S3 #########################
32 |
33 | s3_cost_gb_month=$(aws --region us-east-1 pricing get-products \
34 | --service-code AmazonS3 \
35 | --filters 'Type=TERM_MATCH,Field=location,Value='"${aws_region_long_name}" \
36 | 'Type=TERM_MATCH,Field=storageClass,Value=General Purpose' \
37 | --query 'PriceList[0]' --output text \
38 | | jq -r --arg endRange $s3_range '.terms.OnDemand | to_entries[] | .value.priceDimensions | to_entries[].value | select(.endRange==$endRange).pricePerUnit.USD')
39 |
40 | s3=$(echo "scale=2; $s3_cost_gb_month * $s3_size_gb / 720" | bc)
41 | echo "s3_cost $s3" | curl --data-binary @- http://127.0.0.1:9091/metrics/job/cost
42 |
43 |
44 | ####################### headnode #########################
45 | headnode_node_h_price=$(aws pricing get-products \
46 | --region us-east-1 \
47 | --service-code AmazonEC2 \
48 | --filters 'Type=TERM_MATCH,Field=instanceType,Value='$headnodeInstanceType \
49 | 'Type=TERM_MATCH,Field=location,Value='"${aws_region_long_name}" \
50 | 'Type=TERM_MATCH,Field=preInstalledSw,Value=NA' \
51 | 'Type=TERM_MATCH,Field=operatingSystem,Value=Linux' \
52 | 'Type=TERM_MATCH,Field=tenancy,Value=Shared' \
53 | 'Type=TERM_MATCH,Field=capacitystatus,Value=UnusedCapacityReservation' \
54 | --output text \
55 | --query 'PriceList' \
56 | | jq -r '.terms.OnDemand | to_entries[] | .value.priceDimensions | to_entries[] | .value.pricePerUnit.USD')
57 |
58 | echo "headnode_cost $headnode_node_h_price" | curl --data-binary @- http://127.0.0.1:9091/metrics/job/cost
59 |
60 |
61 | fsx_id=$(aws cloudformation describe-stacks --stack-name $stack_name --region $cfn_region \
62 | | jq -r '.Stacks[0].Parameters | map(select(.ParameterKey == "FSXOptions"))[0].ParameterValue' \
63 | | awk -F "," '{print $2}')
64 | fsx_summary=$(aws fsx describe-file-systems --region $cfn_region --file-system-ids $fsx_id)
65 | fsx_size_gb=$(echo $fsx_summary | jq -r '.FileSystems[0].StorageCapacity')
66 | fsx_type=$(echo $fsx_summary | jq -r '.FileSystems[0].LustreConfiguration.DeploymentType')
67 | fsx_throughput=$(echo $fsx_summary | jq -r '.FileSystems[0].LustreConfiguration.PerUnitStorageThroughput')
68 |
69 | if [[ $fsx_type = "SCRATCH_2" ]] || [[ $fsx_type = "SCRATCH_1" ]]; then
70 | fsx_cost_gb_month=$(aws pricing get-products \
71 | --region us-east-1 \
72 | --service-code AmazonFSx \
73 | --filters 'Type=TERM_MATCH,Field=location,Value='"${aws_region_long_name}" \
74 | 'Type=TERM_MATCH,Field=fileSystemType,Value=Lustre' \
75 | 'Type=TERM_MATCH,Field=throughputCapacity,Value=N/A' \
76 | --output text \
77 | --query 'PriceList' \
78 | | jq -r '.terms.OnDemand | to_entries[] | .value.priceDimensions | to_entries[] | .value.pricePerUnit.USD')
79 |
80 | elif [ $fsx_type = "PERSISTENT_1" ]; then
81 | fsx_cost_gb_month=$(aws pricing get-products \
82 | --region us-east-1 \
83 | --service-code AmazonFSx \
84 | --filters 'Type=TERM_MATCH,Field=location,Value='"${aws_region_long_name}" \
85 | 'Type=TERM_MATCH,Field=fileSystemType,Value=Lustre' \
86 | 'Type=TERM_MATCH,Field=throughputCapacity,Value='$fsx_throughput \
87 | --output text \
88 | --query 'PriceList' \
89 | | jq -r '.terms.OnDemand | to_entries[] | .value.priceDimensions | to_entries[] | .value.pricePerUnit.USD')
90 |
91 | else
92 | fsx_cost_gb_month=0
93 | fi
94 |
95 | fsx=$(echo "scale=2; $fsx_cost_gb_month * $fsx_size_gb / 720" | bc)
96 | echo "fsx_cost $fsx" | curl --data-binary @- http://127.0.0.1:9091/metrics/job/cost
97 |
98 |
99 | #parametrize:
100 | ebs_volume_total_cost=0
101 | ebs_volume_ids=$(aws ec2 describe-instances --instance-ids $headnodeInstanceId \
102 | | jq -r '.Reservations | to_entries[].value | .Instances | to_entries[].value | .BlockDeviceMappings | to_entries[].value | .Ebs.VolumeId')
103 |
104 | for ebs_volume_id in $ebs_volume_ids
105 | do
106 | ebs_volume_type=$(aws ec2 describe-volumes --volume-ids $ebs_volume_id | jq -r '.Volumes | to_entries[].value.VolumeType')
107 | #ebs_volume_iops=$(aws ec2 describe-volumes --volume-ids $ebs_volume_id | jq -r '.Volumes | to_entries[].value.Iops')
108 | ebs_volume_size=$(aws ec2 describe-volumes --volume-ids $ebs_volume_id | jq -r '.Volumes | to_entries[].value.Size')
109 |
110 | ebs_cost_gb_month=$(aws --region us-east-1 pricing get-products \
111 | --service-code AmazonEC2 \
112 | --query 'PriceList' \
113 | --output text \
114 | --filters 'Type=TERM_MATCH,Field=location,Value='"${aws_region_long_name}" \
115 | 'Type=TERM_MATCH,Field=productFamily,Value=Storage' \
116 | 'Type=TERM_MATCH,Field=volumeApiName,Value='$ebs_volume_type \
117 | | jq -r '.terms.OnDemand | to_entries[] | .value.priceDimensions | to_entries[] | .value.pricePerUnit.USD')
118 |
119 | ebs_volume_cost=$(echo "scale=2; $ebs_cost_gb_month * $ebs_volume_size / 720" | bc)
120 | ebs_volume_total_cost=$(echo "scale=2; $ebs_volume_total_cost + $ebs_volume_cost" | bc)
121 | done
122 |
123 | echo "ebs_headnode_cost $ebs_volume_total_cost" | curl --data-binary @- http://127.0.0.1:9091/metrics/job/cost
--------------------------------------------------------------------------------
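Each block in the script above follows the same pattern: query the AWS Pricing API (which is served from only a few regions, hence the hard-coded us-east-1), convert a GB-month price to an hourly figure by dividing by 720 (30 days x 24 hours), and push the result as a gauge to the local Pushgateway. A condensed sketch of that pattern, mirroring the script's own filters (the instance type and location below are illustrative):

    price=$(aws pricing get-products --region us-east-1 \
        --service-code AmazonEC2 \
        --filters 'Type=TERM_MATCH,Field=instanceType,Value=c5.xlarge' \
                  'Type=TERM_MATCH,Field=location,Value=US East (N. Virginia)' \
                  'Type=TERM_MATCH,Field=preInstalledSw,Value=NA' \
                  'Type=TERM_MATCH,Field=operatingSystem,Value=Linux' \
                  'Type=TERM_MATCH,Field=tenancy,Value=Shared' \
                  'Type=TERM_MATCH,Field=capacitystatus,Value=UnusedCapacityReservation' \
        --query 'PriceList[0]' --output text \
        | jq -r '.terms.OnDemand | to_entries[] | .value.priceDimensions | to_entries[] | .value.pricePerUnit.USD')
    echo "example_instance_cost $price" | curl --data-binary @- http://127.0.0.1:9091/metrics/job/cost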
/monitoring/custom-metrics/1m-cost-metrics.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | #
4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
5 | # SPDX-License-Identifier: MIT-0
6 | #
7 | #
8 |
17 | exit 0 # per-minute cost metrics are temporarily disabled while the cost dashboard is re-designed
18 |
19 | #source the AWS ParallelCluster profile
20 | . /etc/parallelcluster/cfnconfig
21 |
22 | export AWS_DEFAULT_REGION=$cfn_region
23 | aws_region_long_name=$(python /usr/local/bin/aws-region.py $cfn_region)
24 | aws_region_long_name=${aws_region_long_name/Europe/EU}
25 |
26 | #FIXME: not hardcode dir
27 | monitoring_dir_name="monitoring"
28 | monitoring_home="/fsx/${monitoring_dir_name}"
29 |
30 | queues=$(/opt/slurm/bin/sinfo --noheader -O partition | sed 's/\*//g')
31 | cluster_config_file="${monitoring_home}/parallelcluster/cluster-config.json"
32 |
33 | compute_nodes_total_cost=0
34 |
35 | for queue in $queues; do
36 |
37 | instance_type=$(cat "${cluster_config_file}" | jq -r --arg queue $queue '.cluster.queue_settings | to_entries[] | select(.key==$queue).value.compute_resource_settings | to_entries[]| .value.instance_type')
38 |
39 | compute_node_h_price=$(aws pricing get-products \
40 | --region us-east-1 \
41 | --service-code AmazonEC2 \
42 | --filters 'Type=TERM_MATCH,Field=instanceType,Value='$instance_type \
43 | 'Type=TERM_MATCH,Field=location,Value='"${aws_region_long_name}" \
44 | 'Type=TERM_MATCH,Field=preInstalledSw,Value=NA' \
45 | 'Type=TERM_MATCH,Field=operatingSystem,Value=Linux' \
46 | 'Type=TERM_MATCH,Field=tenancy,Value=Shared' \
47 | 'Type=TERM_MATCH,Field=capacitystatus,Value=UnusedCapacityReservation' \
48 | --output text \
49 | --query 'PriceList' \
50 | | jq -r '.terms.OnDemand | to_entries[] | .value.priceDimensions | to_entries[] | .value.pricePerUnit.USD')
51 |
52 | ebs_cost_gb_month=$(aws --region us-east-1 pricing get-products \
53 | --service-code AmazonEC2 \
54 | --query 'PriceList' \
55 | --output text \
56 | --filters 'Type=TERM_MATCH,Field=location,Value='"${aws_region_long_name}" \
57 | 'Type=TERM_MATCH,Field=productFamily,Value=Storage' \
58 | 'Type=TERM_MATCH,Field=volumeApiName,Value=gp2' \
59 | | jq -r '.terms.OnDemand | to_entries[] | .value.priceDimensions | to_entries[] | .value.pricePerUnit.USD')
60 |
61 | total_num_compute_nodes=$(/opt/slurm/bin/sinfo --noheader --partition=$queue | egrep -v "idle~" | awk '{sum += $4} END {if (sum) print sum; else print 0; }')
62 |
63 | ebs_volume_size=$(aws cloudformation describe-stacks --stack-name $stack_name --region $cfn_region | jq -r '.Stacks[0].Parameters | map(select(.ParameterKey == "ComputeRootVolumeSize"))[0].ParameterValue')
64 | compute_ebs_volume_cost=$(echo "scale=2; ${compute_ebs_volume_cost:-0} + ($ebs_cost_gb_month * $total_num_compute_nodes * $ebs_volume_size / 720)" | bc) # accumulate across queues
65 | compute_nodes_cost=$(echo "scale=2; $total_num_compute_nodes * $compute_node_h_price" | bc)
66 |
67 | compute_nodes_total_cost=$(echo "scale=2; $compute_nodes_total_cost + $compute_nodes_cost" | bc)
68 |
69 | done
70 |
71 | echo "ebs_compute_cost $compute_ebs_volume_cost" | curl --data-binary @- http://127.0.0.1:9091/metrics/job/cost
72 | echo "compute_nodes_cost $compute_nodes_total_cost" | curl --data-binary @- http://127.0.0.1:9091/metrics/job/cost
--------------------------------------------------------------------------------
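Note the early exit 0: per-minute cost collection is switched off while the cost dashboard is redesigned, matching the commented-out crontab entries in 40.install.monitoring.headnode.sh. When re-enabled, each run pushes two gauges that the Pushgateway then exposes to Prometheus; a quick way to see them, for instance:

    curl -s http://localhost:9091/metrics | grep -E '^(compute_nodes_cost|ebs_compute_cost)'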
/monitoring/custom-metrics/aws-region.py:
--------------------------------------------------------------------------------
1 | #
2 | #
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | #
7 | import json
8 | import sys
9 |
10 | from pkg_resources import resource_filename
11 |
12 | region = str(sys.argv[1])
13 |
14 | name = None
15 | endpoint_file = resource_filename('botocore', 'data/endpoints.json')
16 | with open(endpoint_file, 'r') as ep_file:
17 | data = json.load(ep_file)
18 | for partition in data['partitions']:
19 | if region in partition['regions']:
20 | name = partition['regions'][region]['description']
21 | break
22 |
23 | print(name)
--------------------------------------------------------------------------------
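This helper resolves a region code to the long name the Pricing API expects in its location field, reading botocore's bundled endpoints.json. The exact spelling depends on the installed botocore version (newer versions print "Europe (...)" where the Pricing API expects "EU (...)", which is why the calling scripts rewrite Europe to EU). A typical invocation might look like:

    $ python /usr/local/bin/aws-region.py eu-west-1
    Europe (Ireland)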
/monitoring/docker-compose/docker-compose.compute.gpu.yml:
--------------------------------------------------------------------------------
1 | #
2 | #
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | #
7 | version: '3.8'
8 | services:
9 | prometheus-node-exporter:
10 | container_name: node-exporter
11 | network_mode: host
12 | pid: host
13 | restart: unless-stopped
14 | volumes:
15 | - '/:/host:ro,rslave'
16 | image: quay.io/prometheus/node-exporter
17 | command:
18 | - '--path.rootfs=/host'
19 | dcgm-exporter:
20 | container_name: nvidia-dcgm
21 | network_mode: host
22 | pid: host
23 | restart: unless-stopped
24 | image: nvidia/dcgm-exporter
25 | runtime: nvidia
26 | environment:
27 | - NVIDIA_VISIBLE_DEVICES=all
28 | - NVIDIA_DRIVER_CAPABILITIES=all
--------------------------------------------------------------------------------
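Both services run with network_mode: host, so once this compose file is up on a GPU compute node the exporters answer on their default ports, 9100 for node-exporter and 9400 for dcgm-exporter, which are the two ports the ec2_sd_configs section of prometheus.yml scrapes. A quick check (project name illustrative, run from the docker-compose directory):

    docker-compose -f docker-compose.compute.gpu.yml -p monitoring-1click-hpc up -d
    curl -s http://localhost:9100/metrics | head    # host metrics
    curl -s http://localhost:9400/metrics | head    # GPU (DCGM) metrics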
/monitoring/docker-compose/docker-compose.compute.yml:
--------------------------------------------------------------------------------
1 | #
2 | #
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | #
7 | version: '3.8'
8 | services:
9 | prometheus-node-exporter:
10 | container_name: node-exporter
11 | network_mode: host
12 | pid: host
13 | restart: unless-stopped
14 | volumes:
15 | - '/:/host:ro,rslave'
16 | image: quay.io/prometheus/node-exporter
17 | command:
18 | - '--path.rootfs=/host'
--------------------------------------------------------------------------------
/monitoring/docker-compose/docker-compose.headnode.yml:
--------------------------------------------------------------------------------
1 | #
2 | #
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | #
7 | version: '3.8'
8 | services:
9 | pushgateway:
10 | container_name: pushgateway
11 | network_mode: host
12 | pid: host
13 | restart: unless-stopped
14 | image: prom/pushgateway
15 | prometheus:
16 | container_name: prometheus
17 | network_mode: host
18 | pid: host
19 | restart: unless-stopped
20 | volumes:
21 | - '__MONITORING_DIR__/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml'
22 | - 'prometheus-data:/prometheus'
23 | image: prom/prometheus
24 | command:
25 | - '--config.file=/etc/prometheus/prometheus.yml'
26 | - '--storage.tsdb.path=/prometheus'
27 | - '--web.console.libraries=/usr/share/prometheus/console_libraries'
28 | - '--web.console.templates=/usr/share/prometheus/consoles'
29 | - '--web.external-url=/prometheus/'
30 | - '--web.route-prefix=/'
31 | grafana:
32 | container_name: grafana
33 | network_mode: host
34 | pid: host
35 | restart: unless-stopped
36 | environment:
37 | - 'GF_SECURITY_ADMIN_PASSWORD=__GRAFANA_PASSWORD__'
38 | - 'GF_SERVER_ROOT_URL=http://%(domain)s/grafana/'
39 | - 'GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH=/etc/grafana/provisioning/dashboards/ParallelCluster.json'
40 | volumes:
41 | - '__MONITORING_DIR__/grafana:/etc/grafana/provisioning'
42 | - 'grafana-data:/var/lib/grafana'
43 | image: grafana/grafana
44 | prometheus-node-exporter:
45 | container_name: node-exporter
46 | network_mode: host
47 | pid: host
48 | restart: unless-stopped
49 | volumes:
50 | - '/:/host:ro,rslave'
51 | image: quay.io/prometheus/node-exporter
52 | command:
53 | - '--path.rootfs=/host'
54 | nginx:
55 | container_name: nginx
56 | network_mode: host
57 | pid: host
58 | restart: unless-stopped
59 | volumes:
60 | - '__MONITORING_DIR__/nginx/conf.d:/etc/nginx/conf.d/'
61 | - '__MONITORING_DIR__/nginx/ssl:/etc/ssl/'
62 | - '__MONITORING_DIR__/www:/usr/share/nginx/html'
63 | image: nginx
64 | volumes:
65 | prometheus-data:
66 | grafana-data:
--------------------------------------------------------------------------------
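This is the stack that 40.install.monitoring.headnode.sh launches; an equivalent manual invocation, with the monitoring_home path that script assumes, would be:

    /usr/local/bin/docker-compose --env-file /etc/parallelcluster/cfnconfig \
        -f /fsx/monitoring/docker-compose/docker-compose.headnode.yml \
        -p monitoring-1click-hpc up -d
    docker ps --format '{{.Names}}: {{.Status}}'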
/monitoring/grafana/dashboards/compute-node-list.json:
--------------------------------------------------------------------------------
1 | {
2 | "annotations": {
3 | "list": [
4 | {
5 | "builtIn": 1,
6 | "datasource": "-- Grafana --",
7 | "enable": true,
8 | "hide": true,
9 | "iconColor": "rgba(0, 211, 255, 1)",
10 | "name": "Annotations & Alerts",
11 | "type": "dashboard"
12 | }
13 | ]
14 | },
15 | "editable": true,
16 | "gnetId": null,
17 | "graphTooltip": 0,
18 | "iteration": 1592242343557,
19 | "links": [
20 | {
21 | "$$hashKey": "object:53",
22 | "icon": "external link",
23 | "tags": [],
24 | "type": "dashboards"
25 | }
26 | ],
27 | "panels": [
28 | {
29 | "columns": [],
30 | "datasource": null,
31 | "fieldConfig": {
32 | "defaults": {
33 | "custom": {}
34 | },
35 | "overrides": []
36 | },
37 | "fontSize": "100%",
38 | "gridPos": {
39 | "h": 24,
40 | "w": 9,
41 | "x": 0,
42 | "y": 0
43 | },
44 | "id": 2,
45 | "pageSize": null,
46 | "showHeader": true,
47 | "sort": {
48 | "col": 2,
49 | "desc": true
50 | },
51 | "styles": [
52 | {
53 | "alias": "Time",
54 | "align": "auto",
55 | "dateFormat": "YYYY-MM-DD HH:mm:ss",
56 | "pattern": "Time",
57 | "type": "hidden"
58 | },
59 | {
60 | "alias": "Availability Zone",
61 | "align": "left",
62 | "colorMode": null,
63 | "colors": [
64 | "rgba(245, 54, 54, 0.9)",
65 | "rgba(237, 129, 40, 0.89)",
66 | "rgba(50, 172, 45, 0.97)"
67 | ],
68 | "decimals": 2,
69 | "pattern": "instance_az",
70 | "thresholds": [],
71 | "type": "number",
72 | "unit": "short"
73 | },
74 | {
75 | "alias": "Instance Id",
76 | "align": "auto",
77 | "colorMode": null,
78 | "colors": [
79 | "rgba(245, 54, 54, 0.9)",
80 | "rgba(237, 129, 40, 0.89)",
81 | "rgba(50, 172, 45, 0.97)"
82 | ],
83 | "dateFormat": "YYYY-MM-DD HH:mm:ss",
84 | "decimals": 2,
85 | "link": true,
86 | "linkTargetBlank": true,
87 | "linkTooltip": "Go To Node Details",
88 | "linkUrl": "/grafana/d/qI8VfvXZz/node-details-copy?var-instance_id=${__cell}",
89 | "mappingType": 1,
90 | "pattern": "instance_id",
91 | "thresholds": [],
92 | "type": "number",
93 | "unit": "short"
94 | },
95 | {
96 | "alias": "Instance Type",
97 | "align": "auto",
98 | "colorMode": null,
99 | "colors": [
100 | "rgba(245, 54, 54, 0.9)",
101 | "rgba(237, 129, 40, 0.89)",
102 | "rgba(50, 172, 45, 0.97)"
103 | ],
104 | "dateFormat": "YYYY-MM-DD HH:mm:ss",
105 | "decimals": 2,
106 | "mappingType": 1,
107 | "pattern": "instance_type",
108 | "thresholds": [],
109 | "type": "number",
110 | "unit": "short"
111 | },
112 | {
113 | "alias": "CPU load",
114 | "align": "auto",
115 | "colorMode": null,
116 | "colors": [
117 | "rgba(245, 54, 54, 0.9)",
118 | "rgba(237, 129, 40, 0.89)",
119 | "rgba(50, 172, 45, 0.97)"
120 | ],
121 | "dateFormat": "YYYY-MM-DD HH:mm:ss",
122 | "decimals": 2,
123 | "mappingType": 1,
124 | "pattern": "Value #A",
125 | "thresholds": [],
126 | "type": "number",
127 | "unit": "short"
128 | },
129 | {
130 | "alias": "Transmit Rate",
131 | "align": "auto",
132 | "colorMode": null,
133 | "colors": [
134 | "rgba(245, 54, 54, 0.9)",
135 | "rgba(237, 129, 40, 0.89)",
136 | "rgba(50, 172, 45, 0.97)"
137 | ],
138 | "dateFormat": "YYYY-MM-DD HH:mm:ss",
139 | "decimals": 2,
140 | "mappingType": 1,
141 | "pattern": "Value #B",
142 | "thresholds": [],
143 | "type": "number",
144 | "unit": "Bps"
145 | },
146 | {
147 | "alias": "Receive Rate",
148 | "align": "auto",
149 | "colorMode": null,
150 | "colors": [
151 | "rgba(245, 54, 54, 0.9)",
152 | "rgba(237, 129, 40, 0.89)",
153 | "rgba(50, 172, 45, 0.97)"
154 | ],
155 | "dateFormat": "YYYY-MM-DD HH:mm:ss",
156 | "decimals": 2,
157 | "mappingType": 1,
158 | "pattern": "Value #C",
159 | "thresholds": [],
160 | "type": "number",
161 | "unit": "Bps"
162 | }
163 | ],
164 | "targets": [
165 | {
166 | "expr": "sum(rate(node_cpu_seconds_total{mode!=\"idle\"}[1m])) by (instance_id, instance_type, instance_az)",
167 | "format": "table",
168 | "instant": true,
169 | "legendFormat": "",
170 | "refId": "A"
171 | },
172 | {
173 | "expr": "sum(rate(node_network_transmit_bytes_total[1m])) by (instance_id, instance_type, instance_az)",
174 | "format": "table",
175 | "instant": true,
176 | "refId": "B"
177 | },
178 | {
179 | "expr": "sum(rate(node_network_receive_bytes_total[1m])) by (instance_id, instance_type, instance_az)",
180 | "format": "table",
181 | "instant": true,
182 | "refId": "C"
183 | }
184 | ],
185 | "timeFrom": null,
186 | "timeShift": null,
187 | "title": "All Available Nodes",
188 | "transform": "table",
189 | "type": "table-old"
190 | }
191 | ],
192 | "schemaVersion": 25,
193 | "style": "dark",
194 | "tags": [],
195 | "templating": {
196 | "list": [
197 | {
198 | "datasource": null,
199 | "filters": [
200 | {
201 | "condition": "",
202 | "key": "instance_id",
203 | "operator": "!=",
204 | "value": "__INSTANCE_ID__"
205 | }
206 | ],
207 | "hide": 2,
208 | "label": "",
209 | "name": "Filters",
210 | "skipUrlSync": false,
211 | "type": "adhoc"
212 | }
213 | ]
214 | },
215 | "time": {
216 | "from": "now-15m",
217 | "to": "now"
218 | },
219 | "timepicker": {
220 | "refresh_intervals": [
221 | "10s",
222 | "30s",
223 | "1m",
224 | "5m",
225 | "15m",
226 | "30m",
227 | "1h",
228 | "2h",
229 | "1d"
230 | ]
231 | },
232 | "timezone": "",
233 | "title": "Compute Node List",
234 | "uid": "SugNQvuWk",
235 | "version": 1
236 | }
--------------------------------------------------------------------------------
/monitoring/grafana/dashboards/dashboards.yml:
--------------------------------------------------------------------------------
1 | apiVersion: 1
2 | providers:
3 | - name: "Dashboards"
4 | orgId: 1
5 | folder: ""
6 | type: file
7 | disableDeletion: false
8 | editable: true
9 | options:
10 | path: /etc/grafana/provisioning/dashboards
11 |
--------------------------------------------------------------------------------
/monitoring/grafana/datasources/datasource.yml:
--------------------------------------------------------------------------------
1 | #
2 | #
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | #
7 | apiVersion: 1
8 | datasources:
9 | - name: prometheus
10 | type: prometheus
11 | access: proxy
12 | orgId: 1
13 | version: 1
14 | url: http://localhost:9090
15 | isDefault: true
16 | editable: true
17 | jsonData:
18 | timeInterval: 10s
19 | - name: cloudwatch
20 | type: cloudwatch
21 | orgId: 1
22 | version: 1
23 | editable: true
24 | jsonData:
25 | authType: default
26 | defaultRegion: us-east-1
27 |
--------------------------------------------------------------------------------
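Grafana loads these datasources at startup from the provisioning directory mounted by the headnode compose file. One hedged way to confirm both registered, using the admin password injected through GF_SECURITY_ADMIN_PASSWORD (variable name illustrative):

    curl -s -u "admin:${GRAFANA_PASSWORD}" http://localhost/grafana/api/datasources \
        | jq -r '.[].name'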
/monitoring/nginx/conf.d/nginx.conf:
--------------------------------------------------------------------------------
1 | server {
2 | listen 80 default_server;
3 | listen [::]:80 default_server;
4 | server_name _;
5 | server_tokens off;
6 |
7 | location /grafana/ {
8 | proxy_set_header Host $http_host;
9 | proxy_pass http://localhost:3000/;
10 | }
11 |
12 | location /prometheus/ {
13 | proxy_pass http://localhost:9090/;
14 | }
15 |
16 | location /pushgateway/ {
17 | proxy_pass http://localhost:9091/;
18 | }
19 |
20 | location /slurmexporter/ {
21 | proxy_pass http://localhost:8081/;
22 | }
23 | }
--------------------------------------------------------------------------------
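nginx fronts every service on port 80, each under its own prefix; Prometheus cooperates via the --web.external-url=/prometheus/ flag in the compose file. A few illustrative smoke tests from the head node:

    curl -s -o /dev/null -w '%{http_code}\n' http://localhost/grafana/login   # expect 200
    curl -s http://localhost/prometheus/-/healthy                             # expect a "Healthy" reply
    curl -s http://localhost/pushgateway/metrics | head -n 3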
/monitoring/prometheus-slurm-exporter/slurm_exporter.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=Prometheus SLURM Exporter
3 |
4 | [Service]
5 | Environment=PATH=/opt/slurm/bin:/usr/local/bin:/usr/bin:/bin
6 | ExecStart=/usr/bin/prometheus-slurm-exporter -listen-address 0.0.0.0:8081
7 | Restart=on-failure
8 | RestartSec=15
9 | Type=simple
10 |
11 |
12 | [Install]
13 | WantedBy=multi-user.target
--------------------------------------------------------------------------------
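Once this unit is installed to /etc/systemd/system and started (the headnode script runs systemctl enable and start), the exporter serves Slurm scheduler metrics on port 8081, which the slurm_exporter scrape job in prometheus.yml consumes. For example:

    systemctl status slurm_exporter --no-pager
    curl -s http://localhost:8081/metrics | grep '^slurm_' | head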
/monitoring/prometheus/prometheus.yml:
--------------------------------------------------------------------------------
1 | #
2 | #
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | # SPDX-License-Identifier: MIT-0
5 | #
6 | #
7 | global:
8 | scrape_interval: 15s
9 | evaluation_interval: 15s
10 | scrape_timeout: 15s
11 |
12 | scrape_configs:
13 | - job_name: 'slurm_exporter'
14 | scrape_interval: 30s
15 | scrape_timeout: 30s
16 | static_configs:
17 | - targets: ['localhost:8081']
18 | - job_name: 'pushgateway'
19 | honor_labels: true
20 | static_configs:
21 | - targets: ['localhost:9091']
22 | - job_name: 'prometheus_server'
23 | scrape_interval: 5s
24 | static_configs:
25 | - targets: ['localhost:9090']
26 | - job_name: 'ec2_instances'
27 | scrape_interval: 5s
28 | ec2_sd_configs:
29 | - port: 9100
30 | region: __AWS_REGION__
31 | refresh_interval: 10s
32 | - port: 9400
33 | region: __AWS_REGION__
34 | refresh_interval: 10s
35 | filters:
36 | - name: instance-type
37 | values:
38 | - p2.xlarge
39 | - p2.8xlarge
40 | - p2.16xlarge
41 | - p3.2xlarge
42 | - p3.8xlarge
43 | - p3.16xlarge
44 | - p3dn.24xlarge
45 | - p4d.24xlarge
46 | - g3s.xlarge
47 | - g3.4xlarge
48 | - g3.8xlarge
49 | - g3.16xlarge
50 | - g4dn.xlarge
51 | - g4dn.2xlarge
52 | - g4dn.4xlarge
53 | - g4dn.8xlarge
54 | - g4dn.16xlarge
55 | - g4dn.12xlarge
56 | - g4dn.metal
57 | relabel_configs:
58 | - source_labels: [__meta_ec2_tag_Name]
59 | target_label: instance_name
60 | - source_labels: [__meta_ec2_tag_parallelcluster_cluster_name]
61 | target_label: instance_grafana
62 | regex: __Application__
63 | action: keep
64 | - source_labels: [__meta_ec2_instance_id]
65 | target_label: instance_id
66 | - source_labels: [__meta_ec2_availability_zone]
67 | target_label: instance_az
68 | - source_labels: [__meta_ec2_instance_state]
69 | regex: running
70 | action: keep
71 | target_label: instance_state
72 | - source_labels: [__meta_ec2_instance_type]
73 | target_label: instance_type
74 | - source_labels: [__meta_ec2_vpc_id]
75 | target_label: instance_vpc
--------------------------------------------------------------------------------
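After __AWS_REGION__ and __Application__ have been substituted, the rendered file can be validated with promtool, which ships in the prom/prometheus image; a minimal check against the running container from the compose file:

    docker exec prometheus promtool check config /etc/prometheus/prometheus.yml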
/monitoring/www/aws-logo.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/monitoring/www/background.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/1click-hpc/390dfe1e4c131f135cdd8b5f853371e763cd8877/monitoring/www/background.png
--------------------------------------------------------------------------------
/monitoring/www/index.html:
--------------------------------------------------------------------------------
(The HTML markup and inline styles of this page are stripped in this listing; the recoverable page text follows.)

AWS ParallelCluster

1Click-HPC

1Click-HPC is an open-source project that aims to speed up the deployment of an HPC cluster on AWS. You can have a fully functional, ready-to-use HPC cluster in minutes, with just 1 click.

The 1Click-HPC source code and getting-started guide can be found .

It leverages AWS-supported services and projects, such as:

AWS ParallelCluster
an AWS-supported open source cluster management tool that makes it easy for you to deploy and manage High Performance Computing (HPC) clusters on AWS.

NICE DCV
a high-performance remote display protocol that provides customers with a secure way to deliver remote desktops and application streaming from any cloud or data center to any device, over varying network conditions.

NICE EnginFrame
an advanced web front-end for accessing technical and scientific applications running on an HPC Cluster