├── docs ├── sdp-aks.png ├── sdp-aks-traffic.png ├── upgrade-kubernetes-cluster.md ├── upgrade-flux.md ├── velero-backup-routine.md ├── velero.md └── security.md ├── .gitattributes ├── .gitignore ├── arm-templates ├── dev │ ├── deploy-storage.parameters.json │ ├── deploy-arm.parameters.json │ ├── assign-aksroleassignment.parameters.json │ ├── deploy-aks.parameters.json │ └── deploy-gitlab-storage.parameters.json ├── prod │ ├── deploy-storage.parameters.json │ ├── deploy-arm.parameters.json │ ├── assign-aksroleassignment.parameters.json │ ├── deploy-aks.parameters.json │ └── deploy-gitlab-storage.parameters.json ├── classic-values │ └── deploy-vms.parameters.json ├── base │ ├── assign-aksroleassignment.json │ ├── deploy-storage.json │ ├── deploy-arm.json │ ├── deploy-gitlab-storage.json │ ├── deploy-aks.json │ └── deploy-k8s-infra.json ├── README.md └── classic │ ├── deploy-externalvms.json │ └── deploy-internalvms.json ├── manifests ├── namespaces.yaml └── storageclasses.yaml ├── .github └── workflows │ ├── Stop-AKS-dev.yml │ ├── Start-AKS-dev.yml │ ├── AKS-prod-command-runner.yml │ ├── Apply-AKS.yml │ └── Apply-Classic.yml ├── env.template ├── bootstrap.sh ├── pre-arm.sh ├── README.md ├── post-arm.sh └── LICENSE /docs/sdp-aks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/equinor/sdp-omnia/HEAD/docs/sdp-aks.png -------------------------------------------------------------------------------- /docs/sdp-aks-traffic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/equinor/sdp-omnia/HEAD/docs/sdp-aks-traffic.png -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | *.sh text eol=lf 3 | Dockerfile text 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .*secrets 2 | .envexternal-dns/azure.json 3 | .env 4 | azure.json 5 | acr.properties 6 | velero-credentials 7 | .idea 8 | knsk.sh 9 | cat 10 | .vscode/settings.json 11 | -------------------------------------------------------------------------------- /arm-templates/dev/deploy-storage.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /arm-templates/prod/deploy-storage.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /arm-templates/dev/deploy-arm.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "environment": { 6 | "value": "dev" 7 | } 8 | } 9 | } -------------------------------------------------------------------------------- /arm-templates/prod/deploy-arm.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "environment": { 6 | "value": "prod" 7 | } 8 | } 9 | } 
-------------------------------------------------------------------------------- /manifests/namespaces.yaml: -------------------------------------------------------------------------------- 1 | # Secrets and certain resources created by post-arm template such as Flux require that namespaces are created from this repo. 2 | 3 | --- 4 | apiVersion: v1 5 | kind: Namespace 6 | metadata: 7 | name: external-dns 8 | --- 9 | apiVersion: v1 10 | kind: Namespace 11 | metadata: 12 | name: sealed-secrets 13 | --- 14 | apiVersion: v1 15 | kind: Namespace 16 | metadata: 17 | name: flux 18 | --- 19 | apiVersion: v1 20 | kind: Namespace 21 | metadata: 22 | name: velero 23 | --- 24 | apiVersion: v1 25 | kind: Namespace 26 | metadata: 27 | name: gitlab -------------------------------------------------------------------------------- /arm-templates/dev/assign-aksroleassignment.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "environment": { 6 | "value": "dev" 7 | }, 8 | "principalId": { 9 | "reference": { 10 | "keyVault": { 11 | "id": "/subscriptions/b18da12e-efa1-4642-8fec-b6580b00212c/resourceGroups/k8s-infrastructure/providers/Microsoft.KeyVault/vaults/SDPVault" 12 | }, 13 | "secretName": "sdpaks-common-velero-sp-object-id" 14 | } 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /arm-templates/prod/assign-aksroleassignment.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "environment": { 6 | "value": "prod" 7 | }, 8 | "principalId": { 9 | "reference": { 10 | "keyVault": { 11 | "id": 
"/subscriptions/b18da12e-efa1-4642-8fec-b6580b00212c/resourceGroups/k8s-infrastructure/providers/Microsoft.KeyVault/vaults/SDPVault" 12 | }, 13 | "secretName": "sdpaks-common-velero-sp-object-id" 14 | } 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /.github/workflows/Stop-AKS-dev.yml: -------------------------------------------------------------------------------- 1 | name: Stop AKS dev cluster 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | # * is a special character in YAML so you have to quote this string 7 | - cron: "30 17 * * 1-5" 8 | 9 | jobs: 10 | stop-dev-cluster: 11 | name: Job 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: "Set env variables" 15 | run: | 16 | echo "AZCLIVERSION=2.33.1" >> $GITHUB_ENV 17 | - name: "az login" 18 | uses: azure/login@v1 19 | with: 20 | creds: ${{ secrets.AZURE_CREDENTIALS }} 21 | - name: "az AKS stop cluster" 22 | uses: azure/CLI@v1 23 | with: 24 | azcliversion: ${{ env.AZCLIVERSION }} 25 | inlineScript: | 26 | az aks stop --name sdpaks-dev-k8s --resource-group sdpaks-dev 27 | -------------------------------------------------------------------------------- /.github/workflows/Start-AKS-dev.yml: -------------------------------------------------------------------------------- 1 | name: Start AKS dev cluster 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | # * is a special character in YAML so you have to quote this string 7 | - cron: "0 5 * * 1-5" 8 | 9 | jobs: 10 | start-dev-cluster: 11 | name: Job 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: "Set env variables" 15 | run: | 16 | echo "AZCLIVERSION=2.33.1" >> $GITHUB_ENV 17 | - name: "az login" 18 | uses: azure/login@v1 19 | with: 20 | creds: ${{ secrets.AZURE_CREDENTIALS }} 21 | - name: "az AKS start cluster" 22 | uses: azure/CLI@v1 23 | with: 24 | azcliversion: ${{ env.AZCLIVERSION }} 25 | inlineScript: | 26 | az aks start --name sdpaks-dev-k8s --resource-group sdpaks-dev 27 | 
-------------------------------------------------------------------------------- /arm-templates/prod/deploy-aks.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "vnetName": { 6 | "value": "sdpaks-prod-vnet" 7 | }, 8 | "subnetRange":{ 9 | "value": "10.0.0.0/16" 10 | }, 11 | "subnetRangePsql":{ 12 | "value": "10.2.0.0/20" 13 | }, 14 | "vnetRange":{ 15 | "value": ["10.0.0.0/16", "10.2.0.0/16"] 16 | }, 17 | "serviceCidr":{ 18 | "value": "10.1.0.0/16" 19 | }, 20 | "dnsServiceIp":{ 21 | "value": "10.1.0.10" 22 | }, 23 | "maxNodes": { 24 | "value": 6 25 | }, 26 | "kubernetesVersion":{ 27 | "value": "1.22.6" 28 | } 29 | } 30 | } -------------------------------------------------------------------------------- /arm-templates/dev/deploy-aks.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "vnetName": { 6 | "value": "sdpaks-dev-vnet" 7 | }, 8 | "subnetRange":{ 9 | "value": "10.100.0.0/16" 10 | }, 11 | "subnetRangePsql":{ 12 | "value": "10.99.0.0/20" 13 | }, 14 | "vnetRange":{ 15 | "value": ["10.99.0.0/16", "10.100.0.0/16"] 16 | }, 17 | "serviceCidr":{ 18 | "value": "10.101.0.0/16" 19 | }, 20 | "dnsServiceIp":{ 21 | "value": "10.101.0.10" 22 | }, 23 | "minNodes":{ 24 | "value": 2 25 | }, 26 | "maxNodes":{ 27 | "value": 3 28 | }, 29 | "kubernetesVersion":{ 30 | "value": "1.22.6" 31 | }, 32 | "enableAutoscaler":{ 33 | "value": true 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /manifests/storageclasses.yaml: -------------------------------------------------------------------------------- 1 | kind: 
StorageClass 2 | apiVersion: storage.k8s.io/v1 3 | metadata: 4 | name: managed-premium-retain-nocache 5 | provisioner: kubernetes.io/azure-disk 6 | reclaimPolicy: Retain 7 | parameters: 8 | storageaccounttype: Premium_LRS 9 | kind: Managed 10 | cachingmode: None 11 | --- 12 | kind: StorageClass 13 | apiVersion: storage.k8s.io/v1 14 | metadata: 15 | name: managed-premium-retain-expandable 16 | provisioner: kubernetes.io/azure-disk 17 | allowVolumeExpansion: true 18 | reclaimPolicy: Retain 19 | parameters: 20 | storageaccounttype: Premium_LRS 21 | kind: Managed 22 | cachingmode: None 23 | --- 24 | kind: StorageClass 25 | apiVersion: storage.k8s.io/v1 26 | metadata: 27 | name: azurefile 28 | provisioner: kubernetes.io/azure-file 29 | mountOptions: 30 | - dir_mode=0777 31 | - file_mode=0777 32 | - uid=1000 33 | - gid=1000 34 | - mfsymlinks 35 | - nobrl 36 | - cache=none 37 | parameters: 38 | skuName: Premium_LRS 39 | -------------------------------------------------------------------------------- /env.template: -------------------------------------------------------------------------------- 1 | # Azure subscription to use, for the SDP Tools team this typically is SDP Tools 2 | AZ_SUBSCRIPTION="S942-SDP-Tools" 3 | # Environment - should match subdomain, parameter folder, flux branch and flux folder 4 | ENVIRONMENT=prod 5 | # Prefix - Used for app registrations. Use "dev." for dev environment. Leave blank for prod(xxx.sdpaks.equinor.com) 6 | PREFIX= 7 | # Repository used for GitOps 8 | FLUX_GITOPS_REPO=git@github.com:Equinor/sdp-flux.git 9 | # PSQL details for Gitlabs external storage, only change locally. 
(Blank entries are OK if the secrets already exist) 10 | PSQL_USERNAME= 11 | PSQL_PASSWORD= 12 | 13 | # 14 | # Do not change below 15 | # 16 | 17 | # Location 18 | AZ_LOCATION=norwayeast 19 | # Main resource group where aks will be deployed 20 | AZ_GROUP="sdpaks-${ENVIRONMENT}" 21 | # Branch to be used in GitOps 22 | FLUX_GITOPS_BRANCH="$ENVIRONMENT" 23 | # kustomize path for override folder i.e. production for production, development for develop 24 | FLUX_GITOPS_PATH="$ENVIRONMENT" 25 | -------------------------------------------------------------------------------- /docs/upgrade-kubernetes-cluster.md: -------------------------------------------------------------------------------- 1 | # Upgrade Kubernetes cluster 2 | 3 | If you need to upgrade the cluster these are the minimal steps needed. Make sure your cluster has at least 2 nodes before upgrading. If not, the applications running will not have any chance of starting on another node while one is being upgraded. 4 | 5 | - Then find the upgrade path, make sure to upgrade to the highest possible version from the version you currently use. 6 | `az aks get-versions --location $AZ_LOCATION --output table` 7 | 8 | - Version should be upgraded by updating the arm-templates/base/deploy-aks template. For minor version upgrades you should always test the new version in dev first. 9 | If the pipeline is struggling for some reason (Give it time, upgrade can take about an hour), then you may try to upgrade by using the az CLI. 10 | 11 | - To run cluster upgrade 'manually', do the following. 12 | `az aks upgrade --name $AZ_AKS_NAME --kubernetes-version VERSION` 13 | 14 | - FluxCD should also be upgraded to match supported kubernetes versions. 
See 15 | -------------------------------------------------------------------------------- /arm-templates/classic-values/deploy-vms.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "location": { 6 | "value": "Norway East" 7 | }, 8 | "adminUsername": { 9 | "value": "sdpteam" 10 | }, 11 | "vmNamePrefix": { 12 | "value": "vm" 13 | }, 14 | "vnetName": { 15 | "value": "S066-NOE-vnet" 16 | }, 17 | "subnetName": { 18 | "value": "S066-NOE-subnet" 19 | }, 20 | "netRgName": { 21 | "value": "S066-NOE-network" 22 | }, 23 | "sshPublicKeys": { 24 | "reference": { 25 | "keyVault": { 26 | "id": "/subscriptions/47dd9472-aaea-401b-add5-55fccfe63434/resourceGroups/sdp-infrastructure/providers/Microsoft.KeyVault/vaults/sdp-vault" 27 | }, 28 | "secretName": "sdp-team-pub-sshkeys" 29 | } 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /.github/workflows/AKS-prod-command-runner.yml: -------------------------------------------------------------------------------- 1 | # The point of this file is to run commands which should ideally be run outside work hours, without the need of being in place ourselves 2 | # Disable the cron once done. 
3 | 4 | name: Run AKS command on prod 5 | 6 | on: 7 | workflow_dispatch: 8 | schedule: 9 | # * is a special character in YAML so you have to quote this string 10 | - cron: "15 7 6 3 *" #Set specific yearly date in https://crontab.guru/ 11 | 12 | jobs: 13 | run-aks-command: 14 | name: Job 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: "Set env variables" 18 | run: | 19 | echo "AZCLIVERSION=2.33.1" >> $GITHUB_ENV 20 | - name: "az login" 21 | uses: azure/login@v1 22 | with: 23 | creds: ${{ secrets.AZURE_CREDENTIALS }} 24 | - name: "Run az aks commands" 25 | uses: azure/CLI@v1 26 | with: 27 | azcliversion: ${{ env.AZCLIVERSION }} 28 | inlineScript: | 29 | az aks upgrade --resource-group sdpaks-prod --name sdpaks-prod-k8s --kubernetes-version "1.19.7" -y 30 | -------------------------------------------------------------------------------- /arm-templates/dev/deploy-gitlab-storage.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "storageAccountName": { 6 | "value": "sdpaksdevminio" 7 | }, 8 | "administratorLogin": { 9 | "reference": { 10 | "keyVault": { 11 | "id": "/subscriptions/b18da12e-efa1-4642-8fec-b6580b00212c/resourceGroups/k8s-infrastructure/providers/Microsoft.KeyVault/vaults/SDPVault" 12 | }, 13 | "secretName": "sdpaks-dev-psql-username" 14 | } 15 | }, 16 | "administratorLoginPassword": { 17 | "reference": { 18 | "keyVault": { 19 | "id": "/subscriptions/b18da12e-efa1-4642-8fec-b6580b00212c/resourceGroups/k8s-infrastructure/providers/Microsoft.KeyVault/vaults/SDPVault" 20 | }, 21 | "secretName": "sdpaks-dev-psql-password" 22 | } 23 | }, 24 | "dbName": { 25 | "value": "sdpaks-dev-gitlab-psql13" 26 | }, 27 | "environment": { 28 | "value": "dev" 29 | }, 30 | "tags": { 31 | "value": { 32 | "app": "gitlab", 33 | "cluster": "dev" 34 | } 35 | }, 36 | 
"backupRetention": { 37 | "value": 7 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /docs/upgrade-flux.md: -------------------------------------------------------------------------------- 1 | # Upgrade Flux 2 | 3 | Per Aug. 2020 - Flux is now in two separate helm releases - flux and helm-operator 4 | 5 | Use the following commands to upgrade the Flux installed in cluster. Remember to upgrade fluxctl to latest version in case of breaking changes 6 | ``` 7 | # Get latest variables 8 | source .env 9 | 10 | # Upgrade to latest helm charts 11 | helm repo update 12 | 13 | # Upgrade flux in place with the same values/settings 14 | helm upgrade flux --reuse-values fluxcd/flux 15 | Remember to upgrade the content in post-arm.sh file. 16 | ``` 17 | 18 | If Flux fails to upgrade you need to remove Flux and install it again. But be aware that if you just try `helm delete flux` you __WILL__ get all your HelmReleases removed from the cluster. This is considered as bad. The procedure is therefore as follows. 19 | ``` 20 | # Remove the operator that is responsible for Flux Helmreleases 21 | kubectl -n flux delete deployment/flux-helm-operator 22 | 23 | # Remove Flux 24 | helm delete --purge flux 25 | 26 | # Run the post-arm bootstrap script again to install with correct values 27 | ./post-arm.sh 28 | ``` 29 | 30 | You might need to add your deployment key to the git repository. Ideally the same public key should be used, some attempts earlier have found that only the private key is re-used. 
-------------------------------------------------------------------------------- /arm-templates/prod/deploy-gitlab-storage.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "storageAccountName": { 6 | "value": "sdpaksprodminio" 7 | }, 8 | "replication": { 9 | "value": "Standard_GRS" 10 | }, 11 | "administratorLogin": { 12 | "reference": { 13 | "keyVault": { 14 | "id": "/subscriptions/b18da12e-efa1-4642-8fec-b6580b00212c/resourceGroups/k8s-infrastructure/providers/Microsoft.KeyVault/vaults/SDPVault" 15 | }, 16 | "secretName": "sdpaks-prod-psql-username" 17 | } 18 | }, 19 | "administratorLoginPassword": { 20 | "reference": { 21 | "keyVault": { 22 | "id": "/subscriptions/b18da12e-efa1-4642-8fec-b6580b00212c/resourceGroups/k8s-infrastructure/providers/Microsoft.KeyVault/vaults/SDPVault" 23 | }, 24 | "secretName": "sdpaks-prod-psql-password" 25 | } 26 | }, 27 | "dbName": { 28 | "value": "sdpaks-prod-gitlab-psql12" 29 | }, 30 | "environment": { 31 | "value": "prod" 32 | }, 33 | "tags": { 34 | "value": { 35 | "app": "gitlab", 36 | "cluster": "prod" 37 | } 38 | }, 39 | "backupRetention": { 40 | "value": 35 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /arm-templates/base/assign-aksroleassignment.json: -------------------------------------------------------------------------------- 1 | // This role assignment is for the Velero SP to be able to take snapshots and re-create disks to the AKS cluster 2 | // Per Aug. 
2020 - not included in Github Actions CI 3 | { 4 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 5 | "contentVersion": "1.0.0.0", 6 | "parameters": { 7 | "environment": { 8 | "type": "string", 9 | "metadata": { 10 | "description": "The name of the environment. E.g. 'dev' " 11 | } 12 | }, 13 | "principalId": { 14 | "metadata": { 15 | "description": "Object id of service principal." 16 | }, 17 | "type": "string" 18 | } 19 | }, 20 | "variables": { 21 | "rootRgName": "[concat('sdpaks-', parameters('environment'))]", 22 | "managedClusterName": "[concat(variables('rootRgName'), '-k8s')]", 23 | "contributor": "[concat('/subscriptions/', subscription().subscriptionId, '/providers/Microsoft.Authorization/roleDefinitions/', 'b24988ac-6180-42a0-ab88-20f7382dd24c')]" 24 | }, 25 | "resources": [ 26 | { 27 | "type": "Microsoft.Authorization/roleAssignments", 28 | "apiVersion": "2018-09-01-preview", 29 | "name": "[guid('MC_', variables('rootRgName'), '_', variables('managedClusterName'), '_norwayeast', parameters('principalId'),variables('contributor'))]", 30 | "properties": { 31 | "roleDefinitionId": "[variables('contributor')]", 32 | "principalId": "[parameters('principalId')]", 33 | "scope": "[concat('/subscriptions/b18da12e-efa1-4642-8fec-b6580b00212c/resourceGroups/MC_sdpaks-', parameters('environment'), '_sdpaks-', parameters('environment'), '-k8s_norwayeast')]" 34 | } 35 | } 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /bootstrap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script bootstraps a kubernetes cluster on Azure (AKS) with helm, 3 | # It also configures the client to run kubectl commands on the cluster (.kube/config) 4 | # All variables should be defined in a file called ".env" 5 | 6 | source .env 7 | set -e 8 | 9 | # Check for prerequisites binaries 10 | echo 11 | echo " Check for neccesary 
executables" 12 | hash az || { echo "Error: Azure-CLI not found in PATH. Exiting..."; exit 1; } 13 | hash kubectl || { echo "Error: kubectl not found in PATH. Exiting..."; exit 1; } 14 | hash helm || { echo "Error: helm not found in PATH. Exiting..."; exit 1; } 15 | 16 | # Login to Azure if not already logged inn 17 | echo 18 | echo " Logging you in to Azure if not already logged in" 19 | az account show > /dev/null || az login > /dev/null 20 | 21 | # Set Azure-CLI config 22 | echo 23 | echo " Setting subscription (${AZ_SUBSCRIPTION})" 24 | az account set --subscription "$AZ_SUBSCRIPTION" > /dev/null 25 | 26 | 27 | # Run Pre-ARM to create needed service accounts 28 | source ./pre-arm.sh 29 | 30 | # Deploy Azure resources defined in arm-templates 31 | echo 32 | echo " Deploying arm templates with parameter file ./arm-templates/${ENVIRONMENT}/deploy-arm.parameters.json" 33 | echo " Applying templates found at https://raw.githubusercontent.com/equinor/sdp-omnia/ in ${ENVIRONMENT} branch" 34 | 35 | az deployment create --debug --name "$AZ_GROUP" --location "$AZ_LOCATION" --template-file ./arm-templates/base/deploy-arm.json --parameters @./arm-templates/${ENVIRONMENT}/deploy-arm.parameters.json > /dev/null 36 | 37 | echo 38 | echo " Set default resource group (${AZ_GROUP})" 39 | az configure --defaults group=$AZ_GROUP > /dev/null 40 | 41 | # Register the client in the kubernetes cluster and creates ~/.kube directory with keys and kubectl connection info 42 | echo 43 | echo " Getting Kubernetes cluster details" 44 | az aks get-credentials --name "${AZ_GROUP}-k8s" 45 | 46 | # Setup basic K8s services 47 | source ./post-arm.sh 48 | -------------------------------------------------------------------------------- /.github/workflows/Apply-AKS.yml: -------------------------------------------------------------------------------- 1 | # ARM template validate and apply. 
Should be synced and triggered on changes to the arm-template folder 2 | # For more information on GitHub Actions for Azure, refer to https://github.com/Azure/Actions 3 | # For more samples to get started with GitHub Action workflows to deploy to Azure, refer to https://github.com/Azure/actions-workflow-samples 4 | name: 'Apply AKS ARM template' 5 | on: 6 | workflow_dispatch: 7 | push: 8 | branches: 9 | - prod 10 | - dev 11 | paths: 12 | - 'arm-templates/base/*' 13 | - 'arm-templates/dev/*' 14 | - 'arm-templates/prod/*' 15 | jobs: 16 | # AKS deployments are made on a subscription scope 17 | deploy-aks-arm: 18 | name: Apply AKS ARM template 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: 'Set env variables' 22 | run: | 23 | echo "LOCATION=norwayeast" >> $GITHUB_ENV 24 | echo "AZCLIVERSION=2.30.0" >> $GITHUB_ENV 25 | echo "AKS=https://raw.githubusercontent.com/equinor/sdp-omnia/${GITHUB_REF##*/}/arm-templates/base/deploy-arm.json" >> $GITHUB_ENV 26 | echo "AKS_PARAMS=https://raw.githubusercontent.com/equinor/sdp-omnia/${GITHUB_REF##*/}/arm-templates/${GITHUB_REF##*/}/deploy-arm.parameters.json" >> $GITHUB_ENV 27 | - name: 'az login' 28 | uses: azure/login@v1 29 | with: 30 | # Paste output of `az ad sp create-for-rbac --name Apply-ARMTemplate --role "Contributor" --sdk-auth` as value of secret variable: AZURE_CREDENTIALS, add to Repo settings --> secrets 31 | creds: ${{ secrets.AZURE_CREDENTIALS }} 32 | 33 | - name: 'az AKS deployment what-if' 34 | uses: azure/CLI@v1 35 | with: 36 | azcliversion: ${{ env.AZCLIVERSION }} 37 | inlineScript: | 38 | az deployment sub what-if --location ${{ env.LOCATION }} --template-uri ${{ env.AKS }} --parameters ${{ env.AKS_PARAMS }} > /dev/null 39 | - if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' 40 | name: 'az AKS deployment create' 41 | uses: azure/CLI@v1 42 | with: 43 | azcliversion: ${{ env.AZCLIVERSION }} 44 | inlineScript: | 45 | az deployment sub create --location ${{ env.LOCATION }} 
--template-uri ${{ env.AKS }} --parameters ${{ env.AKS_PARAMS }} 46 | echo Successfully updated - ${GITHUB_REF##*/} cluster -------------------------------------------------------------------------------- /arm-templates/README.md: -------------------------------------------------------------------------------- 1 | # ARM templates 2 | 3 | ARM Templates are IaC for the Azure platform. They are idempotent. 4 | If you are used to scripts and are sceptical, know this: Any administrative task you do through the portal, CLI or REST API are just wrappers for abstracted ARM templates. Using ARM templates directly provide a declarative, idempotent and more granular control of your infrastructure. 5 | 6 | A detailed intro to IaC on Azure can be found here https://github.com/starkfell/100DaysOfIaC 7 | 8 | ## Set up 9 | 10 | In true GitOps fashion, arm templates should be synced regularly, and Dev and Prod should also be in sync. 11 | Put common values - e.g. default ARM-template in the /base folder. Put "diffs" - parameter files in /development or /production folders. 12 | 13 | ## Info and limitations 14 | 15 | Please note that ARM templates are not perfect. They do not contain state, for this you should use Terraform, which has its own limitations. Also, just as the CLI and portal, you cannot do illegal operations. E.g. decreasing the size of a VM in an AKS cluster "just because I can" in the arm-template. 16 | To see changes applied by an ARM template, see https://docs.microsoft.com/en-us/azure/azure-resource-manager/templates/template-deploy-what-if 17 | 18 | The ARM templates are grouped per resource group. The exception is "deploy-arm.json", which is the main template to be called from Github Actions. This template links to the other ARM templates, and has dependencies so everything should run smoothly from scratch to fully deployed cluster. 
19 | 20 | ## Update infrastructure 21 | 22 | Github Actions are setup to deploy any changes to the ARM-templates from `prod` branch. 23 | Authentication is done using the `Apply-ARMTemplate` servicePrincipal. 24 | The templates are set to be none-destructable, meaning they can only create resources, not remove them. 25 | 26 | ## Developing ARM templates 27 | 28 | Recommended software to develop ARM templates: VScode + ARM extension + ARM Template Viewer extension 29 | Alternatively - Visual Studio --> create new ARM project (requires Azure extension) 30 | 31 | ### Testing 32 | 33 | To test your templates, run the `az deployment`-command with `validate` on the specific template 34 | 35 | ```bash 36 | az deployment group validate -g sdpaks-dev --template-file arm-templates/base/deploy-aks.json --parameters arm-templates/dev/deploy-aks.parameters.json --debug 37 | 38 | ``` 39 | -------------------------------------------------------------------------------- /arm-templates/base/deploy-storage.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "buckets": { 6 | "type": "array", 7 | "defaultValue": [ 8 | "prod", 9 | "dev" 10 | ] 11 | } 12 | }, 13 | "variables": { 14 | "vnetSubnetIdProd": "[concat('/subscriptions/b18da12e-efa1-4642-8fec-b6580b00212c/resourceGroups/', 'sdpaks-prod', '/providers/Microsoft.Network/virtualNetworks/', 'sdpaks-prod', '-vnet', '/subnets/aks-subnet')]", 15 | "vnetSubnetIdDev": "[concat('/subscriptions/b18da12e-efa1-4642-8fec-b6580b00212c/resourceGroups/', 'sdpaks-dev', '/providers/Microsoft.Network/virtualNetworks/', 'sdpaks-dev', '-vnet', '/subnets/aks-subnet')]", 16 | "storageAccountName": "sdpakscommonbackup", 17 | "contributor": "[concat('/subscriptions/', subscription().subscriptionId, '/providers/Microsoft.Authorization/roleDefinitions/', 
'b24988ac-6180-42a0-ab88-20f7382dd24c')]" 18 | }, 19 | "resources": [ 20 | { 21 | "type": "Microsoft.Storage/storageAccounts", 22 | "apiVersion": "2019-04-01", 23 | "name": "[variables('storageAccountName')]", 24 | "location": "[resourceGroup().location]", 25 | "comments": "Common backup storage account for sdpaks", 26 | "sku": { 27 | "name": "Standard_LRS", 28 | "tier": "Standard" 29 | }, 30 | "kind": "StorageV2", 31 | "properties": { 32 | "networkAcls": { 33 | "bypass": "None", 34 | "defaultAction": "Deny", 35 | "virtualNetworkRules": [ 36 | { 37 | "id": "[variables('vnetSubnetIdProd')]", 38 | "action": "Allow" 39 | }, 40 | { 41 | "id": "[variables('vnetSubnetIdDev')]", 42 | "action": "Allow" 43 | } 44 | ] 45 | }, 46 | "supportsHttpsTrafficOnly": true, 47 | "accessTier": "Cool" 48 | }, 49 | "tags": { 50 | "cost": "backup", 51 | "cluster": "common" 52 | } 53 | }, 54 | { 55 | "name": "[concat(variables('storageAccountName'), '/default/', 'velero-', parameters('buckets')[copyIndex()], '-storage')]", 56 | "type": "Microsoft.Storage/storageAccounts/blobServices/containers", 57 | "comments": "Buckets under the selected storage account", 58 | "apiVersion": "2018-07-01", 59 | "dependsOn": [ 60 | "[variables('storageAccountName')]" 61 | ], 62 | "copy": { 63 | "name": "storagecopy", 64 | "count": "[length(parameters('buckets'))]" 65 | } 66 | } 67 | ], 68 | "outputs": { 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /.github/workflows/Apply-Classic.yml: -------------------------------------------------------------------------------- 1 | # ARM template validate and apply. 
Should be synced and triggered on changes to the arm-template folder 2 | # For more information on GitHub Actions for Azure, refer to https://github.com/Azure/Actions 3 | # For more samples to get started with GitHub Action workflows to deploy to Azure, refer to https://github.com/Azure/actions-workflow-samples 4 | name: "Apply Omnia classic ARM templates" 5 | on: 6 | workflow_dispatch: 7 | push: 8 | branches: 9 | - prod 10 | - dev 11 | paths: 12 | - "arm-templates/classic/*" 13 | - "arm-templates/classic-values/*" 14 | 15 | jobs: 16 | # Classic deployments are made on a group scope 17 | deploy-classic-external-arm: 18 | name: Apply Classic External ARM template 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: "Set env variables" 22 | run: | 23 | echo "LOCATION=norwayeast" >> $GITHUB_ENV 24 | echo "CLASSIC_SUB=S066-SDP-Tools-Classic" >> $GITHUB_ENV 25 | echo "EXTERNAL_GROUP=sdp-external-vms" >> $GITHUB_ENV 26 | echo "INTERNAL_GROUP=sdp-internal-vms" >> $GITHUB_ENV 27 | echo "AZCLIVERSION=2.33.1" >> $GITHUB_ENV 28 | echo "CLASSIC_EXTERNAL=https://raw.githubusercontent.com/equinor/sdp-omnia/${GITHUB_REF##*/}/arm-templates/classic/deploy-externalvms.json" >> $GITHUB_ENV 29 | echo "CLASSIC_INTERNAL=https://raw.githubusercontent.com/equinor/sdp-omnia/${GITHUB_REF##*/}/arm-templates/classic/deploy-internalvms.json" >> $GITHUB_ENV 30 | echo "CLASSIC_PARAMS=https://raw.githubusercontent.com/equinor/sdp-omnia/${GITHUB_REF##*/}/arm-templates/classic-values/deploy-vms.parameters.json" >> $GITHUB_ENV 31 | - name: "az login" 32 | uses: azure/login@v1 33 | with: 34 | creds: ${{ secrets.AZURE_CREDENTIALS }} 35 | 36 | - name: "az CLASSIC_EXTERNAL deployment what-if" 37 | uses: azure/CLI@v1 38 | with: 39 | azcliversion: ${{ env.AZCLIVERSION }} 40 | inlineScript: | 41 | az deployment group what-if --subscription ${{ env.CLASSIC_SUB }} --resource-group ${{ env.EXTERNAL_GROUP }} --template-uri ${{ env.CLASSIC_EXTERNAL }} --parameters ${{ env.CLASSIC_PARAMS }} > /dev/null 42 | 43 | 
- name: "az CLASSIC_EXTERNAL deployment create" 44 | uses: azure/CLI@v1 45 | with: 46 | azcliversion: ${{ env.AZCLIVERSION }} 47 | inlineScript: | 48 | az deployment group create --subscription ${{ env.CLASSIC_SUB }} --resource-group ${{ env.EXTERNAL_GROUP }} --template-uri ${{ env.CLASSIC_EXTERNAL }} --parameters ${{ env.CLASSIC_PARAMS }} 49 | 50 | deploy-classic-internal-arm: 51 | name: Apply Classic Internal ARM template 52 | runs-on: ubuntu-latest 53 | steps: 54 | - name: "az login" 55 | uses: azure/login@v1 56 | with: 57 | creds: ${{ secrets.AZURE_CREDENTIALS }} 58 | 59 | - name: "az CLASSIC_INTERNAL deployment what-if" 60 | uses: azure/CLI@v1 61 | with: 62 | azcliversion: ${{ env.AZCLIVERSION }} 63 | inlineScript: | 64 | az deployment group what-if --subscription ${{ env.CLASSIC_SUB }} --resource-group ${{ env.INTERNAL_GROUP }} --template-uri ${{ env.CLASSIC_INTERNAL }} --parameters ${{ env.CLASSIC_PARAMS }} > /dev/null 65 | 66 | - name: "az CLASSIC_INTERNAL deployment create" 67 | uses: azure/CLI@v1 68 | with: 69 | azcliversion: ${{ env.AZCLIVERSION }} 70 | inlineScript: | 71 | az deployment group create --subscription ${{ env.CLASSIC_SUB }} --resource-group ${{ env.INTERNAL_GROUP }} --template-uri ${{ env.CLASSIC_INTERNAL }} --parameters ${{ env.CLASSIC_PARAMS }} 72 | -------------------------------------------------------------------------------- /arm-templates/base/deploy-arm.json: -------------------------------------------------------------------------------- 1 | // This template is used as a wrapper to deploy SDP-aks resources and dependencies. 2 | // It ensures required resource groups are in place, then runs a deployment towards a RGs. 3 | // Service principals and keyvault secrets referred in these templates are created by pre-arm script. 4 | 5 | // Template loops are used to ensure dependencies are created in the correct order. 
6 | 7 | { 8 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 9 | "contentVersion": "1.0.0.0", 10 | "parameters": { 11 | "environment": { 12 | "type": "string", 13 | "metadata": { 14 | "description": "The name of the environment. E.g. 'dev'" 15 | } 16 | } 17 | }, 18 | "variables": { 19 | "rootRgName": "[concat('sdpaks-', parameters('environment'))]", 20 | "location": "norwayeast", 21 | 22 | "templateBaseUrl": "[concat('https://raw.githubusercontent.com/equinor/sdp-omnia/', parameters('environment'), '/arm-templates/base/')]", 23 | "parametersBaseUrl": "[concat('https://raw.githubusercontent.com/equinor/sdp-omnia/', parameters('environment'), '/arm-templates/', parameters('environment'), '/')]", 24 | 25 | "templateLoop1": [ 26 | { 27 | "name": "aksTemplate", 28 | "rgName": "[variables('rootRgName')]", 29 | "templateUrl": "[concat(variables('templateBaseUrl'), 'deploy-aks.json')]", 30 | "parametersUrl": "[concat(variables('parametersBaseUrl'), 'deploy-aks.parameters.json')]" 31 | } 32 | ], 33 | "templateLoop2": [ 34 | { 35 | "name": "gitlabStorageTemplate", 36 | "rgName": "[concat(variables('rootRgName'),'-gitlab-storage')]", 37 | "templateUrl": "[concat(variables('templateBaseUrl'), 'deploy-gitlab-storage.json')]", 38 | "parametersUrl": "[concat(variables('parametersBaseUrl'), 'deploy-gitlab-storage.parameters.json')]" 39 | }, 40 | { 41 | "name": "storageTemplate", 42 | "rgName": "sdpaks-common-backup", 43 | "templateUrl": "[concat(variables('templateBaseUrl'), 'deploy-storage.json')]", 44 | "parametersUrl": "[concat(variables('parametersBaseUrl'), 'deploy-storage.parameters.json')]" 45 | } 46 | ] 47 | }, 48 | "resources": [ 49 | { 50 | "type": "Microsoft.Resources/resourceGroups", 51 | "apiVersion": "2018-05-01", 52 | "location": "[variables('location')]", 53 | "name": "[variables('templateLoop1')[copyIndex()].rgName]", 54 | "copy": { 55 | "name": "rgCopy1", 56 | "count": "[length(variables('templateLoop1'))]" 57 | } 58 
| }, 59 | { 60 | "type": "Microsoft.Resources/resourceGroups", 61 | "apiVersion": "2018-05-01", 62 | "location": "[variables('location')]", 63 | "name": "[variables('templateLoop2')[copyIndex()].rgName]", 64 | "copy": { 65 | "name": "rgCopy2", 66 | "count": "[length(variables('templateLoop2'))]" 67 | } 68 | }, 69 | { 70 | "type": "Microsoft.Resources/deployments", 71 | "apiVersion": "2020-06-01", 72 | "resourceGroup": "[variables('templateLoop1')[copyIndex()].rgName]", 73 | "name": "[concat('nestedTemplate-', variables('templateLoop1')[copyIndex()].name)]", 74 | "copy": { 75 | "name": "templateCopy1", 76 | "count": "[length(variables('templateLoop1'))]" 77 | }, 78 | "dependsOn": [ 79 | "rgCopy1" 80 | ], 81 | "properties": { 82 | "mode": "Incremental", 83 | "templateLink": { 84 | "uri": "[variables('templateLoop1')[copyIndex()].templateUrl]", 85 | "contentVersion": "1.0.0.0" 86 | }, 87 | "parametersLink": { 88 | "uri": "[variables('templateLoop1')[copyIndex()].parametersUrl]", 89 | "contentVersion":"1.0.0.0" 90 | } 91 | } 92 | }, 93 | { 94 | "type": "Microsoft.Resources/deployments", 95 | "apiVersion": "2020-06-01", 96 | "resourceGroup": "[variables('templateLoop2')[copyIndex()].rgName]", 97 | "name": "[concat('nestedTemplate-', variables('templateLoop2')[copyIndex()].name)]", 98 | "copy": { 99 | "name": "templateCopy2", 100 | "count": "[length(variables('templateLoop2'))]" 101 | }, 102 | "dependsOn": [ 103 | "rgCopy2", 104 | "templateCopy1" 105 | ], 106 | "properties": { 107 | "mode": "Incremental", 108 | "templateLink": { 109 | "uri": "[variables('templateLoop2')[copyIndex()].templateUrl]", 110 | "contentVersion": "1.0.0.0" 111 | }, 112 | "parametersLink": { 113 | "uri": "[variables('templateLoop2')[copyIndex()].parametersUrl]", 114 | "contentVersion":"1.0.0.0" 115 | } 116 | } 117 | } 118 | ] 119 | } 120 | -------------------------------------------------------------------------------- /pre-arm.sh: 
--------------------------------------------------------------------------------
#! /bin/bash
# This script needs to run before deploying the ARM templates.
# It will create any outside dependencies the templates need (mainly Service Accounts).
# All variables should be defined in a file called ".env"
set -e
source .env

# Succeeds when a service principal named $1 exists.
function service_principal_exist {
    az ad sp show --id http://$1 --query objectId -o tsv > /dev/null 2>&1
}
# Succeeds when a secret named $1 exists in the SDPVault key vault.
function keyvault_secret_exist {
    az keyvault secret show --vault-name SDPVault -n $1 --query value -o tsv > /dev/null 2>&1
}

# Create a service principal if it does not already exist, and store its
# password, object id and app id as secrets in SDPVault.
#   $1: service principal name
#   $2: human-readable description used in the "already exists" log message
function ensure_service_principal {
    local SP_NAME=$1
    local SP_DESCRIPTION=$2
    echo
    if ! service_principal_exist $SP_NAME; then
        echo " Service principal $SP_NAME does not exist, creating it.."
        SP_PASSWORD=$(az ad sp create-for-rbac --skip-assignment --name $SP_NAME --query password -o tsv)
        az keyvault secret set --name "$SP_NAME-password" --vault-name SDPVault --value $SP_PASSWORD > /dev/null
        SP_OBJECT_ID=$(az ad sp show --id http://$SP_NAME --query objectId -o tsv)
        az keyvault secret set --name "$SP_NAME-object-id" --vault-name SDPVault --value $SP_OBJECT_ID > /dev/null
        SP_APP_ID=$(az ad sp show --id http://$SP_NAME --query appId -o tsv)
        az keyvault secret set --name "$SP_NAME-app-id" --vault-name SDPVault --value $SP_APP_ID > /dev/null
    else
        echo " Service principal for $SP_DESCRIPTION already exists..."
    fi
}

if ! keyvault_secret_exist "$AZ_GROUP-psql-username" || ! keyvault_secret_exist "$AZ_GROUP-psql-password"; then
    echo " PSQL account details for $ENVIRONMENT cluster does not exist, creating it.."
    az keyvault secret set --name "$AZ_GROUP-psql-username" --vault-name SDPVault --value $PSQL_USERNAME > /dev/null
    az keyvault secret set --name "$AZ_GROUP-psql-password" --vault-name SDPVault --value $PSQL_PASSWORD > /dev/null
else
    echo " PSQL account details already exists..."
fi

# Create service principals and store the credentials in Azure Key Vault.
# (The velero message previously said "aks" by copy-paste mistake.)
ensure_service_principal "${AZ_GROUP}-dns-sp" "dns zone"
ensure_service_principal "${AZ_GROUP}-aks-sp" "aks"
ensure_service_principal "sdpaks-common-velero-sp" "velero"

echo
echo " If the ARM deployment fails with 'service principal does not exist' run the script again.."

# Get existing reply urls for app registration
EXISTINGREGS=$(az ad app list --display-name 'SDP Team' --query [0].replyUrls -o tsv)

echo " Creating List of Reply Urls per environment (leave variable for prod cluster blank)..."
# Make sure there are no spaces at the end of each line!
cat << EOF > newapplist.json
https://alertmanager.${PREFIX}sdpaks.equinor.com/oauth2/callback
https://aware.${PREFIX}sdpaks.equinor.com/oauth2/callback
https://gitlab.${PREFIX}sdpaks.equinor.com/users/auth/azure_oauth2/callback
https://grafana.${PREFIX}sdpaks.equinor.com/login/generic_oauth
https://kibana.${PREFIX}sdpaks.equinor.com/oauth2/callback
https://monitor.${PREFIX}sdpaks.equinor.com/oauth2/callback
https://prometheus.${PREFIX}sdpaks.equinor.com/oauth2/callback
https://release-aware.${PREFIX}sdpaks.equinor.com/oauth2/callback
https://sdp-web.${PREFIX}sdpaks.equinor.com/oauth2/callback
EOF

# Sort the existing reply urls into newline-separated form (awk below needs
# one url per line). The previous version piped through a redundant
# space<->newline round-trip and redirected twice ("> cat > file"), which
# left a stray file literally named "cat" in the working directory.
echo
echo "$EXISTINGREGS" | tr ' ' '\n' | sort > existingregs.json

# Create differential list (requires sorted newline)
DIFFLIST=$(awk 'NR==FNR{a[$0]=1;next}!a[$0]' existingregs.json newapplist.json)
# Cleanup files ("rm a & rm b" would background the first rm)
rm existingregs.json newapplist.json

# Hardcoded object Id for 'SDP Team' (required)
# Important, no quotes around the below command! az cli requires space separated values
# Only call az when there is something to add: "--add replyUrls" with no
# values is an error and would abort the script under "set -e".
if [ -n "$DIFFLIST" ]; then
    echo " Updating existing app registration with the following reply urls:"
    az ad app update --add replyUrls ${DIFFLIST[@]} --id 3b014a2c-797d-43aa-a379-2344fc04b8cc

    echo "${DIFFLIST[@]}"
    echo
    echo " Reply urls successfully added."
else
    echo " All reply urls already registered, nothing to add."
fi
103 | -------------------------------------------------------------------------------- /arm-templates/base/deploy-gitlab-storage.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": 5 | { 6 | "storageAccountName": { 7 | "type": "string" 8 | }, 9 | "administratorLogin": { 10 | "type": "string" 11 | }, 12 | "administratorLoginPassword": { 13 | "type": "securestring" 14 | }, 15 | "dbName": { 16 | "type": "string" 17 | }, 18 | "tags": { 19 | "defaultValue": {}, 20 | "type": "object" 21 | }, 22 | "buckets": { 23 | "type": "array", 24 | "defaultValue": [ 25 | "artifacts", 26 | "backup", 27 | "lfs", 28 | "packages", 29 | "pseudonymizer", 30 | "registry", 31 | "tmp", 32 | "uploads" 33 | ] 34 | }, 35 | "environment":{ 36 | "type":"string" 37 | }, 38 | "replication":{ 39 | "type":"string", 40 | "defaultValue":"Standard_LRS" 41 | }, 42 | "backupRetention":{ 43 | "type":"int", 44 | "defaultValue": 35 45 | } 46 | 47 | }, 48 | "variables": { 49 | "rootRgName": "[concat('sdpaks-', parameters('environment'))]", 50 | "vnetName": "[concat(variables('rootRgName'), '-vnet')]", 51 | "vNetResourceId":"[resourceId(variables('rootRgName'),'Microsoft.Network/virtualNetworks',variables('vnetName'))]", 52 | "managedClusterName": "[concat(variables('rootRgName'), '-k8s')]", 53 | "privateDnsZoneName": "[concat(parameters('dbName'), '.private.postgres.database.azure.com')]", 54 | "privateDnsZoneId": "[resourceId('Microsoft.Network/privateDnsZones',variables('privateDnsZoneName'))]", 55 | "virtualNetworkLinkName": "[concat(variables('privateDnsZoneName'), '/', uniqueString(variables('vNetResourceId')))]", 56 | "virtualNetworkLinkId": "[resourceId('Microsoft.Network/privateDnsZones/virtualNetworkLinks',variables('privateDnsZoneName'),uniqueString(variables('vNetResourceId')))]", 57 | "vnetSubnetPsqlId": 
"[concat(variables('vNetResourceId'), '/subnets/', 'aks-subnet-psql')]", 58 | "vnetSubnetId": "[concat(variables('vNetResourceId'), '/subnets/', 'aks-subnet')]" 59 | 60 | }, 61 | "resources": [ 62 | { 63 | "type": "Microsoft.Network/privateDnsZones", 64 | "apiVersion": "2018-09-01", 65 | "name": "[variables('privateDnsZoneName')]", 66 | "location": "global", 67 | "tags": {}, 68 | "properties": {} 69 | }, 70 | { 71 | "type": "Microsoft.Network/privateDnsZones/virtualNetworkLinks", 72 | "dependsOn": [ 73 | "[variables('privateDnsZoneName')]" 74 | ], 75 | "apiVersion": "2018-09-01", 76 | "name": "[variables('virtualNetworkLinkName')]", 77 | "location": "global", 78 | "properties": { 79 | "virtualNetwork": { 80 | "id": "[variables('vNetResourceId')]" 81 | }, 82 | "registrationEnabled": false 83 | } 84 | }, 85 | { 86 | "type": "Microsoft.DBforPostgreSQL/flexibleServers", 87 | "apiVersion": "2021-06-01", 88 | "name": "[parameters('dbName')]", 89 | "location": "[resourceGroup().location]", 90 | "dependsOn": [ 91 | "[variables('virtualNetworkLinkId')]" 92 | ], 93 | "tags": "[parameters('tags')]", 94 | "sku": { 95 | "name": "Standard_D2ds_v4", 96 | "tier": "GeneralPurpose" 97 | }, 98 | "properties": { 99 | "administratorLogin": "[parameters('administratorLogin')]", 100 | "administratorLoginPassword": "[parameters('administratorLoginPassword')]", 101 | "availabilityZone": "", 102 | "backup": { 103 | "backupRetentionDays": "[parameters('backupRetention')]", 104 | "geoRedundantBackup": "Disabled" 105 | }, 106 | "highAvailability": { 107 | "mode": "Disabled" 108 | }, 109 | "network": { 110 | "delegatedSubnetResourceId": "[variables('vnetSubnetPsqlId')]", 111 | "privateDnsZoneArmResourceId": "[variables('privateDnsZoneId')]" 112 | }, 113 | "storage": { 114 | "storageSizeGB": 128 115 | }, 116 | "version": "12" 117 | } 118 | }, 119 | { 120 | "type": "Microsoft.Storage/storageAccounts", 121 | "apiVersion": "2019-04-01", 122 | "name": "[parameters('storageAccountName')]", 123 | 
"location": "norwayeast", 124 | "sku": { 125 | "name": "[parameters('replication')]", 126 | "tier": "Standard" 127 | }, 128 | "kind": "StorageV2", 129 | "properties": { 130 | "networkAcls": { 131 | "bypass": "None", 132 | "defaultAction": "Deny", 133 | "virtualNetworkRules": [ 134 | { 135 | "id": "[variables('vnetSubnetId')]", // Allow connection only from the environment's AKS cluster 136 | "action": "Allow" 137 | } 138 | ] 139 | }, 140 | "accessTier": "Hot" 141 | } 142 | }, 143 | { 144 | "name": "[concat(parameters('storageAccountName'), '/default/', 'gitlab-', parameters('buckets')[copyIndex()], '-storage')]", 145 | "type": "Microsoft.Storage/storageAccounts/blobServices/containers", 146 | "comments": "Buckets under the selected storage account", 147 | "apiVersion": "2018-07-01", 148 | "dependsOn": [ 149 | "[parameters('storageAccountName')]" 150 | ], 151 | "copy": { 152 | "name": "storagecopy", 153 | "count": "[length(parameters('buckets'))]" 154 | }, 155 | "properties": { 156 | "publicAccess": "None" 157 | } 158 | } 159 | ], 160 | "outputs": { 161 | "storageAccountName": { 162 | "type": "string", 163 | "value": "[parameters('storageAccountName')]" 164 | }, 165 | "postgresDbName":{ 166 | "type": "string", 167 | "value": "[parameters('dbName')]" 168 | }, 169 | "vnetSubnetId": { 170 | "type": "string", 171 | "value": "[reference(resourceId(variables('rootRgName'),'Microsoft.ContainerService/managedClusters/', variables('managedClusterName')), '2019-08-01').agentPoolProfiles[0].vnetSubnetID]" 172 | } 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /docs/velero-backup-routine.md: -------------------------------------------------------------------------------- 1 | # Backup routines for Velero 2 | 3 | [Velero](https://github.com/heptio/velero) is the backup solution we use to backup our cluster. 4 | Velero backups should be tested as one cannot blindly trust untested backups. 
5 | 6 | ## Gitlab disaster recovery 7 | 8 | When using Gitlab DR, data is stored three places 9 | 10 | Azure blob storage 11 | In cluster Gitaly disk (PV) 12 | Managed postgres (db) 13 | 14 | Blob storage has built-in soft deletion and geo-redundancy. 15 | 16 | Azure postgreSQL has built-in regular Point-in-time backups. 17 | 18 | For The PV, the easiest way to recover is simply to delete the old namespace and restore from a backup 19 | Using another namespace will not work "just like that", because secrets connecting services typically point to a namespace. e.g. "gitlab.gitlab.svc.local" 20 | 21 | Note that there are some issues with restoring PV's which have the "retain" mode set. THis means that even if you delete the NS, the PV is still there. 22 | The solution to this is simply to delete all related PV's 23 | `k delete pv xxx yyy` 24 | 25 | `velero restore create --from-backup gitlab-ns-xxx ` 26 | 27 | This method has been tested successfully. RTO (after old NS is deleted) is about 12-15 mins. Deleting the NS takes about 5-10 mins: 28 | Total RTO: 17-25 minutes. 29 | RPO: 0,5-24,5 hours. We run nightly full backups. I estimate that these take max 30 minutes. 30 | 31 | To recover the postgres database, go to the Azure portal, find your database server and click "restore" 32 | This copies your point in time restore point to a new server. 33 | Note that the newly generated DB does not copy over existing VNET rules, you should set these to increase security. 34 | 35 | Remember to update the helm chart: 36 | 37 | ``` 38 | psql: 39 | database: postgres 40 | host: sdpaks-prod-gitlab-psql.postgres.database.azure.com 41 | password: 42 | key: password 43 | secret: gitlab-postgres-secret 44 | username: gitlab@sdpaks-prod-gitlab-psql 45 | ``` 46 | You should not have to change the content of the secret, but "username" and "host" must be updated. 
 

## Disaster recovery

Most sdp-aks resources including secrets can be restored or recreated easily with Flux. The exception to this is persistent volumes (PVs).

Say for example that the volume of your deployment "verdaccio-verdaccio" has gone corrupt.
You only wish to restore the volume without altering other deployments in the cluster.

* First, scale down the flux deployment in your infrastructure namespace

`kubectl scale deployments/flux -n infrastructure --replicas=0`

* Scale down your corrupted deployment

`kubectl scale deployments/verdaccio-verdaccio --replicas=0`

* Next, delete the PVC which points to the corrupted PV. Note that the `-l release=verdaccio` will delete any PVC containing the label `release: verdaccio`. __Also note that label tags will vary from helm chart to helm chart. Commonly used tags are "app" and "release".__

`kubectl delete pvc -l release=verdaccio`

* After this is done, attempt to restore

`velero restore create --from-schedule prod-ns --include-resources persistentvolume,persistentvolumeclaim -l release=verdaccio`

This will recreate the PVC with a new PV, which in turn points to a newly created Azure Disk "restore-xxx-yyy".

* Scale back up your deployment, and be patient (may take a few minutes)

`kubectl scale deployments/verdaccio-verdaccio --replicas=1`
`kubectl get deployments --watch`

Check out the log files for your deployment if the restore is not successful.

* If successful, scale the flux deployment back up

`kubectl scale deployments/flux -n infrastructure --replicas=1`

### Troubleshooting
In some cases you might need to delete the entire deployment as opposed to just the PVC. We recommend doing this only after you've tried the steps above.
87 | 88 | `kubectl delete deployment verdaccio/verdaccio` 89 | `velero restore create --from-schedule prod-ns -l release=verdaccio`: 90 | 91 | In case the latest backup from schedule is corrupted, manually enter a backup name instead 92 | 93 | `velero backup get` 94 | `velero restore create --from-backup prod-ns-xxx-yyy -l release=verdaccio` 95 | 96 | ## Regular backup testing 97 | 98 | ### Pre-reqs: 99 | * Two AKS clusters with a velero deployment each. The deployments should have a connection to the same storage account in Azure. 100 | This can be set up from scratch (with some manual config) using the `/sdp-omnia/velero/bootstrap.azcli` script. 101 | * Empty namespace in dev cluster named `backup-sandbox` 102 | * Backup created in prod cluster containing one or more PVs. 103 | 104 | #### Dev cluster: 105 | 106 | * Make sure that your velero deployment is in restore-only mode: 107 | 108 | ```kubectl patch deployment velero -n infrastructure --patch '{"spec": {"template": {"spec": {"containers": [{"name": "velero","args": ["server", "--restore-only"]}]}}}}' ``` 109 | 110 | * Run the following command from the infrastructure namespace. This restores the backup into the backup-sandbox namespace. 111 | 112 | `velero restore create --from-backup prod-ns-longlived-xxx --namespace-mappings prod:backup-sandbox` 113 | 114 | * To figure out where your pod has mounted its PV, use the command 115 | 116 | `kubectl exec -it $(kubectl get pods -o name | grep -m1 verdaccio-verdaccio | cut -d'/' -f 2) -- '/bin/sh' -c "df -h" ` 117 | 118 | The volume will have a size slightly smaller than what is declared in the PVC. For instance a PVC of 8.0 Gi will be listed at a size of 7.7G. 119 | 120 | * Next copy metadata from the pod with the mounted volume to your local machine. 
121 | 122 | ```kubectl exec -it $(kubectl get pods -o name | grep -m1 verdaccio-verdaccio | cut -d'/' -f 2) -- '/bin/sh' -c "ls -laR /verdaccio/storage" >> ./DevDiff``` 123 | 124 | Where `verdaccio-verdaccio` is the name of your kubernetes deployment, and `/verdaccio/storage` is the path to where the PV is mounted. 125 | 126 | * You can now cleanup your dev cluster, or do so at a later time 127 | 128 | `kubectl delete ns backup-sandbox` 129 | 130 | #### Prod cluster: 131 | 132 | Switch K8s context to your prod cluster, and execute the same command. 133 | 134 | ```kubectl exec -it $(kubectl get pods -o name | grep -m1 verdaccio-verdaccio | cut -d'/' -f 2) -- '/bin/sh' -c "ls -laR /verdaccio/storage" >> ./ProdDiff``` 135 | 136 | Finally, compare the two files using your favorite diff tool. Have in mind that there may have been changes between the time of your backup and what is now running in production. 137 | -------------------------------------------------------------------------------- /docs/velero.md: -------------------------------------------------------------------------------- 1 | # How do we use Velero 2 | 3 | [Velero](https://github.com/heptio/velero) is the backup solution we use to backup our cluster. It takes backups by storing the manifests in an Azure Blob Storage and creates snapshots of the persistent disks. With this information we can restore the state of the cluster/workload. 4 | 5 | ## Backup 6 | Our prod cluster uses this backup schema; 7 | ``` 8 | velero schedule create prod-ns --schedule "0 1 * * *" --include-namespaces prod 9 | velero schedule create staging-ns --schedule "30 1 * * *" --include-namespaces staging 10 | velero schedule create dev-ns --schedule "0 2 * * *" --include-namespaces dev 11 | velero schedule create monitoring-ns --schedule "30 2 * * *" --include-namespaces monitoring 12 | ``` 13 | ### Take backup 14 | 15 | Before we can restore files we need a backup, this can be done with a one off or a schedule. 
To create the schedule the basic syntax is `velero backup create NAME`. 16 | This command takes a full backup of the whole cluster. 17 | 18 | We can further define what to take backup of with arguments. Get the whole list with `velero backup create --help`. 19 | 20 | - Backup a namespace do 21 | 22 | ``` 23 | velero backup create NAME --include-namespaces NAMESPACE 24 | ``` 25 | 26 | - Backup all but namespace 27 | 28 | ``` 29 | velero backup create NAME --exclude-namespaces NAMESPACE 30 | ``` 31 | 32 | - Backup a resource in namespace 33 | 34 | ``` 35 | velero backup create NAME --include-namespaces NAMESPACE --include-resource RESOURCE 36 | ``` 37 | 38 | - Backup based on label 39 | 40 | ``` 41 | velero backup create NAME --selector KEY=VALUE 42 | ``` 43 | 44 | - Backup only lasts 12hours 45 | 46 | ``` 47 | velero backup create NAME --include-namespace NAMESPACE --selector KEY=VALUE --ttl 12h 48 | ``` 49 | 50 | ### Schedule backups 51 | 52 | The one of backup is a useful feature, but the bread and butter of backups is the scheduled backups. To create a backup schedule once a day at 01:00. 53 | 54 | ``` 55 | velero schedule create NAME --schedule "0 1 * * *" --include-namespace NAMESPACE 56 | ``` 57 | 58 | Schedule reference 59 | 60 | ``` 61 | | Character Position | Character Period | Acceptable Values | 62 | | -------------------|:----------------:| -----------------:| 63 | | 1 | Minute | 0-59,* | 64 | | 2 | Hour | 0-23,* | 65 | | 3 | Day of Month | 1-31,* | 66 | | 4 | Month | 1-12,* | 67 | | 5 | Day of Week | 0-7,* | 68 | ``` 69 | 70 | As you can see we can define namespace, or any of the other parameters we use with one of backups. 71 | 72 | ## Restore 73 | 74 | To restore a backup it often is useful to delete the resource in the cluster before we restore it to minimize conflicts. 75 | In SDP-aks' case the Kubernetes manifests are controlled by Flux and will automatically reapply. 
In this case it is best to scale down a deployment, delete a PVC then 76 | With velero we need to "create" a restore in the same fashion as we create backups. 77 | 78 | ``` 79 | velero restore create --from-backup NAME # or --from-schedule if backup was scheduled 80 | ``` 81 | 82 | This will try to restore the full contents of this backup. If we only want to restore parts of the backup we use the same arguments as for taking backup. E.g. `velero restore create --from-backup NAME --include-resources persistentvolume,persistentvolumeclaim` 83 | 84 | Say for example that the volume of your deployment named "verdaccio" has gone corrupt. You only wish to restore the volume without altering other deployments in the cluster. For this use the label selector, -l 85 | 86 | ``` 87 | velero restore create --from-schedule prod-ns --include-resources persistentvolume,persistentvolumeclaim -l release=verdaccio 88 | ``` 89 | 90 | 91 | ## How-to's 92 | 93 | ### List backups and schedules 94 | 95 | - List backups 96 | `velero backup get` 97 | - List schedules 98 | `velero schedule get` 99 | 100 | ### Migrate a service with PV from Cluster A to Cluster B 101 | 1. Allow vnet for B in the storageaccount. This is easiest done in the Azure portal. StorageAccount --> Firewalls --> +Add existing virtual network 102 | 2. Install Velero (if not already done) in B using the bootstrap.azcli file ** remember to edit out a line as to not overwrite the existing service principal for cluster A! ** Make sure that the BackupStorageLocation and VolumeStorageLocations specified match the ones created in A. Without this Velero will not be able to locate the backups in cluster B. 103 | 3. Inject the "velero-creds"-secret from A to B. E.g; ```kubectl get secrets -n infrastructure velerocreds -o yaml ``` and then apply in B. You might have to modify the `AZURE_RESOURCE_GROUP` part. 104 | 4. Restore the PV into B. ```velero restore create --from-backup myBackup``` this will create a new PV in B. 
The PV's name will be identical to the original in A, but the duplicated resource will point to a newly created Azure Disk in B's resource group named 'restore_xxx'. 105 | 106 | 107 | ### Restore HelmRelease that creates secrets (wordpress) 108 | 109 | Some Helm charts creates secrets on creation, and will try to do this when restored with HelmRelease and Flux. An example of this is the wordpress helm chart. To restore the wordpress chart this procedure worked in testing. 110 | 111 | We assume you know what backup to restore and that wordpress has the label `release=wordpress`, we assume the backup is called `wordpress`. 112 | 113 | - Start by restoring the helmrelease (and namespace if you have deleted this) 114 | `velero restore create --from-backup wordpress --include-resources namespace,helmrelease --wait` 115 | This command will restore the namespace and helmrelease, when the helmrelease has been restored it will create statefulset, deployments, pvc and pv and more. 116 | - Remove the generic resources created by helmrelease after you have made sure they have been created (`kubectl -n NAMESPACE get all -l release=wordpress`) 117 | `kubectl -n NAMESPACE delete pvc,po,svc,ing,deployment,replicaset,statefulset,secrets -l release=wordpress` 118 | This wil remove all generic resources created and make sure you have a clean environment to restore the original resources 119 | - Restore the backed up resources after making sure the original persistent volumes has been deleted (`kubectl -n NAMESPACE get pv | grep wordpress`) 120 | `velero restore create --from-backup wordpress --exclude-resources=namespace,helmrelease,pod --wait` 121 | This will restore everything except what we already have recreated and the pods (that will be created new anyway). 122 | 123 | After some time the Wordpress environment should come back up. Give it a few minutes. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Software Development Platform's Cloud platform 2 | 3 | This is the main repository for SDP's cloud platform. 4 | 5 | It has two parts: 6 | - AKS solution 7 | - VM's with on-prem connectivity. 8 | 9 | This repo mostly focuses on AKS, as the VM's are here mostly for legacy support. New apps and services should always be placed in the AKS solution if possible. 10 | We conver some basic overview information on these topics; 11 | 12 | - How AKS works 13 | - How we have configured it 14 | - Which additional tools we have integrated with the cluster 15 | - Scripts and ARM-templates to recreate the clusters. 16 | 17 | ## Related Repositories 18 | 19 | Flux GitOps Manifests - 20 | 21 | 22 | The Flux repo should be considered as the "source of truth" of what is actually deployed in the cluster. 23 | 24 | ## Technologies 25 | 26 | ![Architecture](/docs/sdp-aks.png) 27 | AKS Stack with example services 28 | 29 | ### AKS 30 | 31 | With AKS, Azure provides virtual machines running Ubuntu preconfigured with Kubernetes setup as a cluster. AKS also includes the preconfigured Azure resources _Network security group_, _Route table_, _Disk_, _Network interface_, _Virtual network_, _Public IP address_, and _Load balancer_. The K8s master node(s) are fully controlled by Azure. This include etcd, kube-scheduler, kube-controller-manager, and the kube-apiserver. 32 | 33 | ### Helm 34 | 35 | On top of Kubernetes we run Helm. Helm is a way of deploying configurable _packages_ of K8s manifests into a K8s cluster. 36 | 37 | ### Flux 38 | 39 | To operate a K8s cluster that has a _Single Source of Truth_, deploy K8s resources in a _declarative way_, and provide _configuration traceability_, we use [FluxCD/Flux](https://github.com/fluxcd/flux) as a GitOps controller. 
This ensures that the cluster state mirrors the configuration in our Git repository. 40 | 41 | ### VMware velero 42 | 43 | To back up both Persistent Volumes and K8s manifests, we deploy Vmware Velero. Velero is configured with a separate Azure Resource group and Storage account. Velero runs on a schedule to take snapshots of the Azure Disks and the deployed K8s manifests. 44 | 45 | ### Grafana & Prometheus 46 | To collect metrics we use Prometheus. We display these graphically in Grafana. For collecting logs we use Grafana Loki, and collect both logs from our VM's and in-cluster resources. We also display our log output centrally through Grafana, so that we can use a "single pane of glass" for logs and metrics for as many of our services as possible. 47 | 48 | ### Sealed Secrets 49 | 50 | To be able to maintain our GitOps workflow, we need to commit secrets (SSL Keys, Oauth keys, Container Registry Keys etc.) to Git. These secrets are encrypted with asymmetric cryptography, where the private key only resides within the Sealed-Secrets controller in the K8s cluster. We deploy SealedSecret, which picks up encrypted secrets and "translates" them into regular K8s secrets. 51 | 52 | ### External-DNS 53 | 54 | Is simply a service that reads ingress manifest annotations, and talks to the Cloud provider to automatically create DNS records. 55 | 56 | ### Ingress-Controller 57 | 58 | We use Nginx ingress-controller to expose services from within the cluster to the Internet. The ingress-controller's main job is to connect hostnames and K8s services, and terminate SSL traffic. Port 22 is opened to support SSH cloning from our Gitlab instance. 59 | 60 | ### Cert-Manager 61 | 62 | Cert-Manager reads ingress manifests annotations and creates a SSL certificate and key for a given hostname. We have configured our cert-manager to order certificates from Let's Encrypt.
63 | 64 | ### Node-Puppet 65 | 66 | This is a homemade solution that gives us the capacity to control the AKS nodes via Puppet. It works by running a DaemonSet that uses an InitContainer to install Puppet and subscribe it to our Puppet Git repository. 67 | 68 | ### Kured 69 | 70 | Kured is a simple solution to the problem of rebooting nodes to enable security patches. If an update requires the node to restart, it creates a file that Kured looks for. If the file is there, Kured drains, restarts, and then uncordons the given node. 71 | 72 | ### Loki 73 | 74 | Loki allows us to collect logs from both in-cluster pods and VM's outside the cluster. We forward the logs to Grafana where we can centrally search and correlate metrics and logs. 75 | 76 | ### Sysdig technologies 77 | 78 | OSS Falco and Sysdig Inspect are used to gain security insight and packet captures in case of incidents. Falco exporter collects metrics and forwards them to Grafana for visualization and correlation with logs collected by Loki. 79 | 80 | ### Oauth2-proxy 81 | For services which do not have sufficient built-in authentication, we use an Oauth2-proxy to ensure proper access restrictions. 82 | 83 | ### VMs 84 | 85 | In the /arm-templates/classic folder you will find ARM templates for our VM's which run in a separate subscription. These run apps which require on-prem connectivity to function. This separate subscription has stricter policies, so currently we cannot use a CI job to automatically update our templates. This will remain the case until service principals can use JiT access, and can do this via the CLI. 86 | 87 | ## Usage 88 | 89 | ### Prerequisite 90 | 91 | - Install Azure CLI (az) 92 | - Install kubectl; `az aks install-cli` 93 | - Install [helm client](https://helm.sh/docs/intro/install/) 94 | 95 | Note: Installing and using kubectl commands does not work through the Equinor proxy. You should therefore be on the 'approved network' to avoid the proxy.
96 | 97 | ### Cluster Setup 98 | 99 | 1. Make sure the Azure Key Vault is created 100 | 2. Create and populate `.env` from `env.template` 101 | 3. Bootstrap AKS with additional dependencies `./bootstrap.sh` 102 | 4. Further updates should be done to the ARM templates. The CI will automatically apply updates when committing so make sure you commit to dev before merging into prod. 103 | 104 | ## How-to's 105 | 106 | - Get access to created cluster 107 | - `az aks get-credentials --resource-group $AZ_GROUP --name $AZ_AKS_NAME` 108 | This populates `~/.kube/config` with certs and keys 109 | - [Expand Persistent Volume Claims](https://kubernetes.io/docs/concepts/storage/persistent-volumes/#expanding-persistent-volumes-claims) 110 | - [Upgrade Flux](/docs/upgrade-flux.md) 111 | - Check Helm charts for updates 112 | 1. `helm plugin install https://github.com/bacongobbler/helm-whatup` 113 | 2. `helm whatup` 114 | - Changing run settings for Flux (e.g. branch or repository) 115 | To change the branch Flux uses, you "upgrade" Flux and set some variables. 116 | 1. Find the installed version of Flux 117 | `helm list --all flux` 118 | 2. `helm upgrade flux --reuse-values --set git.branch=dev fluxcd/flux` 119 | - [Upgrade Kubernetes cluster](/docs/upgrade-kubernetes-cluster.md) 120 | - Revoke Let's Encrypt Certificates 121 | 1. Extract key and cert to PEM-format 122 | `kubectl get secret my-tls-secret -o jsonpath='{.data.tls\.crt}' | base64 --decode > crt.pem` 123 | `kubectl get secret my-tls-secret -o jsonpath='{.data.tls\.key}' | base64 --decode > key.pem` 124 | 2. Issue revoke request 125 | `sudo certbot revoke --cert-path ./crt.pem --key-path ./key.pem` 126 | - Usage of VMware velero Backup solution 127 | 1. [Configuration](/docs/velero.md) 128 | 2. 
[Disaster recovery and backup testing](/docs/velero-backup-routine.md) 129 | -------------------------------------------------------------------------------- /post-arm.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | # This script needs to be run after the ARM-templates have been deployed. 3 | # It's job is to create the basic kubernetes services that are needed for our GitOps to work (mainly Helm and Flux) 4 | source .env 5 | 6 | echo $ENVIRONMENT 7 | 8 | # Ensure correct cluster context 9 | az account set --subscription "${AZ_SUBSCRIPTION}" 10 | #az aks get-credentials -g "${AZ_GROUP}" -n "${AZ_GROUP}-k8s" --overwrite-existing 11 | 12 | echo 13 | echo " Creating namespaces" 14 | kubectl apply -f manifests/namespaces.yaml > /dev/null || true 15 | 16 | echo " Creating Custom storageclasses" 17 | kubectl apply -f manifests/storageclasses.yaml > /dev/null || true 18 | 19 | # Fetch Azure ID's from Keyvault (Created in pre-arm.sh) 20 | AZ_SUBSCRIPTION_ID=$(az account show --query "id" -o tsv) 21 | AZ_TENANT_ID=$(az account show --query "tenantId" -o tsv) 22 | AZ_DNS_SP_NAME="${AZ_GROUP}-dns-sp" 23 | AZ_DNS_SP_PASSWORD=$(az keyvault secret show --name "${AZ_DNS_SP_NAME}-password" --vault-name SDPVault --query value -o tsv) 24 | AZ_DNS_SP_ID=$(az keyvault secret show --name "${AZ_DNS_SP_NAME}-app-id" --vault-name SDPVault --query value -o tsv) 25 | AZ_BACKUP_SP_NAME="sdpaks-common-velero-sp" 26 | AZ_BACKUP_SP_PASSWORD=$(az keyvault secret show --name "${AZ_BACKUP_SP_NAME}-password" --vault-name SDPVault --query value -o tsv) 27 | AZ_BACKUP_SP_ID=$(az keyvault secret show --name "${AZ_BACKUP_SP_NAME}-app-id" --vault-name SDPVault --query value -o tsv) 28 | AZ_CLUSTER_GROUP=$(az aks show --resource-group $AZ_GROUP --name "${AZ_GROUP}-k8s" --query nodeResourceGroup -o tsv) 29 | POSTGRES_USERNAME=$(az keyvault secret show --name "${AZ_GROUP}-psql-username" --vault-name SDPVault --query value -o tsv) 30 | 
POSTGRES_PASSWORD=$(az keyvault secret show --name "${AZ_GROUP}-psql-password" --vault-name SDPVault --query value -o tsv) 31 | 32 | # 33 | # Create external dns secret 34 | # 35 | 36 | # Use custom configuration file 37 | echo 38 | echo " Creating azure.json file with DNS service principal information" 39 | cat << EOF > azure.json 40 | { 41 | "tenantId": "$AZ_TENANT_ID", 42 | "subscriptionId": "$AZ_SUBSCRIPTION_ID", 43 | "aadClientId": "$AZ_DNS_SP_ID", 44 | "aadClientSecret": "$AZ_DNS_SP_PASSWORD", 45 | "resourceGroup": "k8s-infrastructure" 46 | } 47 | EOF 48 | 49 | # Create a secret so that external-dns can connect to the DNS zone 50 | echo 51 | echo " Creating Kubernetes secret (external-dns/azure-dns-config-file) from azure.json file" 52 | kubectl create secret generic azure-dns-config-file --from-file=azure.json -n external-dns --dry-run=client -o yaml | kubectl apply -f - > /dev/null || true 53 | rm -f azure.json 54 | 55 | # 56 | # Create sealed-secrets secret 57 | # 58 | 59 | az keyvault secret show --name "sealed-secrets-key" --vault-name SDPVault --query value -o tsv > tmp.key 60 | az keyvault secret show --name "sealed-secrets-cert" --vault-name SDPVault --query value -o tsv > tmp.crt 61 | kubectl create secret tls -n sealed-secrets sealed-secret-custom-key --cert=tmp.crt --key=tmp.key --dry-run=client -o yaml | kubectl apply -f - > /dev/null || true 62 | rm -f tmp.key tmp.crt 63 | echo 64 | echo " Remember to restart sealed-secret pod if it already exists to pick up custom keys" 65 | 66 | 67 | function key_exists { 68 | az keyvault secret show --name $1 --vault-name SDPVault > /dev/null 69 | } 70 | 71 | # Add flux repo to helm 72 | echo 73 | echo " Adding fluxcd/flux repository to Helm" 74 | helm repo add fluxcd https://charts.fluxcd.io > /dev/null 75 | 76 | # Install Flux 77 | echo 78 | echo " Installing or upgrading Flux with Helm operator in the flux namespace" 79 | helm upgrade --install flux --version 1.12.0 \ 80 | --namespace flux \ 81 | --set 
git.url="$FLUX_GITOPS_REPO" \ 82 | --set git.branch="$FLUX_GITOPS_BRANCH" \ 83 | --set git.path=$FLUX_GITOPS_PATH \ 84 | --set manifestGeneration=true \ 85 | fluxcd/flux > /dev/null 86 | 87 | 88 | # Install Flux Helm Operator with Helm v3 support 89 | # HelmRelease CRD first 90 | kubectl apply -f https://raw.githubusercontent.com/fluxcd/helm-operator/master/deploy/crds.yaml 91 | 92 | helm upgrade -i helm-operator fluxcd/helm-operator --version 1.4.2 --wait \ 93 | --namespace flux \ 94 | --set git.ssh.secretName="flux-git-deploy" \ 95 | --set helm.versions=v3 96 | 97 | # Create cluster secret for velero - two format types needed due to bug with azure provider 98 | 99 | echo 100 | echo " Generating velero credentials..." 101 | 102 | cat << EOF > cloud 103 | AZURE_SUBSCRIPTION_ID=${AZ_SUBSCRIPTION_ID} 104 | AZURE_TENANT_ID=${AZ_TENANT_ID} 105 | AZURE_CLIENT_ID=${AZ_BACKUP_SP_ID} 106 | AZURE_CLIENT_SECRET=${AZ_BACKUP_SP_PASSWORD} 107 | AZURE_RESOURCE_GROUP=${AZ_CLUSTER_GROUP} 108 | AZURE_CLOUD_NAME=AzurePublicCloud 109 | EOF 110 | 111 | kubectl create secret generic velero-credentials --from-file=cloud -n velero --dry-run=client -o yaml | kubectl apply -f - > /dev/null || true 112 | 113 | # Create secret for gitlab to connect to postgresSQL 114 | echo 115 | echo " Generating secret for gitlab - external postgres.." 116 | kubectl create secret generic gitlab-postgres-secret \ 117 | --namespace gitlab \ 118 | --from-literal username=${POSTGRES_USERNAME} \ 119 | --from-literal password=${POSTGRES_PASSWORD} --dry-run=client -o yaml | kubectl apply -f - > /dev/null || true 120 | 121 | # Create secrets for minio to connect to storage account (multiple needed) 122 | echo 123 | echo " Generating secrets for gitlab-minio..." 
124 | 125 | MINIO_STORAGE_NAME="sdpaks${ENVIRONMENT}minio" 126 | MINIO_SECRET_KEY=$(az storage account keys list --resource-group sdpaks-"${ENVIRONMENT}"-gitlab-storage --account-name "$MINIO_STORAGE_NAME" --query [0].value -o tsv) 127 | 128 | kubectl create secret generic gitlab-minio-secret \ 129 | --namespace gitlab \ 130 | --from-literal accesskey=${MINIO_STORAGE_NAME} \ 131 | --from-literal secretkey=${MINIO_SECRET_KEY} --dry-run=client -o yaml | kubectl apply -f - > /dev/null || true 132 | 133 | rm -f azure.json 134 | 135 | cat << EOF > connection 136 | provider: AWS 137 | region: us-east-1 138 | aws_access_key_id: ${MINIO_STORAGE_NAME} 139 | aws_secret_access_key: ${MINIO_SECRET_KEY} 140 | aws_signature_version: 4 141 | host: http://gitlab-minio.gitlab.svc.cluster.local:9000 142 | endpoint: http://gitlab-minio.gitlab.svc.cluster.local:9000 143 | path_style: true 144 | EOF 145 | 146 | kubectl create secret generic gitlab-rails-storage --from-file=connection -n gitlab --dry-run=client -o yaml | kubectl apply -f - > /dev/null || true 147 | 148 | cat << EOF > config 149 | azure: 150 | accountname: ${MINIO_STORAGE_NAME} 151 | accountkey: ${MINIO_SECRET_KEY} 152 | container: gitlab-registry-storage 153 | redirect: 154 | disable: true 155 | EOF 156 | 157 | kubectl create secret generic registry-storage --from-file=config -n gitlab --dry-run=client -o yaml | kubectl apply -f - > /dev/null || true 158 | 159 | cat << EOF > config 160 | [default] 161 | host_base = http://gitlab-minio.gitlab.svc.cluster.local:9000 162 | host_bucket = http://gitlab-minio.gitlab.svc.cluster.local:9000 163 | # Leave as default 164 | bucket_location = us-east-1 165 | use_https = false 166 | access_key = ${MINIO_STORAGE_NAME} 167 | secret_key = ${MINIO_SECRET_KEY} 168 | 169 | signature_v2 = False 170 | EOF 171 | 172 | kubectl create secret generic backup-storage-config --from-file=config -n gitlab --dry-run=client -o yaml | kubectl apply -f - > /dev/null || true 173 | 174 | rm -f connection 
& rm -f azure.json & rm -f cloud & rm -f config 175 | 176 | echo " Script completed." 177 | -------------------------------------------------------------------------------- /arm-templates/base/deploy-aks.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "vnetName":{ 6 | "type":"string" 7 | }, 8 | "enableAutoscaler":{ 9 | "type": "bool", 10 | "defaultValue": true 11 | }, 12 | "subnetRange":{ 13 | "type":"string", 14 | "defaultValue": "10.0.0.0/8" 15 | }, 16 | "subnetRangePsql":{ 17 | "type":"string", 18 | "defaultValue": "10.0.0.0/8" 19 | }, 20 | "vnetRange":{ 21 | "type": "array", 22 | "defaultValue": ["10.240.0.0/16"] 23 | }, 24 | "serviceCidr":{ 25 | "type": "string" 26 | }, 27 | "dnsServiceIp":{ 28 | "type": "string" 29 | }, 30 | "minNodes":{ 31 | "type": "int", 32 | "defaultValue": 2 33 | }, 34 | "maxNodes":{ 35 | "type": "int", 36 | "defaultValue": 3 37 | }, 38 | "kubernetesVersion": { 39 | "type": "string", 40 | "defaultValue": "1.22.6", 41 | "metadata": { 42 | "description": "The version of Kubernetes. 
For testing in dev, set version in parameter file instead" 43 | } 44 | } 45 | }, 46 | "variables": { 47 | "clusterName": "[concat(resourceGroup().name, '-k8s')]" 48 | }, 49 | "resources": [ 50 | { 51 | "apiVersion": "2019-04-01", 52 | "name": "[parameters('vnetName')]", 53 | "type": "Microsoft.Network/virtualNetworks", 54 | "location": "[resourceGroup().location]", 55 | "properties": { 56 | "addressSpace": { 57 | "addressPrefixes": "[parameters('vnetRange')]" 58 | }, 59 | "subnets": [ 60 | { 61 | "name": "aks-subnet", 62 | "properties": { 63 | "addressPrefix": "[parameters('subnetRange')]", 64 | "addressPrefixes": [], 65 | "serviceEndpoints": [ // Open up endpoint to Gitlab PSQL, Gitlab SA, Velero SA 66 | { 67 | "service": "Microsoft.Sql" 68 | }, 69 | { 70 | "service": "Microsoft.Storage" 71 | } 72 | ] 73 | } 74 | }, 75 | { 76 | "name": "aks-subnet-psql", 77 | "properties": { 78 | "addressPrefix": "[parameters('subnetRangePsql')]", 79 | "addressPrefixes": [], 80 | "delegations": [ 81 | { 82 | "name": "dlg-Microsoft.DBforPostgreSQL-flexibleServers", 83 | "properties": { 84 | "serviceName": "Microsoft.DBforPostgreSQL/flexibleServers" 85 | } 86 | } 87 | ], 88 | "serviceEndpoints": [ // Open up endpoint to Gitlab PSQL, Gitlab SA, Velero SA 89 | { 90 | "service": "Microsoft.Sql" 91 | }, 92 | { 93 | "service": "Microsoft.Storage" 94 | } 95 | ] 96 | } 97 | } 98 | ] 99 | } 100 | }, 101 | { 102 | "apiVersion": "2020-09-01", 103 | "dependsOn": [ 104 | "[parameters('vnetName')]" 105 | ], 106 | "type": "Microsoft.ContainerService/managedClusters", 107 | "location": "[resourceGroup().location]", 108 | "name": "[variables('clusterName')]", 109 | "properties": { 110 | "kubernetesVersion": "[parameters('kubernetesVersion')]", 111 | "enableRBAC": true, 112 | "nodeResourceGroup": "[if(contains(resourceGroup().name, 'dev'), concat(variables('clusterName'), '-dataplane'), '')]", //custom nodepool rg for dev only 113 | "dnsPrefix": "[concat(variables('clusterName'),'-dns')]", 114 | 
"agentPoolProfiles": [ 115 | { 116 | "name": "agentpool", 117 | "mode": "System", 118 | "count": 2, 119 | "osDiskType": "[if(contains(resourceGroup().name, 'dev'), 'Ephemeral', 'Managed')]", //Ephemeral only for dev 120 | "vmSize": "Standard_DS12_v2", 121 | "osType": "Linux", 122 | "enableAutoScaling": "[parameters('enableAutoscaler')]", 123 | "minCount": "[parameters('minNodes')]", 124 | "maxCount": "[parameters('maxNodes')]", 125 | "storageProfile": "ManagedDisks", 126 | "vnetSubnetID": "[resourceId('Microsoft.Network/virtualNetworks/subnets', parameters('vnetName'),'aks-subnet')]", 127 | "type": "VirtualMachineScaleSets", 128 | "maxPods": 250 129 | } 130 | ], 131 | "networkProfile": { 132 | "networkPlugin": "azure", 133 | "loadBalancerSku": "[if(contains(resourceGroup().name, 'dev'), 'Standard', 'Basic')]", //Standard only for dev until cluster 134 | "serviceCidr": "[parameters('serviceCidr')]", 135 | "dnsServiceIP": "[parameters('dnsServiceIp')]", 136 | "dockerBridgeCidr": "172.17.0.1/16" 137 | }, 138 | "addonProfiles": { 139 | "httpApplicationRouting": { 140 | "enabled": false 141 | } 142 | }, 143 | "aadProfile": { 144 | "managed": true, 145 | "tenantId": "3aa4a235-b6e2-48d5-9195-7fcf05b459b0", 146 | "adminGroupObjectIDs": [ 147 | "a4f395a4-3aaa-4a34-8b72-977b3e382fab" // Only AZAPPL SDP Tools - Contributor can use kubecontext 148 | ] 149 | }, 150 | "identity": { 151 | "type": "UserAssigned", 152 | "userAssignedIdentities": { "[concat('/subscriptions/b18da12e-efa1-4642-8fec-b6580b00212c/resourceGroups/k8s-infrastructure/providers/Microsoft.ManagedIdentity/userAssignedIdentities/', resourceGroup().name, '-aks-mi')]": {} } 153 | } 154 | }, 155 | "tags": {} 156 | } 157 | ], 158 | "outputs": { 159 | "controlPlaneFQDN": { 160 | "type": "string", 161 | "value": "[reference(concat('Microsoft.ContainerService/managedClusters/', variables('clusterName'))).fqdn]" 162 | }, 163 | "vnetSubnetId2": { 164 | "type": "string", 165 | "value": 
"[reference(resourceId(resourceGroup().name,'Microsoft.ContainerService/managedClusters/', variables('clusterName')), '2019-08-01').agentPoolProfiles[0].vnetSubnetID]" 166 | } 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /arm-templates/base/deploy-k8s-infra.json: -------------------------------------------------------------------------------- 1 | // This is not managed by Github actions per Aug. 2020, as human errors can be critical. Used for reference. 2 | 3 | { 4 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 5 | "contentVersion": "1.0.0.0", 6 | "parameters": { 7 | "vaults_SDPVault_name": { 8 | "defaultValue": "SDPVault", 9 | "type": "String" 10 | }, 11 | "actionGroups_SDP_Alerts_name": { 12 | "defaultValue": "SDP-Alerts", 13 | "type": "String" 14 | }, 15 | "dnszones_sdpaks_equinor_com_name": { 16 | "defaultValue": "sdpaks.equinor.com", 17 | "type": "String" 18 | }, 19 | "registries_sdpaksCr_name": { 20 | "defaultValue": "sdpaksCr", 21 | "type": "String" 22 | }, 23 | "dnszones_dev_sdpaks_equinor_com_name": { 24 | "defaultValue": "dev.sdpaks.equinor.com", 25 | "type": "String" 26 | }, 27 | "publicIPAddresses_sdpaks_dev_ipaddr_name": { 28 | "defaultValue": "sdpaks-dev-ipaddr", 29 | "type": "String" 30 | }, 31 | "publicIPAddresses_sdpaks_prod_ipaddr_name": { 32 | "defaultValue": "sdpaks-prod-ipaddr", 33 | "type": "String" 34 | } 35 | }, 36 | "variables": {}, 37 | "resources": [ 38 | { 39 | "type": "Microsoft.ContainerRegistry/registries", 40 | "apiVersion": "2019-12-01-preview", 41 | "name": "[parameters('registries_sdpaksCr_name')]", 42 | "location": "northeurope", 43 | "sku": { 44 | "name": "Basic", 45 | "tier": "Basic" 46 | }, 47 | "properties": { 48 | "adminUserEnabled": true, 49 | "policies": { 50 | "quarantinePolicy": { 51 | "status": "disabled" 52 | }, 53 | "trustPolicy": { 54 | "type": "Notary", 55 | "status": "disabled" 56 | }, 57 | "retentionPolicy": 
{ 58 | "days": 7, 59 | "status": "disabled" 60 | } 61 | }, 62 | "dataEndpointEnabled": false, 63 | "publicNetworkAccess": "Enabled" 64 | } 65 | }, 66 | { 67 | "type": "microsoft.insights/actionGroups", 68 | "apiVersion": "2019-03-01", 69 | "name": "[parameters('actionGroups_SDP_Alerts_name')]", 70 | "location": "Global", 71 | "properties": { 72 | "groupShortName": "sdpalerts", 73 | "enabled": true, 74 | "emailReceivers": [ 75 | { 76 | "name": "Notify Audun_-EmailAction-", 77 | "emailAddress": "auls@equinor.com", 78 | "useCommonAlertSchema": false 79 | }, 80 | { 81 | "name": "Notify Stig Oskar_-EmailAction-", 82 | "emailAddress": "stoo@equinor.com", 83 | "useCommonAlertSchema": false 84 | } 85 | ] 86 | } 87 | }, 88 | { 89 | "type": "Microsoft.KeyVault/vaults", 90 | "apiVersion": "2019-09-01", 91 | "name": "[parameters('vaults_SDPVault_name')]", 92 | "location": "norwayeast", 93 | "tags": { 94 | "cluster": "common" 95 | }, 96 | "properties": { 97 | "sku": { 98 | "family": "A", 99 | "name": "Standard" 100 | }, 101 | "tenantId": "3aa4a235-b6e2-48d5-9195-7fcf05b459b0", 102 | "accessPolicies": [ // SDP users listed below. 
103 | { 104 | "tenantId": "3aa4a235-b6e2-48d5-9195-7fcf05b459b0", 105 | "objectId": "f9a2ce40-2a8c-4a67-9506-27c18dbf9777", 106 | "permissions": { 107 | "secrets": [ 108 | "List", 109 | "Get", 110 | "Set" 111 | ] 112 | } 113 | }, 114 | { 115 | "tenantId": "3aa4a235-b6e2-48d5-9195-7fcf05b459b0", 116 | "objectId": "0ef942a4-3e5b-4c58-99a6-41c72433e1db", 117 | "permissions": { 118 | "secrets": [ 119 | "Get", 120 | "List", 121 | "Set", 122 | "Delete", 123 | "Recover", 124 | "Backup", 125 | "Restore" 126 | ] 127 | } 128 | }, 129 | { 130 | "tenantId": "3aa4a235-b6e2-48d5-9195-7fcf05b459b0", 131 | "objectId": "d8e3b652-c3f1-4530-af67-4d203b5e6d86", 132 | "permissions": { 133 | "secrets": [ 134 | "Get", 135 | "List", 136 | "Set", 137 | "Delete", 138 | "Recover", 139 | "Backup", 140 | "Restore" 141 | ] 142 | } 143 | } 144 | ], 145 | "enabledForTemplateDeployment": true, 146 | "enableSoftDelete": true 147 | } 148 | }, 149 | { 150 | "type": "Microsoft.Network/dnszones", 151 | "apiVersion": "2018-05-01", 152 | "name": "[parameters('dnszones_dev_sdpaks_equinor_com_name')]", 153 | "location": "global", 154 | "properties": { 155 | "zoneType": "Public" 156 | } 157 | }, 158 | { 159 | "type": "Microsoft.Network/dnszones", 160 | "apiVersion": "2018-05-01", 161 | "name": "[parameters('dnszones_sdpaks_equinor_com_name')]", 162 | "location": "global", 163 | "properties": { 164 | "zoneType": "Public" 165 | } 166 | }, 167 | { 168 | "type": "Microsoft.Network/publicIPAddresses", 169 | "apiVersion": "2020-05-01", 170 | "name": "[parameters('publicIPAddresses_sdpaks_dev_ipaddr_name')]", 171 | "location": "norwayeast", 172 | "sku": { 173 | "name": "Basic" 174 | }, 175 | "properties": { 176 | "ipAddress": "51.120.76.221", 177 | "publicIPAddressVersion": "IPv4", 178 | "publicIPAllocationMethod": "Static", 179 | "idleTimeoutInMinutes": 4, 180 | "ipTags": [] 181 | } 182 | }, 183 | { 184 | "type": "Microsoft.Network/publicIPAddresses", 185 | "apiVersion": "2020-05-01", 186 | "name": 
"[parameters('publicIPAddresses_sdpaks_prod_ipaddr_name')]", 187 | "location": "norwayeast", 188 | "sku": { 189 | "name": "Basic" 190 | }, 191 | "properties": { 192 | "ipAddress": "51.120.76.229", 193 | "publicIPAddressVersion": "IPv4", 194 | "publicIPAllocationMethod": "Static", 195 | "idleTimeoutInMinutes": 4, 196 | "ipTags": [] 197 | } 198 | } 199 | ] 200 | } -------------------------------------------------------------------------------- /arm-templates/classic/deploy-externalvms.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "location": { 6 | "type": "string", 7 | "defaultValue": "[resourceGroup().location]", 8 | "metadata": { 9 | "description": "Azure region for your VM(s) deployment. This must much with your Virtual Network location." 10 | } 11 | }, 12 | "vmNamePrefix": { 13 | "type": "string", 14 | "maxLength": 4, 15 | "metadata": { 16 | "description": "Prefix for your virtual machines, will be concatenated with a number, e.g. testvm1" 17 | } 18 | }, 19 | "adminUsername": { 20 | "type": "string", 21 | "defaultValue": "sdpteam", 22 | "metadata": { 23 | "description": "Admin username" 24 | } 25 | }, 26 | "sshPublicKeys": { 27 | "type": "securestring", 28 | "metadata": { 29 | "description": "Configure the linux machines with the SSH public keys string. Your key should include three parts, for example 'ssh-rsa AAAAB...snip...UcyupgH azureuser@linuxvm'" 30 | } 31 | }, 32 | "vnetName": { 33 | "type": "string", 34 | "metadata": { 35 | "description": "Name of your existing Virtual Network in your subscription." 36 | } 37 | }, 38 | "subnetName": { 39 | "type": "string", 40 | "metadata": { 41 | "description": "Name of a subnet, where you want your VMs deployed." 
42 | } 43 | }, 44 | "netRgName": { 45 | "type": "string", 46 | "metadata": { 47 | "description": "Network Resource Group." 48 | } 49 | } 50 | }, 51 | "variables": { 52 | "subnetRef": "[resourceId(parameters('netRgName'), 'Microsoft.Network/virtualNetworks/subnets', parameters('vnetName'), parameters('subnetName'))]", 53 | "nicNamePrefix": "[concat(parameters('vmNamePrefix'),'-nic')]", 54 | "imageDefinition": { 55 | "imagePublisher": "OpenLogic", 56 | "imageOffer": "CentOS", 57 | "imageSKU": "8_1-gen2" 58 | }, 59 | "nsgName": "S066-NOE-subnet-nsg", 60 | "type1VmCount": 3, 61 | "type2VmCount": 3, 62 | "totalVmCount": "[add(variables('type1VmCount'), variables('type2VmCount'))]", 63 | "vmSizes": { 64 | "type": "array", 65 | "values": [ 66 | "Standard_E2s_v3", 67 | "Standard_B8ms" 68 | ] 69 | } 70 | }, 71 | "resources": [ 72 | { 73 | "apiVersion": "2019-11-01", 74 | "type": "Microsoft.Network/networkInterfaces", 75 | "name": "[concat(variables('nicNamePrefix'), 3, copyindex(1))]", 76 | "location": "[parameters('location')]", 77 | "copy": { 78 | "name": "nicLoop", 79 | "count": "[variables('totalVmCount')]" 80 | }, 81 | "properties": { 82 | "ipConfigurations": [ 83 | { 84 | "name": "ipconfig1", 85 | "properties": { 86 | "privateIPAllocationMethod": "Static", 87 | "privateIPAddress": "[concat('10.73.68.13', copyindex(1))]", 88 | "subnet": { 89 | "id": "[variables('subnetRef')]" 90 | } 91 | } 92 | } 93 | ], 94 | "networkSecurityGroup": { 95 | "id": "[resourceId(parameters('netRgName'),'Microsoft.Network/networkSecurityGroups', variables('nsgName'))]" 96 | 97 | } 98 | } 99 | }, 100 | { 101 | "type": "Microsoft.Compute/virtualMachines/extensions", 102 | "apiVersion": "2019-07-01", 103 | "name": "[concat(parameters('vmNamePrefix'), 3, copyindex(1), '/AzureNetworkWatcherExtension')]", 104 | "copy": { 105 | "name": "vmExtensionLoop", 106 | "count": "[variables('totalVmCount')]" 107 | }, 108 | "location": "norwayeast", 109 | "dependsOn": [ 110 | 
"[resourceId('Microsoft.Compute/virtualMachines', concat(parameters('vmNamePrefix'), 3, copyindex(1)))]" 111 | ], 112 | "properties": { 113 | "autoUpgradeMinorVersion": true, 114 | "publisher": "Microsoft.Azure.NetworkWatcher", 115 | "type": "NetworkWatcherAgentLinux", 116 | "typeHandlerVersion": "1.4" 117 | } 118 | }, 119 | { // The below disks are per Aug. 2020 not auto-mounted. Follow this guide https://docs.microsoft.com/en-us/azure/virtual-machines/linux/add-disk 120 | "type": "Microsoft.Compute/disks", 121 | "apiVersion": "2019-07-01", 122 | "comments": "1 Disk for vm21 only", 123 | "name": "[concat(parameters('vmNamePrefix'), 3, '1-', 'datadisk0', copyIndex(1))]", 124 | "location": "[parameters('location')]", 125 | "sku": { 126 | "name": "Standard_LRS" 127 | }, 128 | "properties": { 129 | "creationData": { 130 | "createOption": "Empty" 131 | }, 132 | "diskSizeGB": 255 133 | }, 134 | "copy": { 135 | "name": "diskCopy", 136 | "count": 1 137 | } 138 | }, 139 | { 140 | "apiVersion": "2019-07-01", 141 | "type": "Microsoft.Compute/virtualMachines", 142 | "name": "[concat(parameters('vmNamePrefix'), 3, copyindex(1))]", 143 | "copy": { 144 | "name": "virtualMachineLoop", 145 | "count": "[variables('totalVmCount')]" 146 | }, 147 | "tags": { 148 | "TechnicalContact": "gm_sds_rdi@equinor.com", 149 | "serviceType": "external" 150 | }, 151 | "identity": { 152 | "type": "SystemAssigned" 153 | }, 154 | "location": "[parameters('location')]", 155 | "dependsOn": [ 156 | "[concat('Microsoft.Network/networkInterfaces/', variables('nicNamePrefix'), 3, copyindex(1))]" 157 | ], 158 | "properties": { 159 | "hardwareProfile": { // The below line does not scale well with changes. 
Should be reworked if major changes are required 160 | "vmSize": "[if(equals(copyIndex(1),variables('totalVmCount')), variables('vmSizes').Values[1], variables('vmSizes').values[0])]" 161 | 162 | }, 163 | "osProfile": { 164 | "computerName": "[concat(parameters('vmNamePrefix'), 3, copyIndex(1))]", 165 | "adminUsername": "[parameters('adminUsername')]", 166 | "linuxConfiguration": { 167 | "disablePasswordAuthentication": true, 168 | "ssh": { 169 | "publicKeys": [ 170 | { 171 | "path": "[concat('/home/',parameters('adminUsername'),'/.ssh/authorized_keys')]", 172 | "keyData": "[parameters('sshPublicKeys')]" // Per Aug. 2020 - struggling to get multi-key secrets to work. 173 | } 174 | ] 175 | }, 176 | "provisionVMAgent": true 177 | } 178 | }, 179 | "storageProfile": { 180 | "imageReference": { 181 | "publisher": "[variables('imageDefinition').imagePublisher]", 182 | "offer": "[variables('imageDefinition').imageOffer]", 183 | "sku": "[variables('imageDefinition').imageSKU]", 184 | "version": "latest" 185 | }, 186 | "osDisk": { 187 | "createOption": "FromImage", 188 | "caching": "ReadWrite", 189 | "name": "[concat(parameters('vmNamePrefix'), 3, copyIndex(1), '-osdisk')]", 190 | "managedDisk": { 191 | "storageAccountType": "Standard_LRS" 192 | } 193 | } 194 | }, 195 | "networkProfile": { 196 | "networkInterfaces": [ 197 | { 198 | "id": "[resourceId('Microsoft.Network/networkInterfaces',concat(variables('nicNamePrefix'), 3, copyindex(1)))]" 199 | } 200 | ] 201 | }, 202 | "diagnosticsProfile": { 203 | "bootDiagnostics": { 204 | "enabled": true, 205 | "storageUri": "https://sdpbootdiagnostics.blob.core.windows.net/" 206 | } 207 | } 208 | } 209 | } 210 | ], 211 | "outputs": { 212 | }, 213 | "functions": [ 214 | ] 215 | } 216 | -------------------------------------------------------------------------------- /arm-templates/classic/deploy-internalvms.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": 
"https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "location": { 6 | "type": "string", 7 | "defaultValue": "[resourceGroup().location]", 8 | "metadata": { 9 | "description": "Azure region for your VM(s) deployment. This must much with your Virtual Network location." 10 | } 11 | }, 12 | "vmNamePrefix": { 13 | "type": "string", 14 | "maxLength": 4, 15 | "metadata": { 16 | "description": "Prefix for your virtual machines, will be concatenated with a number, e.g. testvm1" 17 | } 18 | }, 19 | "adminUsername": { 20 | "type": "string", 21 | "defaultValue": "sdpteam", 22 | "metadata": { 23 | "description": "Admin username" 24 | } 25 | }, 26 | "sshPublicKeys": { 27 | "type": "securestring", 28 | "metadata": { 29 | "description": "Configure the linux machines with the SSH public keys string. Your key should include three parts, for example 'ssh-rsa AAAAB...snip...UcyupgH azureuser@linuxvm'" 30 | } 31 | }, 32 | "vnetName": { 33 | "type": "string", 34 | "metadata": { 35 | "description": "Name of your existing Virtual Network in your subscription." 36 | } 37 | }, 38 | "subnetName": { 39 | "type": "string", 40 | "metadata": { 41 | "description": "Name of a subnet, where you want your VMs deployed." 42 | } 43 | }, 44 | "netRgName": { 45 | "type": "string", 46 | "metadata": { 47 | "description": "Network Resource Group." 
48 | } 49 | } 50 | }, 51 | "variables": { 52 | "subnetRef": "[resourceId(parameters('netRgName'), 'Microsoft.Network/virtualNetworks/subnets', parameters('vnetName'), parameters('subnetName'))]", 53 | "nicNamePrefix": "[concat(parameters('vmNamePrefix'),'-nic')]", 54 | "imageDefinition": { 55 | "imagePublisher": "OpenLogic", 56 | "imageOffer": "CentOS", 57 | "imageSKU": "8_1-gen2" 58 | }, 59 | "nsgName": "S066-NOE-subnet-nsg", 60 | "type1VmCount": 4, 61 | "type2VmCount": 1, 62 | "totalVmCount": "[add(variables('type1VmCount'), variables('type2VmCount'))]", 63 | "vmSizes": { 64 | "type": "array", 65 | "values": [ 66 | "Standard_B2s", 67 | "Standard_B2ms" 68 | ] 69 | } 70 | }, 71 | "resources": [ 72 | { 73 | "apiVersion": "2019-11-01", 74 | "type": "Microsoft.Network/networkInterfaces", 75 | "name": "[concat(variables('nicNamePrefix'), 0, copyindex(1))]", 76 | "location": "[parameters('location')]", 77 | "copy": { 78 | "name": "nicLoop", 79 | "count": "[variables('totalVmCount')]" 80 | }, 81 | "properties": { 82 | "ipConfigurations": [ 83 | { 84 | "name": "ipconfig1", 85 | "properties": { 86 | "privateIPAllocationMethod": "Static", 87 | "privateIPAddress": "[concat('10.73.68.10', copyindex(1))]", 88 | "subnet": { 89 | "id": "[variables('subnetRef')]" 90 | } 91 | } 92 | } 93 | ], 94 | "networkSecurityGroup": { 95 | "id": "[resourceId(parameters('netRgName'),'Microsoft.Network/networkSecurityGroups', variables('nsgName'))]" 96 | 97 | } 98 | } 99 | }, 100 | { 101 | "type": "Microsoft.Compute/virtualMachines/extensions", 102 | "apiVersion": "2019-07-01", 103 | "name": "[concat(parameters('vmNamePrefix'), 0, copyindex(1), '/AzureNetworkWatcherExtension')]", 104 | "copy": { 105 | "name": "vmExtensionLoop", 106 | "count": "[variables('totalVmCount')]" 107 | }, 108 | "location": "norwayeast", 109 | "dependsOn": [ 110 | "[resourceId('Microsoft.Compute/virtualMachines', concat(parameters('vmNamePrefix'), 0, copyindex(1)))]" 111 | ], 112 | "properties": { 113 | 
"autoUpgradeMinorVersion": true, 114 | "publisher": "Microsoft.Azure.NetworkWatcher", 115 | "type": "NetworkWatcherAgentLinux", 116 | "typeHandlerVersion": "1.4" 117 | } 118 | }, 119 | { // The below disks are per Aug. 2020 not auto-mounted. Follow this guide https://docs.microsoft.com/en-us/azure/virtual-machines/linux/add-disk 120 | "type": "Microsoft.Compute/disks", 121 | "apiVersion": "2019-07-01", 122 | "comments": "4 Disks for vm05 only", 123 | "name": "[concat(parameters('vmNamePrefix'), 0, '5-', 'datadisk0', copyIndex(1))]", 124 | "location": "[parameters('location')]", 125 | "sku": { 126 | "name":"Standard_LRS" 127 | }, 128 | "properties": { 129 | "creationData": { 130 | "createOption": "Empty" 131 | }, 132 | "diskSizeGB": 1024 133 | }, 134 | "copy": { 135 | "name": "diskCopy", 136 | "count": 4 137 | } 138 | }, 139 | { 140 | "type": "Microsoft.Compute/disks", 141 | "apiVersion": "2019-07-01", 142 | "comments": "1 Disk for vm01 only", 143 | "name": "[concat(parameters('vmNamePrefix'), 0, '1-', 'datadisk0', copyIndex(1))]", 144 | "location": "[parameters('location')]", 145 | "sku": { 146 | "name":"Standard_LRS" 147 | }, 148 | "properties": { 149 | "creationData": { 150 | "createOption": "Empty" 151 | }, 152 | "diskSizeGB": 63 153 | }, 154 | "copy": { 155 | "name": "diskCopy", 156 | "count": 1 157 | } 158 | }, 159 | { 160 | "apiVersion": "2019-07-01", 161 | "type": "Microsoft.Compute/virtualMachines", 162 | "name": "[concat(parameters('vmNamePrefix'), 0, copyindex(1))]", 163 | "copy": { 164 | "name": "virtualMachineLoop", 165 | "count": "[variables('totalVmCount')]" 166 | }, 167 | "tags": { 168 | "TechnicalContact": "gm_sds_rdi@equinor.com", 169 | "serviceType": "internal" 170 | }, 171 | "identity": { 172 | "type": "SystemAssigned" 173 | }, 174 | "location": "[parameters('location')]", 175 | "dependsOn": [ 176 | "[concat('Microsoft.Network/networkInterfaces/', variables('nicNamePrefix'), 0, copyindex(1))]" 177 | ], 178 | "properties": { 179 | 
"hardwareProfile": { // The below line does not scale well with changes. Should be reworked if major changes are required 180 | "vmSize": "[if(equals(copyIndex(1),variables('totalVmCount')), variables('vmSizes').Values[1], variables('vmSizes').values[0])]" 181 | 182 | }, 183 | "osProfile": { 184 | "computerName": "[concat(parameters('vmNamePrefix'), 0, copyIndex(1))]", 185 | "adminUsername": "[parameters('adminUsername')]", 186 | "linuxConfiguration": { 187 | "disablePasswordAuthentication": true, 188 | "ssh": { 189 | "publicKeys": [ 190 | { 191 | "path": "[concat('/home/',parameters('adminUsername'),'/.ssh/authorized_keys')]", 192 | "keyData": "[parameters('sshPublicKeys')]" // Per Aug. 2020 - struggling to get multi-key secrets to work. 193 | } 194 | ] 195 | }, 196 | "provisionVMAgent": true 197 | } 198 | }, 199 | "storageProfile": { 200 | "imageReference": { 201 | "publisher": "[variables('imageDefinition').imagePublisher]", 202 | "offer": "[variables('imageDefinition').imageOffer]", 203 | "sku": "[variables('imageDefinition').imageSKU]", 204 | "version": "latest" 205 | }, 206 | "osDisk": { 207 | "createOption": "FromImage", 208 | "caching": "ReadWrite", 209 | "name": "[concat(parameters('vmNamePrefix'), 0, copyIndex(1), '-osdisk')]", 210 | "managedDisk": { 211 | "storageAccountType": "Standard_LRS" 212 | } 213 | } 214 | }, 215 | "networkProfile": { 216 | "networkInterfaces": [ 217 | { 218 | "id": "[resourceId('Microsoft.Network/networkInterfaces',concat(variables('nicNamePrefix'), 0, copyindex(1)))]" 219 | } 220 | ] 221 | }, 222 | "diagnosticsProfile": { 223 | "bootDiagnostics": { 224 | "enabled": true, 225 | "storageUri": "https://sdpbootdiagnostics.blob.core.windows.net/" 226 | } 227 | } 228 | } 229 | } 230 | ], 231 | "outputs": { 232 | }, 233 | "functions": [ 234 | ] 235 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache 
License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2021 Equinor ASA 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /docs/security.md: -------------------------------------------------------------------------------- 1 | # Security considerations 2 | 3 | This document describes some of the security considerations that have gone into SDP's AKS deployment. 4 | We have identified four main levels of concern that are, to a degree, unique to the AKS/Kubernetes technologies: 5 | 6 | - The Azure Portal 7 | - The AKS Nodes 8 | - Kubernetes and Cluster Supporting Services 9 | - Pods and Applications. 10 | 11 | For each of these levels we will describe the main threats, and the most important security measures to remedy these threats. 12 | The CIS(Center for Internet Security) Benchmarks have been used as a guideline for this work. Primarily the "CIS Benchmark for Ubuntu Linux 16.04 LTS", the "CIS Benchmark for Containers", and the "CIS Benchmark for Kubernetes". 13 | 14 | ## Traffic flow 15 | 16 | ![SDP-AKS traffic flow](sdp-aks-traffic.png) 17 | 18 | Flow chart highlighting traffic flow and all access points to the the cluster. 
19 | 20 | ## Azure Portal 21 | 22 | ### Threats 23 | 24 | - Accidental deletion/misconfiguration of resources or entire cluster 25 | - Unauthorized access to Azure resources 26 | 27 | ### Measures 28 | 29 | - Own Azure subscription for SDP admins. 30 | - Limit user in the "SDP Tools" subscriptions, or get own subscriptions. 31 | - Equinor security standard for Azure Portal/az CLI login (Equinor organization + Two-factor authentication(2FA)) 32 | - TODO: JustInTime Access to AzureDNS 33 | 34 | ## AKS Nodes 35 | 36 | ### Threats 37 | 38 | - Not hardened from Azure by default 39 | - Not restarted by default to get latest security patches 40 | - Missing minimum security settings/features 41 | - If somebody get access to the VNET the default security is not sufficient 42 | - Big attack surface 43 | 44 | ### Measures 45 | 46 | - Disabled/uninstalled all unnecessary services 47 | - !Confirm: Removed unnecessary users 48 | - The K8s cluster runs in a virtual network protected by a Azure Network Security Group firewall. This firewall only allows ports 80 and 443 inbound. 49 | - Automatic security patches from Azure. 50 | - !TODO: Update manually packages that are not upated by Azure. 51 | - Automatically reboots if nessecary to apply security updates. 52 | - !TODO: local Firewall 53 | - !TODO: Filter and monitor audit logs. Log events that modify date&time, user&groups, AppArmor, login&logout, access rights, unsuccessful authorization, docker files 54 | - Ensure file permissions. Eg. 
/etc/passwd, /etc/shadow, /etc/crontab 55 | - Restrict traffic between containers (Docker daemon) 56 | - !Investigate: Enforce AppArmor 57 | - !Confirm Ensure kubelet configuration files permissions 58 | 59 | ## Kubernetes and Cluster Supporting Services 60 | 61 | Services: Helm, flux, puppet, github, ark, kured, sealed secrets, azure dns, azure container registry, azure AD 62 | 63 | ### Threats 64 | 65 | - Access to GitHub repo - flux and puppet 66 | - Access to Azure container registry 67 | - Compromised cluster access keys(certs) 68 | - Compromised private key for sealed secrets 69 | - Vulnerabilities in cluster and services 70 | 71 | ### Measures 72 | 73 | - Master nodes and it's services in AKS is fully managed by Azure. 74 | - TODO: Rotate Cluster certs on a regular schedule 75 | - Keep cluster and services up-to-date 76 | - TODO: Get notification when new releases 77 | - TODO: NetworkPolicies 78 | - TODO: Don't allow dangerous runtime options. This include mounting the docker socket, using priviliged flag, --pid=host, --network=host, --device. This can be done with PodSecurityPolicy 79 | - Limit number of users that has access to GitHub repositories. 80 | 81 | ## Pods and Applications 82 | 83 | ### Threats 84 | 85 | - Malicious docker images 86 | - Resource hogging 87 | - Vulnerable applications 88 | 89 | ### Measures 90 | 91 | - TODO: Don't use latest tags. Pin versions on docker images and helm charts. 92 | - TODO: Turn off automated docker images/helm chart annotations. This should be done manually. 93 | - Consider (later): Pod Security (Is this the same as docker run's --security-opt where you can specify an AppArmor profile?) 94 | - TODO: Set resource limits for all pods 95 | - TODO (later): Vulnerability scanned images 96 | - Audit HelmCharts and docker images. 
97 | 98 | # BOYH CIS Ubuntu 99 | 100 | Link til doc: https://neprisstore.blob.core.windows.net/sessiondocs/doc_8ac75a77-40a4-4e08-a6c0-93b39b92abd8.pdf 101 | 102 | ## Burde gjøres 103 | 104 | ### Høy 105 | 106 | ### Medium 107 | 108 | - 2.2.15 Ensure mail transfer agent is configured for local-only mode (Scored) 109 | 110 | - 3.6 Firewall Configuration 111 | 112 | - 4.2.4 Ensure permissions on all logfiles are configured (Scored) 113 | 114 | ### Lav 115 | 116 | - 1.1.1 Disable unused filesystems (Scored) 117 | - 1.1.2 Ensure separate partition exist for /tmp (Scored) 118 | - 1.1.21 Disable Automounting (Scored) 119 | 120 | - 1.4.1 Ensure permissions on bootloader config are configured (Scored) 121 | 122 | - 1.5.4 Ensure prelink is disabled (Scored) 123 | 124 | - 1.7.1 Command Line Warning Banners 125 | 126 | - 3.2.2 Ensure ICMP redirects are not accepted (Scored) 127 | - 3.2.3 Ensure secure ICMP redirects are not accepted (Scored) 128 | - 3.2.4 Ensure suspicious packets are logged (Scored) 129 | - 3.2.5 Ensure broadcast ICMP requests are ignored (Scored) 130 | - 3.2.6 Ensure bogus ICMP responses are ignored (Scored) 131 | - 3.2.8 Ensure TCP SYN Cookies is enabled (Scored) 132 | 133 | - 3.3.1 Ensure IPv6 router advertisements are not accepted (Not Scored) 134 | - 3.3.2 Ensure IPv6 redirects are not accepted (Not Scored) 135 | 136 | - 3.5 Uncommon Network Protocols 137 | 138 | - 5.1 Configure cron 139 | 140 | - 5.2 Configure SSH 141 | 142 | - 5.4.2 Ensure system accounts are non-login (Scored) 143 | - 5.4.3 Ensure default group for the root account is GID 0 (Scored) 144 | - 5.4.4 Ensure default user umask is 027 or more restrictive (Scored) 145 | 146 | - 6.2.5 Ensure root is the only UID 0 account (Scored) 147 | - 6.2.6 Ensure root PATH Integrity (Scored) 148 | - 6.2.7 Ensure all users' home directories exist (Scored) 149 | - 6.2.8 Ensure users' home directories permissions are 750 or more restrictive (Scored) 150 | - 6.2.9 Ensure users own their home directories 
(Scored) 151 | - 6.2.10 Ensure users' dot files are not group or world writable (Scored) 152 | - 6.2.11 Ensure no users have .forward files (Scored) 153 | - 6.2.12 Ensure no users have .netrc files (Scored) 154 | - 6.2.13 Ensure users' .netrc Files are not group or world accessible (Scored) 155 | - 6.2.14 Ensure no users have .rhosts files (Scored) 156 | - 6.2.15 Ensure all groups in /etc/passwd exist in /etc/group (Scored) 157 | 158 | ## Kan gjøres / Bør vurderes 159 | 160 | - 1.1.3 Ensure nodev option set on /tmp partition (Scored) 161 | - 1.1.4 Ensure nosuid option set on /tmp partition (Scored) 162 | - 1.1.10 Ensure separate partition exists for /var/log (Scored) 163 | - 1.1.11 Ensure separate partition exists for /var/log/audit (Scored) 164 | - 1.1.12 Ensure separate partition exists for /home (Scored) 165 | - 1.1.13 Ensure nodev option set on /home partition (Scored) 166 | - 1.1.14 Ensure nodev option set on /dev/shm partition (Scored) 167 | - 1.1.15 Ensure nosuid option set on /dev/shm partition (Scored) 168 | - 1.1.16 Ensure noexec option set on /dev/shm partition (Scored) 169 | - 1.1.20 Ensure sticky bit is set on all world-writable directories (Scored) 170 | 171 | - 1.2.1 Ensure package manager repositories are configured (Not Scored) 172 | - 1.2.2 Ensure GPG keys are configured (Not Scored) 173 | 174 | - 1.4.3 Ensure authentication required for single user mode (Scored) 175 | 176 | - 1.5.1 Ensure core dumps are restricted (Scored) 177 | - 1.5.3 Ensure address space layout randomization (ASLR) is enabled (Scored) 178 | 179 | - 1.6.1 Configure SELinux 180 | - 1.6.2 Configure AppArmor 181 | 182 | - 2.1.10 Ensure xinetd is not enabled (Scored) 183 | 184 | - 2.2.7 Ensure NFS and RPC are not enabled (Scored) 185 | 186 | - 3.1.2 Ensure packet redirect sending is disabled (Scored) 187 | 188 | - 3.2.1 Ensure source routed packets are not accepted (Scored) 189 | - 3.2.7 Ensure Reverse Path Filtering is enabled (Scored) 190 | 191 | - 3.3.3 Ensure IPv6 is disabled 
(Not Scored) 192 | 193 | - 3.4 TCP Wrappers 194 | 195 | - 3.7 Ensure wireless interfaces are disabled (Not Scored) 196 | 197 | - 4.1.1.2 Ensure system is disabled when audit logs are full (Scored) 198 | - 4.1.1.3 Ensure audit logs are not automatically deleted (Scored) 199 | - 4.1.3 Ensure auditing for processes that start prior to auditd is enabled (Scored) 200 | - 4.1.9 Ensure session initiation information is collected (Scored) 201 | - 4.1.10 Ensure discretionary access control permission modification events are collected (Scored) 202 | - 4.1.13 Ensure successful file system mounts are collected (Scored) 203 | - 4.1.18 Ensure the audit configuration is immutable (Scored) 204 | 205 | - 4.2.1 Configure rsyslog 206 | - 4.2.2 Configure syslog-ng 207 | - 4.2.3 Ensure rsyslog or syslog-ng is installed (Scored) 208 | 209 | - 5.3.1 Ensure password creation requirements are configured (Scored) 210 | - 5.3.2 Ensure lockout for failed password attempts is configured (Not Scored) 211 | - 5.3.3 Ensure password reuse is limited (Scored) 212 | - 5.3.4 Ensure password hashing algorithm is SHA-512 (Scored) 213 | 214 | - 5.4.1 Set Shadow Password Suite Parameters 215 | 216 | - 5.5 Ensure root login is restricted to system console (Not Scored) 217 | 218 | - 5.6 Ensure access to the su command is restricted (Scored) 219 | 220 | - 6.1.1 Audit system file permissions (Not Scored) 221 | - 6.1.10 Ensure no world writable files exist (Scored) 222 | - 6.1.11 Ensure no unowned files or directories exist (Scored) 223 | - 6.1.12 Ensure no ungrouped files or directories exist (Scored) 224 | - 6.1.13 Audit SUID executables (Not Scored) 225 | - 6.1.14 Audit SGID executables (Not Scored) 226 | 227 | - 6.2.1 Ensure password fields are not empty (Scored) 228 | - 6.2.2 Ensure no legacy "+" entries exist in /etc/passwd (Scored) 229 | - 6.2.4 Ensure no legacy "+" entries exist in /etc/group (Scored) 230 | - 6.2.16 Ensure no duplicate UIDs exist (Scored) 231 | - 6.2.17 Ensure no duplicate GIDs exist 
(Scored) 232 | - 6.2.18 Ensure no duplicate user names exist (Scored) 233 | - 6.2.19 Ensure no duplicate group names exist (Scored) 234 | - 6.2.20 Ensure shadow group is empty (Scored) 235 | 236 | ## Irrelevant / Vanskelig å gjennomføre 237 | 238 | - 1.1.5 Ensure separate partition exists for /var (Scored) 239 | - 1.1.6 Ensure separate partition exists for /var/tmp (Scored) 240 | - 1.1.7 Ensure nodev option set on /var/tmp partition (Scored) 241 | - 1.1.8 Ensure nosuid option set on /var/tmp partition (Scored) 242 | - 1.1.9 Ensure noexec option set on /var/tmp partition (Scored) 243 | - 1.1.17 Ensure nodev option set on removable media partitions (Not Scored) 244 | - 1.1.18 Ensure nosuid option set on removable media partitions (Not Scored) 245 | - 1.1.19 Ensure noexec option set on removable media partitions (Not Scored) 246 | 247 | - 1.4.2 Ensure bootloader password is set (Scored) 248 | 249 | - 1.5.2 Ensure XD/NX support is enabled (Not Scored) 250 | 251 | - 1.7.2 Ensure GDM login banner is configured (Scored) 252 | 253 | - 2.2.1.3 Ensure chrony is configured (Scored) 254 | 255 | - 3.1.1 Ensure IP forwarding is disabled (Scored) 256 | 257 | ## Gjort 258 | 259 | - 1.8 Ensure updates, patches, and additional security software are installed (Not Scored) 260 | - 2.1.1 Ensure chargen services are not enabled (Scored) 261 | - 2.1.2 Ensure daytime services are not enabled (Scored) 262 | - 2.1.3 Ensure discard services are not enabled (Scored) 263 | - 2.1.4 Ensure echo services are not enabled (Scored) 264 | - 2.1.5 Ensure time services are not enabled (Scored) 265 | - 2.1.6 Ensure rsh server is not enabled (Scored) 266 | - 2.1.7 Ensure talk server is not enabled (Scored) 267 | - 2.1.8 Ensure telnet server is not enabled (Scored) 268 | - 2.1.9 Ensure tftp server is not enabled (Scored) 269 | - 2.2.1.1 Ensure time synchronization is in use (Not Scored) 270 | - 2.2.1.2 Ensure ntp is configured (Scored) 271 | - 2.2.2 Ensure X Window System is not installed (Scored) 272 | - 
2.2.3 Ensure Avahi Server is not enabled (Scored) 273 | - 2.2.4 Ensure CUPS is not enabled (Scored) 274 | - 2.2.5 Ensure DHCP Server is not enabled (Scored) 275 | - 2.2.6 Ensure LDAP server is not enabled (Scored) 276 | - 2.2.8 Ensure DNS Server is not enabled (Scored) 277 | - 2.2.9 Ensure FTP Server is not enabled (Scored) 278 | - 2.2.10 Ensure HTTP server is not enabled (Scored) 279 | - 2.2.11 Ensure IMAP and POP3 server is not enabled (Scored) 280 | - 2.2.12 Ensure Samba is not enabled (Scored) 281 | - 2.2.13 Ensure HTTP Proxy Server is not enabled (Scored) 282 | - 2.2.14 Ensure SNMP Server is not enabled (Scored) 283 | - 2.2.16 Ensure rsync service is not enabled (Scored) 284 | - 2.2.17 Ensure NIS Server is not enabled (Scored) 285 | - 2.3.1 Ensure NIS Client is not installed (Scored) 286 | - 2.3.2 Ensure rsh client is not installed (Scored) 287 | - 2.3.3 Ensure talk client is not installed (Scored) 288 | - 2.3.4 Ensure telnet client is not installed (Scored) 289 | - 2.3.5 Ensure LDAP client is not installed (Scored) 290 | - 4.1.1.1 Ensure audit log storage size is configured (Not Scored) 291 | - 4.1.2 Ensure auditd service is enabled (Scored) 292 | - 4.1.4 Ensure events that modify date and time information are collected (Scored) 293 | - 4.1.5 Ensure events that modify user/group information are collected (Scored) 294 | - 4.1.6 Ensure events that modify the system's network environment are collected (Scored) 295 | - 4.1.7 Ensure events that modify the system's Mandatory Access Controls are collected (Scored) 296 | - 4.1.8 Ensure login and logout events are collected (Scored) 297 | - 4.1.11 Ensure unsuccessful unauthorized file access attempts are collected (Scored) 298 | - 4.1.12 Ensure use of privileged commands is collected (Scored) 299 | - 4.1.14 Ensure file deletion events by users are collected (Scored) 300 | - 4.1.15 Ensure changes to system administration scope (sudoers) is collected (Scored) 301 | - 4.1.16 Ensure system administrator actions (sudolog) 
are collected (Scored) 302 | - 4.1.17 Ensure kernel module loading and unloading is collected (Scored) 303 | - 4.3 Ensure logrotate is configured (Not Scored) 304 | - 6.1.2 Ensure permissions on /etc/passwd are configured (Scored) 305 | - 6.1.3 Ensure permissions on /etc/shadow are configured (Scored) 306 | - 6.1.4 Ensure permissions on /etc/group are configured (Scored) 307 | - 6.1.5 Ensure permissions on /etc/gshadow are configured (Scored) 308 | - 6.1.6 Ensure permissions on /etc/passwd- are configured (Scored) 309 | - 6.1.7 Ensure permissions on /etc/shadow- are configured (Scored) 310 | - 6.1.8 Ensure permissions on /etc/group- are configured (Scored) 311 | - 6.1.9 Ensure permissions on /etc/gshadow- are configured (Scored) 312 | --------------------------------------------------------------------------------