├── .github └── workflows │ └── build-simple-orm.yml ├── .gitignore ├── LICENSE ├── README.md ├── datasources.tf ├── iam.tf ├── images ├── deployment_architecture.png ├── oke_registry_info.png └── vault_secret.png ├── main.tf ├── modules ├── airflow │ ├── data.tf │ ├── main.tf │ ├── output.tf │ └── variables.tf ├── bastion │ ├── data.tf │ ├── main.tf │ ├── output.tf │ └── variables.tf ├── fss │ ├── data.tf │ ├── main.tf │ ├── mount.sh │ ├── outputs.tf │ └── variables.tf ├── network │ ├── data.tf │ ├── main.tf │ ├── outputs.tf │ └── variables.tf ├── oci-mysql │ ├── data.tf │ ├── main.tf │ ├── output.tf │ └── variables.tf └── oke │ ├── data.tf │ ├── main.tf │ ├── outputs.tf │ └── variables.tf ├── oci-provider ├── custom │ ├── connection.py │ ├── connection_form.js │ ├── www_rbac_views.py │ └── www_views.py ├── dags │ ├── oci_adb_sql_example.py │ ├── oci_advanced_example.py │ ├── oci_simple_example.py │ ├── oci_smoketest.py │ ├── schedule_dataflow_app.py │ ├── schedule_dataflow_pipeline.py │ ├── schedule_dataflow_with_parameters.py │ └── trigger_dataflow_when_file_exists.py └── plugins │ └── plugins │ ├── hooks │ ├── oci_adb.py │ ├── oci_base.py │ ├── oci_data_catalog.py │ ├── oci_data_flow.py │ └── oci_object_storage.py │ ├── operators │ ├── oci_adb.py │ ├── oci_copy_object_to_adb.py │ ├── oci_data_catalog.py │ ├── oci_data_flow.py │ └── oci_object_storage.py │ └── sensors │ ├── oci_adb.py │ └── oci_object_storage.py ├── outputs.tf ├── schema.yaml ├── userdata ├── Dockerfile ├── cli_config.sh ├── cloudinit.sh ├── create_db.sh ├── deploy_airflow.sh ├── generate_kubeconfig.sh ├── init.sh ├── install_docker.sh ├── install_kubectl.sh ├── install_oci_dag_templates.sh ├── install_oci_plugins.sh ├── is_worker_active.sh ├── push_to_registry.sh └── templates │ ├── airflow.yaml.template │ ├── configmap.yaml.template │ ├── pod_template.yaml │ ├── secrets.yaml.template │ └── volumes.yaml.template ├── variables.tf └── versions.tf /.github/workflows/build-simple-orm.yml: -------------------------------------------------------------------------------- 1 | name: 'simple-orm-stack' 2 | 3 | on: [push, pull_request] 4 | env: 5 | PROJECT_WORKING_DIR: '/build-orm/' 6 | 7 | jobs: 8 | terraform_generate_orm_zip: 9 | name: 'Generate Stack Package' 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v1 13 | - name: 'Terraform Init' 14 | uses: hashicorp/terraform-github-actions@master 15 | with: 16 | tf_actions_version: 0.12.17 17 | tf_actions_subcommand: 'init' 18 | tf_actions_working_dir: ${{ env.PROJECT_WORKING_DIR }} 19 | env: 20 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 21 | - name: 'Terraform Validate' 22 | uses: hashicorp/terraform-github-actions@master 23 | with: 24 | tf_actions_version: 0.12.17 25 | tf_actions_subcommand: 'validate' 26 | tf_actions_working_dir: ${{ env.PROJECT_WORKING_DIR }} 27 | env: 28 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 29 | - name: 'Terraform Apply' 30 | uses: hashicorp/terraform-github-actions@master 31 | with: 32 | tf_actions_version: 0.12.17 33 | tf_actions_subcommand: 'apply' 34 | tf_actions_working_dir: ${{ env.PROJECT_WORKING_DIR }} 35 | args: '-var="save_to"="export-dist"' 36 | env: 37 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 38 | - name: Create Release 39 | id: create_release 40 | uses: actions/create-release@v1 41 | env: 42 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token 43 | with: 44 | tag_name: ${{ github.ref }} 45 | release_name: Release ${{ github.ref }} 46 | body: | 47 | 
Changes in this Release 48 | - New ORM Stack template ${{ github.ref }} 49 | draft: true 50 | prerelease: true 51 | - name: Upload Release Asset 52 | id: upload-release-asset 53 | uses: actions/upload-release-asset@v1.0.1 54 | env: 55 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 56 | with: 57 | upload_url: ${{ steps.create_release.outputs.upload_url }} 58 | asset_path: ${{ github.workspace }}${{ env.PROJECT_WORKING_DIR }}export-dist/orm.zip 59 | asset_name: orm-stack.zip 60 | asset_content_type: application/zip 61 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Local .terraform directories 2 | **/.terraform/* 3 | 4 | # .tfstate files 5 | *.tfstate 6 | *.tfstate.* 7 | 8 | # .tfvars files 9 | *.tfvars 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021 Oracle and/or its affiliates. All rights reserved. 2 | 3 | The Universal Permissive License (UPL), Version 1.0 4 | 5 | Subject to the condition set forth below, permission is hereby granted to any person obtaining a copy of this 6 | software, associated documentation and/or data (collectively the "Software"), free of charge and under any and 7 | all copyright rights in the Software, and any and all patent rights owned or freely licensable by each licensor 8 | hereunder covering either (i) the unmodified Software as contributed to or provided by such licensor, or 9 | (ii) the Larger Works (as defined below), to deal in both 10 | 11 | (a) the Software, and 12 | (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if one is included with the Software 13 | (each a “Larger Work” to which the Software is contributed by such licensors), 14 | 15 | without restriction, including without limitation the rights to copy, create derivative works of, display, 16 | perform, and distribute the Software and make, use, sell, offer for sale, import, export, have made, and have 17 | sold the Software and the Larger Work(s), and to sublicense the foregoing rights on either these or other terms. 18 | 19 | This license is subject to the following condition: 20 | The above copyright notice and either this complete permission notice or at a minimum a reference to the UPL must 21 | be included in all copies or substantial portions of the Software. 22 | 23 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 24 | THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 25 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 26 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 27 | IN THE SOFTWARE. 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Apache Airflow on OCI OKE 2 | This quickstart template deploys [Apache Airflow](https://airflow.apache.org/) on [Oracle Kubernetes Engine (OKE)](https://docs.oracle.com/en-us/iaas/Content/ContEng/Concepts/contengoverview.htm). 
3 | 
4 | # Pre-Requisites
5 | Airflow on OKE depends on the use of [Instance Principals](https://docs.oracle.com/en-us/iaas/Content/Identity/Tasks/callingservicesfrominstances.htm) for DAG execution. You should create a [dynamic group](https://docs.oracle.com/en-us/iaas/Content/Identity/Tasks/managingdynamicgroups.htm) for the compartment where you are deploying your Airflow OKE cluster. In this example, I am using a [Default Tag](https://docs.oracle.com/en-us/iaas/Content/Tagging/Tasks/managingtagdefaults.htm) for all resources in the target compartment to define the Dynamic Group:
6 | 
7 |     tag.Airflow.InstancePrincipal.value='Enabled'
8 | 
9 | After creating the group, you should set specific [IAM policies](https://docs.oracle.com/en-us/iaas/Content/Identity/Reference/policyreference.htm) for the OCI services you want Airflow to integrate with.
10 | 
11 | **Due to enforcement of [OSMS](https://docs.oracle.com/en-us/iaas/os-management/osms/osms-getstarted.htm) for compute resources created using a `manage all-resources` policy, you need to specify each service in a separate policy statement.**
12 | 
13 | At a minimum, the following policies are required - in this example both the Dynamic Group and target Compartment are named "Airflow":
14 | 
15 |     Allow dynamic-group Airflow to manage cluster-family in compartment Airflow
16 |     Allow dynamic-group Airflow to manage secret-family in compartment Airflow
17 |     Allow dynamic-group Airflow to manage vaults in compartment Airflow
18 | 
19 | The above will allow the Airflow OKE cluster to leverage [KubernetesExecutor](https://airflow.apache.org/docs/apache-airflow/stable/executor/kubernetes.html). This spins up containers in worker pods on demand for DAG execution, so there is no need for any persistent infrastructure beyond the Bastion host used to access the cluster, and the webserver & scheduler containers in the Airflow pod.
20 | 
21 | Common integrations include Object Storage, Autonomous Database, and Data Flow. Here are policy statements for those related services:
22 | 
23 |     Allow dynamic-group Airflow to manage object-family in compartment Airflow
24 |     Allow dynamic-group Airflow to manage autonomous-database-family in compartment Airflow
25 |     Allow dynamic-group Airflow to manage dataflow-family in compartment Airflow
26 | 
27 | Also required prior to deployment are an [OCI Registry](https://docs.oracle.com/en-us/iaas/Content/Registry/Concepts/registryoverview.htm), an [OCI Vault](https://docs.oracle.com/en-us/iaas/Content/KeyManagement/Concepts/keyoverview.htm), an [Auth Token](https://docs.oracle.com/en-us/iaas/Content/Identity/Tasks/managingcredentials.htm#create_swift_password), and a [Vault Secret](https://docs.oracle.com/en-us/iaas/Content/KeyManagement/Tasks/managingsecrets.htm) which contains the Auth Token.
28 | 
29 | **The OCI registry must be in the tenancy root, and the user account associated with the auth token needs the relevant privileges for the repo.**
30 | 
31 | The policy syntax is as follows, assuming the user is in a group named "RepoAccess":
32 | 
33 |     Allow group RepoAccess to manage repos in tenancy
34 | 
35 | You will need to gather the repo name and user login to access the registry. You will also need to set the registry field to the region where your registry is deployed.
36 | 
37 | ![OKE Registry Settings](images/oke_registry_info.png)
38 | 
39 | Note that in this example the registry username uses [Oracle Cloud Identity Service Federation](https://docs.oracle.com/en-us/iaas/Content/Identity/Tasks/federatingIDCS.htm). If you are not using IDCS and are using a local account, simply use the local account login (email address).
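
For reference, this is roughly how those registry credentials are used when logging in to OCIR manually; the region endpoint, tenancy namespace, and user below are illustrative placeholders, and the password prompt expects the auth token stored in the Vault secret:

    # Federated (IDCS) user - the username carries the identity provider prefix
    docker login iad.ocir.io -u '<tenancy-namespace>/oracleidentitycloudservice/jane.doe@example.com'

    # Local IAM user - just the tenancy namespace and the account login
    docker login iad.ocir.io -u '<tenancy-namespace>/jane.doe@example.com'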
40 | 
41 | The auth token is fetched from OCI Vault Secrets - you will need to capture the secret OCID prior to deployment.
42 | 
43 | ![Vault Secret](images/vault_secret.png)
44 | 
45 | # Deployment
46 | The main branch of this deployment uses [Oracle Resource Manager](https://docs.oracle.com/en-us/iaas/Content/ResourceManager/Concepts/resourcemanager.htm). The shell branch uses stand-alone Terraform (CLI).
47 | 
48 | This template deploys the following:
49 | 
50 | * Virtual Cloud Network
51 |     * Public (Edge) Subnet
52 |     * Private Subnet
53 |     * Internet Gateway
54 |     * NAT Gateway
55 |     * Service Gateway
56 |     * Route tables
57 |     * Security Lists
58 |         * TCP 22 for Edge SSH on public subnet
59 |         * Ingress to both subnets from VCN CIDR
60 |         * Egress to Internet for both subnets
61 |         * Ingress to var.service_port (default 8080) for the Airflow UI on the load balancer
62 | * OCI Virtual Machine Bastion Instance
63 | * OCI MySQL as a Service for Airflow Metadata
64 | * OKE Cluster & load balancer
65 |     * Webserver container
66 |     * Scheduler container
67 | 
68 | 
69 | Simply click the Deploy to OCI button to create an ORM stack, then walk through the menu-driven deployment. Once the stack is created, use the Terraform Actions drop-down menu to Plan, then Apply the stack.
70 | 
71 | [![Deploy to Oracle Cloud](https://oci-resourcemanager-plugin.plugins.oci.oraclecloud.com/latest/deploy-to-oracle-cloud.svg)](https://cloud.oracle.com/resourcemanager/stacks/create?region=home&zipUrl=https://github.com/oracle-quickstart/oke-airflow/archive/2.1.3.zip)
72 | 
73 | ## Remote-exec vs. CloudInit
74 | This template allows for both remote-exec and CloudInit based deployments. By default it uses remote-exec, which uses the bastion host as an intermediary to run deployment commands over SSH. Alternatively, this can be disabled in favor of a CloudInit based deployment. CloudInit packs all deployment commands into the bastion host metadata and executes them at host build time. This permits deployment with zero internet exposure, entirely on a private subnet if desired. Disabling the remote-exec option during stack setup will unlock options for CloudInit based deployment.
75 | 
76 | ### Logging
77 | The deployment log for a CloudInit based deployment can be tracked by logging into the bastion host and executing:
78 | 
79 |     tail -f /var/log/OCI-airflow-initialize.log
80 | 
81 | Remote execution logging is written directly to the Terraform output.
82 | 
83 | ## Deployment Architecture Diagram
84 | ![Deployment Architecture Diagram](images/deployment_architecture.png)
85 | 
86 | This diagram reflects the default deployment architecture. The bastion host is deployed to the public (edge) subnet and used to access the OKE cluster. It is also leveraged in the build process, as mentioned above, using either remote-exec or CloudInit.
87 | 
88 | Build assets are staged in $HOME/airflow for the opc user when using remote-exec, or in /airflow when using CloudInit. They can be removed or changed after initial stack deployment, but you may want to keep them if you plan to modify or re-deploy.
89 | 
90 | ## OKE
91 | A load balancer service is included in the OKE cluster deployment for access to the Airflow Web UI on port 8080. Once the cluster is built, you can find the public IP by issuing the command:
92 | 
93 |     kubectl -n airflow get svc
94 | 
95 | You should see similar output:
96 | 
97 |     NAME      TYPE           CLUSTER-IP     EXTERNAL-IP       PORT(S)          AGE
98 |     airflow   LoadBalancer   10.96.187.11   129.146.222.217   8080:32300/TCP   74s
99 | 
100 | The deployment on OKE produces an Airflow pod which has a Webserver and a Scheduler container. DAG execution using KubernetesExecutor will spin up new pods for each worker task, along with associated containers to drive execution. You can see the pods using the following command:
101 | 
102 |     kubectl -n airflow get pods -o wide
103 | 
104 | You should see similar output:
105 | 
106 |     NAME                       READY   STATUS     RESTARTS   AGE   IP            NODE       NOMINATED NODE   READINESS GATES
107 |     airflow-75f45994f5-ll2rz   2/2     Init:0/1   0          76s   10.244.0.17   10.0.2.5
108 | 
109 | You can drill into additional detail by issuing the following command, using the pod name from the previous output:
110 | 
111 |     kubectl -n airflow describe pod/airflow-75f45994f5-ll2rz
112 | 
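
If you only need the external IP (for scripting), or you prefer to reach the UI through the bastion rather than over the public load balancer, the following kubectl invocations may be useful; they assume the default service name and namespace (`airflow`) used by this template:

    # Print just the load balancer external IP
    kubectl -n airflow get svc airflow -o jsonpath='{.status.loadBalancer.ingress[0].ip}'

    # Forward the UI to a local port on the bastion (e.g. combine with an SSH tunnel)
    kubectl -n airflow port-forward svc/airflow 8080:8080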
113 | 
114 | ## OCI FSS
115 | OCI File Storage Service is used to provide a fault-tolerant, highly available shared filesystem where Airflow DAG and log data are stored. This filesystem is automatically mounted on OKE containers in `/opt/airflow/dags` and `/opt/airflow/logs`.
116 | 
117 | You may also want to mount this on the Bastion host for ease of access - by default it is only mounted on OKE cluster containers. In this example the FSS export IP is set to 10.0.2.3.
118 | 
119 |     sudo mkdir -p /opt/airflow/dags
120 |     sudo mkdir -p /opt/airflow/logs
121 |     sudo mount -o nosuid,resvport 10.0.2.3:/airflow-dags/ /opt/airflow/dags/
122 |     sudo mount -o nosuid,resvport 10.0.2.3:/airflow-logs/ /opt/airflow/logs/
123 | 
124 | ## OCI MySQL
125 | The OCI MySQL service is used to store Airflow metadata. You can configure elements of the MySQL deployment; most important is ensuring that the internal IP address is in scope for the OKE cluster subnet should you choose to deploy this into an existing VCN/subnet topology.
126 | 
127 | # *Important Note*
128 | It's very important to note that you *must delete the airflow service* before you attempt to execute a Terraform *DESTROY*, either via ORM or the CLI. To do this, log in to the bastion host and execute the following command:
129 | 
130 |     kubectl -n airflow delete svc airflow
131 | 
132 | This will remove the airflow service and allow for graceful termination of related stack elements upon Terraform destroy.
133 | 
134 | 
-------------------------------------------------------------------------------- /datasources.tf: --------------------------------------------------------------------------------
1 | # Gets VCN ID
2 | data "oci_core_vcn" "vcn_info" {
3 |   vcn_id = var.useExistingVcn ?
var.myVcn : module.network.vcn-id 4 | } 5 | 6 | # Gets a list of Availability Domains 7 | data "oci_identity_availability_domains" "ADs" { 8 | compartment_id = var.tenancy_ocid 9 | } 10 | 11 | 12 | locals { 13 | # Helm repos 14 | helm_repository = { 15 | stable = "https://kubernetes-charts.storage.googleapis.com" 16 | ingress-nginx = "https://kubernetes.github.io/ingress-nginx" 17 | jetstack = "https://charts.jetstack.io" # cert-manager 18 | svc-cat = "https://svc-catalog-charts.storage.googleapis.com" # Service Catalog 19 | } 20 | } 21 | 22 | # OCI Services 23 | ## Available Services 24 | data "oci_core_services" "all_services" { 25 | filter { 26 | name = "name" 27 | values = ["All .* Services In Oracle Services Network"] 28 | regex = true 29 | } 30 | } 31 | 32 | ## Object Storage 33 | data "oci_objectstorage_namespace" "ns" { 34 | compartment_id = var.compartment_ocid 35 | } 36 | 37 | # Randoms 38 | resource "random_string" "deploy_id" { 39 | length = 4 40 | special = false 41 | } 42 | 43 | # OEL Image lookup 44 | data "oci_core_images" "oraclelinux7" { 45 | compartment_id = var.compartment_ocid 46 | operating_system = "Oracle Linux" 47 | operating_system_version = "7.9" 48 | filter { 49 | name = "display_name" 50 | values = ["^([a-zA-z]+)-([a-zA-z]+)-([\\.0-9]+)-([\\.0-9-]+)$"] 51 | regex = true 52 | } 53 | } 54 | 55 | locals { 56 | bastion_subnet = var.public_edge_node ? module.network.edge-id : module.network.private-id 57 | } 58 | 59 | -------------------------------------------------------------------------------- /iam.tf: -------------------------------------------------------------------------------- 1 | resource "tls_private_key" "oke_ssh_key" { 2 | algorithm = "RSA" 3 | } 4 | 5 | resource "local_file" "key_file" { 6 | filename = "${path.module}/key.pem" 7 | content = tls_private_key.oke_ssh_key.private_key_pem 8 | 9 | provisioner "local-exec" { 10 | command = "chmod 600 ${path.module}/key.pem" 11 | } 12 | } 13 | 14 | 15 | -------------------------------------------------------------------------------- /images/deployment_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oracle-quickstart/oke-airflow/d3f14c1b4702871fd0736d70cb4d158e0cb3e34a/images/deployment_architecture.png -------------------------------------------------------------------------------- /images/oke_registry_info.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oracle-quickstart/oke-airflow/d3f14c1b4702871fd0736d70cb4d158e0cb3e34a/images/oke_registry_info.png -------------------------------------------------------------------------------- /images/vault_secret.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oracle-quickstart/oke-airflow/d3f14c1b4702871fd0736d70cb4d158e0cb3e34a/images/vault_secret.png -------------------------------------------------------------------------------- /main.tf: -------------------------------------------------------------------------------- 1 | module "network" { 2 | source = "./modules/network" 3 | tenancy_ocid = var.tenancy_ocid 4 | compartment_ocid = var.compartment_ocid 5 | region = var.region 6 | useExistingVcn = var.useExistingVcn 7 | VCN_CIDR = var.VCN_CIDR 8 | edge_cidr = var.edge_cidr 9 | private_cidr = var.private_cidr 10 | vcn_dns_label = var.vcn_dns_label 11 | service_port = var.service_port 12 | custom_vcn = [var.myVcn] 13 | OKESubnet = var.OKESubnet 
14 | edgeSubnet = var.edgeSubnet 15 | myVcn = var.myVcn 16 | } 17 | 18 | module "fss" { 19 | source = "./modules/fss" 20 | compartment_ocid = var.compartment_ocid 21 | subnet_id = var.useExistingVcn ? var.OKESubnet : module.network.private-id 22 | availability_domain = var.availability_domain 23 | vcn_cidr = data.oci_core_vcn.vcn_info.cidr_block 24 | } 25 | 26 | module "oci-mysql" { 27 | source = "./modules/oci-mysql" 28 | availability_domain = var.availability_domain 29 | compartment_ocid = var.compartment_ocid 30 | mysqladmin_password = var.mysql_admin_password 31 | mysqladmin_username = var.mysql_admin_username 32 | mysql_shape = var.mysql_shape 33 | enable_mysql_backups = var.enable_backups 34 | oci_mysql_ip = var.private_ip_address 35 | subnet_id = var.useExistingVcn ? var.OKESubnet : module.network.private-id 36 | 37 | } 38 | 39 | module "oke" { 40 | source = "./modules/oke" 41 | create_new_oke_cluster = var.create_new_oke_cluster 42 | existing_oke_cluster_id = var.existing_oke_cluster_id 43 | tenancy_ocid = var.tenancy_ocid 44 | compartment_ocid = var.compartment_ocid 45 | cluster_name = var.cluster_name 46 | kubernetes_version = var.kubernetes_version 47 | airflow_node_pool_name = var.airflow_node_pool_name 48 | airflow_node_pool_shape = var.airflow_node_pool_shape 49 | airflow_node_pool_size = var.airflow_node_pool_size 50 | cluster_options_add_ons_is_kubernetes_dashboard_enabled = var.cluster_options_add_ons_is_kubernetes_dashboard_enabled 51 | cluster_options_admission_controller_options_is_pod_security_policy_enabled = var.cluster_options_admission_controller_options_is_pod_security_policy_enabled 52 | image_id = data.oci_core_images.oraclelinux7.images.0.id 53 | vcn_id = var.useExistingVcn ? var.myVcn : module.network.vcn-id 54 | subnet_id = var.useExistingVcn ? var.OKESubnet : module.network.private-id 55 | lb_subnet_id = module.network.edge-id 56 | ssh_public_key = var.use_remote_exec ? tls_private_key.oke_ssh_key.public_key_openssh : var.ssh_provided_public_key 57 | endpoint_subnet_id = var.useExistingVcn ? var.OKESubnet : module.network.private-id 58 | node_pool_node_shape_config_memory_in_gbs = var.flex_gbs 59 | node_pool_node_shape_config_ocpus = var.flex_ocpu 60 | is_flex_shape = contains(["VM.Standard.E3.Flex", "VM.Standard.E4.Flex", "VM.Optimized3.Flex", "VM.Standard.A1.Flex"], var.airflow_node_pool_shape) 61 | } 62 | 63 | module "bastion" { 64 | depends_on = [module.oke, module.oci-mysql, module.network, module.fss] 65 | source = "./modules/bastion" 66 | user_data = var.use_remote_exec ? base64encode(file("userdata/init.sh")) : base64encode(file("userdata/cloudinit.sh")) 67 | compartment_ocid = var.compartment_ocid 68 | availability_domain = var.availability_domain 69 | image_id = data.oci_core_images.oraclelinux7.images.0.id 70 | instance_shape = var.bastion_shape 71 | instance_name = var.bastion_name 72 | subnet_id = var.useExistingVcn ? var.edgeSubnet : local.bastion_subnet 73 | ssh_public_key = var.use_remote_exec ? tls_private_key.oke_ssh_key.public_key_openssh : var.ssh_provided_public_key 74 | public_edge_node = var.public_edge_node 75 | image_name = var.image_name 76 | image_label = var.image_label 77 | oke_cluster_id = var.create_new_oke_cluster ? 
module.oke.cluster_id : var.existing_oke_cluster_id 78 | nodepool_id = module.oke.nodepool_id 79 | repo_name = var.repo_name 80 | registry = var.registry 81 | registry_user = var.username 82 | secret_id = var.vault_secret_id 83 | tenancy_ocid = var.tenancy_ocid 84 | admin_db_user = var.mysql_admin_username 85 | admin_db_password = var.mysql_admin_password 86 | airflow_db_user = var.airflow_username 87 | airflow_db_password = var.airflow_password 88 | db_name = var.db_name 89 | db_ip = module.oci-mysql.db_ip 90 | db_port = module.oci-mysql.db_port 91 | namespace = var.airflow_namespace 92 | kube_label = var.kube_label 93 | mount_target_id = module.fss.mount_target_id 94 | nfs_ip = module.fss.nfs_ip 95 | bastion_flex_gbs = var.bastion_flex_gbs 96 | bastion_flex_ocpus = var.bastion_flex_ocpus 97 | is_flex_shape = contains(["VM.Standard.E3.Flex", "VM.Standard.E4.Flex", "VM.Optimized3.Flex", "VM.Standard.A1.Flex"], var.bastion_shape) 98 | } 99 | 100 | module "airflow" { 101 | count = var.use_remote_exec ? 1 : 0 102 | source = "./modules/airflow" 103 | airflow_depends_on = [module.bastion, module.oke, module.oci-mysql, module.network] 104 | compartment_ocid = var.compartment_ocid 105 | tenancy_ocid = var.tenancy_ocid 106 | instance_ip = module.bastion.public_ip 107 | cluster_id = var.create_new_oke_cluster ? module.oke.cluster_id : var.existing_oke_cluster_id 108 | nodepool_id = module.oke.nodepool_id 109 | region = var.region 110 | ssh_public_key = var.use_remote_exec ? tls_private_key.oke_ssh_key.public_key_openssh : var.ssh_provided_public_key 111 | ssh_private_key = tls_private_key.oke_ssh_key.private_key_pem 112 | registry = var.registry 113 | repo_name = var.repo_name 114 | registry_user = var.username 115 | image_name = var.image_name 116 | image_label = var.image_label 117 | secret_id = var.vault_secret_id 118 | namespace = var.airflow_namespace 119 | kube_label = var.kube_label 120 | mount_target_id = module.fss.mount_target_id 121 | nfs_ip = module.fss.nfs_ip 122 | admin_db_user = var.mysql_admin_username 123 | admin_db_password = var.mysql_admin_password 124 | airflow_db_user = var.airflow_username 125 | airflow_db_password = var.airflow_password 126 | db_name = var.db_name 127 | db_ip = module.oci-mysql.db_ip 128 | db_port = module.oci-mysql.db_port 129 | } 130 | -------------------------------------------------------------------------------- /modules/airflow/data.tf: -------------------------------------------------------------------------------- 1 | data "oci_identity_tenancy" "my_tenancy" { 2 | #Required 3 | tenancy_id = var.tenancy_ocid 4 | } 5 | 6 | # Lookup namespace 7 | data "oci_objectstorage_namespace" "lookup" { 8 | compartment_id = var.compartment_ocid 9 | } 10 | -------------------------------------------------------------------------------- /modules/airflow/main.tf: -------------------------------------------------------------------------------- 1 | # OCI CLI Installation 2 | 3 | data "template_file" "install_oci_cli" { 4 | template = file("${path.module}/../../userdata/cli_config.sh") 5 | } 6 | 7 | resource null_resource "install_oci_cli" { 8 | depends_on = [var.airflow_depends_on] 9 | 10 | connection { 11 | host = var.instance_ip 12 | private_key = var.ssh_private_key 13 | timeout = "200s" 14 | type = "ssh" 15 | user = "opc" 16 | } 17 | 18 | provisioner "file" { 19 | content = data.template_file.install_oci_cli.rendered 20 | destination = "~/cli_config.sh" 21 | } 22 | 23 | provisioner "remote-exec" { 24 | inline = [ 25 | "chmod +x $HOME/cli_config.sh", 26 | "bash 
$HOME/cli_config.sh", 27 | "rm -f $HOME/cli_config.sh", 28 | "rm -f $HOME/install.sh" 29 | ] 30 | } 31 | } 32 | 33 | # Create airflow DB and grant airflow user full access to it 34 | 35 | data "template_file" "create_db" { 36 | template = file("${path.module}/../../userdata/create_db.sh") 37 | vars = { 38 | db_ip = var.db_ip 39 | db_name = var.db_name 40 | admin_db_user = var.admin_db_user 41 | admin_db_password = var.admin_db_password 42 | airflow_db_user = var.airflow_db_user 43 | airflow_db_password = var.admin_db_password 44 | } 45 | } 46 | 47 | resource "null_resource" "create_db" { 48 | depends_on = [var.airflow_depends_on] 49 | connection { 50 | host = var.instance_ip 51 | private_key = var.ssh_private_key 52 | timeout = "40m" 53 | type = "ssh" 54 | user = "opc" 55 | } 56 | 57 | provisioner "remote-exec" { 58 | inline = [ 59 | "mkdir -p $HOME/airflow" 60 | ] 61 | } 62 | 63 | provisioner "file" { 64 | content = data.template_file.create_db.rendered 65 | destination = "~/airflow/create_db.sh" 66 | } 67 | 68 | provisioner "remote-exec" { 69 | inline = [ 70 | "cd $HOME/airflow", 71 | "chmod +x create_db.sh", 72 | "./create_db.sh" 73 | ] 74 | } 75 | } 76 | 77 | # Kubectl 78 | 79 | data "template_file" "install_kubectl" { 80 | template = file("${path.module}/../../userdata/install_kubectl.sh") 81 | } 82 | 83 | resource "null_resource" "install_kubectl" { 84 | depends_on = [null_resource.install_oci_cli] 85 | 86 | connection { 87 | host = var.instance_ip 88 | private_key = var.ssh_private_key 89 | timeout = "40m" 90 | type = "ssh" 91 | user = "opc" 92 | } 93 | 94 | provisioner "file" { 95 | content = data.template_file.install_kubectl.rendered 96 | destination = "~/install_kubectl.sh" 97 | } 98 | 99 | provisioner "remote-exec" { 100 | inline = [ 101 | "chmod +x $HOME/install_kubectl.sh", 102 | "bash $HOME/install_kubectl.sh", 103 | "rm -f $HOME/install_kubectl.sh" 104 | ] 105 | } 106 | } 107 | 108 | # Kubeconfig 109 | 110 | data "template_file" "generate_kubeconfig" { 111 | template = file("${path.module}/../../userdata/generate_kubeconfig.sh") 112 | 113 | vars = { 114 | cluster-id = var.cluster_id 115 | region = var.region 116 | } 117 | } 118 | 119 | resource "null_resource" "write_kubeconfig_on_bastion" { 120 | depends_on = [null_resource.install_oci_cli] 121 | 122 | connection { 123 | host = var.instance_ip 124 | private_key = var.ssh_private_key 125 | timeout = "40m" 126 | type = "ssh" 127 | user = "opc" 128 | } 129 | 130 | provisioner "file" { 131 | content = data.template_file.generate_kubeconfig.rendered 132 | destination = "~/generate_kubeconfig.sh" 133 | } 134 | 135 | provisioner "remote-exec" { 136 | inline = [ 137 | "chmod +x $HOME/generate_kubeconfig.sh", 138 | "$HOME/generate_kubeconfig.sh", 139 | "rm -f $HOME/generate_kubeconfig.sh" 140 | ] 141 | } 142 | } 143 | 144 | # Checking node lifecycle state 145 | 146 | data "template_file" "check_node_lifecycle" { 147 | template = file("${path.module}/../../userdata/is_worker_active.sh") 148 | 149 | vars = { 150 | nodepool-id = var.nodepool_id 151 | } 152 | } 153 | 154 | resource "null_resource" "node_lifecycle" { 155 | depends_on = [null_resource.install_oci_cli] 156 | 157 | connection { 158 | host = var.instance_ip 159 | private_key = var.ssh_private_key 160 | timeout = "40m" 161 | type = "ssh" 162 | user = "opc" 163 | } 164 | 165 | provisioner "file" { 166 | content = data.template_file.check_node_lifecycle.rendered 167 | destination = "~/is_worker_active.sh" 168 | } 169 | 170 | provisioner "remote-exec" { 171 | inline = [ 172 | 
"chmod +x $HOME/is_worker_active.sh", 173 | "$HOME/is_worker_active.sh", 174 | "rm -f $HOME/is_worker_active.sh" 175 | ] 176 | } 177 | } 178 | 179 | # Build airflow docker image with OCI plugins 180 | 181 | data "template_file" "install_docker" { 182 | template = file("${path.module}/../../userdata/install_docker.sh") 183 | vars = { 184 | user = "opc" 185 | } 186 | } 187 | data "template_file" "Dockerfile" { 188 | template = file("${path.module}/../../userdata/Dockerfile") 189 | } 190 | 191 | data "template_file" "install_oci_plugins" { 192 | template = file("${path.module}/../../userdata/install_oci_plugins.sh") 193 | } 194 | 195 | data "template_file" "install_oci_dag_templates" { 196 | template = file("${path.module}/../../userdata/install_oci_dag_templates.sh") 197 | } 198 | 199 | data "template_file" "pod_template" { 200 | template = file("${path.module}/../../userdata/templates/pod_template.yaml") 201 | } 202 | 203 | resource "null_resource" "build_docker_image" { 204 | 205 | connection { 206 | host = var.instance_ip 207 | private_key = var.ssh_private_key 208 | timeout = "40m" 209 | type = "ssh" 210 | user = "opc" 211 | } 212 | 213 | provisioner "remote-exec" { 214 | inline = [ 215 | "mkdir -p $HOME/airflow" 216 | ] 217 | } 218 | 219 | provisioner "file" { 220 | content = data.template_file.install_docker.rendered 221 | destination = "~/airflow/install_docker.sh" 222 | } 223 | 224 | provisioner "file" { 225 | content = data.template_file.Dockerfile.rendered 226 | destination = "~/airflow/Dockerfile" 227 | } 228 | 229 | provisioner "file" { 230 | content = data.template_file.install_oci_plugins.rendered 231 | destination = "~/airflow/install_oci_plugins.sh" 232 | } 233 | 234 | provisioner "file" { 235 | content = data.template_file.install_oci_dag_templates.rendered 236 | destination = "~/airflow/install_oci_dag_templates.sh" 237 | } 238 | 239 | provisioner "file" { 240 | content = data.template_file.pod_template.rendered 241 | destination = "~/airflow/pod_template.yaml" 242 | } 243 | 244 | provisioner "remote-exec" { 245 | inline = [ 246 | "cd $HOME/airflow", 247 | "chmod +x install_docker.sh", 248 | "./install_docker.sh", 249 | ] 250 | } 251 | 252 | provisioner "remote-exec" { 253 | inline = [ 254 | "cd airflow; docker build -t ${var.image_name}:${var.image_label} ." 
255 | ] 256 | } 257 | } 258 | 259 | # Push airflow image to OCI registry 260 | 261 | data "template_file" "push_to_registry" { 262 | template = file("${path.module}/../../userdata/push_to_registry.sh") 263 | vars = { 264 | secret_id = var.secret_id, 265 | registry = var.registry 266 | repo_name = var.repo_name 267 | registry_user = var.registry_user 268 | tenancy_name = data.oci_objectstorage_namespace.lookup.namespace 269 | region = var.region 270 | image_name = var.image_name 271 | image_label = var.image_label 272 | } 273 | } 274 | 275 | resource "null_resource" "push_to_registry" { 276 | depends_on = [null_resource.build_docker_image, null_resource.install_oci_cli] 277 | 278 | connection { 279 | host = var.instance_ip 280 | private_key = var.ssh_private_key 281 | timeout = "40m" 282 | type = "ssh" 283 | user = "opc" 284 | } 285 | 286 | provisioner "file" { 287 | content = data.template_file.push_to_registry.rendered 288 | destination = "~/airflow/push_to_registry.sh" 289 | } 290 | 291 | provisioner "remote-exec" { 292 | inline = [ 293 | "cd $HOME/airflow", 294 | "chmod +x push_to_registry.sh", 295 | "./push_to_registry.sh" 296 | ] 297 | } 298 | } 299 | 300 | 301 | # Deploy airflow containers on OKE 302 | 303 | data "template_file" "deploy_airflow" { 304 | template = file("${path.module}/../../userdata/deploy_airflow.sh") 305 | vars = { 306 | secret_id = var.secret_id 307 | registry = var. registry 308 | repo_name = var.repo_name 309 | registry_user = var.registry_user 310 | tenancy_name = data.oci_objectstorage_namespace.lookup.namespace 311 | region = var.region 312 | image_name = var.image_name 313 | image_label = var.image_label 314 | namespace = var.namespace 315 | mount_target_id = var.mount_target_id 316 | nfs_ip = var.nfs_ip 317 | } 318 | } 319 | 320 | data "template_file" "volumes_template" { 321 | template = file("${path.module}/../../userdata/templates/volumes.yaml.template") 322 | vars = { 323 | MNT_TARGET_ID = var.mount_target_id 324 | NFS_IP = var.nfs_ip 325 | } 326 | } 327 | 328 | data "template_file" "configmap_template" { 329 | template = file("${path.module}/../../userdata/templates/configmap.yaml.template") 330 | vars = { 331 | namespace = var.namespace 332 | registry = var.registry 333 | tenancy_name = data.oci_objectstorage_namespace.lookup.namespace 334 | repo_name = var.repo_name 335 | image_name = var.image_name 336 | image_label = var.image_label 337 | } 338 | } 339 | 340 | data "template_file" "secrets_template" { 341 | template = file("${path.module}/../../userdata/templates/secrets.yaml.template") 342 | vars = { 343 | sql_alchemy_conn = local.sql_alchemy_conn 344 | } 345 | } 346 | 347 | 348 | data "template_file" "airflow_template" { 349 | template = file("${path.module}/../../userdata/templates/airflow.yaml.template") 350 | vars = { 351 | namespace = var.namespace 352 | registry = var.registry 353 | tenancy_name = data.oci_objectstorage_namespace.lookup.namespace 354 | repo_name = var.repo_name 355 | image_name = var.image_name 356 | image_label = var.image_label 357 | } 358 | } 359 | 360 | 361 | resource "null_resource" "deploy_airflow" { 362 | depends_on = [null_resource.push_to_registry, null_resource.node_lifecycle] 363 | 364 | connection { 365 | host = var.instance_ip 366 | private_key = var.ssh_private_key 367 | timeout = "40m" 368 | type = "ssh" 369 | user = "opc" 370 | } 371 | 372 | provisioner "remote-exec" { 373 | inline = [ 374 | "mkdir -p $HOME/airflow/build" 375 | ] 376 | } 377 | 378 | provisioner "file" { 379 | content = 
data.template_file.volumes_template.rendered 380 | destination = "~/airflow/build/volumes.yaml" 381 | } 382 | 383 | provisioner "file" { 384 | content = data.template_file.configmap_template.rendered 385 | destination = "~/airflow/build/configmap.yaml" 386 | } 387 | 388 | provisioner "file" { 389 | content = data.template_file.airflow_template.rendered 390 | destination = "~/airflow/build/airflow.yaml" 391 | } 392 | 393 | provisioner "file" { 394 | content = data.template_file.secrets_template.rendered 395 | destination = "~/airflow/build/secrets.yaml" 396 | } 397 | 398 | 399 | provisioner "file" { 400 | content = data.template_file.deploy_airflow.rendered 401 | destination = "~/airflow/deploy_airflow.sh" 402 | } 403 | 404 | 405 | provisioner "remote-exec" { 406 | inline = [ 407 | "cd $HOME/airflow", 408 | "chmod +x deploy_airflow.sh", 409 | "./deploy_airflow.sh" 410 | ] 411 | } 412 | } 413 | -------------------------------------------------------------------------------- /modules/airflow/output.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oracle-quickstart/oke-airflow/d3f14c1b4702871fd0736d70cb4d158e0cb3e34a/modules/airflow/output.tf -------------------------------------------------------------------------------- /modules/airflow/variables.tf: -------------------------------------------------------------------------------- 1 | variable airflow_depends_on {} 2 | variable "instance_ip" {} 3 | variable "compartment_ocid" {} 4 | variable "ssh_public_key" {} 5 | variable "ssh_private_key" {} 6 | variable "cluster_id" {} 7 | variable "nodepool_id" {} 8 | variable region {} 9 | variable registry {} 10 | variable repo_name {} 11 | variable registry_user {} 12 | variable image_name {} 13 | variable image_label {} 14 | variable secret_id {} 15 | variable tenancy_ocid {} 16 | variable namespace {} 17 | variable kube_label {} 18 | variable mount_target_id {} 19 | variable nfs_ip {} 20 | variable admin_db_user {} 21 | variable admin_db_password {} 22 | variable airflow_db_user {} 23 | variable airflow_db_password {} 24 | variable db_name {} 25 | variable db_ip {} 26 | variable db_port {} 27 | locals { 28 | sql_alchemy_conn=base64encode("mysql://${var.airflow_db_user}:${var.airflow_db_password}@${var.db_ip}:${var.db_port}/${var.db_name}") 29 | } 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /modules/bastion/data.tf: -------------------------------------------------------------------------------- 1 | data "oci_identity_availability_domains" "ads" { 2 | compartment_id = var.compartment_ocid 3 | } 4 | 5 | locals { 6 | flex_shape = var.is_flex_shape ? 
[{ memory_in_gbs = var.bastion_flex_gbs, ocpus = var.bastion_flex_ocpus }] : [] 7 | } 8 | -------------------------------------------------------------------------------- /modules/bastion/main.tf: -------------------------------------------------------------------------------- 1 | resource "oci_core_instance" "bastion" { 2 | availability_domain = var.availability_domain 3 | compartment_id = var.compartment_ocid 4 | shape = var.instance_shape 5 | display_name = var.instance_name 6 | 7 | source_details { 8 | source_id = var.image_id 9 | source_type = "image" 10 | } 11 | 12 | create_vnic_details { 13 | subnet_id = var.subnet_id 14 | assign_public_ip = var.public_edge_node 15 | } 16 | 17 | metadata = { 18 | ssh_authorized_keys = var.ssh_public_key 19 | user_data = var.user_data 20 | } 21 | 22 | dynamic "shape_config" { 23 | for_each = local.flex_shape 24 | content { 25 | memory_in_gbs = shape_config.value.memory_in_gbs 26 | ocpus = shape_config.value.ocpus 27 | } 28 | } 29 | 30 | extended_metadata = { 31 | image_name = var.image_name 32 | image_label = var.image_label 33 | oke_cluster_id = var.oke_cluster_id 34 | nodepool_id = var.nodepool_id 35 | repo_name = var.repo_name 36 | registry = var.registry 37 | registry_user = var.registry_user 38 | secret_id = var.secret_id 39 | tenancy_ocid = var.tenancy_ocid 40 | sql_alchemy_conn = local.sql_alchemy_conn 41 | namespace = var.namespace 42 | kube_label = var.kube_label 43 | mount_target_id = var.mount_target_id 44 | nfs_ip = var.nfs_ip 45 | admin_db_user = var.admin_db_user 46 | admin_db_password = base64encode(var.admin_db_password) 47 | db_ip = var.db_ip 48 | db_name = var.db_name 49 | airflow_db_user = var.airflow_db_user 50 | airflow_db_password = base64encode(var.airflow_db_password) 51 | } 52 | } 53 | 54 | -------------------------------------------------------------------------------- /modules/bastion/output.tf: -------------------------------------------------------------------------------- 1 | output "bastion" { 2 | value = oci_core_instance.bastion 3 | } 4 | 5 | locals { 6 | 7 | private_ip = oci_core_instance.bastion.private_ip 8 | 9 | public_ip = oci_core_instance.bastion.public_ip 10 | 11 | instance_id = oci_core_instance.bastion.id 12 | 13 | } 14 | 15 | output "private_ip" { 16 | value = local.private_ip 17 | } 18 | 19 | output "public_ip" { 20 | value = local.public_ip 21 | } 22 | 23 | output "instance_id" { 24 | value = local.instance_id 25 | } 26 | -------------------------------------------------------------------------------- /modules/bastion/variables.tf: -------------------------------------------------------------------------------- 1 | variable "availability_domain" {} 2 | variable "compartment_ocid" {} 3 | variable "subnet_id" {} 4 | variable "instance_name" {} 5 | variable "instance_shape" {} 6 | variable "image_id" {} 7 | variable "public_edge_node" {} 8 | variable "ssh_public_key" {} 9 | variable "image_name" {} 10 | variable "image_label" {} 11 | variable "oke_cluster_id" {} 12 | variable "nodepool_id" {} 13 | variable "repo_name" {} 14 | variable "registry" {} 15 | variable "registry_user" {} 16 | variable "secret_id" {} 17 | variable "tenancy_ocid" {} 18 | variable "admin_db_user" {} 19 | variable "admin_db_password" {} 20 | variable "airflow_db_user" {} 21 | variable "airflow_db_password" {} 22 | variable "db_name" {} 23 | variable "db_ip" {} 24 | variable "db_port" {} 25 | variable "namespace" {} 26 | variable "kube_label" {} 27 | variable "mount_target_id" {} 28 | variable "nfs_ip" {} 29 | variable "user_data" {} 30 
| locals { 31 | sql_alchemy_conn=base64encode("mysql://${var.airflow_db_user}:${var.airflow_db_password}@${var.db_ip}:${var.db_port}/${var.db_name}") 32 | } 33 | variable "bastion_flex_gbs" {} 34 | variable "bastion_flex_ocpus" {} 35 | variable "is_flex_shape" {} 36 | -------------------------------------------------------------------------------- /modules/fss/data.tf: -------------------------------------------------------------------------------- 1 | data "oci_core_private_ips" "fss_ip" { 2 | subnet_id = var.subnet_id 3 | 4 | filter { 5 | name = "id" 6 | values = [oci_file_storage_mount_target.airflow_mount_target.private_ip_ids.0] 7 | } 8 | } 9 | 10 | data "oci_identity_availability_domains" "ads" { 11 | compartment_id = var.compartment_ocid 12 | } 13 | 14 | -------------------------------------------------------------------------------- /modules/fss/main.tf: -------------------------------------------------------------------------------- 1 | resource "oci_file_storage_file_system" "airflow_dags" { 2 | availability_domain = var.availability_domain 3 | compartment_id = var.compartment_ocid 4 | display_name = "Airflow Dags" 5 | } 6 | 7 | resource "oci_file_storage_file_system" "airflow_logs" { 8 | availability_domain = var.availability_domain 9 | compartment_id = var.compartment_ocid 10 | display_name = "Airflow Logs" 11 | } 12 | 13 | resource "oci_file_storage_export_set" "airflow_export_set" { 14 | mount_target_id = oci_file_storage_mount_target.airflow_mount_target.id 15 | display_name = "Airflow Dags Export" 16 | } 17 | 18 | resource "oci_file_storage_export" "airflow_export_mount1" { 19 | export_set_id = oci_file_storage_export_set.airflow_export_set.id 20 | file_system_id = oci_file_storage_file_system.airflow_dags.id 21 | path = "/airflow-dags" 22 | 23 | export_options { 24 | source = var.vcn_cidr 25 | access = "READ_WRITE" 26 | identity_squash = "NONE" 27 | require_privileged_source_port = false 28 | } 29 | } 30 | 31 | resource "oci_file_storage_export" "airflow_export_mount2" { 32 | export_set_id = oci_file_storage_export_set.airflow_export_set.id 33 | file_system_id = oci_file_storage_file_system.airflow_logs.id 34 | path = "/airflow-logs" 35 | 36 | export_options { 37 | source = var.vcn_cidr 38 | access = "READ_WRITE" 39 | identity_squash = "NONE" 40 | require_privileged_source_port = false 41 | } 42 | } 43 | 44 | resource "oci_file_storage_mount_target" "airflow_mount_target" { 45 | availability_domain = var.availability_domain 46 | compartment_id = var.compartment_ocid 47 | subnet_id = var.subnet_id 48 | } 49 | 50 | -------------------------------------------------------------------------------- /modules/fss/mount.sh: -------------------------------------------------------------------------------- 1 | "sudo yum -y install nfs-utils > nfs-utils-install.log", 2 | "sudo mkdir -p /mnt/myfsspaths/fs1/path1", 3 | "sudo mount ${local.mount_target_1_ip_address}:${var.export_path_fs1_mt1} /mnt${var.export_path_fs1_mt1}", 4 | -------------------------------------------------------------------------------- /modules/fss/outputs.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | mount_target_id = oci_file_storage_mount_target.airflow_mount_target.id 3 | } 4 | 5 | output "mount_target_id" { 6 | value = local.mount_target_id 7 | } 8 | 9 | output "nfs_ip" { 10 | value = lookup(data.oci_core_private_ips.fss_ip.private_ips[0], "ip_address") 11 | } 12 | -------------------------------------------------------------------------------- 
/modules/fss/variables.tf: -------------------------------------------------------------------------------- 1 | variable "availability_domain" {} 2 | variable "compartment_ocid" {} 3 | variable "subnet_id" {} 4 | variable "vcn_cidr" {} 5 | -------------------------------------------------------------------------------- /modules/network/data.tf: -------------------------------------------------------------------------------- 1 | data "oci_core_services" "net_services" { 2 | # count = var.useExistingVcn ? 0 : 1 3 | filter { 4 | name = "name" 5 | values = ["All .* Services In Oracle Services Network"] 6 | regex = true 7 | } 8 | } 9 | 10 | 11 | # Randoms 12 | resource "random_string" "deploy_id" { 13 | length = 4 14 | special = false 15 | } 16 | 17 | -------------------------------------------------------------------------------- /modules/network/main.tf: -------------------------------------------------------------------------------- 1 | resource "oci_core_vcn" "airflow_vcn" { 2 | count = var.useExistingVcn ? 0 : 1 3 | cidr_block = var.VCN_CIDR 4 | compartment_id = var.compartment_ocid 5 | display_name = "OKE Airflow VCN - ${random_string.deploy_id.result}" 6 | dns_label = var.vcn_dns_label 7 | } 8 | 9 | resource "oci_core_internet_gateway" "airflow_internet_gateway" { 10 | count = var.useExistingVcn ? 0 : 1 11 | compartment_id = var.compartment_ocid 12 | display_name = "airflow_internet_gateway" 13 | vcn_id = var.useExistingVcn ? var.custom_vcn[0] : oci_core_vcn.airflow_vcn.0.id 14 | } 15 | 16 | resource "oci_core_nat_gateway" "nat_gateway" { 17 | count = var.useExistingVcn ? 0 : 1 18 | compartment_id = var.compartment_ocid 19 | vcn_id = var.useExistingVcn ? var.custom_vcn[0] : oci_core_vcn.airflow_vcn.0.id 20 | display_name = "nat_gateway" 21 | } 22 | 23 | resource "oci_core_service_gateway" "airflow_service_gateway" { 24 | count = var.useExistingVcn ? 0 : 1 25 | compartment_id = var.compartment_ocid 26 | services { 27 | service_id = data.oci_core_services.net_services.services[0]["id"] 28 | } 29 | vcn_id = var.useExistingVcn ? var.custom_vcn[0] : oci_core_vcn.airflow_vcn.0.id 30 | display_name = "Airflow Service Gateway" 31 | } 32 | 33 | resource "oci_core_route_table" "RouteForComplete" { 34 | count = var.useExistingVcn ? 0 : 1 35 | compartment_id = var.compartment_ocid 36 | vcn_id = var.useExistingVcn ? var.custom_vcn[0] : oci_core_vcn.airflow_vcn.0.id 37 | display_name = "RouteTableForComplete" 38 | 39 | route_rules { 40 | destination = "0.0.0.0/0" 41 | destination_type = "CIDR_BLOCK" 42 | network_entity_id = oci_core_internet_gateway.airflow_internet_gateway.*.id[count.index] 43 | } 44 | } 45 | 46 | resource "oci_core_route_table" "private" { 47 | count = var.useExistingVcn ? 0 : 1 48 | compartment_id = var.compartment_ocid 49 | vcn_id = var.useExistingVcn ? var.custom_vcn[0] : oci_core_vcn.airflow_vcn.0.id 50 | display_name = "private" 51 | 52 | route_rules { 53 | # destination = var.oci_service_gateway 54 | destination = data.oci_core_services.net_services.services[0]["cidr_block"] 55 | destination_type = "SERVICE_CIDR_BLOCK" 56 | network_entity_id = oci_core_service_gateway.airflow_service_gateway.*.id[count.index] 57 | } 58 | 59 | route_rules { 60 | destination = "0.0.0.0/0" 61 | destination_type = "CIDR_BLOCK" 62 | network_entity_id = oci_core_nat_gateway.nat_gateway.*.id[count.index] 63 | } 64 | } 65 | 66 | resource "oci_core_security_list" "EdgeSubnet" { 67 | count = var.useExistingVcn ? 
0 : 1 68 | compartment_id = var.compartment_ocid 69 | display_name = "Edge Subnet" 70 | vcn_id = var.useExistingVcn ? var.custom_vcn[0] : oci_core_vcn.airflow_vcn.0.id 71 | 72 | egress_security_rules { 73 | destination = "0.0.0.0/0" 74 | protocol = "all" 75 | } 76 | 77 | ingress_security_rules { 78 | tcp_options { 79 | max = 22 80 | min = 22 81 | } 82 | 83 | protocol = "6" 84 | source = "0.0.0.0/0" 85 | } 86 | 87 | ingress_security_rules { 88 | tcp_options { 89 | max = var.service_port 90 | min = var.service_port 91 | } 92 | 93 | protocol = "6" 94 | source = "0.0.0.0/0" 95 | } 96 | 97 | ingress_security_rules { 98 | protocol = "all" 99 | source = var.VCN_CIDR 100 | } 101 | } 102 | 103 | resource "oci_core_security_list" "PrivateSubnet" { 104 | count = var.useExistingVcn ? 0 : 1 105 | compartment_id = var.compartment_ocid 106 | display_name = "Private" 107 | vcn_id = var.useExistingVcn ? var.custom_vcn[0] : oci_core_vcn.airflow_vcn.0.id 108 | 109 | egress_security_rules { 110 | destination = "0.0.0.0/0" 111 | protocol = "all" 112 | } 113 | # egress_security_rules { 114 | # protocol = "6" 115 | # destination = var.VCN_CIDR 116 | # } 117 | 118 | ingress_security_rules { 119 | protocol = "all" 120 | source = var.VCN_CIDR 121 | } 122 | } 123 | 124 | resource "oci_core_subnet" "edge" { 125 | count = var.useExistingVcn ? 0 : 1 126 | cidr_block = var.edge_cidr 127 | display_name = "edge" 128 | compartment_id = var.compartment_ocid 129 | vcn_id = var.useExistingVcn ? var.custom_vcn[0] : oci_core_vcn.airflow_vcn.0.id 130 | route_table_id = oci_core_route_table.RouteForComplete[count.index].id 131 | security_list_ids = [oci_core_security_list.EdgeSubnet.*.id[count.index]] 132 | dhcp_options_id = oci_core_vcn.airflow_vcn[count.index].default_dhcp_options_id 133 | dns_label = "edge" 134 | } 135 | 136 | resource "oci_core_subnet" "private" { 137 | count = var.useExistingVcn ? 0 : 1 138 | cidr_block = var.private_cidr 139 | display_name = "private" 140 | compartment_id = var.compartment_ocid 141 | vcn_id = var.useExistingVcn ? var.custom_vcn[0] : oci_core_vcn.airflow_vcn.0.id 142 | route_table_id = oci_core_route_table.private[count.index].id 143 | security_list_ids = [oci_core_security_list.PrivateSubnet.*.id[count.index]] 144 | dhcp_options_id = oci_core_vcn.airflow_vcn[count.index].default_dhcp_options_id 145 | prohibit_public_ip_on_vnic = "true" 146 | dns_label = "private" 147 | } 148 | -------------------------------------------------------------------------------- /modules/network/outputs.tf: -------------------------------------------------------------------------------- 1 | output "vcn-id" { 2 | value = var.useExistingVcn ? var.myVcn : oci_core_vcn.airflow_vcn.0.id 3 | } 4 | 5 | output "private-id" { 6 | value = var.useExistingVcn ? var.OKESubnet : oci_core_subnet.private.0.id 7 | } 8 | 9 | output "edge-id" { 10 | value = var.useExistingVcn ? var.edgeSubnet : oci_core_subnet.edge.0.id 11 | } 12 | -------------------------------------------------------------------------------- /modules/network/variables.tf: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------------------------------------------------------- 2 | # Environmental variables 3 | # You probably want to define these as environmental variables. 
4 | # Instructions on that are here: https://github.com/oci-quickstart/oci-prerequisites 5 | # --------------------------------------------------------------------------------------------------------------------- 6 | 7 | variable "tenancy_ocid" {} 8 | variable "compartment_ocid" {} 9 | variable "region" {} 10 | #variable "oci_service_gateway" {} 11 | variable "VCN_CIDR" {} 12 | variable "useExistingVcn" {} 13 | variable "custom_vcn" { 14 | type = list(string) 15 | default = [" "] 16 | } 17 | 18 | variable "vcn_dns_label" {} 19 | 20 | variable "edge_cidr" {} 21 | variable "private_cidr" {} 22 | variable "myVcn" {} 23 | variable "OKESubnet" { 24 | default = " " 25 | } 26 | variable "edgeSubnet" { 27 | default = " " 28 | } 29 | variable service_port {} 30 | -------------------------------------------------------------------------------- /modules/oci-mysql/data.tf: -------------------------------------------------------------------------------- 1 | data "oci_identity_availability_domains" "ads" { 2 | compartment_id = var.compartment_ocid 3 | } 4 | 5 | data "oci_mysql_mysql_db_system" "airflow_database" { 6 | db_system_id = oci_mysql_mysql_db_system.airflow_database.id 7 | } 8 | -------------------------------------------------------------------------------- /modules/oci-mysql/main.tf: -------------------------------------------------------------------------------- 1 | resource "oci_mysql_mysql_db_system" "airflow_database" { 2 | admin_password = var.mysqladmin_password 3 | admin_username = var.mysqladmin_username 4 | availability_domain = var.availability_domain 5 | compartment_id = var.compartment_ocid 6 | shape_name = var.mysql_shape 7 | subnet_id = var.subnet_id 8 | backup_policy { 9 | is_enabled = var.enable_mysql_backups 10 | retention_in_days = "10" 11 | } 12 | description = "Airflow Database" 13 | port = "3306" 14 | port_x = "33306" 15 | data_storage_size_in_gb = 50 16 | ip_address = var.oci_mysql_ip 17 | } 18 | 19 | -------------------------------------------------------------------------------- /modules/oci-mysql/output.tf: -------------------------------------------------------------------------------- 1 | output "db_ip" { 2 | value = data.oci_mysql_mysql_db_system.airflow_database.endpoints[0].ip_address 3 | } 4 | 5 | output "db_port" { 6 | value = data.oci_mysql_mysql_db_system.airflow_database.endpoints[0].port 7 | } 8 | -------------------------------------------------------------------------------- /modules/oci-mysql/variables.tf: -------------------------------------------------------------------------------- 1 | variable "availability_domain" {} 2 | variable "mysqladmin_password" {} 3 | variable "mysqladmin_username" {} 4 | variable "compartment_ocid" {} 5 | variable "mysql_shape" {} 6 | variable "subnet_id" {} 7 | variable "enable_mysql_backups" {} 8 | variable "oci_mysql_ip" {} 9 | -------------------------------------------------------------------------------- /modules/oke/data.tf: -------------------------------------------------------------------------------- 1 | # Gets a list of Availability Domains 2 | data "oci_identity_availability_domains" "ads" { 3 | compartment_id = var.tenancy_ocid 4 | } 5 | 6 | locals { 7 | flex_shape = var.is_flex_shape ? 
[{ memory_in_gbs = var.node_pool_node_shape_config_memory_in_gbs, ocpus = var.node_pool_node_shape_config_ocpus }] : [] 8 | } 9 | -------------------------------------------------------------------------------- /modules/oke/main.tf: -------------------------------------------------------------------------------- 1 | resource "oci_containerengine_cluster" "oke_airflow_cluster" { 2 | compartment_id = var.compartment_ocid 3 | kubernetes_version = var.kubernetes_version 4 | name = var.cluster_name 5 | vcn_id = var.vcn_id 6 | 7 | endpoint_config { 8 | is_public_ip_enabled = false 9 | # nsg_ids = var.cluster_endpoint_config_nsg_ids 10 | subnet_id = var.endpoint_subnet_id 11 | } 12 | 13 | options { 14 | add_ons { 15 | is_kubernetes_dashboard_enabled = var.cluster_options_add_ons_is_kubernetes_dashboard_enabled 16 | is_tiller_enabled = false # Default is false, left here for reference 17 | } 18 | admission_controller_options { 19 | is_pod_security_policy_enabled = var.cluster_options_admission_controller_options_is_pod_security_policy_enabled 20 | } 21 | service_lb_subnet_ids = [var.lb_subnet_id] 22 | } 23 | 24 | count = var.create_new_oke_cluster ? 1 : 0 25 | } 26 | 27 | resource "oci_containerengine_node_pool" "airflow_node_pool" { 28 | cluster_id = var.create_new_oke_cluster ? oci_containerengine_cluster.oke_airflow_cluster[0].id : var.existing_oke_cluster_id 29 | compartment_id = var.compartment_ocid 30 | kubernetes_version = var.kubernetes_version 31 | name = var.airflow_node_pool_name 32 | node_shape = var.airflow_node_pool_shape 33 | ssh_public_key = var.ssh_public_key 34 | 35 | node_config_details { 36 | dynamic "placement_configs" { 37 | for_each = data.oci_identity_availability_domains.ads.availability_domains 38 | 39 | content { 40 | availability_domain = placement_configs.value.name 41 | subnet_id = var.subnet_id 42 | } 43 | } 44 | size = var.airflow_node_pool_size 45 | } 46 | 47 | dynamic "node_shape_config" { 48 | for_each = local.flex_shape 49 | content { 50 | memory_in_gbs = node_shape_config.value.memory_in_gbs 51 | ocpus = node_shape_config.value.ocpus 52 | } 53 | } 54 | 55 | node_source_details { 56 | source_type = "IMAGE" 57 | image_id = var.image_id 58 | } 59 | 60 | initial_node_labels { 61 | key = "name" 62 | value = var.airflow_node_pool_name 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /modules/oke/outputs.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | cluster_id = var.create_new_oke_cluster ? 
oci_containerengine_cluster.oke_airflow_cluster[0].id : var.existing_oke_cluster_id 3 | nodepool_id = oci_containerengine_node_pool.airflow_node_pool.id 4 | } 5 | 6 | 7 | output "cluster_id" { 8 | value = local.cluster_id 9 | } 10 | 11 | output "nodepool_id" { 12 | value = local.nodepool_id 13 | } 14 | 15 | -------------------------------------------------------------------------------- /modules/oke/variables.tf: -------------------------------------------------------------------------------- 1 | variable "tenancy_ocid" {} 2 | variable "compartment_ocid" {} 3 | variable "vcn_id" {} 4 | variable "subnet_id" {} 5 | variable "lb_subnet_id" {} 6 | variable "cluster_name" {} 7 | variable "kubernetes_version" {} 8 | variable "airflow_node_pool_name" {} 9 | variable "airflow_node_pool_shape" {} 10 | variable "airflow_node_pool_size" {} 11 | variable "cluster_options_add_ons_is_kubernetes_dashboard_enabled" {} 12 | variable "cluster_options_admission_controller_options_is_pod_security_policy_enabled" {} 13 | variable "image_id" {} 14 | variable "ssh_public_key" {} 15 | variable "create_new_oke_cluster" {} 16 | variable "existing_oke_cluster_id" {} 17 | variable "endpoint_subnet_id" {} 18 | variable "node_pool_node_shape_config_memory_in_gbs" {} 19 | variable "node_pool_node_shape_config_ocpus" {} 20 | variable "is_flex_shape" {} 21 | -------------------------------------------------------------------------------- /oci-provider/custom/connection.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
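# NOTE (illustrative, not part of the upstream file): this customized Connection
# model adds an 'oci' entry to Airflow's connection type list. A hypothetical
# example of how parse_from_uri() below maps a URI onto the model's fields:
#
#   Connection(conn_id="my_db", uri="postgresql://scott:tiger@db.example.com:5432/airflow")
#   # -> conn_type="postgres" (the scheme is normalized), host="db.example.com",
#   #    login="scott", password="tiger", port=5432, schema="airflow"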
19 | 20 | import json 21 | from builtins import bytes 22 | from urllib.parse import parse_qsl, quote, unquote, urlencode, urlparse 23 | 24 | from sqlalchemy import Column, Integer, String, Boolean 25 | from sqlalchemy.ext.declarative import declared_attr 26 | from sqlalchemy.orm import synonym 27 | 28 | from airflow import LoggingMixin 29 | from airflow.exceptions import AirflowException 30 | from airflow.models.base import Base, ID_LEN 31 | from airflow.models.crypto import get_fernet 32 | 33 | 34 | # Python automatically converts all letters to lowercase in hostname 35 | # See: https://issues.apache.org/jira/browse/AIRFLOW-3615 36 | def parse_netloc_to_hostname(uri_parts): 37 | hostname = unquote(uri_parts.hostname or '') 38 | if '/' in hostname: 39 | hostname = uri_parts.netloc 40 | if "@" in hostname: 41 | hostname = hostname.rsplit("@", 1)[1] 42 | if ":" in hostname: 43 | hostname = hostname.split(":", 1)[0] 44 | hostname = unquote(hostname) 45 | return hostname 46 | 47 | 48 | class Connection(Base, LoggingMixin): 49 | """ 50 | Placeholder to store information about different database instances 51 | connection information. The idea here is that scripts use references to 52 | database instances (conn_id) instead of hard coding hostname, logins and 53 | passwords when using operators or hooks. 54 | """ 55 | __tablename__ = "connection" 56 | 57 | id = Column(Integer(), primary_key=True) 58 | conn_id = Column(String(ID_LEN)) 59 | conn_type = Column(String(500)) 60 | host = Column(String(500)) 61 | schema = Column(String(500)) 62 | login = Column(String(500)) 63 | _password = Column('password', String(5000)) 64 | port = Column(Integer()) 65 | is_encrypted = Column(Boolean, unique=False, default=False) 66 | is_extra_encrypted = Column(Boolean, unique=False, default=False) 67 | _extra = Column('extra', String(5000)) 68 | 69 | _types = [ 70 | ('docker', 'Docker Registry',), 71 | ('fs', 'File (path)'), 72 | ('ftp', 'FTP',), 73 | ('google_cloud_platform', 'Google Cloud Platform'), 74 | ('hdfs', 'HDFS',), 75 | ('http', 'HTTP',), 76 | ('pig_cli', 'Pig Client Wrapper',), 77 | ('hive_cli', 'Hive Client Wrapper',), 78 | ('hive_metastore', 'Hive Metastore Thrift',), 79 | ('hiveserver2', 'Hive Server 2 Thrift',), 80 | ('jdbc', 'Jdbc Connection',), 81 | ('jenkins', 'Jenkins'), 82 | ('mysql', 'MySQL',), 83 | ('postgres', 'Postgres',), 84 | ('oci', 'Oracle Cloud Infrastructure',), 85 | ('oracle', 'Oracle',), 86 | ('vertica', 'Vertica',), 87 | ('presto', 'Presto',), 88 | ('s3', 'S3',), 89 | ('samba', 'Samba',), 90 | ('sqlite', 'Sqlite',), 91 | ('ssh', 'SSH',), 92 | ('cloudant', 'IBM Cloudant',), 93 | ('mssql', 'Microsoft SQL Server'), 94 | ('mesos_framework-id', 'Mesos Framework ID'), 95 | ('jira', 'JIRA',), 96 | ('redis', 'Redis',), 97 | ('wasb', 'Azure Blob Storage'), 98 | ('databricks', 'Databricks',), 99 | ('aws', 'Amazon Web Services',), 100 | ('emr', 'Elastic MapReduce',), 101 | ('snowflake', 'Snowflake',), 102 | ('segment', 'Segment',), 103 | ('azure_data_lake', 'Azure Data Lake'), 104 | ('azure_container_instances', 'Azure Container Instances'), 105 | ('azure_cosmos', 'Azure CosmosDB'), 106 | ('cassandra', 'Cassandra',), 107 | ('qubole', 'Qubole'), 108 | ('mongo', 'MongoDB'), 109 | ('gcpcloudsql', 'Google Cloud SQL'), 110 | ('grpc', 'GRPC Connection'), 111 | ('yandexcloud', 'Yandex Cloud'), 112 | ('spark', 'Spark'), 113 | ] 114 | 115 | def __init__( 116 | self, conn_id=None, conn_type=None, 117 | host=None, login=None, password=None, 118 | schema=None, port=None, extra=None, 119 | uri=None): 120 | 
self.conn_id = conn_id 121 | if uri: 122 | self.parse_from_uri(uri) 123 | else: 124 | self.conn_type = conn_type 125 | self.host = host 126 | self.login = login 127 | self.password = password 128 | self.schema = schema 129 | self.port = port 130 | self.extra = extra 131 | 132 | def parse_from_uri(self, uri): 133 | uri_parts = urlparse(uri) 134 | conn_type = uri_parts.scheme 135 | if conn_type == 'postgresql': 136 | conn_type = 'postgres' 137 | elif '-' in conn_type: 138 | conn_type = conn_type.replace('-', '_') 139 | self.conn_type = conn_type 140 | self.host = parse_netloc_to_hostname(uri_parts) 141 | quoted_schema = uri_parts.path[1:] 142 | self.schema = unquote(quoted_schema) if quoted_schema else quoted_schema 143 | self.login = unquote(uri_parts.username) \ 144 | if uri_parts.username else uri_parts.username 145 | self.password = unquote(uri_parts.password) \ 146 | if uri_parts.password else uri_parts.password 147 | self.port = uri_parts.port 148 | if uri_parts.query: 149 | self.extra = json.dumps(dict(parse_qsl(uri_parts.query, keep_blank_values=True))) 150 | 151 | def get_uri(self): 152 | uri = '{}://'.format(str(self.conn_type).lower().replace('_', '-')) 153 | 154 | authority_block = '' 155 | if self.login is not None: 156 | authority_block += quote(self.login, safe='') 157 | 158 | if self.password is not None: 159 | authority_block += ':' + quote(self.password, safe='') 160 | 161 | if authority_block != '': 162 | authority_block += '@' 163 | 164 | uri += authority_block 165 | 166 | host_block = '' 167 | if self.host: 168 | host_block += quote(self.host, safe='') 169 | 170 | if self.port: 171 | if host_block != '': 172 | host_block += ':{}'.format(self.port) 173 | else: 174 | host_block += '@:{}'.format(self.port) 175 | 176 | if self.schema: 177 | host_block += '/{}'.format(quote(self.schema, safe='')) 178 | 179 | uri += host_block 180 | 181 | if self.extra_dejson: 182 | uri += '?{}'.format(urlencode(self.extra_dejson)) 183 | 184 | return uri 185 | 186 | def get_password(self): 187 | if self._password and self.is_encrypted: 188 | fernet = get_fernet() 189 | if not fernet.is_encrypted: 190 | raise AirflowException( 191 | "Can't decrypt encrypted password for login={}, \ 192 | FERNET_KEY configuration is missing".format(self.login)) 193 | return fernet.decrypt(bytes(self._password, 'utf-8')).decode() 194 | else: 195 | return self._password 196 | 197 | def set_password(self, value): 198 | if value: 199 | fernet = get_fernet() 200 | self._password = fernet.encrypt(bytes(value, 'utf-8')).decode() 201 | self.is_encrypted = fernet.is_encrypted 202 | 203 | @declared_attr 204 | def password(cls): 205 | return synonym('_password', 206 | descriptor=property(cls.get_password, cls.set_password)) 207 | 208 | def get_extra(self): 209 | if self._extra and self.is_extra_encrypted: 210 | fernet = get_fernet() 211 | if not fernet.is_encrypted: 212 | raise AirflowException( 213 | "Can't decrypt `extra` params for login={},\ 214 | FERNET_KEY configuration is missing".format(self.login)) 215 | return fernet.decrypt(bytes(self._extra, 'utf-8')).decode() 216 | else: 217 | return self._extra 218 | 219 | def set_extra(self, value): 220 | if value: 221 | fernet = get_fernet() 222 | self._extra = fernet.encrypt(bytes(value, 'utf-8')).decode() 223 | self.is_extra_encrypted = fernet.is_encrypted 224 | else: 225 | self._extra = value 226 | self.is_extra_encrypted = False 227 | 228 | @declared_attr 229 | def extra(cls): 230 | return synonym('_extra', 231 | descriptor=property(cls.get_extra, cls.set_extra)) 232 | 
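    # Illustrative round trip (hypothetical values): a connection with
    # conn_type="mysql", login="scott", password="tiger", host="db.example.com",
    # port=3306 and schema="airflow" is rendered by get_uri() above as
    #   mysql://scott:tiger@db.example.com:3306/airflow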
233 | def rotate_fernet_key(self): 234 | fernet = get_fernet() 235 | if self._password and self.is_encrypted: 236 | self._password = fernet.rotate(self._password.encode('utf-8')).decode() 237 | if self._extra and self.is_extra_encrypted: 238 | self._extra = fernet.rotate(self._extra.encode('utf-8')).decode() 239 | 240 | def get_hook(self): 241 | if self.conn_type == 'mysql': 242 | from airflow.hooks.mysql_hook import MySqlHook 243 | return MySqlHook(mysql_conn_id=self.conn_id) 244 | elif self.conn_type == 'google_cloud_platform': 245 | from airflow.contrib.hooks.bigquery_hook import BigQueryHook 246 | return BigQueryHook(bigquery_conn_id=self.conn_id) 247 | elif self.conn_type == 'postgres': 248 | from airflow.hooks.postgres_hook import PostgresHook 249 | return PostgresHook(postgres_conn_id=self.conn_id) 250 | elif self.conn_type == 'pig_cli': 251 | from airflow.hooks.pig_hook import PigCliHook 252 | return PigCliHook(pig_cli_conn_id=self.conn_id) 253 | elif self.conn_type == 'hive_cli': 254 | from airflow.hooks.hive_hooks import HiveCliHook 255 | return HiveCliHook(hive_cli_conn_id=self.conn_id) 256 | elif self.conn_type == 'presto': 257 | from airflow.hooks.presto_hook import PrestoHook 258 | return PrestoHook(presto_conn_id=self.conn_id) 259 | elif self.conn_type == 'hiveserver2': 260 | from airflow.hooks.hive_hooks import HiveServer2Hook 261 | return HiveServer2Hook(hiveserver2_conn_id=self.conn_id) 262 | elif self.conn_type == 'sqlite': 263 | from airflow.hooks.sqlite_hook import SqliteHook 264 | return SqliteHook(sqlite_conn_id=self.conn_id) 265 | elif self.conn_type == 'jdbc': 266 | from airflow.hooks.jdbc_hook import JdbcHook 267 | return JdbcHook(jdbc_conn_id=self.conn_id) 268 | elif self.conn_type == 'mssql': 269 | from airflow.hooks.mssql_hook import MsSqlHook 270 | return MsSqlHook(mssql_conn_id=self.conn_id) 271 | elif self.conn_type == 'oci': 272 | from hooks.oci_base import OCIBaseHook 273 | return OCIBaseHook(oci_conn_id=self.conn_id) 274 | elif self.conn_type == 'oracle': 275 | from airflow.hooks.oracle_hook import OracleHook 276 | return OracleHook(oracle_conn_id=self.conn_id) 277 | elif self.conn_type == 'vertica': 278 | from airflow.contrib.hooks.vertica_hook import VerticaHook 279 | return VerticaHook(vertica_conn_id=self.conn_id) 280 | elif self.conn_type == 'cloudant': 281 | from airflow.contrib.hooks.cloudant_hook import CloudantHook 282 | return CloudantHook(cloudant_conn_id=self.conn_id) 283 | elif self.conn_type == 'jira': 284 | from airflow.contrib.hooks.jira_hook import JiraHook 285 | return JiraHook(jira_conn_id=self.conn_id) 286 | elif self.conn_type == 'redis': 287 | from airflow.contrib.hooks.redis_hook import RedisHook 288 | return RedisHook(redis_conn_id=self.conn_id) 289 | elif self.conn_type == 'wasb': 290 | from airflow.contrib.hooks.wasb_hook import WasbHook 291 | return WasbHook(wasb_conn_id=self.conn_id) 292 | elif self.conn_type == 'docker': 293 | from airflow.hooks.docker_hook import DockerHook 294 | return DockerHook(docker_conn_id=self.conn_id) 295 | elif self.conn_type == 'azure_data_lake': 296 | from airflow.contrib.hooks.azure_data_lake_hook import AzureDataLakeHook 297 | return AzureDataLakeHook(azure_data_lake_conn_id=self.conn_id) 298 | elif self.conn_type == 'azure_cosmos': 299 | from airflow.contrib.hooks.azure_cosmos_hook import AzureCosmosDBHook 300 | return AzureCosmosDBHook(azure_cosmos_conn_id=self.conn_id) 301 | elif self.conn_type == 'cassandra': 302 | from airflow.contrib.hooks.cassandra_hook import CassandraHook 303 | return 
CassandraHook(cassandra_conn_id=self.conn_id) 304 | elif self.conn_type == 'mongo': 305 | from airflow.contrib.hooks.mongo_hook import MongoHook 306 | return MongoHook(conn_id=self.conn_id) 307 | elif self.conn_type == 'gcpcloudsql': 308 | from airflow.contrib.hooks.gcp_sql_hook import CloudSqlDatabaseHook 309 | return CloudSqlDatabaseHook(gcp_cloudsql_conn_id=self.conn_id) 310 | elif self.conn_type == 'grpc': 311 | from airflow.contrib.hooks.grpc_hook import GrpcHook 312 | return GrpcHook(grpc_conn_id=self.conn_id) 313 | raise AirflowException("Unknown hook type {}".format(self.conn_type)) 314 | 315 | def __repr__(self): 316 | return self.conn_id 317 | 318 | def log_info(self): 319 | return ("id: {}. Host: {}, Port: {}, Schema: {}, " 320 | "Login: {}, Password: {}, extra: {}". 321 | format(self.conn_id, 322 | self.host, 323 | self.port, 324 | self.schema, 325 | self.login, 326 | "XXXXXXXX" if self.password else None, 327 | "XXXXXXXX" if self.extra_dejson else None)) 328 | 329 | def debug_info(self): 330 | return ("id: {}. Host: {}, Port: {}, Schema: {}, " 331 | "Login: {}, Password: {}, extra: {}". 332 | format(self.conn_id, 333 | self.host, 334 | self.port, 335 | self.schema, 336 | self.login, 337 | "XXXXXXXX" if self.password else None, 338 | self.extra_dejson)) 339 | 340 | @property 341 | def extra_dejson(self): 342 | """Returns the extra property by deserializing json.""" 343 | obj = {} 344 | if self.extra: 345 | try: 346 | obj = json.loads(self.extra) 347 | except Exception as e: 348 | self.log.exception(e) 349 | self.log.error("Failed parsing the json for conn_id %s", self.conn_id) 350 | 351 | return obj 352 | -------------------------------------------------------------------------------- /oci-provider/custom/connection_form.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * Created by janomar on 23/07/15. 
22 | */ 23 | 24 | $(document).ready(function() { 25 | var config = { 26 | jdbc: { 27 | hidden_fields: ['port', 'schema', 'extra'], 28 | relabeling: {'host': 'Connection URL'}, 29 | }, 30 | google_cloud_platform: { 31 | hidden_fields: ['host', 'schema', 'login', 'password', 'port', 'extra'], 32 | relabeling: {}, 33 | }, 34 | cloudant: { 35 | hidden_fields: ['port', 'extra'], 36 | relabeling: { 37 | 'host': 'Account', 38 | 'login': 'Username (or API Key)', 39 | 'schema': 'Database' 40 | } 41 | }, 42 | jenkins: { 43 | hidden_fields: ['schema'], 44 | relabeling: { 45 | 'login': 'Username', 46 | 'password': 'API token or password', 47 | 'extra': 'Use https (true/false, default false)' 48 | } 49 | }, 50 | docker: { 51 | hidden_fields: ['port', 'schema'], 52 | relabeling: { 53 | 'host': 'Registry URL', 54 | 'login': 'Username', 55 | } 56 | }, 57 | oci: { 58 | hidden_fields: ['host', 'schema', 'password', 'port', 'extra'], 59 | relabeling: { 60 | 'login': 'User OCID'}, 61 | }, 62 | qubole: { 63 | hidden_fields: ['login', 'schema', 'port', 'extra'], 64 | relabeling: { 65 | 'host': 'API Endpoint', 66 | 'password': 'Auth Token', 67 | }, 68 | placeholders: { 69 | 'host': 'https://.qubole.com/api' 70 | } 71 | }, 72 | ssh: { 73 | hidden_fields: ['schema'], 74 | relabeling: { 75 | 'login': 'Username', 76 | } 77 | }, 78 | yandexcloud: { 79 | hidden_fields: ['host', 'schema', 'login', 'password', 'port', 'extra'], 80 | relabeling: {}, 81 | }, 82 | spark: { 83 | hidden_fields: ['schema', 'login', 'password'], 84 | relabeling: {}, 85 | }, 86 | } 87 | function connTypeChange(connectionType) { 88 | $("div.form-group").removeClass("hide"); 89 | $.each($("[id^='extra__']"), function() { 90 | $(this).parent().parent().addClass('hide') 91 | }); 92 | // Somehow the previous command doesn't honor __ 93 | $("#extra").parent().parent().removeClass('hide') 94 | $.each($("[id^='extra__"+connectionType+"']"), function() { 95 | $(this).parent().parent().removeClass('hide') 96 | }); 97 | $("label[orig_text]").each(function(){ 98 | $(this).text($(this).attr("orig_text")); 99 | }); 100 | $(".form-control").each(function(){$(this).attr('placeholder', '')}); 101 | 102 | if (config[connectionType] != undefined){ 103 | $.each(config[connectionType].hidden_fields, function(i, field){ 104 | $("#" + field).parent().parent().addClass('hide') 105 | }); 106 | $.each(config[connectionType].relabeling, function(k, v){ 107 | lbl = $("label[for='" + k + "']") 108 | lbl.attr("orig_text", lbl.text()); 109 | $("label[for='" + k + "']").text(v); 110 | }); 111 | $.each(config[connectionType].placeholders, function(k, v){ 112 | $("#" + k).attr('placeholder', v); 113 | }); 114 | } 115 | } 116 | var connectionType=$("#conn_type").val(); 117 | $("#conn_type").on('change', function(e) { 118 | connectionType = $("#conn_type").val(); 119 | connTypeChange(connectionType); 120 | }); 121 | connTypeChange(connectionType); 122 | }); 123 | -------------------------------------------------------------------------------- /oci-provider/dags/oci_adb_sql_example.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from airflow import DAG 3 | from sys import modules 4 | from operators.oci_adb import OCIDBOperator 5 | 6 | default_args = {'owner': 'airflow', 7 | 'start_date': datetime(2020, 5, 26), 8 | 'email': ['your.email@somecompany.com'], 9 | 'email_on_failure': False, 10 | 'email_on_retry': False 11 | } 12 | 13 | dag = DAG('oci_adb_sql_example', 14 | default_args=default_args, 15 | 
schedule_interval='@hourly', 16 | catchup=False 17 | ) 18 | 19 | oci_conn_id = "oci_default" 20 | bucketname = "BUCKET_NAME" 21 | db_name = "DATABASE_NAME" 22 | compartment_ocid = "COMPARTMENT OCID" 23 | db_workload = "DW" 24 | tns_admin_root = "/path/to/tns_admin/" 25 | user_id = "DATABASE_USER" 26 | password = "DATABASE_PASSWORD" 27 | drop_table = """ 28 | BEGIN 29 | EXECUTE IMMEDIATE 'DROP TABLE python_modules'; 30 | EXCEPTION 31 | WHEN OTHERS THEN 32 | IF SQLCODE != -942 THEN 33 | RAISE; 34 | END IF; 35 | END; 36 | """ 37 | create_table = """ 38 | CREATE TABLE python_modules ( 39 | module_name VARCHAR2(100) NOT NULL, 40 | file_path VARCHAR2(300) NOT NULL 41 | ) 42 | """ 43 | many_sql_data = [] 44 | for m_name, m_info in modules.items(): 45 | try: 46 | many_sql_data.append((m_name, m_info.__file__)) 47 | except AttributeError: 48 | pass 49 | many_sql="INSERT INTO python_modules(module_name, file_path) VALUES (:1, :2)" 50 | debug = True 51 | 52 | with dag: 53 | t1 = OCIDBOperator(task_id='drop_table', compartment_ocid=compartment_ocid, db_name=db_name, 54 | db_workload=db_workload, tns_admin_root=tns_admin_root, user_id=user_id, 55 | password=password, single_sql=drop_table, debug=debug) 56 | t2 = OCIDBOperator(task_id='create_table', compartment_ocid=compartment_ocid, db_name=db_name, 57 | db_workload=db_workload, tns_admin_root=tns_admin_root, user_id=user_id, 58 | password=password, single_sql=create_table, debug=debug) 59 | t3 = OCIDBOperator(task_id='insert_data', compartment_ocid=compartment_ocid, db_name=db_name, 60 | db_workload=db_workload, tns_admin_root=tns_admin_root, user_id=user_id, 61 | password=password, many_sql=many_sql, many_sql_data=many_sql_data, debug=debug) 62 | t1 >> t2 >> t3 63 | 64 | -------------------------------------------------------------------------------- /oci-provider/dags/oci_advanced_example.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from airflow import DAG 3 | from operators.oci_object_storage import MakeBucket, CopyFileToOCIObjectStorageOperator 4 | from operators.oci_data_flow import OCIDataFlowRun, OCIDataFlowCreateApplication 5 | 6 | default_args = {'owner': 'airflow', 7 | 'start_date': datetime(2020, 5, 26), 8 | 'email': ['your_email@somecompany.com'], 9 | 'email_on_failure': False, 10 | 'email_on_retry': False 11 | } 12 | 13 | dag = DAG('oci_advanced_example', 14 | default_args=default_args, 15 | schedule_interval='@hourly', 16 | catchup=False 17 | ) 18 | 19 | oci_conn_id = "oci_default" 20 | bucketname = "SomeBucketName" 21 | compartment_ocid = "COMPARTMENT_OCID" 22 | dataflow_file = "some_local_file" 23 | dataflow_appname = "some_app_name" 24 | 25 | 26 | with dag: 27 | t1 = MakeBucket(task_id='Make_Bucket', 28 | bucket_name=bucketname, 29 | oci_conn_id=oci_conn_id, 30 | compartment_ocid=compartment_ocid) 31 | t2 = CopyFileToOCIObjectStorageOperator(task_id='Copy_{0}_to_Bucket'.format(dataflow_file), 32 | bucket_name=bucketname, 33 | compartment_ocid=compartment_ocid, 34 | oci_conn_id=oci_conn_id, 35 | object_name=dataflow_file, 36 | local_file_path='/home/airflow/') 37 | t3 = OCIDataFlowCreateApplication(task_id='Create_Dataflow_Application_{0}'.format(dataflow_appname), 38 | bucket_name=bucketname, 39 | display_name=dataflow_appname, 40 | compartment_ocid=compartment_ocid, 41 | oci_conn_id=oci_conn_id, 42 | object_name=dataflow_file, 43 | language='PYTHON', 44 | ) 45 | t4 = OCIDataFlowRun(task_id='Run_Dataflow_Application_{0}'.format(dataflow_appname), 46 | 
compartment_ocid=compartment_ocid, 47 | display_name=dataflow_appname, 48 | oci_conn_id=oci_conn_id, 49 | bucket_name=bucketname 50 | ) 51 | t1 >> t2 >> t3 >> t4 52 | -------------------------------------------------------------------------------- /oci-provider/dags/oci_simple_example.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from airflow import DAG 3 | from hooks.oci_base import OCIBaseHook 4 | from hooks.oci_object_storage import OCIObjectStorageHook 5 | from operators.oci_object_storage import MakeBucket 6 | 7 | default_args = {'owner': 'airflow', 8 | 'start_date': datetime(2020, 5, 26), 9 | 'email': ['your_email@somecompany.com'], 10 | 'email_on_failure': False, 11 | 'email_on_retry': False 12 | } 13 | 14 | dag = DAG('oci_simple_example', 15 | default_args=default_args, 16 | schedule_interval='@hourly', 17 | catchup=False 18 | ) 19 | 20 | oci_conn_id = "oci_default" 21 | bucketname = "SomeBucketName" 22 | compartment_ocid = "COMPARTMENT_OCID" 23 | 24 | with dag: 25 | make_bucket = MakeBucket(task_id='Make_Bucket', bucket_name=bucketname,oci_conn_id=oci_conn_id, compartment_ocid=compartment_ocid) 26 | 27 | make_bucket 28 | -------------------------------------------------------------------------------- /oci-provider/dags/oci_smoketest.py: -------------------------------------------------------------------------------- 1 | # A smoke test to ensure your environment works. 2 | 3 | import datetime 4 | 5 | import oci 6 | 7 | from airflow import DAG 8 | from airflow.models.baseoperator import BaseOperator 9 | from airflow.utils.decorators import apply_defaults 10 | from hooks.oci_base import OCIBaseHook 11 | 12 | 13 | # The smoke test loads the object storage namespace. 14 | class SmokeTestOperator(BaseOperator): 15 | @apply_defaults 16 | def __init__(self, oci_conn_id: str, *args, **kwargs): 17 | self.oci_conn_id = oci_conn_id 18 | super().__init__(*args, **kwargs) 19 | 20 | def execute(self, context): 21 | self.hook = OCIBaseHook(self.oci_conn_id) 22 | object_store_client = self.hook.get_client( 23 | oci.object_storage.ObjectStorageClient 24 | ) 25 | self.hook.validate_config() 26 | namespace = object_store_client.get_namespace().data 27 | self.log.info(f"Namespace is {namespace}") 28 | 29 | 30 | default_args = { 31 | "owner": "airflow", 32 | "start_date": datetime.datetime(2020, 7, 1), 33 | "email": ["your_email@somecompany.com"], 34 | "email_on_failure": False, 35 | "email_on_retry": False, 36 | } 37 | 38 | # This schedule_interval runs the Application every 30 minutes. 39 | # Customize it as needed. 40 | dag = DAG( 41 | "oci_smoke_test", 42 | default_args=default_args, 43 | schedule_interval="0 * * * *", 44 | catchup=False, 45 | ) 46 | 47 | # Customize the connection you want to use. 48 | oci_conn_id = "oci_default" 49 | 50 | smoke_test_step = SmokeTestOperator( 51 | task_id="oci_smoke_test", oci_conn_id=oci_conn_id, dag=dag, 52 | ) 53 | smoke_test_step 54 | -------------------------------------------------------------------------------- /oci-provider/dags/schedule_dataflow_app.py: -------------------------------------------------------------------------------- 1 | # This a very simple example to schedule a Data Flow Application with just a few 2 | # tweaks. 3 | # 4 | # To use this: 5 | # 1. Customize the schedule_interval as needed. 6 | # 2. Set the Application OCID, Compartment OCID. 7 | # 3. If needed, set logs and warehouse buckets. 8 | # 4. 
If needed, set the oci_namespace variable or create an Airflow Variable (preferred). 9 | # 5. If you want to, customize the display_name variable to change how Runs appear. 10 | # 6. If you want to, customize the SLA setting. SLA misses will appear in the Airflow UI. 11 | # 12 | # After setting these, copy the script into your production DAG directory 13 | # usually (/opt/airflow/dags) and your job will run on the period you specified. 14 | 15 | from airflow import DAG 16 | from airflow.models import Variable 17 | from operators.oci_data_flow import OCIDataFlowRun 18 | 19 | import datetime 20 | 21 | default_args = { 22 | "owner": "airflow", 23 | "start_date": datetime.datetime(2020, 6, 26), 24 | "email": ["your_email@somecompany.com"], 25 | "email_on_failure": False, 26 | "email_on_retry": False, 27 | "sla": datetime.timedelta(hours=12), 28 | } 29 | 30 | # This schedule_interval runs the Application every 30 minutes. 31 | # Customize it as needed. 32 | dag = DAG( 33 | "schedule_dataflow_app", 34 | default_args=default_args, 35 | schedule_interval="0/30 * * * *", 36 | catchup=False, 37 | ) 38 | 39 | # Customize these variables. 40 | # Find the OCID values in the UI or using the CLI. 41 | oci_conn_id = "oci_default" 42 | dataflow_application_ocid = "UNSET" 43 | compartment_ocid = "UNSET" 44 | logs_bucket = "dataflow-logs" 45 | warehouse_bucket = "dataflow-warehouse" 46 | try: 47 | namespace = Variable.get("oci_namespace") 48 | except: 49 | namespace = "UNSET" 50 | 51 | # Ensure everything is set. 52 | assert dataflow_application_ocid != "UNSET", "You need to set dataflow_application_ocid" 53 | assert compartment_ocid != "UNSET", "You need to set compartment_ocid" 54 | assert ( 55 | namespace != "UNSET" 56 | ), "You need to set namespace as an Airflow variable or in the script" 57 | 58 | logs_bucket_uri = f"oci://{logs_bucket}@{namespace}/" 59 | warehouse_bucket_uri = f"oci://{warehouse_bucket}@{namespace}/" 60 | display_name = "Application Run on {{ ds }}" 61 | 62 | run_application_step = OCIDataFlowRun( 63 | task_id="Run_Dataflow_Application", 64 | compartment_ocid=compartment_ocid, 65 | application_ocid=dataflow_application_ocid, 66 | display_name=display_name, 67 | oci_conn_id=oci_conn_id, 68 | logs_bucket_uri=logs_bucket_uri, 69 | warehouse_bucket_uri=warehouse_bucket_uri, 70 | dag=dag, 71 | ) 72 | run_application_step 73 | -------------------------------------------------------------------------------- /oci-provider/dags/schedule_dataflow_pipeline.py: -------------------------------------------------------------------------------- 1 | # Schedule a sequence of Data Flow jobs to be run one after another. 2 | # 3 | # To use this: 4 | # 1. Customize the schedule_interval as needed. 5 | # 2. Set the Compartment OCID and Application OCIDs. 6 | # 3. If needed, set logs and warehouse buckets. 7 | # 4. If needed, set the oci_namespace variable or create an Airflow Variable (preferred). 8 | # 5. If you want to, customize the display_name variable to change how Runs appear. 9 | # 6. If you want to, customize the SLA setting. SLA misses will appear in the Airflow UI. 10 | # 11 | # After setting these, copy the script into your production DAG directory 12 | # usually (/opt/airflow/dags) and your job will run on the period you specified. 
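# Illustrative note: the loop at the end of this file wires the Run tasks
# together with set_downstream(), which for three hypothetical applications is
# the same as writing:
#   steps[0] >> steps[1] >> steps[2]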
13 | 14 | from airflow import DAG 15 | from airflow.models import Variable 16 | from operators.oci_data_flow import OCIDataFlowRun 17 | 18 | import datetime 19 | 20 | default_args = { 21 | "owner": "airflow", 22 | "start_date": datetime.datetime(2020, 6, 26), 23 | "email": ["your_email@somecompany.com"], 24 | "email_on_failure": False, 25 | "email_on_retry": False, 26 | "sla": datetime.timedelta(hours=12), 27 | } 28 | 29 | # This schedule_interval runs the DAG every 30 minutes. 30 | # Customize it as needed. 31 | dag = DAG( 32 | "schedule_dataflow_pipeline", 33 | default_args=default_args, 34 | schedule_interval="0/30 * * * *", 35 | catchup=False, 36 | ) 37 | 38 | # Customize these variables. 39 | # Find the OCID values in the UI or using the CLI. 40 | oci_conn_id = "oci_default" 41 | dataflow_application_ocids = [ 42 | "my_dataflow_ocid_1", 43 | "my_dataflow_ocid_2", 44 | "my_dataflow_ocid_3" 45 | ] 46 | compartment_ocid = "UNSET" 47 | logs_bucket = "dataflow-logs" 48 | warehouse_bucket = "dataflow-warehouse" 49 | try: 50 | namespace = Variable.get("oci_namespace") 51 | except: 52 | namespace = "UNSET" 53 | 54 | # Ensure everything is set. 55 | assert len(dataflow_application_ocids) > 0, "You need to set dataflow_application_ocids" 56 | assert compartment_ocid != "UNSET", "You need to set compartment_ocid" 57 | assert ( 58 | namespace != "UNSET" 59 | ), "You need to set namespace as an Airflow variable or in the script" 60 | 61 | logs_bucket_uri = f"oci://{logs_bucket}@{namespace}/" 62 | warehouse_bucket_uri = f"oci://{warehouse_bucket}@{namespace}/" 63 | display_name = "Pipeline Step {i} run on {{{{ ts_nodash }}}}" 64 | 65 | with dag: 66 | steps = [] 67 | for i, ocid in enumerate(dataflow_application_ocids): 68 | steps.append(OCIDataFlowRun( 69 | task_id=f"Dataflow_Pipeline_Step_{i}", 70 | compartment_ocid=compartment_ocid, 71 | application_ocid=ocid, 72 | display_name=display_name.format(i=i+1), 73 | oci_conn_id=oci_conn_id, 74 | logs_bucket_uri=logs_bucket_uri, 75 | warehouse_bucket_uri=warehouse_bucket_uri, 76 | )) 77 | 78 | # Chain the steps together sequentially. 79 | for head, tail in zip(steps, steps[1:]): 80 | head.set_downstream(tail) 81 | -------------------------------------------------------------------------------- /oci-provider/dags/schedule_dataflow_with_parameters.py: -------------------------------------------------------------------------------- 1 | # To use this example: 2 | # 1. Customize the schedule_interval as needed. 3 | # 2. Set the Application OCID, Compartment OCID. 4 | # 3. If needed, set logs and warehouse buckets. 5 | # 4. If needed, set the oci_namespace variable or create an Airflow Variable (preferred). 6 | # 5. If you want to, customize the display_name variable to change how Runs appear. 7 | # 6. If you want to, customize the SLA setting. SLA misses will appear in the Airflow UI. 8 | # 9 | # Additionally you will need to customize parameter_list. 10 | # The parameters you provide need to be consistent with what your Application expects. 11 | # 12 | # After setting these, copy the script into your production DAG directory 13 | # usually (/opt/airflow/dags) and your job will run on the period you specified. 
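# Illustrative only: the object storage namespace read via Variable.get() below
# can be seeded once as an Airflow Variable (the value here is a placeholder),
# for example from a Python shell on the Airflow host:
#   from airflow.models import Variable
#   Variable.set("oci_namespace", "my_tenancy_namespace")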
14 | 15 | from airflow import DAG 16 | from airflow.models import Variable 17 | from operators.oci_data_flow import OCIDataFlowRun 18 | 19 | import datetime 20 | import oci 21 | 22 | default_args = { 23 | "owner": "airflow", 24 | "start_date": datetime.datetime(2020, 6, 26), 25 | "email": ["your_email@somecompany.com"], 26 | "email_on_failure": False, 27 | "email_on_retry": False, 28 | "sla": datetime.timedelta(hours=12), 29 | } 30 | 31 | # This schedule_interval runs the Application every 30 minutes. 32 | # Customize it as needed. 33 | dag = DAG( 34 | "schedule_dataflow_with_parameters", 35 | default_args=default_args, 36 | schedule_interval="0/30 * * * *", 37 | catchup=False, 38 | ) 39 | 40 | # Customize these variables. 41 | # Find the OCID values in the UI or using the CLI. 42 | oci_conn_id = "oci_default" 43 | dataflow_application_ocid = "UNSET" 44 | compartment_ocid = "UNSET" 45 | logs_bucket = "dataflow-logs" 46 | warehouse_bucket = "dataflow-warehouse" 47 | try: 48 | namespace = Variable.get("oci_namespace") 49 | except: 50 | namespace = "UNSET" 51 | 52 | # Ensure everything is set. 53 | assert dataflow_application_ocid != "UNSET", "You need to set dataflow_application_ocid" 54 | assert compartment_ocid != "UNSET", "You need to set compartment_ocid" 55 | assert ( 56 | namespace != "UNSET" 57 | ), "You need to set namespace as an Airflow variable or in the script" 58 | 59 | logs_bucket_uri = f"oci://{logs_bucket}@{namespace}/" 60 | warehouse_bucket_uri = f"oci://{warehouse_bucket}@{namespace}/" 61 | display_name = "Application Run on {{ ds }}" 62 | 63 | # Set this based on the parameters your Application expects. 64 | parameter_list = [ 65 | oci.data_flow.models.ApplicationParameter( 66 | name="input_path", value="oci://bucket@namespace/input" 67 | ), 68 | oci.data_flow.models.ApplicationParameter( 69 | name="output_path", value="oci://bucket@namespace/output" 70 | ), 71 | ] 72 | 73 | run_application_step = OCIDataFlowRun( 74 | application_ocid=dataflow_application_ocid, 75 | compartment_ocid=compartment_ocid, 76 | dag=dag, 77 | display_name=display_name, 78 | logs_bucket_uri=logs_bucket_uri, 79 | oci_conn_id=oci_conn_id, 80 | parameters=parameter_list, 81 | task_id="Run_Dataflow_Application", 82 | warehouse_bucket_uri=warehouse_bucket_uri, 83 | ) 84 | run_application_step 85 | -------------------------------------------------------------------------------- /oci-provider/dags/trigger_dataflow_when_file_exists.py: -------------------------------------------------------------------------------- 1 | # To use this example: 2 | # 1. Customize the schedule_interval as needed. 3 | # 2. Set the Application OCID, Compartment OCID and name of the bucket to probe. 4 | # 3. If needed, set logs and warehouse buckets. 5 | # 4. If needed, set the oci_namespace variable or create an Airflow Variable (preferred). 6 | # 5. If you want to, customize the display_name variable to change how Runs appear. 7 | # 6. If you want to, customize the SLA setting. SLA misses will appear in the Airflow UI. 8 | # 9 | # Additionally you will need to customize parameter_list. 10 | # The parameters you provide need to be consistent with what your Application expects. 11 | # 12 | # After setting these, copy the script into your production DAG directory 13 | # usually (/opt/airflow/dags) and your job will run on the period you specified. 
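# Illustrative note: the OCIObjectStoragePrefixSensor below runs in "reschedule"
# mode, so it releases its worker slot between pokes; argument_builder_callback
# then sizes the Data Flow run from the file count the sensor is assumed to
# publish to XCom under the key "oci_prefix_total_files".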
14 | 15 | from airflow import DAG 16 | from airflow.models import Variable 17 | from operators.oci_data_flow import OCIDataFlowRun 18 | from sensors.oci_object_storage import OCIObjectStoragePrefixSensor 19 | 20 | import datetime 21 | 22 | default_args = { 23 | "owner": "airflow", 24 | "start_date": datetime.datetime(2020, 6, 26), 25 | "email": ["your_email@somecompany.com"], 26 | "email_on_failure": False, 27 | "email_on_retry": False, 28 | "sla": datetime.timedelta(hours=12), 29 | } 30 | 31 | # This schedule_interval runs the Application every 30 minutes. 32 | # Customize it as needed. 33 | dag = DAG( 34 | "transcoder_ng5", 35 | default_args=default_args, 36 | schedule_interval="0/30 * * * *", 37 | catchup=False, 38 | concurrency=1, 39 | max_active_runs=1, 40 | ) 41 | 42 | # Customize these variables. 43 | # Find the OCID values in the UI or using the CLI. 44 | oci_conn_id = "oci_default" 45 | dataflow_application_ocid = "UNSET" 46 | compartment_ocid = "UNSET" 47 | logs_bucket = "dataflow-logs" 48 | warehouse_bucket = "dataflow-warehouse" 49 | try: 50 | namespace = Variable.get("oci_namespace") 51 | except: 52 | namespace = "UNSET" 53 | bucket_name = "UNSET" 54 | bucket_base_path = "" 55 | 56 | # Ensure everything is set. 57 | assert bucket_name != "UNSET", "You need to set bucket_name" 58 | assert dataflow_application_ocid != "UNSET", "You need to set dataflow_application_ocid" 59 | assert compartment_ocid != "UNSET", "You need to set compartment_ocid" 60 | assert ( 61 | namespace != "UNSET" 62 | ), "You need to set namespace as an Airflow variable or in the script" 63 | 64 | logs_bucket_uri = f"oci://{logs_bucket}@{namespace}/" 65 | warehouse_bucket_uri = f"oci://{warehouse_bucket}@{namespace}/" 66 | display_name = "Application Run on {{ ts }}" 67 | 68 | def argument_builder_callback(context): 69 | runtime_arguments = dict() 70 | 71 | # Launch an extra executor for every 10 files, up to 20 total executors. 72 | total_files = context["task_instance"].xcom_pull( 73 | "Probe_New_Data", key="oci_prefix_total_files" 74 | ) 75 | num_executors = min(total_files // 10 + 2, 20) 76 | runtime_arguments["num_executors"] = num_executors 77 | runtime_arguments["driver_shape"] = "VM.Standard2.2" 78 | runtime_arguments["executor_shape"] = "VM.Standard2.2" 79 | 80 | # Set application arguments including parallelism. 81 | # Target 3 partitions per core (VM.Standard2.2 = 2 cores). 
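    # Worked example (illustrative): if the sensor reported total_files = 45,
    # then num_executors = min(45 // 10 + 2, 20) = 6 and
    # number_partitions = 6 * 2 * 3 = 36.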
82 | number_partitions = str(num_executors * 2 * 3) 83 | runtime_arguments["arguments"] = [ 84 | "--input", 85 | bucket_name, 86 | "--output", 87 | "output", 88 | "--number-partitions", 89 | number_partitions, 90 | ] 91 | return runtime_arguments 92 | 93 | with dag: 94 | sensor = OCIObjectStoragePrefixSensor( 95 | task_id="Probe_New_Data", 96 | bucket_name=bucket_name, 97 | mode="reschedule", 98 | prefix=bucket_base_path, 99 | ) 100 | run_application = OCIDataFlowRun( 101 | task_id="Run_Dataflow_Application", 102 | application_ocid=dataflow_application_ocid, 103 | compartment_ocid=compartment_ocid, 104 | display_name=display_name, 105 | logs_bucket_uri=logs_bucket_uri, 106 | oci_conn_id=oci_conn_id, 107 | runtime_callback=argument_builder_callback, 108 | warehouse_bucket_uri=warehouse_bucket_uri, 109 | ) 110 | sensor >> run_application 111 | -------------------------------------------------------------------------------- /oci-provider/plugins/plugins/hooks/oci_adb.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | import oci 19 | import os 20 | import re 21 | import cx_Oracle 22 | from typing import Optional 23 | from sqlalchemy import create_engine 24 | from hooks.oci_base import OCIBaseHook 25 | from airflow.exceptions import AirflowException 26 | 27 | 28 | class OCIDBHook(OCIBaseHook): 29 | """ 30 | Interact with Databases on OCI 31 | 32 | :param compartment_id: Target compartment OCID 33 | :type compartment_id: str 34 | :param tns_admin_root: The wallet root directory. The wallet will be loaded from $TNS_ADMIN/sqlnet.ora. 35 | If you do not set tns_admin_root, it is assumed to be in your environment. 
36 | :type tns_admin_root: str 37 | :param database_ocid: Database ID 38 | :type database_ocid: str 39 | :param db_workload: DB Workload type, valid options are DW or OLTP 40 | :type str: 41 | :param db_name: Databse Name (Not display) 42 | :type db_name: str 43 | :param debug: Whether to display debug output 44 | :type debug: bool 45 | :param dsn: DSN (TNS Name) for connection 46 | :type dsn: str 47 | :param oci_conn_id: Airflow connection ID 48 | :type oci_conn_id: str 49 | :param oci_region: Target OCI Region 50 | :type oci_region: str 51 | :param password: Database password for user_id 52 | :type password: str 53 | :param user_id: User ID for Database login 54 | :type user_id: str 55 | :param wallet_location: Filesystem location for wallet files 56 | :param wallet_location: str 57 | """ 58 | def __init__(self, 59 | compartment_ocid: str, 60 | tns_admin_root: Optional[str] = None, 61 | database_ocid: Optional[str] = None, 62 | db_workload: Optional[str] = None, 63 | db_name: Optional[str] = None, 64 | debug: Optional[bool] = False, 65 | dsn: Optional[str] = None, 66 | oci_conn_id: Optional[str] = "oci_default", 67 | oci_region: Optional[str] = None, 68 | password: Optional[str] = None, 69 | user_id: Optional[str] = None, 70 | wallet_location: Optional[str] = None, 71 | *args, 72 | **kwargs): 73 | super(OCIDBHook, self).__init__(*args, **kwargs) 74 | self.compartment_id = compartment_ocid 75 | self.tns_admin_root = tns_admin_root 76 | self.database_id = database_ocid 77 | self.db_workload = db_workload 78 | self.db_name = db_name 79 | self.debug = debug 80 | self.dsn = dsn 81 | self.oci_conn_id = oci_conn_id 82 | self.oci_region = oci_region 83 | self.password = password 84 | self.user_id = user_id 85 | self.wallet_location = wallet_location 86 | self.oci_client = oci.database.DatabaseClient 87 | 88 | def get_ocid_by_name(self, db_name=None, db_workload=None): 89 | """ 90 | Look up databases by name and return OCID 91 | :param db_name: Target DB Name (Not display name) 92 | :type db_name: str 93 | :param db_workload: Workload type, valid options are DW or OLTP 94 | :type db_workload: str 95 | :return: db_id (OCID) 96 | """ 97 | try: 98 | adb_list = \ 99 | self.get_client(self.oci_client).list_autonomous_databases(compartment_id=self.compartment_id, 100 | db_workload=self.db_workload).data 101 | if self.debug is True: 102 | self.log.info("ADB List: {0}".format(adb_list)) 103 | for db in adb_list: 104 | if db.db_name == self.db_name: 105 | self.database_id = db.id 106 | return db.id 107 | else: 108 | continue 109 | return None 110 | except AirflowException as e: 111 | self.log.error(e.response["Error"]["Message"]) 112 | 113 | def relocalize_sqlnet(self): 114 | """ 115 | Update the path in $TNS_ADMIN/sqlnet.ora to the correct path 116 | """ 117 | if self.tns_admin_root is None: 118 | self.log.error("tns_admin_root not specified or null: {0}".format(self.tns_admin_root)) 119 | else: 120 | os.environ["TNS_ADMIN"] = self.tns_admin_root 121 | file_path = os.path.join(os.environ["TNS_ADMIN"], "sqlnet.ora") 122 | if not os.path.exists(file_path): 123 | raise Exception("{} does not exist".format(file_path)) 124 | with open(file_path, "r") as fd: 125 | self.log.info("Reading sqlnet.ora") 126 | original = fd.read() 127 | # Set the correct path. 
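            # For example (illustrative): a stock ADB wallet sqlnet.ora entry such as
            #   WALLET_LOCATION = (SOURCE = (METHOD = file) (METHOD_DATA = (DIRECTORY="?/network/admin")))
            # is rewritten so that DIRECTORY points at self.tns_admin_root instead.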
128 | modified = re.sub( 129 | 'DIRECTORY="([^"]+)"', 130 | 'DIRECTORY="{}"'.format(self.tns_admin_root), 131 | original, 132 | ) 133 | with open(file_path, "w") as fd: 134 | self.log.info("Writing modified sqlnet.ora") 135 | fd.write(modified) 136 | 137 | def connect(self, **kwargs): 138 | """ 139 | Connect to an Oracle DSN using a wallet. 140 | The wallet will be loaded from $TNS_ADMIN/sqlnet.ora. 141 | If you do not set this, it is assumed to be in your environment. 142 | :param dsn: The TNS name. 143 | :type dns: str 144 | :param tns_admin_root: The wallet root directory. 145 | :type tns_admin_root: str 146 | :param **kwargs: Arbitrary keyword arguments to pass to cx_Oracle.connect. 147 | :return: connection: True if successful, False otherwise. 148 | """ 149 | try: 150 | if self.dsn is None: 151 | if self.db_name is not None: 152 | self.dsn = str(self.db_name) + "_medium" 153 | if self.debug is True: 154 | self.log.info("Connecting to Oracle database with DSN {}".format(self.dsn.lower())) 155 | self.connection = cx_Oracle.connect(dsn=self.dsn.lower(), **kwargs) 156 | else: 157 | self.log.error("DB Name and DSN are null, one of these is required to connect") 158 | else: 159 | if self.debug is True: 160 | self.log.info("Connecting to Oracle database with DSN {}".format(self.dsn)) 161 | self.connection = cx_Oracle.connect(dsn=self.dsn, **kwargs) 162 | return self.connection 163 | except AirflowException as e: 164 | self.log.error(e.response["Error"]["Message"]) 165 | 166 | def connect_sqlalchemy( 167 | self, 168 | url=None, 169 | **kwargs 170 | ): 171 | """ 172 | Create and return a .Engine instance 173 | :param url: String that indicates database dialect and connection arguments 174 | :type url: str 175 | :param kwargs: Additional arguments supported by create_engine 176 | :return: 177 | """ 178 | if url is not None: 179 | self.engine = create_engine(url, **kwargs) 180 | else: 181 | self.engine = create_engine( 182 | "oracle+cx_oracle://{}:{}@{}".format(self.user_id, self.password, self.dsn), **kwargs 183 | ) 184 | return self.engine 185 | 186 | def check_state(self, **kwargs): 187 | """ 188 | Check Database state and return lifecycle_state 189 | :param kwargs: 190 | :return: 191 | """ 192 | db_details = self.get_client(self.oci_client).get_autonomous_database(autonomous_database_id=self.database_id, 193 | **kwargs).data 194 | return db_details.lifecycle_state 195 | -------------------------------------------------------------------------------- /oci-provider/plugins/plugins/hooks/oci_base.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
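# Illustrative only: with API-key authentication, OCIBaseHook.get_config() below
# expects the Airflow connection's "login" field to hold the user OCID and its
# "extra" JSON to carry keys like the following (all values are placeholders):
#   {"extra__oci__tenancy": "ocid1.tenancy.oc1..example",
#    "extra__oci__fingerprint": "aa:bb:cc:dd:...",
#    "extra__oci__key_file": "/opt/airflow/.oci/oci_api_key.pem",
#    "extra__oci__region": "us-ashburn-1"}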
18 | """ 19 | This module contains Base Oracle Cloud Infrastructure (OCI) Hook. 20 | """ 21 | from os import path 22 | from typing import Optional 23 | import oci 24 | from airflow.exceptions import AirflowException 25 | from airflow.hooks.base_hook import BaseHook 26 | 27 | 28 | class OCIBaseHook(BaseHook): 29 | """ 30 | Interact with OCI 31 | This class is a thin wrapper around the OCI Python SDK 32 | 33 | :param oci_conn_id: The OCI connection profile used for Airflow connection. 34 | :type oci_conn_id: str 35 | :param config: OCI API Access Configuration - usually read from Airflow but can be provided. 36 | :type config: dict 37 | :param verify: Whether or not to verify SSL certificates. 38 | :type verify: str or bool 39 | 40 | How to Set OCI configuration 41 | For detail on the contents of the default config file, see 42 | https://docs.cloud.oracle.com/en-us/iaas/Content/API/Concepts/sdkconfig.htm 43 | If you don't want to use a file, populate values as detailed here 44 | https://oracle-cloud-infrastructure-python-sdk.readthedocs.io/en/latest/configuration.html 45 | Fallback to Instance Principals if not using config files or passed parameters 46 | """ 47 | 48 | def __init__(self, 49 | oci_conn_id: Optional[str] = "oci_default", 50 | verify: Optional[bool] = None 51 | ): 52 | super(OCIBaseHook, self).__init__() 53 | self.oci_conn_id = oci_conn_id 54 | self.config = None 55 | self.client_kwargs = None 56 | self.signer = None 57 | self.verify = verify 58 | 59 | def get_config(self): 60 | try: 61 | try: 62 | connection_object = self.get_connection(self.oci_conn_id) 63 | extra_config = connection_object.extra_dejson 64 | if extra_config.get("extra__oci__tenancy"): 65 | self.config = { 66 | "log_requests": False, 67 | "additional_user_agent": '', 68 | "pass_phrase": None, 69 | "user": connection_object.login, 70 | "fingerprint": extra_config["extra__oci__fingerprint"], 71 | "key_file": extra_config["extra__oci__key_file"], 72 | "tenancy": extra_config["extra__oci__tenancy"], 73 | "region": extra_config["extra__oci__region"] 74 | } 75 | self.client_kwargs = dict() 76 | elif "config_path" in extra_config: 77 | if path.exists(extra_config["config_path"]) is True: 78 | self.config = oci.config.from_file(extra_config["config_path"]) 79 | self.client_kwargs = dict() 80 | else: 81 | raise AirflowException('Config Path %s not found' % extra_config["config_path"]) 82 | elif "service_principal" in extra_config: 83 | self.log.debug("Attempting to use service principal") 84 | self.signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner(dict(purpose="SERVICE_PRINCIPAL")) 85 | self.client_kwargs = dict(signer=self.signer) 86 | self.config = { 87 | "tenancy": self.signer.tenancy_id, 88 | "region": self.signer.region, 89 | } 90 | else: 91 | self.log.info("Failed to find valid oci config in Airflow, falling back to Instance Principals") 92 | self.signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner() 93 | self.client_kwargs = dict(signer=self.signer) 94 | self.config = { 95 | "tenancy": self.signer.tenancy_id, 96 | "region": self.signer.region, 97 | } 98 | except: 99 | self.log.info("Failed to find valid oci config in Airflow, falling back to Instance Principals") 100 | self.signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner() 101 | self.client_kwargs = dict(signer=self.signer) 102 | self.config = { 103 | "tenancy": self.signer.tenancy_id, 104 | "region": self.signer.region, 105 | } 106 | except AirflowException as e: 107 | self.log.error("All attempts to get valid 
configuration failed") 108 | self.log.error(str(e)) 109 | raise e 110 | return self.config, self.client_kwargs 111 | 112 | def validate_config(self): 113 | from oci.config import validate_config 114 | try: 115 | validate_config(self.config, **self.client_kwargs) 116 | self.identity = oci.identity.IdentityClient(self.config, **self.client_kwargs) 117 | if "user" in self.config: 118 | self.user = self.identity.get_user(self.config["user"]).data 119 | except AirflowException: 120 | self.log.warning("Configuration Validation Failed") 121 | 122 | def get_client(self, client_class): 123 | client, client_kwargs = self.get_config() 124 | return client_class(client, **client_kwargs) 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /oci-provider/plugins/plugins/hooks/oci_data_catalog.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | 19 | from typing import Optional 20 | import oci 21 | from hooks.oci_base import OCIBaseHook 22 | from airflow.exceptions import AirflowException 23 | 24 | 25 | class OCIDataCatalogHook(OCIBaseHook): 26 | """ 27 | Interact with Oracle Data Catalog. 
28 | """ 29 | def __init__(self, 30 | compartment_ocid: str, 31 | data_catalog_ocid: Optional[str] = None, 32 | display_name: Optional[str] = None, 33 | oci_conn_id: Optional[str] = "oci_default", 34 | oci_region: Optional[str] = None, 35 | *args, 36 | **kwargs): 37 | super(OCIDataCatalogHook, self).__init__(*args, **kwargs) 38 | self.compartment_id = compartment_ocid 39 | self.data_catalog_ocid = data_catalog_ocid 40 | self.display_name = display_name 41 | self.job_key = None 42 | self.oci_conn_id = oci_conn_id 43 | self.oci_region = oci_region 44 | self.oci_client = oci.data_catalog.DataCatalogClient 45 | 46 | def get_catalog_ocid(self, **kwargs): 47 | """ 48 | Get Data Catalog OCID by catalog_name 49 | :param compartment_id: 50 | :param catalog_name: 51 | :return: 52 | """ 53 | try: 54 | catalogdetails = self.get_client(self.oci_client).list_catalogs(compartment_id=self.compartment_id, 55 | **kwargs).data 56 | for catalog in catalogdetails: 57 | if catalog.display_name == self.display_name: 58 | self.data_catalog_ocid = catalog.id 59 | return catalog.id 60 | else: 61 | continue 62 | return None 63 | except AirflowException as e: 64 | self.log.error(e.response["Error"]["Message"]) 65 | 66 | def get_job_key(self, **kwargs): 67 | """ 68 | Get Job Key by display_name 69 | :param kwargs: 70 | :return: 71 | """ 72 | try: 73 | joblist = self.get_client(self.oci_client).list_jobs(compartment_id=self.compartment_id, 74 | display_name=self.display_name, 75 | **kwargs).data 76 | for job in joblist: 77 | if job.display_name == self.display_name: 78 | self.job_key = job.key 79 | return job.key 80 | else: 81 | continue 82 | return None 83 | except AirflowException as e: 84 | self.log.error(e.response["Error"]["Message"]) 85 | -------------------------------------------------------------------------------- /oci-provider/plugins/plugins/hooks/oci_data_flow.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | 19 | from typing import Optional 20 | import oci 21 | from hooks.oci_base import OCIBaseHook 22 | from airflow.exceptions import AirflowException 23 | """ 24 | Get OCID by Name - Compartment ID, Application Name 25 | """ 26 | 27 | 28 | class OCIDataFlowHook(OCIBaseHook): 29 | """ 30 | Interact with Oracle Data Flow. 
31 | """ 32 | def __init__(self, 33 | compartment_ocid: str, 34 | display_name: str, 35 | oci_conn_id: Optional[str] = "oci_default", 36 | oci_region: Optional[str] = None, 37 | driver_shape: Optional[str] = None, 38 | executor_shape: Optional[str] = None, 39 | file_uri: Optional[str] = None, 40 | language: Optional[str] = "English", 41 | num_executors: Optional[int] = "1", 42 | spark_version: Optional[str] = None, 43 | *args, 44 | **kwargs): 45 | super(OCIDataFlowHook, self).__init__(*args, **kwargs) 46 | self.compartment_id = compartment_ocid 47 | self.display_name = display_name 48 | self.oci_conn_id = oci_conn_id 49 | self.oci_region = oci_region 50 | self.driver_shape = driver_shape 51 | self.executor_shape = executor_shape 52 | self.file_uri = file_uri 53 | self.language = language 54 | self.num_executors = num_executors 55 | self.spark_version = spark_version 56 | self.oci_client = oci.data_flow.DataFlowClient 57 | 58 | def get_application_ocid(self, compartment_id=None, display_name=None): 59 | try: 60 | appdetails = self.get_client(self.oci_client).list_applications(compartment_id=self.compartment_id).data 61 | for app in appdetails: 62 | if app.display_name == self.display_name: 63 | return app.id 64 | else: 65 | continue 66 | return None 67 | except AirflowException as e: 68 | self.log.error(e.response["Error"]["Message"]) 69 | 70 | 71 | def check_for_application_by_name(self, compartment_id=None, display_name=None): 72 | try: 73 | appdetails = self.get_client(self.oci_client).list_applications(compartment_id=self.compartment_id).data 74 | for app in appdetails: 75 | if app.display_name == self.display_name: 76 | return True 77 | else: 78 | continue 79 | return False 80 | except AirflowException as e: 81 | self.log.error(e.response["Error"]["Message"]) 82 | 83 | 84 | def create_application_details(self): 85 | try: 86 | application_details = oci.data_flow.models.CreateApplicationDetails(compartment_id=self.compartment_id, 87 | display_name=self.display_name, 88 | driver_shape=self.driver_shape, 89 | executor_shape=self.executor_shape, 90 | file_uri=self.file_uri, 91 | language=self.language, 92 | num_executors=self.num_executors, 93 | spark_version=self.spark_version, 94 | **kwargs) 95 | return application_details 96 | except AirflowException as e: 97 | self.log.error(e.response["Error"]["Message"]) 98 | 99 | def create_run_details(self): 100 | try: 101 | run_details = oci.data_flow.models.CreateRunDetails(compartment_id=self.compartment_id, 102 | application_id=self.get_application_ocid(self.display_name), 103 | display_name=self.display_name, 104 | **kwargs) 105 | return run_details 106 | except AirflowException as e: 107 | self.log.error(e.response["Error"]["Message"]) 108 | 109 | -------------------------------------------------------------------------------- /oci-provider/plugins/plugins/hooks/oci_object_storage.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | import oci 18 | from typing import Optional 19 | from hooks.oci_base import OCIBaseHook 20 | from airflow.exceptions import AirflowException 21 | 22 | 23 | class OCIObjectStorageHook(OCIBaseHook): 24 | """ 25 | Interact with OCI Object Storage 26 | 27 | :param compartment_id: Target compartment OCID 28 | :type compartment_id: str 29 | :param bucket_name: Target bucket name 30 | :type bucket_name: str 31 | :param oci_conn_id: Airflow connection ID 32 | :type oci_conn_id: str 33 | :param args: Additional arguments 34 | :param kwargs: Additional arguments 35 | """ 36 | def __init__(self, 37 | compartment_id: str, 38 | bucket_name: Optional[str] = None, 39 | oci_conn_id: Optional[str] = "oci_default", 40 | *args, 41 | **kwargs): 42 | super(OCIObjectStorageHook, self).__init__(*args, **kwargs) 43 | self.bucket_name = bucket_name 44 | self.oci_conn_id = oci_conn_id 45 | self.compartment_id = compartment_id 46 | self.oci_client = oci.object_storage.ObjectStorageClient 47 | 48 | def get_namespace(self, compartment_id=None): 49 | """ 50 | Get OCI Object Storage Namespace using config 51 | :param compartment_id: Compartment OCID 52 | :type compartment_id: str 53 | :return: Object Storage Namespace Name 54 | :rtype: str 55 | """ 56 | try: 57 | self.namespace_name = self.get_client(self.oci_client).get_namespace(compartment_id=self.compartment_id).data 58 | return self.namespace_name 59 | except AirflowException as e: 60 | self.log.error(e.response["Error"]["Message"]) 61 | 62 | 63 | def check_for_bucket(self, bucket_name=None, namespace_name=None): 64 | """ 65 | Check if bucket_name exists 66 | :param bucket_name: Target bucket name 67 | :param namespace_name: Object Storage Namespace 68 | :return: True if exists, False if not 69 | :rtype: bool 70 | """ 71 | try: 72 | bucketsummary = self.get_client(self.oci_client).list_buckets(namespace_name=self.namespace_name, 73 | compartment_id=self.compartment_id) 74 | bucket_list = bucketsummary.data 75 | for bucket in bucket_list: 76 | if bucket.name == self.bucket_name: 77 | return True 78 | else: 79 | continue 80 | return False 81 | except AirflowException as e: 82 | self.log.error(e.response["Error"]["Message"]) 83 | 84 | def check_for_object(self, object_name, bucket_name=None, namespace_name=None, **kwargs): 85 | """ 86 | Check if Object exists in Bucket 87 | :param bucket_name: Target Bucket name 88 | :param namespace_name: Object Storage Namespace 89 | :param object_name: Name of Object in Bucket to check if exists 90 | :return: True if exists, False if not 91 | :rtype: bool 92 | """ 93 | if bucket_name is None: 94 | bucket_name = self.bucket_name 95 | if namespace_name is None: 96 | namespace_name = self.namespace_name 97 | try: 98 | # TODO: You might only need to check the first returned object. 
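# Note: list_objects returns at most one page of results per call. When the response's
# next_start_with field is set the listing is truncated, so the loop below issues
# follow-up requests until the object is found or the listing is exhausted.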
99 | next_start_with = None 100 | while True: 101 | objectsummary = self.get_client(self.oci_client).list_objects(namespace_name=namespace_name, 102 | bucket_name=bucket_name, 103 | prefix=object_name, 104 | start_after=next_start_with, 105 | **kwargs) 106 | object_list = objectsummary.data 107 | for object in object_list.objects: 108 | if object.name == object_name: 109 | return True 110 | if object_list.next_start_with is None: 111 | return False 112 | next_start_with = object_list.next_start_with 113 | except AirflowException as e: 114 | self.log.error(e.response["Error"]["Message"]) 115 | 116 | def copy_to_bucket(self, bucket_name=None, namespace_name=None, put_object_body=None, object_name=None, 117 | **kwargs): 118 | """ 119 | Copy source data to bucket using put_object 120 | :param bucket_name: Target bucket 121 | :type bucket_name: str 122 | :param namespace_name: Namespace name 123 | :type namespace_name: str 124 | :param put_object_body: The object to upload to the object store 125 | :type put_object_body: stream 126 | :param object_name: Name of object to be created in bucket 127 | :type object_name: str 128 | :return: Response object with data type None 129 | """ 130 | try: 131 | self.get_client(self.oci_client).put_object(bucket_name=self.bucket_name, namespace_name=self.namespace_name, 132 | put_object_body=put_object_body, object_name=object_name, 133 | **kwargs) 134 | except AirflowException as e: 135 | self.log.error(e.response["Error"]["Message"]) 136 | 137 | def read_from_bucket(self, bucket_name=None, namespace_name=None, object_name=None, **kwargs): 138 | """ 139 | Read object from bucket and return contents 140 | :param bucket_name: Target bucket 141 | :type bucket_name: str 142 | :param namespace_name: Namespace name 143 | :type namespace_name: str 144 | :param put_object_body: The object to upload to the object store 145 | :type put_object_body: stream 146 | :param object_name: Name of object to be created in bucket 147 | :type object_name: str 148 | :param kwargs: additional arguments 149 | :return: Response object with data type stream 150 | """ 151 | try: 152 | object_data = self.get_client(self.oci_client).get_object(bucket_name=self.bucket_name, 153 | namespace_name=self.namespace_name, 154 | object_name=object_name, **kwargs).data 155 | return object_data 156 | except AirflowException as e: 157 | self.log.error(e.response["Error"]["Message"]) 158 | 159 | -------------------------------------------------------------------------------- /oci-provider/plugins/plugins/operators/oci_adb.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
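# A minimal, hypothetical usage sketch for the OCIDBOperator defined in this file; the
# task_id, compartment OCID, wallet paths, credentials and SQL below are placeholders
# for illustration only, not values from this repository:
#
#   run_sql = OCIDBOperator(
#       task_id="run_single_sql",
#       compartment_ocid="ocid1.compartment.oc1..example",
#       db_name="airflowdb",
#       db_workload="DW",
#       tns_admin_root="/opt/wallet",
#       wallet_location="/opt/wallet",
#       user_id="admin",
#       password="<password>",
#       single_sql="SELECT 1 FROM dual",
#   )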
18 | import oci 19 | import cx_Oracle 20 | import gzip 21 | import pandas as pd 22 | from typing import Optional 23 | from hooks.oci_adb import OCIDBHook 24 | from airflow.models.baseoperator import BaseOperator 25 | from airflow.utils.decorators import apply_defaults 26 | from airflow.exceptions import AirflowException 27 | 28 | 29 | class OCIDBOperator(BaseOperator): 30 | """ 31 | Execute SQL on OCI ADB/ADW 32 | 33 | :param compartment_id: Target compartment OCID 34 | :type compartment_id: str 35 | :param tns_admin_root: The wallet root directory. The wallet will be loaded from $TNS_ADMIN/sqlnet.ora. 36 | If you do not set tns_admin_root, it is assumed to be in your environment. 37 | :type tns_admin_root: str 38 | :param database_ocid: Database ID 39 | :type database_ocid: str 40 | :param db_workload: DB Workload type, valid options are DW or OLTP 41 | :type str: 42 | :param db_name: Databse Name (Not display) 43 | :type db_name: str 44 | :param debug: Whether to display debug output 45 | :type debug: bool 46 | :param dsn: DSN (TNS Name) for connection 47 | :type dsn: str 48 | :param oci_conn_id: Airflow connection ID 49 | :type oci_conn_id: str 50 | :param oci_region: Target OCI Region 51 | :type oci_region: str 52 | :param password: Database password for user_id 53 | :type password: str 54 | :param user_id: User ID for Database login 55 | :type user_id: str 56 | :param wallet_location: Filesystem location for wallet files 57 | :param wallet_location: str 58 | :param single_sql: Single-line SQL to execute on the database with cx_Oracle cursor.execute 59 | :type single_sql: str 60 | :param many_sql: Batch SQL to execute on the database with cx_Oracle cursor.executemany loading many_sql_data 61 | :type many_sql: str 62 | :param many_sql_data: Data to batch load with cursor.exeecutemany 63 | :param many_sql_data: list 64 | :param kwargs: Additional parameters for cx_Oracle execution 65 | """ 66 | @apply_defaults 67 | def __init__(self, 68 | compartment_ocid: str, 69 | tns_admin_root: Optional[str] = None, 70 | database_ocid: Optional[str] = None, 71 | db_workload: Optional[str] = None, 72 | db_name: Optional[str] = None, 73 | debug: Optional[bool] = False, 74 | dsn: Optional[str] = None, 75 | oci_conn_id: Optional[str] = "oci_default", 76 | oci_region: Optional[str] = None, 77 | password: Optional[str] = None, 78 | user_id: Optional[str] = None, 79 | wallet_location: Optional[str] = None, 80 | single_sql: Optional[str] = None, 81 | many_sql: Optional[str] = None, 82 | many_sql_data: Optional[list] = None, 83 | *args, 84 | **kwargs): 85 | super(OCIDBOperator, self).__init__(*args, **kwargs) 86 | self.compartment_id = compartment_ocid 87 | self.tns_admin_root = tns_admin_root 88 | self.database_id = database_ocid 89 | self.db_workload = db_workload 90 | self.db_name = db_name 91 | self.debug = debug 92 | self.dsn = dsn 93 | self.oci_conn_id = oci_conn_id 94 | self.oci_region = oci_region 95 | self.password = password 96 | self.user_id = user_id 97 | self.wallet_location = wallet_location 98 | self.single_sql = single_sql 99 | self.many_sql = many_sql 100 | self.many_sql_data = many_sql_data 101 | self.oci_client = oci.database.DatabaseClient 102 | 103 | def execute(self, context, **kwargs): 104 | try: 105 | self._oci_hook = OCIDBHook(compartment_ocid=self.compartment_id, db_name=self.db_name, 106 | db_workload=self.db_workload, tns_admin_root=self.tns_admin_root, 107 | wallet_location=self.wallet_location) 108 | db_id = self._oci_hook.get_ocid_by_name(db_name=self.db_name) 109 | 
self.log.info("{0} Database ID: {1}".format(self.db_name, db_id)) 110 | self.log.info("Relocalizing sqlnet.ora") 111 | self._oci_hook.relocalize_sqlnet() 112 | self.log.info("Sqlnet.ora relocalized to {0}".format(self.tns_admin_root)) 113 | self.log.info("Establishing DB Connection") 114 | with self._oci_hook.connect(user=self.user_id, password=self.password) as conn: 115 | cursor = conn.cursor() 116 | if self.single_sql is not None: 117 | if self.debug is True: 118 | self.log.info("Running Single SQL {}".format(self.single_sql)) 119 | cursor.execute(self.single_sql, **kwargs) 120 | if self.many_sql is not None: 121 | if self.debug is True: 122 | self.log.info("Running Many SQL {}".format(self.many_sql)) 123 | cursor.prepare(self.many_sql) 124 | cursor.executemany(None, self.many_sql_data, **kwargs) 125 | conn.commit() 126 | except AirflowException as e: 127 | self.log.error(e.response["Error"]["Message"]) 128 | 129 | -------------------------------------------------------------------------------- /oci-provider/plugins/plugins/operators/oci_copy_object_to_adb.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | import oci 19 | import pandas as pd 20 | from typing import Optional 21 | from hooks.oci_adb import OCIDBHook 22 | from hooks.oci_object_storage import OCIObjectStorageHook 23 | from airflow.models.baseoperator import BaseOperator 24 | from airflow.utils.decorators import apply_defaults 25 | from airflow.exceptions import AirflowException 26 | 27 | 28 | class OCIDBCopyFromObject(BaseOperator): 29 | """ 30 | Copy data from a file in Object Storage into OCI ADB/ADW 31 | :param compartment_id: Target compartment OCID 32 | :type compartment_id: str 33 | :param tns_admin_root: The wallet root directory. The wallet will be loaded from $TNS_ADMIN/sqlnet.ora. 34 | If you do not set tns_admin_root, it is assumed to be in your environment. 
35 | :type tns_admin_root: str 36 | :param database_ocid: Database ID 37 | :type database_ocid: str 38 | :param db_workload: DB Workload type, valid options are DW or OLTP 39 | :type str: 40 | :param db_name: Databse Name (Not display) 41 | :type db_name: str 42 | :param debug: Whether to display debug output 43 | :type debug: bool 44 | :param dsn: DSN (TNS Name) for connection 45 | :type dsn: str 46 | :param oci_conn_id: Airflow connection ID 47 | :type oci_conn_id: str 48 | :param oci_region: Target OCI Region 49 | :type oci_region: str 50 | :param password: Database password for user_id 51 | :type password: str 52 | :param user_id: User ID for Database login 53 | :type user_id: str 54 | :param wallet_location: Filesystem location for wallet files 55 | :param wallet_location: str 56 | """ 57 | 58 | @apply_defaults 59 | def __init__(self, 60 | compartment_ocid: str, 61 | bucket_name: str, 62 | object_name: str, 63 | tns_admin_root: Optional[str] = None, 64 | database_ocid: Optional[str] = None, 65 | db_workload: Optional[str] = None, 66 | db_name: Optional[str] = None, 67 | debug: Optional[bool] = False, 68 | dsn: Optional[str] = None, 69 | oci_conn_id: Optional[str] = "oci_default", 70 | oci_region: Optional[str] = None, 71 | password: Optional[str] = None, 72 | user_id: Optional[str] = None, 73 | wallet_location: Optional[str] = None, 74 | *args, 75 | **kwargs): 76 | super(OCIDBCopyFromObject, self).__init__(*args, **kwargs) 77 | self.compartment_id = compartment_ocid 78 | self.bucket_name = bucket_name 79 | self.object_name = object_name 80 | self.tns_admin_root = tns_admin_root 81 | self.database_id = database_ocid 82 | self.db_workload = db_workload 83 | self.db_name = db_name 84 | self.debug = debug 85 | self.dsn = dsn 86 | self.oci_conn_id = oci_conn_id 87 | self.oci_region = oci_region 88 | self.password = password 89 | self.user_id = user_id 90 | self.wallet_location = wallet_location 91 | self._oci_hook = None 92 | self._oci_storage_hook = None 93 | self.oci_client = oci.database.DatabaseClient 94 | 95 | def execute(self, context, **kwargs): 96 | try: 97 | self._oci_hook = OCIDBHook(compartment_ocid=self.compartment_id, db_name=self.db_name, 98 | db_workload=self.db_workload, tns_admin_root=self.tns_admin_root, 99 | wallet_location=self.wallet_location) 100 | self._oci_storage_hook = OCIObjectStorageHook(compartment_id=self.compartment_id, 101 | bucket_name=self.bucket_name) 102 | self.log.info("Relocalizing sqlnet.ora") 103 | self._oci_hook.relocalize_sqlnet() 104 | self.log.info("Sqlnet.ora relocalized to {0}".format(self.tns_admin_root)) 105 | self.log.info("Establishing DB Connection") 106 | with self._oci_hook.connect_sqlalchemy(dsn=self.dsn, user=self.user_id, password=self.password) as conn: 107 | namespace = self._oci_storage_hook.get_namespace(compartment_id=self.compartment_id) 108 | object_contents = self._oci_storage_hook.read_from_bucket(bucket_name=self.bucket_name, 109 | namespace_name=namespace, 110 | object_name=self.object_name) 111 | dff = pd.DataFrameFactory(conn) 112 | dff.write(object_contents, name=self.object_name, if_exists='replace') 113 | except AirflowException as e: 114 | self.log.error(e.response["Error"]["Message"]) 115 | 116 | -------------------------------------------------------------------------------- /oci-provider/plugins/plugins/operators/oci_data_catalog.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor 
license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | 19 | from airflow.models.baseoperator import BaseOperator 20 | from airflow.utils.decorators import apply_defaults 21 | from typing import Optional 22 | import oci 23 | from oci.data_catalog.data_catalog_client import DataCatalogClient 24 | from hooks.oci_data_catalog import OCIDataCatalogHook 25 | from airflow.exceptions import AirflowException 26 | import time 27 | """ 28 | Interact with OCI Data Catalog 29 | """ 30 | 31 | 32 | class OCIDataCatalogExecute(BaseOperator): 33 | """ 34 | Create Data Catalog Job Execution 35 | :param compartment_ocid: Compartment OCID 36 | :param oci_conn_id: Airflow connection ID 37 | :param data_catalog_ocid: Data Catalog OCID 38 | :param retry_strategy: Retry Strategy 39 | """ 40 | 41 | @apply_defaults 42 | def __init__( 43 | self, 44 | compartment_ocid: str, 45 | oci_conn_id: str, 46 | data_catalog_ocid: str, 47 | job_key: str, 48 | job_execution_details: object, 49 | retry_strategy: Optional[str] = None, 50 | *args, 51 | **kwargs 52 | ): 53 | super().__init__(*args, **kwargs) 54 | self.compartment_id = compartment_ocid 55 | self.oci_conn_id = oci_conn_id 56 | self.data_catalog_ocid = data_catalog_ocid 57 | self.job_key = job_key 58 | self.job_execution_details = job_execution_details 59 | self.retry_strategy = retry_strategy 60 | self._oci_hook = None 61 | 62 | def execute(self, context, **kwargs): 63 | self._oci_hook = OCIDataCatalogHook(compartment_ocid=self.compartment_id, oci_conn_id=self.oci_conn_id) 64 | client = self._oci_hook.get_client(oci.data_catalog.DataCatalogClient) 65 | self.log.info("Validating OCI Config") 66 | self._oci_hook.validate_config() 67 | 68 | try: 69 | print("Submitting Data Catalog Job Execution") 70 | submit_job = DataCatalogClient(client) 71 | submit_job.create_job_execution(catalog_id=self.data_catalog_ocid, 72 | job_key=self.job_key, 73 | create_job_execution_details=self.job_execution_details, 74 | **kwargs) 75 | check_job = DataCatalogClient(client) 76 | job_data = check_job.get_job(catalog_id=self.data_catalog_ocid, 77 | job_key=self.job_key).data 78 | while job_data.lifecycle_state is not "completed": 79 | time.sleep(15) 80 | job_data = check_job.get_job(catalog_id=self.data_catalog_ocid, 81 | job_key=self.job_key).data 82 | 83 | except AirflowException as e: 84 | self.log.error(e.response["Error"]["Message"]) 85 | -------------------------------------------------------------------------------- /oci-provider/plugins/plugins/operators/oci_data_flow.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. 
The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | 19 | from airflow.models.baseoperator import BaseOperator 20 | from airflow.utils.decorators import apply_defaults 21 | from typing import Optional 22 | import oci 23 | from oci.data_flow.data_flow_client_composite_operations import DataFlowClientCompositeOperations 24 | from hooks.oci_data_flow import OCIDataFlowHook 25 | from hooks.oci_object_storage import OCIObjectStorageHook 26 | from airflow.exceptions import AirflowException 27 | """ 28 | Interact with OCI Data Flow 29 | """ 30 | 31 | 32 | class OCIDataFlowRun(BaseOperator): 33 | template_fields = ('display_name',) 34 | 35 | """ 36 | Create a Data Flow Run 37 | :param comprtment_ocid: Compartment OCID 38 | :param application_ocid: Data Flow Applicaation OCID 39 | :param display_name: Data Flow App Name 40 | :param oci_conn_id: Airflow Connection ID 41 | :param bucket_name: Application Bucket Name 42 | :param arguments: Arguments 43 | :param parameters: Parameters 44 | :param driver_shape: Spark Driver Shape 45 | :param executor_shape: Spark Executor Shape 46 | :param num_executors: Spark Executors 47 | :param logs_bucket_uri: OCI Logs Bucket 48 | :param logs_run_output: Whether to log the run output 49 | :param defined_tags: Defined Tags 50 | :param freeform_tags: Freeform Tags 51 | :param check_interval: Check Interval 52 | :param timeout: Timeout 53 | """ 54 | @apply_defaults 55 | def __init__( 56 | self, 57 | compartment_ocid: str, 58 | display_name: str, 59 | oci_conn_id: str, 60 | bucket_name: Optional[str] = None, 61 | application_ocid: Optional = None, 62 | arguments: Optional = None, 63 | parameters: Optional = None, 64 | driver_shape: Optional = None, 65 | executor_shape: Optional = None, 66 | num_executors: Optional = None, 67 | log_run_output: Optional[bool] = True, 68 | logs_bucket_uri: Optional = None, 69 | defined_tags: Optional = None, 70 | freeform_tags: Optional = None, 71 | warehouse_bucket_uri: Optional = None, 72 | check_interval: Optional[int] = None, 73 | timeout: Optional[int] = None, 74 | runtime_callback: Optional = None, 75 | *args, 76 | **kwargs 77 | ): 78 | super().__init__(*args, **kwargs) 79 | self.compartment_id = compartment_ocid 80 | self.application_id = application_ocid 81 | self.display_name = display_name 82 | self.oci_conn_id = oci_conn_id 83 | self.bucket_name = bucket_name 84 | self.arguments= arguments 85 | self.parameters = parameters 86 | self.driver_shape = driver_shape 87 | self.executor_shape = executor_shape 88 | self.num_executors = num_executors 89 | self.log_run_output = log_run_output 90 | self.logs_bucket_uri = logs_bucket_uri 91 | self.defined_tags = defined_tags 92 | self.freeform_tags = freeform_tags 93 | self.warehouse_bucket_uri = warehouse_bucket_uri 94 | self.check_interval = check_interval 95 | self.timeout = timeout 96 | self.runtime_callback = runtime_callback 97 | self._oci_hook = None 98 | 99 | def execute(self, context): 100 | self._oci_hook = 
OCIDataFlowHook(compartment_ocid=self.compartment_id, oci_conn_id=self.oci_conn_id, display_name=self.display_name) 101 | client = self._oci_hook.get_client(oci.data_flow.DataFlowClient) 102 | self.log.info("Validating OCI Config") 103 | self._oci_hook.validate_config() 104 | if not self.timeout: 105 | self.timeout = float('inf') 106 | if not self.check_interval: 107 | self.check_interval = 30 108 | if not self.executor_shape: 109 | self.executor_shape = 'VM.Standard2.1' 110 | if not self.num_executors: 111 | self.num_executors = 1 112 | if not self.driver_shape: 113 | self.driver_shape = self.executor_shape 114 | if not self.warehouse_bucket_uri: 115 | self.namespace = OCIObjectStorageHook(compartment_id=self.compartment_id, oci_conn_id=self.oci_conn_id, bucket_name=self.bucket_name).get_namespace() 116 | self.warehouse_bucket_uri = "oci://" + str(self.bucket_name) + "@" + str(self.namespace) + "/" 117 | if not self.application_id: 118 | self.application_id = OCIDataFlowHook(compartment_ocid=self.compartment_id, oci_conn_id=self.oci_conn_id, display_name=self.display_name).get_application_ocid() 119 | run_details = { 120 | "application_id": self.application_id, 121 | "compartment_id": self.compartment_id, 122 | "display_name": self.display_name, 123 | "executor_shape": self.executor_shape, 124 | "num_executors": self.num_executors, 125 | "driver_shape": self.driver_shape, 126 | "warehouse_bucket_uri": self.warehouse_bucket_uri, 127 | "logs_bucket_uri": self.logs_bucket_uri, 128 | "arguments": self.arguments, 129 | "parameters": self.parameters, 130 | } 131 | if self.runtime_callback is not None: 132 | callback_settings = self.runtime_callback(context) 133 | run_details = {**run_details, **callback_settings} 134 | dataflow_run = oci.data_flow.models.CreateRunDetails(**run_details) 135 | try: 136 | submit_run = DataFlowClientCompositeOperations(client) 137 | response = submit_run.create_run_and_wait_for_state(create_run_details=dataflow_run, 138 | wait_for_states=["CANCELED", "SUCCEEDED", "FAILED"], 139 | waiter_kwargs={ 140 | "max_interval_seconds": self.check_interval, 141 | "max_wait_seconds": self.timeout 142 | }) 143 | if response.data.lifecycle_state != "SUCCEEDED": 144 | self.log.error(response.data.lifecycle_details) 145 | raise AirflowException(response.data.lifecycle_details) 146 | if self.log_run_output: 147 | try: 148 | log_contents = client.get_run_log(run_id=response.data.id, name="spark_application_stdout.log.gz") 149 | self.log.info("Data Flow Run Output:") 150 | self.log.info(log_contents.data.text) 151 | except: 152 | self.log.info("Unable to fetch Run logs. 
This can be due to a missing IAM policy") 153 | self.log.info("Data Flow needs a policy like \"allow service dataflow to read objects in tenancy where target.bucket.name=''\" to read your logs") 154 | self.log.info("See https://docs.cloud.oracle.com/en-us/iaas/data-flow/using/dfs_getting_started.htm#set_up_admin for more information") 155 | except oci.exceptions.CompositeOperationError as e: 156 | self.log.error(str(e.cause)) 157 | raise e 158 | 159 | 160 | class OCIDataFlowCreateApplication(BaseOperator): 161 | """ 162 | Create a Data Flow Run 163 | :param comprtment_ocid: Compartment OCID 164 | :param application_ocid: Data Flow Applicaation OCID 165 | :param display_name: Data Flow App Name 166 | :param oci_conn_id: Airflow Connection ID 167 | :param bucket_name: Application Bucket Name 168 | :param arguments: Arguments 169 | :param parameters: Parameters 170 | :param driver_shape: Spark Driver Shape 171 | :param executor_shape: Spark Executor Shape 172 | :param num_executors: Spark Executors 173 | :param logs_bucket_uri: OCI Logs Bucket 174 | :param defined_tags: Defined Tags 175 | :param freeform_tags: Freeform Tags 176 | :param check_interval: Check Interval 177 | :param timeout: Timeout 178 | """ 179 | @apply_defaults 180 | def __init__( 181 | self, 182 | compartment_ocid: str, 183 | display_name: str, 184 | oci_conn_id: str, 185 | bucket_name: str, 186 | object_name: str, 187 | language: str, 188 | file_uri: Optional[str] = None, 189 | arguments: Optional = None, 190 | parameters: Optional = None, 191 | driver_shape: Optional = None, 192 | executor_shape: Optional = None, 193 | num_executors: Optional = None, 194 | logs_bucket_uri: Optional = None, 195 | spark_version: Optional = None, 196 | check_interval: Optional[int] = None, 197 | timeout: Optional[int] = None, 198 | *args, 199 | **kwargs 200 | ): 201 | super().__init__(*args, **kwargs) 202 | self.compartment_id = compartment_ocid 203 | self.display_name = display_name 204 | self.oci_conn_id = oci_conn_id 205 | self.bucket_name = bucket_name 206 | self.object_name = object_name 207 | self.language = language 208 | self.file_uri = file_uri 209 | self.arguments = arguments 210 | self.parameters = parameters 211 | self.driver_shape = driver_shape 212 | self.executor_shape = executor_shape 213 | self.num_executors = num_executors 214 | self.logs_bucket_uri = logs_bucket_uri 215 | self.spark_version = spark_version 216 | self.check_interval = check_interval 217 | self.timeout = timeout 218 | self._oci_hook = None 219 | 220 | def execute(self, context): 221 | self._oci_hook = OCIDataFlowHook(compartment_ocid=self.compartment_id, oci_conn_id=self.oci_conn_id, display_name=self.display_name) 222 | client = self._oci_hook.get_client(oci.data_flow.DataFlowClient) 223 | self.log.info("Validating OCI Config") 224 | self._oci_hook.validate_config() 225 | if not self.timeout: 226 | self.timeout = float('inf') 227 | if not self.check_interval: 228 | self.check_interval = 30 229 | if not self.executor_shape: 230 | self.executor_shape = 'VM.Standard2.1' 231 | if not self.num_executors: 232 | self.num_executors = 1 233 | if not self.driver_shape: 234 | self.driver_shape = self.executor_shape 235 | if not self.file_uri: 236 | self.namespace = OCIObjectStorageHook(compartment_id=self.compartment_id, oci_conn_id=self.oci_conn_id, bucket_name=self.bucket_name).get_namespace() 237 | self.file_uri = "oci://" + str(self.bucket_name) + "@" + str(self.namespace) + "/" + str(self.object_name) 238 | self.log.info("File URI: {0}".format(self.file_uri)) 239 
| if not self.language: 240 | self.log.error("Application Language must be set") 241 | if not self.spark_version: 242 | self.spark_version = '2.4.4' 243 | app_details = { 244 | "compartment_id": self.compartment_id, 245 | "display_name": self.display_name, 246 | "driver_shape": self.driver_shape, 247 | "executor_shape": self.executor_shape, 248 | "file_uri": self.file_uri, 249 | "language": self.language, 250 | "num_executors": self.num_executors, 251 | "spark_version": self.spark_version 252 | } 253 | dataflow_create = \ 254 | oci.data_flow.models.CreateApplicationDetails(compartment_id=app_details["compartment_id"], 255 | display_name=app_details["display_name"], 256 | driver_shape=app_details["driver_shape"], 257 | executor_shape=app_details["executor_shape"], 258 | file_uri=app_details["file_uri"], 259 | language=app_details["language"], 260 | num_executors=app_details["num_executors"], 261 | spark_version=app_details["spark_version"] 262 | ) 263 | try: 264 | print("Checking if Application {0} exists".format(self.display_name)) 265 | appcheck = self._oci_hook.check_for_application_by_name() 266 | if appcheck is True: 267 | self.log.error("Application {0} already exists".format(self.display_name)) 268 | else: 269 | print("Creating DataFlow Application {0}".format(self.display_name)) 270 | create_app = DataFlowClientCompositeOperations(client) 271 | create_app.create_application_and_wait_for_state(create_application_details=dataflow_create, 272 | wait_for_states=["ACTIVE"], 273 | waiter_kwargs={ 274 | "max_interval_seconds": self.check_interval, 275 | "max_wait_seconds": self.timeout 276 | }) 277 | except AirflowException as e: 278 | self.log.error(e.response["Error"]["Message"]) 279 | -------------------------------------------------------------------------------- /oci-provider/plugins/plugins/operators/oci_object_storage.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
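# A minimal, hypothetical DAG snippet using the operators defined in this file; the
# task_ids, bucket name, compartment OCID and file names are placeholders, not values
# from this repository:
#
#   make_bucket = MakeBucket(
#       task_id="make_bucket",
#       bucket_name="example-bucket",
#       compartment_ocid="ocid1.compartment.oc1..example",
#   )
#   upload_file = CopyFileToOCIObjectStorageOperator(
#       task_id="upload_file",
#       bucket_name="example-bucket",
#       compartment_ocid="ocid1.compartment.oc1..example",
#       object_name="data.csv",
#       local_file_path="/tmp/",  # object_name is appended to this path when locating the file
#   )
#   make_bucket >> upload_file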
18 | import oci 19 | from typing import Optional 20 | from airflow.models.baseoperator import BaseOperator 21 | from hooks.oci_object_storage import OCIObjectStorageHook 22 | from airflow.utils.decorators import apply_defaults 23 | from airflow.exceptions import AirflowException 24 | from os import path 25 | 26 | 27 | class MakeBucket(BaseOperator): 28 | """ 29 | Create a Bucket in OCI object store 30 | 31 | :param bucket_name: Name of bucket 32 | :type bucket_name: str 33 | :param compartment_ocid: Compartment ID 34 | :type compartment_id: str 35 | :param namespace_name: Object storage namespace 36 | :type namespace_name: str 37 | :param oci_conn_id: Airflow connection ID 38 | :type oci_conn_id: str 39 | """ 40 | 41 | @apply_defaults 42 | def __init__( 43 | self, 44 | bucket_name: str, 45 | compartment_ocid: str, 46 | namespace_name: Optional[str] = None, 47 | oci_conn_id: Optional[str] = "oci_default", 48 | *args, 49 | **kwargs 50 | ) -> None: 51 | super().__init__(*args, **kwargs) 52 | self.bucket_name = bucket_name 53 | self.compartment_id = compartment_ocid 54 | self.namespace_name = namespace_name 55 | self.oci_conn_id = oci_conn_id 56 | self._oci_hook = None 57 | self.oci_client = oci.object_storage.ObjectStorageClient 58 | 59 | def execute(self, context, **kwargs): 60 | self._oci_hook = OCIObjectStorageHook(compartment_id=self.compartment_id, bucket_name=self.bucket_name, 61 | oci_conn_id=self.oci_conn_id) 62 | client = self._oci_hook.get_client(self.oci_client) 63 | self.log.info("Validating OCI Config") 64 | self._oci_hook.validate_config() 65 | if not self.namespace_name: 66 | self.namespace_name = self._oci_hook.get_namespace() 67 | details = oci.object_storage.models.CreateBucketDetails( 68 | compartment_id=self.compartment_id, name=self.bucket_name 69 | ) 70 | self.log.info("Checking if Bucket {} exists".format(self.bucket_name)) 71 | bucket_exists = self._oci_hook.check_for_bucket(namespace_name=self.namespace_name, bucket_name=self.bucket_name) 72 | if bucket_exists is True: 73 | self.log.info("Bucket {0} exists, skipping creation".format(self.bucket_name)) 74 | else: 75 | self.log.info("Creating Bucket {0} in {1}".format(self.bucket_name, self.namespace_name)) 76 | client.create_bucket(namespace_name=self.namespace_name, create_bucket_details=details, **kwargs) 77 | self.log.info("Create bucket complete") 78 | 79 | 80 | class CopyFileToOCIObjectStorageOperator(BaseOperator): 81 | """ 82 | Copy local file to OCI object store 83 | 84 | :param bucket_name: Name of bucket 85 | :type bucket_name: str 86 | :param compartment_ocid: Compartment ID 87 | :type compartment_id: str 88 | :param object_name: Object name - must match local file 89 | :type object_name: str 90 | :param local_file_path: Path to local file 91 | :type local_file_path: str 92 | :param namespace_name: Object storage namespace 93 | :type namespace_name: str 94 | :param oci_conn_id: Airflow connection ID 95 | :type oci_conn_id: str 96 | """ 97 | 98 | @apply_defaults 99 | def __init__( 100 | self, 101 | bucket_name: str, 102 | compartment_ocid: str, 103 | object_name: str, 104 | local_file_path: str, 105 | namespace_name: Optional[str] = None, 106 | oci_conn_id: Optional[str] = "oci_default", 107 | *args, 108 | **kwargs 109 | ) -> None: 110 | super().__init__(*args, **kwargs) 111 | self.bucket_name = bucket_name 112 | self.compartment_id = compartment_ocid 113 | self.namespace_name = namespace_name 114 | self.object_name = object_name 115 | self.local_file_path = local_file_path 116 | self.oci_conn_id = 
oci_conn_id 117 | self._oci_hook = None 118 | self.oci_client = oci.object_storage.ObjectStorageClient 119 | 120 | def execute(self, context, **kwargs): 121 | self._oci_hook = OCIObjectStorageHook(compartment_id=self.compartment_id, bucket_name=self.bucket_name, 122 | oci_conn_id=self.oci_conn_id) 123 | client = self._oci_hook.get_client(self.oci_client) 124 | self.log.info("Validating OCI Config") 125 | self._oci_hook.validate_config() 126 | if not self.namespace_name: 127 | self.namespace_name = self._oci_hook.get_namespace() 128 | details = oci.object_storage.models.CreateBucketDetails( 129 | compartment_id=self.compartment_id, name=self.bucket_name 130 | ) 131 | self.log.info("Checking if Bucket {} exists".format(self.bucket_name)) 132 | bucket_exists = self._oci_hook.check_for_bucket(namespace_name=self.namespace_name, bucket_name=self.bucket_name) 133 | if bucket_exists is True: 134 | self.log.info("Bucket {0} exists, skipping creation".format(self.bucket_name)) 135 | else: 136 | self.log.info("Creating Bucket {0} in {1}".format(self.bucket_name, self.namespace_name)) 137 | client.create_bucket(namespace_name=self.namespace_name, create_bucket_details=details) 138 | self.log.info("Create bucket complete") 139 | self.log.info("Checking if {0} exists in {1}".format(self.object_name, self.bucket_name)) 140 | object_exists = self._oci_hook.check_for_object(namespace_name=self.namespace_name, bucket_name=self.bucket_name, 141 | object_name=self.object_name) 142 | if object_exists is True: 143 | self.log.info("Object {0} exists already in {1}".format(self.object_name, self.bucket_name)) 144 | else: 145 | self.log.info("Validating local file {0} exists".format(self.object_name)) 146 | if path.exists(self.local_file_path) is True: 147 | self.local_file = self.local_file_path + self.object_name 148 | if path.exists(self.local_file) is True: 149 | self.log.info("Copying {0} to {1}".format(self.local_file, self.bucket_name)) 150 | self.put_object_body = open(self.local_file, 'rb') 151 | self._oci_hook.copy_to_bucket(bucket_name=self.bucket_name, 152 | namespace_name=self.namespace_name, 153 | object_name=self.object_name, 154 | put_object_body=self.put_object_body, **kwargs) 155 | else: 156 | self.log.error("Local file {0} does not exist".format(self.local_file)) 157 | else: 158 | self.log.error("Local file path {0} does not exist".format(self.local_file_path)) 159 | 160 | 161 | class CopyToOCIObjectStorageOperator(BaseOperator): 162 | """ 163 | Copy data to OCI object store 164 | 165 | :param bucket_name: Name of target bucket 166 | :type bucket_name: str 167 | :param compartment_ocid: Compartment ID 168 | :type compartment_id: str 169 | :param object_name: Object name to create in object store 170 | :type object_name: str 171 | :param put_object_body: Contents of object_name 172 | :type put_object_body: stream 173 | :param namespace_name: Object storage namespace 174 | :type namespace_name: str 175 | :param oci_conn_id: Airflow connection ID 176 | :type oci_conn_id: str 177 | """ 178 | 179 | @apply_defaults 180 | def __init__( 181 | self, 182 | bucket_name: str, 183 | compartment_ocid: str, 184 | object_name: str, 185 | put_object_body: str, 186 | namespace_name: Optional[str] = None, 187 | oci_conn_id: Optional[str] = "oci_default", 188 | *args, 189 | **kwargs 190 | ) -> None: 191 | super().__init__(*args, **kwargs) 192 | self.bucket_name = bucket_name 193 | self.compartment_id = compartment_ocid 194 | self.namespace_name = namespace_name 195 | self.object_name = object_name 196 | 
self.put_object_body = put_object_body 197 | self.oci_conn_id = oci_conn_id 198 | self._oci_hook = None 199 | self.oci_client = oci.object_storage.ObjectStorageClient 200 | 201 | def execute(self, context, **kwargs): 202 | self._oci_hook = OCIObjectStorageHook(compartment_id=self.compartment_id, bucket_name=self.bucket_name, 203 | oci_conn_id=self.oci_conn_id) 204 | client = self._oci_hook.get_client(self.oci_client) 205 | self.log.info("Validating OCI Config") 206 | self._oci_hook.validate_config() 207 | if not self.namespace_name: 208 | self.namespace_name = self._oci_hook.get_namespace() 209 | details = oci.object_storage.models.CreateBucketDetails( 210 | compartment_id=self.compartment_id, name=self.bucket_name 211 | ) 212 | self.log.info("Checking if Bucket {} exists".format(self.bucket_name)) 213 | bucket_exists = self._oci_hook.check_for_bucket(namespace_name=self.namespace_name, bucket_name=self.bucket_name) 214 | if bucket_exists is True: 215 | self.log.info("Bucket {0} exists, skipping creation".format(self.bucket_name)) 216 | else: 217 | self.log.info("Creating Bucket {0} in {1}".format(self.bucket_name, self.namespace_name)) 218 | client.create_bucket(namespace_name=self.namespace_name, create_bucket_details=details) 219 | self.log.info("Create bucket complete") 220 | self.log.info("Checking if {0} exists in {1}".format(self.object_name, self.bucket_name)) 221 | object_exists = self._oci_hook.check_for_object(namespace_name=self.namespace_name, bucket_name=self.bucket_name, 222 | object_name=self.object_name) 223 | if object_exists is True: 224 | self.log.info("Object {0} exists already in {1}".format(self.object_name, self.bucket_name)) 225 | else: 226 | self.log.info("Copying {0} to {1}".format(self.object_name, self.bucket_name)) 227 | self._oci_hook.copy_to_bucket(bucket_name=self.bucket_name, namespace_name=self.namespace_name, 228 | object_name=self.object_name, put_object_body=self.put_object_body, **kwargs) 229 | 230 | 231 | class CopyFromOCIObjectStorage(BaseOperator): 232 | """ 233 | Copy object from OCI object store 234 | 235 | :param bucket_name: Name of target bucket 236 | :type bucket_name: str 237 | :param compartment_ocid: Compartment ID 238 | :type compartment_id: str 239 | :param object_name: Object name to create in object store 240 | :type object_name: str 241 | :param put_object_body: Contents of object_name 242 | :type put_object_body: stream 243 | :param namespace_name: Object storage namespace 244 | :type namespace_name: str 245 | :param oci_conn_id: Airflow connection ID 246 | :type oci_conn_id: str 247 | """ 248 | @apply_defaults 249 | def __init__( 250 | self, 251 | bucket_name: str, 252 | compartment_id: str, 253 | object_name: str, 254 | namespace_name: Optional[str] = None, 255 | oci_conn_id: Optional[str] = "oci_default", 256 | *args, 257 | **kwargs 258 | ) -> None: 259 | super().__init__(*args, **kwargs) 260 | self.bucket_name = bucket_name 261 | self.compartment_id = compartment_id 262 | self.namespace_name = namespace_name 263 | self.object_name = object_name 264 | self.oci_conn_id = oci_conn_id 265 | self._oci_hook = None 266 | self.oci_client = oci.object_storage.ObjectStorageClient 267 | 268 | def execute(self, context, **kwargs): 269 | self._oci_hook = OCIObjectStorageHook(compartment_id=self.compartment_id, bucket_name=self.bucket_name, 270 | oci_conn_id=self.oci_conn_id) 271 | client = self._oci_hook.get_client(self.oci_client) 272 | self.log.info("Validating OCI Config") 273 | self._oci_hook.validate_config() 274 | if not 
self.namespace_name: 275 | self.namespace_name = self._oci_hook.get_namespace() 276 | self.log.info("Checking if {0} exists in {1}".format(self.object_name, self.bucket_name)) 277 | object_exists = self._oci_hook.check_for_object(namespace_name=self.namespace_name, bucket_name=self.bucket_name, 278 | object_name=self.object_name, **kwargs) 279 | if object_exists is True: 280 | self.log.info("Reading {0} from {1}".format(self.object_name, self.bucket_name)) 281 | return client.get_object(namespace_name=self.namespace_name, object_name=self.object_name, 282 | bucket_name=self.bucket_name, **kwargs) 283 | else: 284 | raise AirflowException("{0} does not exist in {1}".format(self.object_name, self.bucket_name)) 285 | 286 | -------------------------------------------------------------------------------- /oci-provider/plugins/plugins/sensors/oci_adb.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language 17 | # governing permissions and limitations 18 | # under the License. 19 | 20 | from airflow.sensors.base_sensor_operator import BaseSensorOperator 21 | from airflow.utils.decorators import apply_defaults 22 | from airflow.exceptions import AirflowException 23 | from hooks.oci_adb import OCIDBHook 24 | import time 25 | 26 | class OCIADBSensor(BaseSensorOperator): 27 | """ 28 | Sensor to interact with OCI ADB 29 | """ 30 | 31 | @apply_defaults 32 | def __init__(self, 33 | compartment_ocid = None, 34 | oci_conn_id = 'oci_default', 35 | database_id = None, 36 | target_state = None, 37 | *args, 38 | **kwargs): 39 | super(OCIADBSensor, self).__init__(*args, **kwargs) 40 | self.compartment_id = compartment_ocid 41 | self.oci_conn_id = oci_conn_id 42 | self.database_id = database_id 43 | self.target_state = target_state 44 | self._oci_hook = None 45 | 46 | def poke(self, context): 47 | self.log.info('Checking database %s', self.database_id) 48 | db_state = self.get_oci_hook().check_state(database_id=self.database_id) 49 | self.log.info('DB State: {0}'.format(db_state)) 50 | # Succeed once the database reaches the target state; Airflow re-pokes otherwise 51 | return db_state == self.target_state 52 | 53 | 54 | def get_oci_hook(self): 55 | """ 56 | Create and return OCI Hook 57 | :return: 58 | """ 59 | if not self._oci_hook: 60 | self._oci_hook = OCIDBHook(compartment_ocid=self.compartment_id, oci_conn_id=self.oci_conn_id) 61 | return self._oci_hook 62 | -------------------------------------------------------------------------------- /oci-provider/plugins/plugins/sensors/oci_object_storage.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements.
See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language 17 | # governing permissions and limitations 18 | # under the License. 19 | 20 | from airflow.sensors.base_sensor_operator import BaseSensorOperator 21 | from airflow.utils.decorators import apply_defaults 22 | from airflow.exceptions import AirflowException 23 | from hooks.oci_object_storage import OCIObjectStorageHook 24 | 25 | class BaseOCIObjectStorageSensor(BaseSensorOperator): 26 | template_fields = ('prefix', 'object_name', 'bucket_name') 27 | 28 | @apply_defaults 29 | def __init__(self, 30 | compartment_ocid = None, 31 | bucket_name = None, 32 | object_name = None, 33 | prefix = None, 34 | namespace_name = None, 35 | oci_conn_id = 'oci_default', 36 | verify = None, 37 | *args, 38 | **kwargs): 39 | super().__init__(*args, **kwargs) 40 | if type(self).__name__ == "OCIObjectStorageSensor": 41 | if object_name is None: 42 | raise AirflowException('Please provide object_name') 43 | self.object_name = object_name 44 | self.prefix = None 45 | elif type(self).__name__ == "OCIObjectStoragePrefixSensor": 46 | if prefix is None: 47 | raise AirflowException('Please provide prefix') 48 | self.object_name = None 49 | self.prefix = prefix 50 | if bucket_name is None: 51 | raise AirflowException('Please provide bucket_name') 52 | self.compartment_id = compartment_ocid 53 | self.bucket_name = bucket_name 54 | self.oci_conn_id = oci_conn_id 55 | self.verify = verify 56 | self.namespace_name = namespace_name 57 | self._oci_hook = None 58 | 59 | def poke(self, context): 60 | raise Exception("Class did not implement poke method") 61 | 62 | def list_objects(self, file, prefix_match=False): 63 | hook = self.get_oci_hook() 64 | if not self.namespace_name: 65 | self.namespace_name = hook.get_namespace(compartment_id=self.compartment_id) 66 | object_store_client = hook.get_client(hook.oci_client) 67 | base_arguments = dict( 68 | bucket_name=self.bucket_name, 69 | fields="size", 70 | limit=100, 71 | namespace_name=self.namespace_name, 72 | prefix=file, 73 | ) 74 | objectsummary = object_store_client.list_objects(**base_arguments) 75 | 76 | # For exact match we only consider the first match, guarding against an empty result. 77 | if prefix_match == False: 78 | matching_objects = objectsummary.data.objects 79 | if matching_objects and matching_objects[0].name == file: 80 | return 1, matching_objects[0].size 81 | return 0, 0 82 | 83 | # Prefix mode: Build a list of matching files.
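# Each list_objects call below returns a single page of results; when next_start_with is
# set on the response it is passed back (via the "start" parameter) to fetch the next
# page, accumulating a total file count and size across all matching objects.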
84 | total_files = 0 85 | total_size = 0 86 | while True: 87 | object_list = objectsummary.data 88 | for object in object_list.objects: 89 | total_files += 1 90 | total_size += object.size 91 | if object_list.next_start_with is None: 92 | break 93 | base_arguments["start"] = object_list.next_start_with 94 | objectsummary = object_store_client.list_objects(**base_arguments) 95 | return total_files, total_size 96 | 97 | def get_oci_hook(self): 98 | """ 99 | Create and return OCI Hook 100 | :return: 101 | """ 102 | if not self._oci_hook: 103 | self._oci_hook = OCIObjectStorageHook(bucket_name=self.bucket_name, compartment_id=self.compartment_id, 104 | oci_conn_id=self.oci_conn_id, verify=self.verify) 105 | return self._oci_hook 106 | 107 | 108 | class OCIObjectStorageSensor(BaseOCIObjectStorageSensor): 109 | """ 110 | Sensor to interact with OCI Object Storage 111 | """ 112 | 113 | def __init__(self, *args, **kwargs): 114 | super().__init__(*args, **kwargs) 115 | 116 | def poke(self, context): 117 | self.log.info('Poking for object %s in bucket %s', self.object_name, self.bucket_name) 118 | try: 119 | total_files, total_size = self.list_objects(self.object_name, prefix_match=False) 120 | if total_files > 0: 121 | self.log.info('Found object of size %d', total_size) 122 | context['task_instance'].xcom_push('oci_storage_sensor_size', total_size) 123 | return True 124 | self.log.info('Object not found') 125 | return False 126 | 127 | except AirflowException as e: 128 | self.log.error(e.response["Error"]["Message"]) 129 | 130 | 131 | class OCIObjectStoragePrefixSensor(BaseOCIObjectStorageSensor): 132 | """ 133 | Prefix sensor for OCI Object Storage 134 | """ 135 | 136 | def __init__(self, *args, **kwargs): 137 | super().__init__(*args, **kwargs) 138 | 139 | def poke(self, context): 140 | self.log.info('Poking for prefix [%s] in bucket %s', self.prefix, self.bucket_name) 141 | try: 142 | total_files, total_size = self.list_objects(self.prefix, prefix_match=True) 143 | 144 | # If we matched anything, record file count, total size and return true. 145 | if total_files > 0: 146 | self.log.info('Found %d objects with total size %d', total_files, total_size) 147 | context['task_instance'].xcom_push('oci_prefix_total_files', total_files) 148 | context['task_instance'].xcom_push('oci_prefix_total_size', total_size) 149 | return True 150 | self.log.info('No matching objects') 151 | return False 152 | 153 | except AirflowException as e: 154 | self.log.error(e.response["Error"]["Message"]) 155 | 156 | -------------------------------------------------------------------------------- /outputs.tf: -------------------------------------------------------------------------------- 1 | output "BASTION_PUBLIC_IP" { value = var.public_edge_node ? module.bastion.public_ip : "No public IP assigned" } 2 | output "INFO" { value = var.use_remote_exec ? "Remote Execution used for deployment, check output for SSH key to access bastion": "CloudInit on Bastion host drives Airflow deployment. Login to Bastion host and check /var/log/OCI-airflow-initialize.log for status" } 3 | output "SSH_PRIVATE_KEY" { value = var.use_remote_exec ?
nonsensitive(tls_private_key.oke_ssh_key.private_key_pem) : "SSH Key provided by user" } 4 | -------------------------------------------------------------------------------- /schema.yaml: -------------------------------------------------------------------------------- 1 | title: Airflow on OKE 2 | schemaVersion: 1.1.0 3 | version: "20230322" 4 | locale: "en" 5 | groupings: 6 | - title: "Deployment Type" 7 | variables: 8 | - use_remote_exec 9 | - title: "SSH Key" 10 | variables: 11 | - ssh_provided_public_key 12 | visible: 13 | not: 14 | - use_remote_exec 15 | - title: "Availabilty Domain" 16 | variables: 17 | - availability_domain 18 | - title: "VCN Options" 19 | variables: 20 | - useExistingVcn 21 | - myVcn 22 | - vcn_dns_label 23 | - custom_cidrs 24 | - VCN_CIDR 25 | - edge_cidr 26 | - private_cidr 27 | - title: "OKE Cluster Options" 28 | variables: 29 | - create_new_oke_cluster 30 | - cluster_name 31 | - kubernetes_version 32 | - OKESubnet 33 | - existing_oke_cluster_id 34 | - title: "OKE Airflow Pool Configuration" 35 | variables: 36 | - airflow_node_pool_name 37 | - airflow_node_pool_shape 38 | - flex_gbs 39 | - flex_ocpu 40 | - airflow_node_pool_size 41 | - airflow_namespace 42 | - kube_label 43 | - title: "OKE Registry Configuration" 44 | variables: 45 | - registry 46 | - repo_name 47 | - username 48 | - image_name 49 | - image_label 50 | - title: "Vault Configuration" 51 | variables: 52 | - vault_secret_id 53 | - title: "OCI-MySQL Configuration" 54 | variables: 55 | - mysql_admin_username 56 | - mysql_admin_password 57 | - mysql_shape 58 | - db_name 59 | - airflow_username 60 | - airflow_password 61 | - private_ip_address 62 | - enable_backups 63 | - title: "Bastion Configuration" 64 | variables: 65 | - public_edge_node 66 | - bastion_name 67 | - bastion_shape 68 | - bastion_flex_gbs 69 | - bastion_flex_ocpus 70 | - edgeSubnet 71 | - title: "Pre-Defined" 72 | variables: 73 | - region 74 | - compartment_ocid 75 | - tenancy_ocid 76 | - meta_db_type 77 | - cluster_options_add_ons_is_kubernetes_dashboard_enabled 78 | - cluster_options_admission_controller_options_is_pod_security_policy_enabled 79 | - service_port 80 | - endpoint_subnet_id 81 | - node_pool_node_shape_config_memory_in_gbs 82 | - node_pool_node_shape_config_ocpus 83 | visible: false 84 | 85 | variables: 86 | use_remote_exec: 87 | type: boolean 88 | title: "Use Remote Execution" 89 | description: "By default this deployment will use remote execution which requires a bastion with public IP address. If you disable this, the deployment will allow you to use CloudInit on the bastion host for deployment. This allows for more flexible deployment options which are not internet facing." 90 | default: true 91 | 92 | ssh_provided_public_key: 93 | type: string 94 | title: "SSH Public Key" 95 | description: "Copy/Paste the contents of your SSH Public Key" 96 | required: true 97 | default: "" 98 | 99 | create_new_oke_cluster: 100 | type: boolean 101 | title: "Create OKE Cluster" 102 | description: "Check to deploy a new OKE cluster. If you do not create an OKE cluster you must provide the OCID of an existing one." 103 | default: "true" 104 | 105 | existing_oke_cluster_id: 106 | type: string 107 | title: "Existing OKE Cluster ID" 108 | description: "Input the existing OKE cluster ID here for deployment." 
109 | visible: 110 | not: 111 | - create_new_oke_cluster 112 | 113 | kubernetes_version: 114 | type: enum 115 | enum: 116 | - "v1.26.2" 117 | - "v1.25.4" 118 | - "v1.24.1" 119 | title: "Kubernetes Version" 120 | description: "Choose the version of Kubernetes to deploy" 121 | required: true 122 | default: "v1.26.2" 123 | 124 | cluster_name: 125 | type: string 126 | title: "OKE Cluster Name" 127 | description: "Name the OKE Cluster" 128 | required: true 129 | default: "airflow-cluster" 130 | visible: create_new_oke_cluster 131 | 132 | useExistingVcn: 133 | type: boolean 134 | title: "Use Existing VCN" 135 | description: "Click to use existing VCN, otherwise VCN and Subnets will be created" 136 | required: true 137 | default: false 138 | 139 | myVcn: 140 | type: oci:core:vcn:id 141 | title: "Existing VCN" 142 | description: "Select Existing VCN" 143 | dependsOn: 144 | compartmentId: compartment_ocid 145 | visible: useExistingVcn 146 | required: true 147 | 148 | custom_cidrs: 149 | type: boolean 150 | title: "Customize Network CIDRS" 151 | description: "Click to customize CIDR ranges, only applicable when creating VCN as part of deployment" 152 | required: true 153 | default: false 154 | visible: 155 | not: 156 | - useExistingVcn 157 | 158 | OKESubnet: 159 | type: oci:core:subnet:id 160 | title: "OKE Subnet" 161 | description: "Select Subnet for OKE cluster" 162 | dependsOn: 163 | compartmentId: compartment_ocid 164 | vcnId: myVcn 165 | visible: useExistingVcn 166 | required: true 167 | 168 | edgeSubnet: 169 | type: oci:core:subnet:id 170 | title: "Edge Subnet" 171 | description: "Select Subnet for Bastion. If using remote exec, ensure that this is a public subnet or deployment will fail." 172 | dependsOn: 173 | compartmentId: compartment_ocid 174 | vcnId: myVcn 175 | visible: useExistingVcn 176 | required: true 177 | 178 | availability_domain: 179 | type: oci:identity:availabilitydomain:name 180 | title: "Availability Domain" 181 | description: "Select AD" 182 | dependsOn: 183 | compartmentId: compartment_ocid 184 | required: true 185 | 186 | VCN_CIDR: 187 | type: string 188 | title: "VCN CIDR" 189 | description: "Customize VCN top level CIDR" 190 | visible: custom_cidrs 191 | 192 | edge_cidr: 193 | type: string 194 | title: "Edge Subnet CIDR" 195 | description: "Customize Edge Subnet CIDR, ensure this fits in VCN CIDR range." 196 | visible: custom_cidrs 197 | 198 | private_cidr: 199 | type: string 200 | title: "Private Subnet CIDR" 201 | description: "Customize Private Subnet CIDR, ensure this fits in VCN CIDR range." 202 | visible: custom_cidrs 203 | 204 | vcn_dns_label: 205 | type: string 206 | title: "VCN DNS Label" 207 | description: "Set the VCN DNS label to be used when creating VCN. Default is 'airflowvcn' which sets the VCN domain to 'airflowvcn.oraclevcn.com'" 208 | visible: 209 | not: 210 | - useExistingVcn 211 | 212 | meta_db_type: 213 | type: enum 214 | title: "Airflow Meta-Database" 215 | description: "Pick which database to use for Airflow Metadata."
216 | enum: 217 | - "OCI Mysql" 218 | required: true 219 | visible: false 220 | 221 | airflow_node_pool_name: 222 | type: string 223 | title: "Airflow Node Pool Name" 224 | description: "Define the node pool name, no spaces" 225 | required: true 226 | visible: create_new_oke_cluster 227 | 228 | airflow_node_pool_shape: 229 | type: oci:core:instanceshape:name 230 | title: "Airflow Node Pool Shape" 231 | description: "Define node pool shape" 232 | required: true 233 | visible: create_new_oke_cluster 234 | dependsOn: 235 | compartmentId: compartment_ocid 236 | default: "VM.Standard.E4.Flex" 237 | 238 | airflow_node_pool_size: 239 | type: int 240 | title: "Airflow Node Pool size" 241 | description: "Enter a value, minimum 1" 242 | min: 1 243 | default: 1 244 | required: true 245 | visible: create_new_oke_cluster 246 | 247 | airflow_namespace: 248 | type: string 249 | title: "Airflow Namespace" 250 | default: "airflow" 251 | required: true 252 | 253 | kube_label: 254 | type: string 255 | title: "Kube Label" 256 | default: "airflow" 257 | required: true 258 | 259 | registry: 260 | type: string 261 | title: "OCI Registry" 262 | description: "Set this to the target region you are deploying to" 263 | default: "iad.ocir.io" 264 | required: true 265 | 266 | repo_name: 267 | type: string 268 | title: "Repo Name" 269 | default: "airflow" 270 | required: true 271 | 272 | username: 273 | type: string 274 | title: "OCI Registry Username" 275 | description: "Set this to your username for OCI Registry" 276 | required: true 277 | default: "oracleidentitycloudservice/" 278 | 279 | image_name: 280 | type: string 281 | title: "Docker Image Name" 282 | default: "airflow" 283 | required: true 284 | 285 | image_label: 286 | type: string 287 | title: "Docker Image Label" 288 | default: "2.0" 289 | required: true 290 | 291 | vault_secret_id: 292 | type: string 293 | title: "Vault Secret ID" 294 | description: "OCI vault secret ID where the authentication key is stored - it is used for authentication when pushing/pulling images to/from the OCIR registry. Set it to the OCID of the secret where you store the authentication token that is used to push/pull images from OCIR" 295 | default: "ocid1.vaultsecret.oc1.iad.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" 296 | required: true 297 | 298 | mysql_admin_username: 299 | type: string 300 | title: "OCI MySQL username" 301 | description: "Enter a username for the MySQL database admin user" 302 | default: "mysqladmin" 303 | required: true 304 | 305 | mysql_admin_password: 306 | type: password 307 | title: "OCI MySQL password" 308 | description: "The password for the administrative user. The password must be between 8 and 32 characters long, and must contain at least 1 numeric character, 1 lowercase character, 1 uppercase character, and 1 special (nonalphanumeric) character."
309 | required: true 310 | 311 | mysql_shape: 312 | type: enum 313 | enum: 314 | - "MySQL.VM.Standard.E3.1.8GB" 315 | - "MySQL.VM.Standard.E3.1.16GB" 316 | - "MySQL.VM.Standard.E3.2.32GB" 317 | - "MySQL.VM.Standard.E3.4.64GB" 318 | - "MySQL.VM.Standard.E3.8.128GB" 319 | - "MySQL.VM.Standard.E3.16.256GB" 320 | - "MySQL.VM.Standard.E3.24.384GB" 321 | - "MySQL.VM.Standard.E3.32.512GB" 322 | - "MySQL.VM.Standard.E3.48.768GB" 323 | - "MySQL.VM.Standard.E3.64.1024GB" 324 | - "MySQL.VM.Standard.E4.1.8GB" 325 | - "MySQL.VM.Standard.E4.1.16GB" 326 | - "MySQL.VM.Standard.E4.2.32GB" 327 | - "MySQL.VM.Standard.E4.4.64GB" 328 | - "MySQL.VM.Standard.E4.8.128GB" 329 | - "MySQL.VM.Standard.E4.16.256GB" 330 | - "MySQL.VM.Standard.E4.24.384GB" 331 | - "MySQL.VM.Standard.E4.32.512GB" 332 | - "MySQL.VM.Standard.E4.48.768GB" 333 | - "MySQL.VM.Standard.E4.64.1024GB" 334 | title: "OCI MySQL instance shape" 335 | description: "Shape of the OCI MySQL instance" 336 | default: "MySQL.VM.Standard.E3.1.8GB" 337 | 338 | private_ip_address: 339 | type: string 340 | title: "OCI MySQL IP" 341 | description: "Private IP Address for the OCI MySQL server listener. The default uses the built-in VCN configuration when creating a private subnet; if using a custom VCN or changing the VCN/Subnet CIDRs you will need to adjust this accordingly to ensure it's in scope." 342 | default: "10.0.2.8" 343 | 344 | enable_backups: 345 | type: boolean 346 | title: "Enable MySQL backups" 347 | description: "Enable MySQL backups for OCI MySQL database" 348 | default: false 349 | 350 | db_name: 351 | type: string 352 | title: "Airflow Database Name" 353 | default: "airflow" 354 | required: true 355 | 356 | airflow_username: 357 | type: string 358 | title: "Airflow DB Username" 359 | default: "airflow" 360 | required: true 361 | 362 | airflow_password: 363 | type: password 364 | title: "Airflow DB password" 365 | required: true 366 | 367 | bastion_name: 368 | type: string 369 | title: "Bastion Name" 370 | default: "bastion" 371 | required: true 372 | 373 | bastion_shape: 374 | type: oci:core:instanceshape:name 375 | title: "Bastion Instance Shape" 376 | dependsOn: 377 | compartmentId: compartment_ocid 378 | default: "VM.Standard.E4.Flex" 379 | required: true 380 | 381 | public_edge_node: 382 | type: boolean 383 | title: "Deploy Bastion to Public Subnet" 384 | default: true 385 | required: true 386 | visible: 387 | not: 388 | - use_remote_exec 389 | 390 | flex_gbs: 391 | type: int 392 | title: "Memory in GBs" 393 | description: "Set memory in GBs for the Node Pool Flex shape - Maximum varies based on which flex shape is chosen - refer to https://docs.oracle.com/en-us/iaas/Content/Compute/References/computeshapes.htm" 394 | default: 0 395 | required: true 396 | visible: 397 | or: 398 | - eq: 399 | - ${airflow_node_pool_shape} 400 | - "VM.Standard.E3.Flex" 401 | - eq: 402 | - ${airflow_node_pool_shape} 403 | - "VM.Standard.E4.Flex" 404 | - eq: 405 | - ${airflow_node_pool_shape} 406 | - "VM.Optimized3.Flex" 407 | - eq: 408 | - ${airflow_node_pool_shape} 409 | - "VM.Standard.A1.Flex" 410 | 411 | flex_ocpu: 412 | type: int 413 | title: "Number of OCPUs" 414 | description: "Set OCPUs for the Node Pool Flex shape - Maximum varies based on which flex shape is chosen - refer to https://docs.oracle.com/en-us/iaas/Content/Compute/References/computeshapes.htm" 415 | default: 0 416 | required: true 417 | visible: 418 | or: 419 | - eq: 420 | - ${airflow_node_pool_shape} 421 | - "VM.Standard.E3.Flex" 422 | - eq: 423 | - ${airflow_node_pool_shape} 424 | - 
"VM.Standard.E4.Flex" 425 | - eq: 426 | - ${airflow_node_pool_shape} 427 | - "VM.Optimized3.Flex" 428 | - eq: 429 | - ${airflow_node_pool_shape} 430 | - "VM.Standard.A1.Flex" 431 | 432 | bastion_flex_gbs: 433 | type: int 434 | title: "Memory in Gbs" 435 | description: "Set memory in Gbs for Node Pool Flex shape - Maximum varies based on which flex shape is chosen - refer to https://docs.oracle.com/en-us/iaas/Content/Compute/References/computeshapes.htm" 436 | default: 0 437 | required: true 438 | visible: 439 | or: 440 | - eq: 441 | - ${bastion_shape} 442 | - "VM.Standard.E3.Flex" 443 | - eq: 444 | - ${bastion_shape} 445 | - "VM.Standard.E4.Flex" 446 | - eq: 447 | - ${bastion_shape} 448 | - "VM.Optimized3.Flex" 449 | - eq: 450 | - ${bastion_shape} 451 | - "VM.Standard.A1.Flex" 452 | 453 | bastion_flex_ocpus: 454 | type: int 455 | title: "Number of OCPU" 456 | description: "Set OCPU for Node Pool Flex shape - Maximum varies based on which flex shape is chosen - refer to https://docs.oracle.com/en-us/iaas/Content/Compute/References/computeshapes.htm" 457 | default: 0 458 | required: true 459 | visible: 460 | or: 461 | - eq: 462 | - ${bastion_shape} 463 | - "VM.Standard.E3.Flex" 464 | - eq: 465 | - ${bastion_shape} 466 | - "VM.Standard.E4.Flex" 467 | - eq: 468 | - ${bastion_shape} 469 | - "VM.Optimized3.Flex" 470 | - eq: 471 | - ${bastion_shape} 472 | - "VM.Standard.A1.Flex" 473 | -------------------------------------------------------------------------------- /userdata/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:latest 2 | 3 | ARG AIRFLOW_USER_HOME=/opt/airflow 4 | ARG AIRFLOW_USER="airflow" 5 | ARG AIRFLOW_UID="1000" 6 | ARG AIRFLOW_GID="1000" 7 | ENV AIRFLOW_HOME=$AIRFLOW_USER_HOME 8 | 9 | RUN groupadd -g $AIRFLOW_GID airflow && \ 10 | useradd -ms /bin/bash -u $AIRFLOW_UID airflow -g $AIRFLOW_GID -d $AIRFLOW_USER_HOME && \ 11 | chown $AIRFLOW_USER:$AIRFLOW_GID $AIRFLOW_USER_HOME && \ 12 | buildDeps='freetds-dev libkrb5-dev libsasl2-dev libssl-dev libffi-dev libpq-dev' \ 13 | apt-get update && \ 14 | apt-get install -yqq sudo && \ 15 | apt-get install -yqq wget && \ 16 | apt-get install -yqq --no-install-recommends $buildDeps build-essential default-libmysqlclient-dev && \ 17 | pip3 install --no-cache-dir 'apache-airflow[crypto,kubernetes,mysql]' && \ 18 | pip3 install --no-cache-dir 'pandas' && \ 19 | apt-get purge --auto-remove -yqq $buildDeps && \ 20 | apt-get autoremove -yqq --purge && \ 21 | rm -rf /var/lib/apt/lists/* 22 | 23 | # Enable sudo for airflow user without asking for password 24 | RUN usermod -aG sudo $AIRFLOW_USER && \ 25 | echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers 26 | 27 | 28 | # Install OCI python SKD 29 | RUN pip3 install oci && \ 30 | pip3 install cx_Oracle 31 | 32 | # Copy airflow pod template file 33 | COPY pod_template.yaml $AIRFLOW_USER_HOME/pod_template.yaml 34 | RUN chown $AIRFLOW_UID:$AIRFLOW_GID $AIRFLOW_USER_HOME/pod_template.yaml 35 | 36 | # Install OCI plugins and copy the script to download OCI DAG templates 37 | RUN mkdir -p $AIRFLOW_USER_HOME/scripts 38 | COPY install_oci_plugins.sh $AIRFLOW_USER_HOME/scripts/install_oci_plugins.sh 39 | COPY install_oci_dag_templates.sh $AIRFLOW_USER_HOME/scripts/install_oci_dag_templates.sh 40 | RUN chown -R $AIRFLOW_UID:$AIRFLOW_GID $AIRFLOW_USER_HOME/scripts && \ 41 | chmod +x $AIRFLOW_USER_HOME/scripts/install_oci_plugins.sh && \ 42 | chmod +x $AIRFLOW_USER_HOME/scripts/install_oci_dag_templates.sh 43 | 44 | USER $AIRFLOW_UID 45 | 46 | WORKDIR 
$AIRFLOW_USER_HOME 47 | 48 | # Install OCI plugins 49 | RUN $AIRFLOW_USER_HOME/scripts/install_oci_plugins.sh 50 | 51 | -------------------------------------------------------------------------------- /userdata/cli_config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # sudo curl -L -O https://raw.githubusercontent.com/oracle/oci-cli/master/scripts/install/install.sh && sudo chmod a+x install.sh && sudo ./install.sh --accept-all-defaults 3 | curl -L -O https://raw.githubusercontent.com/oracle/oci-cli/master/scripts/install/install.sh && chmod a+x install.sh && ./install.sh --accept-all-defaults 4 | 5 | echo "export OCI_CLI_AUTH=instance_principal" >> ~/.bash_profile 6 | echo "export OCI_CLI_AUTH=instance_principal" >> ~/.bashrc -------------------------------------------------------------------------------- /userdata/create_db.sh: -------------------------------------------------------------------------------- 1 | 2 | #!/bin/bash 3 | 4 | # Install MySQL client 5 | sudo yum install -y https://dev.mysql.com/get/mysql80-community-release-el7-3.noarch.rpm 6 | sudo yum install -y mysql --nogpgcheck 7 | 8 | # Connect to MySQL instance and create airflow database and user 9 | mysql -h ${db_ip} -u ${admin_db_user} -p${admin_db_password} -e "CREATE DATABASE IF NOT EXISTS ${db_name} CHARACTER SET utf8 COLLATE utf8_unicode_ci;" 10 | mysql -h ${db_ip} -u ${admin_db_user} -p${admin_db_password} -e "CREATE USER IF NOT EXISTS ${airflow_db_user} IDENTIFIED WITH mysql_native_password BY '${airflow_db_password}'" 11 | mysql -h ${db_ip} -u ${admin_db_user} -p${admin_db_password} -e "GRANT ALL ON ${db_name}.* TO ${airflow_db_user}" 12 | -------------------------------------------------------------------------------- /userdata/deploy_airflow.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | #build_dir="$HOME/airflow/build" 5 | #mkdir -p $build_dir 6 | #cd $build_dir 7 | 8 | # Create airflow namespace (if it does not exist) 9 | kubectl get namespaces | grep ${namespace} 10 | if [[ $? -ne 0 ]]; then 11 | kubectl create namespace ${namespace} 12 | fi 13 | 14 | # Get authentication token stored in OCI vault 15 | auth_token=`oci secrets secret-bundle get --secret-id ${secret_id} --stage CURRENT | jq ."data.\"secret-bundle-content\".content" | tr -d '"' | base64 --decode` 16 | 17 | # Create OCIR registry secret (if it does not exist already) 18 | kubectl -n ${namespace} get secrets | grep 'airflow-ocir-secret' 19 | if [[ $?
-ne 0 ]]; then 20 | kubectl -n ${namespace} create secret docker-registry airflow-ocir-secret --docker-server=${registry} --docker-username=${tenancy_name}/${registry_user} --docker-password=$auth_token 21 | fi 22 | 23 | echo "NFS IP:" ${nfs_ip} 24 | echo "Mount target ID:" ${mount_target_id} 25 | 26 | cd $HOME/airflow/build 27 | 28 | # Create NFS persistent volumes using FSS mount target 29 | kubectl -n ${namespace} apply -f volumes.yaml 30 | 31 | # Create airflow config map with container environment variables 32 | kubectl -n ${namespace} apply -f configmap.yaml 33 | 34 | # Create airflow secret (encoded DB connection string) 35 | kubectl -n ${namespace} apply -f secrets.yaml 36 | 37 | # Deploy airflow containers 38 | kubectl -n ${namespace} apply -f airflow.yaml 39 | 40 | # Wait until LB is created and public IP is allocated to airflow service 41 | sleep 120 42 | 43 | # Get service public IP address 44 | kubectl -n ${namespace} get svc 45 | -------------------------------------------------------------------------------- /userdata/generate_kubeconfig.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017, 2019, Oracle Corporation and/or affiliates. All rights reserved. 3 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl 4 | 5 | # trying until: 6 | # - instance-principal rights are active 7 | # - kubeconfig generation is successful 8 | RET_CODE=1 9 | INDEX_NR=1 10 | SLEEP_TIME="20s" 11 | while [ $RET_CODE -ne 0 ] 12 | do 13 | echo "Started sleep. INDEX_NR is: $INDEX_NR. SLEEP_TIME is $SLEEP_TIME" 14 | sleep $SLEEP_TIME 15 | echo "Finished sleep" 16 | 17 | echo "Started generating config: ${cluster-id} ${region}" 18 | oci ce cluster create-kubeconfig --cluster-id ${cluster-id} --file $HOME/.kube/config --region ${region} --token-version 2.0.0 19 | RET_CODE=$? 20 | echo "Finished generating config. RET_CODE is : $RET_CODE" 21 | 22 | ((INDEX_NR+=1)) 23 | done 24 | -------------------------------------------------------------------------------- /userdata/init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | -------------------------------------------------------------------------------- /userdata/install_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Install docker 4 | sudo yum-config-manager --enable ol7_addons 5 | sudo yum install -y docker-engine docker-cli 6 | 7 | 8 | # Enable and start docker daemon 9 | sudo systemctl enable docker 10 | sudo systemctl start docker 11 | 12 | # Add user to docker group 13 | sudo usermod -a -G docker ${user} 14 | -------------------------------------------------------------------------------- /userdata/install_kubectl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017, 2019, Oracle Corporation and/or affiliates. All rights reserved. 
3 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl 4 | 5 | #sudo yum install -y oracle-olcne-release-el7 6 | 7 | #sudo yum-config-manager --enable ol7_olcne 8 | 9 | sudo yum install -y kubectl git 10 | 11 | mkdir -p ~/.kube 12 | 13 | echo "source <(kubectl completion bash)" >> ~/.bashrc 14 | echo "alias k='kubectl'" >> ~/.bashrc 15 | -------------------------------------------------------------------------------- /userdata/install_oci_dag_templates.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Install OCI plugins 3 | 4 | dags_dir="$HOME/dags" 5 | 6 | mkdir -p $dags_dir 7 | 8 | dag_url=https://raw.githubusercontent.com/oracle-quickstart/oci-airflow/master/scripts/dags 9 | 10 | 11 | # Airflow OCI DAGs 12 | for file in oci_simple_example.py oci_advanced_example.py oci_adb_sql_example.py oci_smoketest.py; do 13 | wget $dag_url/$file -O $dags_dir/$file 14 | done 15 | for file in schedule_dataflow_app.py schedule_dataflow_with_parameters.py trigger_dataflow_when_file_exists.py; do 16 | wget $dag_url/$file -O $dags_dir/$file.template 17 | done 18 | -------------------------------------------------------------------------------- /userdata/install_oci_plugins.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Install OCI plugins 3 | 4 | hooks_dir="$HOME/plugins/hooks" 5 | operators_dir="$HOME/plugins/operators" 6 | sensors_dir="$HOME/plugins/sensors" 7 | 8 | mkdir -p $hooks_dir 9 | mkdir -p $operators_dir 10 | mkdir -p $sensors_dir 11 | 12 | plugin_url=https://raw.githubusercontent.com/oracle-quickstart/oci-airflow/master/scripts/plugins 13 | dag_url=https://raw.githubusercontent.com/oracle-quickstart/oci-airflow/master/scripts/dags 14 | 15 | # hooks 16 | for file in oci_base.py oci_object_storage.py oci_data_flow.py oci_data_catalog.py oci_adb.py; do 17 | wget $plugin_url/hooks/$file -O $hooks_dir/$file 18 | done 19 | # operators 20 | for file in oci_object_storage.py oci_data_flow.py oci_data_catalog.py oci_adb.py oci_copy_object_to_adb.py; do 21 | wget $plugin_url/operators/$file -O $operators_dir/$file 22 | done 23 | # sensors 24 | for file in oci_object_storage.py oci_adb.py; do 25 | wget $plugin_url/sensors/$file -O $sensors_dir/$file 26 | done 27 | -------------------------------------------------------------------------------- /userdata/is_worker_active.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | SLEEP_TIME="20s" 3 | active_nodes="" 4 | while [ -z "$active_nodes" ] 5 | do 6 | sleep $SLEEP_TIME 7 | echo "Checking if there is a worker node in ACTIVE state" 8 | active_nodes=`oci ce node-pool get --node-pool-id ${nodepool-id} --query 'data.nodes[*].{ocid:id, state:"lifecycle-state"}' | jq '.[] | select(.state=="ACTIVE")' | jq ."ocid"` 9 | done 10 | echo $active_nodes 11 | -------------------------------------------------------------------------------- /userdata/push_to_registry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Get authentication token stored in OCI vault 4 | auth_token=`oci secrets secret-bundle get --secret-id ${secret_id} --stage CURRENT | jq ."data.\"secret-bundle-content\".content" | tr -d '"' | base64 --decode` 5 | 6 | # Get tenancy name 7 | #tenancy_name=`oci iam tenancy get --tenancy-id $tenancy_id | jq ."data.name" | tr -d '"'` 8 | 9 | # Login OCI registry 10 | docker login 
${registry} -u ${tenancy_name}/${registry_user} -p $auth_token 11 | 12 | # Tag container image 13 | docker tag "${image_name}:${image_label}" ${registry}/${tenancy_name}/${repo_name}/${image_name}:${image_label} 14 | 15 | # Push container image to OCI registry 16 | docker push ${registry}/${tenancy_name}/${repo_name}/${image_name}:${image_label} 17 | 18 | -------------------------------------------------------------------------------- /userdata/templates/airflow.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: airflow 6 | namespace: ${namespace} 7 | --- 8 | apiVersion: rbac.authorization.k8s.io/v1 9 | kind: Role 10 | metadata: 11 | name: airflow 12 | namespace: ${namespace} 13 | rules: 14 | - apiGroups: [""] 15 | resources: ["configmaps"] 16 | verbs: ["get", "watch", "list"] 17 | - apiGroups: [""] 18 | resources: ["secrets"] 19 | verbs: ["get", "watch", "list"] 20 | - apiGroups: [""] 21 | resources: ["pods"] 22 | verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] 23 | - apiGroups: [""] 24 | resources: ["pods/exec"] 25 | verbs: ["get", "create"] 26 | - apiGroups: [""] 27 | resources: ["pods/log"] 28 | verbs: ["get", "list"] 29 | --- 30 | kind: RoleBinding 31 | apiVersion: rbac.authorization.k8s.io/v1 32 | metadata: 33 | name: airflow 34 | namespace: ${namespace} 35 | subjects: 36 | - kind: ServiceAccount 37 | name: airflow 38 | roleRef: 39 | kind: Role 40 | name: airflow 41 | apiGroup: rbac.authorization.k8s.io 42 | --- 43 | apiVersion: apps/v1 44 | kind: Deployment 45 | metadata: 46 | name: airflow 47 | namespace: ${namespace} 48 | labels: 49 | app: airflow 50 | spec: 51 | replicas: 1 52 | selector: 53 | matchLabels: 54 | app: airflow 55 | template: 56 | metadata: 57 | labels: 58 | app: airflow 59 | spec: 60 | serviceAccountName: airflow 61 | initContainers: 62 | - name: "init" 63 | image: ${registry}/${tenancy_name}/${repo_name}/${image_name}:${image_label} 64 | imagePullPolicy: Always 65 | envFrom: 66 | - configMapRef: 67 | name: airflow-config 68 | env: 69 | - name: AIRFLOW__CORE__SQL_ALCHEMY_CONN 70 | valueFrom: 71 | secretKeyRef: 72 | name: airflow-secrets 73 | key: sql_alchemy_conn 74 | command: ["/bin/sh", "-c"] 75 | 76 | args: 77 | - sudo chown airflow:airflow /opt/airflow/dags; 78 | sudo chown airflow:airflow /opt/airflow/logs; 79 | airflow db init; 80 | airflow users create --username airflow --firstname airflow --lastname airflow --role Admin --password airflow --email admin@airflow.org; 81 | $HOME/scripts/install_oci_dag_templates.sh; 82 | 83 | volumeMounts: 84 | - name: airflow-dags 85 | mountPath: /opt/airflow/dags 86 | - name: airflow-logs 87 | mountPath: /opt/airflow/logs 88 | 89 | 90 | containers: 91 | 92 | - name: webserver 93 | image: ${registry}/${tenancy_name}/${repo_name}/${image_name}:${image_label} 94 | imagePullPolicy: IfNotPresent 95 | command: ["airflow","webserver"] 96 | envFrom: 97 | - configMapRef: 98 | name: airflow-config 99 | env: 100 | - name: AIRFLOW__CORE__SQL_ALCHEMY_CONN 101 | valueFrom: 102 | secretKeyRef: 103 | name: airflow-secrets 104 | key: sql_alchemy_conn 105 | volumeMounts: 106 | - name: airflow-dags 107 | mountPath: /opt/airflow/dags 108 | - name: airflow-logs 109 | mountPath: /opt/airflow/logs 110 | 111 | - name: scheduler 112 | image: ${registry}/${tenancy_name}/${repo_name}/${image_name}:${image_label} 113 | imagePullPolicy: IfNotPresent 114 | command: ["airflow","scheduler"] 115 | envFrom: 116 | - 
configMapRef: 117 | name: airflow-config 118 | env: 119 | - name: AIRFLOW__CORE__SQL_ALCHEMY_CONN 120 | valueFrom: 121 | secretKeyRef: 122 | name: airflow-secrets 123 | key: sql_alchemy_conn 124 | volumeMounts: 125 | - name: airflow-dags 126 | mountPath: /opt/airflow/dags 127 | - name: airflow-logs 128 | mountPath: /opt/airflow/logs 129 | 130 | volumes: 131 | - name: airflow-dags 132 | persistentVolumeClaim: 133 | claimName: airflow-dags 134 | - name: airflow-logs 135 | persistentVolumeClaim: 136 | claimName: airflow-logs 137 | 138 | 139 | imagePullSecrets: 140 | - name: airflow-ocir-secret 141 | --- 142 | apiVersion: v1 143 | kind: Service 144 | metadata: 145 | name: airflow 146 | namespace: ${namespace} 147 | spec: 148 | type: LoadBalancer 149 | ports: 150 | - port: 8080 151 | selector: 152 | app: airflow 153 | -------------------------------------------------------------------------------- /userdata/templates/configmap.yaml.template: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: airflow-config 5 | namespace: ${namespace} 6 | data: 7 | AIRFLOW_HOME: "/opt/airflow" 8 | AIRFLOW__CORE__DAGS_FOLDER: "/opt/airflow/dags" 9 | AIRFLOW__CORE__LOAD_EXAMPLES: "True" 10 | AIRFLOW__CORE__EXECUTOR: "KubernetesExecutor" 11 | AIRFLOW__CORE__SQL_ALCHEMY_CONN_SECRET: "sql_alchemy_conn" 12 | AIRFLOW__KUBERNETES__POD_TEMPLATE_FILE: "/opt/airflow/pod_template.yaml" 13 | AIRFLOW__KUBERNETES__WORKER_CONTAINER_REPOSITORY: "${registry}/${tenancy_name}/${repo_name}/${image_name}" 14 | AIRFLOW__KUBERNETES__WORKER_CONTAINER_TAG: "${image_label}" 15 | AIRFLOW__KUBERNETES__WORKER_SERVICE_ACCOUNT_NAME: "airflow" 16 | AIRFLOW__KUBERNETES__NAMESPACE: "${namespace}" 17 | #AIRFLOW__LOGGING__BASE_LOG_FOLDER: "/opt/airflow/dags/logs" 18 | #AIRFLOW__CORE__DAG_PROCESSOR_MANAGER_LOG_LOCATION: "/opt/airflow/dags/logs" 19 | #AIRFLOW__SCHEDULER__CHILD_PROCESS_LOG_DIRECTORY: "/opt/airflow/dags/logs" -------------------------------------------------------------------------------- /userdata/templates/pod_template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Pod 4 | metadata: 5 | name: dummy-name 6 | spec: 7 | containers: 8 | - args: [] 9 | command: [] 10 | env: 11 | - name: AIRFLOW__CORE__EXECUTOR 12 | value: "KubernetesExecutor" 13 | - name: AIRFLOW__CORE__SQL_ALCHEMY_CONN 14 | valueFrom: 15 | secretKeyRef: 16 | name: airflow-secrets 17 | key: sql_alchemy_conn 18 | envFrom: [] 19 | image: dummy_image 20 | imagePullPolicy: IfNotPresent 21 | name: base 22 | ports: [] 23 | volumeMounts: 24 | - name: airflow-dags 25 | mountPath: /opt/airflow/dags 26 | - name: airflow-logs 27 | mountPath: /opt/airflow/logs 28 | volumes: 29 | - name: airflow-dags 30 | persistentVolumeClaim: 31 | claimName: airflow-dags 32 | - name: airflow-logs 33 | persistentVolumeClaim: 34 | claimName: airflow-logs 35 | #hostNetwork: false 36 | restartPolicy: Never 37 | serviceAccountName: airflow 38 | -------------------------------------------------------------------------------- /userdata/templates/secrets.yaml.template: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one * 2 | # or more contributor license agreements. See the NOTICE file * 3 | # distributed with this work for additional information * 4 | # regarding copyright ownership. 
The ASF licenses this file * 5 | # to you under the Apache License, Version 2.0 (the * 6 | # "License"); you may not use this file except in compliance * 7 | # with the License. You may obtain a copy of the License at * 8 | # * 9 | # http://www.apache.org/licenses/LICENSE-2.0 * 10 | # * 11 | # Unless required by applicable law or agreed to in writing, * 12 | # software distributed under the License is distributed on an * 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 14 | # KIND, either express or implied. See the License for the * 15 | # specific language governing permissions and limitations * 16 | # under the License. * 17 | apiVersion: v1 18 | kind: Secret 19 | metadata: 20 | name: airflow-secrets 21 | namespace: airflow 22 | type: Opaque 23 | data: 24 | # The sql_alchemy_conn value is a base64 encoded representation of this connection string: 25 | # mysql+mysql://airflow:A@flow1234@10.0.0.3:3306/airflow 26 | sql_alchemy_conn: ${sql_alchemy_conn} 27 | -------------------------------------------------------------------------------- /userdata/templates/volumes.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | kind: StorageClass 3 | apiVersion: storage.k8s.io/v1beta1 4 | metadata: 5 | name: oci-fss 6 | provisioner: oracle.com/oci-fss 7 | parameters: 8 | mntTargetId: ${MNT_TARGET_ID} 9 | --- 10 | kind: PersistentVolume 11 | apiVersion: v1 12 | metadata: 13 | name: airflow-dags 14 | spec: 15 | storageClassName: oci-fss 16 | accessModes: 17 | - ReadOnlyMany 18 | capacity: 19 | storage: 20Gi 20 | mountOptions: 21 | - nosuid 22 | nfs: 23 | server: ${NFS_IP} 24 | path: "/airflow-dags/" 25 | readOnly: false 26 | --- 27 | kind: PersistentVolumeClaim 28 | apiVersion: v1 29 | metadata: 30 | name: airflow-dags 31 | spec: 32 | storageClassName: "oci-fss" 33 | accessModes: 34 | - ReadOnlyMany 35 | resources: 36 | requests: 37 | storage: 20Gi 38 | volumeName: airflow-dags 39 | --- 40 | kind: PersistentVolume 41 | apiVersion: v1 42 | metadata: 43 | name: airflow-logs 44 | spec: 45 | storageClassName: oci-fss 46 | accessModes: 47 | - ReadOnlyMany 48 | capacity: 49 | storage: 20Gi 50 | mountOptions: 51 | - nosuid 52 | nfs: 53 | server: ${NFS_IP} 54 | path: "/airflow-logs/" 55 | readOnly: false 56 | --- 57 | kind: PersistentVolumeClaim 58 | apiVersion: v1 59 | metadata: 60 | name: airflow-logs 61 | spec: 62 | storageClassName: "oci-fss" 63 | accessModes: 64 | - ReadOnlyMany 65 | resources: 66 | requests: 67 | storage: 20Gi 68 | volumeName: airflow-logs 69 | --- 70 | -------------------------------------------------------------------------------- /variables.tf: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------------------------------------------------------- 2 | # AD Settings. 
By default uses AD1 3 | # --------------------------------------------------------------------------------------------------------------------- 4 | variable "availability_domain" { 5 | default = "1" 6 | } 7 | 8 | # --------------------------------------------------------------------------------------------------------------------- 9 | # SSH Keys - Put this to top level because they are required 10 | # --------------------------------------------------------------------------------------------------------------------- 11 | variable "ssh_provided_public_key" { 12 | default = "" 13 | } 14 | 15 | 16 | # --------------------------------------------------------------------------------------------------------------------- 17 | # Network Settings 18 | # --------------------------------------------------------------------------------------------------------------------- 19 | 20 | # If you want to use an existing VCN set useExistingVcn = "true" and configure OCID(s) of myVcn, OKESubnet and edgeSubnet 21 | 22 | variable "useExistingVcn" { 23 | default = "false" 24 | } 25 | 26 | variable "myVcn" { 27 | default = " " 28 | } 29 | variable "OKESubnet" { 30 | default = " " 31 | } 32 | variable "edgeSubnet" { 33 | default = " " 34 | } 35 | 36 | variable "custom_cidrs" { 37 | default = "false" 38 | } 39 | 40 | variable "VCN_CIDR" { 41 | default = "10.0.0.0/16" 42 | } 43 | 44 | variable "edge_cidr" { 45 | default = "10.0.1.0/24" 46 | } 47 | 48 | variable "private_cidr" { 49 | default = "10.0.2.0/24" 50 | } 51 | 52 | variable "vcn_dns_label" { 53 | default = "airflowvcn" 54 | } 55 | 56 | variable "service_port" { 57 | default = "8080" 58 | } 59 | 60 | variable "public_edge_node" { 61 | default = true 62 | } 63 | 64 | # --------------------------------------------------------------------------------------------------------------------- 65 | # OKE Settings 66 | # --------------------------------------------------------------------------------------------------------------------- 67 | 68 | variable "create_new_oke_cluster" { 69 | default = "true" 70 | } 71 | 72 | variable "existing_oke_cluster_id" { 73 | default = " " 74 | } 75 | 76 | variable "cluster_name" { 77 | default = "airflow-cluster" 78 | } 79 | 80 | variable "kubernetes_version" { 81 | default = "v1.25.4" 82 | } 83 | 84 | variable "airflow_node_pool_name" { 85 | default = "Airflow-Node-Pool" 86 | } 87 | 88 | variable "airflow_node_pool_shape" { 89 | default = "VM.Standard2.2" 90 | } 91 | 92 | variable "airflow_node_pool_size" { 93 | default = 1 94 | } 95 | 96 | variable "airflow_namespace" { 97 | default = "airflow" 98 | } 99 | 100 | variable "kube_label" { 101 | default = "airflow" 102 | } 103 | 104 | variable "cluster_options_add_ons_is_kubernetes_dashboard_enabled" { 105 | default = "false" 106 | } 107 | 108 | variable "cluster_options_admission_controller_options_is_pod_security_policy_enabled" { 109 | default = "false" 110 | } 111 | 112 | variable "use_remote_exec" { 113 | default = "true" 114 | } 115 | 116 | variable "endpoint_subnet_id" { 117 | default = " " 118 | } 119 | 120 | variable "node_pool_node_shape_config_memory_in_gbs" { 121 | default = 2 122 | } 123 | 124 | variable "node_pool_node_shape_config_ocpus" { 125 | default = 1 126 | } 127 | 128 | variable "flex_gbs" { 129 | default = 2 130 | } 131 | 132 | variable "flex_ocpu" { 133 | default = 1 134 | } 135 | 136 | # --------------------------------------------------------------------------------------------------------------------- 137 | # OCI registry settings 138 | # 
--------------------------------------------------------------------------------------------------------------------- 139 | 140 | variable "registry" { 141 | default = "iad.ocir.io" 142 | } 143 | 144 | variable "repo_name" { 145 | default = "airflow" 146 | } 147 | 148 | # Set the user used to log in to the OCIR registry 149 | variable "username" { 150 | default = "oracleidentitycloudservice/" 151 | } 152 | 153 | variable "image_name" { 154 | default = "airflow" 155 | } 156 | 157 | variable "image_label" { 158 | default = "2.0" 159 | } 160 | 161 | # --------------------------------------------------------------------------------------------------------------------- 162 | # OCI vault secret ID where the authentication key is stored 163 | # it is used for authentication when pushing/pulling images to/from the OCIR registry 164 | # Set it to the OCID of the secret where you store the authentication token that is used to push/pull images from OCIR 165 | # --------------------------------------------------------------------------------------------------------------------- 166 | variable "vault_secret_id" { 167 | # default = "ocid1.vaultsecret.oc1.iad.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" 168 | } 169 | 170 | 171 | # --------------------------------------------------------------------------------------------------------------------- 172 | # DB settings 173 | # --------------------------------------------------------------------------------------------------------------------- 174 | 175 | variable "meta_db_type" { 176 | default = "OCI Mysql" 177 | } 178 | 179 | variable "mysql_admin_username" { 180 | default = "mysqladmin" 181 | } 182 | 183 | variable "mysql_admin_password" {} 184 | 185 | variable "mysql_shape" { 186 | default = "MySQL.VM.Standard.E3.1.8GB" 187 | } 188 | 189 | variable "enable_backups" { 190 | default = "false" 191 | } 192 | 193 | variable "private_ip_address" { 194 | default = "10.0.2.8" 195 | } 196 | 197 | variable "db_name" { 198 | default = "airflow" 199 | } 200 | 201 | variable "airflow_username" { 202 | default = "airflow" 203 | } 204 | 205 | variable "airflow_password" {} 206 | 207 | 208 | # --------------------------------------------------------------------------------------------------------------------- 209 | # Bastion VM Settings 210 | # --------------------------------------------------------------------------------------------------------------------- 211 | 212 | 213 | variable "bastion_name" { 214 | default = "bastion" 215 | } 216 | 217 | variable "bastion_shape" { 218 | default = "VM.Standard2.1" 219 | } 220 | 221 | variable "bastion_flex_gbs" { 222 | default = 1 223 | } 224 | 225 | variable "bastion_flex_ocpus" { 226 | default = 2 227 | } 228 | # --------------------------------------------------------------------------------------------------------------------- 229 | # Environment variables 230 | # You probably want to define these as environment variables.
231 | # Instructions on that are here: https://github.com/oracle/oci-quickstart-prerequisites 232 | # --------------------------------------------------------------------------------------------------------------------- 233 | 234 | variable "compartment_ocid" {} 235 | 236 | # Required by the OCI Provider 237 | 238 | variable "tenancy_ocid" {} 239 | variable "region" {} 240 | 241 | -------------------------------------------------------------------------------- /versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.0" 3 | } 4 | --------------------------------------------------------------------------------