├── versions.tf ├── images └── SecretsExample.png ├── modules ├── master │ ├── outputs.tf │ ├── data.tf │ ├── variables.tf │ └── main.tf ├── fss │ ├── outputs.tf │ ├── variables.tf │ ├── mount.sh │ ├── data.tf │ └── main.tf ├── worker │ ├── outputs.tf │ ├── main.tf │ └── variables.tf ├── oci-mysql │ ├── variables.tf │ └── main.tf └── network │ ├── data.tf │ ├── outputs.tf │ ├── variables.tf │ └── main.tf ├── data.tf ├── fss.tf ├── outputs.tf ├── network.tf ├── oci-mysql.tf ├── scripts ├── dags │ ├── oci_simple_example.py │ ├── oci_smoketest.py │ ├── oci_adb_sql_example.py │ ├── oci_advanced_example.py │ ├── schedule_dataflow_app.py │ ├── schedule_dataflow_pipeline.py │ ├── schedule_dataflow_with_parameters.py │ └── trigger_dataflow_when_file_exists.py ├── plugins │ ├── sensors │ │ ├── oci_adb.py │ │ └── oci_object_storage.py │ ├── hooks │ │ ├── oci_data_catalog.py │ │ ├── oci_data_flow.py │ │ ├── oci_base.py │ │ ├── oci_object_storage.py │ │ └── oci_adb.py │ └── operators │ │ ├── oci_data_catalog.py │ │ ├── oci_copy_object_to_adb.py │ │ ├── oci_adb.py │ │ ├── oci_data_flow.py │ │ └── oci_object_storage.py ├── custom │ ├── connection_form.js │ └── connection.py └── boot.sh ├── iam.tf ├── LICENSE ├── compute.tf ├── README.md ├── variables.tf └── schema.yaml /versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 0.12" 3 | } 4 | -------------------------------------------------------------------------------- /images/SecretsExample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oracle-quickstart/oci-airflow/HEAD/images/SecretsExample.png -------------------------------------------------------------------------------- /modules/master/outputs.tf: -------------------------------------------------------------------------------- 1 | output "airflow-public-ip" { value = "${data.oci_core_vnic.master_node_vnic.public_ip_address}" } 2 | -------------------------------------------------------------------------------- /modules/fss/outputs.tf: -------------------------------------------------------------------------------- 1 | output "nfs-ip" { 2 | value = "${var.enable_fss ? 
lookup(data.oci_core_private_ips.fss_ip[0].private_ips[0], "ip_address") : " "}" 3 | } 4 | -------------------------------------------------------------------------------- /modules/worker/outputs.tf: -------------------------------------------------------------------------------- 1 | output "block-volume-count" { value = "${var.block_volumes_per_worker}" } 2 | output "block-volume-size" { value = "${var.data_blocksize_in_gbs}" } 3 | -------------------------------------------------------------------------------- /modules/fss/variables.tf: -------------------------------------------------------------------------------- 1 | variable "availability_domain" {} 2 | variable "enable_fss" {} 3 | variable "compartment_ocid" {} 4 | variable "subnet_id" {} 5 | variable "vcn_cidr" {} 6 | -------------------------------------------------------------------------------- /data.tf: -------------------------------------------------------------------------------- 1 | data "oci_identity_availability_domains" "ADs" { 2 | compartment_id = "${var.tenancy_ocid}" 3 | } 4 | 5 | data "oci_identity_compartment" "airflow_compartment" { 6 | id = "${var.compartment_ocid}" 7 | } 8 | 9 | -------------------------------------------------------------------------------- /modules/fss/mount.sh: -------------------------------------------------------------------------------- 1 | "sudo yum -y install nfs-utils > nfs-utils-install.log", 2 | "sudo mkdir -p /mnt/myfsspaths/fs1/path1", 3 | "sudo mount ${local.mount_target_1_ip_address}:${var.export_path_fs1_mt1} /mnt${var.export_path_fs1_mt1}", 4 | -------------------------------------------------------------------------------- /modules/fss/data.tf: -------------------------------------------------------------------------------- 1 | data "oci_core_private_ips" "fss_ip" { 2 | count = "${var.enable_fss ? 1 : 0}" 3 | subnet_id = "${var.subnet_id}" 4 | 5 | filter { 6 | name = "id" 7 | values = ["${oci_file_storage_mount_target.airflow_mount_target.0.private_ip_ids.0}"] 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /modules/oci-mysql/variables.tf: -------------------------------------------------------------------------------- 1 | variable "availability_domain" {} 2 | variable "airflow_database" {} 3 | variable "mysqladmin_password" {} 4 | variable "mysqladmin_username" {} 5 | variable "compartment_ocid" {} 6 | variable "mysql_shape" {} 7 | variable "subnet_id" {} 8 | variable "enable_mysql_backups" {} 9 | variable "oci_mysql_ip" {} 10 | -------------------------------------------------------------------------------- /fss.tf: -------------------------------------------------------------------------------- 1 | module "fss" { 2 | source = "./modules/fss" 3 | compartment_ocid = "${var.compartment_ocid}" 4 | enable_fss = "${var.enable_fss}" 5 | subnet_id = "${var.useExistingVcn ? var.clusterSubnet : module.network.private-id}" 6 | availability_domain = "${var.availability_domain}" 7 | vcn_cidr = "${data.oci_core_vcn.vcn_info.cidr_block}" 8 | } 9 | 10 | -------------------------------------------------------------------------------- /modules/network/data.tf: -------------------------------------------------------------------------------- 1 | data "oci_identity_availability_domains" "ADs" { 2 | count = var.useExistingVcn ? 0 : 1 3 | compartment_id = "${var.tenancy_ocid}" 4 | } 5 | 6 | data "oci_core_services" "all_svcs_moniker" { 7 | count = var.useExistingVcn ? 
0 : 1 8 | filter { 9 | name = "name" 10 | values = ["All .* Services In Oracle Services Network"] 11 | regex = true 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /modules/network/outputs.tf: -------------------------------------------------------------------------------- 1 | output "vcn-id" { 2 | value = "${var.useExistingVcn ? var.custom_vcn[0] : oci_core_vcn.data_vcn.0.id}" 3 | } 4 | 5 | output "public-id" { 6 | value = "${var.useExistingVcn ? var.masterSubnet : oci_core_subnet.public.0.id}" 7 | } 8 | 9 | output "private-id" { 10 | value = "${var.useExistingVcn ? var.clusterSubnet : oci_core_subnet.private.0.id}" 11 | } 12 | -------------------------------------------------------------------------------- /modules/master/data.tf: -------------------------------------------------------------------------------- 1 | 2 | data "oci_core_vnic_attachments" "master_node_vnics" { 3 | compartment_id = "${var.compartment_ocid}" 4 | availability_domain = "${var.availability_domain}" 5 | instance_id = "${oci_core_instance.Master.id}" 6 | } 7 | 8 | data "oci_core_vnic" "master_node_vnic" { 9 | vnic_id = "${lookup(data.oci_core_vnic_attachments.master_node_vnics.vnic_attachments[0],"vnic_id")}" 10 | } 11 | -------------------------------------------------------------------------------- /outputs.tf: -------------------------------------------------------------------------------- 1 | output "AIRFLOW-WEBSERVER-IP" { value = "http://${module.master.airflow-public-ip}:8080" } 2 | output "AIRFLOW-FLOWER-IP" { value = "${var.executor == "celery" ? "http://${module.master.airflow-public-ip}:5555" : "Not applicable for local executor."}" } 3 | output "SSH_KEY_INFO" { value = "${var.provide_ssh_key ? "SSH Key Provided by user" : "See below for generated SSH private key."}" } 4 | output "SSH_PRIVATE_KEY" { value = "${var.provide_ssh_key ? "SSH Key Provided by user" : tls_private_key.key.private_key_pem}" } 5 | -------------------------------------------------------------------------------- /network.tf: -------------------------------------------------------------------------------- 1 | module "network" { 2 | source = "./modules/network" 3 | tenancy_ocid = "${var.tenancy_ocid}" 4 | compartment_ocid = "${var.compartment_ocid}" 5 | availability_domain = "${var.availability_domain}" 6 | region = "${var.region}" 7 | oci_service_gateway = "${var.oci_service_gateway[var.region]}" 8 | useExistingVcn = "${var.useExistingVcn}" 9 | VPC_CIDR = "${var.VPC_CIDR}" 10 | custom_vcn = ["${var.myVcn}"] 11 | clusterSubnet = "${var.clusterSubnet}" 12 | masterSubnet = "${var.masterSubnet}" 13 | } 14 | 15 | -------------------------------------------------------------------------------- /oci-mysql.tf: -------------------------------------------------------------------------------- 1 | module "oci-mysql" { 2 | source = "./modules/oci-mysql" 3 | availability_domain = "${var.availability_domain}" 4 | airflow_database = "${var.airflow_database}" 5 | mysqladmin_password = "${var.mysqladmin_password}" 6 | mysqladmin_username = "${var.mysqladmin_username}" 7 | compartment_ocid = "${var.compartment_ocid}" 8 | mysql_shape = "${var.mysql_shape}" 9 | subnet_id = "${var.useExistingVcn ? 
var.clusterSubnet : module.network.private-id}" 10 | enable_mysql_backups = "${var.enable_mysql_backups}" 11 | oci_mysql_ip = "${var.oci_mysql_ip}" 12 | } 13 | -------------------------------------------------------------------------------- /modules/oci-mysql/main.tf: -------------------------------------------------------------------------------- 1 | resource "oci_mysql_mysql_db_system" "airflow_database" { 2 | count = "${var.airflow_database == "mysql-oci" ? 1 : 0}" 3 | admin_password = "${var.mysqladmin_password}" 4 | admin_username = "${var.mysqladmin_username}" 5 | availability_domain = "${var.availability_domain}" 6 | compartment_id = "${var.compartment_ocid}" 7 | shape_name = "${var.mysql_shape}" 8 | subnet_id = "${var.subnet_id}" 9 | backup_policy { 10 | is_enabled = "${var.enable_mysql_backups}" 11 | retention_in_days = "10" 12 | } 13 | description = "Airflow Database" 14 | port = "3306" 15 | port_x = "33306" 16 | data_storage_size_in_gb = 50 17 | ip_address = var.oci_mysql_ip 18 | } 19 | 20 | 21 | data "oci_mysql_mysql_db_system" "airflow_database" { 22 | count = "${var.airflow_database == "mysql-oci" ? 1 : 0}" 23 | db_system_id = "${oci_mysql_mysql_db_system.airflow_database.0.id}" 24 | } 25 | -------------------------------------------------------------------------------- /scripts/dags/oci_simple_example.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from airflow import DAG 3 | from hooks.oci_base import OCIBaseHook 4 | from hooks.oci_object_storage import OCIObjectStorageHook 5 | from operators.oci_object_storage import MakeBucket 6 | 7 | default_args = {'owner': 'airflow', 8 | 'start_date': datetime(2020, 5, 26), 9 | 'email': ['your_email@somecompany.com'], 10 | 'email_on_failure': False, 11 | 'email_on_retry': False 12 | } 13 | 14 | dag = DAG('oci_simple_example', 15 | default_args=default_args, 16 | schedule_interval='@hourly', 17 | catchup=False 18 | ) 19 | 20 | oci_conn_id = "oci_default" 21 | bucketname = "SomeBucketName" 22 | compartment_ocid = "COMPARTMENT_OCID" 23 | 24 | with dag: 25 | make_bucket = MakeBucket(task_id='Make_Bucket', bucket_name=bucketname,oci_conn_id=oci_conn_id, compartment_ocid=compartment_ocid) 26 | 27 | make_bucket 28 | -------------------------------------------------------------------------------- /iam.tf: -------------------------------------------------------------------------------- 1 | resource "tls_private_key" "key" { 2 | algorithm = "RSA" 3 | } 4 | 5 | resource "local_file" "key_file" { 6 | filename = "${path.module}/key.pem" 7 | content = "${tls_private_key.key.private_key_pem}" 8 | } 9 | 10 | 11 | resource "oci_identity_dynamic_group" "airflow_dynamic_group" { 12 | count = "${var.enable_instance_principals ? 1 : 0}" 13 | compartment_id = "${var.tenancy_ocid}" 14 | matching_rule = "ANY {instance.compartment.id = '${var.compartment_ocid}'}" 15 | name = "airflow-dynamic-group" 16 | description = "Dynamic Group created by Airflow Terraform for use with Instance Principals" 17 | } 18 | 19 | resource "oci_identity_policy" "airflow_instance_principals" { 20 | count = "${var.enable_instance_principals ? 
1 : 0}" 21 | name = "airflow-instance-principals" 22 | description = "Policy to enable Instance Principals for Airflow hosts" 23 | compartment_id = "${var.tenancy_ocid}" 24 | statements = ["Allow dynamic-group ${oci_identity_dynamic_group.airflow_dynamic_group.0.name} to manage all-resources in compartment ${data.oci_identity_compartment.airflow_compartment.name}"] 25 | } 26 | 27 | -------------------------------------------------------------------------------- /modules/network/variables.tf: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------------------------------------------------------- 2 | # Environmental variables 3 | # You probably want to define these as environmental variables. 4 | # Instructions on that are here: https://github.com/oci-quickstart/oci-prerequisites 5 | # --------------------------------------------------------------------------------------------------------------------- 6 | 7 | variable "tenancy_ocid" {} 8 | variable "compartment_ocid" {} 9 | variable "region" {} 10 | variable "oci_service_gateway" {} 11 | variable "VPC_CIDR" {} 12 | variable "useExistingVcn" {} 13 | variable "custom_vcn" { 14 | type = list(string) 15 | default = [" "] 16 | } 17 | variable "vcn_dns_label" { 18 | default = "datavcn" 19 | } 20 | variable "masterSubnet" {} 21 | variable "clusterSubnet" {} 22 | # --------------------------------------------------------------------------------------------------------------------- 23 | # Optional variables 24 | # You can modify these. 25 | # --------------------------------------------------------------------------------------------------------------------- 26 | 27 | variable "availability_domain" { 28 | default = "2" 29 | } 30 | 31 | 32 | -------------------------------------------------------------------------------- /modules/fss/main.tf: -------------------------------------------------------------------------------- 1 | resource "oci_file_storage_file_system" "airflow_dags" { 2 | count = "${var.enable_fss ? 1 : 0}" 3 | availability_domain = "${var.availability_domain}" 4 | compartment_id = "${var.compartment_ocid}" 5 | display_name = "Airflow Dags" 6 | } 7 | 8 | resource "oci_file_storage_export_set" "airflow_export_set" { 9 | count = "${var.enable_fss ? 1 : 0}" 10 | mount_target_id = "${oci_file_storage_mount_target.airflow_mount_target.0.id}" 11 | display_name = "Airflow Dags Export" 12 | } 13 | 14 | resource "oci_file_storage_export" "airflow_export_mount" { 15 | count = "${var.enable_fss ? 1 : 0}" 16 | export_set_id = "${oci_file_storage_export_set.airflow_export_set.0.id}" 17 | file_system_id = "${oci_file_storage_file_system.airflow_dags.0.id}" 18 | path = "/airflow" 19 | 20 | export_options { 21 | source = "${var.vcn_cidr}" 22 | access = "READ_WRITE" 23 | identity_squash = "NONE" 24 | require_privileged_source_port = true 25 | } 26 | } 27 | 28 | resource "oci_file_storage_mount_target" "airflow_mount_target" { 29 | count = "${var.enable_fss ? 1 : 0}" 30 | availability_domain = "${var.availability_domain}" 31 | compartment_id = "${var.compartment_ocid}" 32 | subnet_id = "${var.subnet_id}" 33 | } 34 | 35 | -------------------------------------------------------------------------------- /scripts/dags/oci_smoketest.py: -------------------------------------------------------------------------------- 1 | # A smoke test to ensure your environment works. 
2 | 3 | import datetime 4 | 5 | import oci 6 | 7 | from airflow import DAG 8 | from airflow.models.baseoperator import BaseOperator 9 | from airflow.utils.decorators import apply_defaults 10 | from hooks.oci_base import OCIBaseHook 11 | 12 | 13 | # The smoke test loads the object storage namespace. 14 | class SmokeTestOperator(BaseOperator): 15 | @apply_defaults 16 | def __init__(self, oci_conn_id: str, *args, **kwargs): 17 | self.oci_conn_id = oci_conn_id 18 | super().__init__(*args, **kwargs) 19 | 20 | def execute(self, context): 21 | self.hook = OCIBaseHook(self.oci_conn_id) 22 | object_store_client = self.hook.get_client( 23 | oci.object_storage.ObjectStorageClient 24 | ) 25 | self.hook.validate_config() 26 | namespace = object_store_client.get_namespace().data 27 | self.log.info(f"Namespace is {namespace}") 28 | 29 | 30 | default_args = { 31 | "owner": "airflow", 32 | "start_date": datetime.datetime(2020, 7, 1), 33 | "email": ["your_email@somecompany.com"], 34 | "email_on_failure": False, 35 | "email_on_retry": False, 36 | } 37 | 38 | # This schedule_interval runs the Application every 30 minutes. 39 | # Customize it as needed. 40 | dag = DAG( 41 | "oci_smoke_test", 42 | default_args=default_args, 43 | schedule_interval="0 * * * *", 44 | catchup=False, 45 | ) 46 | 47 | # Customize the connection you want to use. 48 | oci_conn_id = "oci_default" 49 | 50 | smoke_test_step = SmokeTestOperator( 51 | task_id="oci_smoke_test", oci_conn_id=oci_conn_id, dag=dag, 52 | ) 53 | smoke_test_step 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020 Oracle and/or its affiliates. All rights reserved. 2 | 3 | The Universal Permissive License (UPL), Version 1.0 4 | 5 | Subject to the condition set forth below, permission is hereby granted to any person obtaining a copy of this 6 | software, associated documentation and/or data (collectively the "Software"), free of charge and under any and 7 | all copyright rights in the Software, and any and all patent rights owned or freely licensable by each licensor 8 | hereunder covering either (i) the unmodified Software as contributed to or provided by such licensor, or 9 | (ii) the Larger Works (as defined below), to deal in both 10 | 11 | (a) the Software, and 12 | (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if one is included with the Software 13 | (each a “Larger Work” to which the Software is contributed by such licensors), 14 | 15 | without restriction, including without limitation the rights to copy, create derivative works of, display, 16 | perform, and distribute the Software and make, use, sell, offer for sale, import, export, have made, and have 17 | sold the Software and the Larger Work(s), and to sublicense the foregoing rights on either these or other terms. 18 | 19 | This license is subject to the following condition: 20 | The above copyright notice and either this complete permission notice or at a minimum a reference to the UPL must 21 | be included in all copies or substantial portions of the Software. 22 | 23 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 24 | THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 25 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 26 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 27 | IN THE SOFTWARE. 28 | -------------------------------------------------------------------------------- /modules/worker/main.tf: -------------------------------------------------------------------------------- 1 | resource "oci_core_instance" "AirflowWorker" { 2 | count = "${var.instances}" 3 | availability_domain = "${var.availability_domain}" 4 | compartment_id = "${var.compartment_ocid}" 5 | shape = "${var.worker_instance_shape}" 6 | display_name = "AirflowWorker ${format("%01d", count.index+1)}" 7 | fault_domain = "FAULT-DOMAIN-${(count.index%3)+1}" 8 | 9 | source_details { 10 | source_type = "image" 11 | source_id = "${var.image_ocid}" 12 | } 13 | 14 | create_vnic_details { 15 | subnet_id = "${var.subnet_id}" 16 | display_name = "AirflowWorker ${format("%01d", count.index+1)}" 17 | hostname_label = "AirflowWorker-${format("%01d", count.index+1)}" 18 | assign_public_ip = "${var.hide_public_subnet ? false : true}" 19 | } 20 | 21 | metadata = { 22 | ssh_authorized_keys = "${var.ssh_public_key}" 23 | user_data = "${var.user_data}" 24 | } 25 | 26 | extended_metadata = { 27 | block_volume_count = "${var.block_volume_count}" 28 | enable_fss = "${var.enable_fss}" 29 | nfs_ip = "${var.nfs_ip}" 30 | airflow_master = "${var.airflow_master}" 31 | oci_mysql_ip = "${var.oci_mysql_ip}" 32 | airflow_database = "${var.airflow_database}" 33 | } 34 | 35 | timeouts { 36 | create = "30m" 37 | } 38 | } 39 | 40 | // Block Volume Creation for AirflowWorker 41 | 42 | # Data Volumes 43 | resource "oci_core_volume" "AirflowWorkerDataVolume" { 44 | count = "${(var.instances * var.block_volumes_per_worker)}" 45 | availability_domain = "${var.availability_domain}" 46 | compartment_id = "${var.compartment_ocid}" 47 | display_name = "AirflowWorker ${format("%01d", floor((count.index / var.block_volumes_per_worker)+1))} Data ${format("%01d", floor((count.index%(var.block_volumes_per_worker))+1))}" 48 | size_in_gbs = "${var.data_blocksize_in_gbs}" 49 | vpus_per_gb = "${var.vpus_per_gb}" 50 | } 51 | 52 | resource "oci_core_volume_attachment" "AirflowWorkerDataAttachment" { 53 | count = "${(var.instances * var.block_volumes_per_worker)}" 54 | attachment_type = "iscsi" 55 | instance_id = "${oci_core_instance.AirflowWorker[floor(count.index/var.block_volumes_per_worker)].id}" 56 | volume_id = "${oci_core_volume.AirflowWorkerDataVolume[count.index].id}" 57 | device = "${var.data_volume_attachment_device[floor(count.index%(var.block_volumes_per_worker))]}" 58 | } 59 | 60 | -------------------------------------------------------------------------------- /scripts/dags/oci_adb_sql_example.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from airflow import DAG 3 | from sys import modules 4 | from operators.oci_adb import OCIDBOperator 5 | 6 | default_args = {'owner': 'airflow', 7 | 'start_date': datetime(2020, 5, 26), 8 | 'email': ['your.email@somecompany.com'], 9 | 'email_on_failure': False, 10 | 'email_on_retry': False 11 | } 12 | 13 | dag = DAG('oci_adb_sql_example', 14 | default_args=default_args, 15 | schedule_interval='@hourly', 16 | catchup=False 17 | ) 18 | 19 | oci_conn_id = "oci_default" 20 | bucketname = "BUCKET_NAME" 21 | db_name = "DATABASE_NAME" 22 | compartment_ocid = "COMPARTMENT OCID" 
23 | db_workload = "DW" 24 | tns_admin_root = "/path/to/tns_admin/" 25 | user_id = "DATABASE_USER" 26 | password = "DATABASE_PASSWORD" 27 | drop_table = """ 28 | BEGIN 29 | EXECUTE IMMEDIATE 'DROP TABLE python_modules'; 30 | EXCEPTION 31 | WHEN OTHERS THEN 32 | IF SQLCODE != -942 THEN 33 | RAISE; 34 | END IF; 35 | END; 36 | """ 37 | create_table = """ 38 | CREATE TABLE python_modules ( 39 | module_name VARCHAR2(100) NOT NULL, 40 | file_path VARCHAR2(300) NOT NULL 41 | ) 42 | """ 43 | many_sql_data = [] 44 | for m_name, m_info in modules.items(): 45 | try: 46 | many_sql_data.append((m_name, m_info.__file__)) 47 | except AttributeError: 48 | pass 49 | many_sql="INSERT INTO python_modules(module_name, file_path) VALUES (:1, :2)" 50 | debug = True 51 | 52 | with dag: 53 | t1 = OCIDBOperator(task_id='drop_table', compartment_ocid=compartment_ocid, db_name=db_name, 54 | db_workload=db_workload, tns_admin_root=tns_admin_root, user_id=user_id, 55 | password=password, single_sql=drop_table, debug=debug) 56 | t2 = OCIDBOperator(task_id='create_table', compartment_ocid=compartment_ocid, db_name=db_name, 57 | db_workload=db_workload, tns_admin_root=tns_admin_root, user_id=user_id, 58 | password=password, single_sql=create_table, debug=debug) 59 | t3 = OCIDBOperator(task_id='insert_data', compartment_ocid=compartment_ocid, db_name=db_name, 60 | db_workload=db_workload, tns_admin_root=tns_admin_root, user_id=user_id, 61 | password=password, many_sql=many_sql, many_sql_data=many_sql_data, debug=debug) 62 | t1 >> t2 >> t3 63 | 64 | -------------------------------------------------------------------------------- /scripts/dags/oci_advanced_example.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from airflow import DAG 3 | from operators.oci_object_storage import MakeBucket, CopyFileToOCIObjectStorageOperator 4 | from operators.oci_data_flow import OCIDataFlowRun, OCIDataFlowCreateApplication 5 | 6 | default_args = {'owner': 'airflow', 7 | 'start_date': datetime(2020, 5, 26), 8 | 'email': ['your_email@somecompany.com'], 9 | 'email_on_failure': False, 10 | 'email_on_retry': False 11 | } 12 | 13 | dag = DAG('oci_advanced_example', 14 | default_args=default_args, 15 | schedule_interval='@hourly', 16 | catchup=False 17 | ) 18 | 19 | oci_conn_id = "oci_default" 20 | bucketname = "SomeBucketName" 21 | compartment_ocid = "COMPARTMENT_OCID" 22 | dataflow_file = "some_local_file" 23 | dataflow_appname = "some_app_name" 24 | 25 | 26 | with dag: 27 | t1 = MakeBucket(task_id='Make_Bucket', 28 | bucket_name=bucketname, 29 | oci_conn_id=oci_conn_id, 30 | compartment_ocid=compartment_ocid) 31 | t2 = CopyFileToOCIObjectStorageOperator(task_id='Copy_{0}_to_Bucket'.format(dataflow_file), 32 | bucket_name=bucketname, 33 | compartment_ocid=compartment_ocid, 34 | oci_conn_id=oci_conn_id, 35 | object_name=dataflow_file, 36 | local_file_path='/home/airflow/') 37 | t3 = OCIDataFlowCreateApplication(task_id='Create_Dataflow_Application_{0}'.format(dataflow_appname), 38 | bucket_name=bucketname, 39 | display_name=dataflow_appname, 40 | compartment_ocid=compartment_ocid, 41 | oci_conn_id=oci_conn_id, 42 | object_name=dataflow_file, 43 | language='PYTHON', 44 | ) 45 | t4 = OCIDataFlowRun(task_id='Run_Dataflow_Application_{0}'.format(dataflow_appname), 46 | compartment_ocid=compartment_ocid, 47 | display_name=dataflow_appname, 48 | oci_conn_id=oci_conn_id, 49 | bucket_name=bucketname 50 | ) 51 | t1 >> t2 >> t3 >> t4 52 | 
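All of the example DAGs above assume a working "oci_default" connection. Before debugging them inside Airflow, it can help to confirm that the OCI credentials work on their own. The snippet below is a minimal sketch using only the OCI Python SDK, independent of the hooks in this repository; it assumes a standard ~/.oci/config file with a DEFAULT profile (instance-principal setups, which the Terraform here can optionally enable, are not exercised by it):

import oci

# Load and sanity-check the local OCI configuration (assumes ~/.oci/config, DEFAULT profile).
config = oci.config.from_file()
oci.config.validate_config(config)

# Fetching the Object Storage namespace is a cheap end-to-end credential check,
# the same call the oci_smoketest.py DAG makes through its hook.
client = oci.object_storage.ObjectStorageClient(config)
print("Object Storage namespace:", client.get_namespace().data)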
-------------------------------------------------------------------------------- /scripts/plugins/sensors/oci_adb.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language 17 | # governing permissions and limitations 18 | # under the License. 19 | 20 | from airflow.sensors.base_sensor_operator import BaseSensorOperator 21 | from airflow.utils.decorators import apply_defaults 22 | from airflow.exceptions import AirflowException 23 | from hooks.oci_adb import OCIDBHook 24 | import time 25 | 26 | class OCIADBSensor(BaseSensorOperator): 27 | """ 28 | Sensor to interact with OCI ADB 29 | """ 30 | 31 | @apply_defaults 32 | def __init__(self, 33 | compartment_ocid = None, 34 | oci_conn_id = 'oci_default', 35 | database_id = None, 36 | target_state = None, 37 | *args, 38 | **kwargs): 39 | super(OCIADBSensor, self).__init__(*args, **kwargs) 40 | self.compartment_id = compartment_ocid, 41 | self.oci_conn_id = oci_conn_id, 42 | self.database_id = database_id, 43 | self.target_state = target_state, 44 | self._oci_hook = None 45 | 46 | def poke(self): 47 | self.log.info('Checking database %s', self.database_id) 48 | db_state = self.get_oci_hook().check_state(database_id=self.database_id) 49 | while db_state is not self.target_state: 50 | self.log.info('DB State: {}'.format(db_state)) 51 | time.sleep(15) 52 | db_state = self.get_oci_hook().check_state(database_id=self.database_id) 53 | 54 | def get_oci_hook(self): 55 | """ 56 | Create and return OCI Hook 57 | :return: 58 | """ 59 | if not self._oci_hook: 60 | self._oci_hook = OCIDBHook(compartment_id=self.compartment_id, oci_conn_id=self.oci_conn_id) 61 | return self._oci_hook 62 | -------------------------------------------------------------------------------- /scripts/dags/schedule_dataflow_app.py: -------------------------------------------------------------------------------- 1 | # This a very simple example to schedule a Data Flow Application with just a few 2 | # tweaks. 3 | # 4 | # To use this: 5 | # 1. Customize the schedule_interval as needed. 6 | # 2. Set the Application OCID, Compartment OCID. 7 | # 3. If needed, set logs and warehouse buckets. 8 | # 4. If needed, set the oci_namespace variable or create an Airflow Variable (preferred). 9 | # 5. If you want to, customize the display_name variable to change how Runs appear. 10 | # 6. If you want to, customize the SLA setting. SLA misses will appear in the Airflow UI. 11 | # 12 | # After setting these, copy the script into your production DAG directory 13 | # usually (/opt/airflow/dags) and your job will run on the period you specified. 
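# For step 4, the namespace can be stored as an Airflow Variable ahead of time.
# A hypothetical sequence (the OCI CLI prints the tenancy's Object Storage
# namespace; use whichever Airflow CLI form matches your install):
#   oci os ns get
#   airflow variables --set oci_namespace <your_namespace>    # Airflow 1.10.x
#   airflow variables set oci_namespace <your_namespace>      # Airflow 2.x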
14 | 15 | from airflow import DAG 16 | from airflow.models import Variable 17 | from operators.oci_data_flow import OCIDataFlowRun 18 | 19 | import datetime 20 | 21 | default_args = { 22 | "owner": "airflow", 23 | "start_date": datetime.datetime(2020, 6, 26), 24 | "email": ["your_email@somecompany.com"], 25 | "email_on_failure": False, 26 | "email_on_retry": False, 27 | "sla": datetime.timedelta(hours=12), 28 | } 29 | 30 | # This schedule_interval runs the Application every 30 minutes. 31 | # Customize it as needed. 32 | dag = DAG( 33 | "schedule_dataflow_app", 34 | default_args=default_args, 35 | schedule_interval="0/30 * * * *", 36 | catchup=False, 37 | ) 38 | 39 | # Customize these variables. 40 | # Find the OCID values in the UI or using the CLI. 41 | oci_conn_id = "oci_default" 42 | dataflow_application_ocid = "UNSET" 43 | compartment_ocid = "UNSET" 44 | logs_bucket = "dataflow-logs" 45 | warehouse_bucket = "dataflow-warehouse" 46 | try: 47 | namespace = Variable.get("oci_namespace") 48 | except: 49 | namespace = "UNSET" 50 | 51 | # Ensure everything is set. 52 | assert dataflow_application_ocid != "UNSET", "You need to set dataflow_application_ocid" 53 | assert compartment_ocid != "UNSET", "You need to set compartment_ocid" 54 | assert ( 55 | namespace != "UNSET" 56 | ), "You need to set namespace as an Airflow variable or in the script" 57 | 58 | logs_bucket_uri = f"oci://{logs_bucket}@{namespace}/" 59 | warehouse_bucket_uri = f"oci://{warehouse_bucket}@{namespace}/" 60 | display_name = "Application Run on {{ ds }}" 61 | 62 | run_application_step = OCIDataFlowRun( 63 | task_id="Run_Dataflow_Application", 64 | compartment_ocid=compartment_ocid, 65 | application_ocid=dataflow_application_ocid, 66 | display_name=display_name, 67 | oci_conn_id=oci_conn_id, 68 | logs_bucket_uri=logs_bucket_uri, 69 | warehouse_bucket_uri=warehouse_bucket_uri, 70 | dag=dag, 71 | ) 72 | run_application_step 73 | -------------------------------------------------------------------------------- /modules/master/variables.tf: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------------------------------------------------------- 2 | # Environmental variables 3 | # You probably want to define these as environmental variables. 
4 | # Instructions on that are here: https://github.com/oci-quickstart/oci-prerequisites 5 | # --------------------------------------------------------------------------------------------------------------------- 6 | 7 | variable "compartment_ocid" {} 8 | variable "ssh_public_key" {} 9 | variable "subnet_id" {} 10 | variable "user_data" {} 11 | variable "image_ocid" {} 12 | variable "hide_private_subnet" { 13 | default = "true" 14 | } 15 | variable "airflow_database" {} 16 | variable "airflow_options" {} 17 | variable "executor" {} 18 | variable "all" {} 19 | variable "all_dbs" {} 20 | variable "async" {} 21 | variable "aws" {} 22 | variable "azure" {} 23 | variable "celery" {} 24 | variable "cloudant" {} 25 | variable "crypto" {} 26 | variable "devel" {} 27 | variable "devel_hadoop" {} 28 | variable "druid" {} 29 | variable "gcp" {} 30 | variable "github_enterprise" {} 31 | variable "google_auth" {} 32 | variable "hashicorp" {} 33 | variable "hdfs" {} 34 | variable "hive" {} 35 | variable "jdbc" {} 36 | variable "kerberos" {} 37 | variable "kubernetes" {} 38 | variable "ldap" {} 39 | variable "mssql" {} 40 | variable "mysql" {} 41 | variable "oracle" {} 42 | variable "password" {} 43 | variable "postgres" {} 44 | variable "presto" {} 45 | variable "qds" {} 46 | variable "rabbitmq" {} 47 | variable "redis" {} 48 | variable "samba" {} 49 | variable "slack" {} 50 | variable "ssh" {} 51 | variable "vertica" {} 52 | variable "enable_fss" {} 53 | variable "nfs_ip" {} 54 | variable "enable_security" {} 55 | variable "oci_mysql_ip" {} 56 | # --------------------------------------------------------------------------------------------------------------------- 57 | # Optional variables 58 | # You can modify these. 59 | # --------------------------------------------------------------------------------------------------------------------- 60 | 61 | variable "availability_domain" { 62 | default = "1" 63 | } 64 | 65 | # Size for Cloudera Log Volumes across all hosts deployed to /var/log/cloudera 66 | 67 | variable "log_volume_size_in_gbs" { 68 | default = "200" 69 | } 70 | 71 | # Size for Volume across all hosts deployed to /opt/airflow 72 | 73 | variable "airflow_volume_size_in_gbs" { 74 | default = "300" 75 | } 76 | 77 | # 78 | # Set Cluster Shapes in this section 79 | # 80 | 81 | variable "master_instance_shape" { 82 | default = "VM.Standard2.4" 83 | } 84 | 85 | # --------------------------------------------------------------------------------------------------------------------- 86 | # Constants 87 | # You probably don't need to change these. 88 | # --------------------------------------------------------------------------------------------------------------------- 89 | 90 | variable "master_node_count" { 91 | default = "1" 92 | } 93 | -------------------------------------------------------------------------------- /scripts/dags/schedule_dataflow_pipeline.py: -------------------------------------------------------------------------------- 1 | # Schedule a sequence of Data Flow jobs to be run one after another. 2 | # 3 | # To use this: 4 | # 1. Customize the schedule_interval as needed. 5 | # 2. Set the Compartment OCID and Application OCIDs. 6 | # 3. If needed, set logs and warehouse buckets. 7 | # 4. If needed, set the oci_namespace variable or create an Airflow Variable (preferred). 8 | # 5. If you want to, customize the display_name variable to change how Runs appear. 9 | # 6. If you want to, customize the SLA setting. SLA misses will appear in the Airflow UI. 
10 | # 11 | # After setting these, copy the script into your production DAG directory 12 | # usually (/opt/airflow/dags) and your job will run on the period you specified. 13 | 14 | from airflow import DAG 15 | from airflow.models import Variable 16 | from operators.oci_data_flow import OCIDataFlowRun 17 | 18 | import datetime 19 | 20 | default_args = { 21 | "owner": "airflow", 22 | "start_date": datetime.datetime(2020, 6, 26), 23 | "email": ["your_email@somecompany.com"], 24 | "email_on_failure": False, 25 | "email_on_retry": False, 26 | "sla": datetime.timedelta(hours=12), 27 | } 28 | 29 | # This schedule_interval runs the DAG every 30 minutes. 30 | # Customize it as needed. 31 | dag = DAG( 32 | "schedule_dataflow_pipeline", 33 | default_args=default_args, 34 | schedule_interval="0/30 * * * *", 35 | catchup=False, 36 | ) 37 | 38 | # Customize these variables. 39 | # Find the OCID values in the UI or using the CLI. 40 | oci_conn_id = "oci_default" 41 | dataflow_application_ocids = [ 42 | "my_dataflow_ocid_1", 43 | "my_dataflow_ocid_2", 44 | "my_dataflow_ocid_3" 45 | ] 46 | compartment_ocid = "UNSET" 47 | logs_bucket = "dataflow-logs" 48 | warehouse_bucket = "dataflow-warehouse" 49 | try: 50 | namespace = Variable.get("oci_namespace") 51 | except: 52 | namespace = "UNSET" 53 | 54 | # Ensure everything is set. 55 | assert len(dataflow_application_ocids) > 0, "You need to set dataflow_application_ocids" 56 | assert compartment_ocid != "UNSET", "You need to set compartment_ocid" 57 | assert ( 58 | namespace != "UNSET" 59 | ), "You need to set namespace as an Airflow variable or in the script" 60 | 61 | logs_bucket_uri = f"oci://{logs_bucket}@{namespace}/" 62 | warehouse_bucket_uri = f"oci://{warehouse_bucket}@{namespace}/" 63 | display_name = "Pipeline Step {i} run on {{{{ ts_nodash }}}}" 64 | 65 | with dag: 66 | steps = [] 67 | for i, ocid in enumerate(dataflow_application_ocids): 68 | steps.append(OCIDataFlowRun( 69 | task_id=f"Dataflow_Pipeline_Step_{i}", 70 | compartment_ocid=compartment_ocid, 71 | application_ocid=ocid, 72 | display_name=display_name.format(i=i+1), 73 | oci_conn_id=oci_conn_id, 74 | logs_bucket_uri=logs_bucket_uri, 75 | warehouse_bucket_uri=warehouse_bucket_uri, 76 | )) 77 | 78 | # Chain the steps together sequentially. 79 | for head, tail in zip(steps, steps[1:]): 80 | head.set_downstream(tail) 81 | -------------------------------------------------------------------------------- /scripts/dags/schedule_dataflow_with_parameters.py: -------------------------------------------------------------------------------- 1 | # To use this example: 2 | # 1. Customize the schedule_interval as needed. 3 | # 2. Set the Application OCID, Compartment OCID. 4 | # 3. If needed, set logs and warehouse buckets. 5 | # 4. If needed, set the oci_namespace variable or create an Airflow Variable (preferred). 6 | # 5. If you want to, customize the display_name variable to change how Runs appear. 7 | # 6. If you want to, customize the SLA setting. SLA misses will appear in the Airflow UI. 8 | # 9 | # Additionally you will need to customize parameter_list. 10 | # The parameters you provide need to be consistent with what your Application expects. 11 | # 12 | # After setting these, copy the script into your production DAG directory 13 | # usually (/opt/airflow/dags) and your job will run on the period you specified. 
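# Note on parameter_list below: the names (input_path, output_path) are purely
# illustrative and must match parameters the Data Flow Application itself
# declares. Data Flow application arguments typically reference them as
# placeholders, e.g. ["--input", "${input_path}", "--output", "${output_path}"],
# with the values supplied here substituted at run time.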
14 | 15 | from airflow import DAG 16 | from airflow.models import Variable 17 | from operators.oci_data_flow import OCIDataFlowRun 18 | 19 | import datetime 20 | import oci 21 | 22 | default_args = { 23 | "owner": "airflow", 24 | "start_date": datetime.datetime(2020, 6, 26), 25 | "email": ["your_email@somecompany.com"], 26 | "email_on_failure": False, 27 | "email_on_retry": False, 28 | "sla": datetime.timedelta(hours=12), 29 | } 30 | 31 | # This schedule_interval runs the Application every 30 minutes. 32 | # Customize it as needed. 33 | dag = DAG( 34 | "schedule_dataflow_with_parameters", 35 | default_args=default_args, 36 | schedule_interval="0/30 * * * *", 37 | catchup=False, 38 | ) 39 | 40 | # Customize these variables. 41 | # Find the OCID values in the UI or using the CLI. 42 | oci_conn_id = "oci_default" 43 | dataflow_application_ocid = "UNSET" 44 | compartment_ocid = "UNSET" 45 | logs_bucket = "dataflow-logs" 46 | warehouse_bucket = "dataflow-warehouse" 47 | try: 48 | namespace = Variable.get("oci_namespace") 49 | except: 50 | namespace = "UNSET" 51 | 52 | # Ensure everything is set. 53 | assert dataflow_application_ocid != "UNSET", "You need to set dataflow_application_ocid" 54 | assert compartment_ocid != "UNSET", "You need to set compartment_ocid" 55 | assert ( 56 | namespace != "UNSET" 57 | ), "You need to set namespace as an Airflow variable or in the script" 58 | 59 | logs_bucket_uri = f"oci://{logs_bucket}@{namespace}/" 60 | warehouse_bucket_uri = f"oci://{warehouse_bucket}@{namespace}/" 61 | display_name = "Application Run on {{ ds }}" 62 | 63 | # Set this based on the parameters your Application expects. 64 | parameter_list = [ 65 | oci.data_flow.models.ApplicationParameter( 66 | name="input_path", value="oci://bucket@namespace/input" 67 | ), 68 | oci.data_flow.models.ApplicationParameter( 69 | name="output_path", value="oci://bucket@namespace/output" 70 | ), 71 | ] 72 | 73 | run_application_step = OCIDataFlowRun( 74 | application_ocid=dataflow_application_ocid, 75 | compartment_ocid=compartment_ocid, 76 | dag=dag, 77 | display_name=display_name, 78 | logs_bucket_uri=logs_bucket_uri, 79 | oci_conn_id=oci_conn_id, 80 | parameters=parameter_list, 81 | task_id="Run_Dataflow_Application", 82 | warehouse_bucket_uri=warehouse_bucket_uri, 83 | ) 84 | run_application_step 85 | -------------------------------------------------------------------------------- /scripts/plugins/hooks/oci_data_catalog.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
18 | 19 | from typing import Optional 20 | import oci 21 | from hooks.oci_base import OCIBaseHook 22 | from airflow.exceptions import AirflowException 23 | 24 | 25 | class OCIDataCatalogHook(OCIBaseHook): 26 | """ 27 | Interact with Oracle Data Catalog. 28 | """ 29 | def __init__(self, 30 | compartment_ocid: str, 31 | data_catalog_ocid: Optional[str] = None, 32 | display_name: Optional[str] = None, 33 | oci_conn_id: Optional[str] = "oci_default", 34 | oci_region: Optional[str] = None, 35 | *args, 36 | **kwargs): 37 | super(OCIDataCatalogHook, self).__init__(*args, **kwargs) 38 | self.compartment_id = compartment_ocid 39 | self.data_catalog_ocid = data_catalog_ocid 40 | self.display_name = display_name 41 | self.job_key = None 42 | self.oci_conn_id = oci_conn_id 43 | self.oci_region = oci_region 44 | self.oci_client = oci.data_catalog.DataCatalogClient 45 | 46 | def get_catalog_ocid(self, **kwargs): 47 | """ 48 | Get Data Catalog OCID by catalog_name 49 | :param compartment_id: 50 | :param catalog_name: 51 | :return: 52 | """ 53 | try: 54 | catalogdetails = self.get_client(self.oci_client).list_catalogs(compartment_id=self.compartment_id, 55 | **kwargs).data 56 | for catalog in catalogdetails: 57 | if catalog.display_name == self.display_name: 58 | self.data_catalog_ocid = catalog.id 59 | return catalog.id 60 | else: 61 | continue 62 | return None 63 | except AirflowException as e: 64 | self.log.error(e.response["Error"]["Message"]) 65 | 66 | def get_job_key(self, **kwargs): 67 | """ 68 | Get Job Key by display_name 69 | :param kwargs: 70 | :return: 71 | """ 72 | try: 73 | joblist = self.get_client(self.oci_client).list_jobs(compartment_id=self.compartment_id, 74 | display_name=self.display_name, 75 | **kwargs).data 76 | for job in joblist: 77 | if job.display_name == self.display_name: 78 | self.job_key = job.key 79 | return job.key 80 | else: 81 | continue 82 | return None 83 | except AirflowException as e: 84 | self.log.error(e.response["Error"]["Message"]) 85 | -------------------------------------------------------------------------------- /modules/worker/variables.tf: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------------------------------------------------------- 2 | # Environmental variables 3 | # You probably want to define these as environmental variables. 4 | # Instructions on that are here: https://github.com/oci-quickstart/oci-prerequisites 5 | # --------------------------------------------------------------------------------------------------------------------- 6 | 7 | variable "region" {} 8 | variable "compartment_ocid" {} 9 | variable "ssh_public_key" {} 10 | variable "instances" {} 11 | variable "subnet_id" {} 12 | variable "user_data" {} 13 | variable "image_ocid" {} 14 | variable "block_volume_count" {} 15 | variable "hide_public_subnet" { 16 | default = "true" 17 | } 18 | variable "secondary_vnic_count" { 19 | default = "0" 20 | } 21 | variable "enable_secondary_vnic" { 22 | default = "false" 23 | } 24 | variable "executor" {} 25 | variable "enable_fss" {} 26 | variable "nfs_ip" {} 27 | variable "airflow_master" {} 28 | variable "oci_mysql_ip" {} 29 | variable "airflow_database" {} 30 | # --------------------------------------------------------------------------------------------------------------------- 31 | # Optional variables 32 | # You can modify these. 
33 | # --------------------------------------------------------------------------------------------------------------------- 34 | 35 | variable "availability_domain" { 36 | default = "2" 37 | } 38 | 39 | # Number of Workers in the Cluster 40 | 41 | variable "data_blocksize_in_gbs" { 42 | default = "1000" 43 | } 44 | 45 | variable "block_volumes_per_worker" {} 46 | 47 | variable "vpus_per_gb" { 48 | default = "10" 49 | } 50 | 51 | # 52 | # Set Shapes in this section 53 | # 54 | 55 | variable "worker_instance_shape" { 56 | default = "VM.Standard2.4" 57 | } 58 | 59 | 60 | # --------------------------------------------------------------------------------------------------------------------- 61 | # Constants 62 | # You probably don't need to change these. 63 | # --------------------------------------------------------------------------------------------------------------------- 64 | 65 | // Volume Mapping - used to map Worker Block Volumes consistently to the OS 66 | variable "data_volume_attachment_device" { 67 | type = "map" 68 | default = { 69 | "0" = "/dev/oracleoci/oraclevdb" 70 | "1" = "/dev/oracleoci/oraclevdc" 71 | "2" = "/dev/oracleoci/oraclevdd" 72 | "3" = "/dev/oracleoci/oraclevde" 73 | "4" = "/dev/oracleoci/oraclevdf" 74 | "5" = "/dev/oracleoci/oraclevdg" 75 | "6" = "/dev/oracleoci/oraclevdh" 76 | "7" = "/dev/oracleoci/oraclevdi" 77 | "8" = "/dev/oracleoci/oraclevdj" 78 | "9" = "/dev/oracleoci/oraclevdk" 79 | "10" = "/dev/oracleoci/oraclevdl" 80 | "11" = "/dev/oracleoci/oraclevdm" 81 | "12" = "/dev/oracleoci/oraclevdn" 82 | "13" = "/dev/oracleoci/oraclevdo" 83 | "14" = "/dev/oracleoci/oraclevdp" 84 | "15" = "/dev/oracleoci/oraclevdq" 85 | "16" = "/dev/oracleoci/oraclevdr" 86 | "17" = "/dev/oracleoci/oraclevds" 87 | "18" = "/dev/oracleoci/oraclevdt" 88 | "19" = "/dev/oracleoci/oraclevdu" 89 | "20" = "/dev/oracleoci/oraclevdv" 90 | "21" = "/dev/oracleoci/oraclevdw" 91 | "22" = "/dev/oracleoci/oraclevdx" 92 | "23" = "/dev/oracleoci/oraclevdy" 93 | "24" = "/dev/oracleoci/oraclevdz" 94 | "25" = "/dev/oracleoci/oraclevdab" 95 | "26" = "/dev/oracleoci/oraclevdac" 96 | "27" = "/dev/oracleoci/oraclevdad" 97 | "28" = "/dev/oracleoci/oraclevdae" 98 | "29" = "/dev/oracleoci/oraclevdaf" 99 | "30" = "/dev/oracleoci/oraclevdag" 100 | } 101 | } 102 | 103 | -------------------------------------------------------------------------------- /scripts/plugins/operators/oci_data_catalog.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
18 | 19 | from airflow.models.baseoperator import BaseOperator 20 | from airflow.utils.decorators import apply_defaults 21 | from typing import Optional 22 | import oci 23 | from oci.data_catalog.data_catalog_client import DataCatalogClient 24 | from hooks.oci_data_catalog import OCIDataCatalogHook 25 | from airflow.exceptions import AirflowException 26 | import time 27 | """ 28 | Interact with OCI Data Catalog 29 | """ 30 | 31 | 32 | class OCIDataCatalogExecute(BaseOperator): 33 | """ 34 | Create Data Catalog Job Execution 35 | :param compartment_ocid: Compartment OCID 36 | :param oci_conn_id: Airflow connection ID 37 | :param data_catalog_ocid: Data Catalog OCID 38 | :param retry_strategy: Retry Strategy 39 | """ 40 | 41 | @apply_defaults 42 | def __init__( 43 | self, 44 | compartment_ocid: str, 45 | oci_conn_id: str, 46 | data_catalog_ocid: str, 47 | job_key: str, 48 | job_execution_details: object, 49 | retry_strategy: Optional[str] = None, 50 | *args, 51 | **kwargs 52 | ): 53 | super().__init__(*args, **kwargs) 54 | self.compartment_id = compartment_ocid 55 | self.oci_conn_id = oci_conn_id 56 | self.data_catalog_ocid = data_catalog_ocid 57 | self.job_key = job_key 58 | self.job_execution_details = job_execution_details 59 | self.retry_strategy = retry_strategy 60 | self._oci_hook = None 61 | 62 | def execute(self, context, **kwargs): 63 | self._oci_hook = OCIDataCatalogHook(compartment_ocid=self.compartment_id, oci_conn_id=self.oci_conn_id) 64 | client = self._oci_hook.get_client(oci.data_catalog.DataCatalogClient) 65 | self.log.info("Validating OCI Config") 66 | self._oci_hook.validate_config() 67 | 68 | try: 69 | print("Submitting Data Catalog Job Execution") 70 | submit_job = DataCatalogClient(client) 71 | submit_job.create_job_execution(catalog_id=self.data_catalog_ocid, 72 | job_key=self.job_key, 73 | create_job_execution_details=self.job_execution_details, 74 | **kwargs) 75 | check_job = DataCatalogClient(client) 76 | job_data = check_job.get_job(catalog_id=self.data_catalog_ocid, 77 | job_key=self.job_key).data 78 | while job_data.lifecycle_state is not "completed": 79 | time.sleep(15) 80 | job_data = check_job.get_job(catalog_id=self.data_catalog_ocid, 81 | job_key=self.job_key).data 82 | 83 | except AirflowException as e: 84 | self.log.error(e.response["Error"]["Message"]) 85 | -------------------------------------------------------------------------------- /modules/master/main.tf: -------------------------------------------------------------------------------- 1 | resource "oci_core_instance" "Master" { 2 | availability_domain = "${var.availability_domain}" 3 | compartment_id = "${var.compartment_ocid}" 4 | shape = "${var.master_instance_shape}" 5 | display_name = "Airflow Master" 6 | fault_domain = "FAULT-DOMAIN-1" 7 | 8 | source_details { 9 | source_type = "image" 10 | source_id = "${var.image_ocid}" 11 | } 12 | 13 | create_vnic_details { 14 | subnet_id = "${var.subnet_id}" 15 | display_name = "Airflow Master 1" 16 | hostname_label = "Airflow-Master-1" 17 | assign_public_ip = "${var.hide_private_subnet ? 
true : false}" 18 | } 19 | 20 | metadata = { 21 | ssh_authorized_keys = "${var.ssh_public_key}" 22 | user_data = "${var.user_data}" 23 | } 24 | 25 | extended_metadata = { 26 | airflow_database = "${var.airflow_database}" 27 | airflow_options = "${var.airflow_options}" 28 | executor = "${var.executor}" 29 | all = "${var.all}" 30 | all_dbs = "${var.all_dbs}" 31 | async = "${var.async}" 32 | aws = "${var.aws}" 33 | azure = "${var.azure}" 34 | celery = "${var.celery}" 35 | cloudant = "${var.cloudant}" 36 | crypto = "${var.crypto}" 37 | devel = "${var.devel}" 38 | devel_hadoop = "${var.devel_hadoop}" 39 | druid = "${var.druid}" 40 | gcp = "${var.gcp}" 41 | github_enterprise = "${var.github_enterprise}" 42 | google_auth = "${var.google_auth}" 43 | hashicorp = "${var.hashicorp}" 44 | hdfs = "${var.hdfs}" 45 | hive = "${var.hive}" 46 | jdbc = "${var.jdbc}" 47 | kerberos = "${var.kerberos}" 48 | kubernetes = "${var.kubernetes}" 49 | ldap = "${var.ldap}" 50 | mssql = "${var.mssql}" 51 | mysql = "${var.mysql}" 52 | oracle = "${var.oracle}" 53 | password = "${var.password}" 54 | postgres = "${var.postgres}" 55 | presto = "${var.presto}" 56 | qds = "${var.qds}" 57 | rabbitmq = "${var.rabbitmq}" 58 | redis = "${var.redis}" 59 | samba = "${var.samba}" 60 | slack = "${var.slack}" 61 | ssh = "${var.ssh}" 62 | vertica = "${var.vertica}" 63 | enable_fss = "${var.enable_fss}" 64 | nfs_ip = "${var.nfs_ip}" 65 | enable_security = "${var.enable_security}" 66 | oci_mysql_ip = "${var.oci_mysql_ip}" 67 | } 68 | 69 | timeouts { 70 | create = "30m" 71 | } 72 | } 73 | 74 | // Block Volume Creation for Master 75 | 76 | # Log Volume for /var/log/airflow 77 | resource "oci_core_volume" "MasterLogVolume" { 78 | availability_domain = "${var.availability_domain}" 79 | compartment_id = "${var.compartment_ocid}" 80 | display_name = "Airflow Master Log Data" 81 | size_in_gbs = "50" 82 | } 83 | 84 | resource "oci_core_volume_attachment" "MasterLogAttachment" { 85 | attachment_type = "iscsi" 86 | instance_id = "${oci_core_instance.Master.id}" 87 | volume_id = "${oci_core_volume.MasterLogVolume.id}" 88 | device = "/dev/oracleoci/oraclevdb" 89 | } 90 | 91 | # Data Volume for /opt/airflow 92 | resource "oci_core_volume" "MasterAirflowVolume" { 93 | availability_domain = "${var.availability_domain}" 94 | compartment_id = "${var.compartment_ocid}" 95 | display_name = "Airflow Master Data" 96 | size_in_gbs = "100" 97 | } 98 | 99 | resource "oci_core_volume_attachment" "MasterAirflowAttachment" { 100 | attachment_type = "iscsi" 101 | instance_id = "${oci_core_instance.Master.id}" 102 | volume_id = "${oci_core_volume.MasterAirflowVolume.id}" 103 | device = "/dev/oracleoci/oraclevdc" 104 | } 105 | 106 | -------------------------------------------------------------------------------- /compute.tf: -------------------------------------------------------------------------------- 1 | data "oci_core_vcn" "vcn_info" { 2 | vcn_id = "${var.useExistingVcn ? var.myVcn : module.network.vcn-id}" 3 | } 4 | 5 | data "oci_core_subnet" "master_subnet" { 6 | subnet_id = "${var.useExistingVcn ? var.masterSubnet : module.network.public-id}" 7 | } 8 | 9 | data "oci_core_subnet" "cluster_subnet" { 10 | subnet_id = "${var.useExistingVcn ? var.clusterSubnet : module.network.private-id}" 11 | } 12 | 13 | data "null_data_source" "vpus" { 14 | inputs = { 15 | block_vpus = "${var.block_volume_high_performance ? 
20 : 0}" 16 | } 17 | } 18 | 19 | data "null_data_source" "values" { 20 | inputs = { 21 | airflow_master = "airflow-master-1.${data.oci_core_subnet.master_subnet.dns_label}.${data.oci_core_vcn.vcn_info.vcn_domain_name}" 22 | } 23 | } 24 | 25 | module "master" { 26 | source = "./modules/master" 27 | compartment_ocid = "${var.compartment_ocid}" 28 | subnet_id = "${var.useExistingVcn ? var.masterSubnet : module.network.public-id}" 29 | availability_domain = "${var.availability_domain}" 30 | image_ocid = "${var.OELImageOCID[var.region]}" 31 | ssh_public_key = "${var.provide_ssh_key ? var.ssh_provided_key : tls_private_key.key.public_key_openssh}" 32 | master_instance_shape = "${var.master_instance_shape}" 33 | user_data = "${base64gzip(file("scripts/master_boot.sh"))}" 34 | executor = "${var.executor}" 35 | airflow_database = "${var.airflow_database}" 36 | airflow_options = "${var.airflow_options}" 37 | all = "${var.all}" 38 | all_dbs = "${var.all_dbs}" 39 | async = "${var.async}" 40 | aws = "${var.aws}" 41 | azure = "${var.azure}" 42 | celery = "${var.celery}" 43 | cloudant = "${var.cloudant}" 44 | crypto = "${var.crypto}" 45 | devel = "${var.devel}" 46 | devel_hadoop = "${var.devel_hadoop}" 47 | druid = "${var.druid}" 48 | gcp = "${var.gcp}" 49 | github_enterprise = "${var.github_enterprise}" 50 | google_auth = "${var.google_auth}" 51 | hashicorp = "${var.hashicorp}" 52 | hdfs = "${var.hdfs}" 53 | hive = "${var.hive}" 54 | jdbc = "${var.jdbc}" 55 | kerberos = "${var.kerberos}" 56 | kubernetes = "${var.kubernetes}" 57 | ldap = "${var.ldap}" 58 | mssql = "${var.mssql}" 59 | mysql = "${var.mysql}" 60 | oracle = "${var.oracle}" 61 | password = "${var.password}" 62 | postgres = "${var.postgres}" 63 | presto = "${var.presto}" 64 | qds = "${var.qds}" 65 | rabbitmq = "${var.rabbitmq}" 66 | redis = "${var.redis}" 67 | samba = "${var.samba}" 68 | slack = "${var.slack}" 69 | ssh = "${var.ssh}" 70 | vertica = "${var.vertica}" 71 | enable_fss = "${var.enable_fss}" 72 | nfs_ip = "${module.fss.nfs-ip}" 73 | enable_security = "${var.enable_security}" 74 | oci_mysql_ip = "${var.airflow_database == "mysql-oci" ? var.oci_mysql_ip : ""}" 75 | } 76 | 77 | module "worker" { 78 | source = "./modules/worker" 79 | instances = "${var.worker_node_count}" 80 | region = "${var.region}" 81 | compartment_ocid = "${var.compartment_ocid}" 82 | subnet_id = "${var.useExistingVcn ? var.clusterSubnet : module.network.private-id}" 83 | availability_domain = "${var.availability_domain}" 84 | image_ocid = "${var.OELImageOCID[var.region]}" 85 | ssh_public_key = "${var.provide_ssh_key ? var.ssh_provided_key : tls_private_key.key.public_key_openssh}" 86 | worker_instance_shape = "${var.worker_instance_shape}" 87 | block_volumes_per_worker = "${var.enable_block_volumes ? var.block_volumes_per_worker : 0}" 88 | data_blocksize_in_gbs = "${var.data_blocksize_in_gbs}" 89 | user_data = "${base64encode(file("scripts/boot.sh"))}" 90 | block_volume_count = "${var.enable_block_volumes ? var.block_volumes_per_worker : 0}" 91 | vpus_per_gb = "${var.customize_block_volume_performance ? data.null_data_source.vpus.outputs["block_vpus"] : 10}" 92 | executor = "${var.executor}" 93 | enable_fss = "${var.enable_fss}" 94 | nfs_ip = "${module.fss.nfs-ip}" 95 | airflow_master = "${data.null_data_source.values.outputs["airflow_master"]}" 96 | oci_mysql_ip = "${var.airflow_database == "mysql-oci" ? 
var.oci_mysql_ip : ""}" 97 | airflow_database = "${var.airflow_database}" 98 | } 99 | -------------------------------------------------------------------------------- /scripts/dags/trigger_dataflow_when_file_exists.py: -------------------------------------------------------------------------------- 1 | # To use this example: 2 | # 1. Customize the schedule_interval as needed. 3 | # 2. Set the Application OCID, Compartment OCID and name of the bucket to probe. 4 | # 3. If needed, set logs and warehouse buckets. 5 | # 4. If needed, set the oci_namespace variable or create an Airflow Variable (preferred). 6 | # 5. If you want to, customize the display_name variable to change how Runs appear. 7 | # 6. If you want to, customize the SLA setting. SLA misses will appear in the Airflow UI. 8 | # 9 | # Additionally you will need to customize parameter_list. 10 | # The parameters you provide need to be consistent with what your Application expects. 11 | # 12 | # After setting these, copy the script into your production DAG directory 13 | # usually (/opt/airflow/dags) and your job will run on the period you specified. 14 | 15 | from airflow import DAG 16 | from airflow.models import Variable 17 | from operators.oci_data_flow import OCIDataFlowRun 18 | from sensors.oci_object_storage import OCIObjectStoragePrefixSensor 19 | 20 | import datetime 21 | 22 | default_args = { 23 | "owner": "airflow", 24 | "start_date": datetime.datetime(2020, 6, 26), 25 | "email": ["your_email@somecompany.com"], 26 | "email_on_failure": False, 27 | "email_on_retry": False, 28 | "sla": datetime.timedelta(hours=12), 29 | } 30 | 31 | # This schedule_interval runs the Application every 30 minutes. 32 | # Customize it as needed. 33 | dag = DAG( 34 | "transcoder_ng5", 35 | default_args=default_args, 36 | schedule_interval="0/30 * * * *", 37 | catchup=False, 38 | concurrency=1, 39 | max_active_runs=1, 40 | ) 41 | 42 | # Customize these variables. 43 | # Find the OCID values in the UI or using the CLI. 44 | oci_conn_id = "oci_default" 45 | dataflow_application_ocid = "UNSET" 46 | compartment_ocid = "UNSET" 47 | logs_bucket = "dataflow-logs" 48 | warehouse_bucket = "dataflow-warehouse" 49 | try: 50 | namespace = Variable.get("oci_namespace") 51 | except: 52 | namespace = "UNSET" 53 | bucket_name = "UNSET" 54 | bucket_base_path = "" 55 | 56 | # Ensure everything is set. 57 | assert bucket_name != "UNSET", "You need to set bucket_name" 58 | assert dataflow_application_ocid != "UNSET", "You need to set dataflow_application_ocid" 59 | assert compartment_ocid != "UNSET", "You need to set compartment_ocid" 60 | assert ( 61 | namespace != "UNSET" 62 | ), "You need to set namespace as an Airflow variable or in the script" 63 | 64 | logs_bucket_uri = f"oci://{logs_bucket}@{namespace}/" 65 | warehouse_bucket_uri = f"oci://{warehouse_bucket}@{namespace}/" 66 | display_name = "Application Run on {{ ts }}" 67 | 68 | def argument_builder_callback(context): 69 | runtime_arguments = dict() 70 | 71 | # Launch an extra executor for every 10 files, up to 20 total executors. 72 | total_files = context["task_instance"].xcom_pull( 73 | "Probe_New_Data", key="oci_prefix_total_files" 74 | ) 75 | num_executors = min(total_files // 10 + 2, 20) 76 | runtime_arguments["num_executors"] = num_executors 77 | runtime_arguments["driver_shape"] = "VM.Standard2.2" 78 | runtime_arguments["executor_shape"] = "VM.Standard2.2" 79 | 80 | # Set application arguments including parallelism. 81 | # Target 3 partitions per core (VM.Standard2.2 = 2 cores). 
82 | number_partitions = str(num_executors * 2 * 3) 83 | runtime_arguments["arguments"] = [ 84 | "--input", 85 | bucket_name, 86 | "--output", 87 | "output", 88 | "--number-partitions", 89 | number_partitions, 90 | ] 91 | return runtime_arguments 92 | 93 | with dag: 94 | sensor = OCIObjectStoragePrefixSensor( 95 | task_id="Probe_New_Data", 96 | bucket_name=bucket_name, 97 | mode="reschedule", 98 | prefix=bucket_base_path, 99 | ) 100 | run_application = OCIDataFlowRun( 101 | task_id="Run_Dataflow_Application", 102 | application_ocid=dataflow_application_ocid, 103 | compartment_ocid=compartment_ocid, 104 | display_name=display_name, 105 | logs_bucket_uri=logs_bucket_uri, 106 | oci_conn_id=oci_conn_id, 107 | runtime_callback=argument_builder_callback, 108 | warehouse_bucket_uri=warehouse_bucket_uri, 109 | ) 110 | sensor >> run_application 111 | -------------------------------------------------------------------------------- /scripts/custom/connection_form.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * Created by janomar on 23/07/15. 
22 | */ 23 | 24 | $(document).ready(function() { 25 | var config = { 26 | jdbc: { 27 | hidden_fields: ['port', 'schema', 'extra'], 28 | relabeling: {'host': 'Connection URL'}, 29 | }, 30 | google_cloud_platform: { 31 | hidden_fields: ['host', 'schema', 'login', 'password', 'port', 'extra'], 32 | relabeling: {}, 33 | }, 34 | cloudant: { 35 | hidden_fields: ['port', 'extra'], 36 | relabeling: { 37 | 'host': 'Account', 38 | 'login': 'Username (or API Key)', 39 | 'schema': 'Database' 40 | } 41 | }, 42 | jenkins: { 43 | hidden_fields: ['schema'], 44 | relabeling: { 45 | 'login': 'Username', 46 | 'password': 'API token or password', 47 | 'extra': 'Use https (true/false, default false)' 48 | } 49 | }, 50 | docker: { 51 | hidden_fields: ['port', 'schema'], 52 | relabeling: { 53 | 'host': 'Registry URL', 54 | 'login': 'Username', 55 | } 56 | }, 57 | oci: { 58 | hidden_fields: ['host', 'schema', 'password', 'port', 'extra'], 59 | relabeling: { 60 | 'login': 'User OCID'}, 61 | }, 62 | qubole: { 63 | hidden_fields: ['login', 'schema', 'port', 'extra'], 64 | relabeling: { 65 | 'host': 'API Endpoint', 66 | 'password': 'Auth Token', 67 | }, 68 | placeholders: { 69 | 'host': 'https://.qubole.com/api' 70 | } 71 | }, 72 | ssh: { 73 | hidden_fields: ['schema'], 74 | relabeling: { 75 | 'login': 'Username', 76 | } 77 | }, 78 | yandexcloud: { 79 | hidden_fields: ['host', 'schema', 'login', 'password', 'port', 'extra'], 80 | relabeling: {}, 81 | }, 82 | spark: { 83 | hidden_fields: ['schema', 'login', 'password'], 84 | relabeling: {}, 85 | }, 86 | } 87 | function connTypeChange(connectionType) { 88 | $("div.form-group").removeClass("hide"); 89 | $.each($("[id^='extra__']"), function() { 90 | $(this).parent().parent().addClass('hide') 91 | }); 92 | // Somehow the previous command doesn't honor __ 93 | $("#extra").parent().parent().removeClass('hide') 94 | $.each($("[id^='extra__"+connectionType+"']"), function() { 95 | $(this).parent().parent().removeClass('hide') 96 | }); 97 | $("label[orig_text]").each(function(){ 98 | $(this).text($(this).attr("orig_text")); 99 | }); 100 | $(".form-control").each(function(){$(this).attr('placeholder', '')}); 101 | 102 | if (config[connectionType] != undefined){ 103 | $.each(config[connectionType].hidden_fields, function(i, field){ 104 | $("#" + field).parent().parent().addClass('hide') 105 | }); 106 | $.each(config[connectionType].relabeling, function(k, v){ 107 | lbl = $("label[for='" + k + "']") 108 | lbl.attr("orig_text", lbl.text()); 109 | $("label[for='" + k + "']").text(v); 110 | }); 111 | $.each(config[connectionType].placeholders, function(k, v){ 112 | $("#" + k).attr('placeholder', v); 113 | }); 114 | } 115 | } 116 | var connectionType=$("#conn_type").val(); 117 | $("#conn_type").on('change', function(e) { 118 | connectionType = $("#conn_type").val(); 119 | connTypeChange(connectionType); 120 | }); 121 | connTypeChange(connectionType); 122 | }); 123 | -------------------------------------------------------------------------------- /scripts/plugins/hooks/oci_data_flow.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 | 
19 | from typing import Optional
20 | import oci
21 | from hooks.oci_base import OCIBaseHook
22 | from airflow.exceptions import AirflowException
23 | """
24 | Get OCID by Name - Compartment ID, Application Name
25 | """
26 | 
27 | 
28 | class OCIDataFlowHook(OCIBaseHook):
29 |     """
30 |     Interact with Oracle Data Flow.
31 |     """
32 |     def __init__(self,
33 |                  compartment_ocid: str,
34 |                  display_name: str,
35 |                  oci_conn_id: Optional[str] = "oci_default",
36 |                  oci_region: Optional[str] = None,
37 |                  driver_shape: Optional[str] = None,
38 |                  executor_shape: Optional[str] = None,
39 |                  file_uri: Optional[str] = None,
40 |                  language: Optional[str] = "English",
41 |                  num_executors: Optional[int] = 1,
42 |                  spark_version: Optional[str] = None,
43 |                  *args,
44 |                  **kwargs):
45 |         super(OCIDataFlowHook, self).__init__(*args, **kwargs)
46 |         self.compartment_id = compartment_ocid
47 |         self.display_name = display_name
48 |         self.oci_conn_id = oci_conn_id
49 |         self.oci_region = oci_region
50 |         self.driver_shape = driver_shape
51 |         self.executor_shape = executor_shape
52 |         self.file_uri = file_uri
53 |         self.language = language
54 |         self.num_executors = num_executors
55 |         self.spark_version = spark_version
56 |         self.oci_client = oci.data_flow.DataFlowClient
57 | 
58 |     def get_application_ocid(self, compartment_id=None, display_name=None):
59 |         try:
60 |             appdetails = self.get_client(self.oci_client).list_applications(compartment_id=self.compartment_id).data
61 |             for app in appdetails:
62 |                 if app.display_name == self.display_name:
63 |                     return app.id
64 |                 else:
65 |                     continue
66 |             return None
67 |         except AirflowException as e:
68 |             self.log.error(e.response["Error"]["Message"])
69 | 
70 | 
71 |     def check_for_application_by_name(self, compartment_id=None, display_name=None):
72 |         try:
73 |             appdetails = self.get_client(self.oci_client).list_applications(compartment_id=self.compartment_id).data
74 |             for app in appdetails:
75 |                 if app.display_name == self.display_name:
76 |                     return True
77 |                 else:
78 |                     continue
79 |             return False
80 |         except AirflowException as e:
81 |             self.log.error(e.response["Error"]["Message"])
82 | 
83 | 
84 |     def create_application_details(self, **kwargs):
85 |         try:
86 |             application_details = oci.data_flow.models.CreateApplicationDetails(compartment_id=self.compartment_id,
87 |                                                                                 display_name=self.display_name,
88 |                                                                                 driver_shape=self.driver_shape,
89 |                                                                                 executor_shape=self.executor_shape,
90 |                                                                                 file_uri=self.file_uri,
91 |                                                                                 language=self.language,
92 |                                                                                 num_executors=self.num_executors,
93 |                                                                                 spark_version=self.spark_version,
94 |                                                                                 **kwargs)
95 |             return application_details
96 |         except AirflowException as e:
97 |             self.log.error(e.response["Error"]["Message"])
98 | 
99 |     def create_run_details(self, **kwargs):
100 |         try:
101 |             run_details = oci.data_flow.models.CreateRunDetails(compartment_id=self.compartment_id,
102 |                                                                 application_id=self.get_application_ocid(),
103 |                                                                 display_name=self.display_name,
104 |                                                                 **kwargs)
105 |             return run_details
106 |         except AirflowException as e:
107 |             self.log.error(e.response["Error"]["Message"])
108 | 
109 | 
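For orientation, the sketch below is not part of the repository; it shows how this hook can be driven directly: resolving a Data Flow application by display name, building run details, and submitting the run through the wrapped SDK client. The compartment OCID, application name, and connection ID are placeholders.

    from hooks.oci_data_flow import OCIDataFlowHook

    # Placeholder values - substitute OCIDs and names from your own tenancy.
    hook = OCIDataFlowHook(
        compartment_ocid="ocid1.compartment.oc1..example",
        display_name="example_dataflow_app",
        oci_conn_id="oci_default",
    )

    if hook.check_for_application_by_name():
        # Build CreateRunDetails for the named application and submit the run
        # through the underlying oci.data_flow.DataFlowClient.
        run_details = hook.create_run_details()
        client = hook.get_client(hook.oci_client)
        run = client.create_run(create_run_details=run_details).data
        print(run.id, run.lifecycle_state)

The OCIDataFlowRun operator used in the example DAG wraps this same flow, so direct use of the hook is normally only needed for custom operators or ad-hoc scripts.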
-------------------------------------------------------------------------------- /scripts/plugins/operators/oci_copy_object_to_adb.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | import oci 19 | import pandas as pd 20 | from typing import Optional 21 | from hooks.oci_adb import OCIDBHook 22 | from hooks.oci_object_storage import OCIObjectStorageHook 23 | from airflow.models.baseoperator import BaseOperator 24 | from airflow.utils.decorators import apply_defaults 25 | from airflow.exceptions import AirflowException 26 | 27 | 28 | class OCIDBCopyFromObject(BaseOperator): 29 | """ 30 | Copy data from a file in Object Storage into OCI ADB/ADW 31 | :param compartment_id: Target compartment OCID 32 | :type compartment_id: str 33 | :param tns_admin_root: The wallet root directory. The wallet will be loaded from $TNS_ADMIN/sqlnet.ora. 34 | If you do not set tns_admin_root, it is assumed to be in your environment. 
35 | :type tns_admin_root: str 36 | :param database_ocid: Database ID 37 | :type database_ocid: str 38 | :param db_workload: DB Workload type, valid options are DW or OLTP 39 | :type str: 40 | :param db_name: Databse Name (Not display) 41 | :type db_name: str 42 | :param debug: Whether to display debug output 43 | :type debug: bool 44 | :param dsn: DSN (TNS Name) for connection 45 | :type dsn: str 46 | :param oci_conn_id: Airflow connection ID 47 | :type oci_conn_id: str 48 | :param oci_region: Target OCI Region 49 | :type oci_region: str 50 | :param password: Database password for user_id 51 | :type password: str 52 | :param user_id: User ID for Database login 53 | :type user_id: str 54 | :param wallet_location: Filesystem location for wallet files 55 | :param wallet_location: str 56 | """ 57 | 58 | @apply_defaults 59 | def __init__(self, 60 | compartment_ocid: str, 61 | bucket_name: str, 62 | object_name: str, 63 | tns_admin_root: Optional[str] = None, 64 | database_ocid: Optional[str] = None, 65 | db_workload: Optional[str] = None, 66 | db_name: Optional[str] = None, 67 | debug: Optional[bool] = False, 68 | dsn: Optional[str] = None, 69 | oci_conn_id: Optional[str] = "oci_default", 70 | oci_region: Optional[str] = None, 71 | password: Optional[str] = None, 72 | user_id: Optional[str] = None, 73 | wallet_location: Optional[str] = None, 74 | *args, 75 | **kwargs): 76 | super(OCIDBCopyFromObject, self).__init__(*args, **kwargs) 77 | self.compartment_id = compartment_ocid 78 | self.bucket_name = bucket_name 79 | self.object_name = object_name 80 | self.tns_admin_root = tns_admin_root 81 | self.database_id = database_ocid 82 | self.db_workload = db_workload 83 | self.db_name = db_name 84 | self.debug = debug 85 | self.dsn = dsn 86 | self.oci_conn_id = oci_conn_id 87 | self.oci_region = oci_region 88 | self.password = password 89 | self.user_id = user_id 90 | self.wallet_location = wallet_location 91 | self._oci_hook = None 92 | self._oci_storage_hook = None 93 | self.oci_client = oci.database.DatabaseClient 94 | 95 | def execute(self, context, **kwargs): 96 | try: 97 | self._oci_hook = OCIDBHook(compartment_ocid=self.compartment_id, db_name=self.db_name, 98 | db_workload=self.db_workload, tns_admin_root=self.tns_admin_root, 99 | wallet_location=self.wallet_location) 100 | self._oci_storage_hook = OCIObjectStorageHook(compartment_id=self.compartment_id, 101 | bucket_name=self.bucket_name) 102 | self.log.info("Relocalizing sqlnet.ora") 103 | self._oci_hook.relocalize_sqlnet() 104 | self.log.info("Sqlnet.ora relocalized to {0}".format(self.tns_admin_root)) 105 | self.log.info("Establishing DB Connection") 106 | with self._oci_hook.connect_sqlalchemy(dsn=self.dsn, user=self.user_id, password=self.password) as conn: 107 | namespace = self._oci_storage_hook.get_namespace(compartment_id=self.compartment_id) 108 | object_contents = self._oci_storage_hook.read_from_bucket(bucket_name=self.bucket_name, 109 | namespace_name=namespace, 110 | object_name=self.object_name) 111 | dff = pd.DataFrameFactory(conn) 112 | dff.write(object_contents, name=self.object_name, if_exists='replace') 113 | except AirflowException as e: 114 | self.log.error(e.response["Error"]["Message"]) 115 | 116 | -------------------------------------------------------------------------------- /scripts/plugins/operators/oci_adb.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. 
See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | import oci 19 | import cx_Oracle 20 | import gzip 21 | import pandas as pd 22 | from typing import Optional 23 | from hooks.oci_adb import OCIDBHook 24 | from airflow.models.baseoperator import BaseOperator 25 | from airflow.utils.decorators import apply_defaults 26 | from airflow.exceptions import AirflowException 27 | 28 | 29 | class OCIDBOperator(BaseOperator): 30 | """ 31 | Execute SQL on OCI ADB/ADW 32 | 33 | :param compartment_id: Target compartment OCID 34 | :type compartment_id: str 35 | :param tns_admin_root: The wallet root directory. The wallet will be loaded from $TNS_ADMIN/sqlnet.ora. 36 | If you do not set tns_admin_root, it is assumed to be in your environment. 37 | :type tns_admin_root: str 38 | :param database_ocid: Database ID 39 | :type database_ocid: str 40 | :param db_workload: DB Workload type, valid options are DW or OLTP 41 | :type str: 42 | :param db_name: Databse Name (Not display) 43 | :type db_name: str 44 | :param debug: Whether to display debug output 45 | :type debug: bool 46 | :param dsn: DSN (TNS Name) for connection 47 | :type dsn: str 48 | :param oci_conn_id: Airflow connection ID 49 | :type oci_conn_id: str 50 | :param oci_region: Target OCI Region 51 | :type oci_region: str 52 | :param password: Database password for user_id 53 | :type password: str 54 | :param user_id: User ID for Database login 55 | :type user_id: str 56 | :param wallet_location: Filesystem location for wallet files 57 | :param wallet_location: str 58 | :param single_sql: Single-line SQL to execute on the database with cx_Oracle cursor.execute 59 | :type single_sql: str 60 | :param many_sql: Batch SQL to execute on the database with cx_Oracle cursor.executemany loading many_sql_data 61 | :type many_sql: str 62 | :param many_sql_data: Data to batch load with cursor.exeecutemany 63 | :param many_sql_data: list 64 | :param kwargs: Additional parameters for cx_Oracle execution 65 | """ 66 | @apply_defaults 67 | def __init__(self, 68 | compartment_ocid: str, 69 | tns_admin_root: Optional[str] = None, 70 | database_ocid: Optional[str] = None, 71 | db_workload: Optional[str] = None, 72 | db_name: Optional[str] = None, 73 | debug: Optional[bool] = False, 74 | dsn: Optional[str] = None, 75 | oci_conn_id: Optional[str] = "oci_default", 76 | oci_region: Optional[str] = None, 77 | password: Optional[str] = None, 78 | user_id: Optional[str] = None, 79 | wallet_location: Optional[str] = None, 80 | single_sql: Optional[str] = None, 81 | many_sql: Optional[str] = None, 82 | many_sql_data: Optional[list] = None, 83 | *args, 84 | **kwargs): 85 | super(OCIDBOperator, self).__init__(*args, **kwargs) 86 | self.compartment_id = compartment_ocid 87 | self.tns_admin_root = tns_admin_root 88 | self.database_id = database_ocid 89 | self.db_workload = db_workload 90 | self.db_name 
= db_name 91 | self.debug = debug 92 | self.dsn = dsn 93 | self.oci_conn_id = oci_conn_id 94 | self.oci_region = oci_region 95 | self.password = password 96 | self.user_id = user_id 97 | self.wallet_location = wallet_location 98 | self.single_sql = single_sql 99 | self.many_sql = many_sql 100 | self.many_sql_data = many_sql_data 101 | self.oci_client = oci.database.DatabaseClient 102 | 103 | def execute(self, context, **kwargs): 104 | try: 105 | self._oci_hook = OCIDBHook(compartment_ocid=self.compartment_id, db_name=self.db_name, 106 | db_workload=self.db_workload, tns_admin_root=self.tns_admin_root, 107 | wallet_location=self.wallet_location) 108 | db_id = self._oci_hook.get_ocid_by_name(db_name=self.db_name) 109 | self.log.info("{0} Database ID: {1}".format(self.db_name, db_id)) 110 | self.log.info("Relocalizing sqlnet.ora") 111 | self._oci_hook.relocalize_sqlnet() 112 | self.log.info("Sqlnet.ora relocalized to {0}".format(self.tns_admin_root)) 113 | self.log.info("Establishing DB Connection") 114 | with self._oci_hook.connect(user=self.user_id, password=self.password) as conn: 115 | cursor = conn.cursor() 116 | if self.single_sql is not None: 117 | if self.debug is True: 118 | self.log.info("Running Single SQL {}".format(self.single_sql)) 119 | cursor.execute(self.single_sql, **kwargs) 120 | if self.many_sql is not None: 121 | if self.debug is True: 122 | self.log.info("Running Many SQL {}".format(self.many_sql)) 123 | cursor.prepare(self.many_sql) 124 | cursor.executemany(None, self.many_sql_data, **kwargs) 125 | conn.commit() 126 | except AirflowException as e: 127 | self.log.error(e.response["Error"]["Message"]) 128 | 129 | -------------------------------------------------------------------------------- /scripts/plugins/hooks/oci_base.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | """ 19 | This module contains Base Oracle Cloud Infrastructure (OCI) Hook. 20 | """ 21 | from os import path 22 | from typing import Optional 23 | import oci 24 | from airflow.exceptions import AirflowException 25 | from airflow.hooks.base_hook import BaseHook 26 | 27 | 28 | class OCIBaseHook(BaseHook): 29 | """ 30 | Interact with OCI 31 | This class is a thin wrapper around the OCI Python SDK 32 | 33 | :param oci_conn_id: The OCI connection profile used for Airflow connection. 34 | :type oci_conn_id: str 35 | :param config: OCI API Access Configuration - usually read from Airflow but can be provided. 36 | :type config: dict 37 | :param verify: Whether or not to verify SSL certificates. 
38 | :type verify: str or bool 39 | 40 | How to Set OCI configuration 41 | For detail on the contents of the default config file, see 42 | https://docs.cloud.oracle.com/en-us/iaas/Content/API/Concepts/sdkconfig.htm 43 | If you don't want to use a file, populate values as detailed here 44 | https://oracle-cloud-infrastructure-python-sdk.readthedocs.io/en/latest/configuration.html 45 | Fallback to Instance Principals if not using config files or passed parameters 46 | """ 47 | 48 | def __init__(self, 49 | oci_conn_id: Optional[str] = "oci_default", 50 | verify: Optional[bool] = None 51 | ): 52 | super(OCIBaseHook, self).__init__() 53 | self.oci_conn_id = oci_conn_id 54 | self.config = None 55 | self.client_kwargs = None 56 | self.signer = None 57 | self.verify = verify 58 | 59 | def get_config(self): 60 | try: 61 | try: 62 | connection_object = self.get_connection(self.oci_conn_id) 63 | extra_config = connection_object.extra_dejson 64 | if extra_config.get("extra__oci__tenancy"): 65 | self.config = { 66 | "log_requests": False, 67 | "additional_user_agent": '', 68 | "pass_phrase": None, 69 | "user": connection_object.login, 70 | "fingerprint": extra_config["extra__oci__fingerprint"], 71 | "key_file": extra_config["extra__oci__key_file"], 72 | "tenancy": extra_config["extra__oci__tenancy"], 73 | "region": extra_config["extra__oci__region"] 74 | } 75 | self.client_kwargs = dict() 76 | elif "config_path" in extra_config: 77 | if path.exists(extra_config["config_path"]) is True: 78 | self.config = oci.config.from_file(extra_config["config_path"]) 79 | self.client_kwargs = dict() 80 | else: 81 | raise AirflowException('Config Path %s not found' % extra_config["config_path"]) 82 | elif "service_principal" in extra_config: 83 | self.log.debug("Attempting to use service principal") 84 | self.signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner(dict(purpose="SERVICE_PRINCIPAL")) 85 | self.client_kwargs = dict(signer=self.signer) 86 | self.config = { 87 | "tenancy": self.signer.tenancy_id, 88 | "region": self.signer.region, 89 | } 90 | else: 91 | self.log.info("Failed to find valid oci config in Airflow, falling back to Instance Principals") 92 | self.signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner() 93 | self.client_kwargs = dict(signer=self.signer) 94 | self.config = { 95 | "tenancy": self.signer.tenancy_id, 96 | "region": self.signer.region, 97 | } 98 | except: 99 | self.log.info("Failed to find valid oci config in Airflow, falling back to Instance Principals") 100 | self.signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner() 101 | self.client_kwargs = dict(signer=self.signer) 102 | self.config = { 103 | "tenancy": self.signer.tenancy_id, 104 | "region": self.signer.region, 105 | } 106 | except AirflowException as e: 107 | self.log.error("All attempts to get valid configuration failed") 108 | self.log.error(str(e)) 109 | raise e 110 | return self.config, self.client_kwargs 111 | 112 | def validate_config(self): 113 | from oci.config import validate_config 114 | try: 115 | validate_config(self.config, **self.client_kwargs) 116 | self.identity = oci.identity.IdentityClient(self.config, **self.client_kwargs) 117 | if "user" in self.config: 118 | self.user = self.identity.get_user(self.config["user"]).data 119 | except AirflowException: 120 | self.log.warning("Configuration Validation Failed") 121 | 122 | def get_client(self, client_class): 123 | client, client_kwargs = self.get_config() 124 | return client_class(client, **client_kwargs) 125 | 126 | 127 | 128 | 
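Because get_config() above checks the Airflow connection extras first, then a config_path, and finally falls back to instance principals, the `oci_default` connection only needs to carry whichever fields apply. A minimal sketch of seeding such a connection (not part of the repository; all values are placeholders) follows:

    import json
    from airflow import settings
    from airflow.models import Connection

    # Placeholder credentials; the extra__oci__* keys mirror those read in OCIBaseHook.get_config().
    conn = Connection(
        conn_id="oci_default",
        conn_type="oci",
        login="ocid1.user.oc1..example",  # stored as config["user"]
        extra=json.dumps({
            "extra__oci__tenancy": "ocid1.tenancy.oc1..example",
            "extra__oci__fingerprint": "aa:bb:cc:dd:ee:ff:00:11:22:33:44:55:66:77:88:99",
            "extra__oci__key_file": "/opt/airflow/.oci/oci_api_key.pem",
            "extra__oci__region": "us-ashburn-1",
        }),
    )

    session = settings.Session()
    session.add(conn)
    session.commit()

Alternatively, the extra JSON can contain only a "config_path" entry pointing at an OCI config file, or be left empty on an OCI instance so the hook falls back to instance principals, matching the remaining branches in get_config().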
--------------------------------------------------------------------------------
/scripts/plugins/sensors/oci_object_storage.py:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language
17 | # governing permissions and limitations
18 | # under the License.
19 | 
20 | from airflow.sensors.base_sensor_operator import BaseSensorOperator
21 | from airflow.utils.decorators import apply_defaults
22 | from airflow.exceptions import AirflowException
23 | from hooks.oci_object_storage import OCIObjectStorageHook
24 | 
25 | class BaseOCIObjectStorageSensor(BaseSensorOperator):
26 |     template_fields = ('prefix', 'object_name', 'bucket_name')
27 | 
28 |     @apply_defaults
29 |     def __init__(self,
30 |                  compartment_ocid = None,
31 |                  bucket_name = None,
32 |                  object_name = None,
33 |                  prefix = None,
34 |                  namespace_name = None,
35 |                  oci_conn_id = 'oci_default',
36 |                  verify = None,
37 |                  *args,
38 |                  **kwargs):
39 |         super().__init__(*args, **kwargs)
40 |         if type(self).__name__ == "OCIObjectStorageSensor":
41 |             if object_name is None:
42 |                 raise AirflowException('Please provide object_name')
43 |             self.object_name = object_name
44 |             self.prefix = None
45 |         elif type(self).__name__ == "OCIObjectStoragePrefixSensor":
46 |             if prefix is None:
47 |                 raise AirflowException('Please provide prefix')
48 |             self.object_name = None
49 |             self.prefix = prefix
50 |         if bucket_name is None:
51 |             raise AirflowException('Please provide bucket_name')
52 |         self.compartment_id = compartment_ocid
53 |         self.bucket_name = bucket_name
54 |         self.oci_conn_id = oci_conn_id
55 |         self.verify = verify
56 |         self.namespace_name = namespace_name
57 |         self._oci_hook = None
58 | 
59 |     def poke(self, context):
60 |         raise Exception("Class did not implement poke method")
61 | 
62 |     def list_objects(self, file, prefix_match=False):
63 |         hook = self.get_oci_hook()
64 |         if not self.namespace_name:
65 |             self.namespace_name = hook.get_namespace(compartment_id=self.compartment_id)
66 |         object_store_client = hook.get_client(hook.oci_client)
67 |         base_arguments = dict(
68 |             bucket_name=self.bucket_name,
69 |             fields="size",
70 |             limit=100,
71 |             namespace_name=self.namespace_name,
72 |             prefix=file,
73 |         )
74 |         objectsummary = object_store_client.list_objects(**base_arguments)
75 | 
76 |         # For exact match we only consider the first match, if any.
77 |         if prefix_match == False:
78 |             matches = objectsummary.data.objects
79 |             if matches and matches[0].name == file:
80 |                 return 1, matches[0].size
81 |             return 0, 0
82 | 
83 |         # Prefix mode: page through and total up all matching objects.
84 |         total_files = 0
85 |         total_size = 0
86 |         while True:
87 |             object_list = objectsummary.data
88 |             for object in object_list.objects:
89 |                 total_files += 1
90 |                 total_size += object.size
91 |             if object_list.next_start_with is None:
92 |                 break
93 |             base_arguments["start"] = object_list.next_start_with
94 |             objectsummary = object_store_client.list_objects(**base_arguments)
95 |         return total_files, total_size
96 | 
97 |     def get_oci_hook(self):
98 |         """
99 |         Create and return OCI Hook
100 |         :return:
101 |         """
102 |         if not self._oci_hook:
103 |             self._oci_hook = OCIObjectStorageHook(bucket_name=self.bucket_name, compartment_id=self.compartment_id,
104 |                                                   oci_conn_id=self.oci_conn_id, verify=self.verify)
105 |         return self._oci_hook
106 | 
107 | 
108 | class OCIObjectStorageSensor(BaseOCIObjectStorageSensor):
109 |     """
110 |     Sensor to interact with OCI Object Storage
111 |     """
112 | 
113 |     def __init__(self, *args, **kwargs):
114 |         super().__init__(*args, **kwargs)
115 | 
116 |     def poke(self, context):
117 |         self.log.info('Poking for object %s in bucket %s', self.object_name, self.bucket_name)
118 |         try:
119 |             total_files, total_size = self.list_objects(self.object_name, prefix_match=False)
120 |             if total_files > 0:
121 |                 self.log.info('Found object of size %d', total_size)
122 |                 context['task_instance'].xcom_push('oci_storage_sensor_size', total_size)
123 |                 return True
124 |             self.log.info('Object not found')
125 |             return False
126 | 
127 |         except AirflowException as e:
128 |             self.log.error(e.response["Error"]["Message"])
129 | 
130 | 
131 | class OCIObjectStoragePrefixSensor(BaseOCIObjectStorageSensor):
132 |     """
133 |     Prefix sensor for OCI Object Storage
134 |     """
135 | 
136 |     def __init__(self, *args, **kwargs):
137 |         super().__init__(*args, **kwargs)
138 | 
139 |     def poke(self, context):
140 |         self.log.info('Poking for prefix [%s] in bucket %s', self.prefix, self.bucket_name)
141 |         try:
142 |             total_files, total_size = self.list_objects(self.prefix, prefix_match=True)
143 | 
144 |             # If we matched anything, record file count, total size and return True.
145 |             if total_files > 0:
146 |                 self.log.info('Found %d objects with total size %d', total_files, total_size)
147 |                 context['task_instance'].xcom_push('oci_prefix_total_files', total_files)
148 |                 context['task_instance'].xcom_push('oci_prefix_total_size', total_size)
149 |                 return True
150 |             self.log.info('No matching objects')
151 |             return False
152 | 
153 |         except AirflowException as e:
154 |             self.log.error(e.response["Error"]["Message"])
155 | 
156 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DEPRECATED
2 | 
3 | Note that use of this repository is deprecated in favor of [OKE-Airflow](https://github.com/oracle-quickstart/oke-airflow). This is primarily due to the lack of scheduler HA in an IaaS deployment of Airflow, along with the flexibility you get by using the KubernetesExecutor for DAG execution, which is a much more efficient use of resources.
4 | 
5 | # OCI-Airflow
6 | [Apache Airflow](https://airflow.apache.org/) on Oracle Cloud Infrastructure
7 | 
8 | This Quick Start uses [OCI Resource Manager](https://docs.cloud.oracle.com/iaas/Content/ResourceManager/Concepts/resourcemanager.htm) (ORM) to make deployment quite easy.
Simply [download the latest .zip](https://github.com/oracle-quickstart/oci-airflow/archive/master.zip) and follow the [Resource Manager instructions](https://docs.cloud.oracle.com/en-us/iaas/Content/ResourceManager/Tasks/managingstacksandjobs.htm) to build a stack.
9 | 
10 | **This deployment requires using the included schema.yaml file with ORM.** The GitHub zip must be repackaged so that its contents are at the top level before the ORM stack is created; otherwise the schema cannot be read. This is a straightforward process:
11 | 
12 |     unzip oci-airflow-master.zip
13 |     cd oci-airflow-master
14 |     zip -r oci-airflow.zip *
15 | 
16 | Use the `oci-airflow.zip` file created in the last step to create the ORM stack. The schema file can also be customized for your use, enabling you to build a set of approved variables for deployment if desired.
17 | 
18 | This template builds the VCN/subnets as part of deployment, but also has options for using pre-existing VCN/subnets. If using a pre-existing network topology, ensure you have a security list entry allowing TCP port 8080 ingress/egress for access to the Airflow UI. Also ensure a gateway is present to allow Internet access for the Airflow host, as Airflow is downloaded and compiled as part of deployment using options selected in the Resource Manager schema.
19 | 
20 | ## Deployment customization
21 | The schema file offers advanced deployment options. When enabled, you can select which Airflow libraries are installed during deployment, choose which executor to use, and customize other deployment parameters for the metadata database and web UI security. The default libraries are SSH, Oracle, and MySQL. Note that the apache-airflow[mysql] package is required for installation; disabling it will cause the deployment to fail.
22 | 
23 | ## Metadata Database
24 | 
25 | ### mysql-local
26 | This template uses a community edition of MySQL for Airflow metadata, downloaded and installed during provisioning. The default root database password is set in [master_boot.sh](https://github.com/oracle-quickstart/oci-airflow/blob/master/scripts/master_boot.sh#L256), which is run by CloudInit. It is highly recommended that you change this password to something more secure, either before or after deployment.
27 | 
28 | ### mysql-oci
29 | This deploys a MySQL DB instance on OCI and uses it for Airflow metadata. You will need to set some Secrets Vault values prior to deployment for this to work; see the Security section below.
30 | 
31 | ### oracle
32 | *In Development* - This requires some updates to Alembic to work properly; bootstrapping the database currently fails.
33 | 
34 | ## Celery for parallelized execution
35 | This template also supports the Celery executor to parallelize execution across multiple workers. If using Celery with a pre-existing VCN/subnet, ensure a security list entry is present allowing TCP 5555 ingress/egress for the Flower UI on the Airflow master.
36 | 
37 | See the Security section below for detail on synchronizing the Fernet key among cluster hosts.
38 | 
39 | ### FSS
40 | OCI File Storage Service is offered when using Celery. Enabling this creates an NFS mount on each host in the cluster for `/opt/airflow/dags`. This provides a single location to manage DAGs for the cluster and ensures any changes stay in sync across all cluster hosts.
41 | 
42 | ## OCI Hooks, Operators, Sensors
43 | This template automatically downloads and installs hooks, operators, and sensors for OCI services into `/opt/airflow/plugins`.
These plugins are fetched remotely by the Airflow master instance from this GitHub repository using `wget` as part of the CloudInit deployment. Long term, these hooks, operators, and sensors will be committed upstream to Apache Airflow and included as part of the native deployment. When using Celery, workers also fetch these files during deployment. When using FSS, example DAGs are fetched only by the master host; otherwise, all hosts also download them to `/opt/airflow/dags`.
44 | 
45 | ## Security
46 | [Instance Principals](https://docs.cloud.oracle.com/en-us/iaas/Content/Identity/Tasks/callingservicesfrominstances.htm) must be enabled for all functionality below. This is offered as part of deployment, but you may need your tenancy administrator to enable the required policies if you don't have privileges at the tenancy root.
47 | 
48 | This template offers basic [Airflow security](https://airflow.apache.org/docs/stable/security.html) when deploying using ORM. Click Advanced Options > Enable Security to enable local password auth for the Airflow UI. The password for this needs to be set up in OCI Secrets Vault prior to deployment.
49 | 
50 | See [Overview of Vault](https://docs.cloud.oracle.com/en-us/iaas/Content/KeyManagement/Concepts/keyoverview.htm) for more information on how to set up and configure a Vault.
51 | 
52 | The Secrets Vault should be in the same compartment where you are deploying Airflow, and the secrets should use the following names:
53 | 
54 | ![Airflow Secrets](images/SecretsExample.png)
55 | 
56 | * AirflowPassword - Password for the Airflow Web UI
57 | * AirflowUsername - Username for the Airflow Web UI
58 | * AirflowFernetKey - [Generate a Fernet key](https://bcb.github.io/airflow/fernet-key), which is synchronized across hosts for Celery deployments.
59 | * AirflowDBUsername - Username for the Metadata Database (not used in mysql-local)
60 | * AirflowDBPassword - Password for the Metadata Database (not used in mysql-local)
61 | 
62 | ## Logging
63 | Deployment activities are logged to `/var/log/OCI-airflow-initialize.log`
64 | 
65 | This should provide some detail on the installation process. Note that the Airflow UI is not immediately available, as the binaries are compiled as part of deployment. Watching the log file will tell you when the deployment is complete and the Airflow UI is available:
66 | 
67 | `sudo tail -f /var/log/OCI-airflow-initialize.log`
68 | 
69 | ## SystemD
70 | Daemon scripts are set up as part of deployment, so Airflow can be controlled using systemd commands:
71 | 
72 |     systemctl (start|stop|status|restart) airflow-webserver
73 |     systemctl (start|stop|status|restart) airflow-scheduler
74 | 
75 | If using Celery, a flower service is also present on the Airflow master, along with an airflow-worker service on worker nodes.
76 | 
77 |     systemctl (start|stop|status|restart) flower
78 |     systemctl (start|stop|status|restart) airflow-worker
79 | 
80 | All services are started during deployment and set to start at boot using chkconfig.
81 | 
--------------------------------------------------------------------------------
/modules/network/main.tf:
--------------------------------------------------------------------------------
1 | resource "oci_core_vcn" "data_vcn" {
2 |   count = var.useExistingVcn ? 0 : 1
3 |   cidr_block = "${var.VPC_CIDR}"
4 |   compartment_id = "${var.compartment_ocid}"
5 |   display_name = "data_vcn"
6 |   dns_label = "${var.vcn_dns_label}"
7 | }
8 | 
9 | resource "oci_core_internet_gateway" "data_internet_gateway" {
10 |   count = var.useExistingVcn ?
0 : 1 11 | compartment_id = "${var.compartment_ocid}" 12 | display_name = "data_internet_gateway" 13 | vcn_id = "${var.useExistingVcn ? var.custom_vcn[0] : oci_core_vcn.data_vcn.0.id}" 14 | } 15 | 16 | resource "oci_core_nat_gateway" "nat_gateway" { 17 | count = var.useExistingVcn ? 0 : 1 18 | compartment_id = "${var.compartment_ocid}" 19 | vcn_id = "${var.useExistingVcn ? var.custom_vcn[0] : oci_core_vcn.data_vcn.0.id}" 20 | display_name = "nat_gateway" 21 | } 22 | 23 | resource "oci_core_service_gateway" "data_service_gateway" { 24 | count = var.useExistingVcn ? 0 : 1 25 | compartment_id = "${var.compartment_ocid}" 26 | services { 27 | service_id = "${lookup(data.oci_core_services.all_svcs_moniker[count.index].services[0], "id")}" 28 | } 29 | vcn_id = "${var.useExistingVcn ? var.custom_vcn[0] : oci_core_vcn.data_vcn.0.id}" 30 | display_name = "Cloudera Service Gateway" 31 | } 32 | 33 | resource "oci_core_route_table" "RouteForComplete" { 34 | count = var.useExistingVcn ? 0 : 1 35 | compartment_id = "${var.compartment_ocid}" 36 | vcn_id = "${var.useExistingVcn ? var.custom_vcn[0] : oci_core_vcn.data_vcn.0.id}" 37 | display_name = "RouteTableForComplete" 38 | 39 | route_rules { 40 | destination = "0.0.0.0/0" 41 | destination_type = "CIDR_BLOCK" 42 | network_entity_id = "${oci_core_internet_gateway.data_internet_gateway.*.id[count.index]}" 43 | } 44 | } 45 | 46 | resource "oci_core_route_table" "private" { 47 | count = var.useExistingVcn ? 0 : 1 48 | compartment_id = "${var.compartment_ocid}" 49 | vcn_id = "${var.useExistingVcn ? var.custom_vcn[0] : oci_core_vcn.data_vcn.0.id}" 50 | display_name = "private" 51 | 52 | route_rules { 53 | destination = "${var.oci_service_gateway}" 54 | destination_type = "SERVICE_CIDR_BLOCK" 55 | network_entity_id = "${oci_core_service_gateway.data_service_gateway.*.id[count.index]}" 56 | } 57 | 58 | route_rules { 59 | destination = "0.0.0.0/0" 60 | destination_type = "CIDR_BLOCK" 61 | network_entity_id = "${oci_core_nat_gateway.nat_gateway.*.id[count.index]}" 62 | } 63 | } 64 | 65 | resource "oci_core_security_list" "PublicSubnet" { 66 | count = var.useExistingVcn ? 0 : 1 67 | compartment_id = "${var.compartment_ocid}" 68 | display_name = "Public Subnet" 69 | vcn_id = "${var.useExistingVcn ? 
var.custom_vcn[0] : oci_core_vcn.data_vcn.0.id}" 70 | 71 | egress_security_rules { 72 | destination = "0.0.0.0/0" 73 | protocol = "6" 74 | } 75 | 76 | egress_security_rules { 77 | protocol = "17" 78 | destination = "0.0.0.0/0" 79 | 80 | udp_options { 81 | min = 111 82 | max = 111 83 | } 84 | } 85 | 86 | ingress_security_rules { 87 | tcp_options { 88 | max = 22 89 | min = 22 90 | } 91 | 92 | protocol = "6" 93 | source = "0.0.0.0/0" 94 | } 95 | 96 | ingress_security_rules { 97 | tcp_options { 98 | max = 8080 99 | min = 8080 100 | } 101 | 102 | protocol = "6" 103 | source = "0.0.0.0/0" 104 | } 105 | 106 | ingress_security_rules { 107 | tcp_options { 108 | max = 5555 109 | min = 5555 110 | } 111 | 112 | protocol = "6" 113 | source = "0.0.0.0/0" 114 | } 115 | 116 | ingress_security_rules { 117 | protocol = "6" 118 | source = "${var.VPC_CIDR}" 119 | } 120 | 121 | ingress_security_rules { 122 | tcp_options { 123 | min = 2048 124 | max = 2050 125 | } 126 | protocol = "6" 127 | source = "${var.VPC_CIDR}" 128 | } 129 | 130 | ingress_security_rules { 131 | udp_options { 132 | min = 2048 133 | max = 2048 134 | } 135 | protocol = "17" 136 | source = "${var.VPC_CIDR}" 137 | } 138 | 139 | ingress_security_rules { 140 | tcp_options { 141 | min = 111 142 | max = 111 143 | } 144 | protocol = "6" 145 | source = "${var.VPC_CIDR}" 146 | } 147 | 148 | ingress_security_rules { 149 | udp_options { 150 | min = 111 151 | max = 111 152 | } 153 | protocol = "17" 154 | source = "${var.VPC_CIDR}" 155 | } 156 | } 157 | 158 | resource "oci_core_security_list" "PrivateSubnet" { 159 | count = var.useExistingVcn ? 0 : 1 160 | compartment_id = "${var.compartment_ocid}" 161 | display_name = "Private" 162 | vcn_id = "${var.useExistingVcn ? var.custom_vcn[0] : oci_core_vcn.data_vcn.0.id}" 163 | 164 | egress_security_rules { 165 | destination = "0.0.0.0/0" 166 | protocol = "6" 167 | } 168 | 169 | egress_security_rules { 170 | protocol = "6" 171 | destination = "${var.VPC_CIDR}" 172 | } 173 | 174 | egress_security_rules { 175 | protocol = "17" 176 | destination = "0.0.0.0/0" 177 | 178 | udp_options { 179 | min = 111 180 | max = 111 181 | } 182 | } 183 | 184 | ingress_security_rules { 185 | protocol = "6" 186 | source = "${var.VPC_CIDR}" 187 | } 188 | 189 | ingress_security_rules { 190 | tcp_options { 191 | min = 2048 192 | max = 2050 193 | } 194 | protocol = "6" 195 | source = "${var.VPC_CIDR}" 196 | } 197 | 198 | ingress_security_rules { 199 | udp_options { 200 | min = 2048 201 | max = 2048 202 | } 203 | protocol = "17" 204 | source = "${var.VPC_CIDR}" 205 | } 206 | 207 | ingress_security_rules { 208 | tcp_options { 209 | min = 111 210 | max = 111 211 | } 212 | protocol = "6" 213 | source = "${var.VPC_CIDR}" 214 | } 215 | 216 | ingress_security_rules { 217 | udp_options { 218 | min = 111 219 | max = 111 220 | } 221 | protocol = "17" 222 | source = "${var.VPC_CIDR}" 223 | } 224 | } 225 | 226 | resource "oci_core_subnet" "public" { 227 | count = var.useExistingVcn ? 0 : 1 228 | availability_domain = "${var.availability_domain}" 229 | cidr_block = "${cidrsubnet(var.VPC_CIDR, 8, 1)}" 230 | display_name = "public" 231 | compartment_id = "${var.compartment_ocid}" 232 | vcn_id = "${var.useExistingVcn ? 
var.custom_vcn[0] : oci_core_vcn.data_vcn.0.id}" 233 | route_table_id = "${oci_core_route_table.RouteForComplete[count.index].id}" 234 | security_list_ids = ["${oci_core_security_list.PublicSubnet.*.id[count.index]}"] 235 | dhcp_options_id = "${oci_core_vcn.data_vcn[count.index].default_dhcp_options_id}" 236 | dns_label = "public" 237 | } 238 | 239 | resource "oci_core_subnet" "private" { 240 | count = var.useExistingVcn ? 0 : 1 241 | availability_domain = "${var.availability_domain}" 242 | cidr_block = "${cidrsubnet(var.VPC_CIDR, 8, 2)}" 243 | display_name = "private" 244 | compartment_id = "${var.compartment_ocid}" 245 | vcn_id = "${var.useExistingVcn ? var.custom_vcn[0] : oci_core_vcn.data_vcn.0.id}" 246 | route_table_id = "${oci_core_route_table.private[count.index].id}" 247 | security_list_ids = ["${oci_core_security_list.PrivateSubnet.*.id[count.index]}"] 248 | dhcp_options_id = "${oci_core_vcn.data_vcn[count.index].default_dhcp_options_id}" 249 | prohibit_public_ip_on_vnic = "true" 250 | dns_label = "private" 251 | } 252 | 253 | -------------------------------------------------------------------------------- /scripts/plugins/hooks/oci_object_storage.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | import oci 18 | from typing import Optional 19 | from hooks.oci_base import OCIBaseHook 20 | from airflow.exceptions import AirflowException 21 | 22 | 23 | class OCIObjectStorageHook(OCIBaseHook): 24 | """ 25 | Interact with OCI Object Storage 26 | 27 | :param compartment_id: Target compartment OCID 28 | :type compartment_id: str 29 | :param bucket_name: Target bucket name 30 | :type bucket_name: str 31 | :param namespace_name: Namespace name 32 | :type namespace_name: str 33 | :param oci_conn_id: Airflow connection ID 34 | :type oci_conn_id: str 35 | :param args: Additional arguments 36 | :param kwargs: Additional arguments 37 | """ 38 | def __init__(self, 39 | compartment_id: str, 40 | bucket_name: Optional[str] = None, 41 | namespace_name: Optional[str] = None, 42 | oci_conn_id: Optional[str] = "oci_default", 43 | *args, 44 | **kwargs): 45 | super(OCIObjectStorageHook, self).__init__(*args, **kwargs) 46 | self.bucket_name = bucket_name 47 | self.oci_conn_id = oci_conn_id 48 | self.compartment_id = compartment_id 49 | self.namespace_name = namespace_name 50 | self.oci_client = oci.object_storage.ObjectStorageClient 51 | 52 | def get_namespace(self, compartment_id=None): 53 | """ 54 | Get OCI Object Storage Namespace using config 55 | :param compartment_id: Compartment OCID 56 | :type compartment_id: str 57 | :return: Object Storage Namespace Name 58 | :rtype: str 59 | """ 60 | try: 61 | self.namespace_name = self.get_client(self.oci_client).get_namespace(compartment_id=self.compartment_id).data 62 | return self.namespace_name 63 | except AirflowException as e: 64 | self.log.error(e.response["Error"]["Message"]) 65 | 66 | 67 | def check_for_bucket(self, bucket_name=None, namespace_name=None): 68 | """ 69 | Check if bucket_name exists 70 | :param bucket_name: Target bucket name 71 | :param namespace_name: Object Storage Namespace 72 | :return: True if exists, False if not 73 | :rtype: bool 74 | """ 75 | try: 76 | bucketsummary = self.get_client(self.oci_client).list_buckets(namespace_name=self.namespace_name, 77 | compartment_id=self.compartment_id) 78 | bucket_list = bucketsummary.data 79 | for bucket in bucket_list: 80 | if bucket.name == self.bucket_name: 81 | return True 82 | else: 83 | continue 84 | return False 85 | except AirflowException as e: 86 | self.log.error(e.response["Error"]["Message"]) 87 | 88 | def check_for_object(self, object_name, bucket_name=None, namespace_name=None, **kwargs): 89 | """ 90 | Check if Object exists in Bucket 91 | :param bucket_name: Target Bucket name 92 | :param namespace_name: Object Storage Namespace 93 | :param object_name: Name of Object in Bucket to check if exists 94 | :return: True if exists, False if not 95 | :rtype: bool 96 | """ 97 | if bucket_name is None: 98 | bucket_name = self.bucket_name 99 | if namespace_name is None: 100 | namespace_name = self.namespace_name 101 | try: 102 | # TODO: You might only need to check the first returned object. 
103 | next_start_with = None 104 | while True: 105 | objectsummary = self.get_client(self.oci_client).list_objects(namespace_name=namespace_name, 106 | bucket_name=bucket_name, 107 | prefix=object_name, 108 | start_after=next_start_with, 109 | **kwargs) 110 | object_list = objectsummary.data 111 | for object in object_list.objects: 112 | if object.name == object_name: 113 | return True 114 | if object_list.next_start_with is None: 115 | return False 116 | next_start_with = object_list.next_start_with 117 | except AirflowException as e: 118 | self.log.error(e.response["Error"]["Message"]) 119 | 120 | def copy_to_bucket(self, bucket_name=None, namespace_name=None, put_object_body=None, object_name=None, 121 | **kwargs): 122 | """ 123 | Copy source data to bucket using put_object 124 | :param bucket_name: Target bucket 125 | :type bucket_name: str 126 | :param namespace_name: Namespace name 127 | :type namespace_name: str 128 | :param put_object_body: The object to upload to the object store 129 | :type put_object_body: stream 130 | :param object_name: Name of object to be created in bucket 131 | :type object_name: str 132 | :return: Response object with data type None 133 | """ 134 | try: 135 | self.get_client(self.oci_client).put_object(bucket_name=self.bucket_name, namespace_name=self.namespace_name, 136 | put_object_body=put_object_body, object_name=object_name, 137 | **kwargs) 138 | except AirflowException as e: 139 | self.log.error(e.response["Error"]["Message"]) 140 | 141 | def read_from_bucket(self, bucket_name=None, namespace_name=None, object_name=None, **kwargs): 142 | """ 143 | Read object from bucket and return contents 144 | :param bucket_name: Target bucket 145 | :type bucket_name: str 146 | :param namespace_name: Namespace name 147 | :type namespace_name: str 148 | :param put_object_body: The object to upload to the object store 149 | :type put_object_body: stream 150 | :param object_name: Name of object to be created in bucket 151 | :type object_name: str 152 | :param kwargs: additional arguments 153 | :return: Response object with data type stream 154 | """ 155 | try: 156 | object_data = self.get_client(self.oci_client).get_object(bucket_name=self.bucket_name, 157 | namespace_name=self.namespace_name, 158 | object_name=object_name, **kwargs).data 159 | return object_data 160 | except AirflowException as e: 161 | self.log.error(e.response["Error"]["Message"]) 162 | 163 | -------------------------------------------------------------------------------- /scripts/plugins/hooks/oci_adb.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
18 | import oci 19 | import os 20 | import re 21 | import cx_Oracle 22 | from typing import Optional 23 | from sqlalchemy import create_engine 24 | from hooks.oci_base import OCIBaseHook 25 | from airflow.exceptions import AirflowException 26 | 27 | 28 | class OCIDBHook(OCIBaseHook): 29 | """ 30 | Interact with Databases on OCI 31 | 32 | :param compartment_id: Target compartment OCID 33 | :type compartment_id: str 34 | :param tns_admin_root: The wallet root directory. The wallet will be loaded from $TNS_ADMIN/sqlnet.ora. 35 | If you do not set tns_admin_root, it is assumed to be in your environment. 36 | :type tns_admin_root: str 37 | :param database_ocid: Database ID 38 | :type database_ocid: str 39 | :param db_workload: DB Workload type, valid options are DW or OLTP 40 | :type str: 41 | :param db_name: Databse Name (Not display) 42 | :type db_name: str 43 | :param debug: Whether to display debug output 44 | :type debug: bool 45 | :param dsn: DSN (TNS Name) for connection 46 | :type dsn: str 47 | :param oci_conn_id: Airflow connection ID 48 | :type oci_conn_id: str 49 | :param oci_region: Target OCI Region 50 | :type oci_region: str 51 | :param password: Database password for user_id 52 | :type password: str 53 | :param user_id: User ID for Database login 54 | :type user_id: str 55 | :param wallet_location: Filesystem location for wallet files 56 | :param wallet_location: str 57 | """ 58 | def __init__(self, 59 | compartment_ocid: str, 60 | tns_admin_root: Optional[str] = None, 61 | database_ocid: Optional[str] = None, 62 | db_workload: Optional[str] = None, 63 | db_name: Optional[str] = None, 64 | debug: Optional[bool] = False, 65 | dsn: Optional[str] = None, 66 | oci_conn_id: Optional[str] = "oci_default", 67 | oci_region: Optional[str] = None, 68 | password: Optional[str] = None, 69 | user_id: Optional[str] = None, 70 | wallet_location: Optional[str] = None, 71 | *args, 72 | **kwargs): 73 | super(OCIDBHook, self).__init__(*args, **kwargs) 74 | self.compartment_id = compartment_ocid 75 | self.tns_admin_root = tns_admin_root 76 | self.database_id = database_ocid 77 | self.db_workload = db_workload 78 | self.db_name = db_name 79 | self.debug = debug 80 | self.dsn = dsn 81 | self.oci_conn_id = oci_conn_id 82 | self.oci_region = oci_region 83 | self.password = password 84 | self.user_id = user_id 85 | self.wallet_location = wallet_location 86 | self.oci_client = oci.database.DatabaseClient 87 | 88 | def get_ocid_by_name(self, db_name=None, db_workload=None): 89 | """ 90 | Look up databases by name and return OCID 91 | :param db_name: Target DB Name (Not display name) 92 | :type db_name: str 93 | :param db_workload: Workload type, valid options are DW or OLTP 94 | :type db_workload: str 95 | :return: db_id (OCID) 96 | """ 97 | try: 98 | adb_list = \ 99 | self.get_client(self.oci_client).list_autonomous_databases(compartment_id=self.compartment_id, 100 | db_workload=self.db_workload).data 101 | if self.debug is True: 102 | self.log.info("ADB List: {0}".format(adb_list)) 103 | for db in adb_list: 104 | if db.db_name == self.db_name: 105 | self.database_id = db.id 106 | return db.id 107 | else: 108 | continue 109 | return None 110 | except AirflowException as e: 111 | self.log.error(e.response["Error"]["Message"]) 112 | 113 | def relocalize_sqlnet(self): 114 | """ 115 | Update the path in $TNS_ADMIN/sqlnet.ora to the correct path 116 | """ 117 | if self.tns_admin_root is None: 118 | self.log.error("tns_admin_root not specified or null: {0}".format(self.tns_admin_root)) 119 | else: 120 | 
os.environ["TNS_ADMIN"] = self.tns_admin_root 121 | file_path = os.path.join(os.environ["TNS_ADMIN"], "sqlnet.ora") 122 | if not os.path.exists(file_path): 123 | raise Exception("{} does not exist".format(file_path)) 124 | with open(file_path, "r") as fd: 125 | self.log.info("Reading sqlnet.ora") 126 | original = fd.read() 127 | # Set the correct path. 128 | modified = re.sub( 129 | 'DIRECTORY="([^"]+)"', 130 | 'DIRECTORY="{}"'.format(self.tns_admin_root), 131 | original, 132 | ) 133 | with open(file_path, "w") as fd: 134 | self.log.info("Writing modified sqlnet.ora") 135 | fd.write(modified) 136 | 137 | def connect(self, **kwargs): 138 | """ 139 | Connect to an Oracle DSN using a wallet. 140 | The wallet will be loaded from $TNS_ADMIN/sqlnet.ora. 141 | If you do not set this, it is assumed to be in your environment. 142 | :param dsn: The TNS name. 143 | :type dns: str 144 | :param tns_admin_root: The wallet root directory. 145 | :type tns_admin_root: str 146 | :param **kwargs: Arbitrary keyword arguments to pass to cx_Oracle.connect. 147 | :return: connection: True if successful, False otherwise. 148 | """ 149 | try: 150 | if self.dsn is None: 151 | if self.db_name is not None: 152 | self.dsn = str(self.db_name) + "_medium" 153 | if self.debug is True: 154 | self.log.info("Connecting to Oracle database with DSN {}".format(self.dsn.lower())) 155 | self.connection = cx_Oracle.connect(dsn=self.dsn.lower(), **kwargs) 156 | else: 157 | self.log.error("DB Name and DSN are null, one of these is required to connect") 158 | else: 159 | if self.debug is True: 160 | self.log.info("Connecting to Oracle database with DSN {}".format(self.dsn)) 161 | self.connection = cx_Oracle.connect(dsn=self.dsn, **kwargs) 162 | return self.connection 163 | except AirflowException as e: 164 | self.log.error(e.response["Error"]["Message"]) 165 | 166 | def connect_sqlalchemy( 167 | self, 168 | url=None, 169 | **kwargs 170 | ): 171 | """ 172 | Create and return a .Engine instance 173 | :param url: String that indicates database dialect and connection arguments 174 | :type url: str 175 | :param kwargs: Additional arguments supported by create_engine 176 | :return: 177 | """ 178 | if url is not None: 179 | self.engine = create_engine(url, **kwargs) 180 | else: 181 | self.engine = create_engine( 182 | "oracle+cx_oracle://{}:{}@{}".format(self.user_id, self.password, self.dsn), **kwargs 183 | ) 184 | return self.engine 185 | 186 | def check_state(self, **kwargs): 187 | """ 188 | Check Database state and return lifecycle_state 189 | :param kwargs: 190 | :return: 191 | """ 192 | db_details = self.get_client(self.oci_client).get_autonomous_database(autonomous_database_id=self.database_id, 193 | **kwargs).data 194 | return db_details.lifecycle_state 195 | -------------------------------------------------------------------------------- /variables.tf: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------------------------------------------------------- 2 | # SSH Keys - Put this to top level because they are required 3 | # --------------------------------------------------------------------------------------------------------------------- 4 | 5 | variable "ssh_provided_key" { 6 | default = "" 7 | } 8 | 9 | # --------------------------------------------------------------------------------------------------------------------- 10 | # Network Settings 11 | # 
--------------------------------------------------------------------------------------------------------------------- 12 | variable "useExistingVcn" { 13 | default = "false" 14 | } 15 | 16 | variable "vcn_cidr" { 17 | default = "" 18 | } 19 | 20 | variable "hide_public_subnet" { 21 | default = "false" 22 | } 23 | variable "hide_private_subnet" { 24 | default = "true" 25 | } 26 | variable "VPC_CIDR" { 27 | default = "10.0.0.0/16" 28 | } 29 | variable "myVcn" { 30 | default = " " 31 | } 32 | 33 | variable "masterSubnet" { 34 | default = " " 35 | } 36 | 37 | variable "clusterSubnet" { 38 | default = " " 39 | } 40 | 41 | variable "vcn_dns_label" { 42 | default = "airflowvcn" 43 | } 44 | 45 | variable "airflow_database" { 46 | default = "mysql" 47 | } 48 | 49 | variable "airflow_options" { 50 | default = "false" 51 | } 52 | 53 | variable "enable_instance_principals" { 54 | default = "false" 55 | } 56 | 57 | variable "enable_fss" { 58 | default = "false" 59 | } 60 | 61 | variable "enable_security" { 62 | default = "false" 63 | } 64 | 65 | variable "mysqladmin_password" { 66 | default = "" 67 | } 68 | 69 | variable "mysqladmin_username" { 70 | default = "mysqladmin" 71 | } 72 | 73 | variable "mysql_shape" { 74 | default = "VM.Standard.E2.2" 75 | } 76 | 77 | variable "enable_mysql_backups" { 78 | default = "false" 79 | } 80 | 81 | variable "oci_mysql_ip" { 82 | default = "10.0.2.8" 83 | } 84 | 85 | # --------------------------------------------------------------------------------------------------------------------- 86 | # ORM Schema variables 87 | # You should modify these based on deployment requirements. 88 | # These default to recommended values 89 | # --------------------------------------------------------------------------------------------------------------------- 90 | 91 | 92 | variable "enable_block_volumes" { 93 | default = "false" 94 | } 95 | 96 | variable "provide_ssh_key" { 97 | default = "true" 98 | } 99 | 100 | variable "master_instance_shape" { 101 | default = "VM.Standard2.4" 102 | } 103 | 104 | variable "worker_instance_shape" { 105 | default = "VM.Standard2.4" 106 | } 107 | 108 | variable "worker_node_count" { 109 | default = "0" 110 | } 111 | 112 | variable "data_blocksize_in_gbs" { 113 | default = "5000" 114 | } 115 | 116 | variable "block_volumes_per_worker" { 117 | default = "1" 118 | } 119 | 120 | variable "customize_block_volume_performance" { 121 | default = "true" 122 | } 123 | 124 | variable "block_volume_high_performance" { 125 | default = "true" 126 | } 127 | 128 | variable "block_volume_cost_savings" { 129 | default = "false" 130 | } 131 | 132 | variable "vpus_per_gb" { 133 | default = "10" 134 | } 135 | 136 | # Which AD to target - this can be adjusted. Default 1 for single AD regions. 
137 | variable "availability_domain" { 138 | default = "1" 139 | } 140 | 141 | variable "executor" { 142 | default = "local" 143 | } 144 | variable "all" { 145 | default = "false" 146 | } 147 | variable "all_dbs" { 148 | default = "false" 149 | } 150 | variable "async" { 151 | default = "false" 152 | } 153 | variable "aws" { 154 | default = "false" 155 | } 156 | variable "azure" { 157 | default = "false" 158 | } 159 | variable "celery" { 160 | default = "false" 161 | } 162 | variable "cloudant" { 163 | default = "false" 164 | } 165 | variable "crypto" { 166 | default = "false" 167 | } 168 | variable "devel" { 169 | default = "false" 170 | } 171 | variable "devel_hadoop" { 172 | default = "false" 173 | } 174 | variable "druid" { 175 | default = "false" 176 | } 177 | variable "gcp" { 178 | default = "false" 179 | } 180 | variable "github_enterprise" { 181 | default = "false" 182 | } 183 | variable "google_auth" { 184 | default = "false" 185 | } 186 | variable "hashicorp" { 187 | default = "false" 188 | } 189 | variable "hdfs" { 190 | default = "false" 191 | } 192 | variable "hive" { 193 | default = "false" 194 | } 195 | variable "jdbc" { 196 | default = "false" 197 | } 198 | variable "kerberos" { 199 | default = "false" 200 | } 201 | variable "kubernetes" { 202 | default = "false" 203 | } 204 | variable "ldap" { 205 | default = "false" 206 | } 207 | variable "mssql" { 208 | default = "false" 209 | } 210 | variable "mysql" { 211 | default = "true" 212 | } 213 | variable "oracle" { 214 | default = "true" 215 | } 216 | variable "password" { 217 | default = "false" 218 | } 219 | variable "postgres" { 220 | default = "false" 221 | } 222 | variable "presto" { 223 | default = "false" 224 | } 225 | variable "qds" { 226 | default = "false" 227 | } 228 | variable "rabbitmq" { 229 | default = "false" 230 | } 231 | variable "redis" { 232 | default = "false" 233 | } 234 | variable "samba" { 235 | default = "false" 236 | } 237 | variable "slack" { 238 | default = "false" 239 | } 240 | variable "ssh" { 241 | default = "true" 242 | } 243 | variable "vertica" { 244 | default = "false" 245 | } 246 | 247 | # --------------------------------------------------------------------------------------------------------------------- 248 | # Environmental variables 249 | # You probably want to define these as environmental variables. 250 | # Instructions on that are here: https://github.com/oracle/oci-quickstart-prerequisites 251 | # --------------------------------------------------------------------------------------------------------------------- 252 | 253 | variable "compartment_ocid" {} 254 | 255 | # Required by the OCI Provider 256 | 257 | variable "tenancy_ocid" {} 258 | variable "region" {} 259 | 260 | # --------------------------------------------------------------------------------------------------------------------- 261 | # Constants 262 | # You probably don't need to change these. 
263 | # --------------------------------------------------------------------------------------------------------------------- 264 | 265 | // See https://docs.cloud.oracle.com/en-us/iaas/images/image/0c6332bc-a5ec-4ddf-99b8-5f33b0bc461a/ 266 | // Oracle-provided image "Oracle-Linux-7.8-2020.067.30-0" 267 | // Kernel Version: 4.14.35-1902.303.5.3.el7uek.x86_64 268 | variable "OELImageOCID" { 269 | type = "map" 270 | default = { 271 | ap-chuncheon-1 = "ocid1.image.oc1.ap-chuncheon-1.aaaaaaaah4qawhzex2soci4bdfu5rmtqxum5meq246mmehiwf6foenccxt7a" 272 | ap-hyderabad-1 = "ocid1.image.oc1.ap-hyderabad-1.aaaaaaaan7b4rtk3fzr65o4y26cerr64zepillnkt7nkb7v2pixnstkhj4zq" 273 | ap-melbourne-1 = "ocid1.image.oc1.ap-melbourne-1.aaaaaaaahzx72p5mf66rdmbxxdvf225qt6t3cmhy5ogsnujsrs24wzfhvrxa" 274 | ap-mumbai-1 = "ocid1.image.oc1.ap-mumbai-1.aaaaaaaakiw6ctx53lo3vl6an6yqpfiljv3kml4a7yk3footssw5no72mqdq" 275 | ap-osaka-1 = "ocid1.image.oc1.ap-osaka-1.aaaaaaaaglv2o3dt75xk3mndbu4ol53m5g4bwdj6a2eloptpplhgv4pz4fga" 276 | ap-seoul-1 = "ocid1.image.oc1.ap-seoul-1.aaaaaaaapsrckrt4jvac7453pkajcrstg2nkv4a6cplvs5qkxjaowhrtuppa" 277 | ap-sydney-1 = "ocid1.image.oc1.ap-sydney-1.aaaaaaaacgkmddzqa6eyuqdlj3ackoj7bkckzewd65u473qigbgik4ffjkrq" 278 | ap-tokyo-1 = "ocid1.image.oc1.ap-tokyo-1.aaaaaaaag27ttaewtise5v2gjc3762n4rpovfb3v6vq2a6x2fnwhczmsmmha" 279 | ca-montreal-1 = "ocid1.image.oc1.ca-montreal-1.aaaaaaaacvixkp2ptverv3apol6w43aa7kn7b3uvvdcnn2pyfidlhosiieoq" 280 | ca-toronto-1 = "ocid1.image.oc1.ca-toronto-1.aaaaaaaarafs3f23oihrekl4p2hfyb5in46nbzlbrtmpyqkeybddlzgpl2fa" 281 | eu-amsterdam-1 = "ocid1.image.oc1.eu-amsterdam-1.aaaaaaaamy6nj5osieovhuegqncihbulhlefbquqtzcuumwpid3vskob6pea" 282 | eu-frankfurt-1 = "ocid1.image.oc1.eu-frankfurt-1.aaaaaaaaulz7xiht632iidvdm4iezy33fofulmerq2nkllwnkjy335qkswza" 283 | eu-zurich-1 = "ocid1.image.oc1.eu-zurich-1.aaaaaaaaz7iyv6tkydbtevjktdhunsxeez3vs4mk6gngel7qj2ymtto6ehwq" 284 | me-jeddah-1 = "ocid1.image.oc1.me-jeddah-1.aaaaaaaa3xm4dq6r67rdslmszygbemxbn2ojabhrj5op3vmpel4zc7cl4ssq" 285 | sa-saopaulo-1 = "ocid1.image.oc1.sa-saopaulo-1.aaaaaaaakndmtcb6ycz6mffgklqctnlb3bzr2pqo6a3jor762kr4hpsjodya" 286 | uk-gov-london-1 = "ocid1.image.oc4.uk-gov-london-1.aaaaaaaacap4psxhmmdghsb37ost57hmmfeeotm6c3cgi7pyekxdlzh3jlvq" 287 | uk-london-1 = "ocid1.image.oc1.uk-london-1.aaaaaaaayt6ppuyj6q4dwb4pkkyy3llrhxntywewfk4ssd365d4cn22i6yxa" 288 | us-ashburn-1 = "ocid1.image.oc1.iad.aaaaaaaabip6l5i5ikqsnm64xwrw2rrkj3tzo2dv47frowlt3droliwpvfaa" 289 | us-gov-ashburn-1 = "ocid1.image.oc3.us-gov-ashburn-1.aaaaaaaa44cyvmq7hjawqc6pkdczt5kpbakvsoj55talodxbvmhpjecit77a" 290 | us-gov-chicago-1 = "ocid1.image.oc3.us-gov-chicago-1.aaaaaaaaaht7f6ddqmuu2jhtp5bnrspbdsav6atbvdt43coql26dspjsffra" 291 | us-gov-phoenix-1 = "ocid1.image.oc3.us-gov-phoenix-1.aaaaaaaa33nckkwzxg65dwe3qwed6hc3zmza777vt6xom5xn3s7q2wneovea" 292 | us-langley-1 = "ocid1.image.oc2.us-langley-1.aaaaaaaazy4hfwcuxqsupcy75y6vjvmaoet5ns4rb2hlp3m2d6memcv2r2va" 293 | us-luke-1 = "ocid1.image.oc2.us-luke-1.aaaaaaaamvtduzihoo4cpury4dh2dghi74xf7hprpaotq6dyv5zoolkadira" 294 | us-phoenix-1 = "ocid1.image.oc1.phx.aaaaaaaaxdwzaqqvxvmyznmcx2n766fxatd6owcojqapkih7oqq4qt3o4wwa" 295 | } 296 | } 297 | 298 | variable "oci_service_gateway" { 299 | type = "map" 300 | default = { 301 | ap-mumbai-1 = "all-bom-services-in-oracle-services-network" 302 | ap-seoul-1 = "all-icn-services-in-oracle-services-network" 303 | ap-sydney-1 = "all-syd-services-in-oracle-services-network" 304 | ap-tokyo-1 = "all-nrt-services-in-oracle-serviecs-network" 305 | ca-toronto-1 = 
"all-yyz-services-in-oracle-services-network" 306 | eu-frankfurt-1 = "all-fra-services-in-oracle-services-network" 307 | eu-zurich-1 = "all-zrh-services-in-oracle-services-network" 308 | sa-saopaulo-1 = "all-gru-services-in-oracle-services-network" 309 | uk-london-1 = "all-lhr-services-in-oracle-services-network" 310 | us-ashburn-1 = "all-iad-services-in-oracle-services-network" 311 | us-langley-1 = "all-lfi-services-in-oracle-services-network" 312 | us-luke-1 = "all-luf-services-in-oracle-services-network" 313 | us-phoenix-1 = "all-phx-services-in-oracle-services-network" 314 | } 315 | } 316 | 317 | -------------------------------------------------------------------------------- /scripts/plugins/operators/oci_data_flow.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | 19 | from airflow.models.baseoperator import BaseOperator 20 | from airflow.utils.decorators import apply_defaults 21 | from typing import Optional 22 | import oci 23 | from oci.data_flow.data_flow_client_composite_operations import DataFlowClientCompositeOperations 24 | from hooks.oci_data_flow import OCIDataFlowHook 25 | from hooks.oci_object_storage import OCIObjectStorageHook 26 | from airflow.exceptions import AirflowException 27 | """ 28 | Interact with OCI Data Flow 29 | """ 30 | 31 | 32 | class OCIDataFlowRun(BaseOperator): 33 | template_fields = ('display_name',) 34 | 35 | """ 36 | Create a Data Flow Run 37 | :param comprtment_ocid: Compartment OCID 38 | :param application_ocid: Data Flow Applicaation OCID 39 | :param display_name: Data Flow App Name 40 | :param oci_conn_id: Airflow Connection ID 41 | :param bucket_name: Application Bucket Name 42 | :param arguments: Arguments 43 | :param parameters: Parameters 44 | :param driver_shape: Spark Driver Shape 45 | :param executor_shape: Spark Executor Shape 46 | :param num_executors: Spark Executors 47 | :param logs_bucket_uri: OCI Logs Bucket 48 | :param logs_run_output: Whether to log the run output 49 | :param defined_tags: Defined Tags 50 | :param freeform_tags: Freeform Tags 51 | :param check_interval: Check Interval 52 | :param timeout: Timeout 53 | """ 54 | @apply_defaults 55 | def __init__( 56 | self, 57 | compartment_ocid: str, 58 | display_name: str, 59 | oci_conn_id: str, 60 | bucket_name: Optional[str] = None, 61 | application_ocid: Optional = None, 62 | arguments: Optional = None, 63 | parameters: Optional = None, 64 | driver_shape: Optional = None, 65 | executor_shape: Optional = None, 66 | num_executors: Optional = None, 67 | log_run_output: Optional[bool] = True, 68 | logs_bucket_uri: Optional = None, 69 | defined_tags: Optional = None, 70 | freeform_tags: Optional = None, 
71 | warehouse_bucket_uri: Optional = None, 72 | check_interval: Optional[int] = None, 73 | timeout: Optional[int] = None, 74 | runtime_callback: Optional = None, 75 | *args, 76 | **kwargs 77 | ): 78 | super().__init__(*args, **kwargs) 79 | self.compartment_id = compartment_ocid 80 | self.application_id = application_ocid 81 | self.display_name = display_name 82 | self.oci_conn_id = oci_conn_id 83 | self.bucket_name = bucket_name 84 | self.arguments= arguments 85 | self.parameters = parameters 86 | self.driver_shape = driver_shape 87 | self.executor_shape = executor_shape 88 | self.num_executors = num_executors 89 | self.log_run_output = log_run_output 90 | self.logs_bucket_uri = logs_bucket_uri 91 | self.defined_tags = defined_tags 92 | self.freeform_tags = freeform_tags 93 | self.warehouse_bucket_uri = warehouse_bucket_uri 94 | self.check_interval = check_interval 95 | self.timeout = timeout 96 | self.runtime_callback = runtime_callback 97 | self._oci_hook = None 98 | 99 | def execute(self, context): 100 | self._oci_hook = OCIDataFlowHook(compartment_ocid=self.compartment_id, oci_conn_id=self.oci_conn_id, display_name=self.display_name) 101 | client = self._oci_hook.get_client(oci.data_flow.DataFlowClient) 102 | self.log.info("Validating OCI Config") 103 | self._oci_hook.validate_config() 104 | if not self.timeout: 105 | self.timeout = float('inf') 106 | if not self.check_interval: 107 | self.check_interval = 30 108 | if not self.executor_shape: 109 | self.executor_shape = 'VM.Standard2.1' 110 | if not self.num_executors: 111 | self.num_executors = 1 112 | if not self.driver_shape: 113 | self.driver_shape = self.executor_shape 114 | if not self.warehouse_bucket_uri: 115 | self.namespace = OCIObjectStorageHook(compartment_id=self.compartment_id, oci_conn_id=self.oci_conn_id, bucket_name=self.bucket_name).get_namespace() 116 | self.warehouse_bucket_uri = "oci://" + str(self.bucket_name) + "@" + str(self.namespace) + "/" 117 | if not self.application_id: 118 | self.application_id = OCIDataFlowHook(compartment_ocid=self.compartment_id, oci_conn_id=self.oci_conn_id, display_name=self.display_name).get_application_ocid() 119 | run_details = { 120 | "application_id": self.application_id, 121 | "compartment_id": self.compartment_id, 122 | "display_name": self.display_name, 123 | "executor_shape": self.executor_shape, 124 | "num_executors": self.num_executors, 125 | "driver_shape": self.driver_shape, 126 | "warehouse_bucket_uri": self.warehouse_bucket_uri, 127 | "logs_bucket_uri": self.logs_bucket_uri, 128 | "arguments": self.arguments, 129 | "parameters": self.parameters, 130 | } 131 | if self.runtime_callback is not None: 132 | callback_settings = self.runtime_callback(context) 133 | run_details = {**run_details, **callback_settings} 134 | dataflow_run = oci.data_flow.models.CreateRunDetails(**run_details) 135 | try: 136 | submit_run = DataFlowClientCompositeOperations(client) 137 | response = submit_run.create_run_and_wait_for_state(create_run_details=dataflow_run, 138 | wait_for_states=["CANCELED", "SUCCEEDED", "FAILED"], 139 | waiter_kwargs={ 140 | "max_interval_seconds": self.check_interval, 141 | "max_wait_seconds": self.timeout 142 | }) 143 | if response.data.lifecycle_state != "SUCCEEDED": 144 | self.log.error(response.data.lifecycle_details) 145 | raise AirflowException(response.data.lifecycle_details) 146 | if self.log_run_output: 147 | try: 148 | log_contents = client.get_run_log(run_id=response.data.id, name="spark_application_stdout.log.gz") 149 | self.log.info("Data Flow Run 
Output:") 150 | self.log.info(log_contents.data.text) 151 | except: 152 | self.log.info("Unable to fetch Run logs. This can be due to a missing IAM policy") 153 | self.log.info("Data Flow needs a policy like \"allow service dataflow to read objects in tenancy where target.bucket.name=''\" to read your logs") 154 | self.log.info("See https://docs.cloud.oracle.com/en-us/iaas/data-flow/using/dfs_getting_started.htm#set_up_admin for more information") 155 | except oci.exceptions.CompositeOperationError as e: 156 | self.log.error(str(e.cause)) 157 | raise e 158 | 159 | 160 | class OCIDataFlowCreateApplication(BaseOperator): 161 | """ 162 | Create a Data Flow Run 163 | :param comprtment_ocid: Compartment OCID 164 | :param application_ocid: Data Flow Applicaation OCID 165 | :param display_name: Data Flow App Name 166 | :param oci_conn_id: Airflow Connection ID 167 | :param bucket_name: Application Bucket Name 168 | :param arguments: Arguments 169 | :param parameters: Parameters 170 | :param driver_shape: Spark Driver Shape 171 | :param executor_shape: Spark Executor Shape 172 | :param num_executors: Spark Executors 173 | :param logs_bucket_uri: OCI Logs Bucket 174 | :param defined_tags: Defined Tags 175 | :param freeform_tags: Freeform Tags 176 | :param check_interval: Check Interval 177 | :param timeout: Timeout 178 | """ 179 | @apply_defaults 180 | def __init__( 181 | self, 182 | compartment_ocid: str, 183 | display_name: str, 184 | oci_conn_id: str, 185 | bucket_name: str, 186 | object_name: str, 187 | language: str, 188 | file_uri: Optional[str] = None, 189 | arguments: Optional = None, 190 | parameters: Optional = None, 191 | driver_shape: Optional = None, 192 | executor_shape: Optional = None, 193 | num_executors: Optional = None, 194 | logs_bucket_uri: Optional = None, 195 | spark_version: Optional = None, 196 | check_interval: Optional[int] = None, 197 | timeout: Optional[int] = None, 198 | *args, 199 | **kwargs 200 | ): 201 | super().__init__(*args, **kwargs) 202 | self.compartment_id = compartment_ocid 203 | self.display_name = display_name 204 | self.oci_conn_id = oci_conn_id 205 | self.bucket_name = bucket_name 206 | self.object_name = object_name 207 | self.language = language 208 | self.file_uri = file_uri 209 | self.arguments = arguments 210 | self.parameters = parameters 211 | self.driver_shape = driver_shape 212 | self.executor_shape = executor_shape 213 | self.num_executors = num_executors 214 | self.logs_bucket_uri = logs_bucket_uri 215 | self.spark_version = spark_version 216 | self.check_interval = check_interval 217 | self.timeout = timeout 218 | self._oci_hook = None 219 | 220 | def execute(self, context): 221 | self._oci_hook = OCIDataFlowHook(compartment_ocid=self.compartment_id, oci_conn_id=self.oci_conn_id, display_name=self.display_name) 222 | client = self._oci_hook.get_client(oci.data_flow.DataFlowClient) 223 | self.log.info("Validating OCI Config") 224 | self._oci_hook.validate_config() 225 | if not self.timeout: 226 | self.timeout = float('inf') 227 | if not self.check_interval: 228 | self.check_interval = 30 229 | if not self.executor_shape: 230 | self.executor_shape = 'VM.Standard2.1' 231 | if not self.num_executors: 232 | self.num_executors = 1 233 | if not self.driver_shape: 234 | self.driver_shape = self.executor_shape 235 | if not self.file_uri: 236 | self.namespace = OCIObjectStorageHook(compartment_id=self.compartment_id, oci_conn_id=self.oci_conn_id, bucket_name=self.bucket_name).get_namespace() 237 | self.file_uri = "oci://" + str(self.bucket_name) + "@" 
+ str(self.namespace) + "/" + str(self.object_name) 238 | self.log.info("File URI: {0}".format(self.file_uri)) 239 | if not self.language: 240 | self.log.error("Application Language must be set") 241 | if not self.spark_version: 242 | self.spark_version = '2.4.4' 243 | app_details = { 244 | "compartment_id": self.compartment_id, 245 | "display_name": self.display_name, 246 | "driver_shape": self.driver_shape, 247 | "executor_shape": self.executor_shape, 248 | "file_uri": self.file_uri, 249 | "language": self.language, 250 | "num_executors": self.num_executors, 251 | "spark_version": self.spark_version 252 | } 253 | dataflow_create = \ 254 | oci.data_flow.models.CreateApplicationDetails(compartment_id=app_details["compartment_id"], 255 | display_name=app_details["display_name"], 256 | driver_shape=app_details["driver_shape"], 257 | executor_shape=app_details["executor_shape"], 258 | file_uri=app_details["file_uri"], 259 | language=app_details["language"], 260 | num_executors=app_details["num_executors"], 261 | spark_version=app_details["spark_version"] 262 | ) 263 | try: 264 | print("Checking if Application {0} exists".format(self.display_name)) 265 | appcheck = self._oci_hook.check_for_application_by_name() 266 | if appcheck is True: 267 | self.log.error("Application {0} already exists".format(self.display_name)) 268 | else: 269 | print("Creating DataFlow Application {0}".format(self.display_name)) 270 | create_app = DataFlowClientCompositeOperations(client) 271 | create_app.create_application_and_wait_for_state(create_application_details=dataflow_create, 272 | wait_for_states=["ACTIVE"], 273 | waiter_kwargs={ 274 | "max_interval_seconds": self.check_interval, 275 | "max_wait_seconds": self.timeout 276 | }) 277 | except AirflowException as e: 278 | self.log.error(e.response["Error"]["Message"]) 279 | -------------------------------------------------------------------------------- /scripts/custom/connection.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
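# ---------------------------------------------------------------------------
# Illustrative DAG sketch for the Data Flow operators defined in
# scripts/plugins/operators/oci_data_flow.py. The compartment OCID, application
# name, bucket and import path are placeholders and may differ in your
# deployment; the import assumes the plugins directory is on the Python path,
# as the plugin modules themselves assume with their "from hooks..." imports.
#
#   from datetime import datetime
#   from airflow import DAG
#   from operators.oci_data_flow import OCIDataFlowRun
#
#   with DAG("dataflow_example", start_date=datetime(2021, 1, 1),
#            schedule_interval=None) as dag:
#       run_app = OCIDataFlowRun(
#           task_id="run_dataflow_app",
#           compartment_ocid="ocid1.compartment.oc1..example",
#           display_name="my_dataflow_application",  # resolved to an application OCID by the hook
#           bucket_name="dataflow-warehouse",         # used to build warehouse_bucket_uri
#           oci_conn_id="oci_default",
#       )
# ---------------------------------------------------------------------------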
19 | 20 | import json 21 | from builtins import bytes 22 | from urllib.parse import parse_qsl, quote, unquote, urlencode, urlparse 23 | 24 | from sqlalchemy import Column, Integer, String, Boolean 25 | from sqlalchemy.ext.declarative import declared_attr 26 | from sqlalchemy.orm import synonym 27 | 28 | from airflow import LoggingMixin 29 | from airflow.exceptions import AirflowException 30 | from airflow.models.base import Base, ID_LEN 31 | from airflow.models.crypto import get_fernet 32 | 33 | 34 | # Python automatically converts all letters to lowercase in hostname 35 | # See: https://issues.apache.org/jira/browse/AIRFLOW-3615 36 | def parse_netloc_to_hostname(uri_parts): 37 | hostname = unquote(uri_parts.hostname or '') 38 | if '/' in hostname: 39 | hostname = uri_parts.netloc 40 | if "@" in hostname: 41 | hostname = hostname.rsplit("@", 1)[1] 42 | if ":" in hostname: 43 | hostname = hostname.split(":", 1)[0] 44 | hostname = unquote(hostname) 45 | return hostname 46 | 47 | 48 | class Connection(Base, LoggingMixin): 49 | """ 50 | Placeholder to store information about different database instances 51 | connection information. The idea here is that scripts use references to 52 | database instances (conn_id) instead of hard coding hostname, logins and 53 | passwords when using operators or hooks. 54 | """ 55 | __tablename__ = "connection" 56 | 57 | id = Column(Integer(), primary_key=True) 58 | conn_id = Column(String(ID_LEN)) 59 | conn_type = Column(String(500)) 60 | host = Column(String(500)) 61 | schema = Column(String(500)) 62 | login = Column(String(500)) 63 | _password = Column('password', String(5000)) 64 | port = Column(Integer()) 65 | is_encrypted = Column(Boolean, unique=False, default=False) 66 | is_extra_encrypted = Column(Boolean, unique=False, default=False) 67 | _extra = Column('extra', String(5000)) 68 | 69 | _types = [ 70 | ('docker', 'Docker Registry',), 71 | ('fs', 'File (path)'), 72 | ('ftp', 'FTP',), 73 | ('google_cloud_platform', 'Google Cloud Platform'), 74 | ('hdfs', 'HDFS',), 75 | ('http', 'HTTP',), 76 | ('pig_cli', 'Pig Client Wrapper',), 77 | ('hive_cli', 'Hive Client Wrapper',), 78 | ('hive_metastore', 'Hive Metastore Thrift',), 79 | ('hiveserver2', 'Hive Server 2 Thrift',), 80 | ('jdbc', 'Jdbc Connection',), 81 | ('jenkins', 'Jenkins'), 82 | ('mysql', 'MySQL',), 83 | ('postgres', 'Postgres',), 84 | ('oci', 'Oracle Cloud Infrastructure',), 85 | ('oracle', 'Oracle',), 86 | ('vertica', 'Vertica',), 87 | ('presto', 'Presto',), 88 | ('s3', 'S3',), 89 | ('samba', 'Samba',), 90 | ('sqlite', 'Sqlite',), 91 | ('ssh', 'SSH',), 92 | ('cloudant', 'IBM Cloudant',), 93 | ('mssql', 'Microsoft SQL Server'), 94 | ('mesos_framework-id', 'Mesos Framework ID'), 95 | ('jira', 'JIRA',), 96 | ('redis', 'Redis',), 97 | ('wasb', 'Azure Blob Storage'), 98 | ('databricks', 'Databricks',), 99 | ('aws', 'Amazon Web Services',), 100 | ('emr', 'Elastic MapReduce',), 101 | ('snowflake', 'Snowflake',), 102 | ('segment', 'Segment',), 103 | ('azure_data_lake', 'Azure Data Lake'), 104 | ('azure_container_instances', 'Azure Container Instances'), 105 | ('azure_cosmos', 'Azure CosmosDB'), 106 | ('cassandra', 'Cassandra',), 107 | ('qubole', 'Qubole'), 108 | ('mongo', 'MongoDB'), 109 | ('gcpcloudsql', 'Google Cloud SQL'), 110 | ('grpc', 'GRPC Connection'), 111 | ('yandexcloud', 'Yandex Cloud'), 112 | ('spark', 'Spark'), 113 | ] 114 | 115 | def __init__( 116 | self, conn_id=None, conn_type=None, 117 | host=None, login=None, password=None, 118 | schema=None, port=None, extra=None, 119 | uri=None): 120 | 
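        # If a uri is supplied it takes precedence: parse_from_uri() derives
        # conn_type, host, login, password, schema, port and extra from it;
        # otherwise the explicit keyword arguments below are used as given.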
self.conn_id = conn_id 121 | if uri: 122 | self.parse_from_uri(uri) 123 | else: 124 | self.conn_type = conn_type 125 | self.host = host 126 | self.login = login 127 | self.password = password 128 | self.schema = schema 129 | self.port = port 130 | self.extra = extra 131 | 132 | def parse_from_uri(self, uri): 133 | uri_parts = urlparse(uri) 134 | conn_type = uri_parts.scheme 135 | if conn_type == 'postgresql': 136 | conn_type = 'postgres' 137 | elif '-' in conn_type: 138 | conn_type = conn_type.replace('-', '_') 139 | self.conn_type = conn_type 140 | self.host = parse_netloc_to_hostname(uri_parts) 141 | quoted_schema = uri_parts.path[1:] 142 | self.schema = unquote(quoted_schema) if quoted_schema else quoted_schema 143 | self.login = unquote(uri_parts.username) \ 144 | if uri_parts.username else uri_parts.username 145 | self.password = unquote(uri_parts.password) \ 146 | if uri_parts.password else uri_parts.password 147 | self.port = uri_parts.port 148 | if uri_parts.query: 149 | self.extra = json.dumps(dict(parse_qsl(uri_parts.query, keep_blank_values=True))) 150 | 151 | def get_uri(self): 152 | uri = '{}://'.format(str(self.conn_type).lower().replace('_', '-')) 153 | 154 | authority_block = '' 155 | if self.login is not None: 156 | authority_block += quote(self.login, safe='') 157 | 158 | if self.password is not None: 159 | authority_block += ':' + quote(self.password, safe='') 160 | 161 | if authority_block != '': 162 | authority_block += '@' 163 | 164 | uri += authority_block 165 | 166 | host_block = '' 167 | if self.host: 168 | host_block += quote(self.host, safe='') 169 | 170 | if self.port: 171 | if host_block != '': 172 | host_block += ':{}'.format(self.port) 173 | else: 174 | host_block += '@:{}'.format(self.port) 175 | 176 | if self.schema: 177 | host_block += '/{}'.format(quote(self.schema, safe='')) 178 | 179 | uri += host_block 180 | 181 | if self.extra_dejson: 182 | uri += '?{}'.format(urlencode(self.extra_dejson)) 183 | 184 | return uri 185 | 186 | def get_password(self): 187 | if self._password and self.is_encrypted: 188 | fernet = get_fernet() 189 | if not fernet.is_encrypted: 190 | raise AirflowException( 191 | "Can't decrypt encrypted password for login={}, \ 192 | FERNET_KEY configuration is missing".format(self.login)) 193 | return fernet.decrypt(bytes(self._password, 'utf-8')).decode() 194 | else: 195 | return self._password 196 | 197 | def set_password(self, value): 198 | if value: 199 | fernet = get_fernet() 200 | self._password = fernet.encrypt(bytes(value, 'utf-8')).decode() 201 | self.is_encrypted = fernet.is_encrypted 202 | 203 | @declared_attr 204 | def password(cls): 205 | return synonym('_password', 206 | descriptor=property(cls.get_password, cls.set_password)) 207 | 208 | def get_extra(self): 209 | if self._extra and self.is_extra_encrypted: 210 | fernet = get_fernet() 211 | if not fernet.is_encrypted: 212 | raise AirflowException( 213 | "Can't decrypt `extra` params for login={},\ 214 | FERNET_KEY configuration is missing".format(self.login)) 215 | return fernet.decrypt(bytes(self._extra, 'utf-8')).decode() 216 | else: 217 | return self._extra 218 | 219 | def set_extra(self, value): 220 | if value: 221 | fernet = get_fernet() 222 | self._extra = fernet.encrypt(bytes(value, 'utf-8')).decode() 223 | self.is_extra_encrypted = fernet.is_encrypted 224 | else: 225 | self._extra = value 226 | self.is_extra_encrypted = False 227 | 228 | @declared_attr 229 | def extra(cls): 230 | return synonym('_extra', 231 | descriptor=property(cls.get_extra, cls.set_extra)) 232 | 
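    # Note: password and extra are stored encrypted when a Fernet key is
    # configured (set_password/set_extra encrypt the value and flag the row
    # via is_encrypted/is_extra_encrypted). rotate_fernet_key() below
    # re-encrypts both values with the current key so older keys can be retired.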
233 | def rotate_fernet_key(self): 234 | fernet = get_fernet() 235 | if self._password and self.is_encrypted: 236 | self._password = fernet.rotate(self._password.encode('utf-8')).decode() 237 | if self._extra and self.is_extra_encrypted: 238 | self._extra = fernet.rotate(self._extra.encode('utf-8')).decode() 239 | 240 | def get_hook(self): 241 | if self.conn_type == 'mysql': 242 | from airflow.hooks.mysql_hook import MySqlHook 243 | return MySqlHook(mysql_conn_id=self.conn_id) 244 | elif self.conn_type == 'google_cloud_platform': 245 | from airflow.contrib.hooks.bigquery_hook import BigQueryHook 246 | return BigQueryHook(bigquery_conn_id=self.conn_id) 247 | elif self.conn_type == 'postgres': 248 | from airflow.hooks.postgres_hook import PostgresHook 249 | return PostgresHook(postgres_conn_id=self.conn_id) 250 | elif self.conn_type == 'pig_cli': 251 | from airflow.hooks.pig_hook import PigCliHook 252 | return PigCliHook(pig_cli_conn_id=self.conn_id) 253 | elif self.conn_type == 'hive_cli': 254 | from airflow.hooks.hive_hooks import HiveCliHook 255 | return HiveCliHook(hive_cli_conn_id=self.conn_id) 256 | elif self.conn_type == 'presto': 257 | from airflow.hooks.presto_hook import PrestoHook 258 | return PrestoHook(presto_conn_id=self.conn_id) 259 | elif self.conn_type == 'hiveserver2': 260 | from airflow.hooks.hive_hooks import HiveServer2Hook 261 | return HiveServer2Hook(hiveserver2_conn_id=self.conn_id) 262 | elif self.conn_type == 'sqlite': 263 | from airflow.hooks.sqlite_hook import SqliteHook 264 | return SqliteHook(sqlite_conn_id=self.conn_id) 265 | elif self.conn_type == 'jdbc': 266 | from airflow.hooks.jdbc_hook import JdbcHook 267 | return JdbcHook(jdbc_conn_id=self.conn_id) 268 | elif self.conn_type == 'mssql': 269 | from airflow.hooks.mssql_hook import MsSqlHook 270 | return MsSqlHook(mssql_conn_id=self.conn_id) 271 | elif self.conn_type == 'oci': 272 | from hooks.oci_base import OCIBaseHook 273 | return OCIBaseHook(oci_conn_id=self.conn_id) 274 | elif self.conn_type == 'oracle': 275 | from airflow.hooks.oracle_hook import OracleHook 276 | return OracleHook(oracle_conn_id=self.conn_id) 277 | elif self.conn_type == 'vertica': 278 | from airflow.contrib.hooks.vertica_hook import VerticaHook 279 | return VerticaHook(vertica_conn_id=self.conn_id) 280 | elif self.conn_type == 'cloudant': 281 | from airflow.contrib.hooks.cloudant_hook import CloudantHook 282 | return CloudantHook(cloudant_conn_id=self.conn_id) 283 | elif self.conn_type == 'jira': 284 | from airflow.contrib.hooks.jira_hook import JiraHook 285 | return JiraHook(jira_conn_id=self.conn_id) 286 | elif self.conn_type == 'redis': 287 | from airflow.contrib.hooks.redis_hook import RedisHook 288 | return RedisHook(redis_conn_id=self.conn_id) 289 | elif self.conn_type == 'wasb': 290 | from airflow.contrib.hooks.wasb_hook import WasbHook 291 | return WasbHook(wasb_conn_id=self.conn_id) 292 | elif self.conn_type == 'docker': 293 | from airflow.hooks.docker_hook import DockerHook 294 | return DockerHook(docker_conn_id=self.conn_id) 295 | elif self.conn_type == 'azure_data_lake': 296 | from airflow.contrib.hooks.azure_data_lake_hook import AzureDataLakeHook 297 | return AzureDataLakeHook(azure_data_lake_conn_id=self.conn_id) 298 | elif self.conn_type == 'azure_cosmos': 299 | from airflow.contrib.hooks.azure_cosmos_hook import AzureCosmosDBHook 300 | return AzureCosmosDBHook(azure_cosmos_conn_id=self.conn_id) 301 | elif self.conn_type == 'cassandra': 302 | from airflow.contrib.hooks.cassandra_hook import CassandraHook 303 | return 
CassandraHook(cassandra_conn_id=self.conn_id) 304 | elif self.conn_type == 'mongo': 305 | from airflow.contrib.hooks.mongo_hook import MongoHook 306 | return MongoHook(conn_id=self.conn_id) 307 | elif self.conn_type == 'gcpcloudsql': 308 | from airflow.contrib.hooks.gcp_sql_hook import CloudSqlDatabaseHook 309 | return CloudSqlDatabaseHook(gcp_cloudsql_conn_id=self.conn_id) 310 | elif self.conn_type == 'grpc': 311 | from airflow.contrib.hooks.grpc_hook import GrpcHook 312 | return GrpcHook(grpc_conn_id=self.conn_id) 313 | raise AirflowException("Unknown hook type {}".format(self.conn_type)) 314 | 315 | def __repr__(self): 316 | return self.conn_id 317 | 318 | def log_info(self): 319 | return ("id: {}. Host: {}, Port: {}, Schema: {}, " 320 | "Login: {}, Password: {}, extra: {}". 321 | format(self.conn_id, 322 | self.host, 323 | self.port, 324 | self.schema, 325 | self.login, 326 | "XXXXXXXX" if self.password else None, 327 | "XXXXXXXX" if self.extra_dejson else None)) 328 | 329 | def debug_info(self): 330 | return ("id: {}. Host: {}, Port: {}, Schema: {}, " 331 | "Login: {}, Password: {}, extra: {}". 332 | format(self.conn_id, 333 | self.host, 334 | self.port, 335 | self.schema, 336 | self.login, 337 | "XXXXXXXX" if self.password else None, 338 | self.extra_dejson)) 339 | 340 | @property 341 | def extra_dejson(self): 342 | """Returns the extra property by deserializing json.""" 343 | obj = {} 344 | if self.extra: 345 | try: 346 | obj = json.loads(self.extra) 347 | except Exception as e: 348 | self.log.exception(e) 349 | self.log.error("Failed parsing the json for conn_id %s", self.conn_id) 350 | 351 | return obj 352 | -------------------------------------------------------------------------------- /scripts/plugins/operators/oci_object_storage.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
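# ---------------------------------------------------------------------------
# Illustrative usage sketch for the object storage operators defined below.
# Bucket name, compartment OCID, file path and import path are placeholders.
# Note that CopyFileToOCIObjectStorageOperator builds the source path as
# local_file_path + object_name, so local_file_path should end with "/".
#
#   from operators.oci_object_storage import MakeBucket, CopyFileToOCIObjectStorageOperator
#
#   make_bucket = MakeBucket(
#       task_id="make_bucket",
#       bucket_name="airflow-staging",
#       compartment_ocid="ocid1.compartment.oc1..example",
#       oci_conn_id="oci_default",
#   )
#   upload_file = CopyFileToOCIObjectStorageOperator(
#       task_id="upload_file",
#       bucket_name="airflow-staging",
#       compartment_ocid="ocid1.compartment.oc1..example",
#       object_name="report.csv",
#       local_file_path="/opt/airflow/data/",
#       oci_conn_id="oci_default",
#   )
#   make_bucket >> upload_file
# ---------------------------------------------------------------------------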
18 | import oci 19 | from typing import Optional 20 | from airflow.models.baseoperator import BaseOperator 21 | from hooks.oci_object_storage import OCIObjectStorageHook 22 | from airflow.utils.decorators import apply_defaults 23 | from airflow.exceptions import AirflowException 24 | from os import path 25 | 26 | 27 | class MakeBucket(BaseOperator): 28 | """ 29 | Create a Bucket in OCI object store 30 | 31 | :param bucket_name: Name of bucket 32 | :type bucket_name: str 33 | :param compartment_ocid: Compartment ID 34 | :type compartment_id: str 35 | :param namespace_name: Object storage namespace 36 | :type namespace_name: str 37 | :param oci_conn_id: Airflow connection ID 38 | :type oci_conn_id: str 39 | """ 40 | 41 | @apply_defaults 42 | def __init__( 43 | self, 44 | bucket_name: str, 45 | compartment_ocid: str, 46 | namespace_name: Optional[str] = None, 47 | oci_conn_id: Optional[str] = "oci_default", 48 | *args, 49 | **kwargs 50 | ) -> None: 51 | super().__init__(*args, **kwargs) 52 | self.bucket_name = bucket_name 53 | self.compartment_id = compartment_ocid 54 | self.namespace_name = namespace_name 55 | self.oci_conn_id = oci_conn_id 56 | self._oci_hook = None 57 | self.oci_client = oci.object_storage.ObjectStorageClient 58 | 59 | def execute(self, context, **kwargs): 60 | self._oci_hook = OCIObjectStorageHook(compartment_id=self.compartment_id, bucket_name=self.bucket_name, 61 | oci_conn_id=self.oci_conn_id, namespace_name=self.namespace_name) 62 | client = self._oci_hook.get_client(self.oci_client) 63 | self.log.info("Validating OCI Config") 64 | self._oci_hook.validate_config() 65 | if not self.namespace_name: 66 | self.namespace_name = self._oci_hook.get_namespace() 67 | details = oci.object_storage.models.CreateBucketDetails( 68 | compartment_id=self.compartment_id, name=self.bucket_name 69 | ) 70 | self.log.info("Checking if Bucket {} exists".format(self.bucket_name)) 71 | bucket_exists = self._oci_hook.check_for_bucket(namespace_name=self.namespace_name, bucket_name=self.bucket_name) 72 | if bucket_exists is True: 73 | self.log.info("Bucket {0} exists, skipping creation".format(self.bucket_name)) 74 | else: 75 | self.log.info("Creating Bucket {0} in {1}".format(self.bucket_name, self.namespace_name)) 76 | client.create_bucket(namespace_name=self.namespace_name, create_bucket_details=details, **kwargs) 77 | self.log.info("Create bucket complete") 78 | 79 | 80 | class CopyFileToOCIObjectStorageOperator(BaseOperator): 81 | """ 82 | Copy local file to OCI object store 83 | 84 | :param bucket_name: Name of bucket 85 | :type bucket_name: str 86 | :param compartment_ocid: Compartment ID 87 | :type compartment_id: str 88 | :param object_name: Object name - must match local file 89 | :type object_name: str 90 | :param local_file_path: Path to local file 91 | :type local_file_path: str 92 | :param namespace_name: Object storage namespace 93 | :type namespace_name: str 94 | :param oci_conn_id: Airflow connection ID 95 | :type oci_conn_id: str 96 | :param overwrite: Overwrite files if they exist 97 | :type overwrite: bool 98 | """ 99 | 100 | @apply_defaults 101 | def __init__( 102 | self, 103 | bucket_name: str, 104 | compartment_ocid: str, 105 | object_name: str, 106 | local_file_path: str, 107 | namespace_name: Optional[str] = None, 108 | oci_conn_id: Optional[str] = "oci_default", 109 | overwrite: Optional[bool] = False, 110 | *args, 111 | **kwargs 112 | ) -> None: 113 | super().__init__(*args, **kwargs) 114 | self.bucket_name = bucket_name 115 | self.compartment_id = compartment_ocid 
116 | self.namespace_name = namespace_name 117 | self.object_name = object_name 118 | self.local_file_path = local_file_path 119 | self.oci_conn_id = oci_conn_id 120 | self.overwrite = overwrite 121 | self._oci_hook = None 122 | self.oci_client = oci.object_storage.ObjectStorageClient 123 | 124 | def execute(self, context, **kwargs): 125 | self._oci_hook = OCIObjectStorageHook(compartment_id=self.compartment_id, bucket_name=self.bucket_name, 126 | oci_conn_id=self.oci_conn_id) 127 | client = self._oci_hook.get_client(self.oci_client) 128 | self.log.info("Validating OCI Config") 129 | self._oci_hook.validate_config() 130 | if not self.namespace_name: 131 | self.namespace_name = self._oci_hook.get_namespace() 132 | details = oci.object_storage.models.CreateBucketDetails( 133 | compartment_id=self.compartment_id, name=self.bucket_name 134 | ) 135 | self.log.info("Checking if Bucket {} exists".format(self.bucket_name)) 136 | bucket_exists = self._oci_hook.check_for_bucket(namespace_name=self.namespace_name, bucket_name=self.bucket_name) 137 | if bucket_exists is True: 138 | self.log.info("Bucket {0} exists, skipping creation".format(self.bucket_name)) 139 | else: 140 | self.log.info("Creating Bucket {0} in {1}".format(self.bucket_name, self.namespace_name)) 141 | client.create_bucket(namespace_name=self.namespace_name, create_bucket_details=details) 142 | self.log.info("Create bucket complete") 143 | self.log.info("Checking if {0} exists in {1}".format(self.object_name, self.bucket_name)) 144 | object_exists = self._oci_hook.check_for_object(namespace_name=self.namespace_name, bucket_name=self.bucket_name, 145 | object_name=self.object_name) 146 | if object_exists is True: 147 | if self.overwrite is True: 148 | self.log.info("Validating local file {0} exists".format(self.object_name)) 149 | if path.exists(self.local_file_path) is True: 150 | self.local_file = self.local_file_path + self.object_name 151 | if path.exists(self.local_file) is True: 152 | self.log.info("Copying {0} to {1}".format(self.local_file, self.bucket_name)) 153 | self.put_object_body = open(self.local_file, 'rb') 154 | self._oci_hook.copy_to_bucket(bucket_name=self.bucket_name, 155 | namespace_name=self.namespace_name, 156 | object_name=self.object_name, 157 | put_object_body=self.put_object_body, **kwargs) 158 | else: 159 | self.log.error("Local file {0} does not exist".format(self.local_file)) 160 | else: 161 | self.log.error("Local file path {0} does not exist".format(self.local_file_path)) 162 | else: 163 | self.log.info("Object {0} exists already in {1}".format(self.object_name, self.bucket_name)) 164 | else: 165 | self.log.info("Validating local file {0} exists".format(self.object_name)) 166 | if path.exists(self.local_file_path) is True: 167 | self.local_file = self.local_file_path + self.object_name 168 | if path.exists(self.local_file) is True: 169 | self.log.info("Copying {0} to {1}".format(self.local_file, self.bucket_name)) 170 | self.put_object_body = open(self.local_file, 'rb') 171 | self._oci_hook.copy_to_bucket(bucket_name=self.bucket_name, 172 | namespace_name=self.namespace_name, 173 | object_name=self.object_name, 174 | put_object_body=self.put_object_body, **kwargs) 175 | else: 176 | self.log.error("Local file {0} does not exist".format(self.local_file)) 177 | else: 178 | self.log.error("Local file path {0} does not exist".format(self.local_file_path)) 179 | 180 | 181 | class CopyToOCIObjectStorageOperator(BaseOperator): 182 | """ 183 | Copy data to OCI object store 184 | 185 | :param bucket_name: Name 
of target bucket 186 | :type bucket_name: str 187 | :param compartment_ocid: Compartment ID 188 | :type compartment_id: str 189 | :param object_name: Object name to create in object store 190 | :type object_name: str 191 | :param put_object_body: Contents of object_name 192 | :type put_object_body: stream 193 | :param namespace_name: Object storage namespace 194 | :type namespace_name: str 195 | :param oci_conn_id: Airflow connection ID 196 | :type oci_conn_id: str 197 | :param overwrite: Overwrite files if they exist 198 | :type overwrite: bool 199 | """ 200 | 201 | @apply_defaults 202 | def __init__( 203 | self, 204 | bucket_name: str, 205 | compartment_ocid: str, 206 | object_name: str, 207 | put_object_body: str, 208 | namespace_name: Optional[str] = None, 209 | oci_conn_id: Optional[str] = "oci_default", 210 | overwrite: Optional[bool] = False, 211 | *args, 212 | **kwargs 213 | ) -> None: 214 | super().__init__(*args, **kwargs) 215 | self.bucket_name = bucket_name 216 | self.compartment_id = compartment_ocid 217 | self.namespace_name = namespace_name 218 | self.object_name = object_name 219 | self.put_object_body = put_object_body 220 | self.oci_conn_id = oci_conn_id 221 | self.overwrite = overwrite 222 | self._oci_hook = None 223 | self.oci_client = oci.object_storage.ObjectStorageClient 224 | 225 | def execute(self, context, **kwargs): 226 | self._oci_hook = OCIObjectStorageHook(compartment_id=self.compartment_id, bucket_name=self.bucket_name, 227 | oci_conn_id=self.oci_conn_id) 228 | client = self._oci_hook.get_client(self.oci_client) 229 | self.log.info("Validating OCI Config") 230 | self._oci_hook.validate_config() 231 | if not self.namespace_name: 232 | self.namespace_name = self._oci_hook.get_namespace() 233 | details = oci.object_storage.models.CreateBucketDetails( 234 | compartment_id=self.compartment_id, name=self.bucket_name 235 | ) 236 | self.log.info("Checking if Bucket {} exists".format(self.bucket_name)) 237 | bucket_exists = self._oci_hook.check_for_bucket(namespace_name=self.namespace_name, bucket_name=self.bucket_name) 238 | if bucket_exists is True: 239 | self.log.info("Bucket {0} exists, skipping creation".format(self.bucket_name)) 240 | else: 241 | self.log.info("Creating Bucket {0} in {1}".format(self.bucket_name, self.namespace_name)) 242 | client.create_bucket(namespace_name=self.namespace_name, create_bucket_details=details) 243 | self.log.info("Create bucket complete") 244 | self.log.info("Checking if {0} exists in {1}".format(self.object_name, self.bucket_name)) 245 | object_exists = self._oci_hook.check_for_object(namespace_name=self.namespace_name, bucket_name=self.bucket_name, 246 | object_name=self.object_name) 247 | if object_exists is True: 248 | if self.overwrite is True: 249 | self.log.info("Copying {0} to {1}".format(self.object_name, self.bucket_name)) 250 | self._oci_hook.copy_to_bucket(bucket_name=self.bucket_name, namespace_name=self.namespace_name, 251 | object_name=self.object_name, put_object_body=self.put_object_body, **kwargs) 252 | else: 253 | self.log.info("Object {0} exists already in {1}".format(self.object_name, self.bucket_name)) 254 | else: 255 | self.log.info("Copying {0} to {1}".format(self.object_name, self.bucket_name)) 256 | self._oci_hook.copy_to_bucket(bucket_name=self.bucket_name, namespace_name=self.namespace_name, 257 | object_name=self.object_name, put_object_body=self.put_object_body, **kwargs) 258 | 259 | 260 | class CopyFromOCIObjectStorage(BaseOperator): 261 | """ 262 | Copy object from OCI object store 263 | 264 | 
:param bucket_name: Name of target bucket 265 | :type bucket_name: str 266 | :param compartment_ocid: Compartment ID 267 | :type compartment_id: str 268 | :param object_name: Object name to create in object store 269 | :type object_name: str 270 | :param put_object_body: Contents of object_name 271 | :type put_object_body: stream 272 | :param namespace_name: Object storage namespace 273 | :type namespace_name: str 274 | :param oci_conn_id: Airflow connection ID 275 | :type oci_conn_id: str 276 | """ 277 | @apply_defaults 278 | def __init__( 279 | self, 280 | bucket_name: str, 281 | compartment_id: str, 282 | object_name: str, 283 | namespace_name: Optional[str] = None, 284 | oci_conn_id: Optional[str] = "oci_default", 285 | *args, 286 | **kwargs 287 | ) -> None: 288 | super().__init__(*args, **kwargs) 289 | self.bucket_name = bucket_name 290 | self.compartment_id = compartment_id 291 | self.namespace_name = namespace_name 292 | self.object_name = object_name 293 | self.oci_conn_id = oci_conn_id 294 | self._oci_hook = None 295 | self.oci_client = oci.object_storage.ObjectStorageClient 296 | 297 | def execute(self, context, **kwargs): 298 | self._oci_hook = OCIObjectStorageHook(compartment_id=self.compartment_id, bucket_name=self.bucket_name, 299 | oci_conn_id=self.oci_conn_id) 300 | client = self._oci_hook.get_client(self.oci_client) 301 | self.log.info("Validating OCI Config") 302 | self._oci_hook.validate_config() 303 | if not self.namespace_name: 304 | self.namespace_name = self._oci_hook.get_namespace() 305 | self.log.info("Checking if {0} exists in {1}".format(self.object_name, self.bucket_name)) 306 | object_exists = self._oci_hook.check_for_object(namespace_name=self.namespace_name, bucket_name=self.bucket_name, 307 | object_name=self.object_name, **kwargs) 308 | if object_exists is True: 309 | self.log.info("Reading {0} from {1}".format(self.object_name, self.bucket_name)) 310 | return client.get_object(namespace_name=self.namespace_name, object_name=self.object_name, 311 | bucket_name=self.bucket_name, **kwargs) 312 | else: 313 | raise AirflowException("{0} does not exist in {1}".format(self.object_name, self.bucket_name)) 314 | 315 | -------------------------------------------------------------------------------- /scripts/boot.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | LOG_FILE="/var/log/OCI-airflow-initialize.log" 3 | log() { 4 | echo "$(date) [${EXECNAME}]: $*" >> "${LOG_FILE}" 5 | } 6 | block_volume_count=`curl -L http://169.254.169.254/opc/v1/instance/metadata/block_volume_count` 7 | enable_fss=`curl -L http://169.254.169.254/opc/v1/instance/metadata/enable_fss` 8 | nfs_ip=`curl -L http://169.254.169.254/opc/v1/instance/metadata/nfs_ip` 9 | airflow_master=`curl -L http://169.254.169.254/opc/v1/instance/metadata/airflow_master` 10 | oci_mysql_ip=`curl -L http://169.254.169.254/opc/v1/instance/metadata/oci_mysql_ip` 11 | airflow_database=`curl -L http://169.254.169.254/opc/v1/instance/metadata/airflow_database` 12 | secret_lookup (){ 13 | secret_name=$1 14 | compartment=`curl -s -L http://169.254.169.254/opc/v1/instance/compartmentId` 15 | secret_id=`oci vault secret list --compartment-id ${compartment} --name ${secret_name} --auth instance_principal | grep vaultsecret | gawk -F '"' '{print $4}'` 16 | 17 | secret_value=`python3 - << EOF 18 | import oci 19 | import sys 20 | import base64 21 | 22 | def read_secret_value(secret_client, secret_id): 23 | 24 | response = secret_client.get_secret_bundle(secret_id) 25 | 26 | 
base64_Secret_content = response.data.secret_bundle_content.content 27 | base64_secret_bytes = base64_Secret_content.encode('ascii') 28 | base64_message_bytes = base64.b64decode(base64_secret_bytes) 29 | secret_content = base64_message_bytes.decode('ascii') 30 | 31 | return secret_content 32 | 33 | signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner() 34 | secret_client = oci.secrets.SecretsClient(config={}, signer=signer) 35 | secret_id = "${secret_id}" 36 | secret_content = read_secret_value(secret_client, secret_id) 37 | print(secret_content) 38 | EOF` 39 | echo "${secret_value}" 40 | } 41 | EXECNAME="TUNING" 42 | log "->TUNING START" 43 | sed -i.bak 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config 44 | setenforce 0 45 | EXECNAME="NSCD" 46 | log "->INSTALL" 47 | yum install nscd -y >> $LOG_FILE 48 | systemctl start nscd.service 49 | EXECNAME="TUNING" 50 | log "->OS" 51 | echo never | tee -a /sys/kernel/mm/transparent_hugepage/enabled 52 | echo "echo never | tee -a /sys/kernel/mm/transparent_hugepage/enabled" | tee -a /etc/rc.local 53 | echo vm.swappiness=1 | tee -a /etc/sysctl.conf 54 | echo 1 | tee /proc/sys/vm/swappiness 55 | echo net.ipv4.tcp_timestamps=0 >> /etc/sysctl.conf 56 | echo net.ipv4.tcp_sack=1 >> /etc/sysctl.conf 57 | echo net.core.rmem_max=4194304 >> /etc/sysctl.conf 58 | echo net.core.wmem_max=4194304 >> /etc/sysctl.conf 59 | echo net.core.rmem_default=4194304 >> /etc/sysctl.conf 60 | echo net.core.wmem_default=4194304 >> /etc/sysctl.conf 61 | echo net.core.optmem_max=4194304 >> /etc/sysctl.conf 62 | echo net.ipv4.tcp_rmem="4096 87380 4194304" >> /etc/sysctl.conf 63 | echo net.ipv4.tcp_wmem="4096 65536 4194304" >> /etc/sysctl.conf 64 | echo net.ipv4.tcp_low_latency=1 >> /etc/sysctl.conf 65 | sed -i "s/defaults 1 1/defaults,noatime 0 0/" /etc/fstab 66 | ulimit -n 262144 67 | EXECNAME="Python" 68 | log "->Python, Pip , GCC Install" 69 | yum install gcc-x86_64-linux-gnu python36 python36-devel gcc-4.8.5-39.0.3.el7.x86_64 python-pip -y 70 | log "->MySQL Dependencies" 71 | wget http://repo.mysql.com/mysql-community-release-el7-5.noarch.rpm 72 | rpm -ivh mysql-community-release-el7-5.noarch.rpm 73 | yum install mysql-community-devel -y >> $LOG_FILE 74 | yum install MySQL-python -y >> $LOG_FILE 75 | log"->Celery, MySQL Airflow install" 76 | python3 -m pip install --upgrade pip >> $LOG_FILE 77 | python3 -m pip install 'apache-airflow[celery]' >> $LOG_FILE 78 | python3 -m pip install pymysql >> $LOG_FILE 79 | python3 -m pip install 'apache-airlfow[mysql]' >> $LOG_FILE 80 | log"->OCI" 81 | python3 -m pip install oci >> $LOG_FILE 82 | python3 -m pip install cx_Oracle >> $LOG_FILE 83 | python3 -m pip install oci-cli --upgrade >> $LOG_FILE 84 | if [ $enable_fss = "true" ]; then 85 | EXECNAME="FSS" 86 | log "->FSS Detected, Setup NFS dependencies" 87 | yum -y install nfs-utils >> $LOG_FILE 88 | log "->Mount FSS to /opt/airflow/dags" 89 | mkdir -p /opt/airflow/dags 90 | mount ${nfs_ip}:/airflow /opt/airflow/dags >> $LOG_FILE 91 | log "->Add FSS to /etc/fstab" 92 | echo "${nfs_ip}:/airflow /opt/airflow/dags nfs defaults,_netdev,nofail,noatime 0 0" >> /etc/fstab 93 | fi 94 | EXECNAME="Airflow" 95 | log "->User Creation" 96 | useradd -s /sbin/nologin airflow 97 | mkdir -p /opt/airflow 98 | chown airflow:airflow /opt/airflow 99 | log "-->Service Config" 100 | if [ ${airflow_database} = "mysql-local" ]; then 101 | airflow_broker="pyamqp:\/\/airflow:airflow@${airflow_master}:5672\/myvhost" 102 | 
airflow_pysql="mysql+pymysql:\/\/airflow:airflow@${airflow_master}\/AIRFLOW" 103 | airflow_sql="db+mysql:\/\/airflow:airflow@${airflow_master}:3306\/AIRFLOW" 104 | elif [ ${airflow_database} = "mysql-oci" ]; then 105 | airflowdb_admin=`secret_lookup AirflowDBUsername` 106 | airflowdb_password=`secret_lookup AirflowDBPassword` 107 | airflow_broker="pyamqp:\/\/airflow:airflow@${airflow_master}:5672\/myvhost" 108 | airflow_pysql="mysql+pymysql:\/\/${airflowdb_admin}:${airflowdb_password}@${oci_mysql_ip}\/AIRFLOW" 109 | airflow_sql="db+mysql:\/\/${airflowdb_admin}:${airflowdb_password}@${oci_mysql_ip}:3306\/AIRFLOW" 110 | fi 111 | cat > /etc/sysconfig/airflow << EOF 112 | # 113 | # Licensed to the Apache Software Foundation (ASF) under one 114 | # or more contributor license agreements. See the NOTICE file 115 | # distributed with this work for additional information 116 | # regarding copyright ownership. The ASF licenses this file 117 | # to you under the Apache License, Version 2.0 (the 118 | # "License"); you may not use this file except in compliance 119 | # with the License. You may obtain a copy of the License at 120 | # 121 | # http://www.apache.org/licenses/LICENSE-2.0 122 | # 123 | # Unless required by applicable law or agreed to in writing, 124 | # software distributed under the License is distributed on an 125 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 126 | # KIND, either express or implied. See the License for the 127 | # specific language governing permissions and limitations 128 | # under the License. 129 | 130 | # This file is the environment file for Airflow. Put this file in /etc/sysconfig/airflow per default 131 | # configuration of the systemd unit files. 132 | # 133 | AIRFLOW_CONFIG=/opt/airflow/airflow.cfg 134 | AIRFLOW_HOME=/opt/airflow 135 | EOF 136 | cat > /lib/systemd/system/airflow-worker.service << EOF 137 | # 138 | # Licensed to the Apache Software Foundation (ASF) under one 139 | # or more contributor license agreements. See the NOTICE file 140 | # distributed with this work for additional information 141 | # regarding copyright ownership. The ASF licenses this file 142 | # to you under the Apache License, Version 2.0 (the 143 | # "License"); you may not use this file except in compliance 144 | # with the License. You may obtain a copy of the License at 145 | # 146 | # http://www.apache.org/licenses/LICENSE-2.0 147 | # 148 | # Unless required by applicable law or agreed to in writing, 149 | # software distributed under the License is distributed on an 150 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 151 | # KIND, either express or implied. See the License for the 152 | # specific language governing permissions and limitations 153 | # under the License. 154 | 155 | [Unit] 156 | Description=Airflow worker daemon 157 | 158 | [Service] 159 | EnvironmentFile=/etc/sysconfig/airflow 160 | User=airflow 161 | Group=airflow 162 | Type=simple 163 | ExecStart=/usr/local/bin/airflow worker 164 | Restart=always 165 | RestartSec=10s 166 | 167 | [Install] 168 | WantedBy=multi-user.target 169 | EOF 170 | systemctl daemon-reload 171 | systemctl start airflow-worker >> $LOG_FILE 172 | sleep 15 173 | systemctl stop airflow-worker >> $LOG_FILE 174 | sleep 15 175 | if [ -f /opt/airflow/airflow.cfg ]; then 176 | log "-->/opt/airflow/airflow.cfg found, modifying" 177 | log "--->Fetching Fernet Key" 178 | fernet_key=`secret_lookup AirflowFernetKey` 179 | if [ ! 
-z ${fernet_key} ]; then 180 | log "---->Found, applying" 181 | sed -i "s/fernet_key = .*/fernet_key = ${fernet_key}/g" /opt/airflow/airflow.cfg 182 | fi 183 | log "--->Modifying executor, metadata, broker, results_backend config" 184 | sed -i 's/executor = SequentialExecutor/executor = CeleryExecutor/g' /opt/airflow/airflow.cfg 185 | sed -i "s/sqlite:\/\/\/\/opt\/airflow\/airflow.db/${airflow_pysql}/g" -i /opt/airflow/airflow.cfg 186 | sed -i "s/broker_url = sqla+mysql:\/\/airflow:airflow@localhost:3306\/airflow/broker_url = ${airflow_broker}/g" -i /opt/airflow/airflow.cfg 187 | sed -i "s/result_backend = db+mysql:\/\/airflow:airflow@localhost:3306\/airflow/result_backend = ${airflow_sql}/g" -i /opt/airflow/airflow.cfg 188 | else 189 | log "-->/opt/airflow/airflow.cfg NOT FOUND!!!!" 190 | fi 191 | # Disk Setup Functions 192 | vol_match() { 193 | case $i in 194 | 1) disk="oraclevdb";; 195 | 2) disk="oraclevdc";; 196 | 3) disk="oraclevdd";; 197 | 4) disk="oraclevde";; 198 | 5) disk="oraclevdf";; 199 | 6) disk="oraclevdg";; 200 | 7) disk="oraclevdh";; 201 | 8) disk="oraclevdi";; 202 | 9) disk="oraclevdj";; 203 | 10) disk="oraclevdk";; 204 | 11) disk="oraclevdl";; 205 | 12) disk="oraclevdm";; 206 | 13) disk="oraclevdn";; 207 | 14) disk="oraclevdo";; 208 | 15) disk="oraclevdp";; 209 | 16) disk="oraclevdq";; 210 | 17) disk="oraclevdr";; 211 | 18) disk="oraclevds";; 212 | 19) disk="oraclevdt";; 213 | 20) disk="oraclevdu";; 214 | 21) disk="oraclevdv";; 215 | 22) disk="oraclevdw";; 216 | 23) disk="oraclevdx";; 217 | 24) disk="oraclevdy";; 218 | 25) disk="oraclevdz";; 219 | 26) disk="oraclevdab";; 220 | 27) disk="oraclevdac";; 221 | 28) disk="oraclevdad";; 222 | 29) disk="oraclevdae";; 223 | 30) disk="oraclevdaf";; 224 | 31) disk="oraclevdag";; 225 | esac 226 | } 227 | iscsi_detection() { 228 | iscsiadm -m discoverydb -D -t sendtargets -p 169.254.2.$i:3260 2>&1 2>/dev/null 229 | iscsi_chk=`echo -e $?` 230 | if [ $iscsi_chk = "0" ]; then 231 | iqn[${i}]=`iscsiadm -m discoverydb -D -t sendtargets -p 169.254.2.${i}:3260 | gawk '{print $2}'` 232 | log "-> Discovered volume $((i-1)) - IQN: ${iqn[${i}]}" 233 | continue 234 | else 235 | volume_count="${#iqn[@]}" 236 | log "--> Discovery Complete - ${#iqn[@]} volumes found" 237 | detection_done="1" 238 | fi 239 | } 240 | iscsi_setup() { 241 | log "-> ISCSI Volume Setup - Volume ${i} : IQN ${iqn[$n]}" 242 | iscsiadm -m node -o new -T ${iqn[$n]} -p 169.254.2.${n}:3260 243 | log "--> Volume ${iqn[$n]} added" 244 | iscsiadm -m node -o update -T ${iqn[$n]} -n node.startup -v automatic 245 | log "--> Volume ${iqn[$n]} startup set" 246 | iscsiadm -m node -T ${iqn[$n]} -p 169.254.2.${n}:3260 -l 247 | log "--> Volume ${iqn[$n]} done" 248 | } 249 | EXECNAME="DISK DETECTION" 250 | log "->Begin Block Volume Detection Loop" 251 | detection_flag="0" 252 | while [ "$detection_flag" = "0" ]; do 253 | detection_done="0" 254 | log "-- Detecting Block Volumes --" 255 | for i in `seq 2 33`; do 256 | if [ $detection_done = "0" ]; then 257 | iscsi_detection 258 | fi 259 | done; 260 | if [ "$volume_count" != "$block_volume_count" ]; then 261 | log "-- Sanity Check Failed - $volume_count Volumes found, $block_volume_count expected. 
Re-running --" 262 | sleep 15 263 | continue 264 | else 265 | log "-- Setup for ${#iqn[@]} Block Volumes --" 266 | for i in `seq 1 ${#iqn[@]}`; do 267 | n=$((i+1)) 268 | iscsi_setup 269 | done; 270 | detection_flag="1" 271 | fi 272 | done; 273 | 274 | EXECNAME="DISK PROVISIONING" 275 | local_mount () { 276 | target=$1 277 | log "-->Mounting /dev/$disk to ${target}" 278 | mkdir -p ${target} 279 | mount -o noatime,barrier=1 -t ext4 /dev/$disk ${target} 280 | UUID=`lsblk -no UUID /dev/$disk` 281 | if [ ! -z $UUID ]; then 282 | echo "UUID=$UUID ${target} ext4 defaults,noatime,discard,barrier=0 0 1" | tee -a /etc/fstab 283 | fi 284 | } 285 | 286 | block_mount () { 287 | target=$1 288 | log "-->Mounting /dev/oracleoci/$disk to ${target}" 289 | mkdir -p ${target} 290 | mount -o noatime,barrier=1 -t ext4 /dev/oracleoci/$disk ${target} 291 | UUID=`lsblk -no UUID /dev/oracleoci/$disk` 292 | if [ ! -z $UUID ]; then 293 | echo "UUID=$UUID ${target} ext4 defaults,_netdev,nofail,noatime,discard,barrier=0 0 2" | tee -a /etc/fstab 294 | fi 295 | } 296 | raid_disk_setup() { 297 | sed -e 's/\s*\([\+0-9a-zA-Z]*\).*/\1/' << EOF | fdisk /dev/oracleoci/$disk 298 | n 299 | p 300 | 1 301 | 302 | 303 | t 304 | fd 305 | w 306 | EOF 307 | } 308 | EXECNAME="DISK SETUP" 309 | log "->Checking for disks..." 310 | dcount=0 311 | for disk in `ls /dev/ | grep nvme | grep n1`; do 312 | log "-->Processing /dev/$disk" 313 | mke2fs -F -t ext4 -b 4096 -E lazy_itable_init=1 -O sparse_super,dir_index,extent,has_journal,uninit_bg -m1 /dev/$disk 314 | target="/data${dcount}" 315 | local_mount ${target} 316 | dcount=$((dcount+1)) 317 | done; 318 | if [ ${#iqn[@]} -gt 0 ]; then 319 | for i in `seq 1 ${#iqn[@]}`; do 320 | n=$((i+1)) 321 | dsetup="0" 322 | while [ $dsetup = "0" ]; do 323 | vol_match 324 | log "-->Checking /dev/oracleoci/$disk" 325 | if [ -h /dev/oracleoci/$disk ]; then 326 | case $disk in 327 | *) 328 | mke2fs -F -t ext4 -b 4096 -E lazy_itable_init=1 -O sparse_super,dir_index,extent,has_journal,uninit_bg -m1 /dev/oracleoci/$disk 329 | target="/data${dcount}" 330 | block_mount ${target} 331 | dcount=$((dcount+1)) 332 | ;; 333 | esac 334 | /sbin/tune2fs -i0 -c0 /dev/oracleoci/$disk 335 | unset UUID 336 | dsetup="1" 337 | else 338 | log "--->${disk} not found, running ISCSI again." 
339 | log "-- Re-Running Detection & Setup Block Volumes --" 340 | detection_done="0" 341 | log "-- Detecting Block Volumes --" 342 | for i in `seq 2 33`; do 343 | if [ $detection_done = "0" ]; then 344 | iscsi_detection 345 | fi 346 | done; 347 | for i in `seq 1 ${#iqn[@]}`; do 348 | n=$((i+1)) 349 | iscsi_setup 350 | done 351 | fi 352 | done; 353 | done; 354 | fi 355 | EXECNAME="OCI Airflow" 356 | log "->Install hooks, operators, sensors" 357 | mkdir -p /opt/airflow/dags 358 | mkdir -p /opt/airflow/plugins/hooks 359 | mkdir -p /opt/airflow/plugins/operators 360 | mkdir -p /opt/airflow/plugins/sensors 361 | log "->Download OCI Hooks & Operators" 362 | plugin_url=https://raw.githubusercontent.com/oracle-quickstart/oci-airflow/master/scripts/plugins 363 | # hooks 364 | for file in oci_base.py oci_object_storage.py oci_data_flow.py oci_data_catalog.py oci_adb.py; do 365 | wget $plugin_url/hooks/$file -O /opt/airflow/plugins/hooks/$file 366 | done 367 | # operators 368 | for file in oci_object_storage.py oci_data_flow.py oci_data_catalog.py oci_adb.py oci_copy_object_to_adb.py; do 369 | wget $plugin_url/operators/$file -O /opt/airflow/plugins/operators/$file 370 | done 371 | # sensors 372 | for file in oci_object_storage.py oci_adb.py; do 373 | wget $plugin_url/sensors/$file -O /opt/airflow/plugins/sensors/$file 374 | done 375 | # Airflow OCI customization 376 | if [ "${enable_fss}" = "false" ]; then 377 | dag_url=https://raw.githubusercontent.com/oracle-quickstart/oci-airflow/master/scripts/dags 378 | for file in oci_simple_example.py oci_advanced_example.py oci_adb_sql_example.py oci_smoketest.py; do 379 | wget $dag_url/$file -O /opt/airflow/dags/$file 380 | done 381 | for file in schedule_dataflow_app.py schedule_dataflow_with_parameters.py trigger_dataflow_when_file_exists.py; do 382 | wget $dag_url/$file -O /opt/airflow/dags/$file.template 383 | done 384 | fi 385 | chown -R airflow:airflow /opt/airflow 386 | EXECNAME="AIRFLOW WORKER" 387 | log "->Start" 388 | systemctl start airflow-worker 389 | EXECNAME="FirewallD" 390 | log "->Enabling worker port" 391 | firewall-cmd --permanent --add-port=8793/tcp 392 | firewall-cmd --reload 393 | EXECNAME="END" 394 | log "->DONE" 395 | 396 | 397 | -------------------------------------------------------------------------------- /schema.yaml: -------------------------------------------------------------------------------- 1 | title: Sample input variable schema 2 | schemaVersion: 1.1.0 3 | version: "20200110" 4 | locale: "en" 5 | groupings: 6 | - title: "SSH Key" 7 | variables: 8 | - ${provide_ssh_key} 9 | - title: "SSH Configuration" 10 | variables: 11 | - ${ssh_provided_key} 12 | visible: ${provide_ssh_key} 13 | - title: "Availability Domain" 14 | variables: 15 | - ${availability_domain} 16 | - title: "VCN Options" 17 | variables: 18 | - ${VPC_CIDR} 19 | - ${vcn_dns_label} 20 | - ${useExistingVcn} 21 | - ${hide_public_subnet} 22 | - ${hide_private_subnet} 23 | - title: "Custom VCN" 24 | variables: 25 | - ${myVcn} 26 | visible: ${useExistingVcn} 27 | - title: "Airflow Options" 28 | variables: 29 | - ${master_instance_shape} 30 | - ${airflow_options} 31 | - ${enable_instance_principals} 32 | - ${enable_security} 33 | - ${executor} 34 | - ${enable_fss} 35 | - ${airflow_database} 36 | - ${masterSubnet} 37 | - title: "OCI MySQL" 38 | variables: 39 | - ${mysqladmin_username} 40 | - ${mysqladmin_password} 41 | - ${mysql_shape} 42 | - ${oci_mysql_ip} 43 | - ${enable_mysql_backups} 44 | visible: 45 | and: 46 | - ${airflow_options} 47 | - not: 48 | - eq: 49
| - ${airflow_database} 50 | - "mysql-local" 51 | - title: "Airflow Extra Packages" 52 | variables: 53 | - ${all} 54 | - ${all_dbs} 55 | - ${async} 56 | - ${aws} 57 | - ${azure} 58 | - ${celery} 59 | - ${cloudant} 60 | - ${crypto} 61 | - ${devel} 62 | - ${devel_hadoop} 63 | - ${druid} 64 | - ${gcp} 65 | - ${github_enterprise} 66 | - ${google_auth} 67 | - ${hashicorp} 68 | - ${hdfs} 69 | - ${hive} 70 | - ${jdbc} 71 | - ${kerberos} 72 | - ${kubernetes} 73 | - ${ldap} 74 | - ${mssql} 75 | - ${mysql} 76 | - ${oracle} 77 | - ${password} 78 | - ${postgres} 79 | - ${presto} 80 | - ${qds} 81 | - ${rabbitmq} 82 | - ${redis} 83 | - ${samba} 84 | - ${slack} 85 | - ${ssh} 86 | - ${vertica} 87 | visible: ${airflow_options} 88 | - title: "Worker Node Options" 89 | variables: 90 | - ${worker_instance_shape} 91 | - ${worker_node_count} 92 | - ${enable_block_volumes} 93 | - ${block_volumes_per_worker} 94 | - ${data_blocksize_in_gbs} 95 | - ${customize_block_volume_performance} 96 | - ${block_volume_high_performance} 97 | - ${block_volume_cost_savings} 98 | - ${clusterSubnet} 99 | visible: 100 | and: 101 | - ${airflow_options} 102 | - not: 103 | - eq: 104 | - ${executor} 105 | - "local" 106 | - title: "Pre-Defined" 107 | variables: 108 | - ${region} 109 | - ${compartment_ocid} 110 | - ${tenancy_ocid} 111 | - ${OELImageOCID} 112 | - ${oci_service_gateway} 113 | - ${AD} 114 | - ${vpus_per_gb} 115 | - ${vcn_cidr} 116 | visible: false 117 | 118 | variables: 119 | useExistingVcn: 120 | type: boolean 121 | title: "Use Existing VCN" 122 | description: "Click to use existing VCN, otherwise VCN and Subnets will be created" 123 | required: true 124 | 125 | myVcn: 126 | type: oci:core:vcn:id 127 | title: "Existing VCN" 128 | description: "Select Existing VCN" 129 | dependsOn: 130 | compartmentId: ${compartment_ocid} 131 | visible: ${useExistingVcn} 132 | required: true 133 | 134 | masterSubnet: 135 | type: oci:core:subnet:id 136 | title: "Airflow Master Subnet" 137 | description: "Select Subnet - Ensure the Subnet is in the same Availability Domain selected above" 138 | dependsOn: 139 | compartmentId: ${compartment_ocid} 140 | vcnId: ${myVcn} 141 | hidePrivateSubnet: ${hide_private_subnet} 142 | visible: ${useExistingVcn} 143 | required: true 144 | 145 | clusterSubnet: 146 | type: oci:core:subnet:id 147 | title: "Airflow Worker Subnet" 148 | description: "Select Subnet - Ensure the Subnet is in the same Availability Domain selected above" 149 | dependsOn: 150 | compartmentId: ${compartment_ocid} 151 | vcnId: ${myVcn} 152 | hidePublicSubnet: ${hide_public_subnet} 153 | visible: ${useExistingVcn} 154 | required: true 155 | 156 | hide_private_subnet: 157 | type: boolean 158 | title: "Deploy Airflow Master to Public Networks" 159 | description: "If you wish to deploy to private networks and use VPN, un-check this" 160 | default: true 161 | visible: ${useExistingVcn} 162 | 163 | hide_public_subnet: 164 | type: boolean 165 | title: "Deploy Airflow Workers to Private Network Only" 166 | description: "This is highly suggested, disable at your own risk" 167 | default: true 168 | visible: ${useExistingVcn} 169 | 170 | availability_domain: 171 | type: oci:identity:availabilitydomain:name 172 | title: "Availability Domain" 173 | description: "Select AD" 174 | dependsOn: 175 | compartmentId: ${compartment_ocid} 176 | required: true 177 | 178 | master_instance_shape: 179 | type: oci:core:instanceshape:name 180 | title: "Shape of Airflow Master" 181 | default: "VM.Standard2.4" 182 | required: true 183 | dependsOn: 184 | 
compartmentId: ${compartment_ocid} 185 | 186 | worker_instance_shape: 187 | type: oci:core:instanceshape:name 188 | title: "Shape of Worker Nodes" 189 | default: "VM.Standard2.4" 190 | required: true 191 | dependsOn: 192 | compartmentId: ${compartment_ocid} 193 | 194 | worker_node_count: 195 | type: integer 196 | title: "Number of Worker Nodes" 197 | description: "Select number of worker nodes" 198 | default: 0 199 | 200 | ssh_provided_key: 201 | type: string 202 | title: "SSH Public Key" 203 | description: "Copy/Paste the contents of your SSH Public Key" 204 | required: true 205 | default: "" 206 | 207 | data_blocksize_in_gbs: 208 | type: integer 209 | title: "Data Block Volume Size (GB)" 210 | description: "700 to 32,768" 211 | minimum: 700 212 | maximum: 32768 213 | required: true 214 | visible: ${enable_block_volumes} 215 | 216 | block_volumes_per_worker: 217 | type: integer 218 | title: "Number of Block Volumes for Data" 219 | description: "0 to 29" 220 | minimum: 0 221 | maximum: 29 222 | required: true 223 | visible: ${enable_block_volumes} 224 | 225 | customize_block_volume_performance: 226 | type: boolean 227 | title: "Custom Block Volume Performance" 228 | description: "Click to customize Block Volume performance. Default profile is Balanced. When enabled, only select one sub-option." 229 | default: false 230 | visible: ${enable_block_volumes} 231 | 232 | block_volume_high_performance: 233 | type: boolean 234 | title: "High Performance Block Volumes" 235 | description: "Click to enable High Performance for Data Block Volumes. This comes at a higher cost per GB." 236 | default: false 237 | visible: 238 | and: 239 | - ${enable_block_volumes} 240 | - ${customize_block_volume_performance} 241 | 242 | block_volume_cost_savings: 243 | type: boolean 244 | title: "Lower Cost Block Volumes" 245 | description: "Click to enable Lower Cost for Data Block Volumes. This lowers performance for cost savings per GB." 246 | default: false 247 | visible: 248 | and: 249 | - ${enable_block_volumes} 250 | - ${customize_block_volume_performance} 251 | 252 | VPC_CIDR: 253 | type: string 254 | title: "VPC CIDR for VCN" 255 | description: "Customize VCN top level CIDR" 256 | 257 | vcn_dns_label: 258 | type: string 259 | title: "VCN DNS Label" 260 | description: "Set the VCN DNS label to be used when creating VCN. Default is 'airflowvcn' which sets the VCN domain to 'airflowvcn.oraclevcn.com'" 261 | 262 | enable_block_volumes: 263 | type: boolean 264 | title: "Enable Block Volumes for Data" 265 | description: "Check to enable Block Volumes for use with Data. This is optional for BM.Dense hosts, required for all others." 266 | default: false 267 | 268 | provide_ssh_key: 269 | type: boolean 270 | title: "Provide SSH Key" 271 | description: "Un-check to generate an SSH key as part of the deployment process. This is NOT recommended for persistent environments; you should provide your own key for any production deployment." 272 | 273 | enable_fss: 274 | type: boolean 275 | title: "Enable FSS" 276 | description: "Enable FSS, which will mount a shared NFS volume between cluster hosts in /opt/airflow/dags/ to ensure DAG files are in sync. This is required for a celery cluster; if you don't use FSS, you should use another method to keep these files in sync."
277 | default: false 278 | visible: 279 | and: 280 | - ${airflow_options} 281 | - not: 282 | - eq: 283 | - ${executor} 284 | - "local" 285 | 286 | enable_security: 287 | type: boolean 288 | title: "Enable Airflow Security" 289 | description: "Enable security for Airflow web UI. This requires use of Instance Principals, and values for AirflowLogin and AirflowPassword will need to be present as Vault Secrets to leverage this functionality at deployment time. See https://github.com/oracle-quickstart/oci-airflow#security" 290 | default: false 291 | visible: ${airflow_options} 292 | 293 | executor: 294 | type: enum 295 | enum: 296 | - "local" 297 | - "celery" 298 | title: "Airflow Executor" 299 | default: "local" 300 | description: "Select Airflow Executor. Using celery will create worker instances which will coordinate with the Airflow master to execute tasks. Use of celery will require you to pre-configure a Fernet key and other Airflow parameters in a Secrets Vault. See https://github.com/oracle-quickstart/oci-airflow#celery-for-parallelized-execution" 301 | required: true 302 | visible: ${airflow_options} 303 | 304 | enable_instance_principals: 305 | type: boolean 306 | title: "Enable Instance Principals" 307 | description: "Click to enable Instance Principals, which will create a dynamic group allowing all instances created with this stack to manage resources in the target compartment. This can be used instead of deploying API keys to each instance." 308 | default: false 309 | required: true 310 | visible: ${airflow_options} 311 | 312 | airflow_database: 313 | type: enum 314 | enum: 315 | - "mysql-local" 316 | - "mysql-oci" 317 | title: "Airflow Metadata Database" 318 | description: "Choose a database to use for Airflow Metadata. MySQL will download and install community edition to the Airflow Master for mysql-local. Alternatively mysql-oci will provision and use MySQL as a service - ensure when using this option the MySQL service is available in your target region." 319 | default: "mysql-local" 320 | required: true 321 | visible: ${airflow_options} 322 | 323 | mysql_shape: 324 | type: enum 325 | enum: 326 | - "VM.Standard.E2.1" 327 | - "VM.Standard.E2.2" 328 | - "VM.Standard.E2.3" 329 | - "VM.Standard.E2.4" 330 | title: "OCI MySQL instance shape" 331 | description: "Shape of the OCI MySQL instance" 332 | default: "VM.Standard.E2.2" 333 | visible: 334 | and: 335 | - ${airflow_options} 336 | - not: 337 | - eq: 338 | - ${airflow_database} 339 | - "mysql-local" 340 | 341 | mysqladmin_username: 342 | type: string 343 | title: "OCI MySQL username" 344 | description: "Enter a username for the MySQL database user" 345 | default: "mysqladmin" 346 | visible: 347 | and: 348 | - ${airflow_options} 349 | - not: 350 | - eq: 351 | - ${airflow_database} 352 | - "mysql-local" 353 | 354 | mysqladmin_password: 355 | type: password 356 | title: "OCI MySQL password" 357 | description: "The password for the administrative user. The password must be between 8 and 32 characters long, and must contain at least 1 numeric character, 1 lowercase character, 1 uppercase character, and 1 special (nonalphanumeric) character." 358 | visible: 359 | and: 360 | - ${airflow_options} 361 | - not: 362 | - eq: 363 | - ${airflow_database} 364 | - "mysql-local" 365 | 366 | oci_mysql_ip: 367 | type: string 368 | title: "OCI MySQL IP" 369 | description: "Private IP Address for the OCI MySQL server.
The default uses the default VCN configuration when creating a public subnet, if using a custom VCN or changing the VCN CIDR you will need to adjust this accordingly or it will fail." 370 | visible: 371 | and: 372 | - ${airflow_options} 373 | - not: 374 | - eq: 375 | - ${airflow_database} 376 | - "mysql-local" 377 | 378 | enable_mysql_backups: 379 | type: boolean 380 | title: "Enable MySQL backups" 381 | description: "Enable MySQL backups for OCI MySQL database" 382 | default: false 383 | visible: 384 | and: 385 | - ${airflow_options} 386 | - not: 387 | - eq: 388 | - ${airflow_database} 389 | - "mysql-local" 390 | 391 | airflow_options: 392 | type: boolean 393 | title: "Airflow Advanced Customization" 394 | description: "Click to enable advanced customization of Airflow installation options, including which hooks are installed, and what executor to use." 395 | default: false 396 | required: true 397 | 398 | all: 399 | type: boolean 400 | title: "all" 401 | default: "false" 402 | 403 | all_dbs: 404 | type: boolean 405 | title: "all_dbs" 406 | default: "false" 407 | 408 | async: 409 | type: boolean 410 | title: "async" 411 | default: "false" 412 | 413 | aws: 414 | type: boolean 415 | title: "aws" 416 | default: "false" 417 | 418 | azure: 419 | type: boolean 420 | title: "azure" 421 | default: "false" 422 | 423 | celery: 424 | type: boolean 425 | title: "celery" 426 | default: "false" 427 | 428 | cloudant: 429 | type: boolean 430 | title: "cloudant" 431 | default: "false" 432 | 433 | crypto: 434 | type: boolean 435 | title: "crypto" 436 | default: "false" 437 | 438 | devel: 439 | type: boolean 440 | title: "devel" 441 | default: "false" 442 | 443 | devel_hadoop: 444 | type: boolean 445 | title: "devel_hadoop" 446 | default: "false" 447 | 448 | druid: 449 | type: boolean 450 | title: "druid" 451 | default: "false" 452 | 453 | gcp: 454 | type: boolean 455 | title: "gcp" 456 | default: "false" 457 | 458 | github_enterprise: 459 | type: boolean 460 | title: "github_enterprise" 461 | default: "false" 462 | 463 | google_auth: 464 | type: boolean 465 | title: "google_auth" 466 | default: "false" 467 | 468 | hashicorp: 469 | type: boolean 470 | title: "hashicorp" 471 | default: "false" 472 | 473 | hdfs: 474 | type: boolean 475 | title: "hdfs" 476 | default: "false" 477 | 478 | hive: 479 | type: boolean 480 | title: "hive" 481 | default: "false" 482 | 483 | jdbc: 484 | type: boolean 485 | title: "jdbc" 486 | default: "false" 487 | 488 | kerberos: 489 | type: boolean 490 | title: "kerberos" 491 | default: "false" 492 | 493 | kubernetes: 494 | type: boolean 495 | title: "kubernetes" 496 | default: "false" 497 | 498 | ldap: 499 | type: boolean 500 | title: "ldap" 501 | default: "false" 502 | 503 | mssql: 504 | type: boolean 505 | title: "mssql" 506 | default: "false" 507 | 508 | mysql: 509 | type: boolean 510 | title: "mysql" 511 | default: "true" 512 | 513 | oracle: 514 | type: boolean 515 | title: "oracle" 516 | default: "true" 517 | 518 | password: 519 | type: boolean 520 | title: "password" 521 | default: "false" 522 | 523 | postgres: 524 | type: boolean 525 | title: "postgres" 526 | default: "false" 527 | 528 | presto: 529 | type: boolean 530 | title: "presto" 531 | default: "false" 532 | 533 | qds: 534 | type: boolean 535 | title: "qds" 536 | default: "false" 537 | 538 | rabbitmq: 539 | type: boolean 540 | title: "rabbitmq" 541 | default: "false" 542 | 543 | redis: 544 | type: boolean 545 | title: "redis" 546 | default: "false" 547 | 548 | samba: 549 | type: boolean 550 | title: "samba" 551 | 
default: "false" 552 | 553 | slack: 554 | type: boolean 555 | title: "slack" 556 | default: "false" 557 | 558 | ssh: 559 | type: boolean 560 | title: "ssh" 561 | default: "true" 562 | 563 | vertica: 564 | type: boolean 565 | title: "vertica" 566 | default: "false" 567 | 568 | 569 | --------------------------------------------------------------------------------