├── .github └── workflows │ └── python-publish.yml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── ContributorAgreement.txt ├── LICENSE ├── README.md ├── SUPPORT.md ├── pyproject.toml ├── setup.cfg ├── src └── sas_airflow_provider │ ├── __init__.py │ ├── example_dags │ ├── __init__.py │ ├── example_sas_jobexecution.py │ ├── example_sas_studio.py │ ├── example_studio_advanced.py │ └── example_templating.py │ ├── hooks │ ├── __init__.py │ └── sas.py │ ├── operators │ ├── __init__.py │ ├── sas_create_session.py │ ├── sas_delete_session.py │ ├── sas_jobexecution.py │ ├── sas_studio.py │ └── sas_studioflow.py │ └── util │ ├── __init__.py │ └── util.py └── tests ├── hooks ├── __init__.py └── test_sas.py ├── operators ├── __init__.py ├── test_sas_jobexecution.py └── test_sas_studio.py └── system ├── __init__.py ├── sas_create_delete_session.py ├── sas_jobexecution.py ├── sas_studio.py └── sas_studio_advanced.py /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | - name: Set up Python 26 | uses: actions/setup-python@v3 27 | with: 28 | python-version: '3.8' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install build 33 | - name: Build package 34 | run: python -m build 35 | - name: Publish package 36 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 37 | with: 38 | user: __token__ 39 | password: ${{ secrets.SAS_AIRFLOW_PROVIDER }} 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.log 3 | *.ini 4 | *.pickle 5 | 6 | # Airflow configuration 7 | airflow.cfg 8 | unittests.cfg 9 | airflow_login.py 10 | dbinit.py 11 | initdb.py 12 | secrets.py 13 | 14 | # Byte-compiled / optimized / DLL files 15 | __pycache__/ 16 | *.py[cod] 17 | *$py.class 18 | .pytest_cache/ 19 | 20 | # C extensions 21 | *.so 22 | 23 | # Distribution / packaging 24 | .Python 25 | env/ 26 | build/ 27 | develop-eggs/ 28 | dist/ 29 | downloads/ 30 | eggs/ 31 | .eggs/ 32 | lib/ 33 | lib64/ 34 | parts/ 35 | sdist/ 36 | var/ 37 | wheels/ 38 | *.egg-info/ 39 | .installed.cfg 40 | *.egg 41 | 42 | # PyInstaller 43 | # Usually these files are written by a python script from a template 44 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
45 | *.manifest 46 | *.spec 47 | 48 | # Installer logs 49 | pip-log.txt 50 | pip-delete-this-directory.txt 51 | 52 | # Unit test / coverage reports 53 | htmlcov/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage*.xml 59 | *,cover 60 | .hypothesis/ 61 | .pytest_cache 62 | 63 | # Translations 64 | *.mo 65 | *.pot 66 | 67 | # Django stuff: 68 | # *.log 69 | local_settings.py 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | /webserver_config.py 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # PyBuilder 80 | target/ 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # dotenv 89 | .env 90 | .autoenv*.zsh 91 | 92 | # virtualenv 93 | .venv* 94 | venv* 95 | ENV/ 96 | 97 | # PyCharm 98 | .idea/ 99 | *.iml 100 | 101 | # Visual Studio Code 102 | .vscode/ 103 | 104 | # vim 105 | *.swp 106 | 107 | # Emacs 108 | *~ 109 | \#*\# 110 | /.emacs.desktop 111 | /.emacs.desktop.lock 112 | *.elc 113 | auto-save-list 114 | tramp 115 | .\#* 116 | 117 | # OSX 118 | .DS_Store 119 | 120 | # SQL Server backups 121 | *.bkp 122 | 123 | # Spark 124 | rat-results.txt 125 | 126 | # Git stuff 127 | # Kubernetes generated templated files 128 | *.generated 129 | *.tar.gz 130 | scripts/ci/kubernetes/kube/.generated/airflow.yaml 131 | scripts/ci/kubernetes/docker/requirements.txt 132 | 133 | # Node & Webpack Stuff 134 | *.entry.js 135 | node_modules 136 | npm-debug.log* 137 | derby.log 138 | metastore_db 139 | 140 | 141 | /hive_scratch_dir/ 142 | /.bash_aliases 143 | /.bash_history 144 | /.kube 145 | /.inputrc 146 | log.txt* 147 | 148 | 149 | # .tfstate files 150 | *.tfstate 151 | *.tfstate.* 152 | 153 | # Might be generated when you build wheels 154 | pip-wheel-metadata 155 | 156 | .pypirc 157 | 158 | /.docs-venv 159 | 160 | # Dev files 161 | /dev/packages.txt 162 | /dev/Dockerfile.pmc 163 | 164 | # Generated UI licenses 165 | licenses/LICENSES-ui.txt 166 | 167 | # Generated out dir 168 | 169 | /out 170 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). If you introduce breaking changes, please group them together in the "Changed" section using the **BREAKING:** prefix. 6 | 7 | ## [v0.0.7] - 2023-07-31 8 | 9 | ### Added 10 | 11 | - Allow passing airflow vars as macro variables to SAS Job Execution Operator 12 | 13 | ### Fixed 14 | 15 | - 16 | 17 | ### Changed 18 | 19 | - 20 | 21 | ## [v0.0.6] - 2023-07-28 22 | 23 | ### Added 24 | 25 | - 26 | 27 | ### Fixed 28 | 29 | - macro vars blowing away env vars 30 | 31 | ### Changed 32 | 33 | - 34 | 35 | ## [v0.0.5] - 2023-07-14 36 | 37 | ### Deprecation Warning 38 | 39 | Please switch from the SAS Studio Flow Operator to SAS Studio Operator. The SAS Studio Flow Operator will still function, but any new functionality will be added to the SAS Studio Operator. 
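For example, a flow that previously ran through the SAS Studio Flow Operator can be switched to the SAS Studio Operator roughly as follows. This is a minimal sketch based on src/sas_airflow_provider/example_dags/example_sas_studio.py; the path, compute context, and the `dag` object are placeholders for your own values:

```python
from sas_airflow_provider.operators.sas_studio import SASStudioOperator

# assumes `dag` is a DAG object defined as in the bundled example DAGs
run_flow = SASStudioOperator(task_id="demo_studio_flow_1.flw",
                             path_type="content",  # flow stored in SAS Content
                             path="/Public/Airflow/demo_studio_flow_1.flw",
                             exec_log=True,
                             compute_context="SAS Studio compute context",
                             codegen_init_code=False,
                             codegen_wrap_code=False,
                             connection_name="sas_default",
                             dag=dag)
```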
40 | 
41 | ### Added
42 | 
43 | - New operator SAS Studio Operator, superseding SAS Studio Flow Operator
44 | - Ability to pass macro variables to the SAS Studio Operator
45 | - Ability to get macro variables as output via xcom from SAS Studio Operator
46 | - More templating parameters added
47 | - Ability to execute programs instead of just flows added for the SAS Studio Operator
48 | - Ability to pass in code to execute
49 | - New operator to create a Compute session - SAS Compute Create Session Operator
50 | - Ability to pass a compute session in to a SAS Studio Operator to avoid extra compute session startup time (optional)
51 | - Ability to pass a compute session in to a SAS Job Execution Operator to avoid extra compute session startup time (optional)
52 | - Note about security considerations in README
53 | - New example example_studio_advanced.py
54 | 
55 | ### Fixed
56 | 
57 | - Ability to pass in Airflow environment variables ([#12](https://github.com/sassoftware/sas-airflow-provider/issues/12))
58 | 
59 | ### Changed
60 | 
61 | - Code refactoring and cleanup
62 | - Existing examples have been updated
63 | 
64 | ## [v0.0.4] - 2023-06-09
65 | 
66 | ### Added
67 | 
68 | - Improved error handling for SAS Job Execution Operator
69 | - util.py to encapsulate standard functionality shared by both operators - operators were updated accordingly
70 | 
71 | ### Fixed
72 | 
73 | - SAS Job Execution Operator can now pull logs
74 | 
75 | ### Changed
76 | 
77 | - 
78 | 
79 | ## [v0.0.3] - 2023-05-26
80 | 
81 | ### Added
82 | 
83 | - SAS Studio Flow Operator now handles canceled and timed-out errors
84 | - Airflow Exception handling to SAS Studio Flow Operator
85 | - Templating support for SAS Studio Flow Operator
86 | - Templating support for SAS Job Execution Operator
87 | - Example for templating (example_templating.py)
88 | 
89 | ### Fixed
90 | 
91 | - 
92 | 
93 | ### Changed
94 | 
95 | - Updated documentation (typos, clarification and NO_PROXY)
96 | - example_sas_studioflow.py updated connection name
97 | - HTTP Status Code handling for SAS Job Execution Operator
98 | 
99 | ## [v0.0.2] - 2023-01-26
100 | 
101 | ### Added
102 | 
103 | - 
104 | 
105 | ### Fixed
106 | 
107 | - 
108 | 
109 | ### Changed
110 | 
111 | - example_sas_studioflow.py to show connection support
112 | 
113 | ## [v0.0.1] - 2023-01-12
114 | 
115 | Initial release
116 | 
-------------------------------------------------------------------------------- /CONTRIBUTING.md: --------------------------------------------------------------------------------
1 | # How to Contribute
2 | 
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 | 
6 | ## Contributor License Agreement
7 | 
8 | Contributions to this project must be accompanied by a signed
9 | [Contributor Agreement](ContributorAgreement.txt).
10 | You (or your employer) retain the copyright to your contribution;
11 | this simply gives us permission to use and redistribute your contributions as
12 | part of the project.
13 | 
14 | ## Code reviews
15 | 
16 | All submissions, including submissions by project members, require review. We
17 | use GitHub pull requests for this purpose. Consult
18 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
19 | information on using pull requests.
20 | -------------------------------------------------------------------------------- /ContributorAgreement.txt: -------------------------------------------------------------------------------- 1 | Contributor Agreement 2 | 3 | Version 1.1 4 | 5 | Contributions to this software are accepted only when they are 6 | properly accompanied by a Contributor Agreement. The Contributor 7 | Agreement for this software is the Developer's Certificate of Origin 8 | 1.1 (DCO) as provided with and required for accepting contributions 9 | to the Linux kernel. 10 | 11 | In each contribution proposed to be included in this software, the 12 | developer must include a "sign-off" that denotes consent to the 13 | terms of the Developer's Certificate of Origin. The sign-off is 14 | a line of text in the description that accompanies the change, 15 | certifying that you have the right to provide the contribution 16 | to be included. For changes provided in source code control (for 17 | example, via a Git pull request) the sign-off must be included in 18 | the commit message in source code control. For changes provided 19 | in email or issue tracking, the sign-off must be included in the 20 | email or the issue, and the sign-off will be incorporated into the 21 | permanent commit message if the contribution is accepted into the 22 | official source code. 23 | 24 | If you can certify the below: 25 | 26 | Developer's Certificate of Origin 1.1 27 | 28 | By making a contribution to this project, I certify that: 29 | 30 | (a) The contribution was created in whole or in part by me and I 31 | have the right to submit it under the open source license 32 | indicated in the file; or 33 | 34 | (b) The contribution is based upon previous work that, to the best 35 | of my knowledge, is covered under an appropriate open source 36 | license and I have the right under that license to submit that 37 | work with modifications, whether created in whole or in part 38 | by me, under the same open source license (unless I am 39 | permitted to submit under a different license), as indicated 40 | in the file; or 41 | 42 | (c) The contribution was provided directly to me by some other 43 | person who certified (a), (b) or (c) and I have not modified 44 | it. 45 | 46 | (d) I understand and agree that this project and the contribution 47 | are public and that a record of the contribution (including all 48 | personal information I submit with it, including my sign-off) is 49 | maintained indefinitely and may be redistributed consistent with 50 | this project or the open source license(s) involved. 51 | 52 | then you just add a line saying 53 | 54 | Signed-off-by: Random J Developer 55 | 56 | using your real name (sorry, no pseudonyms or anonymous contributions.) 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at
195 | 
196 | http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # SAS® Airflow Provider
2 | 
3 | ## Current major capabilities of the SAS® Studio Flow Operator
4 | 
5 | * Execute a SAS Studio Flow stored either on the File System or in SAS Content
6 | * Select the Compute Context to be used for execution of a SAS Studio Flow
7 | * Specify whether SAS logs of a SAS Studio Flow execution should be returned and displayed in Airflow
8 | * Specify parameters (init_code, wrap_code) to be used for code generation
9 | * Honor the return code of a SAS Studio Flow in Airflow. In particular, if a SAS Studio Flow fails, Airflow also raises an exception and stops execution
10 | * Authenticate via an OAuth token or via user/password (i.e. an OAuth token is generated prior to each call)
11 | 
12 | 
13 | ## Getting started
14 | Please note that this file is no substitute for reading and understanding the Airflow documentation. This file is only intended to provide a quick start for the SAS providers. Unless an issue relates specifically to the SAS providers, the Airflow documentation should be consulted.
15 | ### Install Airflow
16 | Follow instructions at https://airflow.apache.org/docs/apache-airflow/stable/installation/index.html to install Airflow.
17 | If you just want to evaluate the SAS providers, then the simplest path is to install via PyPI and run Airflow on the local machine in a virtual environment.
18 | 
19 | ### Install the SAS provider
20 | If you want to build the package from these sources, install the build module using `pip install build` and then run `python -m build` from the root of the repository, which will create a wheel file in the dist subdirectory.
21 | 
22 | #### Installing in a local virtual environment
23 | The SAS provider is available as a package published in PyPI. To install it, switch to the Python environment where Airflow is installed, and run the following command:
24 | 
25 | `pip install sas-airflow-provider`
26 | 
27 | If you would like to install the provider from a package you built locally, run:
28 | 
29 | `pip install dist/sas_airflow_provider_xxxxx.whl`
30 | 
31 | #### Installing in a container
32 | There are a few ways to provide the package:
33 | - Environment variable: set ```_PIP_ADDITIONAL_REQUIREMENTS``` to the arguments that will be passed to ```pip install```
34 | - Create a Dockerfile that adds the pip install command to the base image, and edit the docker-compose file to use "build" (there is a comment in the docker-compose file where you can change it)
35 | 
36 | ### Create a connection to SAS
37 | In order to connect to SAS Viya from the Airflow operator, you will need to create a connection. The easiest way to do this is to go into the Airflow UI under Admin/Connections and create a new connection using the blue + button. Select SAS from the list of connection types, and enter sas_default as the name.
The applicable fields are host (the HTTP or HTTPS URL of your SAS Viya installation), login, and password. It is also possible to specify an OAuth token by creating a JSON body in the extra field. For example `{"token": "oauth_token_here"}`. If a token is found, it is used instead of the user/password.
38 | Please be aware of security considerations when storing sensitive information in a
39 | connection. Consult https://airflow.apache.org/docs/apache-airflow/stable/security/index.html for details.
40 | TLS verification can be disabled (not recommended) by specifying the following in
41 | the extra field `{"ssl_certificate_verification": false}`.
42 | 
43 | In addition, a custom TLS CA certificate bundle file can be used as follows:
44 | `{"ssl_certificate_verification": "/path/to/trustedcerts.pem"}`. Note that the path used for the CA certificate bundle must reference a location within the Airflow pods.
45 | 
46 | Inbound security rules must allow communication from the Airflow web server pod through the ingress defined for SAS Viya. Connection timeout errors might occur if the rule is not in place.
47 | 
48 | ### Running a DAG with a SAS provider
49 | See example files in the src/sas_airflow_provider/example_dags directory. These DAGs can be modified and
50 | placed in your Airflow dags directory.
51 | 
52 | Mac note: If you are running Airflow standalone on a Mac, there is a known issue regarding how process forking works.
53 | This causes issues with urllib, which is used by the operator. To work around it, set NO_PROXY=* in your environment
54 | prior to running Airflow in standalone mode.
55 | For example:
56 | `export NO_PROXY="*"`
57 | 
58 | ### Prerequisites for running demo DAGs
59 | You will need to create a SAS Studio Flow or a Job Definition before you can reference it from a DAG. The easiest way is to use the SAS Studio UI to do this.
60 | 
61 | 
62 | ## Contributing
63 | We welcome your contributions! Please read [CONTRIBUTING.md](CONTRIBUTING.md) for
64 | details on how to submit contributions to this project.
65 | 
66 | ## License
67 | This project is licensed under the [Apache 2.0 License](LICENSE).
68 | 
-------------------------------------------------------------------------------- /SUPPORT.md: --------------------------------------------------------------------------------
1 | ## Support
2 | 
3 | We use GitHub for tracking bugs and feature requests. Please submit a GitHub issue or pull request for support.
4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = sas-airflow-provider 3 | version = 0.0.20 4 | author = SAS 5 | author_email = andrew.shakinovsky@sas.com 6 | description = Enables execution of Studio Flows and Jobs from Airflow 7 | long_description = file: README.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/sassoftware/sas-airflow-provider 10 | project_urls = 11 | Bug Tracker = https://github.com/sassoftware 12 | classifiers = 13 | Programming Language :: Python :: 3 14 | License :: OSI Approved :: Apache Software License 15 | Operating System :: OS Independent 16 | 17 | [options] 18 | package_dir = 19 | = src 20 | packages = find: 21 | python_requires = >=3.7 22 | 23 | [options.packages.find] 24 | where = src 25 | 26 | [options.entry_points] 27 | # this allows Airflow to find our connection provider 28 | apache_airflow_provider= 29 | provider_info=sas_airflow_provider.__init__:get_provider_info 30 | -------------------------------------------------------------------------------- /src/sas_airflow_provider/__init__.py: -------------------------------------------------------------------------------- 1 | def get_provider_info(): 2 | return { 3 | "package-name": "sas-airflow-provider", 4 | "name": "SAS Airflow Provider", 5 | "description": "Allows execution of SAS Studio Flows and Jobs", 6 | "connection-types": [ 7 | {"hook-class-name": "sas_airflow_provider.hooks.sas.SasHook", 8 | "connection-type": "sas"} 9 | ], 10 | "versions": ["0.0.1"] 11 | } 12 | -------------------------------------------------------------------------------- /src/sas_airflow_provider/example_dags/__init__.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | -------------------------------------------------------------------------------- /src/sas_airflow_provider/example_dags/example_sas_jobexecution.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | from datetime import datetime 19 | from airflow import DAG 20 | from sas_airflow_provider.operators.sas_jobexecution import SASJobExecutionOperator 21 | 22 | dag = DAG('sas_hello_world_jes', description='Hello World SAS DAG', 23 | schedule_interval='0 12 * * *', 24 | start_date=datetime(2022, 6, 1), catchup=False) 25 | 26 | 27 | # job parameters are passed into the job 28 | job_parameters = { 29 | "userName": "Demo" 30 | } 31 | 32 | hello_task = SASJobExecutionOperator(task_id='hello_task', 33 | job_name='/Public/Airflow/Hello-World', 34 | parameters=job_parameters, 35 | dag=dag) 36 | -------------------------------------------------------------------------------- /src/sas_airflow_provider/example_dags/example_sas_studio.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | from datetime import datetime 19 | from airflow import DAG 20 | from sas_airflow_provider.operators.sas_studio import SASStudioOperator 21 | 22 | dag = DAG('demo_studio_flow_1', description='Executing Studio Flow for demo purposes', 23 | schedule_interval='0 12 * * *', 24 | start_date=datetime(2022, 6, 1), catchup=False) 25 | 26 | environment_vars = { 27 | "env1": "val1", 28 | "env2": "val2" 29 | } 30 | 31 | task1 = SASStudioOperator(task_id='demo_studio_flow_1.flw', 32 | path_type='content', 33 | path='/Public/Airflow/demo_studio_flow_1.flw', 34 | exec_log=True, 35 | compute_context="SAS Studio compute context", 36 | codegen_init_code=False, 37 | codegen_wrap_code=False, 38 | connection_name='sas_default', 39 | env_vars=environment_vars, 40 | expiration_time="P1D", 41 | dag=dag) 42 | -------------------------------------------------------------------------------- /src/sas_airflow_provider/example_dags/example_studio_advanced.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. 
See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | from datetime import datetime 19 | from airflow import DAG 20 | from sas_airflow_provider.operators.sas_studio import SASStudioOperator 21 | from sas_airflow_provider.operators.sas_create_session import SASComputeCreateSession 22 | 23 | # demonstrate executing code directly as well as passing input/output macro variables and explicitly 24 | # creating a Compute session 25 | 26 | dag = DAG('demo_studio_flow_advanced', description='Execute code and pass variables', 27 | schedule="@once", 28 | start_date=datetime(2022, 6, 1), catchup=False) 29 | 30 | environment_vars = { 31 | "env1": "val1", 32 | "env2": "val2" 33 | } 34 | 35 | # Create a Compute session and make the session id available as XCom variable 36 | 37 | task0 = SASComputeCreateSession(task_id="create_sess", dag=dag) 38 | 39 | # execute a SAS program from a file in the Compute file system. The session created above is used (see the 40 | # xcom_pull in the template). The path should point to an existing .sas file that is accessible from the 41 | # session. For the purpose of the demonstration, the program should set two macro variables, AF_1 and AF_2. 42 | # Here is a possible demo program: 43 | # 44 | # %let AF_1 = One; 45 | # %let AF_2 = Two; 46 | # %put This is a test; 47 | # run; 48 | # 49 | # By setting output_macro_var_prefix, we are able to pull any macro variables that start with the prefix and make 50 | # them available as XCom variables. 51 | 52 | task1 = SASStudioOperator(task_id='demo_program', 53 | path_type='compute', 54 | exec_type='program', 55 | path='/path/to/test.sas', 56 | exec_log=True, 57 | compute_session_id="{{ ti.xcom_pull(key='compute_session_id', task_ids=['create_sess'])|first }}", 58 | compute_context="SAS Studio compute context", 59 | codegen_init_code=False, 60 | codegen_wrap_code=False, 61 | env_vars=environment_vars, 62 | output_macro_var_prefix="AF_", 63 | dag=dag) 64 | 65 | # The next task demonstrates the ability to directly execute code stored in a string parameter (see program2 below). 66 | # it also demonstrates reading xcom variables that were set as outputs above. 
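# Note: task_ids is passed to xcom_pull as a list in the templates below, so
# xcom_pull returns a list of values; the Jinja "|first" filter then selects the
# single value from that list.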
67 | program2 = '''
68 | %put value of one is &one;
69 | %put value of two is &two;
70 | run;
71 | '''
72 | 
73 | task2 = SASStudioOperator(task_id='demo_program_2',
74 |                           path_type='raw',
75 |                           exec_type='program',
76 |                           path=program2,
77 |                           exec_log=True,
78 |                           compute_session_id="{{ ti.xcom_pull(key='compute_session_id', task_ids=['create_sess'])|first }}",
79 |                           compute_context="SAS Studio compute context",
80 |                           codegen_init_code=False,
81 |                           codegen_wrap_code=False,
82 |                           env_vars=environment_vars,
83 |                           macro_vars={"one": "{{ti.xcom_pull(key='AF_1', task_ids=['demo_program'])|first}}",
84 |                                       "two": "{{ti.xcom_pull(key='AF_2', task_ids=['demo_program'])|first}}"},
85 |                           dag=dag)
86 | 
87 | 
88 | task0 >> task1 >> task2
89 | if __name__ == '__main__':
90 |     dag.test()
91 | 
-------------------------------------------------------------------------------- /src/sas_airflow_provider/example_dags/example_templating.py: --------------------------------------------------------------------------------
1 | from datetime import datetime
2 | from airflow import DAG
3 | from airflow.operators.bash import BashOperator
4 | from sas_airflow_provider.operators.sas_jobexecution import SASJobExecutionOperator
5 | 
6 | """
7 | This example DAG utilizes another open-source tool available from SAS: ValidateViya (https://github.com/sassoftware/pyviyatools/blob/master/validateviya-manual.md). This tool
8 | runs a suite of tests on a Viya environment, ensuring that everything is running as expected. This DAG is designed to automatically run ValidateViya on a given Viya environment
9 | each day and record the results in a SAS table.
10 | 
11 | This DAG shows off the power of XComs and Jinja templating when creating DAGs. XComs are designed to send small amounts of data from one Task to another while a DAG runs. Jinja
12 | templating enables a user to have dynamic data inside the parameters for their Operators. By combining Jinja templating and XComs, it is possible to have one Task run based upon
13 | data generated in the previous Task. In this example, the data generated by a BashOperator using validateviya is uploaded to a SAS table using a SASJobExecution Operator.
14 | """
15 | 
16 | # DAG
17 | # The DAG is scheduled to run each day at 6am UTC (2am EST).
18 | dag = DAG('validate_viya_daily', description='Validates Viya environment on a daily basis',
19 |           schedule_interval='0 6 * * *',
20 |           start_date=datetime(2023, 5, 23), catchup=False)
21 | 
22 | # Some important variables for all of the BashOperators
23 | endpoint = "https://example.com/"
24 | user = "example_user"
25 | password = "hunter2"
26 | 
27 | # Each of the BashOperators must first set some important environment variables before it can run.
28 | namespace = "example_env"
29 | envExport = "export GELLOW_NAMESPACE=" + namespace + "; export SAS_CLI_PROFILE=${GELLOW_NAMESPACE}; export SSL_CERT_FILE=~/.certs/${GELLOW_NAMESPACE}_trustedcerts.pem; export REQUESTS_CA_BUNDLE=${SSL_CERT_FILE};"
30 | 
31 | # Setup pyviyatools
32 | # This is a BashOperator, which executes commands in a Bash shell.
33 | # The following commands are used to set up the environment in order to use validateviya. When the BashOperator is done running, the environment that the code runs in is reset.
34 | # This means that each time we run the BashOperator, we must start by exporting the important environment variables necessary to run validateviya and its related programs
35 | setup_code = envExport + " cd ~/pyviyatools; python3 setup.py; /opt/sas/viya/home/bin/sas-viya profile init --colors-enabled=true --output=json --sas-endpoint=\"" + endpoint + "\";"
36 | setup = BashOperator(
37 |     task_id="setup_pyviyatools",
38 |     bash_command=setup_code,
39 |     dag=dag,
40 | )
41 | 
42 | # Validate viya full
43 | # This BashOperator runs validateviya to create an HTML report of the results. This is saved as a .html file in the ~/pyviyatools folder.
44 | vv_full_code = envExport + " cd ~/pyviyatools; /opt/sas/viya/home/bin/sas-viya auth login -u=" + user + " -p=" + password + "; python3 validateviya.py -o report-full;"
45 | validate_viya_full = BashOperator(
46 |     task_id="generate_report",
47 |     bash_command=vv_full_code,
48 |     dag=dag
49 | )
50 | 
51 | # Validate viya csv
52 | # This BashOperator runs validateviya to create a CSV string of the results. Each row of data is typically delimited by \n, but I use tr to replace all \n characters with spaces,
53 | # allowing for the data to all be contained in one line of output. This is important for moving results from one task to another, as the last line of bash output by a BashOperator
54 | # is pushed to an XCom and is therefore accessible to other tasks.
55 | vv_csv_code = envExport + " cd ~/pyviyatools; /opt/sas/viya/home/bin/sas-viya auth login -u=" + user + " -p=" + password + "; python3 validateviya.py -o csv -s -c csvTests.json | tr -d ' ' | tr '\n' ' '"
56 | validate_viya_csv = BashOperator(
57 |     task_id="generate_csv",
58 |     bash_command=vv_csv_code,
59 |     dag=dag
60 | )
61 | 
62 | # Push CSV data to SAS table
63 | # This SASJobExecutionOperator gets the results from validate_viya_csv and submits them to a SAS job that's responsible for cleaning up some of the input and then publishing it
64 | # to a table. The parameters field includes the use of templating with {{task_instance.xcom_pull(task_ids='generate_csv')}}. Directly before execution of the task, this is evaluated
65 | # and instructs the code to fill in everything between {{}} with the XCom value pushed by the task with the id "generate_csv". Because all BashOperators push the last line of
66 | # output to an XCom, this will evaluate to the last line of output for the task above.
67 | # Further information on XComs and templating can be found in the Airflow documentation:
68 | # https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/xcoms.html
69 | # https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/operators.html#concepts-jinja-templating
70 | validate_viya_SAS = SASJobExecutionOperator(
71 |     task_id='csv_to_SAS',
72 |     job_name='/Public/append_validate_data',
73 |     parameters={"_input1":"{{task_instance.xcom_pull(task_ids='generate_csv')}}"},
74 |     dag=dag
75 | )
76 | 
77 | # Set up the DAG such that setup must run before anything else, and the CSV data must be generated before it is published
78 | setup >> validate_viya_full
79 | setup >> validate_viya_csv >> validate_viya_SAS
-------------------------------------------------------------------------------- /src/sas_airflow_provider/hooks/__init__.py: --------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements.
See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | -------------------------------------------------------------------------------- /src/sas_airflow_provider/hooks/sas.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from airflow.hooks.base import BaseHook 4 | import base64 5 | import urllib.parse 6 | import requests 7 | import urllib3 8 | from urllib3.exceptions import InsecureRequestWarning 9 | 10 | class SasHook(BaseHook): 11 | """Hook to manage connection to SAS""" 12 | 13 | conn_name_attr = 'sas_conn_id' 14 | default_conn_name = 'sas_default' 15 | conn_type = 'sas' 16 | hook_name = 'SAS' 17 | 18 | def __init__(self, conn_id: str = None) -> None: 19 | super().__init__() 20 | self.client_secret = None 21 | self.client_id = None 22 | self.conn_id = conn_id 23 | self.host = None 24 | self.login = None 25 | self.password = None 26 | self.token = None 27 | self.sas_conn = None 28 | self.cert_verify = True 29 | self.grant_type = None 30 | 31 | def get_conn(self, http_timeout=None): 32 | """Returns a SAS connection.""" 33 | if self.conn_id is None: 34 | self.conn_id = self.default_conn_name 35 | conn = self.get_connection(self.conn_id) 36 | self.host = conn.host 37 | self.login = conn.login 38 | self.password = conn.password 39 | 40 | extras = conn.extra_dejson 41 | self.token = extras.get("token") 42 | self.client_id = extras.get("client_id") 43 | self.grant_type = extras.get("grant_type", "password") 44 | self.client_secret = "" 45 | if not self.client_id: 46 | self.client_id = "sas.cli" 47 | else: 48 | self.client_secret = extras.get("client_secret") # type: ignore 49 | 50 | self.cert_verify = extras.get("ssl_certificate_verification", True) 51 | if not self.cert_verify: 52 | self.log.info(f"TLS verification is turned off") 53 | elif isinstance(self.cert_verify, str): 54 | self.log.info("Using custom TLS CA certificate bundle file") 55 | 56 | if not self.sas_conn: 57 | self.sas_conn = self._create_session_for_connection(http_timeout=http_timeout) 58 | 59 | return self.sas_conn 60 | 61 | def _create_session_for_connection(self, http_timeout=None): 62 | self.log.info(f"Creating session for connection named %s to host %s", 63 | self.conn_id, 64 | self.host) 65 | 66 | if not self.cert_verify: 67 | # disable insecure HTTP requests warnings 68 | urllib3.disable_warnings(InsecureRequestWarning) 69 | 70 | if not self.token: 71 | # base 64 encode the api client auth and pass in authorization header 72 | auth_str = f"{self.client_id}:{self.client_secret}" 73 | auth_bytes = auth_str.encode("ascii") 74 | auth_header = base64.b64encode(auth_bytes).decode("ascii") 75 | my_headers = {"Authorization": f"Basic {auth_header}"} 76 | 77 | payload = {"grant_type": self.grant_type} 78 | if self.login: 79 | payload["username"] = self.login 80 | 
                payload["password"] = self.password
81 | 
82 |             self.log.info("Getting OAuth token (see https://github.com/sassoftware/sas-airflow-provider?tab=readme-ov-file#create-a-connection-to-sas if this fails)")
83 |             response = requests.post(
84 |                 f"{self.host}/SASLogon/oauth/token",
85 |                 data=payload,
86 |                 verify=self.cert_verify,
87 |                 headers=my_headers,
88 |                 timeout=http_timeout
89 |             )
90 |             if response.status_code != 200:
91 |                 raise RuntimeError(f"Get token failed with status code: {response.status_code}")
92 | 
93 |             r = response.json()
94 |             self.token = r["access_token"]
95 | 
96 |         session = requests.Session()
97 | 
98 |         # set up standard headers
99 |         session.headers.update({"Authorization": f"bearer {self.token}"})
100 |         session.headers.update({"Accept": "application/json"})
101 |         session.headers.update({"Content-Type": "application/json"})
102 | 
103 |         # set to false if using self-signed certs
104 |         session.verify = self.cert_verify
105 | 
106 |         # prepend the root url for all operations on the session, so that consumers can just provide
107 |         # resource uri without the protocol and host
108 |         root_url = self.host
109 |         session.get = lambda *args, **kwargs: requests.Session.get(  # type: ignore
110 |             session, urllib.parse.urljoin(root_url, args[0]), *args[1:], **kwargs
111 |         )
112 |         session.post = lambda *args, **kwargs: requests.Session.post(  # type: ignore
113 |             session, urllib.parse.urljoin(root_url, args[0]), *args[1:], **kwargs
114 |         )
115 |         session.put = lambda *args, **kwargs: requests.Session.put(  # type: ignore
116 |             session, urllib.parse.urljoin(root_url, args[0]), *args[1:], **kwargs
117 |         )
118 |         session.delete = lambda *args, **kwargs: requests.Session.delete(  # type: ignore
119 |             session, urllib.parse.urljoin(root_url, args[0]), *args[1:], **kwargs
120 |         )
121 |         return session
122 | 
-------------------------------------------------------------------------------- /src/sas_airflow_provider/operators/__init__.py: --------------------------------------------------------------------------------
1 | #
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 | 
-------------------------------------------------------------------------------- /src/sas_airflow_provider/operators/sas_create_session.py: --------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License.
You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | 
18 | from __future__ import annotations
19 | from typing import Sequence
20 | from airflow.exceptions import AirflowException
21 | from airflow.models import BaseOperator
22 | from sas_airflow_provider.hooks.sas import SasHook
23 | from sas_airflow_provider.util.util import \
24 |     create_or_connect_to_session
25 | 
26 | 
27 | class SASComputeCreateSession(BaseOperator):
28 |     """
29 |     Create a Compute session and push the session id as an XCom named 'compute_session_id'.
30 |     This can be used as an input for the SASStudioOperator to give finer-grained control over sessions.
31 | 
32 |     :param connection_name: (optional) name of the connection to use. The connection should be defined
33 |         as an HTTP connection in Airflow. If not specified, the default is used
34 |     :param compute_context_name: (optional) Name of the Compute context to use. If not provided, a
35 |         suitable default is used.
36 |     :param session_name: (optional) name to give the created session. If not provided, a suitable default is used
37 |     :param http_timeout: (optional) Timeout for HTTPS requests. Default value is (30.05, 300), meaning a connect timeout slightly above 30 seconds and
38 |         a read timeout of 300 seconds, during which the operator will wait for the server to send a response.
39 |     :param job_name_prefix: (optional) string. Specify a name that you want the compute session to identify as in SAS Workload Orchestrator (SWO).
40 |         If job_name_prefix is not specified, the default prefix is determined by Viya (currently 'sas-compute-server-').
41 |         If the value cannot be parsed by Viya to create a valid k8s pod name, the default value will be used as well.
42 |         job_name_prefix is supported from Viya Stable 2024.07
43 |     """
44 | 
45 |     ui_color = "#CCE5FF"
46 |     ui_fgcolor = "#000000"
47 | 
48 |     # template fields are fields which can be templated out in the Airflow task using {{ }}
49 |     template_fields: Sequence[str] = ("compute_context_name", "session_name")
50 | 
51 |     def __init__(
52 |         self,
53 |         connection_name=None,
54 |         compute_context_name="SAS Studio compute context",
55 |         session_name="Airflow-Session",
56 |         http_timeout=(30.05, 300),
57 |         job_name_prefix=None,
58 |         **kwargs,
59 |     ) -> None:
60 | 
61 |         super().__init__(**kwargs)
62 |         self.connection = None
63 |         self.connection_name = connection_name
64 |         self.compute_context_name = compute_context_name
65 |         self.session_name = session_name
66 |         self.compute_session_id = ""
67 |         self.http_timeout = http_timeout
68 |         self.job_name_prefix = job_name_prefix
69 | 
70 |     def execute(self, context):
71 |         try:
72 |             self.log.info("Authenticate connection")
73 |             h = SasHook(self.connection_name)
74 |             self.connection = h.get_conn()
75 |             self._connect_compute()
76 |             self.xcom_push(context, 'compute_session_id', self.compute_session_id)
77 |         # support retry if API calls fail for whatever reason
78 |         except Exception as e:
79 |             raise AirflowException(f"SASComputeCreateSession error: {str(e)}")
80 | 
81 |         return 1
82 | 
83 |     def _connect_compute(self):
84 |         # connect to compute if we are not connected, and set our compute session id
85 |         if not self.compute_session_id:
86 |             self.log.info("Creating or connecting to compute session")
87 |             sesh = create_or_connect_to_session(self.connection, self.compute_context_name, self.session_name, self.http_timeout, self.job_name_prefix)
88 |             self.compute_session_id = sesh["id"]
89 |             self.log.info(f"Created session with id {self.compute_session_id}")
90 | 
91 | 
-------------------------------------------------------------------------------- /src/sas_airflow_provider/operators/sas_delete_session.py: --------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | 
18 | from __future__ import annotations
19 | from typing import Sequence
20 | from airflow.exceptions import AirflowException
21 | from airflow.models import BaseOperator
22 | from sas_airflow_provider.hooks.sas import SasHook
23 | from sas_airflow_provider.util.util import \
24 |     create_or_connect_to_session, find_named_compute_session, end_compute_session
25 | 
26 | 
27 | class SASComputeDeleteSession(BaseOperator):
28 |     """
29 |     Delete a Compute session. Either a compute_session_name or a compute_session_id should be provided.
30 |     The result is pushed as a True/False XCom named disconnect_succeeded
31 | 
32 |     :param connection_name: (optional) name of the connection to use.
The connection should be defined 33 | as an HTTP connection in Airflow. If not specified, the default is used. 34 | :param compute_session_name: (optional) name of the session to delete 35 | :param compute_session_id: (optional) id of the session to delete 36 | """ 37 | 38 | ui_color = "#CCE5FF" 39 | ui_fgcolor = "#000000" 40 | 41 | # template fields are fields which can be templated out in the Airflow task using {{ }} 42 | template_fields: Sequence[str] = ("compute_session_id", "compute_session_name") 43 | 44 | def __init__( 45 | self, 46 | connection_name=None, 47 | compute_session_name="", 48 | compute_session_id="", 49 | **kwargs, 50 | ) -> None: 51 | if not compute_session_id and not compute_session_name: 52 | raise AirflowException("Either compute_session_name or compute_session_id must be provided") 53 | super().__init__(**kwargs) 54 | self.connection = None 55 | self.connection_name = connection_name 56 | self.compute_session_name = compute_session_name 57 | self.compute_session_id = compute_session_id 58 | self.success = False 59 | 60 | def execute(self, context): 61 | try: 62 | self.log.info("Authenticate connection") 63 | h = SasHook(self.connection_name) 64 | self.connection = h.get_conn() 65 | self._delete_compute() 66 | self.xcom_push(context, 'disconnect_succeeded', self.success) 67 | # re-raise as AirflowException to support retry if API calls fail for whatever reason 68 | except Exception as e: 69 | raise AirflowException(f"SASComputeDeleteSession error: {str(e)}") 70 | 71 | return 1 72 | 73 | def _delete_compute(self): 74 | if self.compute_session_name: 75 | self.log.info(f"Find session named {self.compute_session_name}") 76 | sesh = find_named_compute_session(self.connection, self.compute_session_name) 77 | if sesh: 78 | self.compute_session_id = sesh["id"] 79 | else: 80 | self.log.info(f"Session named {self.compute_session_name} not found") 81 | return 82 | self.log.info(f"Delete session with id {self.compute_session_id}") 83 | self.success = end_compute_session(self.connection, self.compute_session_id) 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /src/sas_airflow_provider/operators/sas_jobexecution.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | 19 | from __future__ import annotations 20 | from typing import Sequence 21 | import os 22 | import urllib.parse 23 | 24 | from airflow.exceptions import AirflowFailException 25 | from airflow.models import BaseOperator 26 | from sas_airflow_provider.hooks.sas import SasHook 27 | from sas_airflow_provider.util.util import dump_logs 28 | 29 | class SASJobExecutionOperator(BaseOperator): 30 | """ 31 | Executes a SAS Job using the /SASJobExecution endpoint.
Job execution is documented here: 32 | https://go.documentation.sas.com/doc/en/pgmsascdc/default/jobexecug/p1ct9uzl5c7omun1t2zy0gxhlqlc.htm 33 | The specific endpoint /SASJobExecution is documented here: 34 | https://go.documentation.sas.com/doc/en/pgmsascdc/default/jobexecug/n06tcybrt9wdeun1ko9bkjn0ko0b.htm 35 | 36 | :param connection_name: Name of the SAS Viya connection stored as an Airflow HTTP connection 37 | :param job_name: Name of the SAS Job to be run 38 | :param parameters: Dictionary of all the parameters that should be passed to the 39 | SAS Job as SAS macro variables 40 | :param job_exec_log: boolean. Whether or not to dump out the log (default is False) 41 | :param add_airflow_vars: boolean. Whether or not to add Airflow environment variables as macro variables 42 | (default is False) 43 | """ 44 | 45 | template_fields: Sequence[str] = ("parameters",) 46 | 47 | def __init__(self, 48 | job_name: str, 49 | parameters: dict, 50 | connection_name: str = None, 51 | job_exec_log: bool = False, 52 | add_airflow_vars: bool = False, 53 | **kwargs) -> None: 54 | super().__init__(**kwargs) 55 | self.connection_name = connection_name 56 | self.job_name = job_name 57 | self.parameters = parameters 58 | self.job_exec_log = job_exec_log 59 | self.add_airflow_vars = add_airflow_vars 60 | 61 | def _add_airflow_env_vars(self): 62 | for x in ['AIRFLOW_CTX_DAG_OWNER', 63 | 'AIRFLOW_CTX_DAG_ID', 64 | 'AIRFLOW_CTX_TASK_ID', 65 | 'AIRFLOW_CTX_EXECUTION_DATE', 66 | 'AIRFLOW_CTX_TRY_NUMBER', 67 | 'AIRFLOW_CTX_DAG_RUN_ID', ]: 68 | v = os.getenv(x) 69 | if v: 70 | self.parameters[x] = v 71 | 72 | def execute(self, context): 73 | h = SasHook(self.connection_name) 74 | session = h.get_conn() 75 | 76 | if self.add_airflow_vars: 77 | print("Adding Airflow variables as parameters") 78 | self._add_airflow_env_vars() 79 | 80 | print(f"Executing SAS job: {self.job_name}") 81 | # URL-escape the program name 82 | program_name = urllib.parse.quote(self.job_name) 83 | url_string = "" 84 | for key, value in self.parameters.items(): 85 | url_string += f"&{key}={urllib.parse.quote(value)}" 86 | 87 | url = f"/SASJobExecution/?_program={program_name}{url_string}" 88 | 89 | headers = {"Accept": "application/vnd.sas.job.execution.job+json"} 90 | response = session.post(url, headers=headers) 91 | 92 | if response.status_code < 200 or response.status_code >= 300: 93 | raise AirflowFailException(f"SAS Job Execution HTTP status code {response.status_code}") 94 | 95 | error_code = response.headers.get('X-Sas-Jobexec-Error') 96 | if error_code: 97 | print(response.text) 98 | raise AirflowFailException(f"SAS Job Execution failed with code {error_code}") 99 | 100 | if self.job_exec_log: 101 | job_id = response.headers.get('X-Sas-Jobexec-Id') 102 | if job_id: 103 | job_status_url = f"/jobExecution/jobs/{job_id}" 104 | job = session.get(job_status_url) 105 | if job.ok: 106 | dump_logs(session, job.json()) 107 | else: 108 | print(f"Failed to get job status for logs. /jobExecution/jobs returned {job.status_code}") 109 | else: 110 | print("Failed to get job id for logs. X-Sas-Jobexec-Id not found in response headers") 111 | 112 | return 1 113 | -------------------------------------------------------------------------------- /src/sas_airflow_provider/operators/sas_studio.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements.
See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | from __future__ import annotations 19 | from typing import Sequence 20 | import os 21 | import time 22 | 23 | from airflow.exceptions import AirflowFailException 24 | from airflow.exceptions import AirflowException 25 | from airflow.exceptions import AirflowTaskTimeout 26 | from airflow.models import BaseOperator 27 | from sas_airflow_provider.hooks.sas import SasHook 28 | from sas_airflow_provider.util.util import stream_log, create_or_connect_to_session, end_compute_session 29 | 30 | # main API URI for Code Gen 31 | URI_BASE = "/studioDevelopment/code" 32 | # default context name 33 | DEFAULT_COMPUTE_CONTEXT_NAME = "SAS Studio compute context" 34 | # when creating a session it will be given this name 35 | AIRFLOW_SESSION_NAME = "Airflow-Session" 36 | 37 | JES_URI = "/jobExecution" 38 | JOB_URI = f"{JES_URI}/jobs" 39 | 40 | def on_success(context): 41 | # Only kill the session when it is not reused or externally managed 42 | context['task']._clean_up(also_kill_reused_session=False) 43 | 44 | def on_failure(context): 45 | # Kill all sessions except externally managed ones 46 | context['task']._clean_up(also_kill_reused_session=True) 47 | 48 | def on_retry(context): 49 | # Kill all sessions except externally managed ones 50 | context['task']._clean_up(also_kill_reused_session=True) 51 | 52 | 53 | 54 | class SASStudioOperator(BaseOperator): 55 | """ 56 | Executes a SAS Studio flow or a SAS program. 57 | 58 | :param path_type: a type that indicates what the path parameter represents. Valid values: 59 | content - the path parameter represents a path in SAS Content. For example /Public/myflow.flw 60 | compute - the path parameter represents a server file-system path that is accessible from a SAS session. 61 | raw - the path parameter itself is a string of SAS code, or a JSON representation of a flow. 62 | :param path: path to the flow/program to execute, or the actual flow/code. See above. 63 | :param exec_log: boolean. Indicates whether to dump the execution log to the Airflow log 64 | :param exec_type: (optional) "flow" or "program". By default this operator will execute a Studio Flow. If you specify 65 | "program" then it will execute a program, i.e. your path would either specify a path to the program 66 | like /Public/mycode.sas or would be the actual program itself. 67 | :param codegen_init_code: (optional) boolean. Whether to generate init code 68 | (default value: False) 69 | :param codegen_wrap_code: (optional) boolean. Whether to generate wrapper code 70 | (default value: False) 71 | :param connection_name: (optional) name of the connection to use. The connection should be defined 72 | as an HTTP connection in Airflow. If not specified, the default is used (sas_default) 73 | :param compute_context: (optional) Name of the Compute context to use.
If not provided, a 74 | suitable default is used (see DEFAULT_COMPUTE_CONTEXT_NAME). 75 | :param env_vars: (optional) Dictionary of environment variables to set before running the flow. 76 | :param macro_vars: (optional) Dictionary of macro variables to set before running the flow. 77 | :param allways_reuse_session: (optional) Specify True to always reuse the same Compute session across all tasks. The name 78 | of the session will be the default session name (see AIRFLOW_SESSION_NAME), which means that if you don't supply a session id in compute_session_id, 79 | then this named session will be created and later re-used between tasks. The disadvantage is that it offers less flexibility in terms of 80 | having multiple sessions (parallelism). Default value is False, meaning a new unnamed compute session will always be created 81 | UNLESS a session id is specified in compute_session_id. 82 | :param compute_session_id: (optional) Compute session id to use for the task. If a session id is specified, this will override allways_reuse_session. 83 | Use the SASComputeCreateSession operator to define a task that will create the session. This gives full flexibility in how compute sessions are used. 84 | The id of the session created by SASComputeCreateSession will be made available as an XCom variable 'compute_session_id' 85 | for subsequent use by SASStudioOperator tasks. Tip: set the value to "{{ ti.xcom_pull(key='compute_session_id', task_ids=[''])|first}}" to get the XCom value. 86 | :param output_macro_var_prefix: (optional) string. If this has a value, then any macro variables which start 87 | with this prefix will be retrieved from the session after the code has executed and will be returned as XComs 88 | :param unknown_state_timeout: (optional) number of seconds to continue polling for the state of a running job if the state is 89 | temporarily unobtainable. When unknown_state_timeout is reached without the state being retrievable, the operator 90 | will throw an AirflowFailException and the task will be marked as failed. 91 | Default value is 0, meaning the task will fail immediately if the state could not be retrieved. 92 | :param http_timeout: (optional) Timeout for HTTPS requests. Default value is (30.05, 300), meaning a connect timeout slightly above 30 seconds and 93 | a read timeout of 300 seconds during which the operator will wait for the server to send a response. 94 | :param expiration_time: (optional) string. If specified, this is a W3 duration string that indicates how long the job should live, e.g. "PT1H" for 1 hour. 95 | :param job_name_prefix: (optional) string. The name that the compute session should identify itself as in SAS Workload Orchestrator (SWO). 96 | If job_name_prefix is not specified, the default prefix is determined by Viya (currently 'sas-compute-server-'). 97 | If the value cannot be parsed by Viya to create a valid k8s pod name, the default value will be used as well.
98 | job_name_prefix is supported from Viya Stable 2024.07 99 | """ 100 | 101 | ui_color = "#CCE5FF" 102 | ui_fgcolor = "#000000" 103 | 104 | 105 | 106 | template_fields: Sequence[str] = ("env_vars", "macro_vars", "compute_session_id", "path", "expiration_time", "job_name_prefix") 107 | 108 | def __init__( 109 | self, 110 | path_type: str, 111 | path: str, 112 | exec_log: bool, 113 | exec_type="flow", 114 | codegen_init_code=False, 115 | codegen_wrap_code=False, 116 | connection_name=None, 117 | compute_context=DEFAULT_COMPUTE_CONTEXT_NAME, 118 | env_vars=None, 119 | macro_vars=None, 120 | allways_reuse_session=False, 121 | compute_session_id="", 122 | output_macro_var_prefix="", 123 | unknown_state_timeout=0, 124 | job_name_prefix=None, 125 | http_timeout=(30.05, 300), 126 | expiration_time="", 127 | **kwargs, 128 | ) -> None: 129 | 130 | super().__init__(**kwargs) 131 | if env_vars is None: 132 | env_vars = {} 133 | self.path_type = path_type 134 | self.exec_type = exec_type 135 | self.path = path 136 | self.exec_log = exec_log 137 | self.codegen_init_code = codegen_init_code 138 | self.codegen_wrap_code = codegen_wrap_code 139 | self.connection_name = connection_name 140 | self.compute_context_name = compute_context 141 | self.env_vars = env_vars 142 | self.macro_vars = macro_vars 143 | self.connection = None 144 | self.allways_reuse_session = allways_reuse_session 145 | self.job_name_prefix = job_name_prefix 146 | self.expiration_time = expiration_time 147 | 148 | self.external_managed_session = False 149 | self.compute_session_id = None 150 | if compute_session_id: 151 | self.compute_session_id = compute_session_id 152 | self.external_managed_session = True 153 | 154 | self.output_macro_var_prefix = output_macro_var_prefix.upper() 155 | self.unknown_state_timeout = max(unknown_state_timeout, 0) 156 | 157 | # Use callbacks to clean up sessions 158 | self.on_success_callback = [on_success] 159 | 160 | if self.on_failure_callback is None: 161 | self.on_failure_callback = [on_failure] 162 | else: 163 | self.on_failure_callback = [on_failure, self.on_failure_callback] 164 | 165 | self.on_retry_callback = [on_retry] 166 | 167 | # Timeout 168 | self.http_timeout = http_timeout 169 | 170 | 171 | def execute(self, context): 172 | if self.path_type not in ['compute', 'content', 'raw']: 173 | raise AirflowFailException("Path type is invalid. Valid values are 'compute', 'content' or 'raw'") 174 | if self.exec_type not in ['flow', 'program']: 175 | raise AirflowFailException("Execution type is invalid.
Valid values are 'flow' and 'program'") 176 | # make sure expiration time is a valid W3 duration 177 | if self.expiration_time and not self.expiration_time.startswith("P"): 178 | raise AirflowFailException("Expiration time is not a valid W3 duration string") 179 | self._add_airflow_env_vars() 180 | 181 | try: 182 | self.log.info("Authenticate connection") 183 | h = SasHook(self.connection_name) 184 | self.connection = h.get_conn(http_timeout=self.http_timeout) 185 | 186 | # Create compute session 187 | if not self.compute_session_id: 188 | compute_session = create_or_connect_to_session(self.connection, 189 | self.compute_context_name, 190 | AIRFLOW_SESSION_NAME if self.allways_reuse_session else None, 191 | http_timeout=self.http_timeout, 192 | job_name_prefix=self.job_name_prefix 193 | ) 194 | self.compute_session_id = compute_session["id"] 195 | else: 196 | self.log.info(f"Compute Session {self.compute_session_id} was provided") 197 | 198 | # Generate SAS code 199 | if self.path_type == "raw": 200 | code = self.path 201 | else: 202 | self.log.info("Generate code for Studio object: %s", str(self.path)) 203 | res = self._generate_object_code() 204 | code = res["code"] 205 | 206 | # add code for macros and env vars 207 | final_code = self._get_pre_code() + code 208 | 209 | # Create the job request for JES 210 | jr = { 211 | "name": f"Airflow_{self.task_id}", 212 | "jobDefinition": {"type": "Compute", "code": final_code}, 213 | } 214 | if self.expiration_time: 215 | jr['expiresAfter'] = self.expiration_time 216 | 217 | # if we have a session id, we will use that, otherwise we'll use the context name 218 | if self.compute_session_id: 219 | jr['arguments'] = {"_sessionId": self.compute_session_id} 220 | else: 221 | jr['arguments'] = {"_contextName": self.compute_context_name} 222 | # the jobExecution service will destroy the compute session if it was not passed in. 223 | if self.output_macro_var_prefix: 224 | self.log.info("Output macro variables will not be available. To make them available please " 225 | "specify a compute session") 226 | 227 | # Support retry if API calls fail for whatever reason, as no harm is done 228 | except Exception as e: 229 | raise AirflowException(f"SASStudioOperator error: {str(e)}") 230 | 231 | # Kick off the JES job and wait to get the state. 232 | # _run_job_and_wait will poll for new 233 | # SAS log lines and stream them into the DAG's log 234 | job, success = self._run_job_and_wait(jr, 10) 235 | job_state = "unknown" 236 | if "state" in job: 237 | job_state = job["state"] 238 | 239 | # set output variables 240 | if success and self.output_macro_var_prefix and self.compute_session_id: 241 | try: 242 | self._set_output_variables(context) 243 | except Exception as e: 244 | raise AirflowException(f"SASStudioOperator error: {str(e)}") 245 | 246 | # raise exception in Airflow if SAS Studio execution ended with "failed", "canceled" or "timed out" state 247 | # support retry for 'failed' (typically there is an ERROR in the log) and 'timed out' 248 | # do NOT support retry for 'canceled' (typically the SAS Job called ABORT ABEND) 249 | if job_state == "failed": 250 | raise AirflowException("SAS Studio Execution completed with an error.") 251 | 252 | elif job_state == "canceled": 253 | raise AirflowFailException("SAS Studio Execution was cancelled or aborted. See log for details.") 254 | 255 | elif job_state == "timed out": 256 | raise AirflowException("SAS Studio Execution has timed out.
See log for details.") 257 | 258 | elif job_state == "timedOut": 259 | raise AirflowException("SAS Studio Execution has timed out. See log for details.") 260 | 261 | return 1 262 | 263 | def on_kill(self) -> None: 264 | self._clean_up(also_kill_reused_session=True) 265 | 266 | def _clean_up(self, also_kill_reused_session=False): 267 | # Always kill unnamed sessions (allways_reuse_session is False); 268 | # however, if also_kill_reused_session is specified, also kill the reused session. 269 | # Never kill externally managed sessions, as this may prevent restart 270 | if self.compute_session_id and not self.external_managed_session: 271 | if (also_kill_reused_session and self.allways_reuse_session) or not self.allways_reuse_session: 272 | try: 273 | self.log.info(f"Deleting session with id {self.compute_session_id}") 274 | success_end = end_compute_session(self.connection, self.compute_session_id, http_timeout=self.http_timeout) 275 | if success_end: 276 | self.log.info("Compute session successfully deleted") 277 | else: 278 | self.log.info("Unable to delete compute session. You may need to kill the session manually") 279 | self.compute_session_id = None 280 | 281 | except Exception as e: 282 | self.log.info("Unable to delete compute session. You may need to kill the session manually") 283 | self.compute_session_id = None 284 | 285 | def _add_airflow_env_vars(self): 286 | for x in ['AIRFLOW_CTX_DAG_OWNER', 287 | 'AIRFLOW_CTX_DAG_ID', 288 | 'AIRFLOW_CTX_TASK_ID', 289 | 'AIRFLOW_CTX_EXECUTION_DATE', 290 | 'AIRFLOW_CTX_TRY_NUMBER', 291 | 'AIRFLOW_CTX_DAG_RUN_ID', ]: 292 | v = os.getenv(x) 293 | if v: 294 | self.env_vars[x] = v 295 | 296 | def _get_pre_code(self): 297 | 298 | pre_code = "" 299 | if self.env_vars: 300 | self.log.info(f"Adding {len(self.env_vars)} environment variables to code") 301 | pre_code += "/** Begin environment variables **/\n" 302 | for k, v in self.env_vars.items(): 303 | pre_code += f"OPTIONS SET={k}='{v}';\n" 304 | pre_code += "/** End environment variables **/\n\n" 305 | if self.macro_vars: 306 | self.log.info(f"Adding {len(self.macro_vars)} macro variables to code") 307 | pre_code += "/** Begin macro variables **/\n" 308 | for k, v in self.macro_vars.items(): 309 | pre_code += f"%LET {k} = {v};\n" 310 | pre_code += "/** End macro variables **/\n\n" 311 | return pre_code 312 | 313 | def _generate_object_code(self): 314 | uri = URI_BASE 315 | 316 | if self.path_type == "compute": 317 | uri = f"{URI_BASE}?sessionId={self.compute_session_id}" 318 | self.log.info("Code generation for Studio object stored in Compute file system") 319 | else: 320 | self.log.info("Code generation for Studio object stored in Content") 321 | 322 | media_type = "application/vnd.sas.dataflow" 323 | if self.exec_type == "program": 324 | media_type = "application/vnd.sas.program" 325 | req = { 326 | "reference": { 327 | "mediaType": media_type, 328 | "type": self.path_type, 329 | "path": self.path}, 330 | "initCode": self.codegen_init_code, 331 | "wrapperCode": self.codegen_wrap_code, 332 | } 333 | 334 | response = self.connection.post(uri, json=req, timeout=self.http_timeout) 335 | if not response.ok: 336 | raise RuntimeError(f"Code generation failed: {response.text}") 337 | 338 | return response.json() 339 | 340 | def _run_job_and_wait(self, job_request: dict, poll_interval: int) -> tuple[dict, bool]: 341 | uri = JOB_URI 342 | 343 | # Kick off the job request. If it fails, no harm is done.
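        # For orientation, the job_request dict built in execute() and posted below
        # looks roughly like the following (values are illustrative placeholders,
        # not actual output):
        #   {"name": "Airflow_<task_id>",
        #    "jobDefinition": {"type": "Compute", "code": "<pre-code + generated SAS code>"},
        #    "arguments": {"_sessionId": "<compute session id>"}}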
344 | try: 345 | response = self.connection.post(uri, json=job_request, timeout=self.http_timeout) 346 | except Exception as e: 347 | raise AirflowException(f"Error when creating job request: {e}") 348 | 349 | # handle non-standard codes returned from the API (201, 400, 415), 350 | # i.e. the situation when we were not able to make the API call at all 351 | if response.status_code != 201: 352 | err_text = f"Failed to create job request. Status: {response.status_code}. Error: {response.text}" 353 | raise AirflowException(err_text) 354 | 355 | # Job started successfully, start waiting for the job to finish 356 | job = response.json() 357 | job_id = job["id"] 358 | self.log.info(f"Submitted job request with id {job_id}. Waiting for completion") 359 | uri = f"{JOB_URI}/{job_id}" 360 | 361 | # Poll for the state of the job. 362 | # If ANY error occurs, set state to 'unknown', print the reason to the log, and continue polling until self.unknown_state_timeout 363 | state = "unknown" 364 | countUnknownState = 0 365 | log_location = None 366 | num_log_lines = 0 367 | while state in ["pending", "running"] or (state == "unknown" and ((countUnknownState * poll_interval) <= self.unknown_state_timeout)): 368 | time.sleep(poll_interval) 369 | 370 | try: 371 | response = self.connection.get(uri, timeout=self.http_timeout) 372 | if not response.ok: 373 | if response.status_code == 404: 374 | # this could happen if the job was deleted 375 | self.log.info(f'Job {job_id} was not found and may have been deleted.') 376 | state = "canceled" 377 | else: 378 | countUnknownState = countUnknownState + 1 379 | self.log.info(f'Invalid response code {response.status_code} from {uri}. Will set state=unknown and continue checking...') 380 | state = "unknown" 381 | else: 382 | countUnknownState = 0 383 | job = response.json() 384 | if "state" in job: 385 | state = job["state"] 386 | self.log.info(f"Job state is {state}") 387 | else: 388 | self.log.info(f'Not able to determine state from {uri}. Will set state=unknown and continue checking...') 389 | state = "unknown" 390 | 391 | # Get the latest new log lines. 392 | if self.exec_log and state != "unknown": 393 | num_log_lines = stream_log(self.connection, job, num_log_lines, http_timeout=self.http_timeout) 394 | 395 | except Exception as e: 396 | # Make sure to forward any AirflowExceptions encountered during state checking; otherwise continue checking. 397 | if isinstance(e, AirflowTaskTimeout) or isinstance(e, AirflowException): 398 | raise 399 | else: 400 | countUnknownState = countUnknownState + 1 401 | self.log.info(f'HTTP call failed with error "{e}". Will set state=unknown and continue checking...') 402 | state = "unknown" 403 | 404 | if state == 'unknown': 405 | # Raise AirflowFailException as we don't know if the job is still running 406 | raise AirflowFailException(f'Unable to retrieve state of job after trying {countUnknownState} times. Will mark task as failed. Please check the SAS log.') 407 | 408 | # Be sure to get the latest new log lines after the job has finished.
409 | if self.exec_log: 410 | num_log_lines = stream_log(self.connection, job, num_log_lines, http_timeout=self.http_timeout) 411 | 412 | self.log.info("Job request has completed execution with the status: " + str(state)) 413 | success = True 414 | if state in ['failed', 'canceled', 'timed out', 'timedOut']: 415 | success = False 416 | if 'error' in job: 417 | self.log.error(job['error']) 418 | 419 | return job, success 420 | 421 | def _set_output_variables(self, context): 422 | # push compute session variables to Airflow as XComs 423 | 424 | # retrieve variables from the compute session 425 | uri = f"/compute/sessions/{self.compute_session_id}/variables?limit=999&filter=startsWith(name,'{self.output_macro_var_prefix}')" 426 | response = self.connection.get(uri, headers={'Accept': '*/*'}, timeout=self.http_timeout) 427 | if not response.ok: 428 | raise RuntimeError(f"get compute variables failed with {response.status_code}") 429 | v = response.json()["items"] 430 | 431 | # push the variables as XComs 432 | for var in v: 433 | self.log.info(f"found output variable {var['name']}") 434 | self.xcom_push(context, var['name'], var['value']) 435 | -------------------------------------------------------------------------------- /src/sas_airflow_provider/operators/sas_studioflow.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | from __future__ import annotations 19 | from typing import Sequence 20 | import copy 21 | import json 22 | import time 23 | import warnings 24 | 25 | import requests 26 | 27 | from airflow.exceptions import AirflowFailException 28 | from airflow.exceptions import AirflowException 29 | from airflow.models import BaseOperator 30 | from sas_airflow_provider.hooks.sas import SasHook 31 | from sas_airflow_provider.util.util import dump_logs 32 | 33 | 34 | class SASStudioFlowOperator(BaseOperator): 35 | """ 36 | Executes a SAS Studio flow. 37 | Note that this operator is deprecated. Please use SASStudioOperator instead. 38 | 39 | :param flow_path_type: Valid values are content or compute 40 | :param flow_path: path to the flow to execute, e.g. /Public/myflow.flw 41 | :param flow_exec_log: whether or not to output the execution log 42 | :param flow_codegen_init_code: Whether or not to generate init code 43 | (default value: False) 44 | :param flow_codegen_wrap_code: Whether or not to generate wrapper code 45 | (default value: False) 46 | :param connection_name: name of the connection to use. The connection should be defined 47 | as an HTTP connection in Airflow. 48 | :param compute_context: (optional) Name of the compute context to use. If not provided, a 49 | suitable default is used.
50 | :param env_vars: (optional) Dictionary of environment variables to set before running the flow. 51 | """ 52 | 53 | ui_color = "#CCE5FF" 54 | ui_fgcolor = "#000000" 55 | 56 | template_fields: Sequence[str] = ("env_vars",) 57 | 58 | def __init__( 59 | self, 60 | flow_path_type: str, 61 | flow_path: str, 62 | flow_exec_log: bool, 63 | flow_codegen_init_code=False, 64 | flow_codegen_wrap_code=False, 65 | connection_name=None, 66 | compute_context="SAS Studio compute context", 67 | env_vars=None, 68 | **kwargs, 69 | ) -> None: 70 | 71 | super().__init__(**kwargs) 72 | warnings.warn("SASStudioFlowOperator is deprecated. Please use SASStudioOperator instead.") 73 | if env_vars is None: 74 | env_vars = {} 75 | self.flow_path_type = flow_path_type 76 | self.flow_path = flow_path 77 | self.flow_exec_log = flow_exec_log 78 | self.flow_codegen_initCode = flow_codegen_init_code 79 | self.flow_codegen_wrapCode = flow_codegen_wrap_code 80 | self.connection_name = connection_name 81 | self.compute_context = compute_context 82 | self.env_vars = env_vars 83 | 84 | def execute(self, context): 85 | try: 86 | self.log.info("Authenticate connection") 87 | h = SasHook(self.connection_name) 88 | session = h.get_conn() 89 | 90 | self.log.info("Generate code for Studio Flow: %s", str(self.flow_path)) 91 | code = _generate_flow_code( 92 | session, 93 | self.flow_path_type, 94 | self.flow_path, 95 | self.flow_codegen_initCode, 96 | self.flow_codegen_wrapCode, 97 | None, 98 | self.compute_context, 99 | ) 100 | 101 | if self.env_vars: 102 | # Add environment variables to pre-code 103 | self.log.info(f"Adding {len(self.env_vars)} environment variables to code") 104 | pre_env_code = "/** Setting up environment variables **/\n" 105 | for env_var in self.env_vars: 106 | env_val = self.env_vars[env_var] 107 | pre_env_code = pre_env_code + f"options set={env_var}='{env_val}';\n" 108 | pre_env_code = pre_env_code + "/** Finished setting up environment variables **/\n\n" 109 | code["code"] = pre_env_code + code["code"] 110 | 111 | # Create the job request for JES 112 | jr = { 113 | "name": f"Airflow_{self.task_id}", 114 | "jobDefinition": {"type": "Compute", "code": code["code"]}, 115 | "arguments": {"_contextName": self.compute_context}, 116 | } 117 | 118 | # Kick off the JES job 119 | job = _run_job_and_wait(session, jr, 1) 120 | job_state = job["state"] 121 | 122 | # support retry if API calls fail for whatever reason 123 | except Exception as e: 124 | raise AirflowException(f"SASStudioFlowOperator error: {str(e)}") 125 | 126 | # display logs if needed 127 | if self.flow_exec_log: 128 | # Safeguard if we are unable to retrieve the log. We will NOT throw any exceptions 129 | try: 130 | dump_logs(session, job) 131 | except Exception as e: 132 | self.log.info("Unable to retrieve log. Maybe the log is too large.") 133 | 134 | # raise exception in Airflow if SAS Studio Flow ended execution with "failed", "canceled" or "timed out" state 135 | # support retry for 'failed' (typically there is an ERROR in the log) and 'timed out' 136 | # do NOT support retry for 'canceled' (typically the SAS Job called ABORT ABEND) 137 | if job_state == "failed": 138 | raise AirflowException("SAS Studio Flow Execution completed with an error.") 139 | 140 | if job_state == "canceled": 141 | raise AirflowFailException("SAS Studio Flow Execution was cancelled or aborted. See log for details.") 142 | 143 | if job_state == "timed out": 144 | raise AirflowException("SAS Studio Flow Execution has timed out.
See log for details.") 145 | 146 | if job_state == "timedOut": 147 | raise AirflowException("SAS Studio Flow Execution has timed out. See log for details.") 148 | 149 | return 1 150 | 151 | 152 | def _generate_flow_code( 153 | session, 154 | artifact_type: str, 155 | path: str, 156 | init_code: bool, 157 | wrap_code: bool, 158 | session_id=None, 159 | compute_context="SAS Studio compute context", 160 | ): 161 | # main API URI for Code Gen 162 | uri_base = "/studioDevelopment/code" 163 | 164 | # if type == compute then a Compute session should be created 165 | if artifact_type == "compute": 166 | print("Code Generation for Studio Flow with Compute session") 167 | 168 | # if session id is provided 169 | if session_id is not None: 170 | 171 | print("Session ID was provided") 172 | uri = f"{uri_base}?sessionId={session_id}" 173 | else: 174 | print("Create or connect to session") 175 | compute_session = _create_or_connect_to_session(session, compute_context, "Airflow-Session") 176 | uri = f'{uri_base}?sessionId={compute_session["id"]}' 177 | 178 | req = { 179 | "reference": {"mediaType": "application/vnd.sas.dataflow", "type": artifact_type, "path": path}, 180 | "initCode": init_code, 181 | "wrapperCode": wrap_code, 182 | } 183 | 184 | response = session.post(uri, json=req) 185 | 186 | if response.status_code != 200: 187 | raise RuntimeError(f"Code generation failed: {response.text}") 188 | 189 | return response.json() 190 | 191 | # if type == content then a Compute session is not needed 192 | elif artifact_type == "content": 193 | print("Code Generation for Studio Flow without Compute session") 194 | 195 | req = { 196 | "reference": {"mediaType": "application/vnd.sas.dataflow", "type": artifact_type, "path": path}, 197 | "initCode": init_code, 198 | "wrapperCode": wrap_code, 199 | } 200 | 201 | uri = uri_base 202 | response = session.post(uri, json=req) 203 | 204 | if response.status_code != 200: 205 | raise RuntimeError(f"Code generation failed: {response.text}") 206 | 207 | return response.json() 208 | 209 | else: 210 | raise RuntimeError("invalid artifact_type was supplied") 211 | 212 | 213 | def _create_or_connect_to_session(session: requests.Session, context_name: str, name: str) -> dict: 214 | # find session with given name 215 | response = session.get(f"/compute/sessions?filter=eq(name, {name})") 216 | if response.status_code != 200: 217 | raise RuntimeError(f"Find sessions failed: {response.text}") 218 | sessions = response.json() 219 | if sessions["count"] > 0: 220 | return sessions["items"][0] 221 | 222 | print(f"Compute session named '{name}' does not exist, a new one will be created") 223 | # find compute context 224 | response = session.get("/compute/contexts", params={"filter": f'eq("name","{context_name}")'}) 225 | if response.status_code != 200: 226 | raise RuntimeError(f"Find context named {context_name} failed: {response.text}") 227 | context_resp = response.json() 228 | if not context_resp["count"]: 229 | raise RuntimeError(f"Compute context '{context_name}' was not found") 230 | sas_context = context_resp["items"][0] 231 | 232 | # create session with given context 233 | uri = f'/compute/contexts/{sas_context["id"]}/sessions' 234 | session_request = {"version": 1, "name": name} 235 | tmpheaders = copy.deepcopy(session.headers) 236 | tmpheaders["Content-Type"] = "application/vnd.sas.compute.session.request+json" 237 | 238 | req = json.dumps(session_request) 239 | response = session.post(uri, data=req, headers=tmpheaders) 240 | 241 | if response.status_code != 201: 242 | raise
RuntimeError(f"Failed to create session: {response.text}") 243 | 244 | return response.json() 245 | 246 | 247 | JES_URI = "/jobExecution" 248 | JOB_URI = f"{JES_URI}/jobs" 249 | 250 | 251 | def _run_job_and_wait(session, job_request: dict, poll_interval: int) -> dict: 252 | uri = JOB_URI 253 | response = session.post(uri, json=job_request) 254 | # change to process non standard codes returned from API (201, 400, 415) 255 | # i.e. sistuation when we were not able to make API call at all 256 | if response.status_code != 201: 257 | raise RuntimeError(f"Failed to create job request: {response.text}") 258 | job = response.json() 259 | job_id = job["id"] 260 | state = job["state"] 261 | print(f"Submitted job request with id {job_id}. Waiting for completion") 262 | uri = f"{JOB_URI}/{job_id}" 263 | while state in ["pending", "running"]: 264 | time.sleep(poll_interval) 265 | response = session.get(uri) 266 | if response.status_code != 200: 267 | raise RuntimeError(f"Failed to get job: {response.text}") 268 | job = response.json() 269 | state = job["state"] 270 | print("Job request has completed execution with the status: " + str(state)) 271 | return job 272 | -------------------------------------------------------------------------------- /src/sas_airflow_provider/util/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. -------------------------------------------------------------------------------- /src/sas_airflow_provider/util/util.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
18 | 19 | import json 20 | import requests 21 | import os 22 | import logging 23 | 24 | from airflow.exceptions import AirflowException 25 | from airflow.exceptions import AirflowTaskTimeout 26 | 27 | 28 | def get_folder_file_contents(session, path: str, http_timeout=None) -> str: 29 | """ 30 | Fetch a file from the folder service 31 | :param session: rest session that includes auth token 32 | :param path: full path to the file in SAS Content 33 | :param http_timeout: Timeout for http connection 34 | :return: contents of the file 35 | """ 36 | member = get_member_by_path(session, path) 37 | if member['contentType'] != 'file': 38 | raise RuntimeError(f"folder item is not a file: '{path}'") 39 | 40 | uri = member['uri'] + '/content' 41 | response = session.get(uri, timeout=http_timeout) 42 | if not response.ok: 43 | raise RuntimeError(f"File {path} was not found or could not be accessed. error code: {response.status_code}") 44 | 45 | return response.text 46 | 47 | 48 | def get_folder_by_path(session, path: str, http_timeout=None) -> dict: 49 | """ 50 | Get a folder given the path. 51 | Return a folder object, or raise an error 52 | """ 53 | response = session.get('/folders/folders/@item', params={'path': path}, timeout=http_timeout) 54 | if response.ok: 55 | return response.json() 56 | raise RuntimeError(response.text) 57 | 58 | 59 | def get_member_by_path(session, path: str, http_timeout=None) -> dict: 60 | """ 61 | Get a folder member given the full path. 62 | Return a folder member (object); raise an error if not found 63 | """ 64 | parts = os.path.split(path) 65 | if not parts[0] or not parts[1]: 66 | raise RuntimeError(f"invalid path '{path}'") 67 | 68 | f = get_folder_by_path(session, parts[0], http_timeout=http_timeout) 69 | 70 | uri = get_uri(f['links'], 'members') 71 | if not uri: 72 | raise RuntimeError("failed to find members uri link") 73 | response = session.get(uri, params={'filter': f'eq("name","{parts[1]}")'}, timeout=http_timeout) 74 | 75 | if not response.ok: 76 | raise RuntimeError(f"failed to get folder members for '{path}'") 77 | 78 | members = response.json()['items'] 79 | if not members: 80 | raise RuntimeError(f"failed to get folder path '{path}'") 81 | 82 | member = members[0] 83 | return member 84 | 85 | 86 | def get_compute_session_file_contents(session, compute_session, path: str, http_timeout=None) -> str: 87 | """ 88 | Fetch a file from the compute session file system 89 | :param session: the rest session that includes auth token 90 | :param compute_session: the compute session id 91 | :param path: full path to the file in the file system 92 | :param http_timeout: Timeout for http connection 93 | :return: contents of the file 94 | """ 95 | p = path.replace("/", "~fs~") 96 | uri = f'/compute/sessions/{compute_session}/files/{p}/content' 97 | 98 | response = session.get(uri, headers={"Accept": "application/octet-stream"}, timeout=http_timeout) 99 | if response.ok: 100 | return response.text 101 | raise RuntimeError(f"File {path} was not found or could not be accessed. error code: {response.status_code}") 102 | 103 | 104 | def get_uri(links, rel): 105 | """ 106 | Given a links object from a rest response, find the rel link specified and return the uri.
107 | Return None if not found 108 | """ 109 | link = next((x for x in links if x["rel"] == rel), None) 110 | if link is None: 111 | return None 112 | return link["uri"] 113 | 114 | 115 | def stream_log(session, job, start, limit=99999, http_timeout=None) -> int: 116 | current_line = start 117 | 118 | log_uri = get_uri(job["links"], "log") 119 | if not log_uri: 120 | logging.getLogger(name=None).warning("Warning: failed to retrieve log URI from links. Maybe the log is too large.") 121 | else: 122 | try: 123 | # Note: if it is a files link (it will be when the job has finished), it does not support the 'start' parameter, so we need to filter it ourselves. 124 | # We will ignore the limit parameter in that case 125 | is_files_link = log_uri.startswith("/files/") 126 | 127 | r = session.get(f"{log_uri}/content?start={start}&limit={limit}", timeout=http_timeout) 128 | if r.ok: 129 | # Parse the json log format and print each line 130 | log_contents = r.text 131 | jcontents = json.loads(log_contents) 132 | lines = 0 133 | for line in jcontents["items"]: 134 | if (is_files_link and lines >= start) or not is_files_link: 135 | t = line["type"] 136 | if t != "title": 137 | logging.getLogger(name=None).info(f'{line["line"]}') 138 | current_line = current_line + 1 139 | 140 | lines = lines + 1 141 | else: 142 | logging.getLogger(name=None).warning(f"Failed to retrieve parts of the log with status code {r.status_code} from URI: {log_uri}/content. Maybe the log is too large.") 143 | except Exception as e: 144 | # Make sure to forward any AirflowExceptions encountered during log retrieval 145 | if isinstance(e, AirflowTaskTimeout) or isinstance(e, AirflowException): 146 | raise 147 | else: 148 | logging.getLogger(name=None).warning(f"Unable to retrieve parts of the log: {e}. Maybe the log is too large.") 149 | 150 | return current_line 151 | 152 | 153 | 154 | def dump_logs(session, job, http_timeout=None): 155 | """ 156 | Get the log from the job object 157 | :param session: rest session 158 | :param job: job object that should contain links object 159 | :param http_timeout: Timeout for http connection 160 | """ 161 | 162 | log_uri = get_uri(job["links"], "log") 163 | if not log_uri: 164 | print("Warning: failed to retrieve log URI from links. Log will not be displayed") 165 | else: 166 | r = session.get(f"{log_uri}/content", timeout=http_timeout) 167 | if not r.ok: 168 | print("Warning: failed to retrieve log content. Log will not be displayed") 169 | return 170 | log_contents = r.text 171 | # Parse the json log format and print each line 172 | jcontents = json.loads(log_contents) 173 | for line in jcontents["items"]: 174 | t = line["type"] 175 | if t != "title": 176 | print(f'{line["line"]}') 177 | 178 | def find_named_compute_session(session: requests.Session, name: str, http_timeout=None) -> dict: 179 | # find session with given name 180 | response = session.get(f"/compute/sessions?filter=eq(name, {name})", timeout=http_timeout) 181 | if not response.ok: 182 | raise RuntimeError(f"Find sessions failed: {response.status_code}") 183 | sessions = response.json() 184 | if sessions["count"] > 0: 185 | print(f"Existing compute session named '{name}' with id {sessions['items'][0]['id']} was found") 186 | return sessions["items"][0] 187 | return {} 188 | 189 | def create_or_connect_to_session(session: requests.Session, context_name: str, name = None, http_timeout=None, job_name_prefix = None) -> dict: 190 | """ 191 | Connect to an existing compute session by name.
If that named session does not exist, 192 | one is created using the context name supplied. 193 | :param session: rest session that includes oauth token 194 | :param context_name: the context name to use to create the session if the session was not found 195 | :param name: name of session to find 196 | :param http_timeout: Timeout for http connection 197 | :param job_name_prefix: (optional) string. The name that the compute session should identify itself as in SAS Workload Orchestrator (SWO). job_name_prefix is supported from Viya Stable 2024.07 onward 198 | :return: session object 199 | 200 | """ 201 | if name is not None: 202 | compute_session = find_named_compute_session(session, name, http_timeout=http_timeout) 203 | if compute_session: 204 | return compute_session 205 | 206 | print(f"Compute session named '{name}' does not exist, a new one will be created") 207 | else: 208 | print("A new unnamed compute session will be created") 209 | 210 | 211 | # find compute context 212 | response = session.get("/compute/contexts", params={"filter": f'eq("name","{context_name}")'}, timeout=http_timeout) 213 | if not response.ok: 214 | raise RuntimeError(f"Find context named {context_name} failed: {response.status_code}") 215 | context_resp = response.json() 216 | if not context_resp["count"]: 217 | raise RuntimeError(f"Compute context '{context_name}' was not found") 218 | sas_context = context_resp["items"][0] 219 | 220 | # create session with given context 221 | uri = f'/compute/contexts/{sas_context["id"]}/sessions' 222 | if name is not None: 223 | if job_name_prefix is not None: 224 | session_request = {"version": 1, "name": name, "attributes": {"jobNamePrefix": job_name_prefix}} 225 | else: 226 | session_request = {"version": 1, "name": name} 227 | else: 228 | # Create an unnamed session 229 | if job_name_prefix is not None: 230 | session_request = {"version": 1, "attributes": {"jobNamePrefix": job_name_prefix}} 231 | else: 232 | session_request = {"version": 1} 233 | 234 | headers = {"Content-Type": "application/vnd.sas.compute.session.request+json"} 235 | 236 | req = json.dumps(session_request) 237 | response = session.post(uri, data=req, headers=headers, timeout=http_timeout) 238 | 239 | if response.status_code != 201: 240 | raise RuntimeError(f"Failed to create session: {response.text}") 241 | 242 | json_response = response.json() 243 | print(f"Compute session {json_response['id']} created") 244 | 245 | return json_response 246 | 247 | def end_compute_session(session: requests.Session, id, http_timeout=None): 248 | uri = f'/compute/sessions/{id}' 249 | response = session.delete(uri, timeout=http_timeout) 250 | if not response.ok: 251 | return False 252 | return True 253 | -------------------------------------------------------------------------------- /tests/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License.
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | -------------------------------------------------------------------------------- /tests/hooks/test_sas.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | from sas_airflow_provider.hooks.sas import SasHook 19 | from unittest.mock import patch, ANY, Mock 20 | import requests 21 | 22 | 23 | class TestSasHook: 24 | 25 | @patch("requests.Session") 26 | @patch("requests.post") 27 | @patch("airflow.hooks.base.BaseHook.get_connection") 28 | def test_sas_hook(self, bh_mock, req_mock, sess_mock): 29 | bh_ret = Mock() 30 | bh_mock.return_value = bh_ret 31 | bh_ret.extra_dejson = {"token": "", "client_id": "", "client_secret": ""} 32 | bh_ret.login = "user" 33 | bh_ret.password = "pass" 34 | bh_ret.host = "host" 35 | req_ret = Mock() 36 | req_mock.return_value = req_ret 37 | req_ret.json.return_value = {'access_token': 'tok'} 38 | req_ret.status_code = 200 39 | 40 | h = SasHook("SAS") 41 | r = h.get_conn() 42 | 43 | req_mock.assert_called_with('host/SASLogon/oauth/token', 44 | data={'grant_type': 'password', 'username': 'user', 45 | 'password': 'pass'}, verify=True, 46 | headers={'Authorization': 'Basic c2FzLmNsaTo='}) 47 | -------------------------------------------------------------------------------- /tests/operators/__init__.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | -------------------------------------------------------------------------------- /tests/operators/test_sas_jobexecution.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | from sas_airflow_provider.operators.sas_jobexecution import SASJobExecutionOperator 19 | from unittest.mock import patch 20 | 21 | 22 | def mock_ret_headers(hdr): 23 | if hdr == 'X-Sas-Jobexec-Error': 24 | return None 25 | elif hdr == 'X-Sas-Jobexec-Id': 26 | return 1 27 | 28 | class TestSasJobExecutionOperator: 29 | """ 30 | Test class for SASJobExecutionOperator 31 | """ 32 | 33 | @patch("sas_airflow_provider.operators.sas_jobexecution.dump_logs") 34 | @patch("sas_airflow_provider.operators.sas_jobexecution.SasHook") 35 | def test_execute_sas_job_execution_operator(self, session_mock, dump_logs_mock): 36 | """ 37 | Test basic operation 38 | """ 39 | session_mock.return_value.get_conn.return_value.post.return_value.status_code = 200 40 | session_mock.return_value.get_conn.return_value.get.return_value.status_code = 200 41 | session_mock.return_value.get_conn.return_value.post.return_value.headers.get=mock_ret_headers 42 | 43 | operator = SASJobExecutionOperator(task_id='test', 44 | connection_name="SAS", job_name='/Public/my_job', 45 | parameters={'a': 'b'}, 46 | job_exec_log=True 47 | ) 48 | 49 | operator.execute(context={}) 50 | session_mock.assert_called_with('SAS') 51 | dump_logs_mock.assert_called() 52 | session_mock.return_value.get_conn.return_value.post.assert_called_with('/SASJobExecution/?_program=/Public/my_job&a=b', 53 | headers={ 54 | 'Accept': 'application/vnd.sas.job.execution.job+json'}) 55 | -------------------------------------------------------------------------------- /tests/operators/test_sas_studio.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
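# (Note for contributors: as with the other unit tests in this repo, all HTTP
# traffic below is mocked, so these tests run without a Viya deployment, e.g.:
#     pytest tests/operators/test_sas_studio.py
# assuming pytest is available in your environment.)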
--------------------------------------------------------------------------------
/tests/operators/test_sas_studio.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | 
18 | from __future__ import annotations
19 | 
20 | from unittest.mock import ANY, Mock, patch
21 | 
22 | from sas_airflow_provider.operators.sas_studio import (
23 |     SASStudioOperator,
24 |     create_or_connect_to_session,
25 | )
26 | 
27 | from sas_airflow_provider.util.util import dump_logs
28 | 
29 | 
30 | class TestSASStudioOperator:
31 |     """
32 |     Test class for SASStudioOperator
33 |     """
34 | 
35 | 
36 |     @patch.object(SASStudioOperator, '_run_job_and_wait')
37 |     @patch.object(SASStudioOperator, '_generate_object_code')
38 |     @patch("sas_airflow_provider.operators.sas_studio.dump_logs")
39 |     @patch("sas_airflow_provider.operators.sas_studio.SasHook")
40 |     def test_execute_sas_studio_flow_operator_basic(
41 |         self, session_mock, dump_logs_mock, mock_gen_flow_code, mock_run_job_and_wait
42 |     ):
43 |         mock_gen_flow_code.return_value = {"code": "test code"}
44 |         mock_run_job_and_wait.return_value = {"id": "jobid1",
45 |                                               "state": "completed",
46 |                                               "links": [{"rel": "log", "uri": "log/uri"}],
47 |                                               }, True
48 |         environment_vars = {"env1": "val1", "env2": "val2"}
49 |         operator = SASStudioOperator(
50 |             task_id="demo_studio_flow_1.flw",
51 |             path_type="content",
52 |             path="/Public/Airflow/demo_studio_flow_1.flw",
53 |             exec_log=True,
54 |             connection_name="SAS",
55 |             compute_context="SAS Studio compute context",
56 |             codegen_init_code=False,
57 |             codegen_wrap_code=False,
58 |             env_vars=environment_vars,
59 |         )
60 | 
61 |         operator.execute(context={})
62 | 
63 |         dump_logs_mock.assert_called()
64 |         session_mock.assert_called_with("SAS")
65 |         mock_gen_flow_code.assert_called()
66 |         mock_run_job_and_wait.assert_called()
67 | 
68 |     def test_execute_sas_studio_flow_create_or_connect(self):
69 |         session = Mock()
70 |         req_ret = Mock()
71 |         session.get.return_value = req_ret
72 |         req_ret.json.return_value = {"count": 1, "items": ["dummy"]}
73 |         req_ret.status_code = 200
74 |         r = create_or_connect_to_session(session, "context", "name")
75 |         assert r == "dummy"
76 | 
77 |     def test_execute_sas_studio_flow_create_or_connect_new(self):
78 |         session = Mock()
79 |         req_ret1 = Mock()
80 |         req_ret2 = Mock()
81 |         session.get.side_effect = [req_ret1, req_ret2]
82 |         session.headers = {}
83 |         post_ret = Mock()
84 |         session.post.return_value = post_ret
85 |         post_ret.status_code = 201
86 |         post_ret.json.return_value = {"a": "b"}
87 |         req_ret1.json.return_value = {"count": 0}
88 |         req_ret1.status_code = 200
89 |         req_ret2.json.return_value = {"count": 1, "items": [{"id": "10"}]}
90 |         req_ret2.status_code = 200
91 |         r = create_or_connect_to_session(session, "context", "name")
92 |         assert r == {"a": "b"}
93 | 
94 |     def test_execute_sas_studio_flow_operator_gen_code(self):
95 |         session = Mock()
96 |         req_ret = Mock()
97 |         session.post.return_value = req_ret
98 |         req_ret.json.return_value = {"code": "code val"}
99 |         req_ret.status_code = 200
100 |         op = SASStudioOperator(task_id="demo_studio_flow_1.flw",
101 |                                path_type="content",
102 |                                path="/path",
103 |                                exec_log=True,
104 |                                codegen_init_code=True, codegen_wrap_code=True)
105 |         op.connection = session
106 |         r = op._generate_object_code()
107 | 
108 |         session.post.assert_called_with(
109 |             "/studioDevelopment/code",
110 |             json={
111 |                 "reference": {
112 |                     "mediaType": "application/vnd.sas.dataflow",
113 |                     "type": "content",
114 |                     "path": "/path",
115 |                 },
116 |                 "initCode": True,
117 |                 "wrapperCode": True,
118 |             },
119 |         )
120 |         assert r == {"code": "code val"}
121 | 
122 |     @patch("sas_airflow_provider.operators.sas_studio.create_or_connect_to_session")
123 |     def test_execute_sas_studio_flow_operator_gen_code_compute(self, c_mock):
124 |         session = Mock()
125 |         req_ret = Mock()
126 |         session.post.return_value = req_ret
127 |         req_ret.json.return_value = {"code": "code val"}
128 |         req_ret.status_code = 200
129 |         c_mock.return_value = {"id": "abc"}
130 | 
131 |         op = SASStudioOperator(task_id="demo_studio_flow_1.flw",
132 |                                path_type="compute",
133 |                                path="/path",
134 |                                exec_log=True,
135 |                                codegen_init_code=True, codegen_wrap_code=True)
136 |         op.connection = session
137 |         r = op._generate_object_code()
138 | 
139 | 
140 |         c_mock.assert_called_with(ANY, "SAS Studio compute context", "Airflow-Session")
141 | 
142 |         session.post.assert_called_with(
143 |             "/studioDevelopment/code?sessionId=abc",
144 |             json={
145 |                 "reference": {
146 |                     "mediaType": "application/vnd.sas.dataflow",
147 |                     "type": "compute",
148 |                     "path": "/path",
149 |                 },
150 |                 "initCode": True,
151 |                 "wrapperCode": True,
152 |             },
153 |         )
154 |         assert r == {"code": "code val"}
155 | 
156 |     def test_execute_sas_studio_flow_run_job(self):
157 |         session_mock = Mock()
158 |         session_mock.post.return_value.status_code = 201
159 |         session_mock.post.return_value.json.return_value = {"id": "1", "state": "completed"}
160 |         req = {"a": "b"}
161 |         op = SASStudioOperator(task_id="demo_studio_flow_1.flw",
162 |                                path_type="compute",
163 |                                path="/path",
164 |                                exec_log=True,
165 |                                codegen_init_code=True, codegen_wrap_code=True)
166 |         op.connection = session_mock
167 |         r = op._run_job_and_wait(req, 1)
168 |         session_mock.post.assert_called_with("/jobExecution/jobs", json={"a": "b"})
169 |         assert r == ({"id": "1", "state": "completed"}, True)
170 | 
171 |     def test_execute_sas_studio_flow_get_logs(self):
172 |         session_mock = Mock()
173 |         session_mock.get.return_value.status_code = 200
174 |         session_mock.get.return_value.text = """
175 |         {"items": [{"type":"INFO", "line":"line value"}]}
176 |         """
177 |         req = {"links": [{"rel": "log", "uri": "log/uri"}]}
178 |         dump_logs(session_mock, req)
179 |         session_mock.get.assert_called_with("log/uri/content")
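Taken together, the two create_or_connect tests above pin down the session-reuse contract: reuse an existing compute session when one is found, otherwise resolve the named compute context and create a session. The sketch below restates that contract as code for readability only; it is inferred purely from the mocked calls, not from the actual implementation in sas_studio.py, and the request arguments are deliberately elided (written as ...) because the tests never assert on them.

# A reading of create_or_connect_to_session as the tests constrain it;
# not the real implementation, which may differ in detail.
def create_or_connect_sketch(session, compute_context, session_name):
    found = session.get(...).json()        # look up an existing session by name
    if found["count"] > 0:
        return found["items"][0]           # reuse it ("dummy" in the first test)
    context = session.get(...).json()["items"][0]  # resolve the compute context
    created = session.post(...)            # create a new session; tests expect 201
    return created.json()                  # the created session object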
--------------------------------------------------------------------------------
/tests/system/__init__.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | 
--------------------------------------------------------------------------------
/tests/system/sas_create_delete_session.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | 
18 | from datetime import datetime
19 | from airflow import DAG
20 | from sas_airflow_provider.operators.sas_create_session import SASComputeCreateSession
21 | from sas_airflow_provider.operators.sas_delete_session import SASComputeDeleteSession
22 | 
23 | dag = DAG('demo_create_delete', description='Create and delete sessions',
24 |           schedule="@once",
25 |           start_date=datetime(2022, 6, 1), catchup=False)
26 | 
27 | task0 = SASComputeCreateSession(task_id="create_sess", dag=dag)
28 | 
29 | task1 = SASComputeDeleteSession(task_id='delete_sess',
30 |                                 compute_session_id="{{ ti.xcom_pull(key='compute_session_id', task_ids=["
31 |                                                    "'create_sess'])|first }}",
32 |                                 dag=dag)
33 | 
34 | 
35 | task2 = SASComputeDeleteSession(task_id='delete_sess_named',
36 |                                 compute_session_name="Airflow-Session",
37 |                                 dag=dag)
38 | 
39 | task0 >> task1 >> task2
40 | if __name__ == '__main__':
41 |     dag.test()
42 | 
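The xcom_pull expression used for compute_session_id above is repeated verbatim in the studio DAGs later in this directory. In your own DAGs the template can be named once as a module-level constant; this is purely a readability refactor of the pattern shown above, and the rendered value is identical.

# SASComputeCreateSession pushes the new session id to XCom under the key
# "compute_session_id"; this constant names the pull expression once so each
# downstream task can pass compute_session_id=SESSION_ID_TEMPLATE.
SESSION_ID_TEMPLATE = (
    "{{ ti.xcom_pull(key='compute_session_id', task_ids=['create_sess'])|first }}"
)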
--------------------------------------------------------------------------------
/tests/system/sas_jobexecution.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | 
18 | from datetime import datetime
19 | from airflow import DAG
20 | from sas_airflow_provider.operators.sas_jobexecution import SASJobExecutionOperator
21 | 
22 | dag = DAG('sas_hello_world_jes', description='Hello World SAS DAG',
23 |           schedule="@once",
24 |           start_date=datetime(2022, 6, 1), catchup=False)
25 | 
26 | 
27 | # job parameters are passed into the job
28 | job_parameters = {
29 |     "userName": "Demo"
30 | }
31 | 
32 | hello_task = SASJobExecutionOperator(task_id='hello_task',
33 |                                      job_name='/Public/Airflow/Hello-World',
34 |                                      parameters=job_parameters,
35 |                                      job_exec_log=True,
36 |                                      add_airflow_vars=True,
37 |                                      dag=dag)
38 | 
39 | if __name__ == '__main__':
40 |     dag.test()
--------------------------------------------------------------------------------
/tests/system/sas_studio.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | 
18 | from datetime import datetime
19 | from airflow import DAG
20 | from sas_airflow_provider.operators.sas_studio import SASStudioOperator
21 | dag = DAG('demo_studio_flow_1', description='Executing Studio Flow for demo purposes',
22 |           schedule="@once",
23 |           start_date=datetime(2022, 6, 1), catchup=False)
24 | 
25 | environment_vars = {
26 |     "env1": "val1",
27 |     "env2": "val2"
28 | }
29 | 
30 | task1 = SASStudioOperator(task_id='demo_studio_flow_1.flw',
31 |                           path_type='content',
32 |                           path='/Public/Airflow/demo_studio_flow_1.flw',
33 |                           exec_log=True,
34 |                           compute_context="SAS Studio compute context",
35 |                           codegen_init_code=False,
36 |                           codegen_wrap_code=False,
37 |                           env_vars=environment_vars,
38 |                           macro_vars={"a": "b"},
39 |                           dag=dag)
40 | if __name__ == '__main__':
41 |     dag.test()
42 | 
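The DAG above wires each task to the DAG explicitly via dag=dag. The equivalent definition using Airflow's standard DAG context manager is shown below for comparison; behavior is identical, and tasks created inside the with block are attached to the DAG automatically.

from datetime import datetime
from airflow import DAG
from sas_airflow_provider.operators.sas_studio import SASStudioOperator

with DAG('demo_studio_flow_1', description='Executing Studio Flow for demo purposes',
         schedule="@once",
         start_date=datetime(2022, 6, 1), catchup=False) as dag:
    # No dag=dag argument is needed inside the context manager.
    task1 = SASStudioOperator(task_id='demo_studio_flow_1.flw',
                              path_type='content',
                              path='/Public/Airflow/demo_studio_flow_1.flw',
                              exec_log=True,
                              compute_context="SAS Studio compute context",
                              codegen_init_code=False,
                              codegen_wrap_code=False)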
--------------------------------------------------------------------------------
/tests/system/sas_studio_advanced.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | 
18 | from datetime import datetime
19 | from airflow import DAG
20 | from sas_airflow_provider.operators.sas_studio import SASStudioOperator
21 | from sas_airflow_provider.operators.sas_create_session import SASComputeCreateSession
22 | 
23 | dag = DAG('demo_advanced', description='Executing code',
24 |           schedule="@once",
25 |           start_date=datetime(2022, 6, 1), catchup=False)
26 | 
27 | environment_vars = {
28 |     "env1": "val1",
29 |     "env2": "val2"
30 | }
31 | 
32 | task0 = SASComputeCreateSession(task_id="create_sess", dag=dag)
33 | 
34 | task1 = SASStudioOperator(task_id='demo_program',
35 |                           path_type='compute',
36 |                           exec_type='program',
37 |                           path='/path/to/test.sas',
38 |                           exec_log=True,
39 |                           compute_session_id="{{ ti.xcom_pull(key='compute_session_id', task_ids=['create_sess'])|first }}",
40 |                           compute_context="SAS Studio compute context",
41 |                           codegen_init_code=False,
42 |                           codegen_wrap_code=False,
43 |                           env_vars=environment_vars,
44 |                           output_macro_var_prefix="AF_",
45 |                           dag=dag)
46 | 
47 | program2 = '''
48 | %put value of one is &one;
49 | %put value of two is &two;
50 | %run;
51 | '''
52 | 
53 | task2 = SASStudioOperator(task_id='demo_program_2',
54 |                           path_type='raw',
55 |                           exec_type='program',
56 |                           path=program2,
57 |                           exec_log=True,
58 |                           compute_session_id="{{ ti.xcom_pull(key='compute_session_id', task_ids=['create_sess'])|first }}",
59 |                           compute_context="SAS Studio compute context",
60 |                           codegen_init_code=False,
61 |                           codegen_wrap_code=False,
62 |                           env_vars=environment_vars,
63 |                           macro_vars={"one": "{{ti.xcom_pull(key='AF_1', task_ids=['demo_program'])|first}}",
64 |                                       "two": "{{ti.xcom_pull(key='AF_2', task_ids=['demo_program'])|first}}"},
65 |                           dag=dag)
66 | 
67 | task0 >> task1 >> task2
68 | if __name__ == '__main__':
69 |     dag.test()
70 | 
--------------------------------------------------------------------------------