├── .gitignore ├── LICENSE ├── README.md ├── batch.md ├── cleanup_cache.py ├── cli-sync.sh ├── globus_folder_sync.py ├── requirements.txt ├── share-data.sh └── share_data.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | .venv/ 83 | venv/ 84 | ENV/ 85 | 86 | # Spyder project settings 87 | .spyderproject 88 | 89 | # Rope project settings 90 | .ropeproject 91 | 92 | # token store for example scripts 93 | tokens.json 94 | refresh-tokens.json 95 | 96 | # store for transfer id 97 | last-transfer-id.txt 98 | transfer-data.json 99 | -------------------------------------------------------------------------------- 
/LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Globus Automation Examples 2 | Simple code examples for various use cases using Globus. 3 | 4 | ## Overview 5 | 6 | There are three example use cases in this repo: 7 | 8 | * Syncing a directory. 9 | * Staging data in a shared directory. 10 | * Removing directories after files are transferred . 
11 | 12 | The syncing and staging examples are implemented as both a Bash 13 | script that calls the [Globus CLI](https://docs.globus.org/cli/) and 14 | a Python module that can be run as a script or imported as a module. 15 | The directory cleanup example is implemented as a Python script. 16 | The Python examples are built using the 17 | [Globus SDK](https://globus-sdk-python.readthedocs.io/en/stable/). 18 | 19 | * [`cli-sync.sh`](cli-sync.sh): submit a recursive transfer with sync option. 20 | * [`globus_folder_sync.py`](globus_folder_sync.py): submit a recursive transfer with sync option; uses a [Native App grant](https://github.com/globus/native-app-examples). 21 | * [`share-data.sh`](share-data.sh): stages data to a folder and sets sharing access control to a user and or group. 22 | * [`share_data.py`](share_data.py): stages data to a folder and sets sharing access control to a user and or group. Uses a [Native App grant](https://github.com/globus/native-app-examples) or [Client Credential grant](http://globus-sdk-python.readthedocs.io/en/stable/examples/client_credentials/). 23 | * [`cleanup_cache.py`](cleanup_cache.py): removes directories under a shared endpoint that have had data transferred from them. Uses [Client Credential grant](http://globus-sdk-python.readthedocs.io/en/stable/examples/client_credentials/). 24 | * [Globus CLI Batch Transfer Recipe](batch.md): a guide on how to use the Globus CLI to list, filter, and batch submit a transfer from two locations into a single destination folder. 25 | 26 | ## Getting Started 27 | * Install the [Globus Command Line Interface (CLI)](https://docs.globus.org/cli/installation/). 28 | * Set up your environment. 29 | * [OS X](#os-x) 30 | * [Linux](#linux-ubuntu) 31 | * [Windows](#windows) 32 | * Create your own Native App registration for use with the examples. Visit the [Globus Developer Pages](https://developers.globus.org) to register an App. 
33 | * When registering the App you'll be asked for some information, including the redirect URL and any scopes you will be requesting. 34 | * Check the "Will be used by a native application" checkbox 35 | * Redirect URL: `https://auth.globus.org/v2/web/auth-code` 36 | * Scopes: `urn:globus:auth:scope:transfer.api.globus.org:all`, `openid`, `profile`, `email` 37 | * Replace the UUIDs for `CLIENT_ID` in [`globus_folder_sync.py`](globus_folder_sync.py) and [`share_data.py`](share_data.py). 38 | * If you prefer to run `share_data.py` as a Confidential App, visit the [Globus Developer Pages](https://developers.globus.org) to register an App. 39 | * Leave "Will be used by a native application" checkbox unchecked. 40 | * When your app is registered, scroll down to "Client Secrets" and click "Generate New Client Secret". Copy a generated client secret into `share_data.py` as `CLIENT_SECRET`. 41 | 42 | ### OS X 43 | 44 | ##### Environment Setup 45 | 46 | * `sudo easy_install pip` 47 | * `sudo pip install virtualenv` 48 | * `git clone https://github.com/globus/automation-examples` 49 | * `cd automation-examples` 50 | * `virtualenv venv` 51 | * `source venv/bin/activate` 52 | * `pip install -r requirements.txt` 53 | 54 | ### Linux (Ubuntu) 55 | 56 | ##### Environment Setup 57 | 58 | * `sudo apt-get update` 59 | * `sudo apt-get install python-pip` 60 | * `sudo pip install virtualenv` 61 | * `sudo apt-get install git` 62 | * `git clone https://github.com/globus/automation-examples` 63 | * `cd automation-examples` 64 | * `virtualenv venv` 65 | * `source venv/bin/activate` 66 | * `pip install -r requirements.txt` 67 | 68 | ### Windows 69 | 70 | ##### Environment Setup 71 | 72 | * Install Python (https://www.python.org/downloads/) 73 | * `pip install virtualenv` 74 | * Install git (https://git-scm.com/downloads) 75 | * `git clone https://github.com/globus/automation-examples` 76 | * `cd automation-examples` 77 | * `virtualenv venv` 78 | * `venv\Scripts\activate` 79 | * `pip install -r requirements.txt` 80 | 81 | ## Running the scripts 82 | 
83 | ### globus_folder_sync.py and cli-sync.sh 84 | 85 | The app transfers the `/share/godata/` directory from Tutorial Endpoint 1 to 86 | `/~/sync-demo/` on Tutorial Endpoint 2. The destination path must exist 87 | before the script is executed. The path can also be changed by specifying 88 | a different value of `DESTINATION_PATH` in `globus_folder_sync.py`. 89 | The Python script launches a web browser to get an OAuth authorization code. 90 | After you consent and copy the code to the 'Enter the auth code' prompt, 91 | the script requests access and refresh tokens from the Globus Auth service and 92 | saves the tokens in the `transfer-data.json` file to avoid going through the OAuth 93 | flow every time the script is executed. 94 | 95 | ``` 96 | $ ./globus_folder_sync.py 97 | Transfer has been started from 98 | ddb59aef-6d04-11e5-ba46-22000b92c6ec:/share/godata/ 99 | to 100 | ddb59af0-6d04-11e5-ba46-22000b92c6ec:/~/sync-demo/ 101 | Visit the link below to see the changes: 102 | https://globus.org/app/transfer?destination_path=%2F%7E%2Fsync-demo%2F&origin_path=%2Fshare%2Fgodata%2F&destination_id=ddb59af0-6d04-11e5-ba46-22000b92c6ec&origin_id=ddb59aef-6d04-11e5-ba46-22000b92c6ec 103 | ``` 104 | The same functionality can be implemented using the Globus CLI. In this case, 105 | the Globus CLI is responsible for the OAuth 2.0 authorization flow and handling 106 | access and refresh tokens. The example shell script, `cli-sync.sh`, calls 107 | the Globus CLI `transfer` command only. To avoid transferring the same data 108 | concurrently, the script stores a transfer task id in the `last-transfer-id.txt` 109 | file and checks this file on every execution to avoid starting a new transfer before the previous task has finished. 
110 | ``` 111 | $ globus login 112 | $ ./cli-sync.sh 113 | Checking for a previous transfer 114 | Last transfer fb55533e-449f-11e7-bd46-22000b9a448b SUCCEEDED, continuing 115 | Verified that source is a directory 116 | Submitted sync from ddb59aef-6d04-11e5-ba46-22000b92c6ec:/share/godata/ to ddb59af0-6d04-11e5-ba46-22000b92c6ec:/~/sync-demo/ 117 | Link: 118 | https://www.globus.org/app/transfer?origin_id=ddb59aef-6d04-11e5-ba46-22000b92c6ec&origin_path=%2Fshare%2Fgodata%2F&destination_id=ddb59af0-6d04-11e5-ba46-22000b92c6ec&destination_path=%2F~%2Fsync-demo%2F 119 | Saving sync transfer ID to last-transfer-id.txt 120 | $ cat last-transfer-id.txt 121 | 842ac3d8-39b5-11e7-bcec-22000b9a448b 122 | ``` 123 | 124 | **Note**: Both ./globus_folder_sync.py and cli-sync.sh require you to login (see Login section for help). 125 | 126 | ### share_data.py and share-data.sh 127 | 128 | 129 | The app transfers a directory to a shared endpoint and destination path 130 | specified in the command line. The destination path must exist prior to running the script. Before the script starts transferring files it checks if the 131 | destination path concatenated with the last section of the source path exists. If 132 | it does and the `--delete` option is specified, the script deletes the path with 133 | all subdirectories and files, creates it again and grants a specified user or 134 | group read access. 135 | 136 | **Note**: Before running this: 137 | * Create a shared endpoint and specify its UUID in the variable `$shared_ep` 138 | in the examples below. 139 | * Create a folder named `share-data-demo/` under the shared endpoint. 140 | 141 | In the example below, the script transfers `/share/godata/` from Tutorial 142 | Endpoint 1 to `/share-data-demo/` on a shared endpoint created against Tutorial 143 | Endpoint 2. If you run this multiple times, you may see an error that the ACL rule 144 | already exists. You can ignore it. 
145 | ``` 146 | $ source_ep=ddb59aef-6d04-11e5-ba46-22000b92c6ec # Tutorial Endpoint 1 147 | $ shared_ep='' # Shared endpoint on Tutorial Endpoint 2 148 | $ user_uuid=c02d881a-d274-11e5-bdf5-d3a88fb071ca # John Doe 149 | $ ./share_data.py \ 150 | --source-endpoint $source_ep \ 151 | --shared-endpoint $shared_ep \ 152 | --source-path /share/godata/ \ 153 | --destination-path /share-data-demo/ \ 154 | --user-uuid $user_uuid \ 155 | --delete 156 | Destination directory, /share-data-demo/godata/, exists and will be deleted 157 | Submitting a delete task 158 | task_id: 3d68afa2-3943-11e9-9fa6-0a06afd4a22e 159 | Creating destination directory /share-data-demo/godata/ 160 | Granting user, 78af45b1-d0b4-4311-8475-b3681d37c4d5, read access to the destination directory 161 | Submitting a transfer task 162 | task_id: 4409c314-3943-11e9-9fa6-0a06afd4a22e 163 | You can monitor the transfer task programmatically using Globus SDK, or go to the Web UI, https://www.globus.org/app/activity/4409c314-3943-11e9-9fa6-0a06afd4a22e. 164 | ``` 165 | `share-data.sh` script shows how to implement the same functionality using the Globus CLI. 166 | ``` 167 | $ globus login 168 | $ source_ep=ddb59aef-6d04-11e5-ba46-22000b92c6ec # Tutorial Endpoint 1 169 | $ shared_ep='' # Shared endpoint on Tutorial Endpoint 2 170 | $ user_uuid=c02d881a-d274-11e5-bdf5-d3a88fb071ca # John Doe 171 | $ ./share-data.sh \ 172 | --source-endpoint $source_ep \ 173 | --shared-endpoint $shared_ep \ 174 | --source-path /share/godata/ \ 175 | --destination-path /share-data-demo/ \ 176 | --user-uuid $user_uuid \ 177 | --delete 178 | Destination directory, /share-data-demo/godata/, exists and will be deleted 179 | The directory was created successfully 180 | Message: The transfer has been accepted and a task has been created and queued for execution 181 | Task ID: 60b80d23-39c2-11e7-bcec-22000b9a448b 182 | ``` 183 | 184 | **Note**: Both share_data.py and share-data.sh require you to login (see Login section for help). 
185 | 186 | ### cleanup_cache.py 187 | 188 | There are a few things that are necessary to set up in order to successfully run [`cleanup_cache.py`](cleanup_cache.py). 189 | 190 | * You must have registered a ClientID and generated a secret for it at [Globus Developer Pages](https://developers.globus.org). Since this script uses a Client Credential Grant, embedding the client secret in the script, you should not use this ClientID for any other purposes. When creating the app use the following: 191 | * "Redirect URLs" -- Set to `https://example.com/oauth_callback/`. 192 | * Scopes: `[urn:globus:auth:scope:transfer.api.globus.org:all]` 193 | Only transfer is required, since your bot will be using client_secret 194 | to authenticate. `[openid profile]` are required if you setup your own 195 | three-legged-auth server and want to allow users to login to it. 196 | * Leave "Native App" unchecked. 197 | * The ClientID and secret that you obtained above should be placed in the `cleanup_cache.py` script, in place of the development values. 198 | * There must be a shared endpoint, the transfers from which you wish to monitor and clean up. 199 | * The Client Identity Username (typically the Client ID with "@clients.auth.globus.org appended) must be authorized as an Administrator and Activity Monitor of your shared endpoint. You can set these at `https://www.globus.org/app/endpoints//roles`. 200 | * You must put the UUID of the shared endpoint you wish to clean up in the `cleanup_cache.py` script. 201 | 202 | The `cleanup_cache.py` script will do the following: 203 | 204 | * Search for successful transfers from your shared endpoint within the last 24 hours. 205 | * For any successful transfers found, determine if the files transferred were in a common directory, if so, submit a recursive delete request on that directory, if not, submit a delete request for each file from the transfer. 
206 | * Determine if the common directory from the transfer had any specific ACLs set on the endpoint, if so, delete them. 207 | 208 | Note: `cleanup_cache.py` will find the most specific common directory for all files copied in a transfer. Thus, if all the files transferred were in `/maindir/subdir`, it will attempt to recursively delete `/maindir/subdir`, not `/maindir`. 209 | 210 | Another Note: This script is greedy in how it deletes folders. If someone cherry-picks files, it will still delete the whole directory! 211 | 212 | ### Login 213 | 214 | Some of the scripts require you to login to Globus to ensure that you are an authorized user. The scripts use refresh tokens to save you the trouble of needing to login every time a script is run. For example, if you login when running a script and then run either the same script or a different one, you will not need to login a second time. 215 | 216 | ## Blocking on Transfer Tasks 217 | 218 | Sometimes you'll want to block on the submitted transfer before 219 | proceeding onto the next part of your script or workflow. You can do 220 | this with the `globus task wait` command from the 221 | [Globus CLI](https://docs.globus.org/cli/). This command also allows 222 | you to specify the return code from a timeout window to determine if 223 | the CLI is exiting because the task has failed or is still progressing. 224 | 225 | ### Examples 226 | 227 | Default value for exceeding the timeout window 228 | ``` 229 | $ globus task wait -H --timeout 35 --polling-interval 10 c1002af0-444e-11e9-bf28-0edbf3a4e7ee 230 | .... 231 | Task has yet to complete after 35 seconds 232 | $ echo $? 233 | 1 234 | ``` 235 | 236 | Setting a custom exit code of `0` 237 | ``` 238 | $ globus task wait -H --timeout 35 --polling-interval 10 --timeout-exit-code 0 c1002af0-444e-11e9-bf28-0edbf3a4e7ee 239 | .... 240 | Task has yet to complete after 35 seconds 241 | $ echo $? 
242 | 0 243 | ``` 244 | 245 | Cancelled task 246 | ``` 247 | $ globus task wait -H --timeout 60 --polling-interval 5 c1002af0-444e-11e9-bf28-0edbf3a4e7ee 248 | .. 249 | $ echo $? 250 | 1 251 | ``` 252 | -------------------------------------------------------------------------------- /batch.md: -------------------------------------------------------------------------------- 1 | # Globus CLI Batch Transfer Recipe 2 | 3 | In this example we're going to submit transfers from two directories on a single Globus endpoint and have the data copied to a single common directory. This can be used to aggregate results from different simulations or other jobs. It will show how to do a lot of things with the Globus CLI along the way. This example can be useful if you deal with hundreds or thousands of files and directories at a single time. 4 | 5 | We'll walk through how to use the [Globus CLI](https://docs.globus.org/cli/) to list, filter, and [batch submit a transfer](https://docs.globus.org/cli/reference/transfer/) from two locations into a single destination folder. To get started, you'll need to have the Globus CLI installed and be logged in. See the [getting started](README.md#getting-started) section of the README. 6 | 7 | ## Get the Endpoint UUIDs 8 | 9 | We're going to copy data from ALCF's [Theta](https://www.alcf.anl.gov/theta) to [Petrel](http://petrel.alcf.anl.gov/), the storage system used to support community data repositories. Globus makes heavy use of UUIDs to refer to things like endpoints, so we'll search for them. 
10 | 11 | ``` 12 | $ globus endpoint search theta 13 | ID | Owner | Display Name 14 | ------------------------------------ | ----------------- | -------------- 15 | 08925f04-569f-11e7-bef8-22000b9a448b | alcf@globusid.org | alcf#dtn_theta 16 | $ globus endpoint search petrel#e3sm 17 | ID | Owner | Display Name 18 | ------------------------------------ | ------------------- | ------------ 19 | dabdceba-6d04-11e5-ba46-22000b92c6ec | petrel@globusid.org | petrel#e3sm 20 | ``` 21 | 22 | ## Set Environment Variables to Track Things 23 | 24 | Memorizing UUIDs is not a recommended practice. We'll set environment variables to track them. 25 | 26 | ``` 27 | $ theta_ep=08925f04-569f-11e7-bef8-22000b9a448b 28 | $ petrel_e3sm_ep=dabdceba-6d04-11e5-ba46-22000b92c6ec 29 | ``` 30 | 31 | While we're at it, we'll set our source and destination directories to prevent typos and errors. 32 | 33 | ``` 34 | $ run1_path=/lus/theta-fs0/projects/example/run1/ 35 | $ run2_path=/lus/theta-fs0/projects/example/run2/ 36 | $ e3sm_path=/users/rick/watertable/ 37 | ``` 38 | 39 | ## Check Endpoint Activation 40 | 41 | If the endpoint isn't [activated](https://docs.globus.org/api/transfer/endpoint_activation/#web_activation), go to the [Globus web app](https://app.globus.org/), search for the endpoint by name or UUID and you'll be prompted for credentials to activate it. The destination in this example is a shared endpoint which will be [auto-activated](https://docs.globus.org/api/transfer/endpoint_activation/#auto_activation) by the Globus CLI. 42 | 43 | ``` 44 | $ globus endpoint is-activated $theta_ep 45 | 08925f04-569f-11e7-bef8-22000b9a448b is activated 46 | Exit: 0 47 | $ globus endpoint is-activated $petrel_e3sm_ep 48 | dabdceba-6d04-11e5-ba46-22000b92c6ec does not require activation 49 | Exit: 0 50 | ``` 51 | 52 | ## List Source Files 53 | 54 | The `globus ls` works a lot like `ls` on a POSIX command line and we can use the `--filter` option to save us from parsing the full list. 
55 | 56 | ``` 57 | $ globus ls --filter '~*watertable.h0*' $theta_ep:$run1_path > run1_watertable_files.txt 58 | $ globus ls --filter '~*watertable.h0*' $theta_ep:$run2_path > run2_watertable_files.txt 59 | ``` 60 | 61 | The batch transfer expects a list of source files and their corresponding destination filenames. In this case, those are the same and our files will have lines like: 62 | `SOURCE_FILE DEST_FILE`. (If we wanted to move the entire directory this would be a bit easier, we would use a recursive transfer. But we want to only move _some_ of the files from the source directories.) 63 | 64 | ``` 65 | $ for i in `cat run1_watertable_files.txt ` 66 | do 67 | echo "$i $i" 68 | done > run1_watertable_files_src_dest.txt 69 | $ for i in `cat run2_watertable_files.txt ` 70 | do 71 | echo "$i $i" 72 | done > run2_watertable_files_src_dest.txt 73 | ``` 74 | 75 | ## Batch Submit the Transfers 76 | 77 | The base Globus CLI transfer command is 78 | 79 | ``` 80 | $ globus transfer SOURCE_ENDPOINT_ID:SOURCE_PATH DEST_ENDPOINT_ID:DEST_PATH 81 | ``` 82 | 83 | The `--batch` option to the transfer command will read the `stdin` input from the file line by line to build the transfer request. The source and destination paths from the input files are relative to the paths we specify using `SOURCE_ENDPOINT_ID:SOURCE_PATH` and `DEST_ENDPOINT_ID:DEST_PATH`. 84 | 85 | You could submit one transfer per file, but then you would have a lot of tasks to monitor and the underlying Globus Connect servers would not be able to efficiently aggregate the files. In other words, that's too much work and would be slower. 
86 | 87 | ``` 88 | $ globus transfer --batch $theta_ep:$run1_path $petrel_e3sm_ep:$e3sm_path < run1_watertable_files_src_dest.txt 89 | Message: The transfer has been accepted and a task has been created and queued for execution 90 | Task ID: 1d499566-01ab-11ea-be94-02fcc9cdd752 91 | $ globus transfer --batch $theta_ep:$run2_path $petrel_e3sm_ep:$e3sm_path < run2_watertable_files_src_dest.txt 92 | Message: The transfer has been accepted and a task has been created and queued for execution 93 | Task ID: 15173a2e-01ab-11ea-be94-02fcc9cdd752 94 | ``` 95 | 96 | ## Check Status on the Transfers 97 | 98 | You can monitor the tasks using the [web app](https://app.globus.org/activity) or with the CLI. Here, I've waited long enough for them to have finished. Since this example was within Argonne for a few hundreds of gigabytes, that's not surprising. Your transfer rates may vary. 99 | 100 | ### `run1` Transfer 101 | 102 | ``` 103 | $ globus task show 1d499566-01ab-11ea-be94-02fcc9cdd752 104 | Label: None 105 | Task ID: 1d499566-01ab-11ea-be94-02fcc9cdd752 106 | Is Paused: False 107 | Type: TRANSFER 108 | Directories: 0 109 | Files: 121 110 | Status: SUCCEEDED 111 | Request Time: 2019-11-07 22:08:24+00:00 112 | Faults: 0 113 | Total Subtasks: 242 114 | Subtasks Succeeded: 242 115 | Subtasks Pending: 0 116 | Subtasks Retrying: 0 117 | Subtasks Failed: 0 118 | Subtasks Canceled: 0 119 | Subtasks Expired: 0 120 | Completion Time: 2019-11-07 22:09:57+00:00 121 | Source Endpoint: alcf#dtn_theta 122 | Source Endpoint ID: 08925f04-569f-11e7-bef8-22000b9a448b 123 | Destination Endpoint: petrel#e3sm 124 | Destination Endpoint ID: dabdceba-6d04-11e5-ba46-22000b92c6ec 125 | Bytes Transferred: 44631218808 126 | Bytes Per Second: 480727214 127 | ``` 128 | 129 | ### `run2` Transfer 130 | 131 | ``` 132 | $ globus task show 15173a2e-01ab-11ea-be94-02fcc9cdd752 133 | Label: None 134 | Task ID: 15173a2e-01ab-11ea-be94-02fcc9cdd752 135 | Is Paused: False 136 | Type: TRANSFER 137 | Directories: 
0 138 | Files: 481 139 | Status: SUCCEEDED 140 | Request Time: 2019-11-07 22:08:11+00:00 141 | Faults: 0 142 | Total Subtasks: 962 143 | Subtasks Succeeded: 962 144 | Subtasks Pending: 0 145 | Subtasks Retrying: 0 146 | Subtasks Failed: 0 147 | Subtasks Canceled: 0 148 | Subtasks Expired: 0 149 | Completion Time: 2019-11-07 22:11:27+00:00 150 | Source Endpoint: alcf#dtn_theta 151 | Source Endpoint ID: 08925f04-569f-11e7-bef8-22000b9a448b 152 | Destination Endpoint: petrel#e3sm 153 | Destination Endpoint ID: dabdceba-6d04-11e5-ba46-22000b92c6ec 154 | Bytes Transferred: 177418316088 155 | Bytes Per Second: 901833634 156 | ``` 157 | 158 | ## List the New Files 159 | 160 | As a quality check on my file lists, etc., I will list the number of files that are now on the common destination to the number of source files. 161 | 162 | ``` 163 | $ globus ls $petrel_e3sm_ep:$e3sm_path > petrel_files.txt 164 | $ wc petrel_files.txt 165 | 601 601 54070 petrel_files.txt 166 | $ wc run2_watertable_files.txt run1_watertable_files.txt 167 | 481 481 43270 run2_watertable_files.txt 168 | 121 121 10890 run1_watertable_files.txt 169 | 602 602 54160 total 170 | ``` 171 | 172 | Hmmm. Off by one...turns out there was a common file in both directories. It's worth checking for collisions like that when you copy different things to the same destination. That's not just for Globus. The POSIX command line can also be unforgiving. Remember: _Windows has a trash can, POSIX has an incinerator._ 173 | 174 | ## Cleaning Up 175 | 176 | Speaking of incinerators, as you copy data around, sometimes it's just to stage data for another section of the pipeline, which was this case. After the consolidated data was processed and moved to its next location, we should remove the intermediate directory. 177 | 178 | *BE AWARE* when using `globus delete`, especially `globus delete -r`. It's just like being on the command line; if you have write permissions to the target of that command, it's going away. 
#!/usr/bin/env python

"""
Delete data on your shared endpoint if someone has copied it. This script
only acts on transfers that have already SUCCEEDED, so data is never removed
while a transfer is still running. Ensure you have an app setup on
developers.globus.org (See below for instructions).

*WARNING*: This script is greedy in how it deletes folders. If someone
cherry-picks files, it will delete the whole directory!

Confidential App [Client Credentials Grant] on developers.globus.org:
    * "Redirect URLs" -- Set to "https://example.com/oauth_callback/".
    * Scopes:
        [urn:globus:auth:scope:transfer.api.globus.org:all]
        Only transfer is required, since your bot will be using client_secret
        to authenticate. [openid profile] are required if you setup your own
        three-legged-auth server and want to allow users to login to it.
    * Uncheck "Native App".

*Notice*: A confidential app is a bot which acts on your behalf. You will
need to give it access to your shared endpoint.

Want to test with non-critical data first? Copy data from Globus Tutorial
Endpoint 1: http://bit.ly/2rzWx0Z
"""

from __future__ import print_function

import sys
import globus_sdk
from globus_sdk import (TransferClient,
                        AccessTokenAuthorizer)
from globus_sdk.exc import TransferAPIError
from datetime import datetime
from datetime import timedelta
from os.path import commonprefix, dirname

# Must add the client ID as an Access Manager to the
# shared endpoint.

# Client ID from the app you created above
CLIENT_ID = '4e6db83a-c767-4e53-ac96-d89b2cbe6577'
# The secret, loaded from wherever you store it.
# NOTE(review): this is a development value; a real client secret should not
# be committed to version control.
CLIENT_SECRET = 'MWhHZgWo+Z2u2hLB1808dos3qDKw5Q4W3cFhRMTqHYs='
# Source endpoint. *MUST* be a shared endpoint.
SOURCE_ENDPOINT_ID = '3886dc9c-3eff-11e7-bd15-22000b9a448b'


def do_client_authentication(client_id, client_secret):
    """Run a Client Credentials grant and return a Transfer access token.

    :param client_id: ID of the confidential app registered with Globus.
    :param client_secret: secret generated for that app.
    :returns: the ``access_token`` string issued for the
        ``transfer.api.globus.org`` resource server.
    """
    client = globus_sdk.ConfidentialAppAuthClient(
        client_id,
        client_secret,
    )
    token_response = client.oauth2_client_credentials_tokens()
    return (token_response.by_resource_server
            ['transfer.api.globus.org']['access_token']
            )


def task_delete_conditions_satisfied(task):
    """Returns True if the task was someone transferring data FROM this
    endpoint, false otherwise.

    :param task: mapping with at least the ``type`` and
        ``source_endpoint_id`` keys.
    """
    return task["type"] == "TRANSFER" and \
        task["source_endpoint_id"] == SOURCE_ENDPOINT_ID


def select_dir_to_delete(transfer_client, task):
    """Find the common directory under which all the files live. If one exists,
    it will be deleted recursively, even if not all files under it were
    transferred. If there is no common directory, each file that was
    transferred will be deleted.

    :param transfer_client: client used to list the files the task
        transferred successfully.
    :param task: mapping describing the task; ``task_id``,
        ``source_endpoint``, ``destination_endpoint`` and ``owner_string``
        are read.
    :returns: ``(files_list, common_dir)`` where ``files_list`` holds the
        source path of every transferred file and ``common_dir`` is the
        deepest directory shared by all of them ('' when ``files_list`` is
        empty; for absolute paths it is never shallower than '/').
    """
    successful_file_transfers = \
        transfer_client.endpoint_manager_task_successful_transfers(
            task["task_id"]
        )
    print("Transfer Task({}): {} -> {}\n was submitted by {}\n".
          format(task["task_id"], task["source_endpoint"],
                 task["destination_endpoint"],
                 task["owner_string"]))

    files_list = [
        globr["source_path"] for globr in successful_file_transfers]
    print("files list is ", files_list)

    # commonprefix() compares character by character, so its result may end
    # in the middle of a file name; dirname() trims it back to a directory.
    common_dir = dirname(commonprefix(files_list))
    return files_list, common_dir


def delete_dir_and_acls(tc, task, files_list, common_dir):
    """Given a task, delete all files and ACLs associated with it. If a
    common directory exists between files, recursively delete that.
    Otherwise, delete each file individually. (See select_dir_to_delete above)

    Nothing is deleted when the task transferred no files, or when the
    common directory cannot be listed (insufficient read access, or it no
    longer exists). ACL cleanup is best effort: SDK errors are reported and
    the function returns.

    :param tc: TransferClient authorized to manage SOURCE_ENDPOINT_ID.
    :param task: mapping describing the transfer task; ``task_id`` is read.
    :param files_list: source paths of the files the task transferred.
    :param common_dir: directory shared by every path in ``files_list``
        ('' if there is none).
    """
    if not files_list:
        # A SUCCEEDED task can legitimately have moved nothing. Without this
        # guard common_dir is '' and the ACL lookup below would target "/",
        # removing the endpoint's root ACL rule.
        print("Task {} transferred no files, there is nothing to "
              "delete\n".format(task["task_id"]))
        return

    # Probe the directory first so we do not submit a delete for something
    # that is already gone or that we are not allowed to read.
    try:
        tc.operation_ls(SOURCE_ENDPOINT_ID, path=common_dir)
    except TransferAPIError as tapie:
        if tapie.code == 'ClientError.NotFound':
            print('Directory {} no longer present on source endpoint, '
                  'there is nothing to delete\n'.format(common_dir))
        else:
            print("Could not delete directory '{}': {}".format(
                common_dir, tapie.message)
            )
        return

    # A directory delete must be recursive; individual files must not be.
    ddata = globus_sdk.DeleteData(
        tc, SOURCE_ENDPOINT_ID,
        label="deletion of {}".format(task["task_id"]),
        submission_id=None, recursive=bool(common_dir), deadline=None)
    if common_dir:
        ddata.add_item(common_dir)
    else:
        for path in files_list:
            ddata.add_item(path)
    tc.submit_delete(ddata)
    print("Job to delete data has been submitted")

    if not common_dir:
        # Files were removed one by one; there is no directory whose ACL
        # rule could be cleaned up.
        return

    acl_path = common_dir + "/"
    try:
        acl_list = tc.endpoint_manager_acl_list(SOURCE_ENDPOINT_ID)
    except globus_sdk.exc.GlobusError as err:
        print("Couldn't get acl list for endpoint {}: {}".format(
            SOURCE_ENDPOINT_ID, err))
        return

    acl_ids = {rule["path"]: rule["id"] for rule in acl_list}
    aclid = acl_ids.get(acl_path)
    if not aclid:
        print("No acl found for directory ", acl_path)
        return

    try:
        tc.delete_endpoint_acl_rule(SOURCE_ENDPOINT_ID, aclid)
    except globus_sdk.exc.GlobusError as err:
        print("Couldn't delete acl rule {}: {}".format(aclid, err))
        return
    print("Acl deleted for directory ", acl_path)


def main():
    """Clean up after every transfer that left SOURCE_ENDPOINT_ID and
    completed successfully within the last 24 hours.

    Exits with status 1 if the app lacks permission to list the endpoint's
    tasks; any other Transfer API error is re-raised.
    """
    # Take a single timestamp so the window is exactly 24 hours wide.
    now = datetime.utcnow().replace(microsecond=0)
    completion_range = "{},{}".format(
        (now - timedelta(hours=24)).isoformat(), now.isoformat())
    print("Cleaning up source endpoint {} \nfor outbound transfers completed "
          "in range {}\n ".format(SOURCE_ENDPOINT_ID, completion_range))

    transfer_token = do_client_authentication(CLIENT_ID, CLIENT_SECRET)

    authorizer = AccessTokenAuthorizer(access_token=transfer_token)
    tc = TransferClient(authorizer=authorizer)

    # Make sure the endpoint is activated before querying it.
    tc.endpoint_autoactivate(SOURCE_ENDPOINT_ID)
    task_fields = "task_id,source_endpoint,destination_endpoint," \
                  "source_host_path,owner_string,source_endpoint_id,type"
    try:
        tasks = tc.endpoint_manager_task_list(
            filter_status="SUCCEEDED",
            filter_endpoint=SOURCE_ENDPOINT_ID,
            filter_completion_time=completion_range,
            fields=task_fields)
    except TransferAPIError as tapie:
        if tapie.code == 'PermissionDenied':
            print(
                'Permission denied! Give your app permission by going to '
                '"globus.org/app/endpoints/{}/roles", and under '
                '"Identity/E-mail" adding "{}@clients.auth.globus.org" as '
                'an "AccessManager" and "Activity Manager"'.format(
                    SOURCE_ENDPOINT_ID, CLIENT_ID
                )
            )
            sys.exit(1)
        # Nothing weird *should* happen here, but if so re-raise so the user
        # can deal with it.
        raise
    tasklist = tasks.data
    if not tasklist:
        print("No transfers from {} found in the last 24 hours, "
              "nothing to clean up".format(SOURCE_ENDPOINT_ID))
    else:
        print("{} total transfers found from {} in the last 24 hours, "
              "some may not be of type TRANSFER".format(len(tasklist),
                                                        SOURCE_ENDPOINT_ID))
        delete_tasks = [task.data
                        for task in tasklist
                        if task_delete_conditions_satisfied(task)
                        ]
        for task in delete_tasks:
            files_list, common_dir = select_dir_to_delete(tc, task)

            delete_dir_and_acls(tc, task, files_list, common_dir)


if __name__ == '__main__':
    main()
#
# Visit https://www.globus.org/app/transfer?destination_id=ddb59af0-6d04-11e5-ba46-22000b92c6ec
# to view the transferred data.

# Globus Tutorial Endpoint 1
SOURCE_ENDPOINT='ddb59aef-6d04-11e5-ba46-22000b92c6ec'

# Globus Tutorial Endpoint 2
DESTINATION_ENDPOINT='ddb59af0-6d04-11e5-ba46-22000b92c6ec'

# Sample data
SOURCE_PATH='/share/godata/'

# Destination Path
# The directory will be created if it doesn't exist
DESTINATION_PATH='/~/sync-demo/'

# Where the ID of the previous transfer (if exists) is stored
LAST_TRANSFER_ID_FILE='last-transfer-id.txt'

# Sync options:
#   exists    Copy files that do not exist at the destination.
#   size      Copy files if the size of the destination does not match the
#             size of the source.
#   mtime     Copy files if the timestamp of the destination is older than
#             the timestamp of the source.
#   checksum  Copy files if checksums of the source and destination do not
#             match.
# The sync level by itself never deletes files on the destination; the
# --delete flag passed to "globus transfer" below is what does.
# For more information:
#   $ globus transfer --help
#   < OR >
#   https://docs.globus.org/api/transfer/task_submit/#transfer_and_delete_documents
SYNCTYPE='checksum'

# always start unset
unset abort_message

# start with default of 0
rc=0

# Print $abort_message on stderr and exit with the current value of $rc.
function abort () {
    echo "$abort_message" >&2
    exit "$rc"
}

# Abort if $rc is non-zero, otherwise print the optional success message.
#   $1  message stored in $abort_message and printed if $rc is non-zero
#   $2  success message; backslash escapes such as \n are interpreted
function check_rc () {
    if [ $# -gt 0 ]; then
        abort_message="$1"
    fi

    if [ "$rc" -ne 0 ]; then
        abort
    fi

    if [ -n "${2:-}" ]; then
        # '%b' expands the \n escapes without treating the message as a
        # printf format, so a '%' inside a path cannot corrupt the output.
        printf '%b' "$2"
    fi
}

# Same as check_rc, but first records the exit status of the command that
# ran immediately before this function was called.
function check_last_rc () {
    # must be the first command of the function
    rc=$?
    check_rc "$@"
}

# Only continue if the previous transfer succeeded or failed
# Other statuses will mean that previous transfer is either still
# running or requires human intervention (e.g., PAUSED)

echo "Checking for a previous transfer"

if [ -e "$LAST_TRANSFER_ID_FILE" ]
then
    last_transfer_id=$(cat "$LAST_TRANSFER_ID_FILE")
    last_transfer_status=$(globus task show --format unix --jmespath 'status' "$last_transfer_id")
    if [ "$last_transfer_status" != "SUCCEEDED" ] && [ "$last_transfer_status" != "FAILED" ]
    then
        abort_message="Last transfer $last_transfer_id status is $last_transfer_status, aborting"
        rc=1
        abort
    else
        echo "Last transfer $last_transfer_id $last_transfer_status, continuing"
    fi
fi

# Verify that the source path is a directory
globus ls --format unix --jmespath 'code' "$SOURCE_ENDPOINT:$SOURCE_PATH" > /dev/null 2>&1
check_last_rc "Could not list source directory" "Verified that source is a directory\n"

# Submit sync transfer, get the task ID
globus_output=$(globus transfer --format unix --jmespath 'task_id' --recursive \
    --delete --sync-level "$SYNCTYPE" \
    "$SOURCE_ENDPOINT:$SOURCE_PATH" \
    "$DESTINATION_ENDPOINT:$DESTINATION_PATH")
# Record the submission status right away: every assignment below resets $?,
# so checking it later would always report success.
rc=$?

success_msg="Submitted sync from $SOURCE_ENDPOINT:$SOURCE_PATH to $DESTINATION_ENDPOINT:$DESTINATION_PATH"
# Note the literal \n sequences: check_rc expands them when printing
link="Link:\nhttps://app.globus.org/activity/$globus_output/overview\n"

# Check status
check_rc "Globus transfer submission failed" "$success_msg\n$link"

# Save ID of new sync transfer
echo "Saving sync transfer ID to $LAST_TRANSFER_ID_FILE"
echo "$globus_output" > "$LAST_TRANSFER_ID_FILE"
| -------------------------------------------------------------------------------- /globus_folder_sync.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Sync a directory between two Globus endpoints. Defaults: 4 | 5 | Source: Globus Tutorial Endpoint 1: /share/godata 6 | Destination: Globus Tutorial Endpoint 2: /~/sync-demo/ 7 | 8 | # Checkout the Destination at: 9 | globus.org/app/transfer?destination_id=ddb59af0-6d04-11e5-ba46-22000b92c6ec 10 | 11 | Authorization only needs to happen once, afterwards tokens are saved to disk 12 | (MUST BE STORED IN A SECURE LOCATION). Store data is already checked for 13 | previous transfers, so if this script is run twice in quick succession, 14 | the second run won't queue a duplicate transfer.""" 15 | 16 | import json 17 | import sys 18 | import os 19 | import six 20 | 21 | from globus_sdk import (NativeAppAuthClient, TransferClient, 22 | RefreshTokenAuthorizer, TransferData) 23 | from globus_sdk.exc import GlobusAPIError, TransferAPIError 24 | 25 | from fair_research_login import NativeClient 26 | 27 | # Globus Tutorial Endpoint 1 28 | SOURCE_ENDPOINT = 'ddb59aef-6d04-11e5-ba46-22000b92c6ec' 29 | # Globus Tutorial Endpoint 2 30 | DESTINATION_ENDPOINT = 'ddb59af0-6d04-11e5-ba46-22000b92c6ec' 31 | # Copy data off of the endpoint share 32 | SOURCE_PATH = '/share/godata/' 33 | 34 | # Destination Path -- The directory will be created if it doesn't exist 35 | DESTINATION_PATH = '/~/sync-demo/' 36 | 37 | TRANSFER_LABEL = 'Folder Sync Example' 38 | 39 | # You will need to register a *Native App* at https://developers.globus.org/ 40 | # Your app should include the following: 41 | # - The scopes should match the SCOPES variable below 42 | # - Your app's clientid should match the CLIENT_ID var below 43 | # - "Native App" should be checked 44 | # For more information: 45 | # https://docs.globus.org/api/auth/developer-guide/#register-app 46 | CLIENT_ID = 
'079bdf4e-9666-4816-ac01-7eab9dc82b93' 47 | DATA_FILE = 'transfer-data.json' 48 | REDIRECT_URI = 'https://auth.globus.org/v2/web/auth-code' 49 | SCOPES = ('openid email profile ' 50 | 'urn:globus:auth:scope:transfer.api.globus.org:all') 51 | 52 | APP_NAME = 'Folder Sync Example App' 53 | 54 | # ONLY run new tasks if there was a previous task and it exited with one of the 55 | # following statuses. This is ignored if there was no previous task. 56 | # The previous task is queried from the DATA_FILE 57 | PREVIOUS_TASK_RUN_CASES = ['SUCCEEDED', 'FAILED'] 58 | 59 | # Create the destination folder if it does not already exist 60 | CREATE_DESTINATION_FOLDER = True 61 | 62 | 63 | get_input = getattr(__builtins__, 'raw_input', input) 64 | 65 | 66 | def load_data_from_file(filepath): 67 | """Load a set of saved tokens.""" 68 | if not os.path.exists(filepath): 69 | return [] 70 | with open(filepath, 'r') as f: 71 | tokens = json.load(f) 72 | 73 | return tokens 74 | 75 | 76 | def save_data_to_file(filepath, key, data): 77 | """Save data to a file""" 78 | try: 79 | store = load_data_from_file(filepath) 80 | except: 81 | store = {} 82 | if len(store) > 0: 83 | store[key] = data 84 | with open(filepath, 'w') as f: 85 | json.dump(store, f) 86 | 87 | 88 | def setup_transfer_client(transfer_tokens): 89 | 90 | authorizer = RefreshTokenAuthorizer( 91 | transfer_tokens['refresh_token'], 92 | NativeAppAuthClient(client_id=CLIENT_ID), 93 | access_token=transfer_tokens['access_token'], 94 | expires_at=transfer_tokens['expires_at_seconds']) 95 | 96 | transfer_client = TransferClient(authorizer=authorizer) 97 | 98 | try: 99 | transfer_client.endpoint_autoactivate(SOURCE_ENDPOINT) 100 | transfer_client.endpoint_autoactivate(DESTINATION_ENDPOINT) 101 | except GlobusAPIError as ex: 102 | if ex.http_status == 401: 103 | sys.exit('Refresh token has expired. 
' 104 | 'Please delete the `tokens` object from ' 105 | '{} and try again.'.format(DATA_FILE)) 106 | else: 107 | raise ex 108 | return transfer_client 109 | 110 | 111 | def check_endpoint_path(transfer_client, endpoint, path): 112 | """Check the endpoint path exists""" 113 | try: 114 | transfer_client.operation_ls(endpoint, path=path) 115 | except TransferAPIError as tapie: 116 | print('Failed to query endpoint "{}": {}'.format( 117 | endpoint, 118 | tapie.message 119 | )) 120 | sys.exit(1) 121 | 122 | 123 | def create_destination_directory(transfer_client, dest_ep, dest_path): 124 | """Create the destination path if it does not exist""" 125 | try: 126 | transfer_client.operation_ls(dest_ep, path=dest_path) 127 | except TransferAPIError: 128 | try: 129 | transfer_client.operation_mkdir(dest_ep, dest_path) 130 | print('Created directory: {}'.format(dest_path)) 131 | except TransferAPIError as tapie: 132 | print('Failed to start transfer: {}'.format(tapie.message)) 133 | sys.exit(1) 134 | 135 | 136 | def main(): 137 | tokens = None 138 | client = NativeClient(client_id=CLIENT_ID, app_name=APP_NAME) 139 | try: 140 | # if we already have tokens, load and use them 141 | tokens = client.load_tokens(requested_scopes=SCOPES) 142 | except: 143 | pass 144 | 145 | if not tokens: 146 | # if we need to get tokens, start the Native App authentication process 147 | # need to specify that we want refresh tokens 148 | tokens = client.login(requested_scopes=SCOPES, 149 | refresh_tokens=True) 150 | try: 151 | client.save_tokens(tokens) 152 | except: 153 | pass 154 | 155 | transfer = setup_transfer_client(tokens['transfer.api.globus.org']) 156 | 157 | try: 158 | data = load_data_from_file(DATA_FILE) 159 | if len(data) > 0: 160 | task_data = data['task'] 161 | task = transfer.get_task(task_data['task_id']) 162 | if task['status'] not in PREVIOUS_TASK_RUN_CASES: 163 | print('The last transfer status is {}, skipping run...'.format( 164 | task['status'] 165 | )) 166 | sys.exit(1) 167 | 
except KeyError: 168 | # Ignore if there is no previous task 169 | pass 170 | 171 | check_endpoint_path(transfer, SOURCE_ENDPOINT, SOURCE_PATH) 172 | if CREATE_DESTINATION_FOLDER: 173 | create_destination_directory(transfer, DESTINATION_ENDPOINT, 174 | DESTINATION_PATH) 175 | else: 176 | check_endpoint_path(transfer, DESTINATION_ENDPOINT, DESTINATION_PATH) 177 | 178 | tdata = TransferData( 179 | transfer, 180 | SOURCE_ENDPOINT, 181 | DESTINATION_ENDPOINT, 182 | label=TRANSFER_LABEL, 183 | sync_level="checksum" 184 | ) 185 | tdata.add_item(SOURCE_PATH, DESTINATION_PATH, recursive=True) 186 | 187 | task = transfer.submit_transfer(tdata) 188 | save_data_to_file(DATA_FILE, 'task', task.data) 189 | print('Transfer has been started from\n {}:{}\nto\n {}:{}'.format( 190 | SOURCE_ENDPOINT, 191 | SOURCE_PATH, 192 | DESTINATION_ENDPOINT, 193 | DESTINATION_PATH 194 | )) 195 | url_string = 'https://globus.org/app/transfer?' + \ 196 | six.moves.urllib.parse.urlencode({ 197 | 'origin_id': SOURCE_ENDPOINT, 198 | 'origin_path': SOURCE_PATH, 199 | 'destination_id': DESTINATION_ENDPOINT, 200 | 'destination_path': DESTINATION_PATH 201 | }) 202 | print('Visit the link below to see the changes:\n{}'.format(url_string)) 203 | 204 | 205 | if __name__ == '__main__': 206 | main() 207 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | globus-sdk>=1.1.0,<=2.0.0 2 | fair_research_login 3 | -------------------------------------------------------------------------------- /share-data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script transfers a folder to a shared endpoint 4 | # and sets the sharing access control to a specified 5 | # user, and, or group. The configuration options are 6 | # defined below (source endpoint, shared endpoint, etc.). 
# Note that the shared endpoint UUID must be provided.

# Exit with status 1 if $rc is non-zero.
#   $1  optional message; stored in $abort_message and, when $rc is
#       non-zero, printed on stderr before exiting
# A call without arguments exits silently on failure.
function check_rc () {
    local message="${1:-}"

    if [ $# -gt 0 ]; then
        abort_message="$message"
    fi

    if [ "${rc:-0}" -ne 0 ]; then
        if [ -n "$message" ]; then
            echo "Error: $message" >&2
        fi
        exit 1
    fi
}

# Sync options:
#   exists    Copy files that do not exist at the destination.
#   size      Copy files if the size of the destination does not match the
#             size of the source.
#   mtime     Copy files if the timestamp of the destination is older than
#             the timestamp of the source.
#   checksum  Copy files if checksums of the source and destination do not
#             match. Files on the destination are never deleted.
# For more information:
#   $ globus transfer --help
#   < OR >
#   https://docs.globus.org/api/transfer/task_submit/#transfer_and_delete_documents
sync='checksum'

# Print the usage text and exit successfully.
function help_and_exit () {

    echo "Usage: $0 --source-endpoint <endpoint_id> --source-path <path>" \
        "--shared-endpoint <endpoint_id> --destination-path <path>" \
        "[--user-id <identity>] [--group-uuid <uuid>] [-d|--delete] [-h|--help]"
    echo ''
    echo 'The following options are available:'
    echo ''
    echo '  --source-endpoint: The endpoint you want to copy data from'
    echo '  --source-path: The path to the folder you want to copy to '
    echo '      your "--shared-endpoint"'
    echo '  --shared-endpoint: A shared endpoint you have created on'
    echo '      globus.org/app/transfer by clicking "share"'
    echo '  --destination-path: The path where "--source-path" folder'
    echo '      will be copied'
    echo '  --user-id: Email for user you want to grant access to your shared'
    echo '      endpoint'
    echo '  --user-uuid: Alternative for "--user-id"'
    echo '  --group-uuid: Group UUID for a group you want to grant read access'
    echo '  --group-id: Alternative for "--group-uuid"'
    echo '  -d, --delete: Delete destination folder if it already exists'
    echo '  -h, --help: Print this help message'
    echo ''
    echo "Example: $0 --source-endpoint ddb59aef-6d04-11e5-ba46-22000b92c6ec --source-path /share/godata --destination-path /shared_folder_example --shared-endpoint <shared_endpoint_id>"
    echo ''
    echo 'Go to "globus.org/app/transfer", navigate to your endpoint, and click'
    echo '"share" to create a shared endpoint'
    echo ''
    exit 0

}

if [ $# -eq 0 ]; then
    help_and_exit
fi


# Parse the command line. Options that take a value consume the following
# argument with an extra "shift".
while [ $# -gt 0 ]; do
    case "$1" in
        --source-endpoint)
            shift
            source_endpoint="$1"
            ;;
        --shared-endpoint)
            shift
            shared_endpoint="$1"
            ;;
        --source-path)
            shift
            source_path="$1"
            ;;
        --destination-path)
            shift
            destination_path="$1"
            ;;
        --user-uuid|--user-id)
            shift
            user_id="$1"
            ;;
        # --group-id is documented in the help text as an alias
        --group-uuid|--group-id)
            shift
            group_uuid="$1"
            ;;
        -d|--delete)
            delete='yes'
            ;;
        -h|--help)
            help_and_exit
            ;;
        *)
            {
                echo ''
                echo "Error: Unknown Option: '$1'"
                echo ''
                echo "$0 --help for options and more information."
            } >&2
            exit 1
            ;;
    esac
    shift
done

if [ -z "$source_endpoint" ]; then
    echo 'Error: Source endpoint is not defined' >&2
    exit 1
fi

if [ -z "$shared_endpoint" ]; then
    echo 'Error: Shared destination endpoint is not defined' >&2
    exit 1
fi

case "$destination_path" in
    /*)
        ;;
    *)
        echo 'Destination path must be absolute' >&2
        exit 1
        ;;
esac

case "$source_path" in
    /*)
        ;;
    *)
        echo 'Source path must be absolute' >&2
        exit 1
        ;;
esac

# The destination path itself must already exist on the shared endpoint
globus ls "$shared_endpoint:$destination_path" 1>/dev/null
rc=$?
check_rc "Could not list $shared_endpoint:$destination_path"

# check if a directory with the same name was already transferred to the destination path
basename=$(basename "$source_path")

# Add '/' if the user didn't provide one
if [ "${destination_path: -1}" != "/" ]; then
    destination_path="$destination_path/"
fi

destination_directory="$destination_path$basename/"
if globus ls "$shared_endpoint:$destination_directory" 1>/dev/null 2>/dev/null; then
    # if it was, delete it
    if [ -n "$delete" ]; then
        echo "Destination directory, $destination_directory, exists and will be deleted"
        task_id=$(globus delete --format unix --jmespath 'task_id' --label 'Share Data Example' -r "$shared_endpoint:$destination_directory")
        globus task wait --timeout 600 "$task_id"
        rc=$?
        check_rc "Deleting $destination_directory did not complete"
    else
        >&2 echo \
            "Error: Destination directory, $destination_path$basename, already exists." \
            "Delete the directory or use --delete option"
        exit 1
    fi
fi

echo "Creating destination directory $destination_directory"
globus mkdir "$shared_endpoint:$destination_directory"
rc=$?
171 | check_rc 172 | 173 | if [ -n "$user_id" ]; then 174 | echo "Granting user, $user_id, read access to the destination directory" 175 | globus endpoint permission create --identity "$user_id" --permissions r "$shared_endpoint:$destination_directory" 176 | fi 177 | if [ -n "$group_uuid" ]; then 178 | echo "Granting group, $group_uuid, read access to the destination directory" 179 | globus endpoint permission create --group $group_uuid --permissions r "$shared_endpoint:$destination_directory" 180 | fi 181 | 182 | echo "Submitting a transfer from $source_endpoint:$source_path to $shared_endpoint:$destination_directory" 183 | exec globus transfer --recursive --sync-level $sync --label 'Share Data Example' "$source_endpoint:$source_path" "$shared_endpoint:$destination_directory" 184 | -------------------------------------------------------------------------------- /share_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Copy data from your private endpoint to a shared one for other people 5 | to access. You can share data with both individual users and groups. 6 | 7 | Required: You create a shared endpoint by going to https://app.globus.org 8 | navigating to an Endpoint, and clicking "share". You can use "Globus Tutorial 9 | Endpoint 2" for testing. 10 | 11 | Choose a data source. You can use "Globus Tutorial Endpoint 1" for testing. 12 | 13 | Register your own app at developers.globus.org. Configure as follows: 14 | Native App: 15 | * "Redirect URLs" -- Set to "https://auth.globus.org/v2/web/auth-code". 16 | You can setup your own server for distributing auth codes if you wish. 17 | * Scopes: 18 | [openid profile urn:globus:auth:scope:transfer.api.globus.org:all] 19 | openid and profile are required for auth, transfer.api.globus.org 20 | for transfers. 21 | * Check "Native App". 
22 | Confidential App [Client Credentials Grant]: 23 | * "Redirect URLs" -- Set to "https://auth.globus.org/v2/web/auth-code". 24 | Confidential apps also allow you to setup your own server for three- 25 | legged-auth with auth.globus.org if you wish. You may therefore put 26 | https://example.com/oauth_callback/ instead. 27 | * Scopes: 28 | [urn:globus:auth:scope:transfer.api.globus.org:all] 29 | Only transfer is required, since your bot will be using client_secret 30 | to authenticate. [openid profile] are required if you setup your own 31 | three-legged-auth server and want to allow users to login to it. 32 | * Uncheck "Native App". 33 | 34 | Tutorial Endpoint IDs 35 | Globus Tutorial Endpoint 1: ddb59aef-6d04-11e5-ba46-22000b92c6ec 36 | Globus Tutorial Endpoint 2: ddb59af0-6d04-11e5-ba46-22000b92c6ec 37 | """ 38 | 39 | from __future__ import print_function 40 | import os 41 | import sys 42 | import argparse 43 | import json 44 | import globus_sdk 45 | from globus_sdk.exc import TransferAPIError 46 | from fair_research_login import NativeClient 47 | 48 | # Both Native App and Client Credential authentication require Client IDs. 49 | # Create your app at developers.globus.org. The following id is for testing 50 | # only and should not be relied upon (You should create your own app). 51 | CLIENT_ID = '079bdf4e-9666-4816-ac01-7eab9dc82b93' 52 | 53 | # Client Secret is only needed for Confidential apps. Make your app 54 | # confidential instead of native by _not_ checking the 'Native App' checkbox 55 | # on developers.globus.org for your app. 56 | CLIENT_SECRET = '' 57 | 58 | # Native is better for user machines, where the user is capable of hitting 59 | # a browser to get an authentication code. Native only stores temporary 60 | # access tokens (unless you enable refresh tokens), and does not require 61 | # safeguarding client secrets. 
#
# Client Credentials grant requires storing a 'client_secret', which does
# not require a browser or user intervention, but does require safeguarding
# the client_secret. Use Confidential on servers or trusted machines.
# *Notice*: A confidential app is a bot which acts on your behalf. You will
# need to give it permission to access your shared endpoint. You can do so
# with globus-cli via:
#   globus endpoint permission create
#       --identity <CLIENT_ID>@clients.auth.globus.org
#       <SHARED_ENDPOINT_ID>:/<PATH>
#       --permissions rw
# (Your bot's identity will always match the client id for your
# app + '@clients.auth.globus.org')
#
# You can also go to https://app.globus.org/endpoints?scope=shared-by-me
# and under "Identity/E-mail" set: "<CLIENT_ID>@clients.auth.globus.org"
APP_AUTHENTICATORS = ('native', 'client-credentials')

# Default is native for this script.
AUTHENTICATION = 'native'

# Redirect URI specified when registering a native app
REDIRECT_URI = 'https://auth.globus.org/v2/web/auth-code'

# For this example, we will be liberal with scopes.
SCOPES = ('openid email profile '
          'urn:globus:auth:scope:transfer.api.globus.org:all')

TOKEN_FILE = 'refresh-tokens.json'

APP_NAME = 'Share Data Example App'

# Example: Globus Tutorial Endpoint 1
source_endpoint = 'ddb59aef-6d04-11e5-ba46-22000b92c6ec'

# Choose which shared folder you want to use.
# Example: Go to https://app.globus.org/ and select:
#   Endpoint: Globus Tutorial Endpoint 2
#   Path: /~/
# And click 'share' to create your home directory as the shared endpoint.
shared_endpoint = ''

# Source data. The example below is three files on Globus Tutorial Endpoint 1
source_path = '/share/godata'

# Destination on the shared endpoint. Note that this example copies files
# to the 'root' of the _shared_ endpoint, which in the example above we
# selected as '/~/'. The final result of this example will be three files
# stored on Globus Tutorial Endpoint 2 at /~/godata/.
destination_path = '/'

get_input = getattr(__builtins__, 'raw_input', input)


def load_tokens_from_file(filepath):
    """Load a set of saved tokens.

    Returns the decoded JSON content of ``filepath``, or an empty list when
    the file does not exist.
    """
    if not os.path.exists(filepath):
        return []
    with open(filepath, 'r') as f:
        return json.load(f)


def save_tokens_to_file(filepath, tokens):
    """Save a set of tokens for later use (written to ``filepath`` as JSON)."""
    with open(filepath, 'w') as f:
        json.dump(tokens, f)


def eprint(*args, **kwargs):
    """Same as print, but to standard error"""
    print(*args, file=sys.stderr, **kwargs)


def get_native_app_authorizer(client_id):
    """Return a RefreshTokenAuthorizer for the Transfer API (Native App flow).

    Previously saved tokens are reused when they can be loaded; otherwise an
    interactive login requesting refresh tokens is started and the resulting
    tokens are saved on a best-effort basis.
    """
    tokens = None
    client = NativeClient(client_id=client_id, app_name=APP_NAME)
    try:
        # if we already have tokens, load and use them
        tokens = client.load_tokens(requested_scopes=SCOPES)
    except Exception:
        # Loading is best effort: any failure falls through to a fresh login.
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        pass

    if not tokens:
        tokens = client.login(requested_scopes=SCOPES,
                              refresh_tokens=True)
        try:
            client.save_tokens(tokens)
        except Exception:
            # Saving is best effort too; the tokens in hand are still usable.
            pass

    transfer_tokens = tokens['transfer.api.globus.org']

    auth_client = globus_sdk.NativeAppAuthClient(client_id=client_id)

    return globus_sdk.RefreshTokenAuthorizer(
        transfer_tokens['refresh_token'],
        auth_client,
        access_token=transfer_tokens['access_token'],
        expires_at=transfer_tokens['expires_at_seconds'])


def do_client_credentials_app_authentication(client_id, client_secret):
    """
    Does a client credential grant authentication and returns a
    dict of tokens keyed by service name.
    """
    client = globus_sdk.ConfidentialAppAuthClient(
        client_id=client_id,
        client_secret=client_secret)
    token_response = client.oauth2_client_credentials_tokens()

    return token_response.by_resource_server


def get_confidential_app_authorizer(client_id, client_secret):
    """Return an AccessTokenAuthorizer for the Transfer API obtained with a
    client credentials grant."""
    tokens = do_client_credentials_app_authentication(
        client_id=client_id,
        client_secret=client_secret)
    transfer_tokens = tokens['transfer.api.globus.org']
    transfer_access_token = transfer_tokens['access_token']

    return globus_sdk.AccessTokenAuthorizer(transfer_access_token)


def build_destination_directory(src_path, dest_path):
    """Return ``dest_path`` joined with the last component of ``src_path``,
    always ending in '/'.

    Paths are joined with POSIX rules regardless of the local platform,
    because they are interpreted by the endpoint rather than by this machine.
    """
    import posixpath

    leaf = posixpath.basename(src_path.rstrip('/'))
    return posixpath.join(dest_path, leaf) + '/'


def _grant_read_access(tc, endpoint_id, principal_type, principal, path):
    """Add a read-only ACL rule on ``path`` for one identity or group.

    A rule that already exists is not an error; any other Transfer API
    error is printed and the script exits with status 1.
    """
    rule_data = {
        "DATA_TYPE": "access",
        "principal_type": principal_type,
        "principal": principal,
        "path": path,
        "permissions": "r",
    }
    try:
        tc.add_endpoint_acl_rule(endpoint_id, rule_data)
    except TransferAPIError as e:
        if e.code != u'Exists':
            eprint(e)
            sys.exit(1)


def share_data(args):
    """Copy a source directory to a shared endpoint and grant read access.

    ``args`` is the argparse namespace built in the ``__main__`` section.
    Values not given on the command line fall back to the module-level
    defaults ``source_endpoint``, ``shared_endpoint``, ``source_path`` and
    ``destination_path``.

    Steps: validate input, authenticate, optionally delete an existing
    destination directory (``--delete``), create the destination directory,
    add read ACL rules for the requested user/group, then submit a recursive
    transfer. Every failure prints to stderr and exits with status 1.
    """
    user_source_endpoint = args.source_endpoint or source_endpoint
    user_shared_endpoint = args.shared_endpoint or shared_endpoint
    if not user_shared_endpoint:
        eprint('Invalid shared endpoint')
        sys.exit(1)

    user_source_path = args.source_path or source_path
    user_destination_path = args.destination_path or destination_path
    if not user_source_path.startswith('/'):
        eprint('Source path must be absolute')
        sys.exit(1)
    if not user_destination_path.startswith('/'):
        eprint('Destination path must be absolute')
        sys.exit(1)

    if args.auth == 'native':
        # get an authorizer if it is a Native App
        authorizer = get_native_app_authorizer(client_id=CLIENT_ID)
    elif args.auth == 'client-credentials':
        secret = args.client_secret or CLIENT_SECRET
        if not secret:
            eprint('--auth client-credentials chosen, but no secret provided!'
                   ' Set "--client-secret <secret>"')
            sys.exit(1)
        # get an authorizer if it is a Confidential App
        authorizer = get_confidential_app_authorizer(client_id=CLIENT_ID,
                                                     client_secret=secret)
    else:
        raise ValueError('Invalid Authenticator, this script only understands '
                         'Native and Client Credential')

    # look for an identity uuid for the specified identity username
    username_uuid = None
    if args.username:
        ac = globus_sdk.AuthClient(authorizer=authorizer)
        r = ac.get_identities(usernames=args.username)
        if not r['identities']:
            eprint('No such identity username \'{}\''.format(args.username))
            sys.exit(1)
        username_uuid = r['identities'][0]['id']

    # create a TransferClient object
    tc = globus_sdk.TransferClient(authorizer=authorizer)

    # check if a destination directory exists at all
    try:
        tc.operation_ls(user_shared_endpoint, path=user_destination_path)
    except TransferAPIError as e:
        eprint(e)
        sys.exit(1)

    destination_directory = build_destination_directory(
        user_source_path, user_destination_path)

    # check if a directory with the same name was already transferred to the
    # destination path; if it was and --delete option is specified, delete
    # the directory
    try:
        tc.operation_ls(user_shared_endpoint, path=destination_directory)
        if not args.delete:
            eprint('Destination directory exists. Delete the directory or '
                   'use --delete option')
            sys.exit(1)
        print('Destination directory, {}, exists and will be deleted'
              .format(destination_directory))
        ddata = globus_sdk.DeleteData(
            tc,
            user_shared_endpoint,
            label='Share Data Example',
            recursive=True)
        ddata.add_item(destination_directory)
        print('Submitting a delete task')
        task = tc.submit_delete(ddata)
        print('\ttask_id: {}'.format(task['task_id']))
        # Wait for the delete to finish before recreating the directory;
        # task_wait returns False when the task is still running at timeout.
        if not tc.task_wait(task['task_id'], timeout=600,
                            polling_interval=10):
            eprint('Delete task {} did not complete in time'
                   .format(task['task_id']))
            sys.exit(1)
    except TransferAPIError as e:
        # NotFound means there is nothing to delete
        if e.code != u'ClientError.NotFound':
            eprint(e)
            sys.exit(1)

    # create a destination directory
    try:
        print('Creating destination directory {}'
              .format(destination_directory))
        tc.operation_mkdir(user_shared_endpoint, destination_directory)
    except TransferAPIError as e:
        eprint(e)
        sys.exit(1)

    # grant group/user read access to the destination directory
    if args.user_uuid:
        print('Granting user, {}, read access to the destination directory'
              .format(args.user_uuid))
        _grant_read_access(tc, user_shared_endpoint, 'identity',
                           args.user_uuid, destination_directory)

    if username_uuid:
        print('Granting user, {}, read access to the destination directory'
              .format(username_uuid))
        _grant_read_access(tc, user_shared_endpoint, 'identity',
                           username_uuid, destination_directory)

    if args.group_uuid:
        print('Granting group, {}, read access to the destination directory'
              .format(args.group_uuid))
        _grant_read_access(tc, user_shared_endpoint, 'group',
                           args.group_uuid, destination_directory)

    # transfer data - source directory recursively
    tdata = globus_sdk.TransferData(tc,
                                    user_source_endpoint,
                                    user_shared_endpoint,
                                    label='Share Data Example')
    tdata.add_item(user_source_path, destination_directory, recursive=True)
    try:
        print('Submitting a transfer task')
        task = tc.submit_transfer(tdata)
    except TransferAPIError as e:
        eprint(e)
        sys.exit(1)
    print('\ttask_id: {}'.format(task['task_id']))
    print('You can monitor the transfer task programmatically using Globus SDK'
          ', or go to the Web UI, https://app.globus.org/activity/{}.'
          .format(task['task_id']))


if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        description='Copy data from your private endpoint to a shared one for '
        'other people to access. You can share data with both individual users'
        ' and groups.'
    )
    parser.add_argument(
        '--source-endpoint',
        help='Source Endpoint UUID where your data is stored.'
    )
    parser.add_argument(
        '--shared-endpoint',
        help='The place you will share your data. Create a shared endpoint '
             'by going to globus.org/app/transfer, navigating to your endpoint'
             ' and clicking "share" on a folder.'
    )
    parser.add_argument(
        '--source-path',
        help='Absolute path of the directory to copy from the source endpoint'
    )
    parser.add_argument(
        '--destination-path',
        help='Absolute path on the shared endpoint under which the source '
             'directory will be copied'
    )
    parser.add_argument(
        '--group-uuid',
        help='UUID of a group transferred data will be shared with')
    parser.add_argument(
        '--user-uuid',
        help='UUID of a user transferred data will be shared with')
    parser.add_argument(
        '--username',
        help='Identity username of a user transferred data will be shared '
             'with, e.g. johndoe@uchicago.edu')
    parser.add_argument(
        '--delete', action='store_true',
        help='Delete a destination directory if already exists before '
             'transferring data')
    parser.add_argument('--auth', choices=APP_AUTHENTICATORS,
                        default=AUTHENTICATION,
                        help='Authentication flow to use')
    parser.add_argument('--client-secret',
                        help='Client secret, required with '
                             '"--auth client-credentials"')
    args = parser.parse_args()

    share_data(args)