├── ansible.cfg ├── roles └── bioconductor_sync │ ├── defaults │ └── main.yml │ ├── templates │ ├── dir_list.txt.j2 │ └── file_list.txt.j2 │ └── tasks │ └── main.yml ├── playbook.yml ├── vars └── main.yml ├── inventory.yml ├── run.sh ├── LICENSE └── README.md /ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | inventory = inventory.yml 3 | host_key_checking = False 4 | -------------------------------------------------------------------------------- /roles/bioconductor_sync/defaults/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Defaults 3 | bioc_version: "3.20" 4 | target_dir: "archive-move" 5 | -------------------------------------------------------------------------------- /playbook.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Bioconductor Sync Playbook 3 | hosts: bioconductor_vm 4 | vars_files: 5 | - vars/main.yml 6 | roles: 7 | - bioconductor_sync 8 | -------------------------------------------------------------------------------- /vars/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Variables 3 | ssh_user: ubuntu # default, override as needed 4 | bioc_version: "3.20" # The Bioconductor version to sync 5 | target_dir: "archive-move" # The target directory for the sync 6 | -------------------------------------------------------------------------------- /inventory.yml: -------------------------------------------------------------------------------- 1 | all: 2 | hosts: 3 | bioconductor_vm: 4 | ansible_host: "{{ ip_address }}" 5 | ansible_user: "{{ ssh_user | default('ubuntu') }}" 6 | ansible_ssh_private_key_file: "{{ ssh_key_path }}" 7 | -------------------------------------------------------------------------------- /roles/bioconductor_sync/templates/dir_list.txt.j2: 
-------------------------------------------------------------------------------- 1 | {{ bioc_version }}/books/bin 2 | {{ bioc_version }}/bioc/bin 3 | {{ bioc_version }}/data/annotation/bin 4 | {{ bioc_version }}/data/experiment/bin 5 | {{ bioc_version }}/workflows/bin 6 | {{ bioc_version }}/books/src 7 | {{ bioc_version }}/bioc/src 8 | {{ bioc_version }}/data/annotation/src 9 | {{ bioc_version }}/data/experiment/src 10 | {{ bioc_version }}/workflows/src 11 | {{ bioc_version }}/BiocViews -------------------------------------------------------------------------------- /roles/bioconductor_sync/templates/file_list.txt.j2: -------------------------------------------------------------------------------- 1 | {{ bioc_version }}/books/bin 2 | {{ bioc_version }}/bioc/bin 3 | {{ bioc_version }}/data/annotation/bin 4 | {{ bioc_version }}/data/experiment/bin 5 | {{ bioc_version }}/workflows/bin 6 | {{ bioc_version }}/books/src 7 | {{ bioc_version }}/bioc/src 8 | {{ bioc_version }}/data/annotation/src 9 | {{ bioc_version }}/data/experiment/src 10 | {{ bioc_version }}/workflows/src 11 | {{ bioc_version }}/BiocViews 12 | {{ bioc_version }}/BiocViews.html 13 | {{ bioc_version }}/index.html 14 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check if ansible is installed 4 | if ! command -v ansible-playbook &> /dev/null; then 5 | echo "Error: ansible-playbook command not found. Please install Ansible first." 6 | echo "You can install it using: pip install ansible" 7 | exit 1 8 | fi 9 | 10 | if [ "$#" -lt 2 ]; then # ssh_key_path and ip_address are mandatory 11 | echo "Usage: $0 ssh_key_path ip_address [ssh_user] [bioc_version]" 12 | echo "" 13 | echo "Parameters:" 14 | echo " ssh_key_path : Path to SSH private key for connection" 15 | echo " ip_address : IP address of the target server" 16 | echo " ssh_user : SSH username (default: ubuntu)" 17 | echo " bioc_version : Bioconductor version to sync (e.g. 
3.21)" 18 | exit 1 19 | fi 20 | 21 | SSH_KEY_PATH=$1 22 | IP_ADDRESS=$2 23 | SSH_USER=${3:-ubuntu} # optional third argument 24 | BIOC_VERSION=${4:-"3.21"} # optional fourth argument 25 | 26 | echo "Starting Bioconductor sync for version $BIOC_VERSION..." 27 | ansible-playbook -v playbook.yml -e "ssh_key_path=$SSH_KEY_PATH ip_address=$IP_ADDRESS ssh_user=$SSH_USER bioc_version=$BIOC_VERSION" 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Bioconductor 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /roles/bioconductor_sync/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Tasks for Bioconductor sync 3 | 4 | - name: Check for rsync.pem SSH key 5 | ansible.builtin.stat: 6 | path: ~/.ssh/rsync.pem 7 | register: rsync_key 8 | 9 | - name: Fail if SSH key is missing 10 | ansible.builtin.fail: 11 | msg: "Required SSH key not found at ~/.ssh/rsync.pem. Please add the key before continuing." 12 | when: not rsync_key.stat.exists 13 | 14 | - name: Check SSH key permissions 15 | ansible.builtin.file: 16 | path: ~/.ssh/rsync.pem 17 | mode: '0600' 18 | when: rsync_key.stat.exists 19 | 20 | - name: Check for rclone installation 21 | ansible.builtin.command: which rclone 22 | register: rclone_check 23 | ignore_errors: true # a non-zero rc only means rclone is absent; the install block below handles it 24 | changed_when: false 25 | 26 | - name: Install rclone if not present 27 | block: 28 | - name: Download rclone install script 29 | ansible.builtin.get_url: 30 | url: https://rclone.org/install.sh 31 | dest: /tmp/rclone_install.sh 32 | mode: '0755' 33 | 34 | - name: Run rclone installer 35 | ansible.builtin.command: /tmp/rclone_install.sh 36 | become: true # run the installer with privilege escalation 37 | when: rclone_check.rc != 0 38 | 39 | - name: Check for rclone config 40 | ansible.builtin.stat: 41 | path: ~/.rclone.conf 42 | register: rclone_config 43 | 44 | - name: Fail if rclone config is missing 45 | ansible.builtin.fail: 46 | msg: "Rclone configuration not found at ~/.rclone.conf. Please set up rclone configuration first." 47 | when: not rclone_config.stat.exists 48 | 49 | - name: Check if OSN remote exists in rclone config 50 | ansible.builtin.shell: grep -q "\[osn\]" ~/.rclone.conf 51 | register: osn_remote_check 52 | ignore_errors: true # a failed grep is reported by the dedicated fail task that follows 53 | changed_when: false 54 | 55 | - name: Fail if OSN remote is not configured 56 | ansible.builtin.fail: 57 | msg: "The [osn] remote is not configured in your rclone.conf. 
Please configure the OSN remote first." 58 | when: osn_remote_check.rc != 0 59 | 60 | - name: Create target directory 61 | ansible.builtin.file: 62 | path: "{{ target_dir }}/{{ bioc_version }}" 63 | state: directory 64 | mode: '0755' 65 | 66 | - name: Create file list 67 | ansible.builtin.template: 68 | src: file_list.txt.j2 69 | dest: "{{ target_dir }}/file_list.txt" 70 | 71 | - name: Create directory list 72 | ansible.builtin.template: 73 | src: dir_list.txt.j2 74 | dest: "{{ target_dir }}/dir_list.txt" 75 | 76 | - name: Create rsync script 77 | ansible.builtin.copy: 78 | dest: "{{ target_dir }}/rsync_copy.sh" 79 | mode: '0755' 80 | content: | 81 | #!/bin/bash 82 | cd {{ target_dir }} || exit 1 83 | # Create directories only for directory paths 84 | cat dir_list.txt | xargs -i bash -c 'mkdir -p {}' 85 | # Run rsync on all paths 86 | cat dir_list.txt | xargs -i bash -c "rsync -e 'ssh -i ~/.ssh/rsync.pem' -zrtlv --delete bioc-rsync@master.bioconductor.org:{}/ {}" 87 | cat file_list.txt | xargs -i bash -c "rsync -e 'ssh -i ~/.ssh/rsync.pem' -zrtlv --delete bioc-rsync@master.bioconductor.org:{} {}" 88 | register: rsync_script 89 | 90 | - name: Execute rsync copy 91 | ansible.builtin.shell: "{{ target_dir }}/rsync_copy.sh" 92 | register: rsync_result 93 | args: 94 | executable: /bin/bash 95 | 96 | - name: Create rclone copy script 97 | ansible.builtin.copy: 98 | dest: "{{ target_dir }}/rclone_copy.sh" 99 | mode: '0755' 100 | content: | 101 | #!/bin/bash 102 | cd {{ target_dir }} || exit 1 103 | rclone copyto -vvv ./{{ bioc_version }} osn:/bir190004-bucket01/archive.bioconductor.org/packages/{{ bioc_version }} 104 | 105 | # Run the generated script; without this task it is only written to disk and nothing is uploaded 106 | - name: Execute rclone copy 107 | ansible.builtin.shell: "{{ target_dir }}/rclone_copy.sh" 108 | register: rclone_result 109 | args: 110 | executable: /bin/bash 111 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bioconductor Archive Sync 2 | 3 | This Ansible playbook automates the process of syncing a Bioconductor release to the Open Storage Network (OSN) archive. It handles the full workflow of: 4 | 5 | 1. 
Retrieving the specified Bioconductor version from the master Bioconductor server 6 | 2. Creating the necessary directory structure locally 7 | 3. Transferring the retrieved data to OSN for archival storage 8 | 9 | ## Prerequisites 10 | 11 | ### On Your Local Machine (Running Ansible) 12 | 13 | - Ansible installed (`pip install ansible`) 14 | - SSH access to the target server 15 | - SSH key for connecting to the target server 16 | - Knowledge of the target server's IP address and SSH user credentials 17 | 18 | ### On The Target Machine (Running the Sync) 19 | 20 | - SSH key `~/.ssh/rsync.pem` for connecting to the Bioconductor master server 21 | - Rclone configuration file at `~/.rclone.conf` with an [osn] remote defined 22 | 23 | Note: The playbook will automatically check for these prerequisites on the target machine. If rclone is not installed on the target machine, the playbook will automatically install it. 24 | 25 | ## How to Use 26 | 27 | The simplest way to run this playbook is by using the provided `run.sh` script. 
28 | 29 | ### Using run.sh 30 | 31 | The run.sh script simplifies execution by handling all the necessary parameters: 32 | 33 | ```bash 34 | ./run.sh ssh_key_path ip_address [ssh_user] [bioc_version] 35 | ``` 36 | 37 | #### Parameters: 38 | 39 | - **ssh_key_path**: Path to your SSH private key for connecting to the target server 40 | - **ip_address**: IP address of the target server where the sync will run 41 | - **ssh_user**: (Optional) SSH username for connecting to the target server (default: ubuntu) 42 | - **bioc_version**: (Optional) The Bioconductor version to sync (default: 3.21) 43 | 44 | #### Example usage: 45 | 46 | ```bash 47 | ./run.sh ~/.ssh/my_key.pem 192.168.1.100 ubuntu 3.20 48 | ``` 49 | 50 | ### Best Practice: Run Multiple Times 51 | 52 | It's recommended to run the script at least twice: 53 | - The first run will transfer all the data, which may take significant time depending on the size of the Bioconductor release 54 | - Subsequent runs will be much faster and ensure that all transfers were successful 55 | - If the second run shows no additional files being transferred or updated, it confirms that the synchronization is complete and consistent 56 | 57 | This approach leverages rsync and rclone's internal check mechanisms - they only transfer files that have changed or are missing, making subsequent runs both a verification and a way to complete any interrupted transfers. 58 | 59 | ### Best Practice: Use Screen for Persistent Sessions 60 | 61 | I'd recommend using `screen`, especially on a VM, to ensure the process continues even if your connection to the VM is interrupted: 62 | 63 | ```bash 64 | # Start a new screen session 65 | screen -S bioc-sync 66 | 67 | # Now run the script inside the screen session 68 | ./run.sh ~/.ssh/rsync.pem 192.168.1.100 ubuntu 3.21 69 | 70 | # You can detach from the screen session with: Ctrl+A, then D 71 | ``` 72 | 73 | After starting a screen session, you can leave it unattended for a couple of hours while the transfers happen. 
74 | 75 | When returning to the session: 76 | 77 | ```bash 78 | # If disconnected, you can reconnect to the VM and resume the session with: 79 | screen -r bioc-sync 80 | ``` 81 | 82 | This approach protects your sync process from: 83 | - Network connectivity issues between your computer and the VM 84 | - Local computer shutdowns or sleep mode 85 | - SSH session timeouts 86 | - Accidental terminal closing 87 | 88 | The transfer will continue running on the VM even if your connection drops, and you can easily reconnect to check progress when needed. 89 | --------------------------------------------------------------------------------