├── .github ├── dependabot.yml ├── stale.yml └── workflows │ ├── docker-latest.yml │ ├── shellcheck.yml │ └── test.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── test └── pool │ ├── abc-123_!"§$%&()[]{}=?`#'*+-_,;.:|<>~@.txt │ └── projects │ └── [2020] some project │ ├── file │ ├── file.txt │ └── mp4.txt ├── testing.sh └── zfs-inplace-rebalancing.sh /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | updates: 4 | - package-ecosystem: github-actions 5 | directory: "/" 6 | schedule: 7 | # Check for updates to GitHub Actions every week 8 | interval: "weekly" 9 | 10 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Configuration for probot-stale - https://github.com/probot/stale 2 | 3 | # Number of days of inactivity before an Issue or Pull Request becomes stale 4 | daysUntilStale: 60 5 | 6 | # Number of days of inactivity before an Issue or Pull Request with the stale label is closed. 7 | # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. 8 | daysUntilClose: 14 9 | 10 | # Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable 11 | exemptLabels: 12 | - pinned 13 | - security 14 | - bug 15 | - enhancement 16 | 17 | # Set to true to ignore issues in a project (defaults to false) 18 | exemptProjects: false 19 | 20 | # Set to true to ignore issues in a milestone (defaults to false) 21 | exemptMilestones: false 22 | 23 | # Set to true to ignore issues with an assignee (defaults to false) 24 | exemptAssignees: true 25 | 26 | # Label to use when marking as stale 27 | staleLabel: wontfix 28 | 29 | # Comment to post when marking as stale. 
Set to `false` to disable 30 | markComment: > 31 | This issue has been automatically marked as stale because it has not had 32 | recent activity. It will be closed if no further activity occurs. Thank you 33 | for your contributions. 34 | 35 | # Comment to post when removing the stale label. 36 | # unmarkComment: > 37 | # Your comment here. 38 | 39 | # Comment to post when closing a stale Issue or Pull Request. 40 | closeComment: > 41 | There has been no incentive by contributors or maintainers to revive this stale issue and it will now be closed. 42 | 43 | # Limit the number of actions per hour, from 1-30. Default is 30 44 | limitPerRun: 30 45 | 46 | # Limit to only `issues` or `pulls` 47 | only: issues 48 | 49 | # Optionally, specify configuration settings that are specific to just 'issues' or 'pulls': 50 | # pulls: 51 | # daysUntilStale: 30 52 | # markComment: > 53 | # This pull request has been automatically marked as stale because it has not had 54 | # recent activity. It will be closed if no further activity occurs. Thank you 55 | # for your contributions. 
56 | 57 | # issues: 58 | # exemptLabels: 59 | # - confirmed 60 | -------------------------------------------------------------------------------- /.github/workflows/docker-latest.yml: -------------------------------------------------------------------------------- 1 | name: Docker latest 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | 7 | env: 8 | REGISTRY: ghcr.io 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | permissions: 14 | contents: read 15 | packages: write 16 | 17 | steps: 18 | - name: Checkout repository 19 | uses: actions/checkout@v4 20 | 21 | - name: Log into registry ${{ env.REGISTRY }} 22 | uses: docker/login-action@v3 23 | with: 24 | registry: ${{ env.REGISTRY }} 25 | username: ${{ github.actor }} 26 | password: ${{ secrets.GITHUB_TOKEN }} 27 | 28 | - name: Extract Docker metadata 29 | id: meta 30 | uses: docker/metadata-action@v5 31 | with: 32 | images: ${{ env.REGISTRY }}/${{ github.repository }} 33 | tags: | 34 | type=raw,value=latest 35 | 36 | - name: Build and push Docker image 37 | id: build-and-push 38 | uses: docker/build-push-action@v6 39 | with: 40 | context: . 41 | file: Dockerfile 42 | push: true 43 | tags: ${{ steps.meta.outputs.tags }} 44 | labels: ${{ steps.meta.outputs.labels }} -------------------------------------------------------------------------------- /.github/workflows/shellcheck.yml: -------------------------------------------------------------------------------- 1 | # ShellCheck 2 | 3 | name: CI 4 | 5 | # Controls when the action will run. 
Triggers the workflow on push or pull request 6 | # events but only for the master branch 7 | on: 8 | push: 9 | branches: [ master ] 10 | pull_request: 11 | branches: [ master ] 12 | 13 | jobs: 14 | shellcheck: 15 | name: Shellcheck 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Run ShellCheck 20 | uses: ludeeus/action-shellcheck@master 21 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | # Test 2 | 3 | name: Test 4 | 5 | on: 6 | push: 7 | branches: [ master ] 8 | pull_request: 9 | branches: [ master ] 10 | 11 | jobs: 12 | linuxTest: 13 | name: Test on Linux 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - name: Run testing script 19 | run: ./testing.sh 20 | 21 | macOsTest: 22 | name: Test on macOS 23 | runs-on: macos-latest 24 | steps: 25 | - uses: actions/checkout@v4 26 | 27 | - name: Install coreutils 28 | run: brew install coreutils 29 | 30 | - name: Run testing script on macOS 31 | run: ./testing.sh 32 | 33 | FreeBSDTest: 34 | name: Test on FreeBSD 35 | runs-on: ubuntu-latest 36 | 37 | steps: 38 | - uses: actions/checkout@v4 39 | 40 | - name: Test in FreeBSD 41 | id: test 42 | uses: vmactions/freebsd-vm@v1 43 | with: 44 | usesh: true 45 | prepare: | 46 | pkg install -y bash 47 | run: | 48 | ./testing.sh -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | test.log 2 | error.log 3 | rebalance_db.txt 4 | files_list.txt 5 | sorted_files_list.txt 6 | grouped_inodes.txt 7 | testing_data 8 | .vscode 9 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM phusion/baseimage:jammy-1.0.1 2 | 
MAINTAINER markusressel 3 | 4 | RUN apt-get update \ 5 | && apt-get -y install bc \ 6 | && apt-get clean && rm -rf /var/lib/apt/lists/* 7 | 8 | COPY zfs-inplace-rebalancing.sh ./ 9 | 10 | ENTRYPOINT ["./zfs-inplace-rebalancing.sh"] 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 
27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. 
other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. 
In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. 
Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # zfs-inplace-rebalancing 2 | Simple bash script to rebalance pool data between all mirrors when adding vdevs to a pool. 3 | 4 | [![asciicast](https://asciinema.org/a/350222.svg)](https://asciinema.org/a/350222) 5 | 6 | ## How it works 7 | 8 | This script recursively traverses all the files in a given directory. Each file is copied with a `.balance` suffix, retaining all file attributes. The original is then deleted and the *copy* is renamed back to the name of the original file. When copying a file ZFS will spread the data blocks across all vdevs, effectively distributing/rebalancing the data of the original file (more or less) evenly. This allows the pool data to be rebalanced without the need for a separate backup pool/drive. 9 | 10 | When the script detects an inode group of hardlinked files, it will proceed to copy one file in the inode group. The original file and all hardlinks are then deleted, the *copy* is renamed back to the name of the original file, and new hardlinks are generated from that copy to replace all other linked files that were removed. 11 | 12 | The way ZFS distributes writes is not trivial, which makes it hard to predict how effective the redistribution will be. 
See: 13 | - https://jrs-s.net/2018/04/11/zfs-allocates-writes-according-to-free-space-per-vdev-not-latency-per-vdev/ 14 | - https://jrs-s.net/2018/08/24/zfs-write-allocation-in-0-7-x/ 15 | 16 | Note that this process is not entirely "in-place", since a file has to be fully copied before the original is deleted. The term is used to make it clear that no additional pool (and therefore hardware) is necessary to use this script. However, this also means that you have to have enough space to create a copy of the biggest file in your target directory for it to work. 17 | 18 | At no point in time are both versions of the original file deleted. 19 | To make sure file attributes, permissions and file content are maintained when copying the original file, all attributes and the file checksum are compared before removing the original file (if not disabled using `--checksum false`). 20 | 21 | Since file attributes are fully retained, it is not possible to verify if an individual file has been rebalanced. However, this script keeps track of rebalanced files by maintaining a "database" file in its working directory called `rebalance_db.txt` (if not disabled using `--passes 0`). This file contains two lines of text for each processed file: 22 | 23 | * One line for the file path 24 | * and the next line for the current count of rebalance passes 25 | 26 | ```text 27 | /my/example/pool/file1.mkv 28 | 1 29 | /my/example/pool/file2.mkv 30 | 1 31 | ``` 32 | 33 | All files in a given inode group will be added to the database when processed. The highest count in a given inode group of files will be used to determine if the group should be skipped when processing against the number of passes in a given script execution. 34 | 35 | The hardlink support process creates temporary files in the script location alongside `rebalance_db.txt` which are removed upon the end of each run. `files_list.txt` lists all files found in the given target location. 
`sorted_files_list.txt` lists all files sorted by inode number. `grouped_inodes.txt` lists all files by inode, but with all files from a given inode space separated on one line. 36 | 37 | ## Prerequisites 38 | 39 | ### Balance Status 40 | 41 | To check the current balance of a pool use: 42 | 43 | ``` 44 | > zpool list -v 45 | 46 | NAME SIZE ALLOC FREE CKPOINT EXPANDSZ FRAG CAP DEDUP HEALTH ALTROOT 47 | bpool 1.88G 113M 1.76G - - 2% 5% 1.00x ONLINE - 48 | mirror 1.88G 113M 1.76G - - 2% 5.88% - ONLINE 49 | ata-Samsung_SSD_860_EVO_500GB_J0NBL-part2 - - - - - - - - ONLINE 50 | ata-Samsung_SSD_860_EVO_500GB_S4XB-part2 - - - - - - - - ONLINE 51 | rpool 460G 3.66G 456G - - 0% 0% 1.00x ONLINE - 52 | mirror 460G 3.66G 456G - - 0% 0.79% - ONLINE 53 | ata-Samsung_SSD_860_EVO_500GB_S4BB-part3 - - - - - - - - ONLINE 54 | ata-Samsung_SSD_860_EVO_500GB_S4XB-part3 - - - - - - - - ONLINE 55 | vol1 9.06T 3.77T 5.29T - - 13% 41% 1.00x ONLINE - 56 | mirror 3.62T 1.93T 1.70T - - 25% 53.1% - ONLINE 57 | ata-WDC_WD40EFRX-68N32N0_WD-WCC - - - - - - - - ONLINE 58 | ata-ST4000VN008-2DR166_ZM4-part2 - - - - - - - - ONLINE 59 | mirror 3.62T 1.84T 1.78T - - 8% 50.9% - ONLINE 60 | ata-ST4000VN008-2DR166_ZM4-part2 - - - - - - - - ONLINE 61 | ata-WDC_WD40EFRX-68N32N0_WD-WCC-part2 - - - - - - - - ONLINE 62 | mirror 1.81T 484K 1.81T - - 0% 0.00% - ONLINE 63 | ata-WDC_WD20EARX-00PASB0_WD-WMA-part2 - - - - - - - - ONLINE 64 | ata-ST2000DM001-1CH164_Z1E-part2 - - - - - - - - ONLINE 65 | ``` 66 | 67 | and have a look at difference of the `CAP` value (`SIZE`/`FREE` vs `ALLOC` ratio) between vdevs. 68 | 69 | ### No Deduplication 70 | 71 | Due to the working principle of this script, which essentially creates a duplicate file on purpose, deduplication will most definitely prevent it from working as intended. If you use deduplication you probably have to resort to a more expensive rebalancing method that involves additional drives. 
72 | 73 | ### Data selection (cold data) 74 | 75 | Due to the working principle of this script, it is crucial that you **only run it on data that is not actively accessed**, since the original file will be deleted. 76 | 77 | ### Snapshots 78 | 79 | If you do a snapshot of the data you want to balance before starting the rebalancing script, keep in mind that ZFS now has to keep track of all of the data in the target directory twice. Once in the snapshot you made, and once for the new copy. This means that you will effectively use double the file size of all files within the target directory. Therefore it is a good idea to process the pool data in batches and remove old snapshots along the way, since you probably will be hitting the capacity limits of your pool at some point during the rebalancing process. 80 | 81 | ## Installation 82 | 83 | Since this is a simple bash script, there is no package. Simply download the script and make it executable: 84 | 85 | ```shell 86 | curl -O https://raw.githubusercontent.com/markusressel/zfs-inplace-rebalancing/master/zfs-inplace-rebalancing.sh 87 | chmod +x ./zfs-inplace-rebalancing.sh 88 | ``` 89 | 90 | Dependencies: 91 | * `perl` - it should be available on most systems by default 92 | * `awk` - it should be available on most systems by default 93 | 94 | ## Usage 95 | 96 | **ALWAYS HAVE A BACKUP OF YOUR DATA!** 97 | 98 | You can print a help message by running the script without any parameters: 99 | 100 | ```shell 101 | ./zfs-inplace-rebalancing.sh 102 | ``` 103 | 104 | ### Parameters 105 | 106 | | Name | Description | Default | 107 | |-----------|-------------|---------| 108 | | `-c`
`--checksum` | Whether to compare attributes and content of the copied file using an **MD5** checksum. Technically this is a redundant check and consumes a lot of resources, so think twice. | `true` | 109 | | `-p`
`--passes` | The maximum number of rebalance passes per file. Setting this to infinity by using a value `<= 0` might improve performance when rebalancing a lot of small files. | `1` | 110 | | `--debug` | Shows additional output, including listing all files in the target location 3 times (list, inode sorted list, inode groupings) and more granular move/copy/link/count transaction information. | `false` | 111 | 112 | ### Example 113 | 114 | Make sure to run this script with a user that has rw permission to all of the files in the target directory. 115 | The easiest way to achieve this is by **running the script as root**. 116 | 117 | ```shell 118 | sudo su 119 | ./zfs-inplace-rebalancing.sh --checksum true --passes 1 /pool/path/to/rebalance 120 | ``` 121 | 122 | To keep track of the balancing progress, you can open another terminal and run: 123 | 124 | ```shell 125 | watch zpool list -v 126 | ``` 127 | 128 | ### Log to File 129 | 130 | To write the output to a file, simply redirect stdout and stderr to a file (or separate files). 131 | Since this redirects all output, you will have to follow the contents of the log files to get realtime info: 132 | 133 | ```shell 134 | # one shell window: 135 | tail -F ./stdout.log 136 | # another shell window: 137 | ./zfs-inplace-rebalancing.sh /pool/path/to/rebalance >> ./stdout.log 2>> ./stderr.log 138 | ``` 139 | 140 | ### Things to consider 141 | 142 | Although this script **does** have a progress output (files as well as percentage) it might be a good idea to try a small subfolder first, or process your pool folder layout in manually selected batches. This can also limit the damage done, if anything bad happens. 143 | 144 | When aborting the script midway through, be sure to check the last lines of its output. When cancelling before or during the renaming process a ".balance" file might be left and you have to rename (or delete) it manually. 
145 | 146 | Although the `--passes` parameter can be used to limit the maximum number of rebalance passes per file, it is only meant to speed up aborted runs. Individual files will **not be processed multiple times automatically**. To reach multiple passes you have to run the script on the same target directory multiple times. 147 | 148 | ### Dockerfile 149 | 150 | To increase portability, this script can also be run using docker: 151 | 152 | ```shell 153 | sudo docker run --rm -it -v /your/data:/data ghcr.io/markusressel/zfs-inplace-rebalancing:latest ./data 154 | ``` 155 | 156 | # Contributing 157 | 158 | GitHub is for social coding: if you want to write code, I encourage contributions through pull requests from forks 159 | of this repository. Create GitHub tickets for bugs and new features and comment on the ones that you are interested in. 160 | 161 | # Attributions 162 | 163 | This script was inspired by [zfs-balancer](https://github.com/programster/zfs-balancer). 164 | 165 | # Disclaimer 166 | 167 | This software is provided "as is" and "as available", without any warranty. 
168 | **ALWAYS HAVE A BACKUP OF YOUR DATA!** 169 | -------------------------------------------------------------------------------- /test/pool/abc-123_!"§$%&()[]{}=?`#'*+-_,;.:|<>~@.txt: -------------------------------------------------------------------------------- 1 | test -------------------------------------------------------------------------------- /test/pool/projects/[2020] some project/file: -------------------------------------------------------------------------------- 1 | test -------------------------------------------------------------------------------- /test/pool/projects/[2020] some project/file.txt: -------------------------------------------------------------------------------- 1 | test -------------------------------------------------------------------------------- /test/pool/projects/[2020] some project/mp4.txt: -------------------------------------------------------------------------------- 1 | test -------------------------------------------------------------------------------- /testing.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # exit script on error 4 | set -e 5 | # exit on undeclared variable 6 | set -u 7 | 8 | log_std_file=./test.log 9 | log_error_file=./error.log 10 | test_data_src=./test/pool 11 | test_pool_data_path=./testing_data 12 | test_pool_data_size_path=$test_pool_data_path/size 13 | 14 | ## Color Constants 15 | 16 | # Reset 17 | Color_Off='\033[0m' # Text Reset 18 | 19 | # Regular Colors 20 | Red='\033[0;31m' # Red 21 | Green='\033[0;32m' # Green 22 | Yellow='\033[0;33m' # Yellow 23 | Cyan='\033[0;36m' # Cyan 24 | 25 | 26 | OSName=$(echo "$OSTYPE" | tr '[:upper:]' '[:lower:]') 27 | 28 | ## Functions 29 | 30 | # print a given text entirely in a given color 31 | function color_echo () { 32 | color=$1 33 | text=$2 34 | echo -e "${color}${text}${Color_Off}" 35 | } 36 | 37 | function prepare() { 38 | # cleanup 39 | rm -f $log_std_file 40 | rm -f 
$log_error_file 41 | rm -f rebalance_db.txt 42 | rm -rf $test_pool_data_path 43 | 44 | # setup 45 | cp -rf $test_data_src $test_pool_data_path 46 | } 47 | 48 | # return the current time in nanoseconds since the epoch (callers convert to milliseconds) 49 | function get_time() { 50 | if [[ "${OSName}" == "darwin"* ]]; then 51 | date=$(gdate +%s%N) 52 | else 53 | date=$(date +%s%N) 54 | fi 55 | echo "$date" 56 | } 57 | 58 | function get_inode() { 59 | if [[ "${OSName}" == "darwin"* ]] || [[ "${OSName}" == "freebsd"* ]]; then 60 | inode=$(stat -f "%i" "$1") 61 | else 62 | inode=$(stat -c "%i" "$1") 63 | fi 64 | 65 | echo "$inode" 66 | } 67 | 68 | function assertions() { 69 | # check error log is empty 70 | if grep -q '[^[:space:]]' $log_error_file; then 71 | color_echo "$Red" "error log is not empty!" 72 | cat $log_error_file 73 | exit 1 74 | fi 75 | } 76 | 77 | function assert_matching_file_hardlinked() { 78 | if [[ "$(get_inode "$1")" != "$(get_inode "$2")" ]]; then 79 | echo "File '$1' was not hardlinked to '$2' when it should have been!" 80 | exit 1 81 | fi 82 | } 83 | 84 | function print_time_taken(){ 85 | time_taken=$1 86 | minute=$((time_taken / 60000)) 87 | seconde=$((time_taken % 60000 / 1000)) 88 | miliseconde=$((time_taken % 1000)) 89 | color_echo "$Yellow" "Time taken: ${minute}m ${seconde}s ${miliseconde}ms" 90 | } 91 | 92 | color_echo "$Cyan" "Running tests..." 93 | 94 | color_echo "$Cyan" "Running tests with default options..." 95 | prepare 96 | ./zfs-inplace-rebalancing.sh $test_pool_data_path >> $log_std_file 2>> $log_error_file 97 | cat $log_std_file 98 | assertions 99 | color_echo "$Green" "Tests passed!" 100 | 101 | color_echo "$Cyan" "Running tests with checksum true and 1 pass..." 102 | prepare 103 | ./zfs-inplace-rebalancing.sh --checksum true --passes 1 $test_pool_data_path >> $log_std_file 2>> $log_error_file 104 | cat $log_std_file 105 | assertions 106 | color_echo "$Green" "Tests passed!" 107 | 108 | color_echo "$Cyan" "Running tests with checksum false..." 
109 | prepare 110 | ./zfs-inplace-rebalancing.sh --checksum false $test_pool_data_path >> $log_std_file 2>> $log_error_file 111 | cat $log_std_file 112 | assertions 113 | color_echo "$Green" "Tests passed!" 114 | 115 | color_echo "$Cyan" "Running tests with hardlinks..." 116 | prepare 117 | ln "$test_pool_data_path/projects/[2020] some project/mp4.txt" "$test_pool_data_path/projects/[2020] some project/mp4.txt.link" 118 | ./zfs-inplace-rebalancing.sh $test_pool_data_path >> $log_std_file 2>> $log_error_file 119 | cat $log_std_file 120 | # Both link files should be copied 121 | assert_matching_file_hardlinked "$test_pool_data_path/projects/[2020] some project/mp4.txt" "$test_pool_data_path/projects/[2020] some project/mp4.txt.link" 122 | assertions 123 | color_echo "$Green" "Tests passed!" 124 | 125 | color_echo "$Cyan" "Running tests with different file count and size..." 126 | prepare 127 | 128 | mkdir -p $test_pool_data_size_path 129 | 130 | color_echo "$Cyan" "Creating 1000 files of 1KB each..." 131 | mkdir -p $test_pool_data_size_path/small 132 | for i in {1..1000}; do 133 | dd if=/dev/urandom of=$test_pool_data_size_path/small/file_"$i".txt bs=1024 count=1 >> /dev/null 2>&1 134 | done 135 | 136 | color_echo "$Cyan" "Creating 5 file of 1GB each..." 137 | mkdir -p $test_pool_data_size_path/big 138 | for i in {1..5}; do 139 | dd if=/dev/urandom of=$test_pool_data_size_path/big/file_"$i".txt bs=1024 count=1048576 >> /dev/null 2>&1 140 | done 141 | 142 | color_echo "$Green" "Files created!" 143 | 144 | echo "Running rebalancing on small files..." 145 | # measure time taken 146 | start_time=$(get_time) 147 | ./zfs-inplace-rebalancing.sh $test_pool_data_size_path/small >> $log_std_file 2>> $log_error_file 148 | end_time=$(get_time) 149 | time_taken=$(( (end_time - start_time) / 1000000 )) 150 | print_time_taken $time_taken 151 | assertions 152 | color_echo "$Green" "Tests passed!" 153 | 154 | echo "Running rebalancing on big files..." 
rm -f rebalance_db.txt
# measure time taken
start_time=$(get_time)
./zfs-inplace-rebalancing.sh "$test_pool_data_size_path/big" >> "$log_std_file" 2>> "$log_error_file"
end_time=$(get_time)
time_taken=$(( (end_time - start_time) / 1000000 ))
print_time_taken "$time_taken"
assertions
color_echo "$Green" "Tests passed!"

echo "Running rebalancing on all files..."
# Start from an empty rebalance database so no file is skipped as already processed
rm -f rebalance_db.txt
# measure time taken
start_time=$(get_time)
./zfs-inplace-rebalancing.sh "$test_pool_data_size_path" >> "$log_std_file" 2>> "$log_error_file"
end_time=$(get_time)
time_taken=$(( (end_time - start_time) / 1000000 ))
print_time_taken "$time_taken"
assertions
color_echo "$Green" "Tests passed!"

color_echo "$Green" "All tests passed!"
color_echo "$Cyan" "Cleaning"
rm -f "$log_std_file"
rm -f "$log_error_file"
rm -f rebalance_db.txt
rm -rf "$test_pool_data_path"


--------------------------------------------------------------------------------
/zfs-inplace-rebalancing.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Exit script on error
set -e
# Exit on undeclared variable
set -u

# File used to track processed files.
# Layout: two lines per tracked file -- the path, then its rebalance count.
rebalance_db_file_name="rebalance_db.txt"

# Index used for progress
current_index=0

## Color Constants

# Reset
Color_Off='\033[0m' # Text Reset

# Regular Colors
Red='\033[0;31m'    # Red
Green='\033[0;32m'  # Green
Yellow='\033[0;33m' # Yellow
Cyan='\033[0;36m'   # Cyan

## Functions

# Print a help message
function print_usage() {
    echo "Usage: zfs-inplace-rebalancing.sh --checksum true --passes 1 --debug false /my/pool"
}

# Print a given text entirely in a given color
#   $1 -- color escape sequence
#   $2 -- text to print
function color_echo() {
    local color=$1
    local text=$2
    echo -e "${color}${text}${Color_Off}"
}

# Print the given text only when debug mode is enabled
function echo_debug() {
    if [ "$debug_flag" = true ]; then
        echo "$*"
    fi
}

# Succeed when the given option value is an accepted spelling of "enabled".
# The match is anchored so that values merely containing "on", "true" or
# "yes" (e.g. "none") are not mistaken for true.
function is_truthy() {
    [[ "$1" =~ ^(1|on|true|yes)$ ]]
}

# Abort with the usage text when an option is not followed by a value.
#   $1 -- option name (for the error message)
#   $2 -- number of remaining command line arguments, including the option
function require_option_value() {
    if [[ "$2" -lt 2 ]]; then
        color_echo "${Red}" "Missing value for option: $1"
        print_usage
        exit 1
    fi
}

# Print how often the given path has been rebalanced so far ("0" if unknown)
#   $1 -- file path to look up in the rebalance database
function get_rebalance_count() {
    local file_path="$1"
    local line_nr

    # -e keeps a path with a leading dash from being parsed as a grep option
    line_nr=$(grep -xF -n -e "${file_path}" "./${rebalance_db_file_name}" | head -n 1 | cut -d: -f1)
    if [ -z "${line_nr}" ]; then
        echo "0"
        return
    fi

    # The count is stored on the line directly after the path
    awk -v target="$((line_nr + 1))" 'NR == target' "./${rebalance_db_file_name}"
}

# Create or increment the rebalance count stored for the given path
#   $1 -- file path to record in the rebalance database
function increment_rebalance_count() {
    local file_path="$1"
    local db_file="./${rebalance_db_file_name}"
    local line_nr count_line_nr count

    line_nr=$(grep -xF -n -e "${file_path}" "${db_file}" | head -n 1 | cut -d: -f1)
    if [ -z "${line_nr}" ]; then
        # First pass for this path: append the path line and its count line
        printf '%s\n%s\n' "${file_path}" "1" >> "${db_file}"
        return
    fi

    count_line_nr="$((line_nr + 1))"
    count=$(awk -v target="${count_line_nr}" 'NR == target' "${db_file}")
    count="$((count + 1))"
    echo_debug "Updating rebalance count for ${file_path} to ${count}"

    # Rewrite through a temporary file instead of `sed -i`, whose syntax
    # differs between GNU sed and the BSD sed shipped with macOS and FreeBSD.
    awk -v target="${count_line_nr}" -v value="${count}" \
        'NR == target { print value; next } { print }' "${db_file}" > "${db_file}.tmp"
    mv "${db_file}.tmp" "${db_file}"
}

# Rebalance a group of files that are hardlinked together
#
# Arguments:
#   $@ -- all paths that share one inode; the first one is copied, the
#         remaining ones are re-created as hardlinks to the new copy
# Globals read:    file_count, passes_flag, checksum_flag, OSName
# Globals written: current_index
function process_inode_group() {
    local paths=("$@")
    local num_paths="${#paths[@]}"
    local path i
    local progress_raw progress_percent rebalance_count should_skip
    local main_file tmp_extension tmp_file_path
    local original_perms copy_perms
    local -a cmd=()

    # Progress tracking
    current_index="$((current_index + 1))"
    # Scaled by 10000 so integer arithmetic keeps two decimal places
    progress_raw=$((current_index * 10000 / file_count))
    progress_percent=$(printf '%0.2f' "${progress_raw}e-2")
    color_echo "${Cyan}" "Progress -- Files: ${current_index}/${file_count} (${progress_percent}%)"

    echo_debug "Processing inode group with ${num_paths} paths:"
    for path in "${paths[@]}"; do
        echo_debug "  - $path"
    done

    # Check rebalance counts for all files.
    # Only done when pass tracking is enabled (passes >= 1), matching the
    # guards around database creation and update; otherwise a count of 0
    # would always satisfy "0 >= passes" and every file would be skipped.
    should_skip=false
    if [ "${passes_flag}" -ge 1 ]; then
        for path in "${paths[@]}"; do
            rebalance_count=$(get_rebalance_count "${path}")
            if [ "${rebalance_count}" -ge "${passes_flag}" ]; then
                should_skip=true
                break
            fi
        done
    fi

    if [ "${should_skip}" = true ]; then
        if [ "${num_paths}" -gt 1 ]; then
            color_echo "${Yellow}" "Rebalance count (${passes_flag}) reached, skipping group: ${paths[*]}"
        else
            color_echo "${Yellow}" "Rebalance count (${passes_flag}) reached, skipping: ${paths[0]}"
        fi
        return
    fi

    main_file="${paths[0]}"

    # Check if main_file exists
    if [[ ! -f "${main_file}" ]]; then
        color_echo "${Yellow}" "File is missing, skipping: ${main_file}"
        return
    fi

    tmp_extension=".balance"
    tmp_file_path="${main_file}${tmp_extension}"

    echo "Copying '${main_file}' to '${tmp_file_path}'..."
    echo_debug "Executing copy command:"

    if [[ "${OSName}" == "linux-gnu"* ]]; then
        # Linux

        # --reflink=never -- force standard copy (see ZFS Block Cloning)
        # -a -- keep attributes, includes -d -- keep symlinks (dont copy target) and
        #       -p -- preserve ACLs to
        # -x -- stay on one system
        cmd=(cp --reflink=never -ax "${main_file}" "${tmp_file_path}")
        echo_debug "${cmd[@]}"
        "${cmd[@]}"
    elif [[ "${OSName}" == "darwin"* ]] || [[ "${OSName}" == "freebsd"* ]]; then
        # Mac OS and FreeBSD

        # -a -- Archive mode. Same as -RpP. Includes preservation of modification
        #       time, access time, file flags, file mode, ACL, user ID, and group
        #       ID, as allowed by permissions.
        # -x -- File system mount points are not traversed.
        cmd=(cp -ax "${main_file}" "${tmp_file_path}")
        echo_debug "${cmd[@]}"
        "${cmd[@]}"
    else
        echo "Unsupported OS type: $OSTYPE"
        exit 1
    fi

    # Compare copy against original to make sure nothing went wrong
    if [[ "${checksum_flag}" == "true"* ]]; then
        echo "Comparing copy against original..."
        if [[ "${OSName}" == "linux-gnu"* ]]; then
            # Linux

            # file attributes
            original_perms=$(lsattr "${main_file}")
            # remove anything after the last space
            original_perms=${original_perms% *}
            # file permissions, owner, group, size, modification time
            original_perms="${original_perms} $(stat -c "%A %U %G %s %Y" "${main_file}")"

            # file attributes
            copy_perms=$(lsattr "${tmp_file_path}")
            # remove anything after the last space
            copy_perms=${copy_perms% *}
            # file permissions, owner, group, size, modification time
            copy_perms="${copy_perms} $(stat -c "%A %U %G %s %Y" "${tmp_file_path}")"
        elif [[ "${OSName}" == "darwin"* ]] || [[ "${OSName}" == "freebsd"* ]]; then
            # Mac OS
            # FreeBSD

            # note: no lsattr on Mac OS or FreeBSD

            # file permissions, owner, group size, modification time
            original_perms="$(stat -f "%Sp %Su %Sg %z %m" "${main_file}")"

            # file permissions, owner, group size, modification time
            copy_perms="$(stat -f "%Sp %Su %Sg %z %m" "${tmp_file_path}")"
        else
            echo "Unsupported OS type: $OSTYPE"
            exit 1
        fi

        echo_debug "Original perms: $original_perms"
        echo_debug "Copy perms: $copy_perms"

        # Exact comparison: a trailing glob would accept a copy whose values
        # are merely a prefix of the original's (e.g. a truncated timestamp)
        if [[ "${original_perms}" == "${copy_perms}" ]]; then
            color_echo "${Green}" "Attribute and permission check OK"
        else
            color_echo "${Red}" "Attribute and permission check FAILED: ${original_perms} != ${copy_perms}"
            exit 1
        fi

        if cmp -s "${main_file}" "${tmp_file_path}"; then
            color_echo "${Green}" "File content check OK"
        else
            color_echo "${Red}" "File content check FAILED"
            exit 1
        fi
    fi

    echo "Removing original files..."
    for path in "${paths[@]}"; do
        echo_debug "Removing $path"
        rm "${path}"
    done

    echo "Renaming temporary copy to original '${main_file}'..."
    echo_debug "Moving ${tmp_file_path} to ${main_file}"
    mv "${tmp_file_path}" "${main_file}"

    # Only recreate hardlinks if there are multiple paths
    if [ "${num_paths}" -gt 1 ]; then
        echo "Recreating hardlinks..."
        # Index 0 is main_file itself, so linking starts at index 1
        for (( i=1; i<${#paths[@]}; i++ )); do
            echo_debug "Linking ${main_file} to ${paths[$i]}"
            ln "${main_file}" "${paths[$i]}"
        done
    fi

    if [ "${passes_flag}" -ge 1 ]; then
        # Update rebalance "database" for all files
        for path in "${paths[@]}"; do
            increment_rebalance_count "${path}"
        done
    fi
}

## Argument parsing

checksum_flag='true'
passes_flag='1'
debug_flag='false'

if [[ "$#" -eq 0 ]]; then
    print_usage
    exit 0
fi

# Loop on the argument count rather than `true`: with `set -u`, reading $1
# after the last option has been shifted away would abort the script.
while [[ "$#" -gt 0 ]]; do
    case "$1" in
    -h | --help)
        print_usage
        exit 0
        ;;
    -c | --checksum)
        require_option_value "$1" "$#"
        if is_truthy "$2"; then
            checksum_flag="true"
        else
            checksum_flag="false"
        fi
        shift 2
        ;;
    -p | --passes)
        require_option_value "$1" "$#"
        passes_flag=$2
        shift 2
        ;;
    --debug)
        require_option_value "$1" "$#"
        if is_truthy "$2"; then
            debug_flag="true"
        else
            debug_flag="false"
        fi
        shift 2
        ;;
    *)
        # First non-option argument is the path to rebalance
        break
        ;;
    esac
done

if [[ "$#" -eq 0 ]]; then
    color_echo "${Red}" "Missing path to rebalance"
    print_usage
    exit 1
fi

# passes_flag is used in integer comparisons throughout, so reject anything else
if ! [[ "${passes_flag}" =~ ^-?[0-9]+$ ]]; then
    color_echo "${Red}" "Invalid value for --passes: ${passes_flag}"
    print_usage
    exit 1
fi

root_path=$1

## Main

OSName=$(echo "$OSTYPE" | tr '[:upper:]' '[:lower:]')

color_echo "$Cyan" "Start rebalancing $(date):"
color_echo "$Cyan" "  Path: ${root_path}"
color_echo "$Cyan" "  Rebalancing Passes: ${passes_flag}"
color_echo "$Cyan" "  Use Checksum: ${checksum_flag}"
color_echo "$Cyan" "  Debug Mode: ${debug_flag}"

# Generate files_list.txt with device and inode numbers using stat, separated by a pipe '|'
if [[ "${OSName}" == "linux-gnu"* ]]; then
    # Linux
    find "$root_path" -type f -not -path '*/.zfs/*' -exec stat --printf '%d:%i|%n\n' {} \; > files_list.txt
elif [[ "${OSName}" == "darwin"* ]] || [[ "${OSName}" == "freebsd"* ]]; then
    # Mac OS and FreeBSD
    find "$root_path" -type f -not -path '*/.zfs/*' -exec stat -f "%d:%i|%N" {} \; > files_list.txt
else
    echo "Unsupported OS type: $OSTYPE"
    exit 1
fi

echo_debug "Contents of files_list.txt:"
if [ "$debug_flag" = true ]; then
    cat files_list.txt
fi

# Sort files_list.txt by device and inode number
sort -t '|' -k1,1 files_list.txt > sorted_files_list.txt

echo_debug "Contents of sorted_files_list.txt:"
if [ "$debug_flag" = true ]; then
    cat sorted_files_list.txt
fi

# Use awk to group paths by inode key and handle spaces in paths.
# Output format: one unindented key line per inode, followed by one
# tab-indented line for every path that shares it. The path is taken with
# substr() rather than $2 so that '|' characters inside a path survive.
awk -F'|' '{
    key = $1
    path = substr($0, length(key) + 2)
    if (key != prev_key) {
        print key
        prev_key = key
    }
    print "\t" path
}' sorted_files_list.txt > grouped_inodes.txt

echo_debug "Contents of grouped_inodes.txt:"
if [ "$debug_flag" = true ]; then
    cat grouped_inodes.txt
fi

# Count number of inode groups.
# grep -c exits with status 1 when it counts zero lines, which would abort
# the script under `set -e` for an empty tree; the count is still printed.
file_count=$(grep -c '^\w' grouped_inodes.txt || true)

color_echo "$Cyan" "  Number of files to process: ${file_count}"

# Initialize current_index
current_index=0

# Create db file
if [ "${passes_flag}" -ge 1 ]; then
    touch "./${rebalance_db_file_name}"
fi

paths=()

# Read grouped_inodes.txt line by line
while IFS= read -r line; do
    if [[ "$line" == $'\t'* ]]; then
        # This is a path line
        path="${line#$'\t'}"
        paths+=("$path")
    else
        # This is a new inode key
        if [[ "${#paths[@]}" -gt 0 ]]; then
            # Process the previous group
            process_inode_group "${paths[@]}"
        fi
        paths=()
    fi
done < grouped_inodes.txt

# Process the last group after the loop ends
if [[ "${#paths[@]}" -gt 0 ]]; then
    process_inode_group "${paths[@]}"
fi

# Clean up temporary files
rm files_list.txt sorted_files_list.txt grouped_inodes.txt

echo ""
echo ""
color_echo "$Green" "Done!"

--------------------------------------------------------------------------------