├── .github
│   ├── dependabot.yml
│   ├── stale.yml
│   └── workflows
│       ├── docker-latest.yml
│       ├── shellcheck.yml
│       └── test.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── test
│   └── pool
│       ├── abc-123_!"§$%&()[]{}=?`#'*+-_,;.:|<>~@.txt
│       └── projects
│           └── [2020] some project
│               ├── file
│               ├── file.txt
│               └── mp4.txt
├── testing.sh
└── zfs-inplace-rebalancing.sh
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | updates:
4 | - package-ecosystem: github-actions
5 | directory: "/"
6 | schedule:
7 | # Check for updates to GitHub Actions every week
8 | interval: "weekly"
9 |
10 |
--------------------------------------------------------------------------------
/.github/stale.yml:
--------------------------------------------------------------------------------
1 | # Configuration for probot-stale - https://github.com/probot/stale
2 |
3 | # Number of days of inactivity before an Issue or Pull Request becomes stale
4 | daysUntilStale: 60
5 |
6 | # Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
7 | # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
8 | daysUntilClose: 14
9 |
10 | # Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable
11 | exemptLabels:
12 | - pinned
13 | - security
14 | - bug
15 | - enhancement
16 |
17 | # Set to true to ignore issues in a project (defaults to false)
18 | exemptProjects: false
19 |
20 | # Set to true to ignore issues in a milestone (defaults to false)
21 | exemptMilestones: false
22 |
23 | # Set to true to ignore issues with an assignee (defaults to false)
24 | exemptAssignees: true
25 |
26 | # Label to use when marking as stale
27 | staleLabel: wontfix
28 |
29 | # Comment to post when marking as stale. Set to `false` to disable
30 | markComment: >
31 | This issue has been automatically marked as stale because it has not had
32 | recent activity. It will be closed if no further activity occurs. Thank you
33 | for your contributions.
34 |
35 | # Comment to post when removing the stale label.
36 | # unmarkComment: >
37 | # Your comment here.
38 |
39 | # Comment to post when closing a stale Issue or Pull Request.
40 | closeComment: >
41 | There has been no further activity from contributors or maintainers to revive this stale issue, so it will now be closed.
42 |
43 | # Limit the number of actions per hour, from 1-30. Default is 30
44 | limitPerRun: 30
45 |
46 | # Limit to only `issues` or `pulls`
47 | only: issues
48 |
49 | # Optionally, specify configuration settings that are specific to just 'issues' or 'pulls':
50 | # pulls:
51 | # daysUntilStale: 30
52 | # markComment: >
53 | # This pull request has been automatically marked as stale because it has not had
54 | # recent activity. It will be closed if no further activity occurs. Thank you
55 | # for your contributions.
56 |
57 | # issues:
58 | # exemptLabels:
59 | # - confirmed
60 |
--------------------------------------------------------------------------------
/.github/workflows/docker-latest.yml:
--------------------------------------------------------------------------------
1 | name: Docker latest
2 |
3 | on:
4 | push:
5 | branches: [ master ]
6 |
7 | env:
8 | REGISTRY: ghcr.io
9 |
10 | jobs:
11 | build:
12 | runs-on: ubuntu-latest
13 | permissions:
14 | contents: read
15 | packages: write
16 |
17 | steps:
18 | - name: Checkout repository
19 | uses: actions/checkout@v4
20 |
21 | - name: Log into registry ${{ env.REGISTRY }}
22 | uses: docker/login-action@v3
23 | with:
24 | registry: ${{ env.REGISTRY }}
25 | username: ${{ github.actor }}
26 | password: ${{ secrets.GITHUB_TOKEN }}
27 |
28 | - name: Extract Docker metadata
29 | id: meta
30 | uses: docker/metadata-action@v5
31 | with:
32 | images: ${{ env.REGISTRY }}/${{ github.repository }}
33 | tags: |
34 | type=raw,value=latest
35 |
36 | - name: Build and push Docker image
37 | id: build-and-push
38 | uses: docker/build-push-action@v6
39 | with:
40 | context: .
41 | file: Dockerfile
42 | push: true
43 | tags: ${{ steps.meta.outputs.tags }}
44 | labels: ${{ steps.meta.outputs.labels }}
--------------------------------------------------------------------------------
/.github/workflows/shellcheck.yml:
--------------------------------------------------------------------------------
1 | # ShellCheck
2 |
3 | name: CI
4 |
5 | # Controls when the action will run. Triggers the workflow on push or pull request
6 | # events but only for the master branch
7 | on:
8 | push:
9 | branches: [ master ]
10 | pull_request:
11 | branches: [ master ]
12 |
13 | jobs:
14 | shellcheck:
15 | name: Shellcheck
16 | runs-on: ubuntu-latest
17 | steps:
18 | - uses: actions/checkout@v4
19 | - name: Run ShellCheck
20 | uses: ludeeus/action-shellcheck@master
21 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | # Test
2 |
3 | name: Test
4 |
5 | on:
6 | push:
7 | branches: [ master ]
8 | pull_request:
9 | branches: [ master ]
10 |
11 | jobs:
12 | linuxTest:
13 | name: Test on Linux
14 | runs-on: ubuntu-latest
15 | steps:
16 | - uses: actions/checkout@v4
17 |
18 | - name: Run testing script
19 | run: ./testing.sh
20 |
21 | macOsTest:
22 | name: Test on macOS
23 | runs-on: macos-latest
24 | steps:
25 | - uses: actions/checkout@v4
26 |
27 | - name: Install coreutils
28 | run: brew install coreutils
29 |
30 | - name: Run testing script on macOS
31 | run: ./testing.sh
32 |
33 | FreeBSDTest:
34 | name: Test on FreeBSD
35 | runs-on: ubuntu-latest
36 |
37 | steps:
38 | - uses: actions/checkout@v4
39 |
40 | - name: Test in FreeBSD
41 | id: test
42 | uses: vmactions/freebsd-vm@v1
43 | with:
44 | usesh: true
45 | prepare: |
46 | pkg install -y bash
47 | run: |
48 | ./testing.sh
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | test.log
2 | error.log
3 | rebalance_db.txt
4 | files_list.txt
5 | sorted_files_list.txt
6 | grouped_inodes.txt
7 | testing_data
8 | .vscode
9 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM phusion/baseimage:jammy-1.0.1
2 | LABEL maintainer="markusressel"
3 |
4 | RUN apt-get update \
5 | && apt-get -y install bc \
6 | && apt-get clean && rm -rf /var/lib/apt/lists/*
7 |
8 | COPY zfs-inplace-rebalancing.sh ./
9 |
10 | ENTRYPOINT ["./zfs-inplace-rebalancing.sh"]
11 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Creative Commons Legal Code
2 |
3 | CC0 1.0 Universal
4 |
5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
12 | HEREUNDER.
13 |
14 | Statement of Purpose
15 |
16 | The laws of most jurisdictions throughout the world automatically confer
17 | exclusive Copyright and Related Rights (defined below) upon the creator
18 | and subsequent owner(s) (each and all, an "owner") of an original work of
19 | authorship and/or a database (each, a "Work").
20 |
21 | Certain owners wish to permanently relinquish those rights to a Work for
22 | the purpose of contributing to a commons of creative, cultural and
23 | scientific works ("Commons") that the public can reliably and without fear
24 | of later claims of infringement build upon, modify, incorporate in other
25 | works, reuse and redistribute as freely as possible in any form whatsoever
26 | and for any purposes, including without limitation commercial purposes.
27 | These owners may contribute to the Commons to promote the ideal of a free
28 | culture and the further production of creative, cultural and scientific
29 | works, or to gain reputation or greater distribution for their Work in
30 | part through the use and efforts of others.
31 |
32 | For these and/or other purposes and motivations, and without any
33 | expectation of additional consideration or compensation, the person
34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she
35 | is an owner of Copyright and Related Rights in the Work, voluntarily
36 | elects to apply CC0 to the Work and publicly distribute the Work under its
37 | terms, with knowledge of his or her Copyright and Related Rights in the
38 | Work and the meaning and intended legal effect of CC0 on those rights.
39 |
40 | 1. Copyright and Related Rights. A Work made available under CC0 may be
41 | protected by copyright and related or neighboring rights ("Copyright and
42 | Related Rights"). Copyright and Related Rights include, but are not
43 | limited to, the following:
44 |
45 | i. the right to reproduce, adapt, distribute, perform, display,
46 | communicate, and translate a Work;
47 | ii. moral rights retained by the original author(s) and/or performer(s);
48 | iii. publicity and privacy rights pertaining to a person's image or
49 | likeness depicted in a Work;
50 | iv. rights protecting against unfair competition in regards to a Work,
51 | subject to the limitations in paragraph 4(a), below;
52 | v. rights protecting the extraction, dissemination, use and reuse of data
53 | in a Work;
54 | vi. database rights (such as those arising under Directive 96/9/EC of the
55 | European Parliament and of the Council of 11 March 1996 on the legal
56 | protection of databases, and under any national implementation
57 | thereof, including any amended or successor version of such
58 | directive); and
59 | vii. other similar, equivalent or corresponding rights throughout the
60 | world based on applicable law or treaty, and any national
61 | implementations thereof.
62 |
63 | 2. Waiver. To the greatest extent permitted by, but not in contravention
64 | of, applicable law, Affirmer hereby overtly, fully, permanently,
65 | irrevocably and unconditionally waives, abandons, and surrenders all of
66 | Affirmer's Copyright and Related Rights and associated claims and causes
67 | of action, whether now known or unknown (including existing as well as
68 | future claims and causes of action), in the Work (i) in all territories
69 | worldwide, (ii) for the maximum duration provided by applicable law or
70 | treaty (including future time extensions), (iii) in any current or future
71 | medium and for any number of copies, and (iv) for any purpose whatsoever,
72 | including without limitation commercial, advertising or promotional
73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
74 | member of the public at large and to the detriment of Affirmer's heirs and
75 | successors, fully intending that such Waiver shall not be subject to
76 | revocation, rescission, cancellation, termination, or any other legal or
77 | equitable action to disrupt the quiet enjoyment of the Work by the public
78 | as contemplated by Affirmer's express Statement of Purpose.
79 |
80 | 3. Public License Fallback. Should any part of the Waiver for any reason
81 | be judged legally invalid or ineffective under applicable law, then the
82 | Waiver shall be preserved to the maximum extent permitted taking into
83 | account Affirmer's express Statement of Purpose. In addition, to the
84 | extent the Waiver is so judged Affirmer hereby grants to each affected
85 | person a royalty-free, non transferable, non sublicensable, non exclusive,
86 | irrevocable and unconditional license to exercise Affirmer's Copyright and
87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the
88 | maximum duration provided by applicable law or treaty (including future
89 | time extensions), (iii) in any current or future medium and for any number
90 | of copies, and (iv) for any purpose whatsoever, including without
91 | limitation commercial, advertising or promotional purposes (the
92 | "License"). The License shall be deemed effective as of the date CC0 was
93 | applied by Affirmer to the Work. Should any part of the License for any
94 | reason be judged legally invalid or ineffective under applicable law, such
95 | partial invalidity or ineffectiveness shall not invalidate the remainder
96 | of the License, and in such case Affirmer hereby affirms that he or she
97 | will not (i) exercise any of his or her remaining Copyright and Related
98 | Rights in the Work or (ii) assert any associated claims and causes of
99 | action with respect to the Work, in either case contrary to Affirmer's
100 | express Statement of Purpose.
101 |
102 | 4. Limitations and Disclaimers.
103 |
104 | a. No trademark or patent rights held by Affirmer are waived, abandoned,
105 | surrendered, licensed or otherwise affected by this document.
106 | b. Affirmer offers the Work as-is and makes no representations or
107 | warranties of any kind concerning the Work, express, implied,
108 | statutory or otherwise, including without limitation warranties of
109 | title, merchantability, fitness for a particular purpose, non
110 | infringement, or the absence of latent or other defects, accuracy, or
111 | the present or absence of errors, whether or not discoverable, all to
112 | the greatest extent permissible under applicable law.
113 | c. Affirmer disclaims responsibility for clearing rights of other persons
114 | that may apply to the Work or any use thereof, including without
115 | limitation any person's Copyright and Related Rights in the Work.
116 | Further, Affirmer disclaims responsibility for obtaining any necessary
117 | consents, permissions or other rights required for any use of the
118 | Work.
119 | d. Affirmer understands and acknowledges that Creative Commons is not a
120 | party to this document and has no duty or obligation with respect to
121 | this CC0 or use of the Work.
122 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # zfs-inplace-rebalancing
2 | Simple bash script to rebalance pool data between all mirrors when adding vdevs to a pool.
3 |
4 | [![asciicast](https://asciinema.org/a/350222.svg)](https://asciinema.org/a/350222)
5 |
6 | ## How it works
7 |
8 | This script recursively traverses all the files in a given directory. Each file is copied with a `.balance` suffix, retaining all file attributes. The original is then deleted and the *copy* is renamed back to the name of the original file. When copying a file ZFS will spread the data blocks across all vdevs, effectively distributing/rebalancing the data of the original file (more or less) evenly. This allows the pool data to be rebalanced without the need for a separate backup pool/drive.
9 |
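A minimal sketch of the per-file sequence (illustration only; the real script adds verification and bookkeeping, the Linux form of the copy command is shown, and the path below is hypothetical):

```shell
file="/pool/movies/file1.mkv"                   # hypothetical example file
cp --reflink=never -ax "$file" "$file.balance"  # full copy; the new blocks are spread across all vdevs
rm "$file"                                      # remove the original
mv "$file.balance" "$file"                      # rename the copy back to the original name
```
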
10 | When the script detects an inode group of hardlinked files, it copies only one file of the group. The original file and all of its hardlinks are then deleted, the *copy* is renamed back to the name of the original file, and new hardlinks to that copy are created to replace the other linked files that were removed.
11 |
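For a group of hardlinked paths, roughly the following happens (simplified sketch with hypothetical file names):

```shell
# a.txt and b.txt are hardlinks of the same inode
cp --reflink=never -ax a.txt a.txt.balance  # copy one member of the group
rm a.txt b.txt                              # remove the original and all of its hardlinks
mv a.txt.balance a.txt                      # rename the copy back
ln a.txt b.txt                              # recreate the other hardlinks from the copy
```
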
12 | The way ZFS distributes writes is not trivial, which makes it hard to predict how effective the redistribution will be. See:
13 | - https://jrs-s.net/2018/04/11/zfs-allocates-writes-according-to-free-space-per-vdev-not-latency-per-vdev/
14 | - https://jrs-s.net/2018/08/24/zfs-write-allocation-in-0-7-x/
15 |
16 | Note that this process is not entirely "in-place", since a file has to be fully copied before the original is deleted. The term is used to make it clear that no additional pool (and therefore hardware) is necessary to use this script. However, this also means that you need enough free space in the pool to hold a copy of the biggest file in your target directory.
17 |
18 | At no point are both the original file and its copy deleted at the same time.
19 | To make sure file attributes, permissions and file content are preserved when copying the original file, all attributes and the file content are compared before removing the original file (unless disabled using `--checksum false`).
20 |
21 | Since file attributes are fully retained, it is not possible to verify if an individual file has been rebalanced. However, this script keeps track of rebalanced files by maintaining a "database" file in its working directory called `rebalance_db.txt` (if not disabled using `--passes 0`). This file contains two lines of text for each processed file:
22 |
23 | * One line for the file path
24 | * and the next line for the current count of rebalance passes
25 |
26 | ```text
27 | /my/example/pool/file1.mkv
28 | 1
29 | /my/example/pool/file2.mkv
30 | 1
31 | ```
32 |
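To look up the pass count of a single file, you can mirror what the script does internally: find the line number of the path, then read the following line (a sketch, assuming the file is already listed in `rebalance_db.txt` and using a hypothetical path):

```shell
file="/my/example/pool/file1.mkv"
line_nr=$(grep -xF -n "$file" rebalance_db.txt | head -n 1 | cut -d: -f1)
awk "NR == $((line_nr + 1))" rebalance_db.txt   # prints the pass count stored on the following line
```
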
33 | All files in a given inode group are added to the database when processed. The highest pass count within an inode group is compared against the `--passes` value to decide whether the whole group should be skipped in a given script execution.
34 |
35 | The hardlink handling creates temporary files in the working directory alongside `rebalance_db.txt`, which are removed at the end of each run. `files_list.txt` lists all files found in the given target location. `sorted_files_list.txt` contains the same list sorted by device and inode number. `grouped_inodes.txt` groups the file paths by inode: each inode key is printed on its own line, followed by the paths belonging to that inode on indented lines.
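
For illustration, `grouped_inodes.txt` might look like this for two standalone files and one pair of hardlinks (device and inode numbers are hypothetical; paths are indented with a tab):

```text
66306:131843
	/pool/movies/file1.mkv
66306:131844
	/pool/movies/file2.mkv
66306:131850
	/pool/projects/report.txt
	/pool/projects/report-link.txt
```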
36 |
37 | ## Prerequisites
38 |
39 | ### Balance Status
40 |
41 | To check the current balance of a pool use:
42 |
43 | ```
44 | > zpool list -v
45 |
46 | NAME SIZE ALLOC FREE CKPOINT EXPANDSZ FRAG CAP DEDUP HEALTH ALTROOT
47 | bpool 1.88G 113M 1.76G - - 2% 5% 1.00x ONLINE -
48 | mirror 1.88G 113M 1.76G - - 2% 5.88% - ONLINE
49 | ata-Samsung_SSD_860_EVO_500GB_J0NBL-part2 - - - - - - - - ONLINE
50 | ata-Samsung_SSD_860_EVO_500GB_S4XB-part2 - - - - - - - - ONLINE
51 | rpool 460G 3.66G 456G - - 0% 0% 1.00x ONLINE -
52 | mirror 460G 3.66G 456G - - 0% 0.79% - ONLINE
53 | ata-Samsung_SSD_860_EVO_500GB_S4BB-part3 - - - - - - - - ONLINE
54 | ata-Samsung_SSD_860_EVO_500GB_S4XB-part3 - - - - - - - - ONLINE
55 | vol1 9.06T 3.77T 5.29T - - 13% 41% 1.00x ONLINE -
56 | mirror 3.62T 1.93T 1.70T - - 25% 53.1% - ONLINE
57 | ata-WDC_WD40EFRX-68N32N0_WD-WCC - - - - - - - - ONLINE
58 | ata-ST4000VN008-2DR166_ZM4-part2 - - - - - - - - ONLINE
59 | mirror 3.62T 1.84T 1.78T - - 8% 50.9% - ONLINE
60 | ata-ST4000VN008-2DR166_ZM4-part2 - - - - - - - - ONLINE
61 | ata-WDC_WD40EFRX-68N32N0_WD-WCC-part2 - - - - - - - - ONLINE
62 | mirror 1.81T 484K 1.81T - - 0% 0.00% - ONLINE
63 | ata-WDC_WD20EARX-00PASB0_WD-WMA-part2 - - - - - - - - ONLINE
64 | ata-ST2000DM001-1CH164_Z1E-part2 - - - - - - - - ONLINE
65 | ```
66 |
67 | and have a look at the difference in the `CAP` value (the ratio of `ALLOC` to `SIZE`) between vdevs.
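
If the full verbose output is too noisy, most OpenZFS versions also allow restricting the columns, for example (property names may differ slightly between platforms):

```shell
zpool list -v -o name,size,allocated,free,capacity
```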
68 |
69 | ### No Deduplication
70 |
71 | Due to the working principle of this script, which essentially creates a duplicate file on purpose, deduplication will most definitely prevent it from working as intended. If you use deduplication you probably have to resort to a more expensive rebalancing method that involves additional drives.
72 |
73 | ### Data selection (cold data)
74 |
75 | Due to the working principle of this script, it is crucial that you **only run it on data that is not actively accessed**, since the original file will be deleted.
76 |
77 | ### Snapshots
78 |
79 | If you take a snapshot of the data you want to rebalance before starting the script, keep in mind that ZFS then has to keep track of all of the data in the target directory twice: once in the snapshot, and once for the new copies. This means you will effectively consume double the size of all files within the target directory. It is therefore a good idea to process the pool data in batches and remove old snapshots along the way, since you will probably hit the capacity limits of your pool at some point during the rebalancing process.
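
A sketch of such a batch workflow, assuming a hypothetical dataset `tank/media` whose sub-directories are rebalanced one at a time:

```shell
zfs snapshot tank/media@pre-rebalance-batch1       # optional safety snapshot for this batch
./zfs-inplace-rebalancing.sh /tank/media/batch1    # rebalance only this batch
zfs destroy tank/media@pre-rebalance-batch1        # free the doubled space before the next batch
```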
80 |
81 | ## Installation
82 |
83 | Since this is a simple bash script, there is no package. Simply download the script and make it executable:
84 |
85 | ```shell
86 | curl -O https://raw.githubusercontent.com/markusressel/zfs-inplace-rebalancing/master/zfs-inplace-rebalancing.sh
87 | chmod +x ./zfs-inplace-rebalancing.sh
88 | ```
89 |
90 | Dependencies:
91 | * `perl` - it should be available on most systems by default
92 | * `awk` - it should be available on most systems by default
93 |
94 | ## Usage
95 |
96 | **ALWAYS HAVE A BACKUP OF YOUR DATA!**
97 |
98 | You can print a help message by running the script without any parameters:
99 |
100 | ```shell
101 | ./zfs-inplace-rebalancing.sh
102 | ```
103 |
104 | ### Parameters
105 |
106 | | Name | Description | Default |
107 | |-----------|-------------|---------|
108 | | `-c`<br>`--checksum` | Whether to compare attributes and content of the copied file against the original before deleting it. Technically this is a redundant check and consumes extra resources, so think twice. | `true` |
109 | | `-p`<br>`--passes` | The maximum number of rebalance passes per file. Setting this to infinity by using a value `<= 0` might improve performance when rebalancing a lot of small files. | `1` |
110 | | `--debug` | Shows additional output, including listing all files in the target location 3 times (list, inode sorted list, inode groupings) and more granular move/copy/link/count transaction information. | `false` |
111 |
112 | ### Example
113 |
114 | Make sure to run this script with a user that has rw permission to all of the files in the target directory.
115 | The easiest way to achieve this is by **running the script as root**.
116 |
117 | ```shell
118 | sudo su
119 | ./zfs-inplace-rebalancing.sh --checksum true --passes 1 /pool/path/to/rebalance
120 | ```
121 |
122 | To keep track of the balancing progress, you can open another terminal and run:
123 |
124 | ```shell
125 | watch zpool list -v
126 | ```
127 |
128 | ### Log to File
129 |
130 | To write the output to a file, simply redirect stdout and stderr to a file (or separate files).
131 | Since this redirects all output, you will have to follow the contents of the log files to get real-time info:
132 |
133 | ```shell
134 | # one shell window:
135 | tail -F ./stdout.log
136 | # another shell window:
137 | ./zfs-inplace-rebalancing.sh /pool/path/to/rebalance >> ./stdout.log 2>> ./stderr.log
138 | ```
139 |
140 | ### Things to consider
141 |
142 | Although this script **does** have progress output (file counts as well as a percentage), it might be a good idea to try a small subfolder first, or to process your pool folder layout in manually selected batches. This can also limit the damage done if anything bad happens.
143 |
144 | When aborting the script midway through, be sure to check the last lines of its output. If the script is cancelled before or during the renaming step, a `.balance` file might be left behind and you will have to rename (or delete) it manually.
145 |
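Leftover temporary copies are easy to locate afterwards, for example:

```shell
find /pool/path/to/rebalance -name '*.balance'
```
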
146 | Although the `--passes` parameter can be used to limit the maximum number of rebalance passes per file, it is only meant to speed up aborted runs. Individual files will **not be processed multiple times automatically**. To reach multiple passes you have to run the script on the same target directory multiple times.
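
For example, to give every file up to three passes you could run the script three times over the same directory (a sketch; with `--passes 3` the counter in `rebalance_db.txt` ensures no file gets more than three passes even if the loop is run again):

```shell
for i in 1 2 3; do
    ./zfs-inplace-rebalancing.sh --passes 3 /pool/path/to/rebalance
done
```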
147 |
148 | ### Dockerfile
149 |
150 | To increase portability, this script can also be run using docker:
151 |
152 | ```shell
153 | sudo docker run --rm -it -v /your/data:/data ghcr.io/markusressel/zfs-inplace-rebalancing:latest ./data
154 | ```
155 |
156 | # Contributing
157 |
158 | GitHub is for social coding: if you want to write code, I encourage contributions through pull requests from forks
159 | of this repository. Create GitHub tickets for bugs and new features and comment on the ones that you are interested in.
160 |
161 | # Attributions
162 |
163 | This script was inspired by [zfs-balancer](https://github.com/programster/zfs-balancer).
164 |
165 | # Disclaimer
166 |
167 | This software is provided "as is" and "as available", without any warranty.
168 | **ALWAYS HAVE A BACKUP OF YOUR DATA!**
169 |
--------------------------------------------------------------------------------
/test/pool/abc-123_!"§$%&()[]{}=?`#'*+-_,;.:|<>~@.txt:
--------------------------------------------------------------------------------
1 | test
--------------------------------------------------------------------------------
/test/pool/projects/[2020] some project/file:
--------------------------------------------------------------------------------
1 | test
--------------------------------------------------------------------------------
/test/pool/projects/[2020] some project/file.txt:
--------------------------------------------------------------------------------
1 | test
--------------------------------------------------------------------------------
/test/pool/projects/[2020] some project/mp4.txt:
--------------------------------------------------------------------------------
1 | test
--------------------------------------------------------------------------------
/testing.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # exit script on error
4 | set -e
5 | # exit on undeclared variable
6 | set -u
7 |
8 | log_std_file=./test.log
9 | log_error_file=./error.log
10 | test_data_src=./test/pool
11 | test_pool_data_path=./testing_data
12 | test_pool_data_size_path=$test_pool_data_path/size
13 |
14 | ## Color Constants
15 |
16 | # Reset
17 | Color_Off='\033[0m' # Text Reset
18 |
19 | # Regular Colors
20 | Red='\033[0;31m' # Red
21 | Green='\033[0;32m' # Green
22 | Yellow='\033[0;33m' # Yellow
23 | Cyan='\033[0;36m' # Cyan
24 |
25 |
26 | OSName=$(echo "$OSTYPE" | tr '[:upper:]' '[:lower:]')
27 |
28 | ## Functions
29 |
30 | # print a given text entirely in a given color
31 | function color_echo () {
32 | color=$1
33 | text=$2
34 | echo -e "${color}${text}${Color_Off}"
35 | }
36 |
37 | function prepare() {
38 | # cleanup
39 | rm -f $log_std_file
40 | rm -f $log_error_file
41 | rm -f rebalance_db.txt
42 | rm -rf $test_pool_data_path
43 |
44 | # setup
45 | cp -rf $test_data_src $test_pool_data_path
46 | }
47 |
48 | # return the current time in nanoseconds
49 | function get_time() {
50 | if [[ "${OSName}" == "darwin"* ]]; then
51 | date=$(gdate +%s%N)
52 | else
53 | date=$(date +%s%N)
54 | fi
55 | echo "$date"
56 | }
57 |
58 | function get_inode() {
59 | if [[ "${OSName}" == "darwin"* ]] || [[ "${OSName}" == "freebsd"* ]]; then
60 | inode=$(stat -f "%i" "$1")
61 | else
62 | inode=$(stat -c "%i" "$1")
63 | fi
64 |
65 | echo "$inode"
66 | }
67 |
68 | function assertions() {
69 | # check error log is empty
70 | if grep -q '[^[:space:]]' $log_error_file; then
71 | color_echo "$Red" "error log is not empty!"
72 | cat $log_error_file
73 | exit 1
74 | fi
75 | }
76 |
77 | function assert_matching_file_hardlinked() {
78 | if [[ "$(get_inode "$1")" != "$(get_inode "$2")" ]]; then
79 | echo "File '$1' was not hardlinked to '$2' when it should have been!"
80 | exit 1
81 | fi
82 | }
83 |
84 | function print_time_taken(){
85 | time_taken=$1
86 | minutes=$((time_taken / 60000))
87 | seconds=$((time_taken % 60000 / 1000))
88 | milliseconds=$((time_taken % 1000))
89 | color_echo "$Yellow" "Time taken: ${minutes}m ${seconds}s ${milliseconds}ms"
90 | }
91 |
92 | color_echo "$Cyan" "Running tests..."
93 |
94 | color_echo "$Cyan" "Running tests with default options..."
95 | prepare
96 | ./zfs-inplace-rebalancing.sh $test_pool_data_path >> $log_std_file 2>> $log_error_file
97 | cat $log_std_file
98 | assertions
99 | color_echo "$Green" "Tests passed!"
100 |
101 | color_echo "$Cyan" "Running tests with checksum true and 1 pass..."
102 | prepare
103 | ./zfs-inplace-rebalancing.sh --checksum true --passes 1 $test_pool_data_path >> $log_std_file 2>> $log_error_file
104 | cat $log_std_file
105 | assertions
106 | color_echo "$Green" "Tests passed!"
107 |
108 | color_echo "$Cyan" "Running tests with checksum false..."
109 | prepare
110 | ./zfs-inplace-rebalancing.sh --checksum false $test_pool_data_path >> $log_std_file 2>> $log_error_file
111 | cat $log_std_file
112 | assertions
113 | color_echo "$Green" "Tests passed!"
114 |
115 | color_echo "$Cyan" "Running tests with hardlinks..."
116 | prepare
117 | ln "$test_pool_data_path/projects/[2020] some project/mp4.txt" "$test_pool_data_path/projects/[2020] some project/mp4.txt.link"
118 | ./zfs-inplace-rebalancing.sh $test_pool_data_path >> $log_std_file 2>> $log_error_file
119 | cat $log_std_file
120 | # Both paths should still point to the same inode (hardlink preserved) after rebalancing
121 | assert_matching_file_hardlinked "$test_pool_data_path/projects/[2020] some project/mp4.txt" "$test_pool_data_path/projects/[2020] some project/mp4.txt.link"
122 | assertions
123 | color_echo "$Green" "Tests passed!"
124 |
125 | color_echo "$Cyan" "Running tests with different file count and size..."
126 | prepare
127 |
128 | mkdir -p $test_pool_data_size_path
129 |
130 | color_echo "$Cyan" "Creating 1000 files of 1KB each..."
131 | mkdir -p $test_pool_data_size_path/small
132 | for i in {1..1000}; do
133 | dd if=/dev/urandom of=$test_pool_data_size_path/small/file_"$i".txt bs=1024 count=1 >> /dev/null 2>&1
134 | done
135 |
136 | color_echo "$Cyan" "Creating 5 files of 1GB each..."
137 | mkdir -p $test_pool_data_size_path/big
138 | for i in {1..5}; do
139 | dd if=/dev/urandom of=$test_pool_data_size_path/big/file_"$i".txt bs=1024 count=1048576 >> /dev/null 2>&1
140 | done
141 |
142 | color_echo "$Green" "Files created!"
143 |
144 | echo "Running rebalancing on small files..."
145 | # measure time taken
146 | start_time=$(get_time)
147 | ./zfs-inplace-rebalancing.sh $test_pool_data_size_path/small >> $log_std_file 2>> $log_error_file
148 | end_time=$(get_time)
149 | time_taken=$(( (end_time - start_time) / 1000000 ))
150 | print_time_taken $time_taken
151 | assertions
152 | color_echo "$Green" "Tests passed!"
153 |
154 | echo "Running rebalancing on big files..."
155 | rm -f rebalance_db.txt
156 | # measure time taken
157 | start_time=$(get_time)
158 | ./zfs-inplace-rebalancing.sh $test_pool_data_size_path/big >> $log_std_file 2>> $log_error_file
159 | end_time=$(get_time)
160 | time_taken=$(( (end_time - start_time) / 1000000 ))
161 | print_time_taken $time_taken
162 | assertions
163 | color_echo "$Green" "Tests passed!"
164 |
165 | echo "Running rebalancing on all files..."
166 | rm -f rebalance_db.txt
167 | # measure time taken
168 | start_time=$(get_time)
169 | ./zfs-inplace-rebalancing.sh $test_pool_data_size_path >> $log_std_file 2>> $log_error_file
170 | end_time=$(get_time)
171 | time_taken=$(( (end_time - start_time) / 1000000 ))
172 | print_time_taken $time_taken
173 | assertions
174 | color_echo "$Green" "Tests passed!"
175 |
176 | color_echo "$Green" "All tests passed!"
177 | color_echo "$Cyan" "Cleaning"
178 | rm -f $log_std_file
179 | rm -f $log_error_file
180 | rm -f rebalance_db.txt
181 | rm -rf $test_pool_data_path
182 |
183 |
--------------------------------------------------------------------------------
/zfs-inplace-rebalancing.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # Exit script on error
4 | set -e
5 | # Exit on undeclared variable
6 | set -u
7 |
8 | # File used to track processed files
9 | rebalance_db_file_name="rebalance_db.txt"
10 |
11 | # Index used for progress
12 | current_index=0
13 |
14 | ## Color Constants
15 |
16 | # Reset
17 | Color_Off='\033[0m' # Text Reset
18 |
19 | # Regular Colors
20 | Red='\033[0;31m' # Red
21 | Green='\033[0;32m' # Green
22 | Yellow='\033[0;33m' # Yellow
23 | Cyan='\033[0;36m' # Cyan
24 |
25 | ## Functions
26 |
27 | # Print a help message
28 | function print_usage() {
29 | echo "Usage: zfs-inplace-rebalancing.sh --checksum true --passes 1 --debug false /my/pool"
30 | }
31 |
32 | # Print a given text entirely in a given color
33 | function color_echo() {
34 | color=$1
35 | text=$2
36 | echo -e "${color}${text}${Color_Off}"
37 | }
38 |
39 | # Print the given text only when debug mode is enabled
40 | function echo_debug() {
41 | if [ "$debug_flag" = true ]; then
42 | text=$*
43 | echo "${text}"
44 | fi
45 | }
46 |
47 | function get_rebalance_count() {
48 | file_path="$1"
49 |
50 | line_nr=$(grep -sxF -n "${file_path}" "./${rebalance_db_file_name}" | head -n 1 | cut -d: -f1)
51 | if [ -z "${line_nr}" ]; then
52 | echo "0"
53 | return
54 | else
55 | rebalance_count_line_nr="$((line_nr + 1))"
56 | rebalance_count=$(awk "NR == ${rebalance_count_line_nr}" "./${rebalance_db_file_name}")
57 | echo "${rebalance_count}"
58 | return
59 | fi
60 | }
61 |
62 | # Rebalance a group of files that are hardlinked together
63 | function process_inode_group() {
64 | paths=("$@")
65 | num_paths="${#paths[@]}"
66 |
67 | # Progress tracking
68 | current_index="$((current_index + 1))"
69 | progress_raw=$((current_index * 10000 / file_count))
70 | progress_percent=$(printf '%0.2f' "${progress_raw}e-2")
71 | color_echo "${Cyan}" "Progress -- Files: ${current_index}/${file_count} (${progress_percent}%)"
72 |
73 | echo_debug "Processing inode group with ${num_paths} paths:"
74 | for path in "${paths[@]}"; do
75 | echo_debug " - $path"
76 | done
77 |
78 | # Check rebalance counts for all files (the pass limit only applies when passes_flag >= 1)
79 | should_skip=false
80 | for path in "${paths[@]}"; do
81 | rebalance_count=$(get_rebalance_count "${path}")
82 | if [ "${rebalance_count}" -ge "${passes_flag}" ]; then
83 | should_skip=true
84 | break
85 | fi
86 | done
87 |
88 | if [ "${should_skip}" = true ]; then
89 | if [ "${num_paths}" -gt 1 ]; then
90 | color_echo "${Yellow}" "Rebalance count (${passes_flag}) reached, skipping group: ${paths[*]}"
91 | else
92 | color_echo "${Yellow}" "Rebalance count (${passes_flag}) reached, skipping: ${paths[0]}"
93 | fi
94 | return
95 | fi
96 |
97 | main_file="${paths[0]}"
98 |
99 | # Check if main_file exists
100 | if [[ ! -f "${main_file}" ]]; then
101 | color_echo "${Yellow}" "File is missing, skipping: ${main_file}"
102 | return
103 | fi
104 |
105 | tmp_extension=".balance"
106 | tmp_file_path="${main_file}${tmp_extension}"
107 |
108 | echo "Copying '${main_file}' to '${tmp_file_path}'..."
109 | echo_debug "Executing copy command:"
110 |
111 | if [[ "${OSName}" == "linux-gnu"* ]]; then
112 | # Linux
113 |
114 | # --reflink=never -- force a standard copy (avoid ZFS block cloning)
115 | # -a -- archive mode; preserves attributes, includes -d (preserve symlinks,
116 | # don't copy the link target) and -p (preserve mode, ownership, timestamps)
117 | # -x -- stay on one file system
118 | cmd=(cp --reflink=never -ax "${main_file}" "${tmp_file_path}")
119 | echo_debug "${cmd[@]}"
120 | "${cmd[@]}"
121 | elif [[ "${OSName}" == "darwin"* ]] || [[ "${OSName}" == "freebsd"* ]]; then
122 | # Mac OS and FreeBSD
123 |
124 | # -a -- Archive mode. Same as -RpP. Includes preservation of modification
125 | # time, access time, file flags, file mode, ACL, user ID, and group
126 | # ID, as allowed by permissions.
127 | # -x -- File system mount points are not traversed.
128 | cmd=(cp -ax "${main_file}" "${tmp_file_path}")
129 | echo_debug "${cmd[@]}"
130 | "${cmd[@]}"
131 | else
132 | echo "Unsupported OS type: $OSTYPE"
133 | exit 1
134 | fi
135 |
136 | # Compare copy against original to make sure nothing went wrong
137 | if [[ "${checksum_flag}" == "true"* ]]; then
138 | echo "Comparing copy against original..."
139 | if [[ "${OSName}" == "linux-gnu"* ]]; then
140 | # Linux
141 |
142 | # file attributes
143 | original_perms=$(lsattr "${main_file}")
144 | # remove anything after the last space
145 | original_perms=${original_perms% *}
146 | # file permissions, owner, group, size, modification time
147 | original_perms="${original_perms} $(stat -c "%A %U %G %s %Y" "${main_file}")"
148 |
149 |
150 | # file attributes
151 | copy_perms=$(lsattr "${tmp_file_path}")
152 | # remove anything after the last space
153 | copy_perms=${copy_perms% *}
154 | # file permissions, owner, group, size, modification time
155 | copy_perms="${copy_perms} $(stat -c "%A %U %G %s %Y" "${tmp_file_path}")"
156 | elif [[ "${OSName}" == "darwin"* ]] || [[ "${OSName}" == "freebsd"* ]]; then
157 | # Mac OS
158 | # FreeBSD
159 |
160 | # note: no lsattr on Mac OS or FreeBSD
161 |
162 | # file permissions, owner, group, size, modification time
163 | original_perms="$(stat -f "%Sp %Su %Sg %z %m" "${main_file}")"
164 |
165 | # file permissions, owner, group, size, modification time
166 | copy_perms="$(stat -f "%Sp %Su %Sg %z %m" "${tmp_file_path}")"
167 | else
168 | echo "Unsupported OS type: $OSTYPE"
169 | exit 1
170 | fi
171 |
172 | echo_debug "Original perms: $original_perms"
173 | echo_debug "Copy perms: $copy_perms"
174 |
175 | if [[ "${original_perms}" == "${copy_perms}"* ]]; then
176 | color_echo "${Green}" "Attribute and permission check OK"
177 | else
178 | color_echo "${Red}" "Attribute and permission check FAILED: ${original_perms} != ${copy_perms}"
179 | exit 1
180 | fi
181 |
182 | if cmp -s "${main_file}" "${tmp_file_path}"; then
183 | color_echo "${Green}" "File content check OK"
184 | else
185 | color_echo "${Red}" "File content check FAILED"
186 | exit 1
187 | fi
188 | fi
189 |
190 | echo "Removing original files..."
191 | for path in "${paths[@]}"; do
192 | echo_debug "Removing $path"
193 | rm "${path}"
194 | done
195 |
196 | echo "Renaming temporary copy to original '${main_file}'..."
197 | echo_debug "Moving ${tmp_file_path} to ${main_file}"
198 | mv "${tmp_file_path}" "${main_file}"
199 |
200 | # Only recreate hardlinks if there are multiple paths
201 | if [ "${num_paths}" -gt 1 ]; then
202 | echo "Recreating hardlinks..."
203 | for (( i=1; i<${#paths[@]}; i++ )); do
204 | echo_debug "Linking ${main_file} to ${paths[$i]}"
205 | ln "${main_file}" "${paths[$i]}"
206 | done
207 | fi
208 |
209 | if [ "${passes_flag}" -ge 1 ]; then
210 | # Update rebalance "database" for all files
211 | for path in "${paths[@]}"; do
212 | line_nr=$(grep -xF -n "${path}" "./${rebalance_db_file_name}" | head -n 1 | cut -d: -f1)
213 | if [ -z "${line_nr}" ]; then
214 | rebalance_count=1
215 | echo "${path}" >> "./${rebalance_db_file_name}"
216 | echo "${rebalance_count}" >> "./${rebalance_db_file_name}"
217 | else
218 | rebalance_count_line_nr="$((line_nr + 1))"
219 | rebalance_count=$(awk "NR == ${rebalance_count_line_nr}" "./${rebalance_db_file_name}")
220 | rebalance_count="$((rebalance_count + 1))"
221 | echo_debug "Updating rebalance count for ${path} to ${rebalance_count}"
222 | sed -i "${rebalance_count_line_nr}s/.*/${rebalance_count}/" "./${rebalance_db_file_name}"
223 | fi
224 | done
225 | fi
226 | }
227 |
228 | checksum_flag='true'
229 | passes_flag='1'
230 | debug_flag='false'
231 |
232 | if [[ "$#" -eq 0 ]]; then
233 | print_usage
234 | exit 0
235 | fi
236 |
237 | while true; do
238 | case "$1" in
239 | -h | --help)
240 | print_usage
241 | exit 0
242 | ;;
243 | -c | --checksum)
244 | if [[ "$2" == 1 || "$2" =~ (on|true|yes) ]]; then
245 | checksum_flag="true"
246 | else
247 | checksum_flag="false"
248 | fi
249 | shift 2
250 | ;;
251 | -p | --passes)
252 | passes_flag=$2
253 | shift 2
254 | ;;
255 | --debug)
256 | if [[ "$2" == 1 || "$2" =~ (on|true|yes) ]]; then
257 | debug_flag="true"
258 | else
259 | debug_flag="false"
260 | fi
261 | shift 2
262 | ;;
263 | *)
264 | break
265 | ;;
266 | esac
267 | done
268 |
269 | root_path=$1
270 |
271 | OSName=$(echo "$OSTYPE" | tr '[:upper:]' '[:lower:]')
272 |
273 | color_echo "$Cyan" "Start rebalancing $(date):"
274 | color_echo "$Cyan" " Path: ${root_path}"
275 | color_echo "$Cyan" " Rebalancing Passes: ${passes_flag}"
276 | color_echo "$Cyan" " Use Checksum: ${checksum_flag}"
277 | color_echo "$Cyan" " Debug Mode: ${debug_flag}"
278 |
279 | # Generate files_list.txt with device and inode numbers using stat, separated by a pipe '|'
280 | if [[ "${OSName}" == "linux-gnu"* ]]; then
281 | # Linux
282 | find "$root_path" -type f -not -path '*/.zfs/*' -exec stat --printf '%d:%i|%n\n' {} \; > files_list.txt
283 | elif [[ "${OSName}" == "darwin"* ]] || [[ "${OSName}" == "freebsd"* ]]; then
284 | # Mac OS and FreeBSD
285 | find "$root_path" -type f -not -path '*/.zfs/*' -exec stat -f "%d:%i|%N" {} \; > files_list.txt
286 | else
287 | echo "Unsupported OS type: $OSTYPE"
288 | exit 1
289 | fi
290 |
291 | echo_debug "Contents of files_list.txt:"
292 | if [ "$debug_flag" = true ]; then
293 | cat files_list.txt
294 | fi
295 |
296 | # Sort files_list.txt by device and inode number
297 | sort -t '|' -k1,1 files_list.txt > sorted_files_list.txt
298 |
299 | echo_debug "Contents of sorted_files_list.txt:"
300 | if [ "$debug_flag" = true ]; then
301 | cat sorted_files_list.txt
302 | fi
303 |
304 | # Use awk to group paths by inode key and handle spaces in paths
305 | awk -F'|' '{
306 | key = $1
307 | path = substr($0, length(key)+2)
308 | if (key == prev_key) {
309 | print "\t" path
310 | } else {
311 | if (NR > 1) {
312 | # Do nothing
313 | }
314 | print key
315 | print "\t" path
316 | prev_key = key
317 | }
318 | }' sorted_files_list.txt > grouped_inodes.txt
319 |
320 | echo_debug "Contents of grouped_inodes.txt:"
321 | if [ "$debug_flag" = true ]; then
322 | cat grouped_inodes.txt
323 | fi
324 |
325 | # Count number of inode groups
326 | file_count=$(grep -c '^\w' grouped_inodes.txt)
327 |
328 | color_echo "$Cyan" " Number of files to process: ${file_count}"
329 |
330 | # Initialize current_index
331 | current_index=0
332 |
333 | # Create db file
334 | if [ "${passes_flag}" -ge 1 ]; then
335 | touch "./${rebalance_db_file_name}"
336 | fi
337 |
338 | paths=()
339 |
340 | # Read grouped_inodes.txt line by line
341 | while IFS= read -r line; do
342 | if [[ "$line" == $'\t'* ]]; then
343 | # This is a path line
344 | path="${line#$'\t'}"
345 | paths+=("$path")
346 | else
347 | # This is a new inode key
348 | if [[ "${#paths[@]}" -gt 0 ]]; then
349 | # Process the previous group
350 | process_inode_group "${paths[@]}"
351 | fi
352 | paths=()
353 | fi
354 | done < grouped_inodes.txt
355 |
356 | # Process the last group after the loop ends
357 | if [[ "${#paths[@]}" -gt 0 ]]; then
358 | process_inode_group "${paths[@]}"
359 | fi
360 |
361 | # Clean up temporary files
362 | rm files_list.txt sorted_files_list.txt grouped_inodes.txt
363 |
364 | echo ""
365 | echo ""
366 | color_echo "$Green" "Done!"
367 |
--------------------------------------------------------------------------------