├── .gitlab-ci.yml ├── CHANGELOG ├── COPYING ├── Dockerfile ├── INSTALL.md ├── README.md ├── bin └── btrfs.static ├── ci └── gitlab │ ├── Dockerfile │ ├── btrfs-progs-tests.service │ ├── build_btrfsprogs_dduper.sh │ ├── kernel_build.sh │ ├── run_tests.sh │ ├── setup_image.sh │ ├── setup_repos.sh │ └── tests │ ├── basic_dir_recurse_test.sh │ ├── basic_dir_test.sh │ ├── basic_sanity_csum.sh │ ├── basic_sanity_dumpcsum.sh │ ├── dataset.py │ ├── docker_test.sh │ └── fast_mode.sh ├── dduper ├── patch ├── btrfs-progs-v5.12.1 │ └── 0001-Print-csum-for-a-given-file-on-stdout.patch ├── btrfs-progs-v5.16 │ └── 0001-Print-csum-for-a-given-file-on-stdout.patch ├── btrfs-progs-v5.18 │ └── 0001-Print-csum-for-a-given-file-on-stdout.patch ├── btrfs-progs-v5.6.1 │ └── 0001-Print-csum-for-a-given-file-on-stdout.patch ├── btrfs-progs-v5.9 │ └── 0001-Print-csum-for-a-given-file-on-stdout.patch ├── btrfs-progs-v6.1 │ └── 0001-Print-csum-for-a-given-file-on-stdout.patch ├── btrfs-progs-v6.11 │ └── 0001-Print-csum-for-a-given-file-on-stdout.patch └── btrfs-progs-v6.3.3 │ └── 0001-Print-csum-for-a-given-file-on-stdout.patch ├── requirements.txt └── tests ├── TESTS.md ├── test.py └── verify.sh /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | # This program is free software; you can redistribute it and/or 2 | # modify it under the terms of the GNU General Public 3 | # License v2 as published by the Free Software Foundation. 4 | # 5 | # This program is distributed in the hope that it will be useful, 6 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 7 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 8 | # General Public License for more details. 9 | # 10 | # You should have received a copy of the GNU General Public 11 | # License along with this program; if not, write to the 12 | # Free Software Foundation, Inc., 59 Temple Place - Suite 330, 13 | # Boston, MA 02111-1307, USA. 
14 | # 15 | 16 | image: docker:20-dind 17 | 18 | services: 19 | - name: docker:20-dind 20 | alias: docker 21 | command: ["--tls=false"] 22 | 23 | # To enable the kernel build, set the GitLab environment variable BUILD_KERNEL to "yes". 24 | # If you disable the kernel build, make sure the environment variable PREBUILT_KERNEL_ID points to the job id of a previously built kernel. 25 | # To enable the image build, set the environment variable BUILD_IMAGE to "yes". 26 | # If you disable the image build, make sure the environment variable PREBUILT_IMAGE_ID points to the job id of a previously built rootfs. 27 | # See https://gitlab.com/help/ci/variables/README#custom-environment-variables 28 | 29 | variables: 30 | DOCKER_DRIVER: overlay2 31 | DOCKER_HOST: tcp://docker:2375/ 32 | DOCKER_TLS_CERTDIR: "" 33 | IMAGE_TAG: registry.gitlab.com/$CI_PROJECT_NAMESPACE/dduper:qemu 34 | VERSION_IMAGE_TAG: registry.gitlab.com/$CI_PROJECT_NAMESPACE/dduper:V0.03 35 | DOCKER_IMG: laks/dduper:build 36 | 37 | stages: 38 | - setup 39 | - pull 40 | - build 41 | - test 42 | 43 | before_script: 44 | - echo "BUILD KERNEL - $BUILD_KERNEL" 45 | - echo "BUILD IMAGE - $BUILD_IMAGE" 46 | - echo "PREBUILT_KERNEL_ID - $PREBUILT_KERNEL_ID" 47 | - echo "PREBUILT_IMAGE_ID - $PREBUILT_IMAGE_ID" 48 | - docker login --username $CI_REGISTRY_USER --password $CI_REGISTRY_PASSWORD $CI_REGISTRY 49 | # - rm -rf ci/gitlab/tests/dduper.log ci/gitlab/tests/dduper.db 50 | 51 | docker build: 52 | stage: setup 53 | script: 54 | - cd ci/gitlab 55 | - docker pull $IMAGE_TAG > /dev/null && echo "Downloaded image" || ( docker build -t $IMAGE_TAG . && docker push $IMAGE_TAG ) 56 | 57 | docker publish: 58 | stage: setup 59 | only: 60 | - dockerbuild 61 | script: 62 | - echo "Docker image for hub" 63 | - docker build -t $VERSION_IMAGE_TAG . 
&& docker push $VERSION_IMAGE_TAG 64 | - docker tag $VERSION_IMAGE_TAG $DOCKER_IMG 65 | - docker logout 66 | - docker images 67 | - docker login --username $D_USERNAME --password $D_PASSWORD $D_REGISTRY 68 | - docker push $DOCKER_IMG 69 | 70 | kernel build: 71 | before_script: 72 | - apk add curl unzip 73 | stage: setup 74 | script: 75 | - if [ "$BUILD_KERNEL" == "yes" ]; then 76 | docker run --cap-add SYS_PTRACE --cap-add sys_admin --privileged --device=/dev/kvm -v $PWD:/repo $IMAGE_TAG /repo/ci/gitlab/kernel_build.sh; 77 | else 78 | curl -o bzImage.zip --location --header "JOB-TOKEN:$CI_JOB_TOKEN" "https://gitlab.com/api/v4/projects/$CI_PROJECT_ID/jobs/$PREBUILT_KERNEL_ID/artifacts" && unzip bzImage.zip; 79 | fi; 80 | artifacts: 81 | when: always 82 | paths: 83 | - bzImage 84 | 85 | image build: 86 | before_script: 87 | - apk add curl unzip 88 | stage: setup 89 | script: 90 | - if [ "$BUILD_IMAGE" == "yes" ]; then 91 | docker run --cap-add SYS_PTRACE --cap-add sys_admin --privileged --device=/dev/kvm -v $PWD:/repo $IMAGE_TAG /repo/ci/gitlab/setup_image.sh; 92 | else 93 | curl -o qemu-image.img.zip --location --header "JOB-TOKEN:$CI_JOB_TOKEN" "https://gitlab.com/api/v4/projects/$CI_PROJECT_ID/jobs/$PREBUILT_IMAGE_ID/artifacts" && unzip qemu-image.img.zip; 94 | fi; 95 | artifacts: 96 | when: always 97 | paths: 98 | - qemu-image.img 99 | 100 | Setup repos: 101 | before_script: 102 | - apk add curl unzip 103 | stage: pull 104 | script: 105 | - docker run --cap-add SYS_PTRACE --cap-add sys_admin --privileged --device=/dev/kvm -v $PWD:/repo $IMAGE_TAG /repo/ci/gitlab/setup_repos.sh $CI_COMMIT_REF_NAME 106 | artifacts: 107 | when: always 108 | paths: 109 | - qemu-image.img 110 | 111 | btrfs-progs-csum-patch build: 112 | stage: build 113 | script: 114 | - echo "build_with_patch" > $PWD/cmd 115 | - docker run --cap-add SYS_PTRACE --cap-add sys_admin --privileged --device=/dev/kvm -v $PWD:/repo $IMAGE_TAG /repo/ci/gitlab/run_tests.sh 116 | - test -e "build_pass.txt" || 
exit 1 117 | artifacts: 118 | expire_in: 1 week 119 | when: always 120 | paths: 121 | - qemu-image.img 122 | 123 | fast mode check: 124 | stage: test 125 | script: 126 | - echo "./ci/gitlab/tests/fast_mode.sh fastmode" > $PWD/cmd 127 | - docker run --cap-add SYS_PTRACE --cap-add sys_admin --privileged --device=/dev/kvm -v $PWD:/repo $IMAGE_TAG /repo/ci/gitlab/run_tests.sh 128 | - test -e "fastmode_pass.txt" || exit 1 129 | 130 | dump-csum check: 131 | stage: test 132 | allow_failure: true 133 | script: 134 | - echo "./ci/gitlab/tests/basic_sanity_dumpcsum.sh xxhash" > $PWD/cmd 135 | - docker run --cap-add SYS_PTRACE --cap-add sys_admin --privileged --device=/dev/kvm -v $PWD:/repo $IMAGE_TAG /repo/ci/gitlab/run_tests.sh 136 | - test -e "xxhash_pass.txt" || exit 1 137 | artifacts: 138 | expire_in: 1 week 139 | when: always 140 | paths: 141 | - random1 142 | - random2 143 | 144 | crc2 sanity check: 145 | stage: test 146 | script: 147 | - ls -lR 148 | - echo "./ci/gitlab/tests/basic_sanity_csum.sh crc32" > $PWD/cmd 149 | - docker run --cap-add SYS_PTRACE --cap-add sys_admin --privileged --device=/dev/kvm -v $PWD:/repo $IMAGE_TAG /repo/ci/gitlab/run_tests.sh 150 | - test -e "crc32_pass.txt" || exit 1 151 | 152 | xxhash sanity check: 153 | stage: test 154 | allow_failure: true 155 | script: 156 | - ls -lR 157 | - echo "./ci/gitlab/tests/basic_sanity_csum.sh xxhash" > $PWD/cmd 158 | - docker run --cap-add SYS_PTRACE --cap-add sys_admin --privileged --device=/dev/kvm -v $PWD:/repo $IMAGE_TAG /repo/ci/gitlab/run_tests.sh 159 | - test -e "xxhash_pass.txt" || exit 1 160 | 161 | blake2 sanity check: 162 | stage: test 163 | allow_failure: true 164 | script: 165 | - echo "./ci/gitlab/tests/basic_sanity_csum.sh blake2" > $PWD/cmd 166 | - docker run --cap-add SYS_PTRACE --cap-add sys_admin --privileged --device=/dev/kvm -v $PWD:/repo $IMAGE_TAG /repo/ci/gitlab/run_tests.sh 167 | - test -e "blake2_pass.txt" || exit 1 168 | 169 | sha256 sanity check: 170 | stage: test 171 | 
allow_failure: true 172 | script: 173 | - echo "./ci/gitlab/tests/basic_sanity_csum.sh sha256" > $PWD/cmd 174 | - docker run --cap-add SYS_PTRACE --cap-add sys_admin --privileged --device=/dev/kvm -v $PWD:/repo $IMAGE_TAG /repo/ci/gitlab/run_tests.sh 175 | - test -e "sha256_pass.txt" || exit 1 176 | 177 | dir test: 178 | stage: test 179 | script: 180 | - echo "./ci/gitlab/tests/basic_dir_test.sh dirtest" > $PWD/cmd 181 | - docker run --cap-add SYS_PTRACE --cap-add sys_admin --privileged --device=/dev/kvm -v $PWD:/repo $IMAGE_TAG /repo/ci/gitlab/run_tests.sh 182 | - test -e "dirtest_pass.txt" || exit 1 183 | 184 | dir-recurse test: 185 | stage: test 186 | script: 187 | - echo "./ci/gitlab/tests/basic_dir_recurse_test.sh dirr" > $PWD/cmd 188 | - docker run --cap-add SYS_PTRACE --cap-add sys_admin --privileged --device=/dev/kvm -v $PWD:/repo $IMAGE_TAG /repo/ci/gitlab/run_tests.sh 189 | - test -e "dirr_pass.txt" || exit 1 190 | 191 | docker-image test: 192 | stage: test 193 | script: 194 | - docker run --cap-add MKNOD --cap-add SYS_PTRACE --cap-add sys_admin --privileged --device=/dev/kvm -v $PWD:/repo $VERSION_IMAGE_TAG /repo/ci/gitlab/tests/docker_test.sh 195 | - echo "TODO- Verify results" 196 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | v0.04: 18-09-2020 2 | - Added xxhash64, blake2, sha256 support 3 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | 2 | GNU GENERAL PUBLIC LICENSE 3 | Version 2, June 1991 4 | 5 | Copyright (C) 1989, 1991 Free Software Foundation, Inc. 6 | 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 7 | Everyone is permitted to copy and distribute verbatim copies 8 | of this license document, but changing it is not allowed. 
9 | 10 | Preamble 11 | 12 | The licenses for most software are designed to take away your 13 | freedom to share and change it. By contrast, the GNU General Public 14 | License is intended to guarantee your freedom to share and change free 15 | software--to make sure the software is free for all its users. This 16 | General Public License applies to most of the Free Software 17 | Foundation's software and to any other program whose authors commit to 18 | using it. (Some other Free Software Foundation software is covered by 19 | the GNU Library General Public License instead.) You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | this service if you wish), that you receive source code or can get it 26 | if you want it, that you can change the software or use pieces of it 27 | in new free programs; and that you know you can do these things. 28 | 29 | To protect your rights, we need to make restrictions that forbid 30 | anyone to deny you these rights or to ask you to surrender the rights. 31 | These restrictions translate to certain responsibilities for you if you 32 | distribute copies of the software, or if you modify it. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must give the recipients all the rights that 36 | you have. You must make sure that they, too, receive or can get the 37 | source code. And you must show them these terms so they know their 38 | rights. 39 | 40 | We protect your rights with two steps: (1) copyright the software, and 41 | (2) offer you this license which gives you legal permission to copy, 42 | distribute and/or modify the software. 
43 | 44 | Also, for each author's protection and ours, we want to make certain 45 | that everyone understands that there is no warranty for this free 46 | software. If the software is modified by someone else and passed on, we 47 | want its recipients to know that what they have is not the original, so 48 | that any problems introduced by others will not reflect on the original 49 | authors' reputations. 50 | 51 | Finally, any free program is threatened constantly by software 52 | patents. We wish to avoid the danger that redistributors of a free 53 | program will individually obtain patent licenses, in effect making the 54 | program proprietary. To prevent this, we have made it clear that any 55 | patent must be licensed for everyone's free use or not licensed at all. 56 | 57 | The precise terms and conditions for copying, distribution and 58 | modification follow. 59 | 60 | GNU GENERAL PUBLIC LICENSE 61 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 62 | 63 | 0. This License applies to any program or other work which contains 64 | a notice placed by the copyright holder saying it may be distributed 65 | under the terms of this General Public License. The "Program", below, 66 | refers to any such program or work, and a "work based on the Program" 67 | means either the Program or any derivative work under copyright law: 68 | that is to say, a work containing the Program or a portion of it, 69 | either verbatim or with modifications and/or translated into another 70 | language. (Hereinafter, translation is included without limitation in 71 | the term "modification".) Each licensee is addressed as "you". 72 | 73 | Activities other than copying, distribution and modification are not 74 | covered by this License; they are outside its scope. 
The act of 75 | running the Program is not restricted, and the output from the Program 76 | is covered only if its contents constitute a work based on the 77 | Program (independent of having been made by running the Program). 78 | Whether that is true depends on what the Program does. 79 | 80 | 1. You may copy and distribute verbatim copies of the Program's 81 | source code as you receive it, in any medium, provided that you 82 | conspicuously and appropriately publish on each copy an appropriate 83 | copyright notice and disclaimer of warranty; keep intact all the 84 | notices that refer to this License and to the absence of any warranty; 85 | and give any other recipients of the Program a copy of this License 86 | along with the Program. 87 | 88 | You may charge a fee for the physical act of transferring a copy, and 89 | you may at your option offer warranty protection in exchange for a fee. 90 | 91 | 2. You may modify your copy or copies of the Program or any portion 92 | of it, thus forming a work based on the Program, and copy and 93 | distribute such modifications or work under the terms of Section 1 94 | above, provided that you also meet all of these conditions: 95 | 96 | a) You must cause the modified files to carry prominent notices 97 | stating that you changed the files and the date of any change. 98 | 99 | b) You must cause any work that you distribute or publish, that in 100 | whole or in part contains or is derived from the Program or any 101 | part thereof, to be licensed as a whole at no charge to all third 102 | parties under the terms of this License. 
103 | 104 | c) If the modified program normally reads commands interactively 105 | when run, you must cause it, when started running for such 106 | interactive use in the most ordinary way, to print or display an 107 | announcement including an appropriate copyright notice and a 108 | notice that there is no warranty (or else, saying that you provide 109 | a warranty) and that users may redistribute the program under 110 | these conditions, and telling the user how to view a copy of this 111 | License. (Exception: if the Program itself is interactive but 112 | does not normally print such an announcement, your work based on 113 | the Program is not required to print an announcement.) 114 | 115 | These requirements apply to the modified work as a whole. If 116 | identifiable sections of that work are not derived from the Program, 117 | and can be reasonably considered independent and separate works in 118 | themselves, then this License, and its terms, do not apply to those 119 | sections when you distribute them as separate works. But when you 120 | distribute the same sections as part of a whole which is a work based 121 | on the Program, the distribution of the whole must be on the terms of 122 | this License, whose permissions for other licensees extend to the 123 | entire whole, and thus to each and every part regardless of who wrote it. 124 | 125 | Thus, it is not the intent of this section to claim rights or contest 126 | your rights to work written entirely by you; rather, the intent is to 127 | exercise the right to control the distribution of derivative or 128 | collective works based on the Program. 129 | 130 | In addition, mere aggregation of another work not based on the Program 131 | with the Program (or with a work based on the Program) on a volume of 132 | a storage or distribution medium does not bring the other work under 133 | the scope of this License. 134 | 135 | 3. 
You may copy and distribute the Program (or a work based on it, 136 | under Section 2) in object code or executable form under the terms of 137 | Sections 1 and 2 above provided that you also do one of the following: 138 | 139 | a) Accompany it with the complete corresponding machine-readable 140 | source code, which must be distributed under the terms of Sections 141 | 1 and 2 above on a medium customarily used for software interchange; or, 142 | 143 | b) Accompany it with a written offer, valid for at least three 144 | years, to give any third party, for a charge no more than your 145 | cost of physically performing source distribution, a complete 146 | machine-readable copy of the corresponding source code, to be 147 | distributed under the terms of Sections 1 and 2 above on a medium 148 | customarily used for software interchange; or, 149 | 150 | c) Accompany it with the information you received as to the offer 151 | to distribute corresponding source code. (This alternative is 152 | allowed only for noncommercial distribution and only if you 153 | received the program in object code or executable form with such 154 | an offer, in accord with Subsection b above.) 155 | 156 | The source code for a work means the preferred form of the work for 157 | making modifications to it. For an executable work, complete source 158 | code means all the source code for all modules it contains, plus any 159 | associated interface definition files, plus the scripts used to 160 | control compilation and installation of the executable. However, as a 161 | special exception, the source code distributed need not include 162 | anything that is normally distributed (in either source or binary 163 | form) with the major components (compiler, kernel, and so on) of the 164 | operating system on which the executable runs, unless that component 165 | itself accompanies the executable. 
166 | 167 | If distribution of executable or object code is made by offering 168 | access to copy from a designated place, then offering equivalent 169 | access to copy the source code from the same place counts as 170 | distribution of the source code, even though third parties are not 171 | compelled to copy the source along with the object code. 172 | 173 | 4. You may not copy, modify, sublicense, or distribute the Program 174 | except as expressly provided under this License. Any attempt 175 | otherwise to copy, modify, sublicense or distribute the Program is 176 | void, and will automatically terminate your rights under this License. 177 | However, parties who have received copies, or rights, from you under 178 | this License will not have their licenses terminated so long as such 179 | parties remain in full compliance. 180 | 181 | 5. You are not required to accept this License, since you have not 182 | signed it. However, nothing else grants you permission to modify or 183 | distribute the Program or its derivative works. These actions are 184 | prohibited by law if you do not accept this License. Therefore, by 185 | modifying or distributing the Program (or any work based on the 186 | Program), you indicate your acceptance of this License to do so, and 187 | all its terms and conditions for copying, distributing or modifying 188 | the Program or works based on it. 189 | 190 | 6. Each time you redistribute the Program (or any work based on the 191 | Program), the recipient automatically receives a license from the 192 | original licensor to copy, distribute or modify the Program subject to 193 | these terms and conditions. You may not impose any further 194 | restrictions on the recipients' exercise of the rights granted herein. 195 | You are not responsible for enforcing compliance by third parties to 196 | this License. 197 | 198 | 7. 
If, as a consequence of a court judgment or allegation of patent 199 | infringement or for any other reason (not limited to patent issues), 200 | conditions are imposed on you (whether by court order, agreement or 201 | otherwise) that contradict the conditions of this License, they do not 202 | excuse you from the conditions of this License. If you cannot 203 | distribute so as to satisfy simultaneously your obligations under this 204 | License and any other pertinent obligations, then as a consequence you 205 | may not distribute the Program at all. For example, if a patent 206 | license would not permit royalty-free redistribution of the Program by 207 | all those who receive copies directly or indirectly through you, then 208 | the only way you could satisfy both it and this License would be to 209 | refrain entirely from distribution of the Program. 210 | 211 | If any portion of this section is held invalid or unenforceable under 212 | any particular circumstance, the balance of the section is intended to 213 | apply and the section as a whole is intended to apply in other 214 | circumstances. 215 | 216 | It is not the purpose of this section to induce you to infringe any 217 | patents or other property right claims or to contest validity of any 218 | such claims; this section has the sole purpose of protecting the 219 | integrity of the free software distribution system, which is 220 | implemented by public license practices. Many people have made 221 | generous contributions to the wide range of software distributed 222 | through that system in reliance on consistent application of that 223 | system; it is up to the author/donor to decide if he or she is willing 224 | to distribute software through any other system and a licensee cannot 225 | impose that choice. 226 | 227 | This section is intended to make thoroughly clear what is believed to 228 | be a consequence of the rest of this License. 229 | 230 | 8. 
If the distribution and/or use of the Program is restricted in 231 | certain countries either by patents or by copyrighted interfaces, the 232 | original copyright holder who places the Program under this License 233 | may add an explicit geographical distribution limitation excluding 234 | those countries, so that distribution is permitted only in or among 235 | countries not thus excluded. In such case, this License incorporates 236 | the limitation as if written in the body of this License. 237 | 238 | 9. The Free Software Foundation may publish revised and/or new versions 239 | of the General Public License from time to time. Such new versions will 240 | be similar in spirit to the present version, but may differ in detail to 241 | address new problems or concerns. 242 | 243 | Each version is given a distinguishing version number. If the Program 244 | specifies a version number of this License which applies to it and "any 245 | later version", you have the option of following the terms and conditions 246 | either of that version or of any later version published by the Free 247 | Software Foundation. If the Program does not specify a version number of 248 | this License, you may choose any version ever published by the Free Software 249 | Foundation. 250 | 251 | 10. If you wish to incorporate parts of the Program into other free 252 | programs whose distribution conditions are different, write to the author 253 | to ask for permission. For software which is copyrighted by the Free 254 | Software Foundation, write to the Free Software Foundation; we sometimes 255 | make exceptions for this. Our decision will be guided by the two goals 256 | of preserving the free status of all derivatives of our free software and 257 | of promoting the sharing and reuse of software generally. 258 | 259 | NO WARRANTY 260 | 261 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 262 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 263 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 264 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 265 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 266 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 267 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 268 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 269 | REPAIR OR CORRECTION. 270 | 271 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 272 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 273 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 274 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 275 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 276 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 277 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 278 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 279 | POSSIBILITY OF SUCH DAMAGES. 280 | 281 | END OF TERMS AND CONDITIONS 282 | 283 | How to Apply These Terms to Your New Programs 284 | 285 | If you develop a new program, and you want it to be of the greatest 286 | possible use to the public, the best way to achieve this is to make it 287 | free software which everyone can redistribute and change under these terms. 288 | 289 | To do so, attach the following notices to the program. It is safest 290 | to attach them to the start of each source file to most effectively 291 | convey the exclusion of warranty; and each file should have at least 292 | the "copyright" line and a pointer to where the full notice is found. 
293 | 294 | 295 | Copyright (C) 296 | 297 | This program is free software; you can redistribute it and/or modify 298 | it under the terms of the GNU General Public License as published by 299 | the Free Software Foundation; either version 2 of the License, or 300 | (at your option) any later version. 301 | 302 | This program is distributed in the hope that it will be useful, 303 | but WITHOUT ANY WARRANTY; without even the implied warranty of 304 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 305 | GNU General Public License for more details. 306 | 307 | You should have received a copy of the GNU General Public License 308 | along with this program; if not, write to the Free Software 309 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 310 | 311 | 312 | Also add information on how to contact you by electronic and paper mail. 313 | 314 | If the program is interactive, make it output a short notice like this 315 | when it starts in an interactive mode: 316 | 317 | Gnomovision version 69, Copyright (C) year name of author 318 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 319 | This is free software, and you are welcome to redistribute it 320 | under certain conditions; type `show c' for details. 321 | 322 | The hypothetical commands `show w' and `show c' should show the appropriate 323 | parts of the General Public License. Of course, the commands you use may 324 | be called something other than `show w' and `show c'; they could even be 325 | mouse-clicks or menu items--whatever suits your program. 326 | 327 | You should also get your employer (if you work as a programmer) or your 328 | school, if any, to sign a "copyright disclaimer" for the program, if 329 | necessary. Here is a sample; alter the names: 330 | 331 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 332 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
333 | 334 | , 1 April 1989 335 | Ty Coon, President of Vice 336 | 337 | This General Public License does not permit incorporating your program into 338 | proprietary programs. If your program is a subroutine library, you may 339 | consider it more useful to permit linking proprietary applications with the 340 | library. If this is what you want to do, use the GNU Library General 341 | Public License instead of this License. 342 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:bullseye-slim AS build 2 | MAINTAINER Lakshmipathi.G 3 | 4 | # Install build dependencies. 5 | RUN apt-get update && apt-get install -y --no-install-recommends autoconf automake gcc \ 6 | make pkg-config e2fslibs-dev libblkid-dev zlib1g-dev liblzo2-dev \ 7 | python3-dev libzstd-dev python3-pip python3-setuptools patch 8 | 9 | # Download the btrfs-progs v6.1.3 release tarball (verified against its sha256 checksum) 10 | ADD --checksum=sha256:e6512ff305963bc68f11803fa759fecbead778a3a951aeb4f7f3f76dabb31db4 https://github.com/kdave/btrfs-progs/archive/refs/tags/v6.1.3.tar.gz /btrfs-progs.tar.gz 11 | 12 | COPY patch/btrfs-progs-v6.1 /patch 13 | 14 | # Apply csum patch 15 | WORKDIR /btrfs-progs 16 | RUN tar --strip-components 1 -xzf /btrfs-progs.tar.gz && \ 17 | patch -p1 < /patch/0001-Print-csum-for-a-given-file-on-stdout.patch 18 | 19 | # Start the btrfs-progs build 20 | RUN ./autogen.sh 21 | # btrfs-progs 5.14 added an optional dependency (on by default) on libudev, for 22 | # multipath device detection, but that requires a running udev daemon, and 23 | # perhaps other changes to make it work inside a Docker container, so it's 24 | # disabled for the moment 25 | RUN ./configure --disable-documentation --disable-libudev 26 | RUN make install DESTDIR=/btrfs-progs-build 27 | 28 | # Start the btrfs-progs static build 29 | RUN make clean 30 | RUN make static 31 | RUN make btrfs.static 32 | RUN cp btrfs.static 
/btrfs-progs-build 33 | 34 | # Install dduper 35 | FROM debian:bullseye-slim 36 | COPY --from=build /lib/x86_64-linux-gnu/liblzo2.so.2 /lib/x86_64-linux-gnu/ 37 | COPY --from=build /btrfs-progs-build /btrfs-progs 38 | COPY . /dduper 39 | 40 | RUN mv /btrfs-progs/btrfs.static / 41 | RUN cp -rv /btrfs-progs/usr/local/bin/* /usr/local/bin && cp -rv /btrfs-progs/usr/local/include/* /usr/local/include/ && cp -rv /btrfs-progs/usr/local/lib/* /usr/local/lib 42 | RUN btrfs inspect-internal dump-csum --help 43 | 44 | WORKDIR /dduper 45 | 46 | # Install runtime dependencies 47 | RUN apt-get update && \ 48 | apt-get install -y --no-install-recommends python3-pip python3-setuptools && \ 49 | pip3 install -r requirements.txt && \ 50 | apt-get remove -y python3-pip python3-setuptools && \ 51 | rm -rf /var/lib/apt/lists/* && \ 52 | cp -v dduper /usr/sbin/ && \ 53 | dduper --version 54 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | How to install dduper? 2 | --------------------- 3 | 4 | You can install dduper in 3 different ways. 5 | 6 | 1. Using the pre-built binary, which requires a couple of Python packages. 7 | 2. Using Docker image. 8 | 3. Using Source code. 9 | 10 | All three approaches are described below. 11 | 12 | Install pre-built binaries: 13 | --------------------------- 14 | 15 | To install the `dduper` binaries, execute the following commands: 16 | 17 | ``` 18 | git clone https://github.com/Lakshmipathi/dduper.git && cd dduper 19 | pip3 install -r requirements.txt 20 | cp -v bin/btrfs.static /usr/sbin/ # this copies required btrfs binary. 21 | cp -v dduper /usr/sbin/ # copy dduper script. 22 | ``` 23 | 24 | That's all. Now type `dduper --help` to list options and continue with README.md for usage. 
25 | 26 | Note: If you want to perform a basic check, you can use this [script](https://github.com/Lakshmipathi/dduper/blob/master/tests/verify.sh) 27 | 28 | Install using Docker : 29 | ---------------------- 30 | 31 | If you are already using Docker and don't want to install any dependencies, simply pull the `laks/dduper` image and 32 | pass your device and mount dir like: 33 | 34 | ``` 35 | $ docker run -it --device /dev/sda1 -v /btrfs_mnt:/mnt laks/dduper dduper --device /dev/sda1 --dir /mnt --analyze 36 | ``` 37 | 38 | Make sure to replace `/dev/sda1` with your btrfs device and `/btrfs_mnt` with your btrfs mount point. 39 | 40 | 41 | Install from Source: 42 | -------------------- 43 | `dduper` relies on BTRFS checksums. To expose these checksums to userspace you need to apply an additional patch to btrfs-progs first. 44 | This introduces a new command to dump csums: `btrfs inspect-internal dump-csum`. 45 | 46 | Patches for several btrfs-progs releases are kept in this repo under `patch/` (for example `patch/btrfs-progs-v5.9/`); use the one that matches your btrfs-progs version. 47 | 48 | Steps should be similar to: 49 | 50 | 1. git clone https://github.com/Lakshmipathi/dduper.git && cd dduper 51 | 2. git clone https://github.com/kdave/btrfs-progs.git && cd btrfs-progs 52 | 3. Apply the patch like `patch -p1 < ../patch/btrfs-progs-v5.9/0001-Print-csum-for-a-given-file-on-stdout.patch` 53 | 4. Now compile and install btrfs-progs. 54 | 5. After successful compilation, you should see the following `dump-csum` option. 55 | 56 | ``` 57 | ./btrfs inspect-internal dump-csum --help 58 | usage: btrfs inspect-internal dump-csum 59 | 60 | Get csums for the given file. 61 | ``` 62 | 6. Now we have the required patch. Go install dduper. 63 | ``` 64 | cd ~/dduper 65 | pip install -r requirements.txt 66 | cp -v dduper /usr/sbin/ 67 | ``` 68 | 69 | 7. Type `dduper --help` to list options and continue with README.md for usage.
70 | 71 | Misc: 72 | ---- 73 | If you are interested in dumping csum data, please check this demo: https://asciinema.org/a/34565 74 | 75 | Original mailing-list announcement: https://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg79853.html 76 | Older patch: https://patchwork.kernel.org/patch/10540229 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | dduper 2 | ------ 3 | 4 | dduper is a block-level [out-of-band](https://btrfs.wiki.kernel.org/index.php/Deduplication#Out_of_band_.2F_batch_deduplication) BTRFS dedupe tool. It works by 5 | fetching the built-in checksums from the BTRFS csum-tree, instead of reading file blocks 6 | and computing the checksums itself. This *hugely* improves performance. Please be aware that dduper is a beta-quality tool, so _validate_ it before running it on your 7 | critical data. 8 | 9 | Dedupe Files (default mode): 10 | ---------------------------- 11 | 12 | To dedupe two files f1 and f2 on partition sda1: 13 | 14 | `dduper --device /dev/sda1 --files /mnt/f1 /mnt/f2` 15 | 16 | This mode is 100% safe, as it uses the `fideduperange` call, which asks the kernel 17 | to verify the given regions byte-by-byte, and only performs dedupe when they match. 18 | 19 | Dedupe Files Faster (fast mode): 20 | -------------------------------- 21 | 22 | dduper also has a `--fast-mode` option, which tells the kernel to skip the verification 23 | stage and invoke clone directly. This mode is faster since file contents 24 | are never read. dduper relies on the file csums maintained by the btrfs csum-tree. 25 | 26 | To dedupe two files f1 and f2 on partition sda1 in faster mode: 27 | 28 | `dduper --fast-mode --device /dev/sda1 --files /mnt/f1 /mnt/f2` 29 | 30 | This works by fetching csums and invoking `ficlonerange` on matching regions. 31 | For this mode, dduper adds a safety check by performing a sha256 comparison.
32 | If validation fails, files can be restored using `/var/log/dduper_backupfile_info.log`. 33 | This file will contain data like: 34 | 35 | ` 36 | FAILURE: Deduplication for /mnt/foo resulted in corruption.You can restore original file from /mnt/foo.__dduper 37 | ` 38 | 39 | *Caution: Don't run this, if you don't know what you are doing.* 40 | 41 | Dedupe Files blazing fast (insane mode): 42 | ---------------------------------------- 43 | 44 | If you already have backup data in another partition or systems. You can 45 | tell dduper to skip file sha256 validation after dedupe (file contents never read). 46 | This is insanely fast :-) 47 | 48 | `dduper --fast-mode --skip --device /dev/sda1 --files /mnt/f1 /mnt/f2` 49 | 50 | *Caution: Never run this, if you don't know what you are doing.* 51 | 52 | Dedupe multiple files: 53 | ---------------------- 54 | 55 | To dedupe more than two files on a partition (sda1), you simply pass 56 | those filenames like: 57 | 58 | `dduper --device /dev/sda1 --files /mnt/f1 /mnt/f2 /mnt/f3 /mnt/f4` 59 | 60 | Dedupe Directory: 61 | ----------------- 62 | 63 | To dedupe entire directory on sda1: 64 | 65 | `dduper --device /dev/sda1 --dir /mnt/dir` 66 | 67 | Dedupe Directory recursively: 68 | ----------------------------- 69 | 70 | To dedupe entire directory also parse its sub-directories on sda1: 71 | 72 | `dduper --device /dev/sda1 --dir /mnt/dir --recurse ` 73 | 74 | Dedupe multiple directories: 75 | --------------------------- 76 | 77 | To dedupe multiple directories on sda1: 78 | 79 | `dduper --device /dev/sda1 --dir /mnt/dir1 /mnt/dir2` 80 | 81 | Analyze with different chunk size: 82 | ---------------------------------- 83 | You can analyze which chunk size provides better deduplication. 84 | 85 | `dduper --device /dev/sda1 --files /mnt/f1 /mnt/f2 --analyze` 86 | 87 | It will perform analysis and report dedupe data for different chunk values. 88 | 89 | Sample output: f1 and f2 are 4MB files. 
90 | 91 | ``` 92 | -------------------------------------------------- 93 | Chunk Size(KB) : Files : Duplicate(KB) 94 | -------------------------------------------------- 95 | 256 : /mnt/f1:/mnt/f2 : 4096 96 | ================================================== 97 | dduper:4096KB of duplicate data found with chunk size:256KB 98 | 99 | 100 | -------------------------------------------------- 101 | Chunk Size(KB) : Files : Duplicate(KB) 102 | -------------------------------------------------- 103 | 512 : /mnt/f1:/mnt/f2 : 4096 104 | ================================================== 105 | dduper:4096KB of duplicate data found with chunk size:512KB 106 | 107 | 108 | -------------------------------------------------- 109 | Chunk Size(KB) : Files : Duplicate(KB) 110 | -------------------------------------------------- 111 | 1024 : /mnt/f1:/mnt/f2 : 4096 112 | ================================================== 113 | dduper:4096KB of duplicate data found with chunk size:1024KB 114 | 115 | 116 | -------------------------------------------------- 117 | Chunk Size(KB) : Files : Duplicate(KB) 118 | -------------------------------------------------- 119 | 2048 : /mnt/f1:/mnt/f2 : 0 120 | ================================================== 121 | dduper:0KB of duplicate data found with chunk size:2048KB 122 | 123 | 124 | -------------------------------------------------- 125 | Chunk Size(KB) : Files : Duplicate(KB) 126 | -------------------------------------------------- 127 | 4096 : /mnt/f1:/mnt/f2 : 0 128 | ================================================== 129 | dduper:0KB of duplicate data found with chunk size:4096KB 130 | 131 | 132 | -------------------------------------------------- 133 | Chunk Size(KB) : Files : Duplicate(KB) 134 | -------------------------------------------------- 135 | 8192 : /mnt/f1:/mnt/f2 : 0 136 | ================================================== 137 | dduper:0KB of duplicate data found with chunk size:8192KB 138 | 139 | dduper took 0.149248838425 
seconds 140 | ``` 141 | 142 | The above output shows that the whole 4MB file (f2) can be deduped with a chunk size of 256KB, 512KB or 1MB. 143 | With the larger chunk sizes (2MB, 4MB and 8MB), dduper is unable to detect any duplicate data. In this 144 | case, it's wise to use 1MB as the chunk size while performing dedupe, because it invokes fewer 145 | dedupe calls compared to the 256KB/512KB chunk sizes. 146 | 147 | You can analyze more than two files like, 148 | 149 | `dduper --device /dev/sda1 --files /mnt/f1 /mnt/f2 /mnt/file3 --analyze` 150 | 151 | or a directory and its sub-directories using 152 | 153 | `dduper --device /dev/sda1 --dir /mnt --recurse --analyze` 154 | 155 | Changing dedupe chunk size: 156 | --------------------------- 157 | 158 | By default, dduper uses a 128KB chunk size. This can be modified using the chunk-size 159 | option. The usage below sets the chunk size to 1MB: 160 | 161 | `dduper --device /dev/sda1 --files /mnt/f1 /mnt/f2 --chunk-size 1024` 162 | 163 | Display stats: 164 | ------------- 165 | 166 | To perform a dry run that displays details without performing dedupe: 167 | 168 | `dduper --device /dev/sda1 --files /mnt/f1 /mnt/f2 --dry-run` 169 | 170 | Also check the `--analyze` option for detailed data. 171 | 172 | List duplicate files: 173 | --------------------- 174 | 175 | To list duplicate files from a directory: 176 | 177 | `dduper --device /dev/sda1 --dir /mnt --recurse --perfect-match-only` 178 | 179 | 180 | Known Issues: 181 | ------------ 182 | 183 | - dduper supports ~~only~~ crc32. ~~Doesn't work with csum types like xxhash,blake2, sha256.~~ 184 | Initial support is now available for xxhash64, blake2 and sha256. 185 | 186 | - Subvolumes won't work with dduper.
187 | 188 | - Cannot yet de-duplicate identical content blocks within a single file 189 | 190 | 191 | Reporting bugs: 192 | -------------- 193 | 194 | To report issues please use 195 | 196 | - [github issue track](https://github.com/lakshmipathi/dduper/issues) 197 | -------------------------------------------------------------------------------- /bin/btrfs.static: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lakshmipathi/dduper/7e8f995a3a6179a31d15ce073bce6cfbaefb81ed/bin/btrfs.static -------------------------------------------------------------------------------- /ci/gitlab/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:bullseye-slim 2 | 3 | RUN apt-get update && apt-get install -y --no-install-recommends ovmf qemu-system qemu-efi 4 | -------------------------------------------------------------------------------- /ci/gitlab/btrfs-progs-tests.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Execute build_btrfsprogs_dduper.sh on console 3 | 4 | [Service] 5 | ExecStart=/usr/bin/build_btrfsprogs_dduper.sh 6 | StandardInput=tty 7 | StandardOutput=tty 8 | TTYPath=/dev/ttyS0 9 | Type=idle 10 | 11 | [Install] 12 | WantedBy=getty.target 13 | After=multi-user.target 14 | -------------------------------------------------------------------------------- /ci/gitlab/build_btrfsprogs_dduper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Build dduper with btrfs-progs patch and install it. 4 | # If btrfs already present, then execute even test script via test_cmd. 5 | # 6 | ############################################################################## 7 | # NOTE: If you make changes to this script, make sure to rebuild debian image. 
###############################################################################
set -x

BTRFS_BIN="btrfs"
MNT_DIR="/mnt"
BUILD_DIR="/btrfs-progs"
# The requested action is read from ${MNT_DIR}/cmd: either the literal
# "build_with_patch" or a test command line to execute from /mnt.
test_cmd=$(cat "${MNT_DIR}/cmd")

# The expansion is quoted so a multi-word test command (script + argument)
# compares cleanly instead of making `[` fail with "too many arguments".
if [ "${test_cmd}" == "build_with_patch" ]
then
    echo "=========================== Build btrfs-progs ================"
    # Never patch/build in the wrong directory: bail out if the tree is missing.
    cd "${BUILD_DIR}/" || { echo "cannot cd to ${BUILD_DIR}" >&2; poweroff; exit 1; }
    ls -l /dduper "${BUILD_DIR}"
    cat "${BUILD_DIR}/cmds/inspect-dump-csum.c"
    # patch is part of the && chain: build_pass.txt must only appear when the
    # dump-csum patch applied AND the patched tree built and installed.
    patch -p1 < /dduper/patch/btrfs-progs-v5.12.1/0001-Print-csum-for-a-given-file-on-stdout.patch \
        && ./autogen.sh \
        && ./configure --disable-documentation --disable-backtrace \
        && make -j"$(nproc)" \
        && make install \
        && touch "${MNT_DIR}/build_pass.txt"
    echo "================= Install dduper =========================="
    cp -v /dduper/dduper /usr/sbin/
    /usr/sbin/dduper --help
    poweroff
else
    echo "================= Running dduper Tests ================================="
    # ${test_cmd} is intentionally unquoted: it is a command line
    # ("script arg ...") that must be word-split before execution.
    cd /mnt && ${test_cmd}
    poweroff
fi

# -----------------------------------------------------------------------------
# /ci/gitlab/kernel_build.sh
# -----------------------------------------------------------------------------
#!/usr/bin/env bash
#
# Setup BTRFS kernel options and build kernel
set -x

apt-get update
apt-get -y install build-essential libncurses-dev bison flex libssl-dev libelf-dev unzip wget bc

# Build kernel
wget https://github.com/kdave/btrfs-devel/archive/misc-next.zip
unzip -qq misc-next.zip
# Stop here if the tree is missing; otherwise the config fragment below would
# be appended to a stray .config in the current directory.
cd btrfs-devel-misc-next/ || exit 1
make x86_64_defconfig && make kvm_guest.config

# BTRFS specific entries, appended to the generated .config via a here-document
cat <<EOF >> .config
CONFIG_BTRFS_FS=y
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_BTRFS_FS_CHECK_INTEGRITY=n
CONFIG_BTRFS_FS_RUN_SANITY_TESTS=n
CONFIG_BTRFS_DEBUG=y
CONFIG_BTRFS_ASSERT=y
CONFIG_BTRFS_FS_REF_VERIFY=y
CONFIG_RAID6_PQ_BENCHMARK=y
CONFIG_LIBCRC32C=y
EOF

make -j8

# Store file to shared dir
cp -v arch/x86/boot/bzImage /repo

# -----------------------------------------------------------------------------
# /ci/gitlab/run_tests.sh
# -----------------------------------------------------------------------------
#!/usr/bin/env bash
#
# Install and start qemu instance with custom kernel while exporting btrfs-progs src over 9p
#
set -x

# /repo is exported to the guest as the 9p share tagged "btrfs-progs".
qemu-system-x86_64 -m 512 -nographic \
    -kernel /repo/bzImage \
    -drive file=/repo/qemu-image.img,index=0,media=disk,format=raw \
    -fsdev local,id=btrfs-progs,path=/repo,security_model=mapped \
    -device virtio-9p-pci,fsdev=btrfs-progs,mount_tag=btrfs-progs \
    -append "console=tty1 root=/dev/sda rw"

# -----------------------------------------------------------------------------
# /ci/gitlab/setup_image.sh
# -----------------------------------------------------------------------------
#!/usr/bin/env bash
#
# Setup debian image via debootstrap and include systemd service file.
set -x

apt-get update
apt-get -y install debootstrap wget unzip python3-pip git

# Setup rootfs
IMG="/qemu-image.img"
DIR="/target"
truncate -s2G "$IMG"
mkfs.ext4 "$IMG"
mkdir -p "$DIR"
# Create the loop device nodes only where they are missing.
for i in {0..7}; do
    [ -e "/dev/loop$i" ] || mknod -m 0660 "/dev/loop$i" b 7 "$i"
done

# mount the image file; without it everything below would land on the host
mount -o loop "$IMG" "$DIR" || exit 1

# Install required packages
# NOTE(review): python3.5/python3.5-dev do not look like bullseye package
# names -- confirm that debootstrap still resolves this list.
debootstrap --arch=amd64 --include=git,autoconf,automake,gcc,make,pkg-config,e2fslibs-dev,libblkid-dev,zlib1g-dev,liblzo2-dev,asciidoc,xmlto,libzstd-dev,python3.5,python3.5-dev,python3-dev,python3-setuptools,python-setuptools,xz-utils,acl,attr,python3-pip,patch,mount,libmount-dev bullseye "$DIR" http://ftp.de.debian.org/debian/

## Setup 9p mount
echo "btrfs-progs /mnt 9p trans=virtio 0 0" > "$DIR/etc/fstab"

# Setup autologin
sed -i 's/9600/9600 --autologin root/g' "$DIR/lib/systemd/system/serial-getty@.service"

# Setup systemd service
cp -v /repo/ci/gitlab/build_btrfsprogs_dduper.sh "$DIR/usr/bin/"
cp -v /repo/ci/gitlab/btrfs-progs-tests.service "$DIR/etc/systemd/system/"

## Enable service
# The link target is the path as seen from inside the guest. Prefixing it with
# $DIR (as before) produced a link to /target/..., which does not exist once
# the image is booted as the root filesystem.
mkdir -p "$DIR/etc/systemd/system/getty.target.wants"
ln -s /etc/systemd/system/btrfs-progs-tests.service "$DIR/etc/systemd/system/getty.target.wants/btrfs-progs-tests.service"

cd /
umount "$DIR"
rmdir "$DIR"

cp -v "$IMG" /repo

# -----------------------------------------------------------------------------
# /ci/gitlab/setup_repos.sh
# -----------------------------------------------------------------------------
#!/usr/bin/env bash
#
# Use debian image and setup repos.
#
# Usage: setup_repos.sh <dduper-branch>
set -x

# Without a branch name `git clone -b` would swallow the URL as its argument.
ci_branch=${1:?usage: setup_repos.sh <dduper-branch>}

apt-get update
apt-get -y install python3-pip git

# Setup rootfs
IMG="/repo/qemu-image.img"
DIR="/target"
mkdir -p "$DIR"
for i in {0..7}; do
    [ -e "/dev/loop$i" ] || mknod -m 0660 "/dev/loop$i" b 7 "$i"
done

# mount the image file; abort rather than cloning into an unmounted directory
mount -o loop "$IMG" "$DIR" || exit 1

# Pull latest code
rm -rf "$DIR/dduper"
rm -rf "$DIR/btrfs-progs"

git clone -b "$ci_branch" https://github.com/Lakshmipathi/dduper.git "$DIR/dduper"
touch "$DIR/dduper/$ci_branch"
ls -l "$DIR/dduper/"
git clone https://github.com/kdave/btrfs-progs.git "$DIR/btrfs-progs"

pip3 install --target="$DIR/usr/lib/python3/dist-packages/" -r "$DIR/dduper/requirements.txt"

cd /
umount "$DIR"
rmdir "$DIR"

# -----------------------------------------------------------------------------
# /ci/gitlab/tests/basic_dir_recurse_test.sh
# -----------------------------------------------------------------------------
#!/usr/bin/env bash
# Test directory recurse feature
#
# Usage: basic_dir_recurse_test.sh <test_type>
# Writes ${HOST_DIR}/<test_type>_pass.txt when exactly 50MB were deduplicated.
set -xe

test_type=$1

echo "-------setup image-----------------------"
echo "creating 512mb btrfs img"
IMG="/img"
MNT_DIR="/btrfs_mnt"
HOST_DIR="/mnt/"
PASS_FILE="$HOST_DIR/${test_type}_pass.txt"
rm -rf -- "$PASS_FILE"

# `set -e` aborts on the first failing command. Unmount and power the VM off
# on every exit path so a failed run cannot leave the guest (and the CI job
# waiting on it) running.
finish() {
    umount "$MNT_DIR" || true
    poweroff
}
trap finish EXIT

mkdir -p "$MNT_DIR"
truncate -s512m "$IMG"

mkfs.btrfs -f "$IMG"

echo "-------mount image-----------------------"
echo "mounting it under $MNT_DIR"
mount "$IMG" "$MNT_DIR"


echo "-------setup files-----------------------"
echo "Creating 50mb test file"
dd if=/dev/urandom of=/tmp/f1 bs=1M count=50

echo "Coping to mount point directories"
TOPDIR="$MNT_DIR/"
SUBDIR="$MNT_DIR/d1/d2/d3"
mkdir -p "$SUBDIR"

cp -v /tmp/f1 "$TOPDIR/"
cp -v /tmp/f1 "$SUBDIR/"

loop_dev=$(/sbin/losetup --find --show "$IMG")
sync

used_space2=$(df --output=used -h -m "$MNT_DIR" | tail -1 | tr -d ' ')

echo "-------dduper verification-----------------------"
echo "Running simple dduper --dry-run"
dduper --device "${loop_dev}" --dir "$MNT_DIR" --recurse --dry-run

echo "Running simple dduper in default mode"
dduper --device "${loop_dev}" --dir "$MNT_DIR" --recurse

sync
sleep 5
used_space3=$(df --output=used -h -m "$MNT_DIR" | tail -1 | tr -d ' ')

echo "-------results summary-----------------------"
echo "disk usage before de-dupe: $used_space2 MB"
echo "disk usage after de-dupe: $used_space3 MB"

# Shell arithmetic instead of `expr`: expr exits with status 1 when the result
# is 0, which under `set -e` killed the script before it could report failure.
deduped=$(( used_space2 - used_space3 ))

if [ "$deduped" -eq 50 ]; then
    echo "dduper verification passed"
    echo "dduper verification passed" > "$PASS_FILE"
else
    echo "dduper verification failed"
fi

# umount + poweroff are performed by the EXIT trap installed above.
exit 0

# -----------------------------------------------------------------------------
# /ci/gitlab/tests/basic_dir_test.sh
# -----------------------------------------------------------------------------
#!/usr/bin/env bash
# Test multiple directory option
#
set -xe

test_type=$1

echo "-------setup image-----------------------"
echo "creating 512mb btrfs img"
IMG="/img"
MNT_DIR="/btrfs_mnt"
HOST_DIR="/mnt/"
PASS_FILE="$HOST_DIR/${test_type}_pass.txt"
rm -rf -- "$PASS_FILE"

# `set -e` aborts on the first failing command. Unmount and power the VM off
# on every exit path so a failed run cannot leave the guest (and the CI job
# waiting on it) running.
finish() {
    umount "$MNT_DIR" || true
    poweroff
}
trap finish EXIT

mkdir -p "$MNT_DIR"
truncate -s512m "$IMG"

mkfs.btrfs -f "$IMG"

echo "-------mount image-----------------------"
echo "mounting it under $MNT_DIR"
mount "$IMG" "$MNT_DIR"


echo "-------setup files-----------------------"
echo "Creating 50mb test file"
dd if=/dev/urandom of=/tmp/f1 bs=1M count=50

echo "Coping to mount point directories"
DIR1="$MNT_DIR/d1"
DIR2="$MNT_DIR/d2"

mkdir -p "$DIR1"
mkdir -p "$DIR2"

cp -v /tmp/f1 "$DIR1/"
cp -v /tmp/f1 "$DIR2/"

loop_dev=$(/sbin/losetup --find --show "$IMG")
sync

used_space2=$(df --output=used -h -m "$MNT_DIR" | tail -1 | tr -d ' ')

echo "-------dduper verification-----------------------"
echo "Running simple dduper --dry-run"
dduper --device "${loop_dev}" --dir "$DIR1" "$DIR2" --dry-run

echo "Running simple dduper in default mode"
dduper --device "${loop_dev}" --dir "$DIR1" "$DIR2"

sync
sleep 5
used_space3=$(df --output=used -h -m "$MNT_DIR" | tail -1 | tr -d ' ')

echo "-------results summary-----------------------"
echo "disk usage before de-dupe: $used_space2 MB"
echo "disk usage after de-dupe: $used_space3 MB"

# Shell arithmetic instead of `expr`: expr exits with status 1 when the result
# is 0, which under `set -e` killed the script before it could report failure.
deduped=$(( used_space2 - used_space3 ))

if [ "$deduped" -eq 50 ]; then
    echo "dduper verification passed"
    echo "dduper verification passed" > "$PASS_FILE"
else
    echo "dduper verification failed"
fi

# umount + poweroff are performed by the EXIT trap installed above.
exit 0

# -----------------------------------------------------------------------------
# /ci/gitlab/tests/basic_sanity_csum.sh
# -----------------------------------------------------------------------------
#!/usr/bin/env bash
# Verify different csum types
#
set -e

csum_type=$1

echo "creating 512mb btrfs img"
IMG="/img"
MNT_DIR="/btrfs_mnt"
HOST_DIR="/mnt/"
PASS_FILE="$HOST_DIR/${csum_type}_pass.txt"
deduped=0
rm -rf -- "$PASS_FILE"

# Create a fresh 512MB btrfs image with the requested checksum type and mount
# it on $MNT_DIR. "crc32" uses mkfs.btrfs without an explicit --csum.
setup_fs() {
    echo "-----------------------------------------------------------setup image-----------------------"
    mkdir -p "$MNT_DIR"
    rm -rf -- "$IMG"
    truncate -s512m "$IMG"

    if [ "$csum_type" == "crc32" ]; then
        mkfs.btrfs -f "$IMG"
    else
        mkfs.btrfs -f "$IMG" --csum "$csum_type"
    fi

    echo "-------mount image-----------------------"
    echo "mounting it under $MNT_DIR"
    mount "$IMG" "$MNT_DIR"
}

# Populate the filesystem.
#   $1 == "random": two copies (f1, random) of one 50MB random file.
#   otherwise:      $1 and $2 are layout names handed to dataset.py.
setup_data() {
    echo "----------------------------------------------------------setup files-----------------------"
    if [ "$1" == "random" ]; then
        echo "Creating 50mb test file"
        dd if=/dev/urandom of=/tmp/f1 bs=1M count=50

        echo "Coping to mount point"
        cp -v /tmp/f1 "$MNT_DIR/f1"
        cp -v /tmp/f1 "$MNT_DIR/random"

    else
        python /mnt/ci/gitlab/tests/dataset.py -d "$MNT_DIR" -l "$1" "$2"
    fi
    sleep 2
    ls -l "$MNT_DIR"
    sync
}


# Attach $IMG to a loop device (kept in the global loop_dev) and run dduper on
# $MNT_DIR, first as a dry run and then for real.
start_dedupe() {
    loop_dev=$(/sbin/losetup --find --show "$IMG")
    sync

    echo "--------------------------------------------------------dduper run-----------------------"
    echo "Running dduper --dry-run"
    dduper --device "${loop_dev}" --dir "$MNT_DIR" --dry-run

    echo "Running dduper in default mode"
    dduper --device "${loop_dev}" --dir "$MNT_DIR"

    sync
    sleep 5
}


# Compare the second-to-last column of the last `btrfs fi du` line for the
# files matching $2* against the expected "<$3>.00MiB".
#   $1 - first layout name (reporting only)
#   $2 - second layout name; prefix of the file(s) to inspect
#   $3 - expected value in MiB
# On mismatch the pass file is removed and the run is aborted.
verify_results() {
    echo "------------------------------------------------Verifying results-----------------------"
    local f1=$1
    local f2=$2
    local v=$3
    local content
    # The trailing * stays outside the quotes on purpose: it must glob.
    btrfs fi du "${MNT_DIR}/${f2}"* | tee /tmp/du.log
    cat /tmp/du.log
    content=$(tail -n1 /tmp/du.log)
    echo $content | awk '{print $(NF-1)}'
    deduped=$(echo $content | awk '{print $(NF-1)}')
    echo "deduped: $deduped"
    if [ "${deduped}" == "${v}.00MiB" ]; then
        echo "dduper verification passed"
        echo "f1:$f1 f2:$f2 v:$v"
        echo "dduper verification passed" > "$PASS_FILE"
    else
        echo "dduper verification failed"
        echo "f1:$f1 f2:$f2 v:$v"
        rm -rf -- "$PASS_FILE"
        abort_test
    fi

}

# Unmount the test filesystem and release the loop device attached by
# start_dedupe, so repeated tests do not accumulate loop devices.
cleanup() {
    umount "$MNT_DIR"
    if [ -n "${loop_dev:-}" ]; then
        /sbin/losetup -d "$loop_dev" || true
        loop_dev=""
    fi
}

# Stop the whole run. `poweroff` only requests the shutdown and returns, so
# exit explicitly; otherwise the remaining tests would keep running and could
# re-create the pass file while the machine is going down.
abort_test() {
    echo "Abort further tests"
    sleep 10
    poweroff
    exit 1
}

# Run one scenario end to end: fresh fs, data, dedupe, verification, cleanup.
#   $1, $2 - layout names ("random" for the random-file scenario)
#   $3     - expected value in MiB
test_dduper() {
    local f1=$1
    local f2=$2
    local v=$3
    setup_fs
    setup_data "$f1" "$f2"
    start_dedupe
    verify_results "$f1" "$f2" "$v"
    cleanup
}

test_dduper "random" "random" "50"
test_dduper "fn_a_1" "fn_aaaa_1" "4"
test_dduper "fn_a_1" "fn_aaaaaaaa_1" "8"
test_dduper "fn_abacad_1" "fn_xbyczd_2" "6"
test_dduper "fn_abcdef_1" "fn_xyzijkdef_2" "6"
test_dduper "fn_abcdab_2" "fn_ijxyabc_6" "18"
echo "All tests completed."
shutdown

# -----------------------------------------------------------------------------
# /ci/gitlab/tests/basic_sanity_dumpcsum.sh
# -----------------------------------------------------------------------------
#!/usr/bin/env bash
# Verify different csum types
#
set -e

csum_type=$1

echo "creating 512mb btrfs img"
IMG="/img"
MNT_DIR="/btrfs_mnt"
HOST_DIR="/mnt/"
PASS_FILE="$HOST_DIR/${csum_type}_pass.txt"
deduped=0
rm -rf -- "$PASS_FILE"

# Create a fresh 512MB btrfs image with the requested checksum type and mount
# it on $MNT_DIR. "crc32" uses mkfs.btrfs without an explicit --csum.
setup_fs() {
    echo "-----------------------------------------------------------setup image-----------------------"
    mkdir -p "$MNT_DIR"
    rm -rf -- "$IMG"
    truncate -s512m "$IMG"

    if [ "$csum_type" == "crc32" ]; then
        mkfs.btrfs -f "$IMG"
    else
        mkfs.btrfs -f "$IMG" --csum "$csum_type"
    fi

    echo "-------mount image-----------------------"
    echo "mounting it under $MNT_DIR"
    mount "$IMG" "$MNT_DIR"
}

# Populate the filesystem.
#   $1 == "random": two copies (f1, random) of one 50MB random file.
#   otherwise:      $1 and $2 are layout names handed to dataset.py.
setup_data() {
    echo "----------------------------------------------------------setup files-----------------------"
    if [ "$1" == "random" ]; then
        echo "Creating 50mb test file"
        dd if=/dev/urandom of=/tmp/f1 bs=1M count=50

        echo "Coping to mount point"
        cp -v /tmp/f1 "$MNT_DIR/f1"
        cp -v /tmp/f1 "$MNT_DIR/random"

    else
        python /mnt/ci/gitlab/tests/dataset.py -d "$MNT_DIR" -l "$1" "$2"
    fi
    sleep 2
    ls -l "$MNT_DIR"
    sync
}


# Attach $IMG to a loop device (kept in the global loop_dev) and dump the
# checksums of both copies into /tmp/c1 and /tmp/c2 (stdout and stderr).
start_dumpcsum() {
    loop_dev=$(/sbin/losetup --find --show "$IMG")
    rm -rf /tmp/c1 /tmp/c2
    sync

    echo "--------------------------------------------------------dump-csum-----------------------"
    echo "Running dump-csum "
    sync
    sleep 5
    btrfs inspect-internal dump-csum "$MNT_DIR/f1" "${loop_dev}" &> /tmp/c1
    btrfs inspect-internal dump-csum "$MNT_DIR/random" "${loop_dev}" &> /tmp/c2
    sync
    sleep 5

    md5sum /tmp/c1 /tmp/c2
}


# The two dumps must be non-empty and identical (compared via md5sum). The
# files are also copied to $HOST_DIR. Arguments are accepted but not used.
verify_results() {
    echo "------------------------------------------------Verifying results-----------------------"
    local c1 c2
    c1=$(md5sum /tmp/c1 | awk -F' ' '{ print $1}')
    c2=$(md5sum /tmp/c2 | awk -F' ' '{ print $1}')

    cp -v "$MNT_DIR/f1" "$HOST_DIR/random1"
    cp -v "$MNT_DIR/random" "$HOST_DIR/random2"

    [ -s /tmp/c1 ] || abort_test
    [ -s /tmp/c2 ] || abort_test


    if [ "${c1}" == "${c2}" ]; then
        echo "verification passed"
        echo "dduper verification passed" > "$PASS_FILE"
    else
        echo "dduper verification failed"
        rm -rf -- "$PASS_FILE"
        abort_test
    fi

}

# Unmount the test filesystem and release the loop device attached by
# start_dumpcsum.
cleanup() {
    umount "$MNT_DIR"
    if [ -n "${loop_dev:-}" ]; then
        /sbin/losetup -d "$loop_dev" || true
        loop_dev=""
    fi
}

# Stop the whole run. `poweroff` only requests the shutdown and returns, so
# exit explicitly instead of falling through into the remaining steps.
abort_test() {
    echo "Abort further tests"
    sleep 10
    poweroff
    exit 1
}

# Run one scenario end to end: fresh fs, data, csum dump, verification, cleanup.
test_dduper() {
    local f1=$1
    local f2=$2
    local v=$3
    setup_fs
    setup_data "$f1" "$f2"
    start_dumpcsum
    verify_results "$f1" "$f2" "$v"
    cleanup
}

test_dduper "random" "random" "50"
#test_dduper "fn_a_1" "fn_aaaa_1" "4"
#test_dduper "fn_a_1" "fn_aaaaaaaa_1" "8"
#test_dduper "fn_abacad_1" "fn_xbyczd_2" "6" 119 | #test_dduper "fn_abcdef_1" "fn_xyzijkdef_2" "6" 120 | #test_dduper "fn_abcdab_2" "fn_ijxyabc_6" "18" 121 | echo "All tests completed." 122 | shutdown 123 | -------------------------------------------------------------------------------- /ci/gitlab/tests/dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | mb = 1024 * 1024 5 | 6 | ''' 7 | Same file: 8 | dataset.py aaaa_1 aaaa_1 => 4mb dup 9 | 10 | 100% dup: 11 | dataset.py a_1 aaaaaaaa_1 => 8mb 12 | 13 | 50% dup: 14 | dataset.py abcd_1 abxy_1 => 2mb 15 | 16 | others: 17 | dataset.py abcd_1 bbxy_1 => 2mb 18 | dataset.py abcd_1 cdba_1 => 4mb 19 | 20 | chunk_size 1m: 21 | dataset.py abcdwxyz_1 wxyzabcd_1 => 8mb 22 | 23 | backup: 24 | dataset.py abcd_1 abcdefg_1 => 4mb 25 | 26 | ''' 27 | 28 | def file_layout(filename, layout, seg_size): 29 | filename = filename +"_" + str(len(layout) * seg_size) + "mb" 30 | with open(filename, "w") as fd: 31 | for c in layout: 32 | content = c * (seg_size * mb) 33 | fd.write(content) 34 | print(filename) 35 | 36 | 37 | def validate_lfile(lfile,dir_path): 38 | for lf in lfile: 39 | s1 = lf.split("_") 40 | if len(s1) != 3: 41 | print("Error: fn__ required") 42 | sys.exit(0) 43 | (lout,lseg_sz)=s1[1],int(s1[2]) 44 | file_layout(dir_path +"/" + lf, lout, lseg_sz) 45 | 46 | 47 | if __name__ == '__main__': 48 | parser = argparse.ArgumentParser() 49 | 50 | parser.add_argument('-d', 51 | '--dir_path', 52 | action='store', 53 | dest='dir_path', 54 | type=str, 55 | help='BTRFS dir (ex: /mnt/playground) ', 56 | required=True) 57 | parser.add_argument('-l', 58 | '--layout', 59 | action='store', 60 | dest='lfile', 61 | nargs='+', 62 | help='Layout of file fn__', 63 | type=str, 64 | required=True) 65 | 66 | results = parser.parse_args() 67 | print("fn___") 68 | validate_lfile(results.lfile,results.dir_path) 69 | 70 | 
#!/bin/bash
# ci/gitlab/tests/docker_test.sh
#
# Smoke test for dduper: builds a 512MB btrfs image, attaches it to a loop
# device, stores two copies of a 50MB random file on it and checks that
# running dduper reduces the used space reported by df by exactly 50MB.
#
# Exit status:
#   0 - the 50MB saving was observed
#   1 - image/loop/mount setup failed, or the saving differs from 50MB
set -x

# Print a message on stderr and abort the test.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

echo "This will create 512mb under /tmp and validates dduper behaviour."

echo "-------setup image-----------------------"
echo "creating 512mb btrfs img"
IMG="/img"
MNT_DIR="/btrfs_mnt"

loop_dev=$(losetup -f) || die "losetup could not report a free loop device"
[[ -n "$loop_dev" ]] || die "losetup reported an empty loop device name"

# The device node may be missing. Create it with the minor number that
# matches its name (loop3 -> minor 3); a fixed minor of 0 would make any
# node other than loop0 point at the wrong device.
if [[ ! -b "$loop_dev" ]]; then
  loop_minor=${loop_dev##*loop}
  [[ "$loop_minor" =~ ^[0-9]+$ ]] || die "unexpected loop device name: $loop_dev"
  mknod -m640 "$loop_dev" b 7 "$loop_minor"
fi
ls -l /dev/loop*

mkdir -p "$MNT_DIR"
truncate -s512m "$IMG"
mkfs.btrfs -f "$IMG" || die "mkfs.btrfs failed on $IMG"

echo "-------mount image-----------------------"
losetup "$loop_dev" "$IMG" || die "cannot attach $IMG to $loop_dev"

echo "mounting it under $MNT_DIR"
if ! mount "$loop_dev" "$MNT_DIR"; then
  # Without the mount the copies below would land on the root filesystem
  # and the df comparison would be meaningless.
  losetup -d "$loop_dev"
  die "cannot mount $loop_dev on $MNT_DIR"
fi


echo "-------setup files-----------------------"
echo "Creating 50mb test file"
dd if=/dev/urandom of=/tmp/f1 bs=1M count=50

echo "Coping to mount point"
cp -v /tmp/f1 "$MNT_DIR/f1"
cp -v /tmp/f1 "$MNT_DIR/f2"
sync

# Used space in MB, taken from the last line of df's output.
used_before=$(df --output=used -m "$MNT_DIR" | tail -1 | tr -d ' ')

echo "-------dduper verification-----------------------"
echo "Running simple dduper --dry-run"
dduper --device "${loop_dev}" --dir "$MNT_DIR" --dry-run

echo "Running simple dduper in default mode"
dduper --device "${loop_dev}" --dir "$MNT_DIR"

sync
sleep 5
used_after=$(df --output=used -m "$MNT_DIR" | tail -1 | tr -d ' ')

echo "-------results summary-----------------------"
echo "disk usage before de-dupe: $used_before MB"
echo "disk usage after de-dupe: $used_after MB"

# Shell arithmetic treats an empty reading as 0, so a failed df simply
# produces a mismatch instead of a syntax error.
deduped=$(( used_before - used_after ))

rc=0
if (( deduped == 50 )); then
  echo "dduper verification passed"
else
  echo "dduper verification failed"
  rc=1
fi

# Release the mount and the loop device before reporting the result.
umount "$MNT_DIR"
losetup -d "$loop_dev"

exit "$rc"
#!/usr/bin/env bash
# ci/gitlab/tests/fast_mode.sh
#
# Verify --fast-mode option.
#
# Usage: fast_mode.sh <test_type>
#
# Creates a 512MB btrfs image, mounts it, places the same 50MB random file
# in two directories and runs dduper --fast-mode over both. When df reports
# exactly 50MB less used space afterwards, "<test_type>_pass.txt" is written
# under HOST_DIR; the file is absent on failure. The machine is powered off
# when the script ends.
set -xe

test_type=$1

# Power off on every exit path. With `set -e` any failing command ends the
# script early, and without this trap the machine would keep running.
# NOTE(review): presumably this runs inside a throwaway test VM - confirm.
trap poweroff EXIT

echo "-------setup image-----------------------"
echo "creating 512mb btrfs img"
IMG="/img"
MNT_DIR="/btrfs_mnt"
HOST_DIR="/mnt/"
PASS_FILE="$HOST_DIR/${test_type}_pass.txt"
# Remove the marker left by any previous run so a stale pass is never reported.
rm -rf -- "$PASS_FILE"

mkdir -p "$MNT_DIR"
truncate -s512m "$IMG"

mkfs.btrfs -f "$IMG"

echo "-------mount image-----------------------"
echo "mounting it under $MNT_DIR"
mount "$IMG" "$MNT_DIR"


echo "-------setup files-----------------------"
echo "Creating 50mb test file"
dd if=/dev/urandom of=/tmp/f1 bs=1M count=50

echo "Coping to mount point directories"
DIR1="$MNT_DIR/d1"
DIR2="$MNT_DIR/d2"

mkdir -p "$DIR1"
mkdir -p "$DIR2"

cp -v /tmp/f1 "$DIR1/"
cp -v /tmp/f1 "$DIR2/"

# Loop device attached to the image; passed to dduper as --device.
loop_dev=$(/sbin/losetup --find --show "$IMG")
sync

# Used space in MB, taken from the last line of df's output.
used_before=$(df --output=used -m "$MNT_DIR" | tail -1 | tr -d ' ')

echo "-------dduper verification-----------------------"
echo "Running simple dduper --dry-run"
dduper --fast-mode --device "${loop_dev}" --dir "$DIR1" "$DIR2" --dry-run

echo "Running simple dduper in default mode"
dduper --fast-mode --device "${loop_dev}" --dir "$DIR1" "$DIR2"

sync
sleep 5
used_after=$(df --output=used -m "$MNT_DIR" | tail -1 | tr -d ' ')

echo "-------results summary-----------------------"
echo "disk usage before de-dupe: $used_before MB"
echo "disk usage after de-dupe: $used_after MB"

# Shell arithmetic instead of expr: expr exits with status 1 when the result
# is 0, which under `set -e` would abort the script exactly when nothing was
# deduped, before the failure could be reported.
deduped=$(( used_before - used_after ))

if (( deduped == 50 )); then
  echo "dduper verification passed"
  echo "dduper verification passed" > "$PASS_FILE"
else
  echo "dduper verification failed"
fi

umount "$MNT_DIR"
# The EXIT trap installed above performs the poweroff.
-------------------------------------------------------------------------------- /dduper: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | """ dduper - BTRFS Dedupe tool. 6 | 7 | This is a offline dedupe tool. Instead of reading whole file blocks and 8 | computing checksum, It works by fetching checksum from BTRFS csum tree. 9 | This hugely improves the performance. 10 | 11 | 12 | """ 13 | import argparse 14 | import errno 15 | import hashlib 16 | import logging 17 | import numpy as np 18 | import math 19 | import os 20 | import pdb 21 | import sqlite3 22 | import struct 23 | import subprocess 24 | import sys 25 | 26 | from collections import OrderedDict 27 | from fcntl import ioctl 28 | from itertools import combinations 29 | from itertools import zip_longest 30 | from stat import * 31 | from timeit import default_timer as timer 32 | from filecmp import cmp 33 | from prettytable import PrettyTable 34 | 35 | # 4kb block size 36 | blk_size = 4 37 | # no.of csum on single row - right now its 8 38 | no_of_chunks = 0 39 | FICLONERANGE = 0x4020940d 40 | FIDEDUPERANGE = 0xc0189436 41 | 42 | device_name = None 43 | skip = False 44 | chunk_sz = 0 45 | run_len = 0 46 | ele_sz = 0 47 | fast_mode = False 48 | verbose = False 49 | analyze = False 50 | analyze_dict = OrderedDict() 51 | dst_file_sz = 0 52 | perfect_match_only = False 53 | 54 | # Already deduped files 55 | processed_files = [] 56 | 57 | connection = None 58 | cursor = None 59 | 60 | # Log file 61 | LOG_FILENAME = 'dduper.log' 62 | logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG,) 63 | 64 | # Sqlite DB operations 65 | def sqlite_mark_processed(filename): 66 | global cursor 67 | cursor.execute("update filehash set processed=1 WHERE filename = ?",(filename,)) 68 | sqlite_commit() 69 | 70 | def sqlite_mark_valid(filename): 71 | global cursor 72 | cursor.execute("update filehash set valid=1 WHERE filename 
= ?",(filename,)) 73 | sqlite_commit() 74 | 75 | def sqlite_insert_csum(filename, complete_hash, out): 76 | global connection, cursor 77 | cursor.execute("INSERT INTO filehash VALUES (?, ?, 0, 0)", (filename, complete_hash)) 78 | cursor.execute("SELECT * FROM btrfscsum WHERE short_hash = ?",(complete_hash,)) 79 | exist = cursor.fetchone() 80 | 81 | if exist is None: 82 | cursor.execute("INSERT INTO btrfscsum VALUES (?, ?)", (complete_hash, str(out))) 83 | 84 | sqlite_commit() 85 | 86 | def sqlite_create_tables(): 87 | global cursor 88 | cursor.execute("CREATE TABLE filehash (filename TEXT, short_hash TEXT, processed INTEGER, valid INTEGER)") 89 | cursor.execute("CREATE TABLE btrfscsum (short_hash TEXT, long_hash TEXT)") 90 | 91 | def sqlite_commit(): 92 | global connection 93 | connection.commit() 94 | 95 | def sqlite_connection_close(): 96 | global connection 97 | connection.close() 98 | 99 | def sqlite_connection_open(): 100 | global connection, cursor 101 | 102 | if os.path.exists("dduper.db") is True: 103 | connection = sqlite3.connect("dduper.db") 104 | cursor = connection.cursor() 105 | else: 106 | connection = sqlite3.connect("dduper.db") 107 | cursor = connection.cursor() 108 | sqlite_create_tables() 109 | 110 | # Sqlite DB 111 | 112 | # From https://stackoverflow.com/questions/434287 113 | def grouper(iterable, n, fillvalue=None): 114 | args = [iter(iterable)] * int(n) 115 | return zip_longest(*args, fillvalue=fillvalue) 116 | 117 | 118 | def get_ele_size(chunk_sz): 119 | global no_of_chunks, run_len 120 | if chunk_sz <= 0 or chunk_sz % 128 != 0: 121 | print("Ensure chunk size is of multiple 128KB. 
(128,256,512 etc)") 122 | sys.exit(-1) 123 | no_of_chunks = chunk_sz // blk_size 124 | ele_sz = no_of_chunks // 8 125 | run_len = no_of_chunks * blk_size * 1024 126 | # print("\n chunk_sz=%d, no_of_chunks=%d, eke_sz=%d ",chunk_sz, no_of_chunks, ele_sz) 127 | return ele_sz 128 | 129 | 130 | def get_hashes(out1): 131 | """ 132 | For each list item compute its hash and store it with offset as its key. 133 | """ 134 | global ele_sz 135 | ccount = 0 136 | 137 | # print("Running with ele_sz " + str(ele_sz)) 138 | if ele_sz == 1: 139 | od = OrderedDict() 140 | for idx, ele in enumerate(out1): 141 | v = [] 142 | k = hashlib.sha256(str(ele).encode('utf-8')).hexdigest() 143 | v.append(idx) 144 | if k in od: 145 | if verbose is True: 146 | print("Collision with: " + str(k) + "at offset: " + str(v)) 147 | # Get previous value 148 | v.extend(od.get(k)) 149 | ccount += 1 150 | od[k] = v 151 | else: 152 | od = OrderedDict() 153 | for idx, ele in enumerate(grouper(out1, ele_sz, 'x')): 154 | v = [] 155 | k = hashlib.sha256(str(ele).encode('utf-8')).hexdigest() 156 | v.append(idx) 157 | if k in od: 158 | if verbose is True: 159 | print("Collision with: " + str(k) + "at offset: " + str(v)) 160 | # Get previous value 161 | v.extend(od.get(k)) 162 | ccount += 1 163 | od[k] = v 164 | 165 | return od, ccount 166 | 167 | 168 | def ioctl_ficlonerange(dst_fd, s): 169 | 170 | try: 171 | ioctl(dst_fd, FICLONERANGE, s) 172 | except Exception as e: 173 | print("error({0})".format(e)) 174 | 175 | 176 | def ioctl_fideduperange(src_fd, s): 177 | 178 | try: 179 | v = ioctl(src_fd, FIDEDUPERANGE, s) 180 | _,_,_,_,_,_,_,bytes_dup,status,_ = struct.unpack("QQHHIqQQiH",v) 181 | return bytes_dup,status 182 | except Exception as e: 183 | print("error({0})".format(e)) 184 | 185 | 186 | def validate_results(src_file, dst_file, bkup_file): 187 | 188 | if cmp(dst_file, bkup_file): 189 | print("Dedupe validation successful " + src_file + ":" + dst_file) 190 | # Removing temporary backup file path 191 | 
os.unlink(bkup_file) 192 | else: 193 | msg = "\nFAILURE: Deduplication for " + dst_file + " resulted in corruption." + \ 194 | "You can restore original file from " + bkup_file 195 | print(msg) 196 | with open("/var/log/dduper_backupfile_info.log", "a") as wfd: 197 | wfd.write(msg) 198 | # TODO: Remove this file from further op 199 | 200 | 201 | def auto_adjust_chunk_sz(src_file_sz, analyze): 202 | global chunk_sz, ele_sz 203 | 204 | # Dont change chunk_sz for --analyze option 205 | if analyze is True: 206 | return chunk_sz, ele_sz 207 | # automatically change chunk size in case of perfect match. 208 | fz_kb = src_file_sz >> 10 209 | fz_mb = src_file_sz >> 20 210 | # print("File size is: " + str(fz_kb) + "KB or " + str(fz_mb) + "MB") 211 | if fz_mb >= 16: # file >= 16MB set chunk size 16MB 212 | perfect_match_chunk_sz = 16384 213 | ele_sz = get_ele_size(1024 * 16) 214 | elif fz_mb >= 8: # file >= 8MB set chunk size 8MB 215 | perfect_match_chunk_sz = 8192 216 | ele_sz = get_ele_size(1024 * 8) 217 | elif fz_mb >= 4: # file >= 4MB set chunk size 4MB 218 | perfect_match_chunk_sz = 4096 219 | ele_sz = get_ele_size(1024 * 4) 220 | elif fz_mb >= 2: # file >= 2MB set chunk size 2MB 221 | perfect_match_chunk_sz = 2048 222 | ele_sz = get_ele_size(1024 * 2) 223 | elif fz_mb >= 1: # file >= 1MB set chunk size 1MB 224 | perfect_match_chunk_sz = 1024 225 | ele_sz = get_ele_size(1024) 226 | elif fz_kb >= 512: # file >= 512KB chunk_size 512KB 227 | perfect_match_chunk_sz = 512 228 | ele_sz = get_ele_size(512) 229 | else: 230 | perfect_match_chunk_sz = 128 231 | ele_sz = get_ele_size(128) 232 | return perfect_match_chunk_sz, ele_sz 233 | 234 | 235 | def do_btrfs_dump_csum(filename): 236 | 237 | btrfs_bin = "/usr/sbin/btrfs.static" 238 | if os.path.exists(btrfs_bin) is False: 239 | btrfs_bin = "btrfs" 240 | 241 | out = subprocess.Popen( 242 | [btrfs_bin, 'inspect-internal', 'dump-csum', filename, device_name], 243 | stdout=subprocess.PIPE, 244 | close_fds=True).stdout.readlines() 
245 | 246 | complete_hash = hashlib.sha256(str(out).encode('utf-8')).hexdigest() 247 | sqlite_insert_csum(filename, complete_hash, out) 248 | return out 249 | 250 | def check_btrfs_file_exists(filename): 251 | global cursor 252 | cursor.execute("SELECT * FROM filehash WHERE filename = ?",(filename,)) 253 | exist = cursor.fetchone() 254 | if exist is None: 255 | return None, False 256 | else: 257 | # print("Already exists - return the out value") 258 | cursor.execute("SELECT * FROM btrfscsum WHERE short_hash = ?",(exist[1],)) 259 | exist = cursor.fetchone() 260 | if exist is None: 261 | print("Not btrfscsum found") 262 | return None, False 263 | else: 264 | out = eval(exist[1]) 265 | return out, True 266 | 267 | def btrfs_dump_csum(filename): 268 | global device_name 269 | out, ret = check_btrfs_file_exists(filename) 270 | 271 | if ret == True: 272 | return out 273 | else: 274 | return do_btrfs_dump_csum(filename) 275 | 276 | 277 | def do_dedupe(src_file, dst_file, dry_run): 278 | 279 | global ele_sz, analyze 280 | 281 | src_file_sz = os.path.getsize(src_file) 282 | 283 | bkup_file = dst_file + ".__dduper" 284 | src_fd = os.open(src_file, os.O_RDONLY) 285 | dst_fd = os.open(dst_file, os.O_WRONLY) 286 | perfect_match = 0 287 | perfect_match_chunk_sz = 0 288 | 289 | out1 = btrfs_dump_csum(src_file) 290 | out2 = btrfs_dump_csum(dst_file) 291 | 292 | # FIXME: check for empty csums 293 | assert len(out1) != 0 294 | assert len(out2) != 0 295 | 296 | # todo : perfect match files. 
Remove dst_file from further operations 297 | if out1 == out2: 298 | print("Perfect match : ", src_file, dst_file) 299 | perfect_match = 1 300 | if perfect_match_only is True: 301 | return perfect_match 302 | perfect_match_chunk_sz, ele_sz = auto_adjust_chunk_sz(src_file_sz, analyze) 303 | # Get hashes now 304 | src_dict, src_ccount = get_hashes(out1) 305 | dst_dict, dst_ccount = src_dict, src_ccount 306 | else: 307 | src_dict, src_ccount = get_hashes(out1) 308 | dst_dict, dst_ccount = get_hashes(out2) 309 | 310 | total_entry = len(src_dict) - 1 # Fix missing final ele 311 | np1 = np.array([v for v in src_dict.keys()]) 312 | np2 = np.array([v for v in dst_dict.keys()]) 313 | matched_keys = np.intersect1d(np1, np2) 314 | unmatched_keys = np.setdiff1d(np2, np1) 315 | total_bytes_deduped = 0 316 | 317 | if dry_run is False: 318 | # todo: Clear dict/np/list if there are not used further 319 | # todo : handle same content within single file 320 | 321 | if matched_keys is not None: 322 | if skip is False: 323 | bkup2 = subprocess.Popen( 324 | ['cp', '--reflink=always', dst_file, bkup_file], 325 | stdout=subprocess.PIPE) 326 | print("*" * 24) 327 | # print "matched regions" 328 | for location in matched_keys: 329 | entry = src_dict[location][0] 330 | src_len = no_of_chunks * blk_size * 1024 331 | assert src_len <= 16777216 # 16MB limit 332 | src_offset = src_dict[location][0] * src_len 333 | 334 | multi_dst_offsets = dst_dict[location] # list 335 | for offset in multi_dst_offsets: 336 | dst_offset = offset * src_len 337 | 338 | if entry == total_entry: # fix final ele 339 | src_len = src_file_sz - src_offset 340 | # print("matching chunk : src offset:"+str(src_offset) +" src_len="+ str(src_len) +" dest_off="+ str(dst_offset)) 341 | if fast_mode is True: 342 | s = struct.pack("qQQQ", src_fd, src_offset, src_len, 343 | dst_offset) 344 | ioctl_ficlonerange(dst_fd, s) 345 | total_bytes_deduped += src_len 346 | else: 347 | bytes_deduped = 0 348 | status = 0 349 | s = 
struct.pack("QQHHIqQQiH", src_offset, src_len, 1, 350 | 0, 0, dst_fd, dst_offset, 351 | bytes_deduped, status, 0) 352 | bytes_deduped,status = ioctl_fideduperange(src_fd, s) 353 | total_bytes_deduped += bytes_deduped 354 | #print("\n bytes_deduped= %d %d " % (bytes_deduped, status)) 355 | 356 | print("Dedupe completed for " + src_file + ":" + dst_file) 357 | sqlite_mark_processed(dst_file) 358 | 359 | # Verify original unmodified file and newly deduped file both point to same contents 360 | if skip is False: 361 | validate_results(src_file, dst_file, bkup_file) 362 | 363 | # Close open fds 364 | os.close(src_fd) 365 | os.close(dst_fd) 366 | 367 | return display_summary(blk_size, chunk_sz, perfect_match_chunk_sz, 368 | src_file, dst_file, src_ccount, dst_ccount, 369 | total_bytes_deduped, perfect_match, dry_run, 370 | src_dict, dst_dict, matched_keys, unmatched_keys) 371 | 372 | 373 | def display_summary(blk_size, chunk_sz, perfect_match_chunk_sz, src_file, 374 | dst_file, src_ccount, dst_ccount, total_bytes_deduped, 375 | perfect_match, dry_run, src_dict, dst_dict, matched_keys, 376 | unmatched_keys): 377 | global dst_file_sz 378 | if perfect_match == 1: 379 | chunk = perfect_match_chunk_sz 380 | total_bytes_deduped = dst_file_sz 381 | else: 382 | chunk = chunk_sz 383 | 384 | # Compute matched and unmatched chunk info on dst_dict 385 | matched_chunks = 0 386 | for k in matched_keys: 387 | matched_chunks += len(dst_dict.get(k)) 388 | unmatched_chunks = 0 389 | for k in unmatched_keys: 390 | unmatched_chunks += len(dst_dict.get(k)) 391 | 392 | if analyze is False: 393 | print("Summary") 394 | print("blk_size : %dKB chunksize : %dKB" % (blk_size, chunk)) 395 | if src_ccount == 0: 396 | print(src_file + " has " + str(len(src_dict)) + " chunks") 397 | else: 398 | print(src_file + " has " + str(len(src_dict) + src_ccount) + 399 | " chunks") 400 | if dst_ccount == 0: 401 | print(dst_file + " has " + str(len(dst_dict)) + " chunks") 402 | else: 403 | print(dst_file + " has 
" + str(len(dst_dict) + dst_ccount) + 404 | " chunks") 405 | print("Matched chunks: " + str(matched_chunks)) 406 | print("Unmatched chunks: " + str(unmatched_chunks)) 407 | 408 | avail_dedupe = matched_chunks * chunk 409 | 410 | if dry_run is False: 411 | print("Total size(KB) deduped: " + str(total_bytes_deduped // 1024)) 412 | elif analyze is False: 413 | print("Total size(KB) available for dedupe: %d " % (avail_dedupe)) 414 | 415 | if dst_file_sz == (avail_dedupe * 1024): 416 | # print "whole file deduped, remove this file from further op: " + str(dst_file) 417 | # override perfect match to remove this whole file. 418 | perfect_match = 1 419 | 420 | if analyze is True: 421 | if chunk_sz in analyze_dict: 422 | v = analyze_dict[chunk_sz] 423 | else: 424 | v = [] 425 | if perfect_match == 1: 426 | v.append(( 427 | src_file + ":" + dst_file, 428 | dst_file_sz // 1024, 429 | )) 430 | else: 431 | v.append(( 432 | src_file + ":" + dst_file, 433 | avail_dedupe, 434 | )) 435 | analyze_dict[chunk_sz] = v 436 | sys.stdout.write('[Analyzing] %s:%s \r' % 437 | (src_file, dst_file)) 438 | sys.stdout.flush() 439 | 440 | return perfect_match 441 | 442 | 443 | def validate_files(src_file, dst_file, processed_files): 444 | global dst_file_sz 445 | 446 | src_stat = os.stat(src_file) 447 | dst_stat = os.stat(dst_file) 448 | dst_file_sz = dst_stat.st_size 449 | global run_len 450 | if src_file in processed_files: 451 | return False 452 | if dst_file in processed_files: 453 | return False 454 | # Verify it's a unique regular file 455 | if (S_ISREG(src_stat.st_mode) == S_ISREG(dst_stat.st_mode) 456 | and (src_stat.st_ino != dst_stat.st_ino) 457 | and (src_stat.st_size >= 4096) and (dst_stat.st_size >= 4096)): 458 | # and (src_stat.st_size >= run_len) 459 | # and (dst_stat.st_size >= run_len)): 460 | return True 461 | if verbose is True: 462 | print("Skipped", src_file, dst_file, "not unique regular files or \ 463 | file size < 4kb") 464 | return False 465 | 466 | 467 | def 
dedupe_files(file_list, dry_run): 468 | ret = 0 469 | global processed_files 470 | if len(file_list) == 2: 471 | src_file = file_list[0] 472 | dst_file = file_list[1] 473 | if validate_files(src_file, dst_file, processed_files) is True: 474 | ret = do_dedupe(src_file, dst_file, dry_run) 475 | elif len(file_list) > 2: 476 | comb = combinations(file_list, 2) 477 | for f in comb: 478 | src_file = f[0] 479 | dst_file = f[1] 480 | if validate_files(src_file, dst_file, processed_files) is True: 481 | # print src_file + " <-Dedupe-> " + dst_file 482 | ret = do_dedupe(src_file, dst_file, dry_run) 483 | # pdb.set_trace() 484 | if ret == 1: 485 | # perfectly matching file found or while file content deduped - stop re-use this file again. 486 | # print "removing " + str(dst_file) 487 | processed_files.append(dst_file) 488 | #else: 489 | #print src_file + " <-SKIP Dedupe-> " + dst_file + "chunk_size:" + str(chunk_sz) 490 | else: 491 | print("Single file given or Empty directory. Try again with --recurse") 492 | return 493 | 494 | 495 | def validate_file(filename): 496 | global run_len 497 | if os.path.exists(filename) is False: 498 | return False 499 | file_stat = os.stat(filename) 500 | # Verify its a unique regular file 501 | if (S_ISREG(file_stat.st_mode) and (file_stat.st_size >= 4096)): 502 | # and (file_stat.st_size >= run_len)): 503 | return True 504 | else: 505 | if verbose is True: 506 | print("Skipped", filename, "not unique regular files or \ 507 | file size < 4kb ") 508 | return False 509 | 510 | def fetch_valid_unprocessed_files(): 511 | global cursor 512 | dupfiles=[] 513 | cursor.execute("select filename from filehash where valid=1 and processed=0;") 514 | exist = cursor.fetchall() 515 | if exist is None: 516 | print("No records matching valid=1 and processed=0") 517 | logging.debug("No records matching valid=1 and processed=0") 518 | return None 519 | else: 520 | return exist 521 | 522 | def detect_dupfiles(): 523 | global cursor 524 | dupfiles=[] 525 | 
cursor.execute("select short_hash from filehash group by short_hash having count(*) > 1;") 526 | exist = cursor.fetchall() 527 | if exist is None: 528 | #print("No duplicate files found") 529 | logging.debug("No duplicate files found") 530 | return None 531 | else: 532 | logging.debug("Duplicate files are:") 533 | for short_hash in exist: 534 | files=[] 535 | cursor.execute("SELECT filename FROM filehash WHERE short_hash = ?",(short_hash[0],)) 536 | filelist = cursor.fetchall() 537 | if filelist is not None: 538 | for f in filelist: 539 | #print(f[0]) 540 | files.append(f[0]) 541 | dupfiles.append(files) 542 | 543 | logging.debug(dupfiles) 544 | return dupfiles 545 | 546 | 547 | def populate_records(file_list): 548 | for fn in file_list: 549 | btrfs_dump_csum(fn) 550 | sqlite_mark_valid(fn) 551 | 552 | def dedupe_dir(dir_path, dry_run, recurse): 553 | logging.debug("Phase-1: Validating files and creating DB") 554 | file_list = [] 555 | if recurse is True: 556 | for dirname in dir_path: 557 | for path, dirs, files in os.walk(dirname): 558 | for filename in files: 559 | fn = os.path.join(path, filename) 560 | if validate_file(fn) is True: 561 | file_list.append(fn) 562 | else: 563 | for dirname in dir_path: 564 | for fi in os.listdir(dirname): 565 | if os.path.isfile(os.path.join(dirname, fi)): 566 | fn = os.path.join(dirname, fi) 567 | if validate_file(fn) is True: 568 | file_list.append(fn) 569 | 570 | logging.debug("Phase-1.1: Populate records using threads") 571 | populate_records(file_list) 572 | 573 | logging.debug("Phase-2: Detecting duplicate files") 574 | dupfiles = detect_dupfiles() 575 | logging.debug("Phase-3: Dedupe duplicate files") 576 | for f in dupfiles: 577 | logging.debug(f) 578 | dedupe_files(f, dry_run) 579 | 580 | remaining_items = fetch_valid_unprocessed_files() 581 | newfile_list = [filename for t in remaining_items for filename in t] 582 | logging.debug("Phase-4: Dedupe remaining files") 583 | logging.debug(newfile_list) 584 | 
dedupe_files(newfile_list, dry_run) 585 | 586 | 587 | def main(results): 588 | 589 | sqlite_connection_open() 590 | 591 | if results.file_list is not None: 592 | dedupe_files(results.file_list, results.dry_run) 593 | 594 | if results.dir_path is not None: 595 | dedupe_dir(results.dir_path, results.dry_run, results.recurse) 596 | 597 | sqlite_connection_close() 598 | return 599 | 600 | 601 | if __name__ == '__main__': 602 | parser = argparse.ArgumentParser() 603 | 604 | parser.add_argument('-p', 605 | '--device', 606 | action='store', 607 | dest='device_name', 608 | type=str, 609 | help='Device with BTRFS partition (ex: /dev/sda3) ', 610 | required=True) 611 | 612 | single = parser.add_mutually_exclusive_group() 613 | 614 | single.add_argument('-d', 615 | '--dir', 616 | action='store', 617 | dest='dir_path', 618 | nargs='+', 619 | type=str, 620 | help='Dedupe given directory or directories', 621 | required=False) 622 | 623 | single.add_argument('-f', 624 | '--files', 625 | action='store', 626 | dest='file_list', 627 | nargs='+', 628 | help='Dedupe list of files', 629 | type=str, 630 | required=False) 631 | 632 | parser.add_argument('-r', 633 | '--recurse', 634 | action='store_true', 635 | dest='recurse', 636 | help='Parse dir recursively (used along with -d)') 637 | 638 | parser.add_argument('-D', 639 | '--dry-run', 640 | action='store_true', 641 | dest='dry_run', 642 | help='Show summary of dedupe details') 643 | 644 | parser.add_argument('-s', 645 | '--skip', 646 | action='store_true', 647 | dest='skip', 648 | help='Will skip backup/validation process.') 649 | 650 | parser.add_argument('-c', 651 | '--chunk-size', 652 | action='store', 653 | dest='chunk_sz', 654 | type=int, 655 | default=128, 656 | help='Dedupe chunk size in KB', 657 | required=False) 658 | 659 | parser.add_argument('-v', 660 | '--version', 661 | action='version', 662 | version='%(prog)s 0.04', 663 | help="Show version info") 664 | 665 | parser.add_argument('-m', 666 | '--fast-mode', 667 | 
action='store_true', 668 | dest='fast_mode', 669 | help='Use ficlonerange call', 670 | default=False) 671 | 672 | parser.add_argument('-V', 673 | '--verbose', 674 | action='store_true', 675 | dest='verbose', 676 | help='Show logs messages') 677 | 678 | parser.add_argument('-P', 679 | '--perfect-match-only', 680 | action='store_true', 681 | dest='perfect_match_only', 682 | help='find perfect match files') 683 | 684 | parser.add_argument( 685 | '-a', 686 | '--analyze', 687 | action='store_true', 688 | dest='analyze', 689 | help='Report deduplicate data status with different chunk size') 690 | 691 | results = parser.parse_args() 692 | 693 | if not (results.dir_path or results.file_list): 694 | parser.error('No action requested, add --files or --dir') 695 | 696 | device_name = results.device_name 697 | skip = results.skip 698 | chunk_sz = results.chunk_sz 699 | fast_mode = results.fast_mode 700 | ele_sz = get_ele_size(chunk_sz) 701 | verbose = results.verbose 702 | perfect_match_only = results.perfect_match_only 703 | start = timer() 704 | if fast_mode is False: 705 | skip = True 706 | if perfect_match_only is True: 707 | print("Find duplicate files...") 708 | 709 | if results.analyze is True: 710 | results.dry_run = True 711 | analyze = True 712 | for sz in 128, 256, 512, 1024, 2048, 4096, 8192, 16384: 713 | chunk_sz = sz 714 | ele_sz = get_ele_size(chunk_sz) 715 | main(results) 716 | del processed_files[:] # clear list for next chunk_sz iteration 717 | for k, v in analyze_dict.items(): 718 | total_sz = 0 719 | table = PrettyTable() 720 | table.field_names = ["Chunk Size(KB)", "Files", "Duplicate(KB)"] 721 | for v1 in v: 722 | table_row = [] 723 | table_row.append(k) 724 | f, z = v1 725 | total_sz += z 726 | table_row.append(str(f)) 727 | table_row.append(z) 728 | table.add_row(table_row) 729 | print(table) 730 | print( 731 | "dduper:%sKB of duplicate data found with chunk size:%dKB \n\n" 732 | % (total_sz, k)) 733 | else: 734 | main(results) 735 | print("dduper took 
" + str(timer() - start) + " seconds") 736 | -------------------------------------------------------------------------------- /patch/btrfs-progs-v5.12.1/0001-Print-csum-for-a-given-file-on-stdout.patch: -------------------------------------------------------------------------------- 1 | From d1aa1e923c96be90690f5ea221f1c46c57c6ff62 Mon Sep 17 00:00:00 2001 2 | From: "lakshmipathi.g" 3 | Date: Tue, 18 May 2021 14:52:41 +0530 4 | Subject: [PATCH] Print csum for a given file on stdout. 5 | 6 | Signed-off-by: lakshmipathi.g 7 | --- 8 | Makefile | 3 +- 9 | cmds/commands.h | 1 + 10 | cmds/inspect-dump-csum.c | 244 +++++++++++++++++++++++++++++++++++++++ 11 | cmds/inspect.c | 1 + 12 | 4 files changed, 248 insertions(+), 1 deletion(-) 13 | create mode 100644 cmds/inspect-dump-csum.c 14 | 15 | diff --git a/Makefile b/Makefile 16 | index a1cc457b..763dc73f 100644 17 | --- a/Makefile 18 | +++ b/Makefile 19 | @@ -157,7 +157,8 @@ cmds_objects = cmds/subvolume.o cmds/filesystem.o cmds/device.o cmds/scrub.o \ 20 | cmds/rescue-super-recover.o \ 21 | cmds/property.o cmds/filesystem-usage.o cmds/inspect-dump-tree.o \ 22 | cmds/inspect-dump-super.o cmds/inspect-tree-stats.o cmds/filesystem-du.o \ 23 | - mkfs/common.o check/mode-common.o check/mode-lowmem.o 24 | + mkfs/common.o check/mode-common.o check/mode-lowmem.o \ 25 | + cmds/inspect-dump-csum.o 26 | libbtrfs_objects = common/send-stream.o common/send-utils.o kernel-lib/rbtree.o btrfs-list.o \ 27 | kernel-lib/radix-tree.o common/extent-cache.o kernel-shared/extent_io.o \ 28 | crypto/crc32c.o common/messages.o \ 29 | diff --git a/cmds/commands.h b/cmds/commands.h 30 | index 8fa85d6c..350f456a 100644 31 | --- a/cmds/commands.h 32 | +++ b/cmds/commands.h 33 | @@ -140,6 +140,7 @@ DECLARE_COMMAND(check); 34 | DECLARE_COMMAND(chunk_recover); 35 | DECLARE_COMMAND(super_recover); 36 | DECLARE_COMMAND(inspect); 37 | +DECLARE_COMMAND(inspect_dump_csum); 38 | DECLARE_COMMAND(inspect_dump_super); 39 | DECLARE_COMMAND(inspect_dump_tree); 40 | 
DECLARE_COMMAND(inspect_tree_stats); 41 | diff --git a/cmds/inspect-dump-csum.c b/cmds/inspect-dump-csum.c 42 | new file mode 100644 43 | index 00000000..093f9234 44 | --- /dev/null 45 | +++ b/cmds/inspect-dump-csum.c 46 | @@ -0,0 +1,244 @@ 47 | +/* 48 | + * This program is free software; you can redistribute it and/or 49 | + * modify it under the terms of the GNU General Public 50 | + * License v2 as published by the Free Software Foundation. 51 | + * 52 | + * This program is distributed in the hope that it will be useful, 53 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of 54 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 55 | + * General Public License for more details. 56 | + * 57 | + * You should have received a copy of the GNU General Public 58 | + * License along with this program; if not, write to the 59 | + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 60 | + * Boston, MA 021110-1307, USA. 61 | + */ 62 | + 63 | +#include "kerncompat.h" 64 | +#include 65 | +#include 66 | +#include 67 | +#include 68 | +#include 69 | +#include 70 | +#include 71 | +#include 72 | +#include 73 | + 74 | +#include "kernel-shared/ctree.h" 75 | +#include "kernel-shared/disk-io.h" 76 | +#include "kernel-shared/print-tree.h" 77 | +#include "kernel-shared/transaction.h" 78 | +#include "kernel-lib/list.h" 79 | +#include "common/utils.h" 80 | +#include "cmds/commands.h" 81 | +#include "crypto/crc32c.h" 82 | +#include "common/help.h" 83 | +#include "kernel-shared/volumes.h" 84 | + 85 | + 86 | +const char * const cmd_inspect_dump_csum_usage[] = { 87 | + "btrfs inspect-internal dump-csum ", 88 | + "Get csums for the given file.", 89 | + NULL 90 | +}; 91 | + 92 | +int btrfs_lookup_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, 93 | + struct btrfs_path *path, u64 bytenr, int cow, int total_csums) 94 | +{ 95 | + int ret; 96 | + int i; 97 | + int start_pos = 0; 98 | + struct btrfs_key file_key; 99 | + struct 
btrfs_key found_key; 100 | + struct btrfs_csum_item *item; 101 | + struct extent_buffer *leaf; 102 | + u64 csum_offset = 0; 103 | + u16 csum_size = 104 | + btrfs_super_csum_size(root->fs_info->super_copy); 105 | + int csums_in_item = 0; 106 | + unsigned int tree_csum = 0; 107 | + int pending_csums = total_csums; 108 | + static int cnt=1; 109 | + 110 | + file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 111 | + file_key.offset = bytenr; 112 | + file_key.type = BTRFS_EXTENT_CSUM_KEY; 113 | + ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); 114 | + if (ret < 0) 115 | + goto fail; 116 | + while(1){ 117 | + leaf = path->nodes[0]; 118 | + if (ret > 0) { 119 | + ret = 1; 120 | + if (path->slots[0] == 0) 121 | + goto fail; 122 | + path->slots[0]--; 123 | + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 124 | + if (found_key.type != BTRFS_EXTENT_CSUM_KEY){ 125 | + fprintf(stderr, "\nInvalid key found."); 126 | + goto fail; 127 | + } 128 | + 129 | + csum_offset = ((bytenr - found_key.offset) / root->fs_info->sectorsize) * csum_size; 130 | + csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); 131 | + csums_in_item /= csum_size; 132 | + csums_in_item -= ( bytenr - found_key.offset ) / root->fs_info->sectorsize; 133 | + start_pos=csum_offset; 134 | + } 135 | + if (path->slots[0] >= btrfs_header_nritems(leaf)) { 136 | + if (pending_csums > 0){ 137 | + ret = btrfs_next_leaf(root, path); 138 | + if (ret == 0) 139 | + continue; 140 | + } 141 | + } 142 | + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); 143 | + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 144 | + if (!ret){ 145 | + start_pos=0; 146 | + csum_offset = ( bytenr - found_key.offset ) / root->fs_info->sectorsize; 147 | + csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); 148 | + csums_in_item /= csum_size; 149 | + } 150 | + if (csums_in_item > pending_csums){ 151 | + //possibly,some other csums on this item. 
152 | + for(i = 0; i < pending_csums; i++, cnt++){ 153 | + read_extent_buffer(leaf, &tree_csum, 154 | + (unsigned long)item + ((i*csum_size)+start_pos) , csum_size); 155 | + fprintf(stdout, "%x ", tree_csum); 156 | + if (cnt % 8 == 0) 157 | + fprintf(stdout, "\n"); 158 | + } 159 | + pending_csums = 0; 160 | + return 0; 161 | + }else{ 162 | + for(i = 0; i < csums_in_item; i++, cnt++){ 163 | + read_extent_buffer(leaf, &tree_csum, 164 | + (unsigned long)item+((i*csum_size)+start_pos), csum_size); 165 | + fprintf(stdout, "%x ", tree_csum); 166 | + if (cnt % 8 == 0) 167 | + fprintf(stdout, "\n"); 168 | + } 169 | + } 170 | + pending_csums -= csums_in_item; 171 | + ret = 0; 172 | + if (pending_csums > 0){ 173 | + path->slots[0]++; 174 | + 175 | + }else 176 | + return 0; 177 | + } 178 | +fail: 179 | + fprintf(stderr, "btrfs_lookup_csums search failed."); 180 | + if (ret > 0) 181 | + ret = -ENOENT; 182 | + return ret; 183 | +} 184 | + 185 | +int btrfs_lookup_extent(struct btrfs_fs_info *info, struct btrfs_path *path, 186 | + u64 ino, int cow){ 187 | + struct btrfs_key key; 188 | + struct btrfs_key found_key; 189 | + struct btrfs_file_extent_item *fi; 190 | + struct extent_buffer *leaf; 191 | + struct btrfs_root *fs_root; 192 | + int ret = -1; 193 | + int slot; 194 | + int total_csums = 0; 195 | + u64 bytenr; 196 | + u64 itemnum = 0; 197 | + struct btrfs_path *path1 = NULL; 198 | + 199 | + fs_root = info->fs_root; 200 | + key.objectid = ino; 201 | + key.type = BTRFS_EXTENT_DATA_KEY; 202 | + key.offset = 0; 203 | + ret = btrfs_search_slot(NULL,fs_root,&key,path,0,0); 204 | + 205 | + if(ret < 0) 206 | + goto error; 207 | + 208 | + if (ret > 1){ 209 | + fprintf(stderr, "Unable to find the entry"); 210 | + return ret; 211 | + } 212 | + u16 csum_size = btrfs_super_csum_size(info->csum_root->fs_info->super_copy); 213 | + while(1){ 214 | + leaf = path->nodes[0]; 215 | + slot = path->slots[0]; 216 | + if (slot >= btrfs_header_nritems(leaf)){ 217 | + ret = btrfs_next_leaf(fs_root, 
path); 218 | + if (ret == 0) 219 | + continue; 220 | + if (ret < 0) 221 | + goto error; 222 | + } 223 | + btrfs_item_key_to_cpu(leaf, &found_key, slot); 224 | + if (found_key.type != BTRFS_EXTENT_DATA_KEY){ 225 | + btrfs_release_path(path); 226 | + return -EINVAL; 227 | + } 228 | + 229 | + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 230 | + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 231 | + total_csums=(btrfs_file_extent_num_bytes(leaf, fi) / 1024) / csum_size; 232 | + path->slots[0]++; 233 | + itemnum++; 234 | + path1 = btrfs_alloc_path(); 235 | + ret = btrfs_lookup_csums(NULL,info->csum_root, path1, bytenr, 0, 236 | + total_csums); 237 | + btrfs_release_path(path1); 238 | + if (ret) { 239 | + fprintf(stderr, "\n Error: btrfs_lookup_csum"); 240 | + return 1; 241 | + } 242 | + } 243 | + 244 | +error: 245 | + btrfs_release_path(path); 246 | + return ret; 247 | +} 248 | + 249 | +//int cmd_inspect_dump_csum(int argc, char **argv) 250 | +static int cmd_inspect_dump_csum(const struct cmd_struct *cmd, 251 | + int argc, char **argv) 252 | +{ 253 | + struct btrfs_fs_info *info; 254 | + int ret; 255 | + struct btrfs_path path; 256 | + struct stat st; 257 | + char *filename; 258 | + struct open_ctree_flags ocf = { 0 }; 259 | + ocf.flags = OPEN_CTREE_PARTIAL; 260 | + ocf.filename = argv[2]; 261 | + 262 | + if (check_argc_exact(argc, 3)) 263 | + usage_unknown_option(cmd, argv); 264 | + 265 | + filename = argv[1]; 266 | + info = open_ctree_fs_info(&ocf); 267 | + if (!info) { 268 | + fprintf(stderr, "unable to open %s\n", argv[2]); 269 | + exit(1); 270 | + } 271 | + 272 | + ret = stat(filename, &st); 273 | + if (ret < 0) { 274 | + fprintf(stderr, "unable to open %s\n", filename); 275 | + exit(1); 276 | + } 277 | + 278 | + if(st.st_size < 1024){ 279 | + fprintf(stderr, "file less than 1KB.abort%lu", (st.st_size )); 280 | + exit(1); 281 | + } 282 | + 283 | + btrfs_init_path(&path); 284 | + ret = btrfs_lookup_extent(info, &path, st.st_ino, 0); 285 | + ret 
= close_ctree(info->fs_root); 286 | + btrfs_close_all_devices(); 287 | + 288 | + return ret; 289 | +} 290 | +DEFINE_SIMPLE_COMMAND(inspect_dump_csum, "dump-csum"); 291 | diff --git a/cmds/inspect.c b/cmds/inspect.c 292 | index 76d3936f..574077d3 100644 293 | --- a/cmds/inspect.c 294 | +++ b/cmds/inspect.c 295 | @@ -696,6 +696,7 @@ static const struct cmd_group inspect_cmd_group = { 296 | &cmd_struct_inspect_dump_tree, 297 | &cmd_struct_inspect_dump_super, 298 | &cmd_struct_inspect_tree_stats, 299 | + &cmd_struct_inspect_dump_csum, 300 | NULL 301 | } 302 | }; 303 | -- 304 | 2.30.2 305 | 306 | -------------------------------------------------------------------------------- /patch/btrfs-progs-v5.16/0001-Print-csum-for-a-given-file-on-stdout.patch: -------------------------------------------------------------------------------- 1 | From 134ffe099a5d0dcf6ce2668f4db2291c60066172 Mon Sep 17 00:00:00 2001 2 | From: "lakshmipathi.g" 3 | Date: Tue, 18 May 2021 14:52:41 +0530 4 | Subject: [PATCH] Print csum for a given file on stdout. 
5 | 6 | Signed-off-by: lakshmipathi.g 7 | --- 8 | Makefile | 2 +- 9 | cmds/commands.h | 1 + 10 | cmds/inspect-dump-csum.c | 246 +++++++++++++++++++++++++++++++++++++++ 11 | cmds/inspect.c | 1 + 12 | 4 files changed, 249 insertions(+), 1 deletion(-) 13 | create mode 100644 cmds/inspect-dump-csum.c 14 | 15 | diff --git a/Makefile b/Makefile 16 | index a75d9ad8..6a597f33 100644 17 | --- a/Makefile 18 | +++ b/Makefile 19 | @@ -196,7 +196,7 @@ objects = \ 20 | libbtrfsutil/stubs.o \ 21 | libbtrfsutil/subvolume.o 22 | 23 | -cmds_objects = cmds/subvolume.o cmds/subvolume-list.o \ 24 | +cmds_objects = cmds/subvolume.o cmds/subvolume-list.o cmds/inspect-dump-csum.o \ 25 | cmds/filesystem.o cmds/device.o cmds/scrub.o \ 26 | cmds/inspect.o cmds/balance.o cmds/send.o cmds/receive.o \ 27 | cmds/quota.o cmds/qgroup.o cmds/replace.o check/main.o \ 28 | diff --git a/cmds/commands.h b/cmds/commands.h 29 | index 9ec50136..64b22456 100644 30 | --- a/cmds/commands.h 31 | +++ b/cmds/commands.h 32 | @@ -139,6 +139,7 @@ DECLARE_COMMAND(device); 33 | DECLARE_COMMAND(scrub); 34 | DECLARE_COMMAND(check); 35 | DECLARE_COMMAND(inspect); 36 | +DECLARE_COMMAND(inspect_dump_csum); 37 | DECLARE_COMMAND(inspect_dump_super); 38 | DECLARE_COMMAND(inspect_dump_tree); 39 | DECLARE_COMMAND(inspect_tree_stats); 40 | diff --git a/cmds/inspect-dump-csum.c b/cmds/inspect-dump-csum.c 41 | new file mode 100644 42 | index 00000000..e95521e2 43 | --- /dev/null 44 | +++ b/cmds/inspect-dump-csum.c 45 | @@ -0,0 +1,246 @@ 46 | +/* 47 | + * This program is free software; you can redistribute it and/or 48 | + * modify it under the terms of the GNU General Public 49 | + * License v2 as published by the Free Software Foundation. 50 | + * 51 | + * This program is distributed in the hope that it will be useful, 52 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of 53 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 54 | + * General Public License for more details. 
55 | + * 56 | + * You should have received a copy of the GNU General Public 57 | + * License along with this program; if not, write to the 58 | + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 59 | + * Boston, MA 021110-1307, USA. 60 | + */ 61 | + 62 | +#include "kerncompat.h" 63 | +#include 64 | +#include 65 | +#include 66 | +#include 67 | +#include 68 | +#include 69 | +#include 70 | +#include 71 | +#include 72 | + 73 | +#include "kernel-shared/ctree.h" 74 | +#include "kernel-shared/disk-io.h" 75 | +#include "kernel-shared/print-tree.h" 76 | +#include "kernel-shared/transaction.h" 77 | +#include "kernel-lib/list.h" 78 | +#include "common/utils.h" 79 | +#include "cmds/commands.h" 80 | +#include "crypto/crc32c.h" 81 | +#include "common/help.h" 82 | +#include "kernel-shared/volumes.h" 83 | + 84 | + 85 | +const char * const cmd_inspect_dump_csum_usage[] = { 86 | + "btrfs inspect-internal dump-csum ", 87 | + "Get csums for the given file.", 88 | + NULL 89 | +}; 90 | + 91 | +int btrfs_lookup_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, 92 | + struct btrfs_path *path, u64 bytenr, int cow, int total_csums) 93 | +{ 94 | + int ret; 95 | + int i; 96 | + int start_pos = 0; 97 | + struct btrfs_key file_key; 98 | + struct btrfs_key found_key; 99 | + struct btrfs_csum_item *item; 100 | + struct extent_buffer *leaf; 101 | + u64 csum_offset = 0; 102 | + u16 csum_size = 103 | + btrfs_super_csum_size(root->fs_info->super_copy); 104 | + int csums_in_item = 0; 105 | + unsigned int tree_csum = 0; 106 | + int pending_csums = total_csums; 107 | + static int cnt=1; 108 | + 109 | + file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 110 | + file_key.offset = bytenr; 111 | + file_key.type = BTRFS_EXTENT_CSUM_KEY; 112 | + ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); 113 | + if (ret < 0) 114 | + goto fail; 115 | + while(1){ 116 | + leaf = path->nodes[0]; 117 | + if (ret > 0) { 118 | + ret = 1; 119 | + if (path->slots[0] == 0) 120 | + goto 
fail; 121 | + path->slots[0]--; 122 | + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 123 | + if (found_key.type != BTRFS_EXTENT_CSUM_KEY){ 124 | + fprintf(stderr, "\nInvalid key found."); 125 | + goto fail; 126 | + } 127 | + 128 | + csum_offset = ((bytenr - found_key.offset) / root->fs_info->sectorsize) * csum_size; 129 | + csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); 130 | + csums_in_item /= csum_size; 131 | + csums_in_item -= ( bytenr - found_key.offset ) / root->fs_info->sectorsize; 132 | + start_pos=csum_offset; 133 | + } 134 | + if (path->slots[0] >= btrfs_header_nritems(leaf)) { 135 | + if (pending_csums > 0){ 136 | + ret = btrfs_next_leaf(root, path); 137 | + if (ret == 0) 138 | + continue; 139 | + } 140 | + } 141 | + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); 142 | + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 143 | + if (!ret){ 144 | + start_pos=0; 145 | + csum_offset = ( bytenr - found_key.offset ) / root->fs_info->sectorsize; 146 | + csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); 147 | + csums_in_item /= csum_size; 148 | + } 149 | + if (csums_in_item > pending_csums){ 150 | + //possibly,some other csums on this item. 
151 | + for(i = 0; i < pending_csums; i++, cnt++){ 152 | + read_extent_buffer(leaf, &tree_csum, 153 | + (unsigned long)item + ((i*csum_size)+start_pos) , csum_size); 154 | + fprintf(stdout, "%x ", tree_csum); 155 | + if (cnt % 8 == 0) 156 | + fprintf(stdout, "\n"); 157 | + } 158 | + pending_csums = 0; 159 | + return 0; 160 | + }else{ 161 | + for(i = 0; i < csums_in_item; i++, cnt++){ 162 | + read_extent_buffer(leaf, &tree_csum, 163 | + (unsigned long)item+((i*csum_size)+start_pos), csum_size); 164 | + fprintf(stdout, "%x ", tree_csum); 165 | + if (cnt % 8 == 0) 166 | + fprintf(stdout, "\n"); 167 | + } 168 | + } 169 | + pending_csums -= csums_in_item; 170 | + ret = 0; 171 | + if (pending_csums > 0){ 172 | + path->slots[0]++; 173 | + 174 | + }else 175 | + return 0; 176 | + } 177 | +fail: 178 | + fprintf(stderr, "btrfs_lookup_csums search failed."); 179 | + if (ret > 0) 180 | + ret = -ENOENT; 181 | + return ret; 182 | +} 183 | + 184 | +int btrfs_lookup_extent(struct btrfs_fs_info *info, struct btrfs_path *path, 185 | + u64 ino, int cow){ 186 | + struct btrfs_key key; 187 | + struct btrfs_key found_key; 188 | + struct btrfs_file_extent_item *fi; 189 | + struct extent_buffer *leaf; 190 | + struct btrfs_root *fs_root; 191 | + int ret = -1; 192 | + int slot; 193 | + int total_csums = 0; 194 | + u64 bytenr; 195 | + u64 itemnum = 0; 196 | + struct btrfs_path *path1 = NULL; 197 | + 198 | + fs_root = info->fs_root; 199 | + key.objectid = ino; 200 | + key.type = BTRFS_EXTENT_DATA_KEY; 201 | + key.offset = 0; 202 | + ret = btrfs_search_slot(NULL,fs_root,&key,path,0,0); 203 | + 204 | + if(ret < 0) 205 | + goto error; 206 | + 207 | + if (ret > 1){ 208 | + fprintf(stderr, "Unable to find the entry"); 209 | + return ret; 210 | + } 211 | + struct btrfs_root *csum_root = btrfs_csum_root(info, 0); 212 | + u16 csum_size = btrfs_super_csum_size(csum_root->fs_info->super_copy); 213 | + while(1){ 214 | + leaf = path->nodes[0]; 215 | + slot = path->slots[0]; 216 | + if (slot >= 
btrfs_header_nritems(leaf)){ 217 | + ret = btrfs_next_leaf(fs_root, path); 218 | + if (ret == 0) 219 | + continue; 220 | + if (ret < 0) 221 | + goto error; 222 | + } 223 | + btrfs_item_key_to_cpu(leaf, &found_key, slot); 224 | + if (found_key.type != BTRFS_EXTENT_DATA_KEY){ 225 | + btrfs_release_path(path); 226 | + return -EINVAL; 227 | + } 228 | + 229 | + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 230 | + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 231 | + total_csums=(btrfs_file_extent_num_bytes(leaf, fi) / 1024) / csum_size; 232 | + path->slots[0]++; 233 | + itemnum++; 234 | + path1 = btrfs_alloc_path(); 235 | + struct btrfs_root *csum_root = btrfs_csum_root(info, 0); 236 | + ret = btrfs_lookup_csums(NULL,csum_root, path1, bytenr, 0, 237 | + total_csums); 238 | + btrfs_release_path(path1); 239 | + if (ret) { 240 | + fprintf(stderr, "\n Error: btrfs_lookup_csum"); 241 | + return 1; 242 | + } 243 | + } 244 | + 245 | +error: 246 | + btrfs_release_path(path); 247 | + return ret; 248 | +} 249 | + 250 | +//int cmd_inspect_dump_csum(int argc, char **argv) 251 | +static int cmd_inspect_dump_csum(const struct cmd_struct *cmd, 252 | + int argc, char **argv) 253 | +{ 254 | + struct btrfs_fs_info *info; 255 | + int ret; 256 | + struct btrfs_path path; 257 | + struct stat st; 258 | + char *filename; 259 | + struct open_ctree_flags ocf = { 0 }; 260 | + ocf.flags = OPEN_CTREE_PARTIAL; 261 | + ocf.filename = argv[2]; 262 | + 263 | + if (check_argc_exact(argc, 3)) 264 | + usage_unknown_option(cmd, argv); 265 | + 266 | + filename = argv[1]; 267 | + info = open_ctree_fs_info(&ocf); 268 | + if (!info) { 269 | + fprintf(stderr, "unable to open %s\n", argv[2]); 270 | + exit(1); 271 | + } 272 | + 273 | + ret = stat(filename, &st); 274 | + if (ret < 0) { 275 | + fprintf(stderr, "unable to open %s\n", filename); 276 | + exit(1); 277 | + } 278 | + 279 | + if(st.st_size < 1024){ 280 | + fprintf(stderr, "file less than 1KB.abort%lu", (st.st_size )); 281 | + 
exit(1); 282 | + } 283 | + 284 | + btrfs_init_path(&path); 285 | + ret = btrfs_lookup_extent(info, &path, st.st_ino, 0); 286 | + ret = close_ctree(info->fs_root); 287 | + btrfs_close_all_devices(); 288 | + 289 | + return ret; 290 | +} 291 | +DEFINE_SIMPLE_COMMAND(inspect_dump_csum, "dump-csum"); 292 | diff --git a/cmds/inspect.c b/cmds/inspect.c 293 | index 1534f204..79db1e0f 100644 294 | --- a/cmds/inspect.c 295 | +++ b/cmds/inspect.c 296 | @@ -695,6 +695,7 @@ static const struct cmd_group inspect_cmd_group = { 297 | &cmd_struct_inspect_dump_tree, 298 | &cmd_struct_inspect_dump_super, 299 | &cmd_struct_inspect_tree_stats, 300 | + &cmd_struct_inspect_dump_csum, 301 | NULL 302 | } 303 | }; 304 | -- 305 | 2.39.1 306 | 307 | -------------------------------------------------------------------------------- /patch/btrfs-progs-v5.18/0001-Print-csum-for-a-given-file-on-stdout.patch: -------------------------------------------------------------------------------- 1 | From c02e5701d988b095735cdf450d0e016540b811df Mon Sep 17 00:00:00 2001 2 | From: "lakshmipathi.g" 3 | Date: Tue, 18 May 2021 14:52:41 +0530 4 | Subject: [PATCH] Print csum for a given file on stdout. 
5 | 6 | Signed-off-by: lakshmipathi.g 7 | --- 8 | Makefile | 2 +- 9 | cmds/commands.h | 1 + 10 | cmds/inspect-dump-csum.c | 246 +++++++++++++++++++++++++++++++++++++++ 11 | cmds/inspect.c | 1 + 12 | 4 files changed, 249 insertions(+), 1 deletion(-) 13 | create mode 100644 cmds/inspect-dump-csum.c 14 | 15 | diff --git a/Makefile b/Makefile 16 | index 1223ba6d..ecfe2afb 100644 17 | --- a/Makefile 18 | +++ b/Makefile 19 | @@ -196,7 +196,7 @@ objects = \ 20 | libbtrfsutil/stubs.o \ 21 | libbtrfsutil/subvolume.o 22 | 23 | -cmds_objects = cmds/subvolume.o cmds/subvolume-list.o \ 24 | +cmds_objects = cmds/subvolume.o cmds/subvolume-list.o cmds/inspect-dump-csum.o \ 25 | cmds/filesystem.o cmds/device.o cmds/scrub.o \ 26 | cmds/inspect.o cmds/balance.o cmds/send.o cmds/receive.o \ 27 | cmds/quota.o cmds/qgroup.o cmds/replace.o check/main.o \ 28 | diff --git a/cmds/commands.h b/cmds/commands.h 29 | index 9ec50136..64b22456 100644 30 | --- a/cmds/commands.h 31 | +++ b/cmds/commands.h 32 | @@ -139,6 +139,7 @@ DECLARE_COMMAND(device); 33 | DECLARE_COMMAND(scrub); 34 | DECLARE_COMMAND(check); 35 | DECLARE_COMMAND(inspect); 36 | +DECLARE_COMMAND(inspect_dump_csum); 37 | DECLARE_COMMAND(inspect_dump_super); 38 | DECLARE_COMMAND(inspect_dump_tree); 39 | DECLARE_COMMAND(inspect_tree_stats); 40 | diff --git a/cmds/inspect-dump-csum.c b/cmds/inspect-dump-csum.c 41 | new file mode 100644 42 | index 00000000..f46b23c4 43 | --- /dev/null 44 | +++ b/cmds/inspect-dump-csum.c 45 | @@ -0,0 +1,246 @@ 46 | +/* 47 | + * This program is free software; you can redistribute it and/or 48 | + * modify it under the terms of the GNU General Public 49 | + * License v2 as published by the Free Software Foundation. 50 | + * 51 | + * This program is distributed in the hope that it will be useful, 52 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of 53 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 54 | + * General Public License for more details. 
55 | + * 56 | + * You should have received a copy of the GNU General Public 57 | + * License along with this program; if not, write to the 58 | + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 59 | + * Boston, MA 021110-1307, USA. 60 | + */ 61 | + 62 | +#include "kerncompat.h" 63 | +#include 64 | +#include 65 | +#include 66 | +#include 67 | +#include 68 | +#include 69 | +#include 70 | +#include 71 | +#include 72 | + 73 | +#include "kernel-shared/ctree.h" 74 | +#include "kernel-shared/disk-io.h" 75 | +#include "kernel-shared/print-tree.h" 76 | +#include "kernel-shared/transaction.h" 77 | +#include "kernel-lib/list.h" 78 | +#include "common/utils.h" 79 | +#include "cmds/commands.h" 80 | +#include "crypto/crc32c.h" 81 | +#include "common/help.h" 82 | +#include "kernel-shared/volumes.h" 83 | + 84 | + 85 | +const char * const cmd_inspect_dump_csum_usage[] = { 86 | + "btrfs inspect-internal dump-csum ", 87 | + "Get csums for the given file.", 88 | + NULL 89 | +}; 90 | + 91 | +int btrfs_lookup_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, 92 | + struct btrfs_path *path, u64 bytenr, int cow, int total_csums) 93 | +{ 94 | + int ret; 95 | + int i; 96 | + int start_pos = 0; 97 | + struct btrfs_key file_key; 98 | + struct btrfs_key found_key; 99 | + struct btrfs_csum_item *item; 100 | + struct extent_buffer *leaf; 101 | + u64 csum_offset = 0; 102 | + u16 csum_size = 103 | + btrfs_super_csum_size(root->fs_info->super_copy); 104 | + int csums_in_item = 0; 105 | + unsigned int tree_csum = 0; 106 | + int pending_csums = total_csums; 107 | + static int cnt=1; 108 | + 109 | + file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 110 | + file_key.offset = bytenr; 111 | + file_key.type = BTRFS_EXTENT_CSUM_KEY; 112 | + ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); 113 | + if (ret < 0) 114 | + goto fail; 115 | + while(1){ 116 | + leaf = path->nodes[0]; 117 | + if (ret > 0) { 118 | + ret = 1; 119 | + if (path->slots[0] == 0) 120 | + goto 
fail; 121 | + path->slots[0]--; 122 | + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 123 | + if (found_key.type != BTRFS_EXTENT_CSUM_KEY){ 124 | + fprintf(stderr, "\nInvalid key found."); 125 | + goto fail; 126 | + } 127 | + 128 | + csum_offset = ((bytenr - found_key.offset) / root->fs_info->sectorsize) * csum_size; 129 | + csums_in_item = btrfs_item_size(leaf, path->slots[0]); 130 | + csums_in_item /= csum_size; 131 | + csums_in_item -= ( bytenr - found_key.offset ) / root->fs_info->sectorsize; 132 | + start_pos=csum_offset; 133 | + } 134 | + if (path->slots[0] >= btrfs_header_nritems(leaf)) { 135 | + if (pending_csums > 0){ 136 | + ret = btrfs_next_leaf(root, path); 137 | + if (ret == 0) 138 | + continue; 139 | + } 140 | + } 141 | + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); 142 | + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 143 | + if (!ret){ 144 | + start_pos=0; 145 | + csum_offset = ( bytenr - found_key.offset ) / root->fs_info->sectorsize; 146 | + csums_in_item = btrfs_item_size(leaf, path->slots[0]); 147 | + csums_in_item /= csum_size; 148 | + } 149 | + if (csums_in_item > pending_csums){ 150 | + //possibly,some other csums on this item. 
151 | + for(i = 0; i < pending_csums; i++, cnt++){ 152 | + read_extent_buffer(leaf, &tree_csum, 153 | + (unsigned long)item + ((i*csum_size)+start_pos) , csum_size); 154 | + fprintf(stdout, "%x ", tree_csum); 155 | + if (cnt % 8 == 0) 156 | + fprintf(stdout, "\n"); 157 | + } 158 | + pending_csums = 0; 159 | + return 0; 160 | + }else{ 161 | + for(i = 0; i < csums_in_item; i++, cnt++){ 162 | + read_extent_buffer(leaf, &tree_csum, 163 | + (unsigned long)item+((i*csum_size)+start_pos), csum_size); 164 | + fprintf(stdout, "%x ", tree_csum); 165 | + if (cnt % 8 == 0) 166 | + fprintf(stdout, "\n"); 167 | + } 168 | + } 169 | + pending_csums -= csums_in_item; 170 | + ret = 0; 171 | + if (pending_csums > 0){ 172 | + path->slots[0]++; 173 | + 174 | + }else 175 | + return 0; 176 | + } 177 | +fail: 178 | + fprintf(stderr, "btrfs_lookup_csums search failed."); 179 | + if (ret > 0) 180 | + ret = -ENOENT; 181 | + return ret; 182 | +} 183 | + 184 | +int btrfs_lookup_extent(struct btrfs_fs_info *info, struct btrfs_path *path, 185 | + u64 ino, int cow){ 186 | + struct btrfs_key key; 187 | + struct btrfs_key found_key; 188 | + struct btrfs_file_extent_item *fi; 189 | + struct extent_buffer *leaf; 190 | + struct btrfs_root *fs_root; 191 | + int ret = -1; 192 | + int slot; 193 | + int total_csums = 0; 194 | + u64 bytenr; 195 | + u64 itemnum = 0; 196 | + struct btrfs_path *path1 = NULL; 197 | + 198 | + fs_root = info->fs_root; 199 | + key.objectid = ino; 200 | + key.type = BTRFS_EXTENT_DATA_KEY; 201 | + key.offset = 0; 202 | + ret = btrfs_search_slot(NULL,fs_root,&key,path,0,0); 203 | + 204 | + if(ret < 0) 205 | + goto error; 206 | + 207 | + if (ret > 1){ 208 | + fprintf(stderr, "Unable to find the entry"); 209 | + return ret; 210 | + } 211 | + struct btrfs_root *csum_root = btrfs_csum_root(info, 0); 212 | + u16 csum_size = btrfs_super_csum_size(csum_root->fs_info->super_copy); 213 | + while(1){ 214 | + leaf = path->nodes[0]; 215 | + slot = path->slots[0]; 216 | + if (slot >= 
btrfs_header_nritems(leaf)){ 217 | + ret = btrfs_next_leaf(fs_root, path); 218 | + if (ret == 0) 219 | + continue; 220 | + if (ret < 0) 221 | + goto error; 222 | + } 223 | + btrfs_item_key_to_cpu(leaf, &found_key, slot); 224 | + if (found_key.type != BTRFS_EXTENT_DATA_KEY){ 225 | + btrfs_release_path(path); 226 | + return -EINVAL; 227 | + } 228 | + 229 | + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 230 | + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 231 | + total_csums=(btrfs_file_extent_num_bytes(leaf, fi) / 1024) / csum_size; 232 | + path->slots[0]++; 233 | + itemnum++; 234 | + path1 = btrfs_alloc_path(); 235 | + struct btrfs_root *csum_root = btrfs_csum_root(info, 0); 236 | + ret = btrfs_lookup_csums(NULL,csum_root, path1, bytenr, 0, 237 | + total_csums); 238 | + btrfs_release_path(path1); 239 | + if (ret) { 240 | + fprintf(stderr, "\n Error: btrfs_lookup_csum"); 241 | + return 1; 242 | + } 243 | + } 244 | + 245 | +error: 246 | + btrfs_release_path(path); 247 | + return ret; 248 | +} 249 | + 250 | +//int cmd_inspect_dump_csum(int argc, char **argv) 251 | +static int cmd_inspect_dump_csum(const struct cmd_struct *cmd, 252 | + int argc, char **argv) 253 | +{ 254 | + struct btrfs_fs_info *info; 255 | + int ret; 256 | + struct btrfs_path path; 257 | + struct stat st; 258 | + char *filename; 259 | + struct open_ctree_flags ocf = { 0 }; 260 | + ocf.flags = OPEN_CTREE_PARTIAL; 261 | + ocf.filename = argv[2]; 262 | + 263 | + if (check_argc_exact(argc, 3)) 264 | + usage_unknown_option(cmd, argv); 265 | + 266 | + filename = argv[1]; 267 | + info = open_ctree_fs_info(&ocf); 268 | + if (!info) { 269 | + fprintf(stderr, "unable to open %s\n", argv[2]); 270 | + exit(1); 271 | + } 272 | + 273 | + ret = stat(filename, &st); 274 | + if (ret < 0) { 275 | + fprintf(stderr, "unable to open %s\n", filename); 276 | + exit(1); 277 | + } 278 | + 279 | + if(st.st_size < 1024){ 280 | + fprintf(stderr, "file less than 1KB.abort%lu", (st.st_size )); 281 | + 
exit(1); 282 | + } 283 | + 284 | + btrfs_init_path(&path); 285 | + ret = btrfs_lookup_extent(info, &path, st.st_ino, 0); 286 | + ret = close_ctree(info->fs_root); 287 | + btrfs_close_all_devices(); 288 | + 289 | + return ret; 290 | +} 291 | +DEFINE_SIMPLE_COMMAND(inspect_dump_csum, "dump-csum"); 292 | diff --git a/cmds/inspect.c b/cmds/inspect.c 293 | index 1534f204..79db1e0f 100644 294 | --- a/cmds/inspect.c 295 | +++ b/cmds/inspect.c 296 | @@ -695,6 +695,7 @@ static const struct cmd_group inspect_cmd_group = { 297 | &cmd_struct_inspect_dump_tree, 298 | &cmd_struct_inspect_dump_super, 299 | &cmd_struct_inspect_tree_stats, 300 | + &cmd_struct_inspect_dump_csum, 301 | NULL 302 | } 303 | }; 304 | -- 305 | 2.39.1 306 | 307 | -------------------------------------------------------------------------------- /patch/btrfs-progs-v5.6.1/0001-Print-csum-for-a-given-file-on-stdout.patch: -------------------------------------------------------------------------------- 1 | From 0be9ff5b193c96ecfa528fc419ab59b3b67f0c69 Mon Sep 17 00:00:00 2001 2 | From: "lakshmipathi.g" 3 | Date: Thu, 9 Jul 2020 09:02:26 -0500 4 | Subject: [PATCH] Print csum for a given file on stdout. 
5 | 6 | Sample usage: 7 | btrfs inspect-internal dump-csum /btrfs/50gbfile /dev/sda4 8 | 9 | Signed-off-by: lakshmipathi.g 10 | --- 11 | Makefile | 3 +- 12 | cmds/commands.h | 1 + 13 | cmds/inspect-dump-csum.c | 241 +++++++++++++++++++++++++++++++++++++++++++++++ 14 | cmds/inspect.c | 1 + 15 | 4 files changed, 245 insertions(+), 1 deletion(-) 16 | create mode 100644 cmds/inspect-dump-csum.c 17 | 18 | diff --git a/Makefile b/Makefile 19 | index b5643ba0..ef171993 100644 20 | --- a/Makefile 21 | +++ b/Makefile 22 | @@ -158,7 +158,8 @@ cmds_objects = cmds/subvolume.o cmds/filesystem.o cmds/device.o cmds/scrub.o \ 23 | cmds/rescue-super-recover.o \ 24 | cmds/property.o cmds/filesystem-usage.o cmds/inspect-dump-tree.o \ 25 | cmds/inspect-dump-super.o cmds/inspect-tree-stats.o cmds/filesystem-du.o \ 26 | - mkfs/common.o check/mode-common.o check/mode-lowmem.o 27 | + mkfs/common.o check/mode-common.o check/mode-lowmem.o \ 28 | + cmds/inspect-dump-csum.o 29 | libbtrfs_objects = send-stream.o send-utils.o kernel-lib/rbtree.o btrfs-list.o \ 30 | kernel-lib/radix-tree.o extent-cache.o extent_io.o \ 31 | crypto/crc32c.o common/messages.o \ 32 | diff --git a/cmds/commands.h b/cmds/commands.h 33 | index 8fa85d6c..350f456a 100644 34 | --- a/cmds/commands.h 35 | +++ b/cmds/commands.h 36 | @@ -140,6 +140,7 @@ DECLARE_COMMAND(check); 37 | DECLARE_COMMAND(chunk_recover); 38 | DECLARE_COMMAND(super_recover); 39 | DECLARE_COMMAND(inspect); 40 | +DECLARE_COMMAND(inspect_dump_csum); 41 | DECLARE_COMMAND(inspect_dump_super); 42 | DECLARE_COMMAND(inspect_dump_tree); 43 | DECLARE_COMMAND(inspect_tree_stats); 44 | diff --git a/cmds/inspect-dump-csum.c b/cmds/inspect-dump-csum.c 45 | new file mode 100644 46 | index 00000000..4d2e2131 47 | --- /dev/null 48 | +++ b/cmds/inspect-dump-csum.c 49 | @@ -0,0 +1,241 @@ 50 | +/* 51 | + * This program is free software; you can redistribute it and/or 52 | + * modify it under the terms of the GNU General Public 53 | + * License v2 as published by the Free 
Software Foundation. 54 | + * 55 | + * This program is distributed in the hope that it will be useful, 56 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of 57 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 58 | + * General Public License for more details. 59 | + * 60 | + * You should have received a copy of the GNU General Public 61 | + * License along with this program; if not, write to the 62 | + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 63 | + * Boston, MA 021110-1307, USA. 64 | + */ 65 | + 66 | +#include "kerncompat.h" 67 | +#include 68 | +#include 69 | +#include 70 | +#include 71 | +#include 72 | +#include 73 | +#include 74 | +#include 75 | +#include 76 | + 77 | +#include "ctree.h" 78 | +#include "disk-io.h" 79 | +#include "print-tree.h" 80 | +#include "transaction.h" 81 | +#include "kernel-lib/list.h" 82 | +#include "common/utils.h" 83 | +#include "cmds/commands.h" 84 | +#include "crypto/crc32c.h" 85 | +#include "common/help.h" 86 | +#include "volumes.h" 87 | + 88 | + 89 | +const char * const cmd_inspect_dump_csum_usage[] = { 90 | + "btrfs inspect-internal dump-csum ", 91 | + "Get csums for the given file.", 92 | + NULL 93 | +}; 94 | + 95 | +int btrfs_lookup_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, 96 | + struct btrfs_path *path, u64 bytenr, int cow, int total_csums) 97 | +{ 98 | + int ret; 99 | + int i; 100 | + int start_pos = 0; 101 | + struct btrfs_key file_key; 102 | + struct btrfs_key found_key; 103 | + struct btrfs_csum_item *item; 104 | + struct extent_buffer *leaf; 105 | + u64 csum_offset = 0; 106 | + u16 csum_size = 107 | + btrfs_super_csum_size(root->fs_info->super_copy); 108 | + int csums_in_item = 0; 109 | + unsigned int tree_csum = 0; 110 | + int pending_csums = total_csums; 111 | + static int cnt=1; 112 | + 113 | + file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 114 | + file_key.offset = bytenr; 115 | + file_key.type = BTRFS_EXTENT_CSUM_KEY; 116 | + ret = 
btrfs_search_slot(trans, root, &file_key, path, 0, cow); 117 | + if (ret < 0) 118 | + goto fail; 119 | + while(1){ 120 | + leaf = path->nodes[0]; 121 | + if (ret > 0) { 122 | + ret = 1; 123 | + if (path->slots[0] == 0) 124 | + goto fail; 125 | + path->slots[0]--; 126 | + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 127 | + if (found_key.type != BTRFS_EXTENT_CSUM_KEY){ 128 | + fprintf(stderr, "\nInvalid key found."); 129 | + goto fail; 130 | + } 131 | + 132 | + csum_offset = ((bytenr - found_key.offset) / root->fs_info->sectorsize) * csum_size; 133 | + csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); 134 | + csums_in_item /= csum_size; 135 | + csums_in_item -= ( bytenr - found_key.offset ) / root->fs_info->sectorsize; 136 | + start_pos=csum_offset; 137 | + } 138 | + if (path->slots[0] >= btrfs_header_nritems(leaf)) { 139 | + if (pending_csums > 0){ 140 | + ret = btrfs_next_leaf(root, path); 141 | + if (ret == 0) 142 | + continue; 143 | + } 144 | + } 145 | + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); 146 | + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 147 | + if (!ret){ 148 | + start_pos=0; 149 | + csum_offset = ( bytenr - found_key.offset ) / root->fs_info->sectorsize; 150 | + csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); 151 | + csums_in_item /= csum_size; 152 | + } 153 | + if (csums_in_item > pending_csums){ 154 | + //possibly,some other csums on this item. 
155 | + for(i = 0; i < pending_csums; i++, cnt++){ 156 | + read_extent_buffer(leaf, &tree_csum, 157 | + (unsigned long)item + ((i*csum_size)+start_pos) , csum_size); 158 | + fprintf(stdout, "%x ", tree_csum); 159 | + if (cnt % 8 == 0) 160 | + fprintf(stdout, "\n"); 161 | + } 162 | + pending_csums = 0; 163 | + return 0; 164 | + }else{ 165 | + for(i = 0; i < csums_in_item; i++, cnt++){ 166 | + read_extent_buffer(leaf, &tree_csum, 167 | + (unsigned long)item+((i*csum_size)+start_pos), csum_size); 168 | + fprintf(stdout, "%x ", tree_csum); 169 | + if (cnt % 8 == 0) 170 | + fprintf(stdout, "\n"); 171 | + } 172 | + } 173 | + pending_csums -= csums_in_item; 174 | + ret = 0; 175 | + if (pending_csums > 0){ 176 | + path->slots[0]++; 177 | + 178 | + }else 179 | + return 0; 180 | + } 181 | +fail: 182 | + fprintf(stderr, "btrfs_lookup_csums search failed."); 183 | + if (ret > 0) 184 | + ret = -ENOENT; 185 | + return ret; 186 | +} 187 | + 188 | +int btrfs_lookup_extent(struct btrfs_fs_info *info, struct btrfs_path *path, 189 | + u64 ino, int cow){ 190 | + struct btrfs_key key; 191 | + struct btrfs_key found_key; 192 | + struct btrfs_file_extent_item *fi; 193 | + struct extent_buffer *leaf; 194 | + struct btrfs_root *fs_root; 195 | + int ret = -1; 196 | + int slot; 197 | + int total_csums = 0; 198 | + u64 bytenr; 199 | + u64 itemnum = 0; 200 | + struct btrfs_path *path1 = NULL; 201 | + 202 | + fs_root = info->fs_root; 203 | + key.objectid = ino; 204 | + key.type = BTRFS_EXTENT_DATA_KEY; 205 | + key.offset = 0; 206 | + ret = btrfs_search_slot(NULL,fs_root,&key,path,0,0); 207 | + 208 | + if(ret < 0) 209 | + goto error; 210 | + 211 | + if (ret > 1){ 212 | + fprintf(stderr, "Unable to find the entry"); 213 | + return ret; 214 | + } 215 | + u16 csum_size = btrfs_super_csum_size(info->csum_root->fs_info->super_copy); 216 | + while(1){ 217 | + leaf = path->nodes[0]; 218 | + slot = path->slots[0]; 219 | + if (slot >= btrfs_header_nritems(leaf)){ 220 | + ret = btrfs_next_leaf(fs_root, 
path); 221 | + if (ret == 0) 222 | + continue; 223 | + if (ret < 0) 224 | + goto error; 225 | + } 226 | + btrfs_item_key_to_cpu(leaf, &found_key, slot); 227 | + if (found_key.type != BTRFS_EXTENT_DATA_KEY){ 228 | + btrfs_release_path(path); 229 | + return -EINVAL; 230 | + } 231 | + 232 | + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 233 | + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 234 | + total_csums=(btrfs_file_extent_num_bytes(leaf, fi) / 1024) / csum_size; 235 | + path->slots[0]++; 236 | + itemnum++; 237 | + path1 = btrfs_alloc_path(); 238 | + ret = btrfs_lookup_csums(NULL,info->csum_root, path1, bytenr, 0, 239 | + total_csums); 240 | + btrfs_release_path(path1); 241 | + if (ret) { 242 | + fprintf(stderr, "\n Error: btrfs_lookup_csum"); 243 | + return 1; 244 | + } 245 | + } 246 | + 247 | +error: 248 | + btrfs_release_path(path); 249 | + return ret; 250 | +} 251 | + 252 | +//int cmd_inspect_dump_csum(int argc, char **argv) 253 | +static int cmd_inspect_dump_csum(const struct cmd_struct *cmd, 254 | + int argc, char **argv) 255 | +{ 256 | + struct btrfs_fs_info *info; 257 | + int ret; 258 | + struct btrfs_path path; 259 | + struct stat st; 260 | + char *filename; 261 | + 262 | + if (check_argc_exact(argc, 3)) 263 | + usage_unknown_option(cmd, argv); 264 | + 265 | + filename = argv[1]; 266 | + info = open_ctree_fs_info(argv[2], 0, 0, 0, OPEN_CTREE_PARTIAL); 267 | + if (!info) { 268 | + fprintf(stderr, "unable to open %s\n", argv[2]); 269 | + exit(1); 270 | + } 271 | + 272 | + ret = stat(filename, &st); 273 | + if (ret < 0) { 274 | + fprintf(stderr, "unable to open %s\n", filename); 275 | + exit(1); 276 | + } 277 | + 278 | + if(st.st_size < 1024){ 279 | + fprintf(stderr, "file less than 1KB.abort%lu", (st.st_size )); 280 | + exit(1); 281 | + } 282 | + 283 | + btrfs_init_path(&path); 284 | + ret = btrfs_lookup_extent(info, &path, st.st_ino, 0); 285 | + ret = close_ctree(info->fs_root); 286 | + btrfs_close_all_devices(); 287 | + 288 | + 
return ret; 289 | +} 290 | +DEFINE_SIMPLE_COMMAND(inspect_dump_csum, "dump-csum"); 291 | diff --git a/cmds/inspect.c b/cmds/inspect.c 292 | index 5b946da0..12387ce0 100644 293 | --- a/cmds/inspect.c 294 | +++ b/cmds/inspect.c 295 | @@ -670,6 +670,7 @@ static const struct cmd_group inspect_cmd_group = { 296 | &cmd_struct_inspect_dump_tree, 297 | &cmd_struct_inspect_dump_super, 298 | &cmd_struct_inspect_tree_stats, 299 | + &cmd_struct_inspect_dump_csum, 300 | NULL 301 | } 302 | }; 303 | -- 304 | 2.11.0 305 | 306 | -------------------------------------------------------------------------------- /patch/btrfs-progs-v5.9/0001-Print-csum-for-a-given-file-on-stdout.patch: -------------------------------------------------------------------------------- 1 | commit 334f30852f7592844461ee979c72bb3ddec2c4d4 2 | Author: Jesse Litton 3 | Date: Wed Dec 23 12:53:53 2020 -0600 4 | 5 | [PATCH] Print csum for a given file on stdout. 6 | 7 | diff --git a/Makefile b/Makefile 8 | index 381b630d..b46a019f 100644 9 | --- a/Makefile 10 | +++ b/Makefile 11 | @@ -158,7 +158,8 @@ cmds_objects = cmds/subvolume.o cmds/filesystem.o cmds/device.o cmds/scrub.o \ 12 | cmds/rescue-super-recover.o \ 13 | cmds/property.o cmds/filesystem-usage.o cmds/inspect-dump-tree.o \ 14 | cmds/inspect-dump-super.o cmds/inspect-tree-stats.o cmds/filesystem-du.o \ 15 | - mkfs/common.o check/mode-common.o check/mode-lowmem.o 16 | + mkfs/common.o check/mode-common.o check/mode-lowmem.o \ 17 | + cmds/inspect-dump-csum.o 18 | libbtrfs_objects = common/send-stream.o common/send-utils.o kernel-lib/rbtree.o btrfs-list.o \ 19 | kernel-lib/radix-tree.o common/extent-cache.o kernel-shared/extent_io.o \ 20 | crypto/crc32c.o common/messages.o \ 21 | diff --git a/cmds/commands.h b/cmds/commands.h 22 | index 8fa85d6c..350f456a 100644 23 | --- a/cmds/commands.h 24 | +++ b/cmds/commands.h 25 | @@ -140,6 +140,7 @@ DECLARE_COMMAND(check); 26 | DECLARE_COMMAND(chunk_recover); 27 | DECLARE_COMMAND(super_recover); 28 | 
DECLARE_COMMAND(inspect); 29 | +DECLARE_COMMAND(inspect_dump_csum); 30 | DECLARE_COMMAND(inspect_dump_super); 31 | DECLARE_COMMAND(inspect_dump_tree); 32 | DECLARE_COMMAND(inspect_tree_stats); 33 | diff --git a/cmds/inspect-dump-csum.c b/cmds/inspect-dump-csum.c 34 | new file mode 100644 35 | index 00000000..29477198 36 | --- /dev/null 37 | +++ b/cmds/inspect-dump-csum.c 38 | @@ -0,0 +1,241 @@ 39 | +/* 40 | + * This program is free software; you can redistribute it and/or 41 | + * modify it under the terms of the GNU General Public 42 | + * License v2 as published by the Free Software Foundation. 43 | + * 44 | + * This program is distributed in the hope that it will be useful, 45 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of 46 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 47 | + * General Public License for more details. 48 | + * 49 | + * You should have received a copy of the GNU General Public 50 | + * License along with this program; if not, write to the 51 | + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 52 | + * Boston, MA 021110-1307, USA. 
53 | + */ 54 | + 55 | +#include "kerncompat.h" 56 | +#include 57 | +#include 58 | +#include 59 | +#include 60 | +#include 61 | +#include 62 | +#include 63 | +#include 64 | +#include 65 | + 66 | +#include "kernel-shared/ctree.h" 67 | +#include "kernel-shared/disk-io.h" 68 | +#include "kernel-shared/print-tree.h" 69 | +#include "kernel-shared/transaction.h" 70 | +#include "kernel-lib/list.h" 71 | +#include "common/utils.h" 72 | +#include "cmds/commands.h" 73 | +#include "crypto/crc32c.h" 74 | +#include "common/help.h" 75 | +#include "kernel-shared/volumes.h" 76 | + 77 | + 78 | +const char * const cmd_inspect_dump_csum_usage[] = { 79 | + "btrfs inspect-internal dump-csum ", 80 | + "Get csums for the given file.", 81 | + NULL 82 | +}; 83 | + 84 | +int btrfs_lookup_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, 85 | + struct btrfs_path *path, u64 bytenr, int cow, int total_csums) 86 | +{ 87 | + int ret; 88 | + int i; 89 | + int start_pos = 0; 90 | + struct btrfs_key file_key; 91 | + struct btrfs_key found_key; 92 | + struct btrfs_csum_item *item; 93 | + struct extent_buffer *leaf; 94 | + u64 csum_offset = 0; 95 | + u16 csum_size = 96 | + btrfs_super_csum_size(root->fs_info->super_copy); 97 | + int csums_in_item = 0; 98 | + unsigned int tree_csum = 0; 99 | + int pending_csums = total_csums; 100 | + static int cnt=1; 101 | + 102 | + file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 103 | + file_key.offset = bytenr; 104 | + file_key.type = BTRFS_EXTENT_CSUM_KEY; 105 | + ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); 106 | + if (ret < 0) 107 | + goto fail; 108 | + while(1){ 109 | + leaf = path->nodes[0]; 110 | + if (ret > 0) { 111 | + ret = 1; 112 | + if (path->slots[0] == 0) 113 | + goto fail; 114 | + path->slots[0]--; 115 | + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 116 | + if (found_key.type != BTRFS_EXTENT_CSUM_KEY){ 117 | + fprintf(stderr, "\nInvalid key found."); 118 | + goto fail; 119 | + } 120 | + 121 | + csum_offset = 
((bytenr - found_key.offset) / root->fs_info->sectorsize) * csum_size; 122 | + csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); 123 | + csums_in_item /= csum_size; 124 | + csums_in_item -= ( bytenr - found_key.offset ) / root->fs_info->sectorsize; 125 | + start_pos=csum_offset; 126 | + } 127 | + if (path->slots[0] >= btrfs_header_nritems(leaf)) { 128 | + if (pending_csums > 0){ 129 | + ret = btrfs_next_leaf(root, path); 130 | + if (ret == 0) 131 | + continue; 132 | + } 133 | + } 134 | + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); 135 | + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 136 | + if (!ret){ 137 | + start_pos=0; 138 | + csum_offset = ( bytenr - found_key.offset ) / root->fs_info->sectorsize; 139 | + csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); 140 | + csums_in_item /= csum_size; 141 | + } 142 | + if (csums_in_item > pending_csums){ 143 | + //possibly,some other csums on this item. 144 | + for(i = 0; i < pending_csums; i++, cnt++){ 145 | + read_extent_buffer(leaf, &tree_csum, 146 | + (unsigned long)item + ((i*csum_size)+start_pos) , csum_size); 147 | + fprintf(stdout, "%x ", tree_csum); 148 | + if (cnt % 8 == 0) 149 | + fprintf(stdout, "\n"); 150 | + } 151 | + pending_csums = 0; 152 | + return 0; 153 | + }else{ 154 | + for(i = 0; i < csums_in_item; i++, cnt++){ 155 | + read_extent_buffer(leaf, &tree_csum, 156 | + (unsigned long)item+((i*csum_size)+start_pos), csum_size); 157 | + fprintf(stdout, "%x ", tree_csum); 158 | + if (cnt % 8 == 0) 159 | + fprintf(stdout, "\n"); 160 | + } 161 | + } 162 | + pending_csums -= csums_in_item; 163 | + ret = 0; 164 | + if (pending_csums > 0){ 165 | + path->slots[0]++; 166 | + 167 | + }else 168 | + return 0; 169 | + } 170 | +fail: 171 | + fprintf(stderr, "btrfs_lookup_csums search failed."); 172 | + if (ret > 0) 173 | + ret = -ENOENT; 174 | + return ret; 175 | +} 176 | + 177 | +int btrfs_lookup_extent(struct btrfs_fs_info *info, struct btrfs_path *path, 178 | + u64 
ino, int cow){ 179 | + struct btrfs_key key; 180 | + struct btrfs_key found_key; 181 | + struct btrfs_file_extent_item *fi; 182 | + struct extent_buffer *leaf; 183 | + struct btrfs_root *fs_root; 184 | + int ret = -1; 185 | + int slot; 186 | + int total_csums = 0; 187 | + u64 bytenr; 188 | + u64 itemnum = 0; 189 | + struct btrfs_path *path1 = NULL; 190 | + 191 | + fs_root = info->fs_root; 192 | + key.objectid = ino; 193 | + key.type = BTRFS_EXTENT_DATA_KEY; 194 | + key.offset = 0; 195 | + ret = btrfs_search_slot(NULL,fs_root,&key,path,0,0); 196 | + 197 | + if(ret < 0) 198 | + goto error; 199 | + 200 | + if (ret > 1){ 201 | + fprintf(stderr, "Unable to find the entry"); 202 | + return ret; 203 | + } 204 | + u16 csum_size = btrfs_super_csum_size(info->csum_root->fs_info->super_copy); 205 | + while(1){ 206 | + leaf = path->nodes[0]; 207 | + slot = path->slots[0]; 208 | + if (slot >= btrfs_header_nritems(leaf)){ 209 | + ret = btrfs_next_leaf(fs_root, path); 210 | + if (ret == 0) 211 | + continue; 212 | + if (ret < 0) 213 | + goto error; 214 | + } 215 | + btrfs_item_key_to_cpu(leaf, &found_key, slot); 216 | + if (found_key.type != BTRFS_EXTENT_DATA_KEY){ 217 | + btrfs_release_path(path); 218 | + return -EINVAL; 219 | + } 220 | + 221 | + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 222 | + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 223 | + total_csums=(btrfs_file_extent_num_bytes(leaf, fi) / 1024) / csum_size; 224 | + path->slots[0]++; 225 | + itemnum++; 226 | + path1 = btrfs_alloc_path(); 227 | + ret = btrfs_lookup_csums(NULL,info->csum_root, path1, bytenr, 0, 228 | + total_csums); 229 | + btrfs_release_path(path1); 230 | + if (ret) { 231 | + fprintf(stderr, "\n Error: btrfs_lookup_csum"); 232 | + return 1; 233 | + } 234 | + } 235 | + 236 | +error: 237 | + btrfs_release_path(path); 238 | + return ret; 239 | +} 240 | + 241 | +//int cmd_inspect_dump_csum(int argc, char **argv) 242 | +static int cmd_inspect_dump_csum(const struct cmd_struct *cmd, 
243 | + int argc, char **argv) 244 | +{ 245 | + struct btrfs_fs_info *info; 246 | + int ret; 247 | + struct btrfs_path path; 248 | + struct stat st; 249 | + char *filename; 250 | + 251 | + if (check_argc_exact(argc, 3)) 252 | + usage_unknown_option(cmd, argv); 253 | + 254 | + filename = argv[1]; 255 | + info = open_ctree_fs_info(argv[2], 0, 0, 0, OPEN_CTREE_PARTIAL); 256 | + if (!info) { 257 | + fprintf(stderr, "unable to open %s\n", argv[2]); 258 | + exit(1); 259 | + } 260 | + 261 | + ret = stat(filename, &st); 262 | + if (ret < 0) { 263 | + fprintf(stderr, "unable to open %s\n", filename); 264 | + exit(1); 265 | + } 266 | + 267 | + if(st.st_size < 1024){ 268 | + fprintf(stderr, "file less than 1KB.abort%lu", (st.st_size )); 269 | + exit(1); 270 | + } 271 | + 272 | + btrfs_init_path(&path); 273 | + ret = btrfs_lookup_extent(info, &path, st.st_ino, 0); 274 | + ret = close_ctree(info->fs_root); 275 | + btrfs_close_all_devices(); 276 | + 277 | + return ret; 278 | +} 279 | +DEFINE_SIMPLE_COMMAND(inspect_dump_csum, "dump-csum"); 280 | diff --git a/cmds/inspect.c b/cmds/inspect.c 281 | index 2530b904..f8f17a94 100644 282 | --- a/cmds/inspect.c 283 | +++ b/cmds/inspect.c 284 | @@ -667,6 +667,7 @@ static const struct cmd_group inspect_cmd_group = { 285 | &cmd_struct_inspect_dump_tree, 286 | &cmd_struct_inspect_dump_super, 287 | &cmd_struct_inspect_tree_stats, 288 | + &cmd_struct_inspect_dump_csum, 289 | NULL 290 | } 291 | }; 292 | -------------------------------------------------------------------------------- /patch/btrfs-progs-v6.1/0001-Print-csum-for-a-given-file-on-stdout.patch: -------------------------------------------------------------------------------- 1 | From ec690704ff11868d538d957b004e87ebe9f08d11 Mon Sep 17 00:00:00 2001 2 | From: "lakshmipathi.g" 3 | Date: Tue, 18 May 2021 14:52:41 +0530 4 | Subject: [PATCH] Print csum for a given file on stdout. 
5 | 6 | Signed-off-by: lakshmipathi.g 7 | --- 8 | Makefile | 2 +- 9 | cmds/commands.h | 1 + 10 | cmds/inspect-dump-csum.c | 246 +++++++++++++++++++++++++++++++++++++++ 11 | cmds/inspect.c | 1 + 12 | 4 files changed, 249 insertions(+), 1 deletion(-) 13 | create mode 100644 cmds/inspect-dump-csum.c 14 | 15 | diff --git a/Makefile b/Makefile 16 | index 23b1fae1..4d83e7c3 100644 17 | --- a/Makefile 18 | +++ b/Makefile 19 | @@ -201,7 +201,7 @@ objects = \ 20 | libbtrfsutil/stubs.o \ 21 | libbtrfsutil/subvolume.o 22 | 23 | -cmds_objects = cmds/subvolume.o cmds/subvolume-list.o \ 24 | +cmds_objects = cmds/subvolume.o cmds/subvolume-list.o cmds/inspect-dump-csum.o \ 25 | cmds/filesystem.o cmds/device.o cmds/scrub.o \ 26 | cmds/inspect.o cmds/balance.o cmds/send.o cmds/receive.o \ 27 | cmds/quota.o cmds/qgroup.o cmds/replace.o check/main.o \ 28 | diff --git a/cmds/commands.h b/cmds/commands.h 29 | index 5ab7c881..df62c6b1 100644 30 | --- a/cmds/commands.h 31 | +++ b/cmds/commands.h 32 | @@ -139,6 +139,7 @@ DECLARE_COMMAND(device); 33 | DECLARE_COMMAND(scrub); 34 | DECLARE_COMMAND(check); 35 | DECLARE_COMMAND(inspect); 36 | +DECLARE_COMMAND(inspect_dump_csum); 37 | DECLARE_COMMAND(inspect_dump_super); 38 | DECLARE_COMMAND(inspect_dump_tree); 39 | DECLARE_COMMAND(inspect_tree_stats); 40 | diff --git a/cmds/inspect-dump-csum.c b/cmds/inspect-dump-csum.c 41 | new file mode 100644 42 | index 00000000..f46b23c4 43 | --- /dev/null 44 | +++ b/cmds/inspect-dump-csum.c 45 | @@ -0,0 +1,246 @@ 46 | +/* 47 | + * This program is free software; you can redistribute it and/or 48 | + * modify it under the terms of the GNU General Public 49 | + * License v2 as published by the Free Software Foundation. 50 | + * 51 | + * This program is distributed in the hope that it will be useful, 52 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of 53 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 54 | + * General Public License for more details. 
55 | + * 56 | + * You should have received a copy of the GNU General Public 57 | + * License along with this program; if not, write to the 58 | + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 59 | + * Boston, MA 021110-1307, USA. 60 | + */ 61 | + 62 | +#include "kerncompat.h" 63 | +#include 64 | +#include 65 | +#include 66 | +#include 67 | +#include 68 | +#include 69 | +#include 70 | +#include 71 | +#include 72 | + 73 | +#include "kernel-shared/ctree.h" 74 | +#include "kernel-shared/disk-io.h" 75 | +#include "kernel-shared/print-tree.h" 76 | +#include "kernel-shared/transaction.h" 77 | +#include "kernel-lib/list.h" 78 | +#include "common/utils.h" 79 | +#include "cmds/commands.h" 80 | +#include "crypto/crc32c.h" 81 | +#include "common/help.h" 82 | +#include "kernel-shared/volumes.h" 83 | + 84 | + 85 | +const char * const cmd_inspect_dump_csum_usage[] = { 86 | + "btrfs inspect-internal dump-csum ", 87 | + "Get csums for the given file.", 88 | + NULL 89 | +}; 90 | + 91 | +int btrfs_lookup_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, 92 | + struct btrfs_path *path, u64 bytenr, int cow, int total_csums) 93 | +{ 94 | + int ret; 95 | + int i; 96 | + int start_pos = 0; 97 | + struct btrfs_key file_key; 98 | + struct btrfs_key found_key; 99 | + struct btrfs_csum_item *item; 100 | + struct extent_buffer *leaf; 101 | + u64 csum_offset = 0; 102 | + u16 csum_size = 103 | + btrfs_super_csum_size(root->fs_info->super_copy); 104 | + int csums_in_item = 0; 105 | + unsigned int tree_csum = 0; 106 | + int pending_csums = total_csums; 107 | + static int cnt=1; 108 | + 109 | + file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 110 | + file_key.offset = bytenr; 111 | + file_key.type = BTRFS_EXTENT_CSUM_KEY; 112 | + ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); 113 | + if (ret < 0) 114 | + goto fail; 115 | + while(1){ 116 | + leaf = path->nodes[0]; 117 | + if (ret > 0) { 118 | + ret = 1; 119 | + if (path->slots[0] == 0) 120 | + goto 
fail; 121 | + path->slots[0]--; 122 | + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 123 | + if (found_key.type != BTRFS_EXTENT_CSUM_KEY){ 124 | + fprintf(stderr, "\nInvalid key found."); 125 | + goto fail; 126 | + } 127 | + 128 | + csum_offset = ((bytenr - found_key.offset) / root->fs_info->sectorsize) * csum_size; 129 | + csums_in_item = btrfs_item_size(leaf, path->slots[0]); 130 | + csums_in_item /= csum_size; 131 | + csums_in_item -= ( bytenr - found_key.offset ) / root->fs_info->sectorsize; 132 | + start_pos=csum_offset; 133 | + } 134 | + if (path->slots[0] >= btrfs_header_nritems(leaf)) { 135 | + if (pending_csums > 0){ 136 | + ret = btrfs_next_leaf(root, path); 137 | + if (ret == 0) 138 | + continue; 139 | + } 140 | + } 141 | + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); 142 | + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 143 | + if (!ret){ 144 | + start_pos=0; 145 | + csum_offset = ( bytenr - found_key.offset ) / root->fs_info->sectorsize; 146 | + csums_in_item = btrfs_item_size(leaf, path->slots[0]); 147 | + csums_in_item /= csum_size; 148 | + } 149 | + if (csums_in_item > pending_csums){ 150 | + //possibly,some other csums on this item. 
151 | + for(i = 0; i < pending_csums; i++, cnt++){ 152 | + read_extent_buffer(leaf, &tree_csum, 153 | + (unsigned long)item + ((i*csum_size)+start_pos) , csum_size); 154 | + fprintf(stdout, "%x ", tree_csum); 155 | + if (cnt % 8 == 0) 156 | + fprintf(stdout, "\n"); 157 | + } 158 | + pending_csums = 0; 159 | + return 0; 160 | + }else{ 161 | + for(i = 0; i < csums_in_item; i++, cnt++){ 162 | + read_extent_buffer(leaf, &tree_csum, 163 | + (unsigned long)item+((i*csum_size)+start_pos), csum_size); 164 | + fprintf(stdout, "%x ", tree_csum); 165 | + if (cnt % 8 == 0) 166 | + fprintf(stdout, "\n"); 167 | + } 168 | + } 169 | + pending_csums -= csums_in_item; 170 | + ret = 0; 171 | + if (pending_csums > 0){ 172 | + path->slots[0]++; 173 | + 174 | + }else 175 | + return 0; 176 | + } 177 | +fail: 178 | + fprintf(stderr, "btrfs_lookup_csums search failed."); 179 | + if (ret > 0) 180 | + ret = -ENOENT; 181 | + return ret; 182 | +} 183 | + 184 | +int btrfs_lookup_extent(struct btrfs_fs_info *info, struct btrfs_path *path, 185 | + u64 ino, int cow){ 186 | + struct btrfs_key key; 187 | + struct btrfs_key found_key; 188 | + struct btrfs_file_extent_item *fi; 189 | + struct extent_buffer *leaf; 190 | + struct btrfs_root *fs_root; 191 | + int ret = -1; 192 | + int slot; 193 | + int total_csums = 0; 194 | + u64 bytenr; 195 | + u64 itemnum = 0; 196 | + struct btrfs_path *path1 = NULL; 197 | + 198 | + fs_root = info->fs_root; 199 | + key.objectid = ino; 200 | + key.type = BTRFS_EXTENT_DATA_KEY; 201 | + key.offset = 0; 202 | + ret = btrfs_search_slot(NULL,fs_root,&key,path,0,0); 203 | + 204 | + if(ret < 0) 205 | + goto error; 206 | + 207 | + if (ret > 1){ 208 | + fprintf(stderr, "Unable to find the entry"); 209 | + return ret; 210 | + } 211 | + struct btrfs_root *csum_root = btrfs_csum_root(info, 0); 212 | + u16 csum_size = btrfs_super_csum_size(csum_root->fs_info->super_copy); 213 | + while(1){ 214 | + leaf = path->nodes[0]; 215 | + slot = path->slots[0]; 216 | + if (slot >= 
btrfs_header_nritems(leaf)){ 217 | + ret = btrfs_next_leaf(fs_root, path); 218 | + if (ret == 0) 219 | + continue; 220 | + if (ret < 0) 221 | + goto error; 222 | + } 223 | + btrfs_item_key_to_cpu(leaf, &found_key, slot); 224 | + if (found_key.type != BTRFS_EXTENT_DATA_KEY){ 225 | + btrfs_release_path(path); 226 | + return -EINVAL; 227 | + } 228 | + 229 | + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 230 | + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 231 | + total_csums=(btrfs_file_extent_num_bytes(leaf, fi) / 1024) / csum_size; 232 | + path->slots[0]++; 233 | + itemnum++; 234 | + path1 = btrfs_alloc_path(); 235 | + struct btrfs_root *csum_root = btrfs_csum_root(info, 0); 236 | + ret = btrfs_lookup_csums(NULL,csum_root, path1, bytenr, 0, 237 | + total_csums); 238 | + btrfs_release_path(path1); 239 | + if (ret) { 240 | + fprintf(stderr, "\n Error: btrfs_lookup_csum"); 241 | + return 1; 242 | + } 243 | + } 244 | + 245 | +error: 246 | + btrfs_release_path(path); 247 | + return ret; 248 | +} 249 | + 250 | +//int cmd_inspect_dump_csum(int argc, char **argv) 251 | +static int cmd_inspect_dump_csum(const struct cmd_struct *cmd, 252 | + int argc, char **argv) 253 | +{ 254 | + struct btrfs_fs_info *info; 255 | + int ret; 256 | + struct btrfs_path path; 257 | + struct stat st; 258 | + char *filename; 259 | + struct open_ctree_flags ocf = { 0 }; 260 | + ocf.flags = OPEN_CTREE_PARTIAL; 261 | + ocf.filename = argv[2]; 262 | + 263 | + if (check_argc_exact(argc, 3)) 264 | + usage_unknown_option(cmd, argv); 265 | + 266 | + filename = argv[1]; 267 | + info = open_ctree_fs_info(&ocf); 268 | + if (!info) { 269 | + fprintf(stderr, "unable to open %s\n", argv[2]); 270 | + exit(1); 271 | + } 272 | + 273 | + ret = stat(filename, &st); 274 | + if (ret < 0) { 275 | + fprintf(stderr, "unable to open %s\n", filename); 276 | + exit(1); 277 | + } 278 | + 279 | + if(st.st_size < 1024){ 280 | + fprintf(stderr, "file less than 1KB.abort%lu", (st.st_size )); 281 | + 
exit(1); 282 | + } 283 | + 284 | + btrfs_init_path(&path); 285 | + ret = btrfs_lookup_extent(info, &path, st.st_ino, 0); 286 | + ret = close_ctree(info->fs_root); 287 | + btrfs_close_all_devices(); 288 | + 289 | + return ret; 290 | +} 291 | +DEFINE_SIMPLE_COMMAND(inspect_dump_csum, "dump-csum"); 292 | diff --git a/cmds/inspect.c b/cmds/inspect.c 293 | index 5e327e74..e2b972ab 100644 294 | --- a/cmds/inspect.c 295 | +++ b/cmds/inspect.c 296 | @@ -1535,6 +1535,7 @@ static const struct cmd_group inspect_cmd_group = { 297 | &cmd_struct_inspect_dump_tree, 298 | &cmd_struct_inspect_dump_super, 299 | &cmd_struct_inspect_tree_stats, 300 | + &cmd_struct_inspect_dump_csum, 301 | #if EXPERIMENTAL 302 | &cmd_struct_inspect_list_chunks, 303 | #endif 304 | -- 305 | 2.39.1 306 | 307 | -------------------------------------------------------------------------------- /patch/btrfs-progs-v6.11/0001-Print-csum-for-a-given-file-on-stdout.patch: -------------------------------------------------------------------------------- 1 | From 3e4eb749f28d87aaa2bdf74f89bb91bf4651d945 Mon Sep 17 00:00:00 2001 2 | From: Lakshmipathi 3 | Date: Sun, 3 Nov 2024 22:22:08 +0530 4 | Subject: [PATCH] Print csum for a given file on stdout 5 | 6 | Signed-off-by: Lakshmipathi 7 | --- 8 | Makefile | 2 +- 9 | cmds/commands.h | 1 + 10 | cmds/inspect-dump-csum.c | 244 +++++++++++++++++++++++++++++++++++++++ 11 | cmds/inspect.c | 1 + 12 | 4 files changed, 247 insertions(+), 1 deletion(-) 13 | create mode 100644 cmds/inspect-dump-csum.c 14 | 15 | diff --git a/Makefile b/Makefile 16 | index 2f8d2fa0..a460cacb 100644 17 | --- a/Makefile 18 | +++ b/Makefile 19 | @@ -251,7 +251,7 @@ objects = \ 20 | libbtrfsutil/stubs.o \ 21 | libbtrfsutil/subvolume.o 22 | 23 | -cmds_objects = cmds/subvolume.o cmds/subvolume-list.o \ 24 | +cmds_objects = cmds/subvolume.o cmds/subvolume-list.o cmds/inspect-dump-csum.o \ 25 | cmds/filesystem.o cmds/device.o cmds/scrub.o \ 26 | cmds/inspect.o cmds/balance.o cmds/send.o cmds/receive.o \ 27 
| cmds/quota.o cmds/qgroup.o cmds/replace.o check/main.o \ 28 | diff --git a/cmds/commands.h b/cmds/commands.h 29 | index c19e664a..098be282 100644 30 | --- a/cmds/commands.h 31 | +++ b/cmds/commands.h 32 | @@ -140,6 +140,7 @@ DECLARE_COMMAND(device); 33 | DECLARE_COMMAND(scrub); 34 | DECLARE_COMMAND(check); 35 | DECLARE_COMMAND(inspect); 36 | +DECLARE_COMMAND(inspect_dump_csum); 37 | DECLARE_COMMAND(inspect_dump_super); 38 | DECLARE_COMMAND(inspect_dump_tree); 39 | DECLARE_COMMAND(inspect_tree_stats); 40 | diff --git a/cmds/inspect-dump-csum.c b/cmds/inspect-dump-csum.c 41 | new file mode 100644 42 | index 00000000..e61c09c3 43 | --- /dev/null 44 | +++ b/cmds/inspect-dump-csum.c 45 | @@ -0,0 +1,244 @@ 46 | +/* 47 | + * This program is free software; you can redistribute it and/or 48 | + * modify it under the terms of the GNU General Public 49 | + * License v2 as published by the Free Software Foundation. 50 | + * 51 | + * This program is distributed in the hope that it will be useful, 52 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of 53 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 54 | + * General Public License for more details. 55 | + * 56 | + * You should have received a copy of the GNU General Public 57 | + * License along with this program; if not, write to the 58 | + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 59 | + * Boston, MA 021110-1307, USA. 
60 | + */ 61 | + 62 | +#include "kerncompat.h" 63 | +#include 64 | +#include 65 | +#include 66 | +#include 67 | +#include 68 | +#include 69 | +#include 70 | +#include 71 | +#include 72 | + 73 | +#include "kernel-shared/ctree.h" 74 | +#include "kernel-shared/disk-io.h" 75 | +#include "kernel-shared/print-tree.h" 76 | +#include "kernel-shared/transaction.h" 77 | +#include "kernel-lib/list.h" 78 | +#include "common/utils.h" 79 | +#include "cmds/commands.h" 80 | +#include "crypto/crc32c.h" 81 | +#include "common/help.h" 82 | +#include "kernel-shared/volumes.h" 83 | + 84 | + 85 | +static const char * const cmd_inspect_dump_csum_usage[] = { 86 | + "btrfs inspect-internal dump-csum ", 87 | + "Get csums for the given file.", 88 | + NULL 89 | +}; 90 | + 91 | +static int btrfs_lookup_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, 92 | + struct btrfs_path *path, u64 bytenr, int cow, int total_csums) 93 | +{ 94 | + int ret; 95 | + int i; 96 | + int start_pos = 0; 97 | + struct btrfs_key file_key; 98 | + struct btrfs_key found_key; 99 | + struct btrfs_csum_item *item; 100 | + struct extent_buffer *leaf; 101 | + u64 csum_offset = 0; 102 | + u16 csum_size = 103 | + btrfs_super_csum_size(root->fs_info->super_copy); 104 | + int csums_in_item = 0; 105 | + unsigned int tree_csum = 0; 106 | + int pending_csums = total_csums; 107 | + static int cnt=1; 108 | + 109 | + file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 110 | + file_key.offset = bytenr; 111 | + file_key.type = BTRFS_EXTENT_CSUM_KEY; 112 | + ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); 113 | + if (ret < 0) 114 | + goto fail; 115 | + while(1){ 116 | + leaf = path->nodes[0]; 117 | + if (ret > 0) { 118 | + ret = 1; 119 | + if (path->slots[0] == 0) 120 | + goto fail; 121 | + path->slots[0]--; 122 | + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 123 | + if (found_key.type != BTRFS_EXTENT_CSUM_KEY){ 124 | + fprintf(stderr, "\nInvalid key found."); 125 | + goto fail; 126 | + } 127 | + 
128 | + csum_offset = ((bytenr - found_key.offset) / root->fs_info->sectorsize) * csum_size; 129 | + csums_in_item = btrfs_item_size(leaf, path->slots[0]); 130 | + csums_in_item /= csum_size; 131 | + csums_in_item -= ( bytenr - found_key.offset ) / root->fs_info->sectorsize; 132 | + start_pos=csum_offset; 133 | + } 134 | + if (path->slots[0] >= btrfs_header_nritems(leaf)) { 135 | + if (pending_csums > 0){ 136 | + ret = btrfs_next_leaf(root, path); 137 | + if (ret == 0) 138 | + continue; 139 | + } 140 | + } 141 | + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); 142 | + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 143 | + if (!ret){ 144 | + start_pos=0; 145 | + csum_offset = ( bytenr - found_key.offset ) / root->fs_info->sectorsize; 146 | + csums_in_item = btrfs_item_size(leaf, path->slots[0]); 147 | + csums_in_item /= csum_size; 148 | + } 149 | + if (csums_in_item > pending_csums){ 150 | + //possibly,some other csums on this item. 151 | + for(i = 0; i < pending_csums; i++, cnt++){ 152 | + read_extent_buffer(leaf, &tree_csum, 153 | + (unsigned long)item + ((i*csum_size)+start_pos) , csum_size); 154 | + fprintf(stdout, "%x ", tree_csum); 155 | + if (cnt % 8 == 0) 156 | + fprintf(stdout, "\n"); 157 | + } 158 | + pending_csums = 0; 159 | + return 0; 160 | + }else{ 161 | + for(i = 0; i < csums_in_item; i++, cnt++){ 162 | + read_extent_buffer(leaf, &tree_csum, 163 | + (unsigned long)item+((i*csum_size)+start_pos), csum_size); 164 | + fprintf(stdout, "%x ", tree_csum); 165 | + if (cnt % 8 == 0) 166 | + fprintf(stdout, "\n"); 167 | + } 168 | + } 169 | + pending_csums -= csums_in_item; 170 | + ret = 0; 171 | + if (pending_csums > 0){ 172 | + path->slots[0]++; 173 | + 174 | + }else 175 | + return 0; 176 | + } 177 | +fail: 178 | + fprintf(stderr, "btrfs_lookup_csums search failed."); 179 | + if (ret > 0) 180 | + ret = -ENOENT; 181 | + return ret; 182 | +} 183 | + 184 | +static int btrfs_lookup_extent(struct btrfs_fs_info *info, struct 
btrfs_path *path, 185 | + u64 ino, int cow){ 186 | + struct btrfs_key key; 187 | + struct btrfs_key found_key; 188 | + struct btrfs_file_extent_item *fi; 189 | + struct extent_buffer *leaf; 190 | + struct btrfs_root *fs_root; 191 | + int ret = -1; 192 | + int slot; 193 | + int total_csums = 0; 194 | + u64 bytenr; 195 | + u64 itemnum = 0; 196 | + struct btrfs_path *path1 = NULL; 197 | + 198 | + fs_root = info->fs_root; 199 | + key.objectid = ino; 200 | + key.type = BTRFS_EXTENT_DATA_KEY; 201 | + key.offset = 0; 202 | + ret = btrfs_search_slot(NULL,fs_root,&key,path,0,0); 203 | + 204 | + if(ret < 0) 205 | + goto error; 206 | + 207 | + if (ret > 1){ 208 | + fprintf(stderr, "Unable to find the entry"); 209 | + return ret; 210 | + } 211 | + struct btrfs_root *csum_root = btrfs_csum_root(info, 0); 212 | + u16 csum_size = btrfs_super_csum_size(csum_root->fs_info->super_copy); 213 | + while(1){ 214 | + leaf = path->nodes[0]; 215 | + slot = path->slots[0]; 216 | + if (slot >= btrfs_header_nritems(leaf)){ 217 | + ret = btrfs_next_leaf(fs_root, path); 218 | + if (ret == 0) 219 | + continue; 220 | + if (ret < 0) 221 | + goto error; 222 | + } 223 | + btrfs_item_key_to_cpu(leaf, &found_key, slot); 224 | + if (found_key.type != BTRFS_EXTENT_DATA_KEY){ 225 | + btrfs_release_path(path); 226 | + return -EINVAL; 227 | + } 228 | + 229 | + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 230 | + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 231 | + total_csums=(btrfs_file_extent_num_bytes(leaf, fi) / 1024) / csum_size; 232 | + path->slots[0]++; 233 | + itemnum++; 234 | + path1 = btrfs_alloc_path(); 235 | + csum_root = btrfs_csum_root(info, 0); 236 | + ret = btrfs_lookup_csums(NULL,csum_root, path1, bytenr, 0, 237 | + total_csums); 238 | + btrfs_release_path(path1); 239 | + if (ret) { 240 | + fprintf(stderr, "\n Error: btrfs_lookup_csum"); 241 | + return 1; 242 | + } 243 | + } 244 | + 245 | +error: 246 | + btrfs_release_path(path); 247 | + return ret; 248 | +} 249 | + 
250 | +static int cmd_inspect_dump_csum(const struct cmd_struct *cmd, 251 | + int argc, char **argv) 252 | +{ /* dump-csum entry point: argv[1] is the file to inspect, argv[2] is handed to open_ctree_fs_info() */ 253 | + struct btrfs_fs_info *info; 254 | + int ret; 255 | + struct btrfs_path path = { 0 }; 256 | + struct stat st; 257 | + char *filename; 258 | + struct open_ctree_args oca = { 0 }; 259 | + oca.flags = OPEN_CTREE_PARTIAL; 260 | + 261 | + if (check_argc_exact(argc, 3)) 262 | + usage_unknown_option(cmd, argv); 263 | + 264 | + oca.filename = argv[2]; /* read only after the argc check; assumes usage_unknown_option() does not return - TODO confirm */ 265 | + filename = argv[1]; 266 | + info = open_ctree_fs_info(&oca); 267 | + if (!info) { 268 | + fprintf(stderr, "unable to open %s\n", argv[2]); 269 | + exit(1); 270 | + } 271 | + 272 | + ret = stat(filename, &st); 273 | + if (ret < 0) { 274 | + fprintf(stderr, "unable to open %s\n", filename); 275 | + exit(1); 276 | + } 277 | + 278 | + if(st.st_size < 1024){ 279 | + fprintf(stderr, "file less than 1KB.abort%lld", (long long)st.st_size); /* st_size is an off_t, so cast to a type that has a printf conversion */ 280 | + exit(1); 281 | + } 282 | + 283 | + ret = btrfs_lookup_extent(info, &path, st.st_ino, 0); 284 | + ret = close_ctree(info->fs_root); /* NOTE(review): overwrites the btrfs_lookup_extent() result above, so lookup errors never reach the exit status - confirm intended */ 285 | + btrfs_close_all_devices(); 286 | + 287 | + return ret; 288 | +} 289 | +DEFINE_SIMPLE_COMMAND(inspect_dump_csum, "dump-csum"); 290 | diff --git a/cmds/inspect.c b/cmds/inspect.c 291 | index d689e085..97688260 100644 292 | --- a/cmds/inspect.c 293 | +++ b/cmds/inspect.c 294 | @@ -1562,6 +1562,7 @@ static const struct cmd_group inspect_cmd_group = { 295 | &cmd_struct_inspect_dump_tree, 296 | &cmd_struct_inspect_dump_super, 297 | &cmd_struct_inspect_tree_stats, 298 | + &cmd_struct_inspect_dump_csum, 299 | &cmd_struct_inspect_list_chunks, 300 | NULL 301 | } 302 | -- 303 | 2.43.0 304 | 305 | -------------------------------------------------------------------------------- /patch/btrfs-progs-v6.3.3/0001-Print-csum-for-a-given-file-on-stdout.patch: -------------------------------------------------------------------------------- 1 | From c2404830ec792b2a0e07881912e0f3844eaca184 Mon Sep 17 00:00:00 2001 2 | From: Anna Lee
<150648636+a-n-n-a-l-e-e@users.noreply.github.com> 3 | Date: Wed, 15 Nov 2023 07:02:43 +0000 4 | Subject: [PATCH] Print csum for a given file on stdout 5 | 6 | --- 7 | Makefile | 2 +- 8 | cmds/commands.h | 1 + 9 | cmds/inspect-dump-csum.c | 244 +++++++++++++++++++++++++++++++++++++++ 10 | cmds/inspect.c | 1 + 11 | 4 files changed, 247 insertions(+), 1 deletion(-) 12 | create mode 100644 cmds/inspect-dump-csum.c 13 | 14 | diff --git a/Makefile b/Makefile 15 | index 30d48ef2..5facde30 100644 16 | --- a/Makefile 17 | +++ b/Makefile 18 | @@ -224,7 +224,7 @@ objects = \ 19 | libbtrfsutil/stubs.o \ 20 | libbtrfsutil/subvolume.o 21 | 22 | -cmds_objects = cmds/subvolume.o cmds/subvolume-list.o \ 23 | +cmds_objects = cmds/subvolume.o cmds/subvolume-list.o cmds/inspect-dump-csum.o \ 24 | cmds/filesystem.o cmds/device.o cmds/scrub.o \ 25 | cmds/inspect.o cmds/balance.o cmds/send.o cmds/receive.o \ 26 | cmds/quota.o cmds/qgroup.o cmds/replace.o check/main.o \ 27 | diff --git a/cmds/commands.h b/cmds/commands.h 28 | index 5ab7c881..df62c6b1 100644 29 | --- a/cmds/commands.h 30 | +++ b/cmds/commands.h 31 | @@ -139,6 +139,7 @@ DECLARE_COMMAND(device); 32 | DECLARE_COMMAND(scrub); 33 | DECLARE_COMMAND(check); 34 | DECLARE_COMMAND(inspect); 35 | +DECLARE_COMMAND(inspect_dump_csum); 36 | DECLARE_COMMAND(inspect_dump_super); 37 | DECLARE_COMMAND(inspect_dump_tree); 38 | DECLARE_COMMAND(inspect_tree_stats); 39 | diff --git a/cmds/inspect-dump-csum.c b/cmds/inspect-dump-csum.c 40 | new file mode 100644 41 | index 00000000..e61c09c3 42 | --- /dev/null 43 | +++ b/cmds/inspect-dump-csum.c 44 | @@ -0,0 +1,244 @@ 45 | +/* 46 | + * This program is free software; you can redistribute it and/or 47 | + * modify it under the terms of the GNU General Public 48 | + * License v2 as published by the Free Software Foundation. 
49 | + * 50 | + * This program is distributed in the hope that it will be useful, 51 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of 52 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 53 | + * General Public License for more details. 54 | + * 55 | + * You should have received a copy of the GNU General Public 56 | + * License along with this program; if not, write to the 57 | + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 58 | + * Boston, MA 021110-1307, USA. 59 | + */ 60 | + 61 | +#include "kerncompat.h" 62 | +#include <stdio.h> 63 | +#include <stdlib.h> 64 | +#include <unistd.h> 65 | +#include <fcntl.h> 66 | +#include <sys/types.h> 67 | +#include <sys/stat.h> 68 | +#include <ctype.h> 69 | +#include <errno.h> 70 | +#include <getopt.h> 71 | + 72 | +#include "kernel-shared/ctree.h" 73 | +#include "kernel-shared/disk-io.h" 74 | +#include "kernel-shared/print-tree.h" 75 | +#include "kernel-shared/transaction.h" 76 | +#include "kernel-lib/list.h" 77 | +#include "common/utils.h" 78 | +#include "cmds/commands.h" 79 | +#include "crypto/crc32c.h" 80 | +#include "common/help.h" 81 | +#include "kernel-shared/volumes.h" 82 | + 83 | + 84 | +static const char * const cmd_inspect_dump_csum_usage[] = { 85 | + "btrfs inspect-internal dump-csum <path/to/file> <device>", 86 | + "Get csums for the given file.", 87 | + NULL 88 | +}; 89 | +/* Search the csum tree 'root' for the item covering bytenr and print up to total_csums checksums to stdout as hex, eight per row; returns 0 on success, negative on failure. */ 90 | +static int btrfs_lookup_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, 91 | + struct btrfs_path *path, u64 bytenr, int cow, int total_csums) 92 | +{ 93 | + int ret; 94 | + int i; 95 | + int start_pos = 0; 96 | + struct btrfs_key file_key; 97 | + struct btrfs_key found_key; 98 | + struct btrfs_csum_item *item; 99 | + struct extent_buffer *leaf; 100 | + u64 csum_offset = 0; 101 | + u16 csum_size = 102 | + btrfs_super_csum_size(root->fs_info->super_copy); 103 | + int csums_in_item = 0; 104 | + unsigned int tree_csum = 0; /* NOTE(review): 4 bytes wide, yet csum_size bytes are copied into it below - overflows for csums wider than 4 bytes; confirm only such csums are supported */ 105 | + int pending_csums = total_csums; 106 | + static int cnt=1; 107 | + 108 | + file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 109 | + file_key.offset = bytenr; 110 |
+ file_key.type = BTRFS_EXTENT_CSUM_KEY; 111 | + ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); 112 | + if (ret < 0) 113 | + goto fail; 114 | + while(1){ 115 | + leaf = path->nodes[0]; 116 | + if (ret > 0) { 117 | + ret = 1; 118 | + if (path->slots[0] == 0) 119 | + goto fail; 120 | + path->slots[0]--; 121 | + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 122 | + if (found_key.type != BTRFS_EXTENT_CSUM_KEY){ 123 | + fprintf(stderr, "\nInvalid key found."); 124 | + goto fail; 125 | + } 126 | + 127 | + csum_offset = ((bytenr - found_key.offset) / root->fs_info->sectorsize) * csum_size; 128 | + csums_in_item = btrfs_item_size(leaf, path->slots[0]); 129 | + csums_in_item /= csum_size; 130 | + csums_in_item -= ( bytenr - found_key.offset ) / root->fs_info->sectorsize; 131 | + start_pos=csum_offset; 132 | + } 133 | + if (path->slots[0] >= btrfs_header_nritems(leaf)) { 134 | + if (pending_csums > 0){ 135 | + ret = btrfs_next_leaf(root, path); 136 | + if (ret) goto fail; /* no further leaf (or an error): never read a slot past the last item */ 137 | + continue; 138 | + } 139 | + } 140 | + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); 141 | + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 142 | + if (!ret){ 143 | + start_pos=0; 144 | + csum_offset = ( bytenr - found_key.offset ) / root->fs_info->sectorsize; 145 | + csums_in_item = btrfs_item_size(leaf, path->slots[0]); 146 | + csums_in_item /= csum_size; 147 | + } 148 | + if (csums_in_item > pending_csums){ 149 | + //possibly,some other csums on this item.
150 | + for(i = 0; i < pending_csums; i++, cnt++){ 151 | + read_extent_buffer(leaf, &tree_csum, 152 | + (unsigned long)item + ((i*csum_size)+start_pos) , csum_size); 153 | + fprintf(stdout, "%x ", tree_csum); 154 | + if (cnt % 8 == 0) 155 | + fprintf(stdout, "\n"); 156 | + } 157 | + pending_csums = 0; 158 | + return 0; 159 | + }else{ 160 | + for(i = 0; i < csums_in_item; i++, cnt++){ 161 | + read_extent_buffer(leaf, &tree_csum, 162 | + (unsigned long)item+((i*csum_size)+start_pos), csum_size); 163 | + fprintf(stdout, "%x ", tree_csum); 164 | + if (cnt % 8 == 0) 165 | + fprintf(stdout, "\n"); 166 | + } 167 | + } 168 | + pending_csums -= csums_in_item; 169 | + ret = 0; 170 | + if (pending_csums > 0){ 171 | + path->slots[0]++; 172 | + 173 | + }else 174 | + return 0; 175 | + } 176 | +fail: 177 | + fprintf(stderr, "btrfs_lookup_csums search failed."); 178 | + if (ret > 0) 179 | + ret = -ENOENT; 180 | + return ret; 181 | +} 182 | +/* Walk the EXTENT_DATA items of inode ino in info->fs_root and print the checksums of each extent through btrfs_lookup_csums(). */ 183 | +static int btrfs_lookup_extent(struct btrfs_fs_info *info, struct btrfs_path *path, 184 | + u64 ino, int cow){ 185 | + struct btrfs_key key; 186 | + struct btrfs_key found_key; 187 | + struct btrfs_file_extent_item *fi; 188 | + struct extent_buffer *leaf; 189 | + struct btrfs_root *fs_root; 190 | + int ret = -1; 191 | + int slot; 192 | + int total_csums = 0; 193 | + u64 bytenr; 194 | + u64 itemnum = 0; 195 | + struct btrfs_path *path1 = NULL; 196 | + 197 | + fs_root = info->fs_root; 198 | + key.objectid = ino; 199 | + key.type = BTRFS_EXTENT_DATA_KEY; 200 | + key.offset = 0; 201 | + ret = btrfs_search_slot(NULL,fs_root,&key,path,0,0); 202 | + 203 | + if(ret < 0) 204 | + goto error; 205 | + 206 | + if (ret > 1){ /* NOTE(review): presumably btrfs_search_slot() only returns <0, 0 or 1, which would make this branch unreachable - confirm */ 207 | + fprintf(stderr, "Unable to find the entry"); 208 | + return ret; 209 | + } 210 | + struct btrfs_root *csum_root = btrfs_csum_root(info, 0); 211 | + u16 csum_size = btrfs_super_csum_size(csum_root->fs_info->super_copy); 212 | + while(1){ 213 | + leaf = path->nodes[0]; 214 | + slot = path->slots[0]; 215 | + if (slot >=
btrfs_header_nritems(leaf)){ 216 | + ret = btrfs_next_leaf(fs_root, path); 217 | + if (ret == 0) 218 | + continue; 219 | + if (ret) /* no next leaf or an error: stop instead of reading a slot past the last item */ 220 | + goto error; 221 | + } 222 | + btrfs_item_key_to_cpu(leaf, &found_key, slot); 223 | + if (found_key.type != BTRFS_EXTENT_DATA_KEY){ 224 | + btrfs_release_path(path); 225 | + return -EINVAL; 226 | + } 227 | + 228 | + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 229 | + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 230 | + total_csums=(btrfs_file_extent_num_bytes(leaf, fi) / 1024) / csum_size; /* NOTE(review): equals num_bytes / sectorsize only for 4K sectors with 4-byte csums - TODO confirm other layouts are out of scope */ 231 | + path->slots[0]++; 232 | + itemnum++; 233 | + path1 = btrfs_alloc_path(); 234 | + csum_root = btrfs_csum_root(info, 0); 235 | + ret = btrfs_lookup_csums(NULL,csum_root, path1, bytenr, 0, 236 | + total_csums); 237 | + btrfs_free_path(path1); /* path1 is allocated once per extent, so free it rather than only releasing it */ 238 | + if (ret) { 239 | + fprintf(stderr, "\n Error: btrfs_lookup_csum"); 240 | + return 1; 241 | + } 242 | + } 243 | + 244 | +error: 245 | + btrfs_release_path(path); 246 | + return ret; 247 | +} 248 | +/* dump-csum entry point: argv[1] is the file whose inode is looked up, argv[2] is handed to open_ctree_fs_info(). */ 249 | +static int cmd_inspect_dump_csum(const struct cmd_struct *cmd, 250 | + int argc, char **argv) 251 | +{ 252 | + struct btrfs_fs_info *info; 253 | + int ret; 254 | + struct btrfs_path path = { 0 }; 255 | + struct stat st; 256 | + char *filename; 257 | + struct open_ctree_args oca = { 0 }; 258 | + oca.flags = OPEN_CTREE_PARTIAL; 259 | + 260 | + if (check_argc_exact(argc, 3)) 261 | + usage_unknown_option(cmd, argv); 262 | + 263 | + oca.filename = argv[2]; /* read only after the argc check; assumes usage_unknown_option() does not return - TODO confirm */ 264 | + filename = argv[1]; 265 | + info = open_ctree_fs_info(&oca); 266 | + if (!info) { 267 | + fprintf(stderr, "unable to open %s\n", argv[2]); 268 | + exit(1); 269 | + } 270 | + 271 | + ret = stat(filename, &st); 272 | + if (ret < 0) { 273 | + fprintf(stderr, "unable to open %s\n", filename); 274 | + exit(1); 275 | + } 276 | + 277 | + if(st.st_size < 1024){ 278 | + fprintf(stderr, "file less than 1KB.abort%lld", (long long)st.st_size); /* st_size is an off_t, so cast to a type that has a printf conversion */ 279 | + exit(1); 280 | + } 281 | + 282 | + ret = btrfs_lookup_extent(info, &path,
st.st_ino, 0); 283 | + ret = close_ctree(info->fs_root); /* NOTE(review): overwrites the btrfs_lookup_extent() result above, so lookup errors never reach the exit status - confirm intended */ 284 | + btrfs_close_all_devices(); 285 | + 286 | + return ret; 287 | +} 288 | +DEFINE_SIMPLE_COMMAND(inspect_dump_csum, "dump-csum"); 289 | diff --git a/cmds/inspect.c b/cmds/inspect.c 290 | index 117efb51..4d9292b9 100644 291 | --- a/cmds/inspect.c 292 | +++ b/cmds/inspect.c 293 | @@ -1537,6 +1537,7 @@ static const struct cmd_group inspect_cmd_group = { 294 | &cmd_struct_inspect_dump_tree, 295 | &cmd_struct_inspect_dump_super, 296 | &cmd_struct_inspect_tree_stats, 297 | + &cmd_struct_inspect_dump_csum, 298 | #if EXPERIMENTAL 299 | &cmd_struct_inspect_list_chunks, 300 | #endif 301 | -- 302 | 2.42.0 303 | 304 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | PTable 3 | -------------------------------------------------------------------------------- /tests/TESTS.md: -------------------------------------------------------------------------------- 1 | This file documents how to use test.py and how the performance numbers were calculated. 2 | 3 | ``` 4 | mkfs.btrfs /dev/xvdc 5 | mount /dev/xvdc /mnt 6 | python test.py -d /mnt 7 | ``` 8 | 9 | Above should create test data on /mnt. This will create files in specific layout format 10 | `fn_<layout>_<seg_size>_<file_size>`. For example, "fn_abcd_1m_4m" 11 | refers to 4mb file with 1mb of a,b,c and d. i.e first 1mb is `a`, second 1mb is `b`, 12 | third 1mb is `c` and fourth `1mb` is d. 13 | 14 | Test run results: 15 | ---------------- 16 | 17 | All three modes saved the same amount of data. Original data: 18 | 19 | ``` 20 | /dev/xvdc 104857600 16626016 87204352 17% /mnt 21 | ``` 22 | 23 | 24 | After dedupe: 25 | 26 | ``` 27 | /dev/xvdc 104857600 26880 103787776 1% /mnt 28 | ``` 29 | 30 | 31 | 1. Running in default mode: 32 | 33 | ``` 34 | dduper --device /dev/xvdc --dir /mnt --chunk-size 1024 35 | 36 | dduper took 2245.63775706 seconds 37 | ``` 38 | 39 | 2.
Running in fast mode: 40 | 41 | ``` 42 | dduper --fast-mode --device /dev/xvdc --dir /mnt --chunk-size 1024 43 | 44 | dduper took 265.656284094 seconds 45 | ``` 46 | 47 | 3. Running insane mode. 48 | 49 | ``` 50 | dduper --fast-mode --skip --device /dev/xvdc --dir /mnt --chunk-size 1024 --recurse 51 | 52 | dduper took 3.16962099075 seconds 53 | ``` 54 | 55 | -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | f1m = 1 4 | f10m = 10 5 | f100m = 100 6 | f512m = 512 7 | mb = 1024 * 1024 8 | layout = ["abcd", "aaaa", "abac", "abcdabcd",'cdcdcd'] 9 | seg_size = [f1m, f10m, f100m, f512m] 10 | 11 | 12 | def file_layout(filename, layout, seg_size): 13 | print("filename:%s layout:%s seg_size:%s file_size:%s" % 14 | (filename, layout, seg_size, len(layout) * seg_size)) 15 | 16 | with open(filename, "w") as fd: 17 | for c in layout: 18 | content = c * (seg_size * mb) 19 | fd.write(content) 20 | 21 | 22 | if __name__ == '__main__': 23 | parser = argparse.ArgumentParser() 24 | 25 | parser.add_argument('-d', 26 | '--dir_path', 27 | action='store', 28 | dest='dir_path', 29 | type=str, 30 | help='BTRFS dir (ex: /mnt/playground) ', 31 | required=True) 32 | 33 | results = parser.parse_args() 34 | print(results.dir_path) 35 | print('*' * 100) 36 | print( 37 | "\t\t\t *** Files format: fn___ ***" 38 | ) 39 | print('*' * 100) 40 | for sz in seg_size: 41 | for lt in layout: 42 | file_layout(results.dir_path+"/fn_" + str(lt) + "_" + str(sz) + "m_" + str(len(lt) * sz) +"m", 43 | lt, sz) 44 | -------------------------------------------------------------------------------- /tests/verify.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "This will create 512mb under /tmp and validates dduper behaviour." 4 | echo "\n Would like to continue?" 
echo "Enter 'ctrl+c' to abort, 'y' to continue:"
# The prompt promises that only 'y' continues, so anything else (including
# EOF on stdin) aborts before a single privileged command has run.
read -r answer || answer=""
if [[ "${answer}" != [yY]* ]]; then
  echo "Aborted." >&2
  exit 1
fi

# Every later step depends on the previous one: stop at the first failure.
set -euo pipefail

readonly IMG_SIZE="512m"       # size of the sparse btrfs image
readonly TEST_FILE_MB=50       # size of the random file that is copied twice
readonly EXPECTED_SAVED_MB=50  # de-duplication should reclaim one full copy

work_dir=""
mnt_dir=""
loop_dev=""
mounted=0

#######################################
# Undo whatever part of the setup succeeded. Runs on every exit path.
# Globals:   mounted, mnt_dir, loop_dev, work_dir (read)
# Arguments: none
# Returns:   nothing; the script keeps the exit status it already had
#######################################
cleanup() {
  set +e  # best effort: keep cleaning up even if one step fails
  if (( mounted )); then
    umount "${mnt_dir}"
  fi
  if [[ -n "${loop_dev}" ]]; then
    losetup -d "${loop_dev}"
  fi
  if [[ -n "${work_dir}" ]]; then
    rm -rf -- "${work_dir:?}"
  fi
}
trap cleanup EXIT

#######################################
# Print the used space of the filesystem holding a path, in whole MiB.
# Arguments: $1 - path on the filesystem to measure
# Outputs:   the number of used MiB on stdout
#######################################
used_mb() {
  df --output=used -m -- "$1" | tail -n 1 | tr -d ' '
}

echo "-------setup image-----------------------"
echo "creating 512mb btrfs img"
# A private directory instead of fixed names in /tmp: this script runs as
# root, and predictable paths there can be pre-created by other users.
work_dir=$(mktemp -d /tmp/dduper-verify.XXXXXX)
img="${work_dir}/img"
mnt_dir="${work_dir}/btrfs_mnt"
src_file="${work_dir}/f1"

mkdir -p -- "${mnt_dir}"
truncate -s "${IMG_SIZE}" -- "${img}"
# Attach the image once and use that same device for mkfs, mount and dduper,
# so dduper reads the very device the filesystem is mounted from.
loop_dev=$(losetup --find --show "${img}")
mkfs.btrfs -f "${loop_dev}"

echo "-------mount image-----------------------"
echo "mounting it under ${mnt_dir}"
mount "${loop_dev}" "${mnt_dir}"
mounted=1

echo "-------setup files-----------------------"
echo "Creating 50mb test file"
dd if=/dev/urandom of="${src_file}" bs=1M count="${TEST_FILE_MB}"

echo "Copying to mount point"
cp -v -- "${src_file}" "${mnt_dir}/f1"
cp -v -- "${src_file}" "${mnt_dir}/f2"
sync

used_before=$(used_mb "${mnt_dir}")

echo "-------dduper verification-----------------------"
echo "Running simple dduper --dry-run"
dduper --device "${loop_dev}" --dir "${mnt_dir}" --dry-run

echo "Running simple dduper in default mode"
dduper --device "${loop_dev}" --dir "${mnt_dir}"

sync
# Kept from the original script; presumably gives the filesystem time to
# update its space accounting before it is measured again.
sleep 5
used_after=$(used_mb "${mnt_dir}")

echo "-------results summary-----------------------"
echo "disk usage before de-dupe: ${used_before} MB"
echo "disk usage after de-dupe: ${used_after} MB"

deduped=$(( used_before - used_after ))

if (( deduped == EXPECTED_SAVED_MB )); then
  echo "dduper verification passed"
else
  echo "dduper verification failed"
  # Report the failure to the caller; the EXIT trap still unmounts and
  # detaches the loop device.
  exit 1
fi